tern coverage - 95.32%

Files
Source

#' Confidence Intervals for a Difference of Binomials
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Several confidence intervals for the difference between proportions.
#'
#' @param grp (`factor`)\cr vector assigning observations to one out of two groups
#'   (e.g. reference and treatment group).
#'
#' @name desctools_binom
NULL

#' Recycle List of Parameters
#'
#' This function recycles all supplied elements to the maximal dimension.
#'
#' @param ... (`any`)\cr Elements to recycle.
#'
#' @return A `list` with one element per supplied argument, each repeated with
#'   `rep(length.out = )` to the length of the longest argument. That common length is
#'   stored in the `maxdim` attribute (`0` when no arguments are supplied).
#'
#' @keywords internal
#' @noRd
h_recycle <- function(...) {
  lst <- list(...)
  # Seed `max()` with 0 so that a call without arguments yields `maxdim = 0`
  # instead of `-Inf` accompanied by a warning.
  maxdim <- max(0L, lengths(lst))
  res <- lapply(lst, rep, length.out = maxdim)
  attr(res, "maxdim") <- maxdim
  res
}

#' @describeIn desctools_binom Several confidence intervals for the difference between proportions.
#'
#' @param x1 (`count`)\cr number of successes in the first group.
#' @param n1 (`count`)\cr number of trials in the first group.
#' @param x2 (`count`)\cr number of successes in the second group.
#' @param n2 (`count`)\cr number of trials in the second group.
#'
#' @return A `matrix` of 3 values:
#'   * `est`: estimate of proportion difference.
#'   * `lwr.ci`: estimate of lower end of the confidence interval.
#'   * `upr.ci`: estimate of upper end of the confidence interval.
#'
#' @examples
#' # Internal function - desctools_binom
#' \dontrun{
#' set.seed(2)
#' rsp <- sample(c(TRUE, FALSE), replace = TRUE, size = 20)
#' grp <- factor(c(rep("A", 10), rep("B", 10)))
#' tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE)))
#' desctools_binom(
#'   tbl[1], sum(tbl[1], tbl[3]), tbl[2], sum(tbl[2], tbl[4]),
#'   conf.level = 0.90, method = "waldcc"
#' )
#' }
#'
#' @keywords internal
desctools_binom <- function(x1, n1, x2, n2, conf.level = 0.95, sides = c( # nolint
                              "two.sided",
                              "left", "right"
                            ), method = c(
                              "ac", "wald", "waldcc", "score",
                              "scorecc", "mn", "mee", "blj", "ha", "hal", "jp"
                            )) {
  # When not supplied, fall back to the first choice ("two.sided" / "ac").
  if (missing(sides)) {
    sides <- match.arg(sides)
  }
  if (missing(method)) {
    method <- match.arg(method)
  }

  # Worker computing one interval for scalar inputs; the vectorisation over all
  # arguments happens further below via `h_recycle()`.
  iBinomDiffCI <- function(x1, n1, x2, n2, conf.level, sides, # nolint
                           method) {
    # A one-sided interval at level `conf.level` is obtained from a two-sided one
    # at level `1 - 2 * (1 - conf.level)`; the unused bound is reset at the end.
    if (sides != "two.sided") {
      conf.level <- 1 - 2 * (1 - conf.level) # nolint
    }
    alpha <- 1 - conf.level
    kappa <- stats::qnorm(1 - alpha / 2)
    p1_hat <- x1 / n1
    p2_hat <- x2 / n2
    est <- p1_hat - p2_hat
    # NOTE(review): the `exact` and `beal` branches are not part of the `method`
    # choices accepted by `match.arg()` below, so they appear unreachable - confirm.
    switch(method,
      wald = {
        vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2
        term2 <- kappa * sqrt(vd)
        ci_lwr <- max(-1, est - term2)
        ci_upr <- min(1, est + term2)
      },
      waldcc = {
        vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2
        term2 <- kappa * sqrt(vd)
        # continuity correction
        term2 <- term2 + 0.5 * (1 / n1 + 1 / n2)
        ci_lwr <- max(-1, est - term2)
        ci_upr <- min(1, est + term2)
      },
      ac = {
        # Wald interval after adding one success and one failure to each group.
        n1 <- n1 + 2
        n2 <- n2 + 2
        x1 <- x1 + 1
        x2 <- x2 + 1
        p1_hat <- x1 / n1
        p2_hat <- x2 / n2
        est1 <- p1_hat - p2_hat
        vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2
        term2 <- kappa * sqrt(vd)
        ci_lwr <- max(-1, est1 - term2)
        ci_upr <- min(1, est1 + term2)
      },
      exact = {
        ci_lwr <- NA
        ci_upr <- NA
      },
      score = {
        # Combines the Wilson limits of the two single proportions.
        w1 <- desctools_binomci(
          x = x1, n = n1, conf.level = conf.level,
          method = "wilson"
        )
        w2 <- desctools_binomci(
          x = x2, n = n2, conf.level = conf.level,
          method = "wilson"
        )
        l1 <- w1[2]
        u1 <- w1[3]
        l2 <- w2[2]
        u2 <- w2[3]
        ci_lwr <- est - kappa * sqrt(l1 * (1 - l1) / n1 +
          u2 * (1 - u2) / n2)
        ci_upr <- est + kappa * sqrt(u1 * (1 - u1) / n1 +
          l2 * (1 - l2) / n2)
      },
      scorecc = {
        # Same construction based on the continuity-corrected Wilson limits.
        w1 <- desctools_binomci(
          x = x1, n = n1, conf.level = conf.level,
          method = "wilsoncc"
        )
        w2 <- desctools_binomci(
          x = x2, n = n2, conf.level = conf.level,
          method = "wilsoncc"
        )
        l1 <- w1[2]
        u1 <- w1[3]
        l2 <- w2[2]
        u2 <- w2[3]
        ci_lwr <- max(-1, est - sqrt((p1_hat - l1)^2 +
          (u2 - p2_hat)^2))
        ci_upr <- min(1, est + sqrt((u1 - p1_hat)^2 + (p2_hat -
          l2)^2))
      },
      mee = {
        # Standard error of the difference evaluated at the estimates constrained
        # to `p1 - p2 = dif` (closed-form solution of a cubic equation).
        .score <- function(p1, n1, p2, n2, dif) {
          if (dif > 1) dif <- 1
          if (dif < -1) dif <- -1
          diff <- p1 - p2 - dif
          if (abs(diff) == 0) {
            res <- 0
          } else {
            t <- n2 / n1
            a <- 1 + t
            b <- -(1 + t + p1 + t * p2 + dif * (t + 2))
            c <- dif * dif + dif * (2 * p1 + t + 1) + p1 +
              t * p2
            d <- -p1 * dif * (1 + dif)
            v <- (b / a / 3)^3 - b * c / (6 * a * a) + d / a / 2
            if (abs(v) < .Machine$double.eps) v <- 0
            s <- sqrt((b / a / 3)^2 - c / a / 3)
            u <- ifelse(v > 0, 1, -1) * s
            # 3.141592654 is a truncated value of pi; it is kept as is so that the
            # numerical results stay unchanged.
            w <- (3.141592654 + acos(v / u^3)) / 3
            p1d <- 2 * u * cos(w) - b / a / 3
            p2d <- p1d - dif
            res <- (p1d * (1 - p1d) / n1 + p2d * (1 - p2d) / n2)
          }
          return(sqrt(res))
        }
        # Two-sided p-value of the score test for `p1 - p2 = delta`.
        pval <- function(delta) {
          z <- (est - delta) / .score(
            p1_hat, n1, p2_hat,
            n2, delta
          )
          2 * min(stats::pnorm(z), 1 - stats::pnorm(z))
        }
        # The limits are the values of `delta` at which the p-value equals `alpha`,
        # searched separately below and above the point estimate.
        ci_lwr <- max(-1, stats::uniroot(function(delta) {
          pval(delta) -
            alpha
        }, interval = c(-1 + 1e-06, est - 1e-06))$root)
        ci_upr <- min(1, stats::uniroot(function(delta) {
          pval(delta) -
            alpha
        }, interval = c(est + 1e-06, 1 - 1e-06))$root)
      },
      blj = {
        p1_dash <- (x1 + 0.5) / (n1 + 1)
        p2_dash <- (x2 + 0.5) / (n2 + 1)
        vd <- p1_dash * (1 - p1_dash) / n1 + p2_dash * (1 -
          p2_dash) / n2
        term2 <- kappa * sqrt(vd)
        est_dash <- p1_dash - p2_dash
        ci_lwr <- max(-1, est_dash - term2)
        ci_upr <- min(1, est_dash + term2)
      },
      ha = {
        term2 <- 1 / (2 * min(n1, n2)) + kappa * sqrt(p1_hat *
          (1 - p1_hat) / (n1 - 1) + p2_hat * (1 - p2_hat) / (n2 -
            1))
        ci_lwr <- max(-1, est - term2)
        ci_upr <- min(1, est + term2)
      },
      mn = {
        # Interval-halving search (at most 50 steps) for the difference at which the
        # score statistic reaches the chi-squared quantile `z`.
        .conf <- function(x1, n1, x2, n2, z, lower = FALSE) {
          p1 <- x1 / n1
          p2 <- x2 / n2
          p_hat <- p1 - p2
          dp <- 1 + ifelse(lower, 1, -1) * p_hat
          i <- 1
          while (i <= 50) {
            dp <- 0.5 * dp
            y <- p_hat + ifelse(lower, -1, 1) * dp
            score <- .score(p1, n1, p2, n2, y)
            if (score < z) {
              p_hat <- y
            }
            # Stop once the step size or the distance to the target is small enough.
            if ((dp < 1e-07) || (abs(z - score) < 1e-06)) {
              break
            } else {
              i <- i + 1
            }
          }
          return(y)
        }
        # Score statistic for `p1 - p2 = dif`, with the variance evaluated at the
        # constrained estimates and scaled by `n / (n - 1)`.
        .score <- function(p1, n1, p2, n2, dif) {
          diff <- p1 - p2 - dif
          if (abs(diff) == 0) {
            res <- 0
          } else {
            t <- n2 / n1
            a <- 1 + t
            b <- -(1 + t + p1 + t * p2 + dif * (t + 2))
            c <- dif * dif + dif * (2 * p1 + t + 1) + p1 +
              t * p2
            d <- -p1 * dif * (1 + dif)
            v <- (b / a / 3)^3 - b * c / (6 * a * a) + d / a / 2
            s <- sqrt((b / a / 3)^2 - c / a / 3)
            u <- ifelse(v > 0, 1, -1) * s
            # Truncated value of pi, kept unchanged to preserve the numerical results.
            w <- (3.141592654 + acos(v / u^3)) / 3
            p1d <- 2 * u * cos(w) - b / a / 3
            p2d <- p1d - dif
            n <- n1 + n2
            var <- (p1d * (1 - p1d) / n1 + p2d * (1 - p2d) / n2) *
              n / (n - 1)
            res <- diff^2 / var
          }
          return(res)
        }
        z <- stats::qchisq(conf.level, 1)
        ci_lwr <- max(-1, .conf(x1, n1, x2, n2, z, TRUE))
        ci_upr <- min(1, .conf(x1, n1, x2, n2, z, FALSE))
      },
      beal = {
        a <- p1_hat + p2_hat
        b <- p1_hat - p2_hat
        u <- ((1 / n1) + (1 / n2)) / 4
        v <- ((1 / n1) - (1 / n2)) / 4
        V <- u * ((2 - a) * a - b^2) + 2 * v * (1 - a) * b # nolint
        z <- stats::qchisq(p = 1 - alpha / 2, df = 1)
        A <- sqrt(z * (V + z * u^2 * (2 - a) * a + z * v^2 * (1 - a)^2)) # nolint
        B <- (b + z * v * (1 - a)) / (1 + z * u) # nolint
        ci_lwr <- max(-1, B - A / (1 + z * u))
        ci_upr <- min(1, B + A / (1 + z * u))
      },
      hal = {
        psi <- (p1_hat + p2_hat) / 2
        u <- (1 / n1 + 1 / n2) / 4
        v <- (1 / n1 - 1 / n2) / 4
        z <- kappa
        theta <- ((p1_hat - p2_hat) + z^2 * v * (1 - 2 *
          psi)) / (1 + z^2 * u)
        w <- z / (1 + z^2 * u) * sqrt(u * (4 * psi * (1 - psi) -
          (p1_hat - p2_hat)^2) + 2 * v * (1 - 2 * psi) *
          (p1_hat - p2_hat) + 4 * z^2 * u^2 * (1 - psi) *
          psi + z^2 * v^2 * (1 - 2 * psi)^2)
        ci_lwr <- max(-1, theta - w)
        ci_upr <- min(1, theta + w)
      },
      jp = {
        # Same as `hal`, but `psi` is based on the proportions shifted by
        # 0.5 successes and 1 trial.
        psi <- 0.5 * ((x1 + 0.5) / (n1 + 1) + (x2 + 0.5) / (n2 +
          1))
        u <- (1 / n1 + 1 / n2) / 4
        v <- (1 / n1 - 1 / n2) / 4
        z <- kappa
        theta <- ((p1_hat - p2_hat) + z^2 * v * (1 - 2 *
          psi)) / (1 + z^2 * u)
        w <- z / (1 + z^2 * u) * sqrt(u * (4 * psi * (1 - psi) -
          (p1_hat - p2_hat)^2) + 2 * v * (1 - 2 * psi) *
          (p1_hat - p2_hat) + 4 * z^2 * u^2 * (1 - psi) *
          psi + z^2 * v^2 * (1 - 2 * psi)^2)
        ci_lwr <- max(-1, theta - w)
        ci_upr <- min(1, theta + w)
      },
    )
    # `min()`/`max()` guarantee that the limits are reported in increasing order.
    ci <- c(
      est = est, lwr.ci = min(ci_lwr, ci_upr),
      upr.ci = max(ci_lwr, ci_upr)
    )
    # One-sided intervals: the bound that is not of interest is set to the edge
    # of the parameter space.
    if (sides == "left") {
      ci[3] <- 1
    } else if (sides == "right") {
      ci[2] <- -1
    }
    return(ci)
  }
  method <- match.arg(arg = method, several.ok = TRUE)
  sides <- match.arg(arg = sides, several.ok = TRUE)
  # Recycle all arguments to a common length and compute one interval per position.
  lst <- h_recycle(
    x1 = x1, n1 = n1, x2 = x2, n2 = n2, conf.level = conf.level,
    sides = sides, method = method
  )
  res <- t(sapply(seq_len(attr(lst, "maxdim")), function(i) {
    iBinomDiffCI(
      x1 = lst$x1[i],
      n1 = lst$n1[i], x2 = lst$x2[i], n2 = lst$n2[i], conf.level = lst$conf.level[i],
      sides = lst$sides[i], method = lst$method[i]
    )
  }))
  # Row labels are assembled from those arguments that actually vary across rows.
  lgn <- h_recycle(x1 = if (is.null(names(x1))) {
    paste("x1", seq_along(x1), sep = ".")
  } else {
    names(x1)
  }, n1 = if (is.null(names(n1))) {
    paste("n1", seq_along(n1), sep = ".")
  } else {
    names(n1)
  }, x2 = if (is.null(names(x2))) {
    paste("x2", seq_along(x2), sep = ".")
  } else {
    names(x2)
  }, n2 = if (is.null(names(n2))) {
    paste("n2", seq_along(n2), sep = ".")
  } else {
    names(n2)
  }, conf.level = conf.level, sides = sides, method = method)
  xn <- apply(as.data.frame(lgn[sapply(lgn, function(x) {
    length(unique(x)) !=
      1
  })]), 1, paste, collapse = ":")
  rownames(res) <- xn
  return(res)
}

#' @describeIn desctools_binom Compute confidence intervals for binomial proportions.
#'
#' @param x (`count`)\cr number of successes
#' @param n (`count`)\cr number of trials
#' @param conf.level (`proportion`)\cr confidence level, defaults to 0.95.
#' @param sides (`character`)\cr side of the confidence interval to compute. Must be one of "two.sided" (default),
#'   "left", or "right".
#' @param method (`character`)\cr method to use. Can be one out of: "wald", "waldcc", "wilson", "wilsoncc",
#'   "agresti-coull", "jeffreys", "modified wilson", "modified jeffreys", "clopper-pearson", "arcsine", "logit",
#'   "witting", "pratt", "midp", "lik", and "blaker".
#' @param rand (`numeric`)\cr seed passed to [set.seed()] by the "witting" method.
#' @param tol (`numeric`)\cr step width used by the "blaker" method when moving the interval limits.
#'
#' @return A `matrix` with 3 columns containing:
#'   * `est`: estimate of the proportion.
#'   * `lwr.ci`: lower end of the confidence interval.
#'   * `upr.ci`: upper end of the confidence interval.
#'
#' @keywords internal
desctools_binomci <- function(x,
                              n,
                              conf.level = 0.95, # nolint
                              sides = c("two.sided", "left", "right"),
                              method = c(
                                "wilson", "wald", "waldcc", "agresti-coull",
                                "jeffreys", "modified wilson", "wilsoncc", "modified jeffreys",
                                "clopper-pearson", "arcsine", "logit", "witting", "pratt",
                                "midp", "lik", "blaker"
                              ),
                              rand = 123,
                              tol = 1e-05) {
  if (missing(method)) {
    method <- "wilson"
  }
  if (missing(sides)) {
    sides <- "two.sided"
  }

  # Worker computing one interval for scalar inputs; the vectorisation over all
  # arguments happens further below.
  iBinomCI <- function(x, n, conf.level = 0.95, sides = c( # nolint
                         "two.sided",
                         "left", "right"
                       ), method = c(
                         "wilson", "wilsoncc", "wald",
                         "waldcc", "agresti-coull", "jeffreys", "modified wilson",
                         "modified jeffreys", "clopper-pearson", "arcsine", "logit",
                         "witting", "pratt", "midp", "lik", "blaker"
                       ), rand = 123,
                       tol = 1e-05) {
    if (length(x) != 1) {
      stop("'x' has to be of length 1 (number of successes)")
    }
    if (length(n) != 1) {
      stop("'n' has to be of length 1 (number of trials)")
    }
    if (length(conf.level) != 1) {
      stop("'conf.level' has to be of length 1 (confidence level)")
    }
    if (conf.level < 0.5 || conf.level > 1) {
      stop("'conf.level' has to be in [0.5, 1]")
    }
    sides <- match.arg(sides, choices = c(
      "two.sided", "left",
      "right"
    ), several.ok = FALSE)
    # A one-sided interval at level `conf.level` is obtained from a two-sided one
    # at level `1 - 2 * (1 - conf.level)`; the unused bound is reset at the end.
    if (sides != "two.sided") {
      conf.level <- 1 - 2 * (1 - conf.level) # nolint
    }
    alpha <- 1 - conf.level
    kappa <- stats::qnorm(1 - alpha / 2)
    p_hat <- x / n
    q_hat <- 1 - p_hat
    est <- p_hat
    switch(match.arg(arg = method, choices = c(
      "wilson",
      "wald", "waldcc", "wilsoncc", "agresti-coull", "jeffreys",
      "modified wilson", "modified jeffreys", "clopper-pearson",
      "arcsine", "logit", "witting", "pratt", "midp", "lik",
      "blaker"
    )),
    wald = {
      term2 <- kappa * sqrt(p_hat * q_hat) / sqrt(n)
      ci_lwr <- max(0, p_hat - term2)
      ci_upr <- min(1, p_hat + term2)
    },
    waldcc = {
      term2 <- kappa * sqrt(p_hat * q_hat) / sqrt(n)
      # continuity correction
      term2 <- term2 + 1 / (2 * n)
      ci_lwr <- max(0, p_hat - term2)
      ci_upr <- min(1, p_hat + term2)
    },
    wilson = {
      term1 <- (x + kappa^2 / 2) / (n + kappa^2)
      term2 <- kappa * sqrt(n) / (n + kappa^2) * sqrt(p_hat *
        q_hat + kappa^2 / (4 * n))
      ci_lwr <- max(0, term1 - term2)
      ci_upr <- min(1, term1 + term2)
    },
    wilsoncc = {
      lci <- (2 * x + kappa^2 - 1 - kappa * sqrt(kappa^2 -
        2 - 1 / n + 4 * p_hat * (n * q_hat + 1))) / (2 *
        (n + kappa^2))
      uci <- (2 * x + kappa^2 + 1 + kappa * sqrt(kappa^2 +
        2 - 1 / n + 4 * p_hat * (n * q_hat - 1))) / (2 *
        (n + kappa^2))
      ci_lwr <- max(0, ifelse(p_hat == 0, 0, lci))
      ci_upr <- min(1, ifelse(p_hat == 1, 1, uci))
    },
    `agresti-coull` = {
      x_tilde <- x + kappa^2 / 2
      n_tilde <- n + kappa^2
      p_tilde <- x_tilde / n_tilde
      q_tilde <- 1 - p_tilde
      # This method also replaces the reported point estimate.
      est <- p_tilde
      term2 <- kappa * sqrt(p_tilde * q_tilde) / sqrt(n_tilde)
      ci_lwr <- max(0, p_tilde - term2)
      ci_upr <- min(1, p_tilde + term2)
    },
    jeffreys = {
      if (x == 0) {
        ci_lwr <- 0
      } else {
        ci_lwr <- stats::qbeta(
          alpha / 2,
          x + 0.5, n - x + 0.5
        )
      }
      if (x == n) {
        ci_upr <- 1
      } else {
        ci_upr <- stats::qbeta(1 -
          alpha / 2, x + 0.5, n - x + 0.5)
      }
    },
    `modified wilson` = {
      term1 <- (x + kappa^2 / 2) / (n + kappa^2)
      term2 <- kappa * sqrt(n) / (n + kappa^2) * sqrt(p_hat *
        q_hat + kappa^2 / (4 * n))
      # For counts close to 0 (respectively close to `n`) the Wilson limit is
      # replaced by a chi-squared based limit.
      if ((n <= 50 && x %in% c(1, 2)) || (n >= 51 && x %in%
        c(1:3))) {
        ci_lwr <- 0.5 * stats::qchisq(alpha, 2 *
          x) / n
      } else {
        ci_lwr <- max(0, term1 - term2)
      }
      if ((n <= 50 && x %in% c(n - 1, n - 2)) || (n >= 51 &&
        x %in% c(n - (1:3)))) {
        ci_upr <- 1 - 0.5 * stats::qchisq(
          alpha,
          2 * (n - x)
        ) / n
      } else {
        ci_upr <- min(1, term1 +
          term2)
      }
    },
    `modified jeffreys` = {
      if (x == n) {
        ci_lwr <- (alpha / 2)^(1 / n)
      } else {
        if (x <= 1) {
          ci_lwr <- 0
        } else {
          ci_lwr <- stats::qbeta(
            alpha / 2,
            x + 0.5, n - x + 0.5
          )
        }
      }
      if (x == 0) {
        ci_upr <- 1 - (alpha / 2)^(1 / n)
      } else {
        if (x >= n - 1) {
          ci_upr <- 1
        } else {
          ci_upr <- stats::qbeta(1 -
            alpha / 2, x + 0.5, n - x + 0.5)
        }
      }
    },
    `clopper-pearson` = {
      ci_lwr <- stats::qbeta(alpha / 2, x, n - x + 1)
      ci_upr <- stats::qbeta(1 - alpha / 2, x + 1, n - x)
    },
    arcsine = {
      p_tilde <- (x + 0.375) / (n + 0.75)
      # This method also replaces the reported point estimate.
      est <- p_tilde
      ci_lwr <- sin(asin(sqrt(p_tilde)) - 0.5 * kappa / sqrt(n))^2
      ci_upr <- sin(asin(sqrt(p_tilde)) + 0.5 * kappa / sqrt(n))^2
    },
    logit = {
      # Wald interval on the log-odds scale, transformed back to a proportion.
      lambda_hat <- log(x / (n - x))
      V_hat <- n / (x * (n - x)) # nolint
      lambda_lower <- lambda_hat - kappa * sqrt(V_hat)
      lambda_upper <- lambda_hat + kappa * sqrt(V_hat)
      ci_lwr <- exp(lambda_lower) / (1 + exp(lambda_lower))
      ci_upr <- exp(lambda_upper) / (1 + exp(lambda_upper))
    },
    witting = {
      # Randomised interval: the seed is fixed through `rand` so that results are
      # reproducible. Note that this resets the state of the global RNG.
      set.seed(rand)
      x_tilde <- x + stats::runif(1, min = 0, max = 1)
      pbinom_abscont <- function(q, size, prob) {
        v <- trunc(q)
        term1 <- stats::pbinom(v - 1, size = size, prob = prob)
        term2 <- (q - v) * stats::dbinom(v, size = size, prob = prob)
        return(term1 + term2)
      }
      qbinom_abscont <- function(p, size, x) {
        fun <- function(prob, size, x, p) {
          pbinom_abscont(x, size, prob) - p
        }
        stats::uniroot(fun,
          interval = c(0, 1), size = size,
          x = x, p = p
        )$root
      }
      ci_lwr <- qbinom_abscont(1 - alpha, size = n, x = x_tilde)
      ci_upr <- qbinom_abscont(alpha, size = n, x = x_tilde)
    },
    pratt = {
      # Closed-form limits for the boundary counts, approximation otherwise.
      if (x == 0) {
        ci_lwr <- 0
        ci_upr <- 1 - alpha^(1 / n)
      } else if (x == 1) {
        ci_lwr <- 1 - (1 - alpha / 2)^(1 / n)
        ci_upr <- 1 - (alpha / 2)^(1 / n)
      } else if (x == (n - 1)) {
        ci_lwr <- (alpha / 2)^(1 / n)
        ci_upr <- (1 - alpha / 2)^(1 / n)
      } else if (x == n) {
        ci_lwr <- alpha^(1 / n)
        ci_upr <- 1
      } else {
        z <- stats::qnorm(1 - alpha / 2)
        A <- ((x + 1) / (n - x))^2 # nolint
        B <- 81 * (x + 1) * (n - x) - 9 * n - 8 # nolint
        C <- (0 - 3) * z * sqrt(9 * (x + 1) * (n - x) * (9 * n + 5 - z^2) + n + 1) # nolint
        D <- 81 * (x + 1)^2 - 9 * (x + 1) * (2 + z^2) + 1 # nolint
        E <- 1 + A * ((B + C) / D)^3 # nolint
        ci_upr <- 1 / E
        A <- (x / (n - x - 1))^2 # nolint
        B <- 81 * x * (n - x - 1) - 9 * n - 8 # nolint
        C <- 3 * z * sqrt(9 * x * (n - x - 1) * (9 * n + 5 - z^2) + n + 1) # nolint
        D <- 81 * x^2 - 9 * x * (2 + z^2) + 1 # nolint
        E <- 1 + A * ((B + C) / D)^3 # nolint
        ci_lwr <- 1 / E
      }
    },
    midp = {
      # Tail probabilities in which the observed count enters with half its
      # probability; the limits are the roots of these functions.
      f_low <- function(pi, x, n) {
        1 / 2 * stats::dbinom(x, size = n, prob = pi) + stats::pbinom(x,
          size = n, prob = pi, lower.tail = FALSE
        ) -
          (1 - conf.level) / 2
      }
      f_up <- function(pi, x, n) {
        1 / 2 * stats::dbinom(x, size = n, prob = pi) + stats::pbinom(x -
          1, size = n, prob = pi) - (1 - conf.level) / 2
      }
      ci_lwr <- 0
      ci_upr <- 1
      if (x != 0) {
        ci_lwr <- stats::uniroot(f_low,
          interval = c(0, p_hat),
          x = x, n = n
        )$root
      }
      if (x != n) {
        ci_upr <- stats::uniroot(f_up, interval = c(
          p_hat,
          1
        ), x = x, n = n)$root
      }
    },
    lik = {
      ci_lwr <- 0
      ci_upr <- 1
      z <- stats::qnorm(1 - alpha * 0.5)
      # Within this branch the `tol` argument is replaced by a fixed tolerance.
      tol <- .Machine$double.eps^0.5
      # Signed square root of the binomial deviance between `y` and `mu`, shifted by `bound`.
      BinDev <- function(y, x, mu, wt, bound = 0, tol = .Machine$double.eps^0.5, # nolint
                         ...) {
        ll_y <- ifelse(y %in% c(0, 1), 0, stats::dbinom(x, wt,
          y,
          log = TRUE
        ))
        ll_mu <- ifelse(mu %in% c(0, 1), 0, stats::dbinom(x,
          wt, mu,
          log = TRUE
        ))
        res <- ifelse(abs(y - mu) < tol, 0, sign(y -
          mu) * sqrt(-2 * (ll_y - ll_mu)))
        return(res - bound)
      }
      # An `if` without `else` yields `NULL` when the condition fails; `max(0, NULL)` and
      # `min(1, NULL)` further below then fall back to the limits 0 and 1.
      if (x != 0 && tol < p_hat) {
        ci_lwr <- if (BinDev(
          tol, x, p_hat, n, -z,
          tol
        ) <= 0) {
          stats::uniroot(
            f = BinDev, interval = c(tol, if (p_hat <
              tol || p_hat == 1) {
              1 - tol
            } else {
              p_hat
            }), bound = -z,
            x = x, mu = p_hat, wt = n
          )$root
        }
      }
      if (x != n && p_hat < (1 - tol)) {
        # NOTE(review): in the first branch below `ci_upr` receives the value of the nested
        # assignment to `ci_lwr`. This looks unintended, but the logic is left untouched
        # here - confirm against the upstream implementation before changing it.
        ci_upr <- if (BinDev(y = 1 - tol, x = x, mu = ifelse(p_hat >
          1 - tol, tol, p_hat), wt = n, bound = z, tol = tol) <
          0) {
          ci_lwr <- if (BinDev(
            tol, x, if (p_hat <
              tol || p_hat == 1) {
              1 - tol
            } else {
              p_hat
            }, n,
            -z, tol
          ) <= 0) {
            stats::uniroot(
              f = BinDev, interval = c(tol, p_hat),
              bound = -z, x = x, mu = p_hat, wt = n
            )$root
          }
        } else {
          stats::uniroot(
            f = BinDev, interval = c(if (p_hat >
              1 - tol) {
              tol
            } else {
              p_hat
            }, 1 - tol), bound = z,
            x = x, mu = p_hat, wt = n
          )$root
        }
      }
    },
    blaker = {
      acceptbin <- function(x, n, p) {
        p1 <- 1 - stats::pbinom(x - 1, n, p)
        p2 <- stats::pbinom(x, n, p)
        a1 <- p1 + stats::pbinom(stats::qbinom(p1, n, p) - 1, n, p)
        a2 <- p2 + 1 - stats::pbinom(
          stats::qbinom(1 - p2, n, p), n,
          p
        )
        return(min(a1, a2))
      }
      ci_lwr <- 0
      ci_upr <- 1
      # Start from the limits given by the same beta quantiles as "clopper-pearson"
      # and move inwards in steps of `tol` while `acceptbin()` stays below `1 - conf.level`.
      if (x != 0) {
        ci_lwr <- stats::qbeta((1 - conf.level) / 2, x, n -
          x + 1)
        while (acceptbin(x, n, ci_lwr + tol) < (1 -
          conf.level)) {
          ci_lwr <- ci_lwr + tol
        }
      }
      if (x != n) {
        ci_upr <- stats::qbeta(1 - (1 - conf.level) / 2, x +
          1, n - x)
        while (acceptbin(x, n, ci_upr - tol) < (1 -
          conf.level)) {
          ci_upr <- ci_upr - tol
        }
      }
    }
    )
    ci <- c(est = est, lwr.ci = max(0, ci_lwr), upr.ci = min(
      1,
      ci_upr
    ))
    # One-sided intervals: the bound that is not of interest is set to the edge
    # of the parameter space.
    if (sides == "left") {
      ci[3] <- 1
    } else if (sides == "right") {
      ci[2] <- 0
    }
    return(ci)
  }
  # Recycle all arguments to a common length. `tol` is part of the list so that the
  # value supplied by the caller actually reaches the worker (it used to be dropped).
  lst <- list(
    x = x, n = n, conf.level = conf.level, sides = sides,
    method = method, rand = rand, tol = tol
  )
  maxdim <- max(unlist(lapply(lst, length)))
  lgp <- lapply(lst, rep, length.out = maxdim)
  # Row labels are assembled from those arguments that actually vary across rows.
  lgn <- h_recycle(x = if (is.null(names(x))) {
    paste("x", seq_along(x), sep = ".")
  } else {
    names(x)
  }, n = if (is.null(names(n))) {
    paste("n", seq_along(n), sep = ".")
  } else {
    names(n)
  }, conf.level = conf.level, sides = sides, method = method)
  xn <- apply(as.data.frame(lgn[sapply(lgn, function(x) {
    length(unique(x)) !=
      1
  })]), 1, paste, collapse = ":")
  res <- t(sapply(seq_len(maxdim), function(i) {
    iBinomCI(
      x = lgp$x[i],
      n = lgp$n[i], conf.level = lgp$conf.level[i], sides = lgp$sides[i],
      method = lgp$method[i], rand = lgp$rand[i], tol = lgp$tol[i]
    )
  }))
  colnames(res)[1] <- c("est")
  rownames(res) <- xn
  return(res)
}

#' Missing Data
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Make missing values explicit by replacing them with a string (character input) or a
#' dedicated factor level (factor input).
#'
#' @param x (`factor` or `character` vector)\cr values in which missing entries should be substituted.
#' @param label (`character`)\cr string used in place of missing data.
#'
#' @return `x` with any `NA` values substituted by `label`.
#'
#' @examples
#' explicit_na(c(NA, "a", "b"))
#' is.na(explicit_na(c(NA, "a", "b")))
#'
#' explicit_na(factor(c(NA, "a", "b")))
#' is.na(explicit_na(factor(c(NA, "a", "b"))))
#'
#' explicit_na(sas_na(c("a", "")))
#'
#' @export
explicit_na <- function(x, label = "<Missing>") {
  checkmate::assert_string(label)

  # Factors: turn `NA` into the level `label`, then drop that level again if it is unused.
  if (is.factor(x)) {
    with_na_level <- forcats::fct_na_value_to_level(x, label)
    return(forcats::fct_drop(with_na_level, only = label))
  }

  if (!is.character(x)) {
    stop("only factors and character vectors allowed")
  }

  # Character vectors: plain in-place substitution.
  replace(x, is.na(x), label)
}

#' Convert Strings to `NA`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' SAS imports missing data as empty strings or strings with whitespaces only. This helper function can be used to
#' convert these values to `NA`s.
#'
#' @inheritParams explicit_na
#' @param empty (`logical`)\cr if `TRUE` empty strings get replaced by `NA`.
#' @param whitespaces (`logical`)\cr if `TRUE` then strings made from whitespaces only get replaced with `NA`.
#'
#' @return `x` with `""` and/or whitespace-only values substituted by `NA`, depending on the values of
#'   `empty` and `whitespaces`.
#'
#' @examples
#' sas_na(c("1", "", " ", "   ", "b"))
#' sas_na(factor(c("", " ", "b")))
#'
#' is.na(sas_na(c("1", "", " ", "   ", "b")))
#'
#' @export
sas_na <- function(x, empty = TRUE, whitespaces = TRUE) {
  checkmate::assert_flag(empty)
  checkmate::assert_flag(whitespaces)

  if (is.factor(x)) {
    # `%in%` never returns `NA`. With `==`, a factor carrying an explicit `NA` level (and
    # no `""` level) made `any()` return `NA`, and the `if ()` below failed with an error.
    empty_levels <- levels(x) %in% ""
    if (empty && any(empty_levels)) levels(x)[empty_levels] <- NA

    # Evaluated after the step above so that the mask matches the current levels.
    ws_levels <- grepl("^\\s+$", levels(x))
    if (whitespaces && any(ws_levels)) levels(x)[ws_levels] <- NA

    x
  } else if (is.character(x)) {
    # Elements that are already `NA` are left untouched.
    if (empty) x[x %in% ""] <- NA_character_

    if (whitespaces) x[grepl("^\\s+$", x)] <- NA_character_

    x
  } else {
    stop("only factors and character vectors allowed")
  }
}

#' Compare Variables Between Groups
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Comparison with a reference group for different `x` objects.
#'
#' @inheritParams argument_convention
#'
#' @note
#' * For factor variables, `denom` for factor proportions can only be `n` since the purpose is to compare proportions
#'   between columns, therefore a row-based proportion would not make sense. Proportion based on `N_col` would
#'   be difficult since we use counts for the chi-squared test statistic, therefore missing values should be accounted
#'   for as explicit factor levels.
#' * If factor variables contain `NA`, these `NA` values are excluded by default. To include `NA` values
#'   set `na.rm = FALSE` and missing values will be displayed as an `NA` level. Alternatively, an explicit
#'   factor level can be defined for `NA` values during pre-processing via [df_explicit_na()] - the
#'   default `na_level` (`"<Missing>"`) will also be excluded when `na.rm` is set to `TRUE`.
#' * For character variables, automatic conversion to factor does not guarantee that the table
#'   will be generated correctly. In particular for sparse tables this very likely can fail.
#'   Therefore it is always better to manually convert character variables to factors during pre-processing.
#' * For `compare_vars()`, the column split must define a reference group via `ref_group` so that the comparison
#'   is well defined.
#'
#' @seealso Relevant constructor function [create_afun_compare()], and [s_summary()] which is used internally
#'   to compute a summary within `s_compare()`.
#'
#' @name compare_variables
#' @include summarize_variables.R
NULL

#' @describeIn compare_variables S3 generic function to produce a comparison summary.
#'   Dispatch happens on the class of `x`.
#'
#' @return
#' * `s_compare()` returns output of [s_summary()] and comparisons versus the reference group in the form of p-values.
#'
#' @export
s_compare <- function(x, .ref_group, .in_ref_col, ...) {
  # `UseMethod()` dispatches on the first argument of the enclosing function, i.e. `x`.
  UseMethod("s_compare")
}

#' @describeIn compare_variables Method for `numeric` class. This uses the standard t-test
#'   to calculate the p-value.
#'
#' @method s_compare numeric
#'
#' @examples
#' # `s_compare.numeric`
#'
#' ## Usual case where both this and the reference group vector have more than 1 value.
#' s_compare(rnorm(10, 5, 1), .ref_group = rnorm(5, -5, 1), .in_ref_col = FALSE)
#'
#' ## If one group has not more than 1 value, then p-value is not calculated.
#' s_compare(rnorm(10, 5, 1), .ref_group = 1, .in_ref_col = FALSE)
#'
#' ## Empty numeric does not fail, it returns NA-filled items and no p-value.
#' s_compare(numeric(), .ref_group = numeric(), .in_ref_col = FALSE)
#'
#' @export
s_compare.numeric <- function(x,
                              .ref_group,
                              .in_ref_col,
                              ...) {
  checkmate::assert_numeric(x)
  checkmate::assert_numeric(.ref_group)
  checkmate::assert_flag(.in_ref_col)

  out <- s_summary.numeric(x = x, ...)

  # The test is only run outside the reference column and when both groups
  # contribute more than one available value.
  can_test <- !.in_ref_col && n_available(x) > 1 && n_available(.ref_group) > 1

  if (can_test) {
    out$pval <- stats::t.test(x, .ref_group)$p.value
  } else {
    # Empty placeholder when no p-value is calculated.
    out$pval <- character()
  }

  out
}

#' @describeIn compare_variables Method for `factor` class. This uses the chi-squared test
#'   to calculate the p-value.
#'
#' @param denom (`string`)\cr choice of denominator for factor proportions,
#'   can only be `n` (number of values in this row and column intersection).
#'
#' @method s_compare factor
#'
#' @examples
#' # `s_compare.factor`
#'
#' ## Basic usage:
#' x <- factor(c("a", "a", "b", "c", "a"))
#' y <- factor(c("a", "b", "c"))
#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE)
#'
#' ## Management of NA values.
#' x <- explicit_na(factor(c("a", "a", "b", "c", "a", NA, NA)))
#' y <- explicit_na(factor(c("a", "b", "c", NA)))
#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE, na.rm = TRUE)
#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE, na.rm = FALSE)
#'
#' @export
s_compare.factor <- function(x,
                             .ref_group,
                             .in_ref_col,
                             denom = "n",
                             na.rm = TRUE, # nolint
                             ...) {
  checkmate::assert_flag(.in_ref_col)
  assert_valid_factor(x)
  assert_valid_factor(.ref_group)
  denom <- match.arg(denom)

  out <- s_summary.factor(x = x, denom = denom, na.rm = na.rm, ...)

  # Treat missing values the same way in both groups before comparing them:
  # either remove them (including the "<Missing>" level) or make them an "NA" level.
  if (na.rm) {
    x <- fct_discard(x[!is.na(x)], "<Missing>")
    .ref_group <- fct_discard(.ref_group[!is.na(.ref_group)], "<Missing>")
  } else {
    x <- explicit_na(x, label = "NA")
    .ref_group <- explicit_na(.ref_group, label = "NA")
  }

  checkmate::assert_factor(x, levels = levels(.ref_group), min.levels = 2)

  can_test <- !.in_ref_col && length(x) > 0 && length(.ref_group) > 0

  out$pval <- if (can_test) {
    # One row of level counts per group.
    counts <- rbind(table(x), table(.ref_group))
    suppressWarnings(stats::chisq.test(counts))$p.value
  } else {
    # Empty placeholder when no p-value is calculated.
    character()
  }

  out
}

#' @describeIn compare_variables Method for `character` class. This makes an automatic
#'   conversion to `factor` (with a warning) and then forwards to the method for factors.
#'
#' @param verbose (`logical`)\cr Whether warnings and messages should be printed. Mainly used
#'   to print out information about factor casting. Defaults to `TRUE`.
#'
#' @method s_compare character
#'
#' @examples
#' # `s_compare.character`
#'
#' ## Basic usage:
#' x <- c("a", "a", "b", "c", "a")
#' y <- c("a", "b", "c")
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, .var = "x", verbose = FALSE)
#'
#' ## Note that missing values handling can make a large difference:
#' x <- c("a", "a", "b", "c", "a", NA)
#' y <- c("a", "b", "c", rep(NA, 20))
#' s_compare(x,
#'   .ref_group = y, .in_ref_col = FALSE,
#'   .var = "x", verbose = FALSE
#' )
#' s_compare(x,
#'   .ref_group = y, .in_ref_col = FALSE, .var = "x",
#'   na.rm = FALSE, verbose = FALSE
#' )
#'
#' @export
s_compare.character <- function(x,
                                .ref_group,
                                .in_ref_col,
                                denom = "n",
                                na.rm = TRUE, # nolint
                                .var,
                                verbose = TRUE,
                                ...) {
  # Convert both groups to factors; `.var` is passed on as the variable name and
  # `verbose` as the verbosity switch of the conversion helper.
  x_fct <- as_factor_keep_attributes(x, x_name = .var, verbose = verbose)
  ref_fct <- as_factor_keep_attributes(.ref_group, x_name = .var, verbose = verbose)

  # Re-dispatch through the generic with the converted inputs.
  s_compare(
    x = x_fct, .ref_group = ref_fct, .in_ref_col = .in_ref_col,
    denom = denom, na.rm = na.rm, ...
  )
}

#' @describeIn compare_variables Method for `logical` class. A chi-squared test
#'   is used. If missing values are not removed, then they are counted as `FALSE`.
#'
#' @method s_compare logical
#'
#' @examples
#' # `s_compare.logical`
#'
#' ## Basic usage:
#' x <- c(TRUE, FALSE, TRUE, TRUE)
#' y <- c(FALSE, FALSE, TRUE)
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE)
#'
#' ## Management of NA values.
#' x <- c(NA, TRUE, FALSE)
#' y <- c(NA, NA, NA, NA, FALSE)
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = TRUE)
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = FALSE)
#'
#' @export
s_compare.logical <- function(x,
                              .ref_group,
                              .in_ref_col,
                              na.rm = TRUE, # nolint
                              denom = "n",
                              ...) {
  denom <- match.arg(denom)

  # The summary statistics are computed on `x` as supplied, i.e. before the
  # missing-value handling below is applied.
  stats_list <- s_summary.logical(x = x, na.rm = na.rm, denom = denom, ...)

  # Missing values are either dropped from both groups or counted as `FALSE`.
  if (na.rm) {
    x <- stats::na.omit(x)
    .ref_group <- stats::na.omit(.ref_group)
  } else {
    x[is.na(x)] <- FALSE
    .ref_group[is.na(.ref_group)] <- FALSE
  }

  # No test in the reference column or when either group is empty: `pval` is
  # then an empty character vector.
  skip_test <- .in_ref_col || length(x) == 0 || length(.ref_group) == 0
  if (skip_test) {
    stats_list$pval <- character()
    return(stats_list)
  }

  # Fixing the levels to TRUE/FALSE gives both count tables the same two columns
  # even if one of the values does not occur in a group.
  x_fct <- factor(x, levels = c(TRUE, FALSE))
  ref_fct <- factor(.ref_group, levels = c(TRUE, FALSE))
  counts <- rbind(table(x_fct), table(ref_fct))
  stats_list$pval <- suppressWarnings(prop_chisq(counts))

  stats_list
}

#' @describeIn compare_variables Formatted analysis function which is used as `afun`
#'   in `compare_vars()`.
#'
#' @return
#' * `a_compare()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @export
a_compare <- function(x, .ref_group, .in_ref_col, ..., .var) {
  # S3 generic: dispatch happens on the first argument, `x`.
  UseMethod("a_compare")
}

#' @describeIn compare_variables Formatted analysis function method for `numeric` class.
#'
#' @examples
#' # `a_compare.numeric`
#' a_compare(
#'   rnorm(10, 5, 1),
#'   .ref_group = rnorm(20, -5, 1),
#'   .in_ref_col = FALSE,
#'   .var = "bla"
#' )
#'
#' @export
a_compare.numeric <- make_afun(
  fun = s_compare.numeric,
  # Numeric summary formats/labels extended by an entry for the p-value.
  .formats = c(.a_summary_numeric_formats, pval = "x.xxxx | (<0.0001)"),
  .labels = c(.a_summary_numeric_labels, pval = "p-value (t-test)"),
  .null_ref_cells = FALSE
)

# Formats shared by the count-based `a_compare` methods (factor, character,
# logical): the count summary formats plus a format for the p-value.
.a_compare_counts_formats <- c(.a_summary_counts_formats, pval = "x.xxxx | (<0.0001)")

# Labels shared by the count-based `a_compare` methods; only the p-value has one.
.a_compare_counts_labels <- c(pval = "p-value (chi-squared test)")

#' @describeIn compare_variables Formatted analysis function method for `factor` class.
#'
#' @examples
#' # `a_compare.factor`
#' # We need to ungroup `count` and `count_fraction` first so that the `rtables` formatting
#' # functions can be applied correctly.
#' afun <- make_afun(
#'   getS3method("a_compare", "factor"),
#'   .ungroup_stats = c("count", "count_fraction")
#' )
#' x <- factor(c("a", "a", "b", "c", "a"))
#' y <- factor(c("a", "a", "b", "c"))
#' afun(x, .ref_group = y, .in_ref_col = FALSE)
#'
#' @export
a_compare.factor <- make_afun(
  fun = s_compare.factor,
  # Shared formats/labels of the count-based comparison methods.
  .labels = .a_compare_counts_labels,
  .formats = .a_compare_counts_formats,
  .null_ref_cells = FALSE
)

#' @describeIn compare_variables Formatted analysis function method for `character` class.
#'
#' @examples
#' # `a_compare.character`
#' afun <- make_afun(
#'   getS3method("a_compare", "character"),
#'   .ungroup_stats = c("count", "count_fraction")
#' )
#' x <- c("A", "B", "A", "C")
#' y <- c("B", "A", "C")
#' afun(x, .ref_group = y, .in_ref_col = FALSE, .var = "x", verbose = FALSE)
#'
#' @export
a_compare.character <- make_afun(
  fun = s_compare.character,
  # Shared formats/labels of the count-based comparison methods.
  .labels = .a_compare_counts_labels,
  .formats = .a_compare_counts_formats,
  .null_ref_cells = FALSE
)

#' @describeIn compare_variables Formatted analysis function method for `logical` class.
#'
#' @examples
#' # `a_compare.logical`
#' afun <- make_afun(
#'   getS3method("a_compare", "logical")
#' )
#' x <- c(TRUE, FALSE, FALSE, TRUE, TRUE)
#' y <- c(TRUE, FALSE)
#' afun(x, .ref_group = y, .in_ref_col = FALSE)
#'
#' @export
a_compare.logical <- make_afun(
  fun = s_compare.logical,
  # Shared formats/labels of the count-based comparison methods.
  .labels = .a_compare_counts_labels,
  .formats = .a_compare_counts_formats,
  .null_ref_cells = FALSE
)

#' Constructor Function for [compare_vars()]
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Constructor function which creates a combined formatted analysis function.
#'
#' @inheritParams argument_convention
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#'
#' @return Combined formatted analysis function for use in [compare_vars()].
#'
#' @note Since [a_compare()] is generic and we want customization of the formatting arguments
#'   via [rtables::make_afun()], we need to create another temporary generic function, with
#'   corresponding customized methods. Then in order for the methods to be found,
#'   we need to wrap them in a combined `afun`. Since this is required by two layout creating
#'   functions (and possibly others in the future), we provide a constructor that does this:
#'   [create_afun_compare()].
#'
#' @seealso [compare_vars()]
#'
#' @examples
#' # `create_afun_compare()` to create combined `afun`
#'
#' afun <- create_afun_compare(
#'   .stats = c("n", "count_fraction", "mean_sd", "pval"),
#'   .indent_mods = c(pval = 1L)
#' )
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARMCD", ref_group = "ARM A") %>%
#'   analyze(
#'     "AGE",
#'     afun = afun,
#'     show_labels = "visible"
#'   )
#' build_table(lyt, df = tern_ex_adsl)
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARMCD", ref_group = "ARM A") %>%
#'   analyze(
#'     "SEX",
#'     afun = afun,
#'     show_labels = "visible"
#'   )
#' build_table(lyt, df = tern_ex_adsl)
#'
#' @export
create_afun_compare <- function(.stats = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  function(x,
           .ref_group,
           .in_ref_col,
           ...,
           .var) {
    # Temporary S3 generic. Its methods are the `afun.<class>` functions defined
    # below in this same function body, so their names must follow that pattern.
    afun <- function(x, ...) {
      UseMethod("afun", x)
    }

    # --- numeric method -----------------------------------------------------
    numeric_stats <- afun_selected_stats(
      .stats,
      all_stats = c(names(.a_summary_numeric_formats), "pval")
    )
    afun.numeric <- make_afun( # nolint
      a_compare.numeric,
      .stats = numeric_stats,
      .formats = extract_by_name(.formats, numeric_stats),
      .labels = extract_by_name(.labels, numeric_stats),
      .indent_mods = extract_by_name(.indent_mods, numeric_stats),
      .null_ref_cells = FALSE
    )

    # --- count-based methods (factor, character, logical) ---------------------
    # These three methods share the same selected statistics and the same
    # customizations, so the subsets are extracted once and reused.
    factor_stats <- afun_selected_stats(
      .stats,
      all_stats = names(.a_compare_counts_formats)
    )
    ungroup_stats <- afun_selected_stats(.stats, c("count", "count_fraction"))
    counts_formats <- extract_by_name(.formats, factor_stats)
    counts_labels <- extract_by_name(.labels, factor_stats)
    counts_indent_mods <- extract_by_name(.indent_mods, factor_stats)

    afun.factor <- make_afun( # nolint
      a_compare.factor,
      .stats = factor_stats,
      .formats = counts_formats,
      .labels = counts_labels,
      .indent_mods = counts_indent_mods,
      .ungroup_stats = ungroup_stats,
      .null_ref_cells = FALSE
    )

    afun.character <- make_afun( # nolint
      a_compare.character,
      .stats = factor_stats,
      .formats = counts_formats,
      .labels = counts_labels,
      .indent_mods = counts_indent_mods,
      .ungroup_stats = ungroup_stats,
      .null_ref_cells = FALSE
    )

    # The logical method does not ungroup any statistics.
    afun.logical <- make_afun( # nolint
      a_compare.logical,
      .stats = factor_stats,
      .formats = counts_formats,
      .labels = counts_labels,
      .indent_mods = counts_indent_mods,
      .null_ref_cells = FALSE
    )

    # Dispatch on the class of `x` to one of the methods created above.
    afun(x = x, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ..., .var = .var)
  }
}

#' @describeIn compare_variables Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param ... arguments passed to `s_compare()`.
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#'
#' @return
#' * `compare_vars()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_compare()` to the table layout.
#'
#' @examples
#' # `compare_vars()` in `rtables` pipelines
#'
#' ## Default output within a `rtables` pipeline.
#' lyt <- basic_table() %>%
#'   split_cols_by("ARMCD", ref_group = "ARM B") %>%
#'   compare_vars(c("AGE", "SEX"))
#' build_table(lyt, tern_ex_adsl)
#'
#' ## Select and format statistics output.
#' lyt <- basic_table() %>%
#'   split_cols_by("ARMCD", ref_group = "ARM C") %>%
#'   compare_vars(
#'     vars = "AGE",
#'     .stats = c("mean_sd", "pval"),
#'     .formats = c(mean_sd = "xx.x, xx.x"),
#'     .labels = c(mean_sd = "Mean, SD")
#'   )
#' build_table(lyt, df = tern_ex_adsl)
#'
#' @export
compare_vars <- function(lyt,
                         vars,
                         var_labels = vars,
                         nested = TRUE,
                         ...,
                         na_level = NA_character_,
                         show_labels = "default",
                         table_names = vars,
                         .stats = c("n", "mean_sd", "count_fraction", "pval"),
                         .formats = NULL,
                         .labels = NULL,
                         .indent_mods = NULL) {
  # Combined analysis function carrying the requested statistics and their
  # formatting customizations.
  afun <- create_afun_compare(
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Additional arguments in `...` are forwarded to the analysis function via
  # `extra_args`; `na_level` is passed on as `na_str`.
  analyze(
    lyt = lyt, vars = vars, var_labels = var_labels,
    afun = afun,
    nested = nested,
    extra_args = list(...),
    na_str = na_level, inclNAs = TRUE,
    show_labels = show_labels, table_names = table_names
  )
}

#' Summarize numeric variables in columns
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Layout-creating function which can be used for creating column-wise summary tables.
#' This function sets the analysis methods as column labels and is a wrapper for
#' [rtables::analyze_colvars()]. It was designed principally for PK tables.
#'
#' @inheritParams argument_convention
#' @inheritParams rtables::analyze_colvars
#' @param row_labels (`character`)\cr as this function works in columns space, usual `.labels`
#'   character vector applies on the column space. You can change the row labels by defining this
#'   parameter to a named character vector with names corresponding to the split values. It defaults
#'   to `NULL` and if it contains only one `string`, it will duplicate that as a row label.
#' @param do_summarize_row_groups (`flag`)\cr defaults to `FALSE` and applies the analysis to the current
#'   label rows. This is a wrapper of [rtables::summarize_row_groups()] and it can accept `labelstr`
#'   to define row labels. This behavior is not supported as we never need to overload row labels.
#' @param split_col_vars (`flag`)\cr defaults to `TRUE` and puts the analysis results onto the columns.
#'   This option allows you to add multiple instances of this function, also in a nested fashion,
#'   without adding more splits. This split must happen only one time on a single layout.
#'
#' @return
#' A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
#' Adding this function to an `rtable` layout will summarize the given variables, arrange the output
#' in columns, and add it to the table layout.
#'
#' @note This is an experimental implementation of [rtables::summarize_row_groups()] and
#'   [rtables::analyze_colvars()] that may be subjected to changes as `rtables` extends its
#'   support to more complex analysis pipelines on the column space. For the same reasons,
#'   we encourage to read the examples carefully and file issues for cases that differ from
#'   them.
#'
#'   Here `labelstr` behaves differently than usual. If it is not defined (default as `NULL`),
#'   row labels are assigned automatically to the split values in case of `rtables::analyze_colvars`
#'   (`do_summarize_row_groups = FALSE`, the default), and to the group label for
#'   `do_summarize_row_groups = TRUE`.
#'
#' @seealso [summarize_vars()], [rtables::analyze_colvars()].
#'
#' @examples
#' library(dplyr)
#'
#' # Data preparation
#' adpp <- tern_ex_adpp %>% h_pkparam_sort()
#'
#' lyt <- basic_table() %>%
#'   split_rows_by(var = "STRATA1", label_pos = "topleft") %>%
#'   split_rows_by(
#'     var = "SEX",
#'     label_pos = "topleft",
#'     child_label = "hidden"
#'   ) %>% # Removes duplicated labels
#'   analyze_vars_in_cols(vars = "AGE")
#' result <- build_table(lyt = lyt, df = adpp)
#' result
#'
#' # By selecting just some statistics and ad-hoc labels
#' lyt <- basic_table() %>%
#'   split_rows_by(var = "ARM", label_pos = "topleft") %>%
#'   split_rows_by(
#'     var = "SEX",
#'     label_pos = "topleft",
#'     child_labels = "hidden",
#'     split_fun = drop_split_levels
#'   ) %>%
#'   analyze_vars_in_cols(
#'     vars = "AGE",
#'     .stats = c("n", "cv", "geom_mean"),
#'     .labels = c(
#'       n = "aN",
#'       cv = "aCV",
#'       geom_mean = "aGeomMean"
#'     )
#'   )
#' result <- build_table(lyt = lyt, df = adpp)
#' result
#'
#' # Changing row labels
#' lyt <- basic_table() %>%
#'   analyze_vars_in_cols(
#'     vars = "AGE",
#'     row_labels = "some custom label"
#'   )
#' result <- build_table(lyt, df = adpp)
#' result
#'
#' # Pharmacokinetic parameters
#' lyt <- basic_table() %>%
#'   split_rows_by(
#'     var = "TLG_DISPLAY",
#'     split_label = "PK Parameter",
#'     label_pos = "topleft",
#'     child_label = "hidden"
#'   ) %>%
#'   analyze_vars_in_cols(
#'     vars = "AVAL"
#'   )
#' result <- build_table(lyt, df = adpp)
#' result
#'
#' # Multiple calls (summarize label and analyze underneath)
#' lyt <- basic_table() %>%
#'   split_rows_by(
#'     var = "TLG_DISPLAY",
#'     split_label = "PK Parameter",
#'     label_pos = "topleft"
#'   ) %>%
#'   analyze_vars_in_cols(
#'     vars = "AVAL",
#'     do_summarize_row_groups = TRUE # does a summarize level
#'   ) %>%
#'   split_rows_by("SEX",
#'     child_label = "hidden",
#'     label_pos = "topleft"
#'   ) %>%
#'   analyze_vars_in_cols(
#'     vars = "AVAL",
#'     split_col_vars = FALSE # avoids re-splitting the columns
#'   )
#' result <- build_table(lyt, df = adpp)
#' result
#'
#' @export
analyze_vars_in_cols <- function(lyt,
                                 vars,
                                 ...,
                                 .stats = c(
                                   "n",
                                   "mean",
                                   "sd",
                                   "se",
                                   "cv",
                                   "geom_cv"
                                 ),
                                 .labels = c(
                                   n = "n",
                                   mean = "Mean",
                                   sd = "SD",
                                   se = "SE",
                                   cv = "CV (%)",
                                   geom_cv = "CV % Geometric Mean"
                                 ),
                                 row_labels = NULL,
                                 do_summarize_row_groups = FALSE,
                                 split_col_vars = TRUE,
                                 .indent_mods = NULL,
                                 nested = TRUE,
                                 na_level = NULL,
                                 .formats = NULL) {
  # Argument checks
  checkmate::assert_string(na_level, null.ok = TRUE)
  checkmate::assert_character(row_labels, null.ok = TRUE)
  checkmate::assert_int(.indent_mods, null.ok = TRUE)
  checkmate::assert_flag(nested)
  checkmate::assert_flag(split_col_vars)
  checkmate::assert_flag(do_summarize_row_groups)

  # Automatic assignment of formats: unless `.formats` is given, the default
  # numeric formats are combined with the counts formats (minus their first entry).
  if (is.null(.formats)) {
    # General values
    sf_numeric <- summary_formats("numeric")
    sf_counts <- summary_formats("counts")[-1]
    formats_v <- c(sf_numeric, sf_counts)
  } else {
    formats_v <- .formats
  }

  # One variable per statistic is needed: a single variable is recycled, any
  # other length mismatch is an error.
  if (length(vars) == 1) {
    vars <- rep(vars, length(.stats))
  } else if (length(vars) != length(.stats)) {
    stop(
      "Analyzed variables (vars) does not have the same ",
      "number of elements of specified statistics (.stats)."
    )
  }

  if (split_col_vars) {
    # Checking there is not a previous identical column split
    clyt <- tail(clayout(lyt), 1)[[1]]

    # The split that would be added is built on an empty layout so it can be
    # compared with the splits already present in `lyt`.
    dummy_lyt <- split_cols_by_multivar(
      lyt = basic_table(),
      vars = vars,
      varlabels = .labels[.stats]
    )

    # `vapply()` guarantees a logical vector, also when `clyt` is empty.
    already_split <- vapply(
      clyt,
      identical,
      FUN.VALUE = logical(1),
      y = get_last_col_split(dummy_lyt)
    )
    if (any(already_split)) {
      stop(
        "Column split called again with the same values. ",
        "This can create many unwanted columns. Please consider adding ",
        "split_col_vars = FALSE to the last call of ",
        deparse(sys.calls()[[sys.nframe() - 1]]), "."
      )
    }

    # Main col split
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = vars,
      varlabels = .labels[.stats]
    )
  }

  if (do_summarize_row_groups) {
    if (length(unique(vars)) > 1) {
      stop("When using do_summarize_row_groups only one label level var should be inserted.")
    }

    # Function list for do_summarize_row_groups. Slightly different handling of labels
    cfun_list <- Map(
      function(stat) {
        function(u, .spl_context, labelstr, ...) {
          # Statistic
          res <- s_summary(u, ...)[[stat]]

          # Label check and replacement: only a `row_labels` vector with more
          # than one element replaces `labelstr`, and it must be named by it.
          if (length(row_labels) > 1) {
            if (!(labelstr %in% names(row_labels))) {
              stop(
                "Replacing the labels in do_summarize_row_groups needs a named vector ",
                "that contains the split values. In the current split variable ",
                .spl_context$split[nrow(.spl_context)],
                " the labelstr value (split value by default) ", labelstr, " is not in",
                # `stop()` pastes its arguments without separator, so the names
                # are collapsed explicitly to stay readable.
                " row_labels names: ", paste(names(row_labels), collapse = ", ")
              )
            }
            lbl <- unlist(row_labels[labelstr])
          } else {
            lbl <- labelstr
          }

          # Cell creation
          rcell(res,
            label = lbl,
            format = formats_v[names(formats_v) == stat][[1]],
            format_na_str = na_level,
            indent_mod = ifelse(is.null(.indent_mods), 0L, .indent_mods)
          )
        }
      },
      stat = .stats
    )

    # Main call to rtables
    summarize_row_groups(
      lyt = lyt,
      var = unique(vars),
      cfun = cfun_list,
      extra_args = list(...)
    )
  } else {
    # Function list for analyze_colvars
    afun_list <- Map(
      function(stat) {
        function(u, .spl_context, ...) {
          # Main statistics
          res <- s_summary(u, ...)[[stat]]

          # Label from context: value of the last row of the split context
          label_from_context <- .spl_context$value[nrow(.spl_context)]

          # Label switcher: no `row_labels` -> split value; one label -> used
          # for every row; several labels -> looked up by split value.
          if (is.null(row_labels)) {
            lbl <- label_from_context
          } else {
            if (length(row_labels) > 1) {
              if (!(label_from_context %in% names(row_labels))) {
                stop(
                  "Replacing the labels in do_summarize_row_groups needs a named vector ",
                  "that contains the split values. In the current split variable ",
                  .spl_context$split[nrow(.spl_context)],
                  " the split value ", label_from_context, " is not in",
                  # See above: collapse the names so they are not run together.
                  " row_labels names: ", paste(names(row_labels), collapse = ", ")
                )
              }
              lbl <- unlist(row_labels[label_from_context])
            } else {
              lbl <- row_labels
            }
          }

          # Cell creation
          rcell(res,
            label = lbl,
            format = formats_v[names(formats_v) == stat][[1]],
            format_na_str = na_level,
            indent_mod = ifelse(is.null(.indent_mods), 0L, .indent_mods)
          )
        }
      },
      stat = .stats
    )

    # Main call to rtables
    analyze_colvars(lyt,
      afun = afun_list,
      nested = nested,
      extra_args = list(...)
    )
  }
}

# Helper: takes the last element of `clayout(lyt)` and returns the last element
# contained in it.
get_last_col_split <- function(lyt) {
  last_clyt <- tail(clayout(lyt), 1)[[1]]
  tail(last_clyt, 1)[[1]]
}

#' Controls for Cox Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Sets a list of parameters for Cox regression fit. Used internally.
#'
#' @inheritParams argument_convention
#' @param pval_method (`string`)\cr the method used for estimation of p.values; `wald` (default) or `likelihood`.
#' @param interaction (`flag`)\cr if `TRUE`, the model includes the interaction between the studied
#'   treatment and candidate covariate. Note that for univariate models without treatment arm, and
#'   multivariate models, no interaction can be used so that this needs to be `FALSE`.
#' @param ties (`string`)\cr among `exact` (equivalent to `DISCRETE` in SAS), `efron` and `breslow`,
#'   see [survival::coxph()]. Note: there is no equivalent of SAS `EXACT` method in R.
#'
#' @return A `list` of items with names corresponding to the arguments.
#'
#' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()].
#'
#' @examples
#' control_coxreg()
#'
#' @export
control_coxreg <- function(pval_method = c("wald", "likelihood"),
                           ties = c("exact", "efron", "breslow"),
                           conf_level = 0.95,
                           interaction = FALSE) {
  # Resolve the enumerated arguments to a single choice (first one by default).
  pval_method <- match.arg(pval_method)
  ties <- match.arg(ties)

  # Validate the remaining arguments.
  checkmate::assert_flag(interaction)
  assert_proportion_value(conf_level)

  # Return the settings as a list named after the arguments.
  control <- list(pval_method, ties, conf_level, interaction)
  names(control) <- c("pval_method", "ties", "conf_level", "interaction")
  control
}

#' Custom Tidy Methods for Cox Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @param x (`list`)\cr Result of the Cox regression model fitted by [fit_coxreg_univar()] (for univariate models)
#'   or [fit_coxreg_multivar()] (for multivariate models).
#'
#' @return [tidy()] returns:
#' * For `summary.coxph` objects,  a `data.frame` with columns: `Pr(>|z|)`, `exp(coef)`, `exp(-coef)`, `lower .95`,
#'   `upper .95`, `level`, and `n`.
#' * For `coxreg.univar` objects, a `data.frame` with columns: `effect`, `term`, `term_label`, `level`, `n`, `hr`,
#'   `lcl`, `ucl`, `pval`, and `ci`.
#' * For `coxreg.multivar` objects, a `data.frame` with columns: `term`, `pval`, `term_label`, `hr`, `lcl`, `ucl`,
#'   `level`, and `ci`.
#'
#' @seealso [cox_regression]
#'
#' @name tidy_coxreg
NULL

#' @describeIn tidy_coxreg Custom tidy method for [survival::coxph()] summary results.
#'
#' Tidy the [survival::coxph()] results into a `data.frame` to extract model results.
#'
#' @method tidy summary.coxph
#'
#' @examples
#' library(survival)
#' library(broom)
#'
#' set.seed(1, kind = "Mersenne-Twister")
#'
#' dta_bladder <- with(
#'   data = bladder[bladder$enum < 5, ],
#'   data.frame(
#'     time = stop,
#'     status = event,
#'     armcd = as.factor(rx),
#'     covar1 = as.factor(enum),
#'     covar2 = factor(
#'       sample(as.factor(enum)),
#'       levels = 1:4, labels = c("F", "F", "M", "M")
#'     )
#'   )
#' )
#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
#'
#' formula <- "survival::Surv(time, status) ~ armcd + covar1"
#' msum <- summary(coxph(stats::as.formula(formula), data = dta_bladder))
#' tidy(msum)
#'
#' @export
tidy.summary.coxph <- function(x, # nolint
                               ...) {
  checkmate::assert_class(x, "summary.coxph")

  # Row names are read before the conversion to tibble, where they are no
  # longer used below.
  coef_mat <- x$coefficients
  term_levels <- rownames(coef_mat)

  coef_tbl <- tibble::as_tibble(coef_mat)
  ci_tbl <- tibble::as_tibble(x$conf.int)

  # Keep only the p-value column(s) of the coefficient table (name contains
  # "Pr") and bind them to the confidence interval table.
  is_pval_col <- grepl("Pr", names(coef_tbl))
  tidied <- cbind(coef_tbl[, is_pval_col], ci_tbl)

  tidied$level <- term_levels
  tidied$n <- x[["n"]]
  tidied
}

#' @describeIn tidy_coxreg Custom tidy method for a univariate Cox regression.
#'
#' Tidy up the result of a Cox regression model fitted by [fit_coxreg_univar()].
#'
#' @method tidy coxreg.univar
#'
#' @examples
#' ## Cox regression: arm + 1 covariate.
#' mod1 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = "covar1"
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91)
#' )
#'
#' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates.
#' mod2 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91, interaction = TRUE)
#' )
#'
#' tidy(mod1)
#' tidy(mod2)
#'
#' @export
tidy.coxreg.univar <- function(x, # nolint
                               ...) {
  checkmate::assert_class(x, "coxreg.univar")
  models <- x$mod
  model_vars <- c(x$vars$arm, x$vars$covariates)
  has_arm <- "arm" %in% names(x$vars)

  # Choose how a single (model, variable) pair is turned into a result table:
  # * no arm: covariate-only extraction,
  # * arm with interaction: interaction extraction (also uses `x$at`),
  # * arm without interaction: univariate extraction.
  extract_one <- if (!has_arm) {
    function(mod, vars) {
      h_coxreg_multivar_extract(
        var = vars,
        data = x$data,
        mod = mod,
        control = x$control
      )
    }
  } else if (x$control$interaction) {
    function(mod, vars) {
      h_coxreg_extract_interaction(
        effect = x$vars$arm, covar = vars, mod = mod, data = x$data,
        at = x$at, control = x$control
      )
    }
  } else {
    function(mod, vars) {
      h_coxreg_univar_extract(
        effect = x$vars$arm, covar = vars, data = x$data, mod = mod,
        control = x$control
      )
    }
  }

  # `mod` stays the first mapped argument so that the names of the resulting
  # list are derived from the models, then all pieces are stacked.
  pieces <- Map(f = extract_one, mod = models, vars = model_vars)
  result <- do.call(rbind, pieces)

  # Combine the confidence limits into one list column and replace missing
  # entries via `empty_vector_if_na()`.
  result$ci <- Map(f = function(lcl, ucl) c(lcl, ucl), lcl = result$lcl, ucl = result$ucl)
  result$n <- lapply(result$n, empty_vector_if_na)
  result$ci <- lapply(result$ci, empty_vector_if_na)
  result$hr <- lapply(result$hr, empty_vector_if_na)
  if (x$control$interaction) {
    result$pval_inter <- lapply(result$pval_inter, empty_vector_if_na)
    # Remove interaction p-values due to change in specifications.
    not_treatment <- result$effect != "Treatment:"
    result$pval[not_treatment] <- NA
  }
  result$pval <- lapply(result$pval, empty_vector_if_na)

  # Keep the confidence level with the result.
  attr(result, "conf_level") <- x$control$conf_level
  result
}

#' @describeIn tidy_coxreg Custom tidy method for a multivariate Cox regression.
#'
#' Tidy up the result of a Cox regression model fitted by [fit_coxreg_multivar()].
#'
#' @method tidy coxreg.multivar
#'
#' @examples
#' multivar_model <- fit_coxreg_multivar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder
#' )
#' broom::tidy(multivar_model)
#'
#' @export
tidy.coxreg.multivar <- function(x, # nolint
                                 ...) {
  checkmate::assert_class(x, "coxreg.multivar")
  model_vars <- c(x$vars$arm, x$vars$covariates)

  # Extract the results of the single fitted model for one variable.
  extract_one <- function(vars) {
    h_coxreg_multivar_extract(
      var = vars, data = x$data,
      mod = x$mod, control = x$control
    )
  }

  # Convert the model summaries to data: one piece per variable, stacked.
  pieces <- Map(f = extract_one, vars = model_vars)
  result <- do.call(rbind, pieces)

  # Combine the confidence limits into one list column and replace missing
  # entries via `empty_vector_if_na()`.
  result$ci <- Map(f = function(lcl, ucl) c(lcl, ucl), lcl = result$lcl, ucl = result$ucl)
  result$ci <- lapply(result$ci, empty_vector_if_na)
  result$hr <- lapply(result$hr, empty_vector_if_na)
  result$pval <- lapply(result$pval, empty_vector_if_na)

  # The `n` column is not part of the multivariate output.
  keep_cols <- names(result) != "n"
  result <- result[, keep_cols]
  attr(result, "conf_level") <- x$control$conf_level

  result
}

#' Fits for Cox Proportional Hazards Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Fitting functions for univariate and multivariate Cox regression models.
#'
#' @param variables (`list`)\cr a named list of the names of variables found in `data`, with elements
#'   corresponding to the `time`, `event`, `arm`, `strata`, and `covariates` terms. If `arm` is missing from
#'   `variables`, then only Cox model(s) including the `covariates` will be fitted and the corresponding effect
#'   estimates will be tabulated later.
#' @param data (`data.frame`)\cr the dataset containing the variables to fit the models.
#' @param at (`list` of `numeric`)\cr when the candidate covariate is a `numeric`, use `at` to specify
#'   the value of the covariate at which the effect should be estimated.
#' @param control (`list`)\cr a list of parameters as returned by the helper function [control_coxreg()].
#'
#' @seealso [h_cox_regression] for relevant helper functions, [cox_regression].
#'
#' @examples
#' library(survival)
#'
#' set.seed(1, kind = "Mersenne-Twister")
#'
#' # Testing dataset [survival::bladder].
#' dta_bladder <- with(
#'   data = bladder[bladder$enum < 5, ],
#'   data.frame(
#'     time = stop,
#'     status = event,
#'     armcd = as.factor(rx),
#'     covar1 = as.factor(enum),
#'     covar2 = factor(
#'       sample(as.factor(enum)),
#'       levels = 1:4, labels = c("F", "F", "M", "M")
#'     )
#'   )
#' )
#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
#'
#' plot(
#'   survfit(Surv(time, status) ~ armcd + covar1, data = dta_bladder),
#'   lty = 2:4,
#'   xlab = "Months",
#'   col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
#' )
#'
#' @name fit_coxreg
NULL

#' @describeIn fit_coxreg Fit a series of univariate Cox regression models given the inputs.
#'
#' @return
#' * `fit_coxreg_univar()` returns a `coxreg.univar` class object which is a named `list`
#'   with 5 elements:
#'   * `mod`: Cox regression models fitted by [survival::coxph()].
#'   * `data`: The original data frame input.
#'   * `control`: The original control input.
#'   * `vars`: The variables used in the model.
#'   * `at`: Value of the covariate at which the effect should be estimated.
#'
#' @note When using `fit_coxreg_univar` there should be two study arms.
#'
#' @examples
#' # fit_coxreg_univar
#'
#' ## Cox regression: arm + 1 covariate.
#' mod1 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = "covar1"
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91)
#' )
#'
#' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates.
#' mod2 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91, interaction = TRUE)
#' )
#'
#' ## Cox regression: arm + 1 covariate, stratified analysis.
#' mod3 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd", strata = "covar2",
#'     covariates = c("covar1")
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91)
#' )
#'
#' ## Cox regression: no arm, only covariates.
#' mod4 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder
#' )
#'
#' @export
fit_coxreg_univar <- function(variables,
                              data,
                              at = list(),
                              control = control_coxreg()) {
  # `variables` must be a named list; the treatment arm entry is optional.
  checkmate::assert_list(variables, names = "named")
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  assert_df_with_variables(data, variables)
  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # With strata, the "likelihood" p-value method is not accepted.
  if (!is.null(variables$strata)) {
    checkmate::assert_disjunct(control$pval_method, "likelihood")
  }
  # When an arm is supplied it is checked with min.levels = max.levels = 2.
  if (has_arm) {
    assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  }

  # Drop unused factor levels of every model variable. The factor is recoded via
  # `droplevels()` so that the integer codes stay aligned with the remaining levels
  # (overwriting only the `levels` attribute would corrupt the factor whenever the
  # unused level is not the last one). Other attributes, such as a variable label,
  # are carried over; `contrasts` is not, since it is tied to the old level set.
  vars <- unlist(variables[c(arm_name, "covariates", "strata")], use.names = FALSE)
  for (i in vars) {
    original <- data[[i]]
    if (is.factor(original)) {
      dropped <- droplevels(original)
      if (nlevels(dropped) < nlevels(original)) {
        extra <- attributes(original)
        extra <- extra[setdiff(names(extra), c("levels", "class", "names", "contrasts"))]
        for (nm in names(extra)) {
          attr(dropped, nm) <- extra[[nm]]
        }
        data[[i]] <- dropped
      }
    }
  }

  # One model formula per candidate covariate; each is fitted on the same data.
  forms <- h_coxreg_univar_formulas(variables, interaction = control$interaction)
  mod <- lapply(
    forms, function(x) {
      survival::coxph(formula = stats::as.formula(x), data = data, ties = control$ties)
    }
  )
  structure(
    list(
      mod = mod,
      data = data,
      control = control,
      vars = variables,
      at = at
    ),
    class = "coxreg.univar"
  )
}

#' @describeIn fit_coxreg Fit a multivariate Cox regression model.
#'
#' @return
#' * `fit_coxreg_multivar()` returns a `coxreg.multivar` class object which is a named list
#'   with 4 elements:
#'   * `mod`: Cox regression model fitted by [survival::coxph()].
#'   * `data`: The original data frame input.
#'   * `control`: The original control input.
#'   * `vars`: The variables used in the model.
#'
#' @examples
#' # fit_coxreg_multivar
#'
#' ## Cox regression: multivariate Cox regression.
#' multivar_model <- fit_coxreg_multivar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder
#' )
#'
#' # Example without treatment arm.
#' multivar_covs_model <- fit_coxreg_multivar(
#'   variables = list(
#'     time = "time", event = "status",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder
#' )
#'
#' @export
fit_coxreg_multivar <- function(variables,
                                data,
                                control = control_coxreg()) {
  # Input validation: named variable list, optional arm, optional covariates.
  checkmate::assert_list(variables, names = "named")
  arm_name <- if ("arm" %in% names(variables)) "arm" else NULL

  if (!is.null(variables$covariates)) {
    checkmate::assert_character(variables$covariates)
  }

  # Interactions are not accepted by the multivariate fit.
  checkmate::assert_false(control$interaction)
  assert_df_with_variables(data, variables)
  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # With strata, the "likelihood" p-value method is not accepted.
  if (!is.null(variables$strata)) {
    checkmate::assert_disjunct(control$pval_method, "likelihood")
  }

  # Single model containing all requested terms.
  form <- h_coxreg_multivar_formula(variables)
  mod <- survival::coxph(
    formula = stats::as.formula(form),
    data = data,
    ties = control$ties
  )

  # Bundle the fit with its inputs.
  fit <- list(mod = mod, data = data, control = control, vars = variables)
  class(fit) <- "coxreg.multivar"
  fit
}

#' Muffled `car::Anova`
#'
#' Applied on survival models, [car::Anova()] signals that the `strata` terms are dropped from the model formula
#' when present. This function deliberately muffles this message.
#'
#' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()].
#' @param test_statistic (`string`)\cr the method used for estimation of p.values; `wald` (default) or `likelihood`.
#'
#' @return Returns the output of [car::Anova()], with convergence message muffled.
#'
#' @keywords internal
muffled_car_anova <- function(mod, test_statistic) {
  # Messages signalled while running the type III Anova are muffled; errors are
  # re-thrown with a hint about possible convergence problems.
  withCallingHandlers(
    tryCatch(
      car::Anova(
        mod,
        test.statistic = test_statistic,
        type = "III"
      ),
      error = function(e) {
        # Use only the message text of the original error: pasting the condition
        # object itself vectorises over its message and call and garbles the output.
        stop(
          paste(
            "the model seems to have convergence problems, please try to change",
            "the configuration of covariates or strata variables, e.g.",
            "- original error:", conditionMessage(e)
          ),
          call. = FALSE
        )
      }
    ),
    message = function(m) invokeRestart("muffleMessage")
  )
}

#' Cox Regression Helper: Interactions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Test and estimate the effect of a treatment in interaction with a covariate.
#' The effect is estimated as the HR of the tested treatment for a given level
#' of the covariate, in comparison to the treatment control.
#'
#' @inheritParams argument_convention
#' @param x (`numeric` or `factor`)\cr the values of the effect to be tested.
#' @param effect (`string`)\cr the name of the effect to be tested and estimated.
#' @param covar (`string`)\cr the name of the covariate in the model.
#' @param mod (`coxph`)\cr the Cox regression model.
#' @param label (`string`)\cr the label to be returned as `term_label`.
#' @param control (`list`)\cr a list of controls as returned by [control_coxreg()].
#' @param ... see methods.
#'
#' @examples
#' library(survival)
#'
#' set.seed(1, kind = "Mersenne-Twister")
#'
#' # Testing dataset [survival::bladder].
#' dta_bladder <- with(
#'   data = bladder[bladder$enum < 5, ],
#'   data.frame(
#'     time = stop,
#'     status = event,
#'     armcd = as.factor(rx),
#'     covar1 = as.factor(enum),
#'     covar2 = factor(
#'       sample(as.factor(enum)),
#'       levels = 1:4,
#'       labels = c("F", "F", "M", "M")
#'     )
#'   )
#' )
#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
#'
#' plot(
#'   survfit(Surv(time, status) ~ armcd + covar1, data = dta_bladder),
#'   lty = 2:4,
#'   xlab = "Months",
#'   col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
#' )
#'
#' @name cox_regression_inter
NULL

#' @describeIn cox_regression_inter S3 generic helper function to determine interaction effect.
#'
#' @return
#' * `h_coxreg_inter_effect()` returns a `data.frame` of covariate interaction effects consisting of the following
#'   variables: `effect`, `term`, `term_label`, `level`, `n`, `hr`, `lcl`, `ucl`, `pval`, and `pval_inter`.
#'
#' @export
h_coxreg_inter_effect <- function(x,
                                  effect,
                                  covar,
                                  mod,
                                  label,
                                  control,
                                  ...) {
  # S3 generic: the method is selected from the class of the first argument `x`.
  UseMethod("h_coxreg_inter_effect")
}

#' @describeIn cox_regression_inter Estimate the interaction with a `numeric` covariate.
#'
#' @param at (`list`)\cr a list with items named after the covariate, every
#'   item is a vector of levels at which the interaction should be estimated.
#'
#' @export
h_coxreg_inter_effect.numeric <- function(x,
                                          effect,
                                          covar,
                                          mod,
                                          label,
                                          control,
                                          at,
                                          ...) {
  # Locate the terms mentioning `effect` among the non-strata term labels; exactly
  # two are expected (checked below).
  term_labels <- attr(stats::terms(mod), "term.labels")
  non_strata_terms <- term_labels[!grepl("strata\\(", term_labels)]
  term_indices <- grep(pattern = effect, x = non_strata_terms)
  checkmate::assert_vector(term_indices, len = 2)

  # Coefficients with their variances and covariance.
  covmat <- stats::vcov(mod)
  betas <- stats::coef(mod)[term_indices]
  betas_var <- diag(covmat)[term_indices]
  betas_cov <- covmat[term_indices[1], term_indices[2]]

  # Covariate value(s) at which to evaluate the effect; defaults to the median of `x`.
  xval <- at[[covar]]
  if (is.null(xval)) {
    xval <- stats::median(x)
  }

  # The coefficient whose name does not mention `covar` is taken as the main effect.
  effect_index <- !grepl(covar, names(betas))
  coef_hat <- betas[effect_index] + xval * betas[!effect_index]
  coef_se <- sqrt(
    betas_var[effect_index] +
      xval ^ 2 * betas_var[!effect_index] + # styler: off
      2 * xval * betas_cov
  )

  # Normal-approximation confidence limits on the hazard ratio scale.
  q_norm <- stats::qnorm((1 + control$conf_level) / 2)
  lower <- exp(coef_hat - q_norm * coef_se)
  upper <- exp(coef_hat + q_norm * coef_se)

  data.frame(
    effect = "Covariate:",
    term = rep(covar, length(xval)),
    term_label = paste0("  ", xval),
    level = as.character(xval),
    n = NA,
    hr = exp(coef_hat),
    lcl = lower,
    ucl = upper,
    pval = NA,
    pval_inter = NA,
    stringsAsFactors = FALSE
  )
}

#' @describeIn cox_regression_inter Estimate the interaction with a `factor` covariate.
#'
#' @param data (`data.frame`)\cr the data frame on which the model was fit.
#'
#' @export
h_coxreg_inter_effect.factor <- function(x,
                                         effect,
                                         covar,
                                         mod,
                                         label,
                                         control,
                                         data,
                                         ...) {
  # Honour the confidence level requested through `control` (as the numeric method
  # does); fall back to 0.95 when `control` carries no `conf_level`.
  conf_level <- control[["conf_level"]]
  if (is.null(conf_level)) {
    conf_level <- 0.95
  }

  covar_levels <- levels(data[[covar]])

  # Only the first element of the estimation list is used.
  y <- h_coxreg_inter_estimations(
    variable = effect, given = covar,
    lvl_var = levels(data[[effect]]),
    lvl_given = covar_levels,
    mod = mod,
    conf_level = conf_level
  )[[1]]

  # One output row per covariate level.
  data.frame(
    effect = "Covariate:",
    term = rep(covar, nrow(y)),
    term_label = as.character(paste0("  ", covar_levels)),
    level = as.character(covar_levels),
    n = NA,
    hr = y[, "hr"],
    lcl = y[, "lcl"],
    ucl = y[, "ucl"],
    pval = NA,
    pval_inter = NA,
    stringsAsFactors = FALSE
  )
}

#' @describeIn cox_regression_inter A higher level function to get
#'   the results of the interaction test and the estimated values.
#'
#' @return
#' * `h_coxreg_extract_interaction()` returns the result of an interaction test and the estimated values. If
#'   no interaction, [h_coxreg_univar_extract()] is applied instead.
#'
#' @examples
#' mod <- coxph(Surv(time, status) ~ armcd * covar1, data = dta_bladder)
#' h_coxreg_extract_interaction(
#'   mod = mod, effect = "armcd", covar = "covar1", data = dta_bladder,
#'   control = control_coxreg()
#' )
#'
#' @export
h_coxreg_extract_interaction <- function(effect,
                                         covar,
                                         mod,
                                         data,
                                         at,
                                         control) {
  has_interaction <- any(attr(stats::terms(mod), "order") == 2)

  # No second-order term in the model: use the univariate extraction and add an
  # empty interaction p-value column.
  if (!has_interaction) {
    res <- h_coxreg_univar_extract(
      effect = effect, covar = covar, mod = mod, data = data, control = control
    )
    res$pval_inter <- NA
    return(res)
  }

  # Map the requested p-value method onto the test statistic name passed to the Anova.
  test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method]
  anova_tbl <- broom::tidy(muffled_car_anova(mod, test_statistic))

  # p-value of the main treatment effect, and of the term(s) containing ":".
  pval <- anova_tbl[anova_tbl$term == effect, ][["p.value"]]
  pval_inter <- anova_tbl[grep(":", anova_tbl$term), ][["p.value"]]

  covar_label <- unname(labels_or_names(data[covar]))

  # Header row carrying the test results for the covariate.
  covar_test <- data.frame(
    effect = "Covariate:",
    term = covar,
    term_label = covar_label,
    level = "",
    n = mod$n, hr = NA, lcl = NA, ucl = NA, pval = pval,
    pval_inter = pval_inter,
    stringsAsFactors = FALSE
  )

  # Effect estimates; the method is chosen from the class of the covariate column.
  estimates <- h_coxreg_inter_effect(
    data[[covar]],
    covar = covar,
    effect = effect,
    mod = mod,
    label = covar_label,
    at = at,
    control = control,
    data = data
  )
  rbind(covar_test, estimates)
}

#' @describeIn cox_regression_inter Hazard ratio estimation in interactions.
#'
#' @param variable,given (`string`)\cr the name of variables in interaction. We seek the estimation
#'   of the levels of `variable` given the levels of `given`.
#' @param lvl_var,lvl_given (`character`)\cr corresponding levels as given by [levels()].
#' @param mod (`coxph`)\cr a fitted Cox regression model (see [survival::coxph()]).
#'
#' @details Given the cox regression investigating the effect of Arm (A, B, C; reference A)
#'   and Sex (F, M; reference Female) and the model being abbreviated: y ~ Arm + Sex + Arm:Sex.
#'   The cox regression estimates the coefficients along with a variance-covariance matrix for:
#'
#'   - b1 (arm b), b2 (arm c)
#'   - b3 (sex m)
#'   - b4 (arm b: sex m), b5 (arm c: sex m)
#'
#'   The estimation of the Hazard Ratio for arm C/sex M is given in reference
#'   to arm A/Sex M by exp(b2 + b3 + b5)/ exp(b3) = exp(b2 + b5).
#'   The interaction coefficient is deduced by b2 + b5 while the standard error
#'   is obtained as $sqrt(Var b2 + Var b5 + 2 * covariance (b2,b5))$.
#'
#' @return
#' * `h_coxreg_inter_estimations()` returns a list of matrices (one per non-reference level of `variable`) with
#'   rows corresponding to the combinations of `variable` and `given`, with columns:
#'   * `coef`: Estimation of the coefficient.
#'   * `se(coef)`: Standard error of the estimation.
#'   * `hr`: Hazard ratio.
#'   * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio.
#'
#' @examples
#' mod <- coxph(Surv(time, status) ~ armcd * covar1, data = dta_bladder)
#' result <- h_coxreg_inter_estimations(
#'   variable = "armcd", given = "covar1",
#'   lvl_var = levels(dta_bladder$armcd),
#'   lvl_given = levels(dta_bladder$covar1),
#'   mod = mod, conf_level = .95
#' )
#' result
#'
#' @export
h_coxreg_inter_estimations <- function(variable,
                                       given,
                                       lvl_var,
                                       lvl_given,
                                       mod,
                                       conf_level = 0.95) {
  # Coefficient-style names: variable name pasted with level. The first level of
  # `variable` is the reference and is left out.
  var_terms <- paste0(variable, lvl_var[-1])
  given_terms <- paste0(given, lvl_given)

  # One row per (variable level, given level) pair, ordered by variable then given.
  combos <- expand.grid(variable = var_terms, given = given_terms)
  combos <- combos[order(combos$variable, combos$given), ]
  split_by_variable <- combos$variable
  interaction_names <- paste(combos$variable, combos$given, sep = "/")

  # For each pair: the main-effect name and the interaction name in both orders.
  wanted_terms <- cbind(
    variable = as.character(combos$variable),
    rev_inter = paste0(combos$given, ":", combos$variable),
    inter = paste0(combos$variable, ":", combos$given)
  )

  # Named all-zero template with one entry per model matrix column.
  template <- stats::model.matrix(mod)[1, ]
  template[template != 0] <- 0
  template_names <- names(template)

  # Indicator columns flagging which coefficients enter each estimate.
  contrast <- apply(
    X = wanted_terms, MARGIN = 1, FUN = function(terms_i) {
      indicator <- template
      indicator[template_names %in% terms_i] <- 1
      indicator
    }
  )
  colnames(contrast) <- interaction_names

  # Point estimates: sum of the flagged coefficients.
  covmat <- stats::vcov(mod)
  coef_hat <- t(contrast) %*% as.matrix(stats::coef(mod))
  colnames(coef_hat) <- "coef"

  # Standard errors: square root of the sum of the flagged variance-covariance block.
  coef_se <- apply(
    contrast, 2,
    function(indicator) {
      flagged <- as.logical(indicator)
      sqrt(sum(covmat[flagged, flagged]))
    }
  )

  # Hazard ratios and normal-approximation confidence limits.
  q_norm <- stats::qnorm((1 + conf_level) / 2)
  est <- cbind(coef_hat, `se(coef)` = coef_se)
  point <- est[, "coef"]
  spread <- q_norm * est[, "se(coef)"]
  est <- cbind(
    est,
    hr = exp(point),
    lcl = exp(point - spread),
    ucl = exp(point + spread)
  )

  # One matrix per level of `variable`.
  y <- by(est, split_by_variable, identity)
  y <- lapply(y, as.matrix)
  attr(y, "details") <- paste0(
    "Estimations of ", variable,
    " hazard ratio given the level of ", given, " compared to ",
    variable, " level ", lvl_var[1], "."
  )
  y
}

#' Encode Categorical Missing Values in a Data Frame
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is a helper function to encode missing entries across groups of categorical
#' variables in a data frame.
#'
#' @details Missing entries are those with `NA` or empty strings and will
#'   be replaced with a specified value. If factor variables include missing
#'   values, the missing value will be inserted as the last level.
#'   Similarly, in case character or logical variables should be converted to factors
#'   with the `char_as_factor` or `logical_as_factor` options, the missing values will
#'   be set as the last level.
#'
#' @param data (`data.frame`)\cr data set.
#' @param omit_columns (`character`)\cr names of variables from `data` that should
#'   not be modified by this function.
#' @param char_as_factor (`flag`)\cr whether to convert character variables
#'   in `data` to factors.
#' @param logical_as_factor (`flag`)\cr whether to convert logical variables
#'   in `data` to factors.
#' @param na_level (`string`)\cr used to replace all `NA` or empty
#'   values inside non-`omit_columns` columns.
#'
#' @return A `data.frame` with the chosen modifications applied.
#'
#' @seealso [sas_na()] and [explicit_na()] for other missing data helper functions.
#'
#' @examples
#' my_data <- data.frame(
#'   u = c(TRUE, FALSE, NA, TRUE),
#'   v = factor(c("A", NA, NA, NA), levels = c("Z", "A")),
#'   w = c("A", "B", NA, "C"),
#'   x = c("D", "E", "F", NA),
#'   y = c("G", "H", "I", ""),
#'   z = c(1, 2, 3, 4),
#'   stringsAsFactors = FALSE
#' )
#'
#' # Example 1
#' # Encode missing values in all character or factor columns.
#' df_explicit_na(my_data)
#' # Also convert logical columns to factor columns.
#' df_explicit_na(my_data, logical_as_factor = TRUE)
#' # Encode missing values in a subset of columns.
#' df_explicit_na(my_data, omit_columns = c("x", "y"))
#'
#' # Example 2
#' # Here we purposefully convert all `M` values to `NA` in the `SEX` variable.
#' # After running `df_explicit_na` the `NA` values are encoded as `<Missing>` but they are not
#' # included when generating `rtables`.
#' adsl <- tern_ex_adsl
#' adsl$SEX[adsl$SEX == "M"] <- NA
#' adsl <- df_explicit_na(adsl)
#'
#' # If you want the `NA` values to be displayed in the table use the `na_level` argument.
#' adsl <- tern_ex_adsl
#' adsl$SEX[adsl$SEX == "M"] <- NA
#' adsl <- df_explicit_na(adsl, na_level = "Missing Values")
#'
#' # Example 3
#' # Numeric variables that have missing values are not altered. This means that any `NA` value in
#' # a numeric variable will not be included in the summary statistics, nor will they be included
#' # in the denominator value for calculating the percent values.
#' adsl <- tern_ex_adsl
#' adsl$AGE[adsl$AGE < 30] <- NA
#' adsl <- df_explicit_na(adsl)
#'
#' @export
df_explicit_na <- function(data,
                           omit_columns = NULL,
                           char_as_factor = TRUE,
                           logical_as_factor = FALSE,
                           na_level = "<Missing>") {
  checkmate::assert_character(omit_columns, null.ok = TRUE, min.len = 1, any.missing = FALSE)
  checkmate::assert_data_frame(data)
  checkmate::assert_flag(char_as_factor)
  checkmate::assert_flag(logical_as_factor)
  checkmate::assert_string(na_level)

  # Columns to process: all of them, minus any the caller asked to leave alone.
  target_vars <- names(data)
  if (!is.null(omit_columns)) {
    target_vars <- setdiff(target_vars, omit_columns)
  }
  if (length(target_vars) == 0) {
    return(data)
  }

  # Makes sure target_vars exist in data and names are not duplicated.
  l_target_vars <- split(target_vars, target_vars)
  assert_df_with_variables(data, l_target_vars)

  for (col in target_vars) {
    xi <- data[[col]]
    xi_label <- obj_label(xi)

    # Decide, from the column's original type, whether a factor is wanted at the end.
    from_logical <- is.logical(xi) && logical_as_factor
    to_factor <- (is.character(xi) && char_as_factor) || from_logical

    # Logical columns go through character so that NA replacement below applies.
    if (from_logical) {
      xi <- as.character(xi)
    }

    # Other column types are left untouched.
    if (!(is.factor(xi) || is.character(xi))) {
      next
    }

    # Handle empty strings and NA values.
    xi <- explicit_na(sas_na(xi), label = na_level)

    if (to_factor) {
      # Sorted observed values, with `na_level` moved to the end when present.
      observed <- unique(xi)
      levels_xi <- setdiff(sort(observed), na_level)
      if (na_level %in% observed) {
        levels_xi <- c(levels_xi, na_level)
      }
      xi <- factor(xi, levels = levels_xi)
    }

    # Write the column back with the label read before modification.
    data[, col] <- formatters::with_label(xi, label = xi_label)
  }
  data
}

#' `rtables` Access Helper Functions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' These are a couple of functions that help with accessing the data in `rtables` objects.
#' Currently these work for occurrence tables, which are defined as having a count as the first
#' element and a fraction as the second element in each cell.
#'
#' @seealso [prune_occurrences] for usage of these functions.
#'
#' @name rtables_access
NULL

#' @describeIn rtables_access Helper function to extract the first values from each content
#'   cell and from specified columns in a `TableRow`. Defaults to all columns.
#'
#' @param table_row (`TableRow`)\cr an analysis row in an occurrence table.
#' @param col_names (`character`)\cr the names of the columns to extract from.
#' @param col_indices (`integer`)\cr the indices of the columns to extract from. If `col_names` are provided,
#'   then these are inferred from the names of `table_row`. Note that this currently only works well with a single
#'   column split.
#'
#' @return
#' * `h_row_first_values()` returns a `vector` of numeric values.
#'
#' @examples
#' tbl <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("RACE") %>%
#'   analyze("AGE", function(x) {
#'     list(
#'       "mean (sd)" = rcell(c(mean(x), sd(x)), format = "xx.x (xx.x)"),
#'       "n" = length(x),
#'       "frac" = rcell(c(0.1, 0.1), format = "xx (xx)")
#'     )
#'   }) %>%
#'   build_table(tern_ex_adsl) %>%
#'   prune_table()
#' tree_row_elem <- collect_leaves(tbl[2, ])[[1]]
#' result <- max(h_row_first_values(tree_row_elem))
#' result
#'
#' @export
h_row_first_values <- function(table_row,
                               col_names = NULL,
                               col_indices = NULL) {
  # Resolve and validate the requested columns.
  col_indices <- check_names_indices(table_row, col_names, col_indices)
  checkmate::assert_integerish(col_indices)
  checkmate::assert_subset(col_indices, seq_len(ncol(table_row)))

  # First element of a cell value, or NA when the cell value is NULL.
  first_or_na <- function(rv) {
    if (is.null(rv)) NA_real_ else rv[1L]
  }

  selected_cells <- row_values(table_row)[col_indices]
  vapply(selected_cells, first_or_na, FUN.VALUE = numeric(1))
}

#' @describeIn rtables_access Helper function that extracts row values and checks if they are
#'   convertible to integers (`integerish` values).
#'
#' @return
#' * `h_row_counts()` returns a `vector` of numeric values.
#'
#' @examples
#' # Row counts (integer values)
#' \dontrun{
#' h_row_counts(tree_row_elem) # Fails because there are no integers
#' }
#' # Using values with integers
#' tree_row_elem <- collect_leaves(tbl[3, ])[[1]]
#' result <- h_row_counts(tree_row_elem)
#' # result
#'
#' @export
h_row_counts <- function(table_row,
                         col_names = NULL,
                         col_indices = NULL) {
  # First cell values of the selected columns, required to be integer-like.
  counts <- h_row_first_values(
    table_row,
    col_names = col_names,
    col_indices = col_indices
  )
  checkmate::assert_integerish(counts)
  counts
}

#' @describeIn rtables_access helper function to extract fractions from specified columns in a `TableRow`.
#'   More specifically it extracts the second values from each content cell and checks it is a fraction.
#'
#' @return
#' * `h_row_fractions()` returns a `vector` of proportions.
#'
#' @examples
#' # Row fractions
#' tree_row_elem <- collect_leaves(tbl[4, ])[[1]]
#' h_row_fractions(tree_row_elem)
#'
#' @export
h_row_fractions <- function(table_row,
                            col_names = NULL,
                            col_indices = NULL) {
  col_indices <- check_names_indices(table_row, col_names, col_indices)

  # Second element of every selected cell, checked to lie within [0, 1].
  selected_cells <- row_values(table_row)[col_indices]
  fractions <- sapply(selected_cells, function(cell) cell[2L])
  checkmate::assert_numeric(fractions, lower = 0, upper = 1)
  fractions
}

#' @describeIn rtables_access Helper function to extract column counts from specified columns in a table.
#'
#' @param table (`VTableNodeInfo`)\cr an occurrence table or row.
#'
#' @return
#' * `h_col_counts()` returns a `vector` of column counts.
#'
#' @export
h_col_counts <- function(table,
                         col_names = NULL,
                         col_indices = NULL) {
  col_indices <- check_names_indices(table, col_names, col_indices)

  # Column counts of the selected columns, named by `col_names` (names are
  # removed when `col_names` is NULL).
  counts <- col_counts(table)[col_indices]
  names(counts) <- col_names
  counts
}

#' @describeIn rtables_access Helper function to get first row of content table of current table.
#'
#' @return
#' * `h_content_first_row()` returns a row from an `rtables` table.
#'
#' @export
h_content_first_row <- function(table) {
  # First child of the table's content table.
  content_rows <- tree_children(content_table(table))
  content_rows[[1]]
}

#' @describeIn rtables_access Helper function which says whether current table is a leaf in the tree.
#'
#' @return
#' * `is_leaf_table()` returns a `logical` value indicating whether current table is a leaf.
#'
#' @keywords internal
is_leaf_table <- function(table) {
  # TRUE only when the distinct classes of the children reduce to exactly
  # "ElementaryTable".
  child_classes <- sapply(tree_children(table), class)
  identical(unique(child_classes), "ElementaryTable")
}

#' @describeIn rtables_access Internal helper function that tests standard inputs for column indices.
#'
#' @return
#' * `check_names_indices` returns column indices.
#'
#' @keywords internal
check_names_indices <- function(table_row,
                                col_names = NULL,
                                col_indices = NULL) {
  # Column names and column indices are mutually exclusive selectors; names are
  # translated into indices.
  if (!is.null(col_names)) {
    if (!is.null(col_indices)) {
      stop(
        "Inserted both col_names and col_indices when selecting row values. ",
        "Please choose one."
      )
    }
    col_indices <- h_col_indices(table_row, col_names)
  }

  # Default: all columns. Objects without a column dimension fall back to their length.
  if (is.null(col_indices)) {
    n_cols <- ncol(table_row)
    if (is.null(n_cols)) {
      n_cols <- length(table_row)
    }
    col_indices <- seq_len(n_cols)
  }

  col_indices
}

#' Helper Functions for Multivariate Logistic Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions used in calculations for logistic regression.
#'
#' @inheritParams argument_convention
#' @param fit_glm (`glm`)\cr logistic regression model fitted by [stats::glm()] with "binomial" family.
#'   Limited functionality is also available for conditional logistic regression models fitted by
#'   [survival::clogit()], currently this is used only by [extract_rsp_biomarkers()].
#' @param x (`string` or `character`)\cr a variable or interaction term in `fit_glm` (depending on the
#'   helper function).
#'
#' @examples
#' library(dplyr)
#' library(broom)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
#'   mutate(
#'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     RACE = factor(RACE),
#'     SEX = factor(SEX)
#'   )
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
#' mod1 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE")
#'   )
#' )
#' mod2 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE"),
#'     interaction = "AGE"
#'   )
#' )
#'
#' @name h_logistic_regression
NULL

#' @describeIn h_logistic_regression Helper function to extract interaction variable names from a fitted
#'   model assuming only one interaction term.
#'
#' @return Vector of names of interaction variables.
#'
#' @export
h_get_interaction_vars <- function(fit_glm) {
  checkmate::assert_class(fit_glm, "glm")

  # Keep the term labels of order 2; exactly one such term is required.
  model_terms <- stats::terms(fit_glm)
  is_second_order <- attr(model_terms, "order") == 2
  interaction_term <- attr(model_terms, "term.labels")[is_second_order]
  checkmate::assert_string(interaction_term)

  # Split the "a:b" label into the two variable names.
  strsplit(interaction_term, split = ":")[[1]]
}

#' @describeIn h_logistic_regression Helper function to get the right coefficient name from the
#'   interaction variable names and the given levels. The main value here is that the order
#'   of first and second variable is checked in the `interaction_vars` input.
#'
#' @param interaction_vars (`character` of length 2)\cr interaction variable names.
#' @param first_var_with_level (`character` of length 2)\cr the first variable name with
#'   the interaction level.
#' @param second_var_with_level (`character` of length 2)\cr the second variable name with
#'   the interaction level.
#'
#' @return Name of coefficient.
#'
#' @export
h_interaction_coef_name <- function(interaction_vars,
                                    first_var_with_level,
                                    second_var_with_level) {
  checkmate::assert_character(interaction_vars, len = 2, any.missing = FALSE)
  checkmate::assert_character(first_var_with_level, len = 2, any.missing = FALSE)
  checkmate::assert_character(second_var_with_level, len = 2, any.missing = FALSE)
  checkmate::assert_subset(c(first_var_with_level[1], second_var_with_level[1]), interaction_vars)

  # Variable name and level glued together.
  first_name <- paste0(first_var_with_level[1], first_var_with_level[2])
  second_name <- paste0(second_var_with_level[1], second_var_with_level[2])

  # The piece whose variable is listed first in `interaction_vars` goes first.
  leading_var <- interaction_vars[1]
  ordered_names <- if (first_var_with_level[1] == leading_var) {
    c(first_name, second_name)
  } else if (second_var_with_level[1] == leading_var) {
    c(second_name, first_name)
  }

  # When neither input refers to the leading variable, nothing (NULL) is returned.
  if (!is.null(ordered_names)) {
    paste(ordered_names, collapse = ":")
  }
}

#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates
#'   for the case when both the odds ratio and the interaction variable are categorical.
#'
#' @param odds_ratio_var (`string`)\cr the odds ratio variable.
#' @param interaction_var (`string`)\cr the interaction variable.
#'
#' @return Odds ratio.
#'
#' @export
h_or_cat_interaction <- function(odds_ratio_var,
                                 interaction_var,
                                 fit_glm,
                                 conf_level = 0.95) {
  interaction_vars <- h_get_interaction_vars(fit_glm)
  checkmate::assert_string(odds_ratio_var)
  checkmate::assert_string(interaction_var)
  checkmate::assert_subset(c(odds_ratio_var, interaction_var), interaction_vars)
  checkmate::assert_vector(interaction_vars, len = 2)

  xs_level <- fit_glm$xlevels
  xs_coef <- stats::coef(fit_glm)
  xs_vcov <- stats::vcov(fit_glm)

  # Odds ratio and confidence interval for one level of the odds ratio variable at
  # one level of the interaction variable.
  estimate_one <- function(var_level, ref_level) {
    # Main-effect coefficient; the interaction coefficient is added unless
    # `ref_level` is the first level of the interaction variable.
    coef_names <- paste0(odds_ratio_var, var_level)
    if (ref_level != xs_level[[interaction_var]][1]) {
      coef_names <- c(
        coef_names,
        h_interaction_coef_name(
          interaction_vars,
          c(odds_ratio_var, var_level),
          c(interaction_var, ref_level)
        )
      )
    }

    if (length(coef_names) > 1) {
      # Sum of two coefficients: estimate and variance via a row vector of ones.
      ones <- t(c(1, 1))
      est <- as.numeric(ones %*% xs_coef[coef_names])
      se <- sqrt(as.numeric(ones %*% xs_vcov[coef_names, coef_names] %*% t(ones)))
    } else {
      est <- xs_coef[coef_names]
      se <- sqrt(as.numeric(xs_vcov[coef_names, coef_names]))
    }

    list(
      or = exp(est),
      ci = exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se)
    )
  }

  # Nested result: odds ratio variable level (first level skipped) -> interaction
  # variable level -> list(or, ci).
  result <- list()
  for (var_level in xs_level[[odds_ratio_var]][-1]) {
    per_level <- list()
    for (ref_level in xs_level[[interaction_var]]) {
      per_level[[ref_level]] <- estimate_one(var_level, ref_level)
    }
    result[[var_level]] <- per_level
  }
  result
}

#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates
#'   for the case when either the odds ratio or the interaction variable is continuous.
#'
#' @param at (`NULL` or `numeric`)\cr optional values for the interaction variable. Otherwise
#'   the median is used.
#'
#' @return Odds ratio.
#'
#' @note We don't provide a function for the case when both variables are continuous because
#'   this does not arise in this table, as the treatment arm variable will always be involved
#'   and categorical.
#'
#' @export
h_or_cont_interaction <- function(odds_ratio_var,
                                  interaction_var,
                                  fit_glm,
                                  at = NULL,
                                  conf_level = 0.95) {
  # Both variables have to be members of the model's two-way interaction.
  interaction_vars <- h_get_interaction_vars(fit_glm)
  checkmate::assert_string(odds_ratio_var)
  checkmate::assert_string(interaction_var)
  checkmate::assert_subset(c(odds_ratio_var, interaction_var), interaction_vars)
  checkmate::assert_vector(interaction_vars, len = 2)
  checkmate::assert_numeric(at, min.len = 1, null.ok = TRUE, any.missing = FALSE)

  factor_levels <- fit_glm$xlevels
  coefs <- stats::coef(fit_glm)
  covmat <- stats::vcov(fit_glm)
  var_classes <- attr(fit_glm$terms, "dataClasses")
  # Explicit `at` values only make sense for a numeric interaction variable.
  if (!is.null(at)) {
    checkmate::assert_set_equal(var_classes[interaction_var], "numeric")
  }

  # Odds ratio and normal-approximation confidence interval (computed on the log scale)
  # for the `weights`-weighted sum of the selected coefficients. `weights` is only used
  # when more than one coefficient is selected.
  or_with_ci <- function(coef_names, weights) {
    if (length(coef_names) > 1) {
      contrast <- t(weights)
      log_or <- as.numeric(contrast %*% coefs[coef_names])
      se <- sqrt(as.numeric(contrast %*% covmat[coef_names, coef_names] %*% t(contrast)))
    } else {
      log_or <- coefs[coef_names]
      se <- sqrt(as.numeric(covmat[coef_names, coef_names]))
    }
    list(
      or = exp(log_or),
      ci = exp(log_or + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se)
    )
  }

  result <- list()
  if (var_classes[interaction_var] == "numeric") {
    # Categorical odds ratio variable, evaluated at given values of the numeric interaction variable.
    if (is.null(at)) {
      # Default: the median of the interaction variable in the model frame, rounded up.
      at <- ceiling(stats::median(fit_glm$model[[interaction_var]]))
    }

    for (or_level in factor_levels[[odds_ratio_var]][-1]) {
      main_coef <- paste0(odds_ratio_var, or_level)
      by_value <- list()
      for (value in at) {
        selected <- main_coef
        # At a value of zero the interaction coefficient does not contribute.
        if (value != 0) {
          selected <- c(
            selected,
            h_interaction_coef_name(
              interaction_vars,
              c(odds_ratio_var, or_level),
              c(interaction_var, "")
            )
          )
        }
        by_value[[as.character(value)]] <- or_with_ci(selected, c(1, value))
      }
      result[[or_level]] <- by_value
    }
  } else {
    # Numeric odds ratio variable, evaluated within each level of the factor interaction variable.
    checkmate::assert_set_equal(var_classes[odds_ratio_var], "numeric")
    checkmate::assert_set_equal(var_classes[interaction_var], "factor")
    for (inter_level in factor_levels[[interaction_var]]) {
      selected <- odds_ratio_var
      if (inter_level != factor_levels[[interaction_var]][1]) {
        selected <- c(
          selected,
          h_interaction_coef_name(
            interaction_vars,
            c(odds_ratio_var, ""),
            c(interaction_var, inter_level)
          )
        )
      }
      result[[inter_level]] <- or_with_ci(selected, c(1, 1))
    }
  }
  result
}

#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates
#'   in case of an interaction. This is a wrapper for [h_or_cont_interaction()] and
#'   [h_or_cat_interaction()].
#'
#' @return Odds ratio.
#'
#' @export
h_or_interaction <- function(odds_ratio_var,
                             interaction_var,
                             fit_glm,
                             at = NULL,
                             conf_level = 0.95) {
  # Classes of the two variables as recorded in the model terms.
  var_classes <- attr(fit_glm$terms, "dataClasses")[c(odds_ratio_var, interaction_var)]

  # At least one numeric variable: delegate to the continuous helper.
  if (any(var_classes == "numeric")) {
    return(
      h_or_cont_interaction(
        odds_ratio_var,
        interaction_var,
        fit_glm,
        at = at,
        conf_level = conf_level
      )
    )
  }

  # Otherwise both variables must be factors.
  if (!all(var_classes == "factor")) {
    stop("wrong interaction variable class, the interaction variable is not a numeric nor a factor")
  }

  h_or_cat_interaction(
    odds_ratio_var,
    interaction_var,
    fit_glm,
    conf_level = conf_level
  )
}

#' @describeIn h_logistic_regression Helper function to construct term labels from simple terms and the table
#'   of numbers of patients.
#'
#' @param terms (`character`)\cr simple terms.
#' @param table (`table`)\cr table containing numbers for terms.
#'
#' @return Term labels containing numbers of patients.
#'
#' @export
h_simple_term_labels <- function(terms,
                                 table) {
  checkmate::assert_true(is.table(table))
  checkmate::assert_multi_class(terms, classes = c("factor", "character"))
  # Factors are converted so that the table is indexed by level name, not by integer code.
  term_names <- as.character(terms)
  counts <- table[term_names]
  paste0(term_names, ", n = ", counts)
}

#' @describeIn h_logistic_regression Helper function to construct term labels from interaction terms and the table
#'   of numbers of patients.
#'
#' @param terms1 (`character`)\cr terms for first dimension (rows).
#' @param terms2 (`character`)\cr terms for second dimension (columns).
#' @param any (`flag`)\cr whether any of `terms1` and `terms2` can be fulfilled to count the
#'   number of patients. In that case they can only be scalar (strings).
#'
#' @return Term labels containing numbers of patients.
#'
#' @export
h_interaction_term_labels <- function(terms1,
                                      terms2,
                                      table,
                                      any = FALSE) {
  checkmate::assert_true(is.table(table))
  checkmate::assert_flag(any)
  checkmate::assert_multi_class(terms1, classes = c("factor", "character"))
  checkmate::assert_multi_class(terms2, classes = c("factor", "character"))
  row_terms <- as.character(terms1)
  col_terms <- as.character(terms2)

  if (!any) {
    # Pairwise lookup: one table cell per (row term, column term) pair.
    cell_counts <- table[cbind(row_terms, col_terms)]
    return(paste0(row_terms, " * ", col_terms, ", n = ", cell_counts))
  }

  checkmate::assert_scalar(row_terms)
  checkmate::assert_scalar(col_terms)
  # Row total plus column total counts the cell [row_terms, col_terms] twice, so it is subtracted once.
  n_either <- sum(c(table[row_terms, ], table[, col_terms])) - table[row_terms, col_terms]
  paste0(row_terms, " or ", col_terms, ", n = ", n_either)
}

#' @describeIn h_logistic_regression Helper function to tabulate the main effect
#'   results of a (conditional) logistic regression model.
#'
#' @return Tabulated main effect results from a logistic regression model.
#'
#' @examples
#' h_glm_simple_term_extract("AGE", mod1)
#' h_glm_simple_term_extract("ARMCD", mod1)
#'
#' @export
h_glm_simple_term_extract <- function(x, fit_glm) {
  checkmate::assert_multi_class(fit_glm, c("glm", "clogit"))
  checkmate::assert_string(x)

  is_glm <- inherits(fit_glm, "glm")
  var_classes <- attr(fit_glm$terms, "dataClasses")
  factor_levels <- fit_glm$xlevels
  coef_table <- summary(fit_glm)$coefficients
  # Different column names are selected from the coefficient table for `glm` and other (`clogit`) fits.
  stat_cols <- if (is_glm) {
    c("estimate" = "Estimate", "std_error" = "Std. Error", "pvalue" = "Pr(>|z|)")
  } else {
    c("estimate" = "coef", "std_error" = "se(coef)", "pvalue" = "Pr(>|z|)")
  }
  # Make sure x is not an interaction term.
  checkmate::assert_subset(x, names(var_classes))
  is_numeric_var <- var_classes[x] == "numeric"
  # Label of `x`: looked up in the data stored in a `glm` fit, otherwise the variable name itself.
  label_of_x <- function() {
    if (is_glm) formatters::var_labels(fit_glm$data[x], fill = TRUE) else x
  }

  # One coefficient row for a numeric variable, one per non-reference level for a factor.
  coef_rows <- if (is_numeric_var) x else paste0(x, factor_levels[[x]][-1])
  term_rows <- as.data.frame(coef_table[coef_rows, stat_cols, drop = FALSE], stringsAsFactors = FALSE)
  colnames(term_rows) <- names(stat_cols)
  # Statistics are kept as list columns; the variable summary row below uses empty
  # (`numeric(0)`) entries in the same columns.
  term_rows$estimate <- as.list(term_rows$estimate)
  term_rows$std_error <- as.list(term_rows$std_error)
  term_rows$pvalue <- as.list(term_rows$pvalue)
  term_rows$df <- as.list(1)

  if (is_numeric_var) {
    term_rows$term <- x
    term_rows$term_label <- label_of_x()
    term_rows$is_variable_summary <- FALSE
    term_rows$is_term_summary <- TRUE
    out <- term_rows
  } else {
    checkmate::assert_class(fit_glm, "glm")
    # The reason is that we don't have the original data set in the `clogit` object
    # and therefore cannot determine the level counts here.
    level_counts <- table(fit_glm$data[[x]])
    reference_level <- factor_levels[[x]][1]
    comparison_levels <- factor_levels[[x]][-1]
    term_rows$term <- comparison_levels
    term_rows$term_label <- h_simple_term_labels(term_rows$term, level_counts)
    term_rows$is_variable_summary <- FALSE
    term_rows$is_term_summary <- TRUE

    # Variable summary row: overall test for `x`, labelled with the reference level.
    wald_tests <- car::Anova(fit_glm, type = 3, test.statistic = "Wald")
    summary_row <- data.frame(
      pvalue = wald_tests[x, "Pr(>Chisq)", drop = TRUE],
      term = reference_level,
      term_label = paste("Reference", h_simple_term_labels(reference_level, level_counts)),
      df = wald_tests[x, "Df", drop = TRUE],
      stringsAsFactors = FALSE
    )
    summary_row$pvalue <- as.list(summary_row$pvalue)
    summary_row$df <- as.list(summary_row$df)
    summary_row$estimate <- list(numeric(0))
    summary_row$std_error <- list(numeric(0))
    # With a single comparison level the summary row carries no test results of its own.
    if (length(comparison_levels) == 1) {
      summary_row$pvalue <- list(numeric(0))
      summary_row$df <- list(numeric(0))
    }
    summary_row$is_variable_summary <- TRUE
    summary_row$is_term_summary <- FALSE
    out <- rbind(summary_row, term_rows)
  }

  out$variable <- x
  out$variable_label <- label_of_x()
  out$interaction <- ""
  out$interaction_label <- ""
  out$reference <- ""
  out$reference_label <- ""
  rownames(out) <- NULL

  column_order <- c(
    "variable",
    "variable_label",
    "term",
    "term_label",
    "interaction",
    "interaction_label",
    "reference",
    "reference_label",
    "estimate",
    "std_error",
    "df",
    "pvalue",
    "is_variable_summary",
    "is_term_summary"
  )
  out[column_order]
}

#' @describeIn h_logistic_regression Helper function to tabulate the interaction term
#'   results of a logistic regression model.
#'
#' @return Tabulated interaction term results from a logistic regression model.
#'
#' @examples
#' h_glm_interaction_extract("ARMCD:AGE", mod2)
#'
#' @export
h_glm_interaction_extract <- function(x, fit_glm) {
  inter_vars <- h_get_interaction_vars(fit_glm)
  var_classes <- attr(fit_glm$terms, "dataClasses")

  checkmate::assert_string(x)

  # Only take two-way interaction
  checkmate::assert_vector(inter_vars, len = 2)

  # Only consider simple case: first variable in interaction is arm, a categorical variable
  checkmate::assert_disjunct(var_classes[inter_vars[1]], "numeric")

  first_var <- inter_vars[1]
  second_var <- inter_vars[2]
  factor_levels <- fit_glm$xlevels
  coef_table <- summary(fit_glm)$coefficients
  wald_tests <- car::Anova(fit_glm, type = 3, test.statistic = "Wald")
  stat_cols <- c("estimate" = "Estimate", "std_error" = "Std. Error", "pvalue" = "Pr(>|z|)")
  first_ref <- factor_levels[[first_var]][1]
  first_comp <- factor_levels[[first_var]][-1]

  if (var_classes[second_var] == "numeric") {
    # Factor-by-numeric interaction: one coefficient per non-reference level of the first variable.
    coef_rows <- paste0(first_var, first_comp, ":", second_var)
    term_rows <- as.data.frame(coef_table[coef_rows, stat_cols, drop = FALSE], stringsAsFactors = FALSE)
    colnames(term_rows) <- names(stat_cols)
    term_rows$term <- first_comp
    counts <- table(fit_glm$data[[first_var]])
    term_rows$term_label <- h_simple_term_labels(first_comp, counts)
    summary_term <- first_ref
    reference_label <- h_simple_term_labels(first_ref, counts)
  } else {
    # Factor-by-factor interaction: one coefficient per combination of non-reference levels.
    second_comp <- factor_levels[[second_var]][-1]
    level_grid <- expand.grid(v1 = first_comp, v2 = second_comp)
    coef_rows <- paste(
      paste0(first_var, level_grid$v1),
      paste0(second_var, level_grid$v2),
      sep = ":"
    )
    term_rows <- as.data.frame(coef_table[coef_rows, stat_cols, drop = FALSE], stringsAsFactors = FALSE)
    colnames(term_rows) <- names(stat_cols)
    term_rows$term <- paste(level_grid$v1, "*", level_grid$v2)
    counts <- table(fit_glm$data[[first_var]], fit_glm$data[[second_var]])
    term_rows$term_label <- h_interaction_term_labels(level_grid$v1, level_grid$v2, counts)
    second_ref <- factor_levels[[second_var]][1]
    summary_term <- paste(first_var, second_var, sep = " * ")
    reference_label <- h_interaction_term_labels(first_ref, second_ref, counts, any = TRUE)
  }
  term_rows$df <- as.list(1)
  term_rows$pvalue <- as.list(term_rows$pvalue)
  term_rows$is_variable_summary <- FALSE
  term_rows$is_term_summary <- TRUE

  # Variable summary row: overall test of the interaction term `x`.
  summary_row <- data.frame(
    pvalue = wald_tests[x, "Pr(>Chisq)", drop = TRUE],
    term = summary_term,
    term_label = paste("Reference", reference_label),
    df = wald_tests[x, "Df", drop = TRUE],
    stringsAsFactors = FALSE
  )
  summary_row$pvalue <- as.list(summary_row$pvalue)
  summary_row$df <- as.list(summary_row$df)
  summary_row$estimate <- list(numeric(0))
  summary_row$std_error <- list(numeric(0))
  summary_row$is_variable_summary <- TRUE
  summary_row$is_term_summary <- FALSE

  out <- rbind(summary_row, term_rows)
  out$variable <- x
  out$variable_label <- paste(
    "Interaction of",
    formatters::var_labels(fit_glm$data[first_var], fill = TRUE),
    "*",
    formatters::var_labels(fit_glm$data[second_var], fill = TRUE)
  )
  out$interaction <- ""
  out$interaction_label <- ""
  out$reference <- ""
  out$reference_label <- ""
  rownames(out) <- NULL

  column_order <- c(
    "variable",
    "variable_label",
    "term",
    "term_label",
    "interaction",
    "interaction_label",
    "reference",
    "reference_label",
    "estimate",
    "std_error",
    "df",
    "pvalue",
    "is_variable_summary",
    "is_term_summary"
  )
  out[column_order]
}

#' @describeIn h_logistic_regression Helper function to tabulate the interaction
#'   results of a logistic regression model. This basically is a wrapper for
#'   [h_or_interaction()] and [h_glm_simple_term_extract()] which puts the results
#'   in the right data frame format.
#'
#' @return A `data.frame` of tabulated interaction term results from a logistic regression model.
#'
#' @examples
#' h_glm_inter_term_extract("AGE", "ARMCD", mod2)
#'
#' @export
h_glm_inter_term_extract <- function(odds_ratio_var,
                                     interaction_var,
                                     fit_glm,
                                     ...) {
  # First obtain the main effects.
  main_stats <- h_glm_simple_term_extract(odds_ratio_var, fit_glm)
  main_stats$is_reference_summary <- FALSE
  main_stats$odds_ratio <- NA
  main_stats$lcl <- NA
  main_stats$ucl <- NA

  # Then we get the odds ratio estimates and put into df form.
  or_numbers <- h_or_interaction(odds_ratio_var, interaction_var, fit_glm, ...)
  or_var_is_numeric <- attr(fit_glm$terms, "dataClasses")[odds_ratio_var] == "numeric"

  or_var_label <- unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE))
  inter_var_label <- unname(formatters::var_labels(fit_glm$data[interaction_var], fill = TRUE))

  if (or_var_is_numeric) {
    # Numeric OR variable case: `or_numbers` is a flat list with one entry per reference.
    pick <- function(or_list, name, pos = 1) {
      unname(unlist(
        lapply(or_list, function(entry) {
          entry[[name]][pos]
        })
      ))
    }
    term_values <- odds_ratio_var
    term_labels <- or_var_label
    reference_values <- names(or_numbers)
  } else {
    # Categorical OR variable case: `or_numbers` is nested, level of the OR variable first,
    # then one entry per reference.
    pick <- function(or_list, name, pos = 1) {
      unname(unlist(
        lapply(or_list, function(per_level) {
          lapply(per_level, function(entry) entry[[name]][pos])
        })
      ))
    }
    n_ref <- length(names(or_numbers[[1]]))
    term_values <- rep(names(or_numbers), each = n_ref)
    term_labels <- h_simple_term_labels(term_values, table(fit_glm$data[[odds_ratio_var]]))
    reference_values <- unlist(lapply(or_numbers, names))
  }

  or_stats <- data.frame(
    variable = odds_ratio_var,
    variable_label = or_var_label,
    term = term_values,
    term_label = term_labels,
    interaction = interaction_var,
    interaction_label = inter_var_label,
    reference = reference_values,
    reference_label = reference_values,
    estimate = NA,
    std_error = NA,
    odds_ratio = pick(or_numbers, "or"),
    lcl = pick(or_numbers, "ci", pos = "lcl"),
    ucl = pick(or_numbers, "ci", pos = "ucl"),
    df = NA,
    pvalue = NA,
    is_variable_summary = FALSE,
    is_term_summary = FALSE,
    is_reference_summary = TRUE
  )

  combined <- rbind(
    main_stats[, names(or_stats)],
    or_stats
  )
  # Variable summary rows first, then by term with the term row ahead of its reference rows.
  row_order <- order(
    -combined$is_variable_summary,
    combined$term,
    -combined$is_term_summary,
    combined$reference
  )
  combined[row_order, ]
}

#' @describeIn h_logistic_regression Helper function to tabulate the results including
#'   odds ratios and confidence intervals of simple terms.
#'
#' @return Tabulated statistics for the given variable(s) from the logistic regression model.
#'
#' @examples
#' h_logistic_simple_terms("AGE", mod1)
#'
#' @export
h_logistic_simple_terms <- function(x, fit_glm, conf_level = 0.95) {
  checkmate::assert_multi_class(fit_glm, c("glm", "clogit"))
  if (inherits(fit_glm, "glm")) {
    checkmate::assert_set_equal(fit_glm$family$family, "binomial")
  }
  term_labels <- attr(stats::terms(fit_glm), "term.labels")
  var_classes <- attr(fit_glm$terms, "dataClasses")
  # Term labels that are not model variables themselves are treated as interaction terms.
  interaction_terms <- term_labels[!term_labels %in% names(var_classes)]
  checkmate::assert_subset(x, term_labels)
  if (length(interaction_terms) > 0) {
    # Make sure any item in x is not part of interaction term
    checkmate::assert_disjunct(x, unlist(strsplit(interaction_terms, ":")))
  }

  result <- do.call(rbind, lapply(x, h_glm_simple_term_extract, fit_glm))

  # Odds ratios with confidence limits derived on the log scale.
  z_value <- stats::qnorm((1 + conf_level) / 2)
  lower_limit <- function(or, se) exp(log(or) - z_value * se)
  upper_limit <- function(or, se) exp(log(or) + z_value * se)
  result$odds_ratio <- lapply(result$estimate, exp)
  result$lcl <- Map(lower_limit, result$odds_ratio, result$std_error)
  result$ucl <- Map(upper_limit, result$odds_ratio, result$std_error)
  result$ci <- Map(function(lower, upper) c(lower, upper), result$lcl, result$ucl)
  result
}

#' @describeIn h_logistic_regression Helper function to tabulate the results including
#'   odds ratios and confidence intervals of interaction terms.
#'
#' @return Tabulated statistics for the given variable(s) from the logistic regression model.
#'
#' @examples
#' h_logistic_inter_terms(c("RACE", "AGE", "ARMCD", "AGE:ARMCD"), mod2)
#'
#' @export
h_logistic_inter_terms <- function(x,
                                   fit_glm,
                                   conf_level = 0.95,
                                   at = NULL) {
  # Find out the interaction variables and interaction term.
  inter_vars <- h_get_interaction_vars(fit_glm)
  checkmate::assert_vector(inter_vars, len = 2)

  # The interaction term is the element of `x` that mentions both interaction variables.
  # Variable names are matched literally (`fixed = TRUE`): names such as `age.group` contain
  # regular expression metacharacters and must not be interpreted as patterns.
  inter_term_index <- intersect(
    grep(inter_vars[1], x, fixed = TRUE),
    grep(inter_vars[2], x, fixed = TRUE)
  )
  inter_term <- x[inter_term_index]

  # For the non-interaction vars we need the standard stuff.
  normal_terms <- setdiff(x, union(inter_vars, inter_term))
  has_normal_terms <- length(normal_terms) > 0

  # Only build the simple-term statistics when there is something to build them from.
  if (has_normal_terms) {
    normal_stats <- do.call(rbind, lapply(normal_terms, h_glm_simple_term_extract, fit_glm))
    q_norm <- stats::qnorm((1 + conf_level) / 2)
    normal_stats$odds_ratio <- lapply(normal_stats$estimate, exp)
    normal_stats$lcl <- Map(
      function(or, se) exp(log(or) - q_norm * se),
      normal_stats$odds_ratio,
      normal_stats$std_error
    )
    normal_stats$ucl <- Map(
      function(or, se) exp(log(or) + q_norm * se),
      normal_stats$odds_ratio,
      normal_stats$std_error
    )
    normal_stats$is_reference_summary <- FALSE
  }

  # Now the interaction term itself.
  inter_term_stats <- h_glm_interaction_extract(inter_term, fit_glm)
  inter_term_stats$odds_ratio <- NA
  inter_term_stats$lcl <- NA
  inter_term_stats$ucl <- NA
  inter_term_stats$is_reference_summary <- FALSE

  is_intervar1_numeric <- attr(fit_glm$terms, "dataClasses")[inter_vars[1]] == "numeric"

  # Interaction stuff. `at` is only forwarded to the call in which the numeric variable
  # plays the role of the interaction variable; the other call receives `NULL`.
  inter_stats_one <- h_glm_inter_term_extract(
    inter_vars[1],
    inter_vars[2],
    fit_glm,
    conf_level = conf_level,
    at = if (is_intervar1_numeric) NULL else at
  )
  inter_stats_two <- h_glm_inter_term_extract(
    inter_vars[2],
    inter_vars[1],
    fit_glm,
    conf_level = conf_level,
    at = if (is_intervar1_numeric) at else NULL
  )

  # Now just combine everything in one data frame.
  col_names <- c(
    "variable",
    "variable_label",
    "term",
    "term_label",
    "interaction",
    "interaction_label",
    "reference",
    "reference_label",
    "estimate",
    "std_error",
    "df",
    "pvalue",
    "odds_ratio",
    "lcl",
    "ucl",
    "is_variable_summary",
    "is_term_summary",
    "is_reference_summary"
  )
  df <- rbind(
    inter_stats_one[, col_names],
    inter_stats_two[, col_names],
    inter_term_stats[, col_names]
  )
  if (has_normal_terms) {
    df <- rbind(
      normal_stats[, col_names],
      df
    )
  }
  df$ci <- combine_vectors(df$lcl, df$ucl)
  df
}

#' Cox Proportional Hazards Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Fits a Cox regression model and estimates hazard ratio to describe the effect size in a survival analysis.
#'
#' @inheritParams argument_convention
#'
#' @details Cox models are the most commonly used methods to estimate the magnitude of
#'   the effect in survival analysis. It assumes proportional hazards: the ratio
#'   of the hazards between groups (e.g., two arms) is constant over time.
#'   This ratio is referred to as the "hazard ratio" (HR) and is one of the
#'   most commonly reported metrics to describe the effect size in survival
#'   analysis (NEST Team, 2020).
#'
#' @seealso [fit_coxreg] for relevant fitting functions, [h_cox_regression] for relevant
#'   helper functions, and [tidy_coxreg] for custom tidy methods.
#'
#' @examples
#' library(survival)
#'
#' # Testing dataset [survival::bladder].
#' set.seed(1, kind = "Mersenne-Twister")
#' dta_bladder <- with(
#'   data = bladder[bladder$enum < 5, ],
#'   tibble::tibble(
#'     TIME = stop,
#'     STATUS = event,
#'     ARM = as.factor(rx),
#'     COVAR1 = as.factor(enum) %>% formatters::with_label("A Covariate Label"),
#'     COVAR2 = factor(
#'       sample(as.factor(enum)),
#'       levels = 1:4, labels = c("F", "F", "M", "M")
#'     ) %>% formatters::with_label("Sex (F/M)")
#'   )
#' )
#' dta_bladder$AGE <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
#' dta_bladder$STUDYID <- factor("X")
#'
#' plot(
#'   survfit(Surv(TIME, STATUS) ~ ARM + COVAR1, data = dta_bladder),
#'   lty = 2:4,
#'   xlab = "Months",
#'   col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
#' )
#'
#' @name cox_regression
NULL

#' @describeIn cox_regression Statistics function that transforms results tabulated
#'   from [fit_coxreg_univar()] or [fit_coxreg_multivar()] into a list.
#'
#' @param model_df (`data.frame`)\cr contains the resulting model fit from a [fit_coxreg]
#'   function with tidying applied via [broom::tidy()].
#' @param .stats (`character`)\cr the name of statistics to be reported among:
#'   * `n`: number of observations (univariate only)
#'   * `hr`: hazard ratio
#'   * `ci`: confidence interval
#'   * `pval`: p-value of the treatment effect
#'   * `pval_inter`: p-value of the interaction effect between the treatment and the covariate (univariate only)
#' @param .which_vars (`character`)\cr which rows should statistics be returned for from the given model.
#'   Defaults to "all". Other options include "var_main" for main effects, "inter" for interaction effects,
#'   and "multi_lvl" for multivariate model covariate level rows. When `.which_vars` is "all" specific
#'   variables can be selected by specifying `.var_nms`.
#' @param .var_nms (`character`)\cr the `term` value of rows in `model_df` for which `.stats` should be returned.
#'   Typically this is the name of a variable. If using variable labels, `.var_nms` should be a vector of both the
#'   desired variable name and the variable label in that order to see all `.stats` related to that variable. When
#'   `.which_vars` is "var_main" `.var_nms` should be only the variable name.
#'
#' @return
#' * `s_coxreg()` returns the selected statistic from the Cox regression model for the selected variable(s).
#'
#' @examples
#' # s_coxreg
#'
#' # Univariate
#' u1_variables <- list(
#'   time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2")
#' )
#' univar_model <- fit_coxreg_univar(variables = u1_variables, data = dta_bladder)
#' df1 <- broom::tidy(univar_model)
#' s_coxreg(model_df = df1, .stats = "hr")
#'
#' # Univariate with interactions
#' univar_model_inter <- fit_coxreg_univar(
#'   variables = u1_variables, control = control_coxreg(interaction = TRUE), data = dta_bladder
#' )
#' df1_inter <- broom::tidy(univar_model_inter)
#' s_coxreg(model_df = df1_inter, .stats = "hr", .which_vars = "inter", .var_nms = "COVAR1")
#'
#' # Univariate without treatment arm - only "COVAR2" covariate effects
#' u2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2"))
#' univar_covs_model <- fit_coxreg_univar(variables = u2_variables, data = dta_bladder)
#' df1_covs <- broom::tidy(univar_covs_model)
#' s_coxreg(model_df = df1_covs, .stats = "hr", .var_nms = c("COVAR2", "Sex (F/M)"))
#'
#' # Multivariate.
#' m1_variables <- list(
#'   time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2")
#' )
#' multivar_model <- fit_coxreg_multivar(variables = m1_variables, data = dta_bladder)
#' df2 <- broom::tidy(multivar_model)
#' s_coxreg(model_df = df2, .stats = "pval", .which_vars = "var_main", .var_nms = "COVAR1")
#' s_coxreg(
#'   model_df = df2, .stats = "pval", .which_vars = "multi_lvl",
#'   .var_nms = c("COVAR1", "A Covariate Label")
#' )
#'
#' # Multivariate without treatment arm - only "COVAR1" main effect
#' m2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2"))
#' multivar_covs_model <- fit_coxreg_multivar(variables = m2_variables, data = dta_bladder)
#' df2_covs <- broom::tidy(multivar_covs_model)
#' s_coxreg(model_df = df2_covs, .stats = "hr")
#'
#' @export
s_coxreg <- function(model_df, .stats, .which_vars = "all", .var_nms = NULL) {
  assert_df_with_variables(model_df, list(term = "term", stat = .stats))
  checkmate::assert_multi_class(model_df$term, classes = c("factor", "character"))
  model_df$term <- as.character(model_df$term)
  .var_nms <- .var_nms[!is.na(.var_nms)]

  # Keep only the requested terms; for "multi_lvl" all remaining rows are relabelled with the
  # last entry of `.var_nms` so that they end up in a single group below.
  if (length(.var_nms) > 0) model_df <- model_df[model_df$term %in% .var_nms, ]
  if (.which_vars == "multi_lvl") model_df$term <- utils::tail(.var_nms, 1)

  # We need a list with names corresponding to the stats to display of equal length to the list of stats.
  y <- split(model_df, f = model_df$term, drop = FALSE)
  y <- stats::setNames(y, nm = rep(.stats, length(y)))

  if (.which_vars == "var_main") {
    y <- lapply(y, function(x) x[1, ]) # only main effect
  } else if (.which_vars %in% c("inter", "multi_lvl")) {
    # Exclude the main effect (first row) of each group. The decision is made per group:
    # a group consisting of a single row is kept as it is.
    y <- lapply(y, function(x) if (nrow(x) > 1) x[-1, ] else x)
  }

  # One list per group: the values of the selected statistic, named by the term labels.
  lapply(
    X = y,
    FUN = function(x) {
      z <- as.list(x[[.stats]])
      stats::setNames(z, nm = x$term_label)
    }
  )
}

#' @describeIn cox_regression Analysis function which is used as `afun` in [rtables::analyze()]
#'   and `cfun` in [rtables::summarize_row_groups()] within `summarize_coxreg()`.
#'
#' @param eff (`flag`)\cr whether treatment effect should be calculated. Defaults to `FALSE`.
#' @param var_main (`flag`)\cr whether main effects should be calculated. Defaults to `FALSE`.
#' @param na_level (`string`)\cr custom string to replace all `NA` values with. Defaults to `""`.
#' @param cache_env (`environment`)\cr an environment object used to cache the regression model in order to
#'   avoid repeatedly fitting the same model for every row in the table. Defaults to `NULL` (no caching).
#'
#' @return
#' * `a_coxreg()` returns formatted [rtables::CellValue()].
#'
#' @examples
#' tern:::a_coxreg(
#'   df = dta_bladder,
#'   labelstr = "Label 1",
#'   variables = u1_variables,
#'   .spl_context = list(value = "COVAR1"),
#'   .stats = "n",
#'   .formats = "xx"
#' )
#'
#' tern:::a_coxreg(
#'   df = dta_bladder,
#'   labelstr = "",
#'   variables = u1_variables,
#'   .spl_context = list(value = "COVAR2"),
#'   .stats = "pval",
#'   .formats = "xx.xxxx"
#' )
#'
#' @keywords internal
a_coxreg <- function(df,
                     labelstr,
                     eff = FALSE,
                     var_main = FALSE,
                     multivar = FALSE,
                     variables,
                     at = list(),
                     control = control_coxreg(),
                     .spl_context,
                     .stats,
                     .formats,
                     .indent_mods = NULL,
                     na_level = "",
                     cache_env = NULL) {
  # Fits (or fetches from `cache_env`) the tidied Cox model for the current covariate, selects the
  # rows matching the `eff` / `var_main` / `multivar` flags via `s_coxreg()` and returns them as rows.
  cov_no_arm <- !multivar && !"arm" %in% names(variables) && control$interaction # special case: univar no arm
  cov <- tail(.spl_context$value, 1) # current variable/covariate
  var_lbl <- formatters::var_labels(df)[cov] # check for df labels
  # Resolve the row label: a `labelstr` of length > 1 is looked up by covariate name, falling back to
  # the data label; a scalar `labelstr` equal to the covariate name is replaced by the data label.
  if (length(labelstr) > 1) {
    labelstr <- if (cov %in% names(labelstr)) labelstr[[cov]] else var_lbl # use df labels if none
  } else if (!is.na(var_lbl) && labelstr == cov && cov %in% variables$covariates) {
    labelstr <- var_lbl
  }
  # Treatment effect rows, multivariate models and the no-arm special case are fitted without
  # interaction. Otherwise the model is restricted to the current covariate, and interaction is
  # switched on when main effects are requested.
  if (eff || multivar || cov_no_arm) {
    control$interaction <- FALSE
  } else {
    variables$covariates <- cov
    if (var_main) control$interaction <- TRUE
  }

  # Reuse the tidied model stored under the covariate name if there is one, otherwise fit and store it.
  if (is.null(cache_env[[cov]])) {
    if (!multivar) {
      model <- fit_coxreg_univar(variables = variables, data = df, at = at, control = control) %>% broom::tidy()
    } else {
      model <- fit_coxreg_multivar(variables = variables, data = df, control = control) %>% broom::tidy()
    }
    cache_env[[cov]] <- model
  } else {
    model <- cache_env[[cov]]
  }
  # The interaction p-value column is set to `NA` for univariate rows that are not main effect rows.
  if (!multivar && !var_main) model[, "pval_inter"] <- NA_real_

  # From here on the no-arm special case, and numeric covariates in models without an arm, follow the
  # multivariate row selection below; in the latter case the row is also treated as a main effect.
  if (cov_no_arm || (!cov_no_arm && !"arm" %in% names(variables) && is.numeric(df[[cov]]))) {
    multivar <- TRUE
    if (!cov_no_arm) var_main <- TRUE
  }

  # Arguments for `s_coxreg()`: which terms (`var_nms`) and which of their rows (`which_vars`) to return.
  vars_coxreg <- list(which_vars = "all", var_nms = NULL)
  if (eff) {
    if (multivar && !var_main) { # multivar treatment level
      var_lbl_arm <- formatters::var_labels(df)[[variables$arm]]
      vars_coxreg[c("var_nms", "which_vars")] <- list(c(variables$arm, var_lbl_arm), "multi_lvl")
    } else { # treatment effect
      vars_coxreg["var_nms"] <- variables$arm
      if (var_main) vars_coxreg["which_vars"] <- "var_main"
    }
  } else {
    if (!multivar || (multivar && var_main && !is.numeric(df[[cov]]))) { # covariate effect/level
      vars_coxreg[c("var_nms", "which_vars")] <- list(cov, "var_main")
    } else if (multivar) { # multivar covariate level
      vars_coxreg[c("var_nms", "which_vars")] <- list(c(cov, var_lbl), "multi_lvl")
      if (var_main) model[cov, .stats] <- NA_real_
    }
    if (!multivar && !var_main && control$interaction) vars_coxreg["which_vars"] <- "inter" # interaction effect
  }
  # Only the first group returned by `s_coxreg()` is used.
  var_vals <- s_coxreg(model, .stats, .which_vars = vars_coxreg$which_vars, .var_nms = vars_coxreg$var_nms)[[1]]
  # Row names: the first matching rule below wins; the default is the term labels returned by `s_coxreg()`.
  var_names <- if (all(grepl("\\(reference = ", names(var_vals))) && labelstr != tail(.spl_context$value, 1)) {
    paste(c(labelstr, tail(strsplit(names(var_vals), " ")[[1]], 3)), collapse = " ") # "reference" main effect labels
  } else if ((!multivar && !eff && !(!var_main && control$interaction) && nchar(labelstr) > 0) ||
    (multivar && var_main && is.numeric(df[[cov]]))) {
    labelstr # other main effect labels
  } else if (multivar && !eff && !var_main && is.numeric(df[[cov]])) {
    "All" # multivar numeric covariate
  } else {
    names(var_vals)
  }
  # The single format and NA string are repeated so that every row gets one, keyed by row name.
  in_rows(
    .list = var_vals, .names = var_names, .labels = var_names, .indent_mods = .indent_mods,
    .formats = stats::setNames(rep(.formats, length(var_names)), var_names),
    .format_na_strs = stats::setNames(rep(na_level, length(var_names)), var_names)
  )
}

#' @describeIn cox_regression Layout-creating function which creates a Cox regression summary table
#'   layout. This function is a wrapper for several `rtables` layouting functions, including
#'   [rtables::analyze_colvars()] and [rtables::summarize_row_groups()].
#'
#' @inheritParams fit_coxreg_univar
#' @param multivar (`flag`)\cr Defaults to `FALSE`. If `TRUE` multivariate Cox regression will run, otherwise
#'   univariate Cox regression will run.
#' @param common_var (`character`)\cr the name of a factor variable in the dataset which takes the same value
#'   for all rows. This should be created during pre-processing if no such variable currently exists.
#' @param .section_div (`character`)\cr string which should be repeated as a section divider between sections.
#'   Defaults to `NA` for no section divider. If a vector of two strings is given, the first will be used between
#'   treatment and covariate sections and the second between different covariates.
#'
#' @return
#' * `summarize_coxreg()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add a Cox regression table
#'   containing the chosen statistics to the table layout.
#'
#' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()] which also take the `variables`, `data`,
#'   `at` (univariate only), and `control` arguments but return unformatted univariate and multivariate
#'   Cox regression models, respectively.
#'
#' @examples
#' # summarize_coxreg
#'
#' result_univar <- basic_table() %>%
#'   summarize_coxreg(variables = u1_variables) %>%
#'   build_table(dta_bladder)
#' result_univar
#'
#' result_multivar <- basic_table() %>%
#'   summarize_coxreg(
#'     variables = m1_variables,
#'     multivar = TRUE,
#'   ) %>%
#'   build_table(dta_bladder)
#' result_multivar
#'
#' result_univar_covs <- basic_table() %>%
#'   summarize_coxreg(
#'     variables = u2_variables,
#'   ) %>%
#'   build_table(dta_bladder)
#' result_univar_covs
#'
#' result_multivar_covs <- basic_table() %>%
#'   summarize_coxreg(
#'     variables = m2_variables,
#'     multivar = TRUE,
#'     varlabels = c("Covariate 1", "Covariate 2") # custom labels
#'   ) %>%
#'   build_table(dta_bladder)
#' result_multivar_covs
#'
#' @export
summarize_coxreg <- function(lyt,
                             variables,
                             control = control_coxreg(),
                             at = list(),
                             multivar = FALSE,
                             common_var = "STUDYID",
                             .stats = c("n", "hr", "ci", "pval", "pval_inter"),
                             .formats = c(
                               n = "xx", hr = "xx.xx", ci = "(xx.xx, xx.xx)",
                               pval = "x.xxxx | (<0.0001)", pval_inter = "x.xxxx | (<0.0001)"
                             ),
                             varlabels = NULL,
                             .indent_mods = NULL,
                             na_level = "",
                             .section_div = NA_character_) {
  # Overview: the layout is assembled in three stages -
  #   1. one column per requested statistic (all columns split on `common_var`),
  #   2. an optional "Treatment:" row section when `variables` contains `arm`,
  #   3. an optional "Covariate:" row section when `variables` contains `covariates`.
  # All rows are produced by `a_coxreg`, which receives its settings through `extra_args`.

  # Only a warning is raised here; `control` itself is passed on unchanged.
  if (multivar && control$interaction) {
    warning(paste(
      "Interactions are not available for multivariate cox regression using summarize_coxreg.",
      "The model will be calculated without interaction effects."
    ))
  }
  # Interaction terms need a treatment arm to interact with.
  if (control$interaction && !"arm" %in% names(variables)) {
    stop("To include interactions please specify 'arm' in variables.")
  }

  # Restrict the requested statistics to those available for the chosen model type. The resulting
  # order follows the fixed vectors below, not the order supplied by the caller.
  .stats <- if (!"arm" %in% names(variables) || multivar) { # only valid statistics
    intersect(c("hr", "ci", "pval"), .stats)
  } else if (control$interaction) {
    intersect(c("n", "hr", "ci", "pval", "pval_inter"), .stats)
  } else {
    intersect(c("n", "hr", "ci", "pval"), .stats)
  }
  # Column header labels; the CI label is derived from the confidence level in `control`.
  stat_labels <- c(
    n = "n", hr = "Hazard Ratio", ci = paste0(control$conf_level * 100, "% CI"),
    pval = "p-value", pval_inter = "Interaction p-value"
  )
  # Keep only the labels and formats belonging to the retained statistics.
  stat_labels <- stat_labels[names(stat_labels) %in% .stats]
  .formats <- .formats[names(.formats) %in% .stats]
  env <- new.env() # create caching environment

  # One column per statistic. Each column gets its own statistic name, format and NA string, while
  # the same caching environment object is handed to every column via `cache_env`.
  lyt <- lyt %>%
    split_cols_by_multivar(
      vars = rep(common_var, length(.stats)),
      varlabels = stat_labels,
      extra_args = list(
        .stats = .stats, .formats = .formats, .indent_mods = .indent_mods, na_level = rep(na_level, length(.stats)),
        cache_env = replicate(length(.stats), list(env))
      )
    )

  if ("arm" %in% names(variables)) { # treatment effect
    # The treatment section is split on `common_var` and uses the first section divider.
    lyt <- lyt %>%
      split_rows_by(
        common_var,
        split_label = "Treatment:",
        label_pos = "visible",
        section_div = head(.section_div, 1)
      ) %>%
      summarize_row_groups(
        cfun = a_coxreg,
        extra_args = list(
          variables = variables, control = control, multivar = multivar, eff = TRUE, var_main = multivar
        )
      )
    if (multivar) { # treatment level effects
      lyt <- lyt %>%
        analyze_colvars(
          afun = a_coxreg,
          extra_args = list(eff = TRUE, control = control, variables = variables, multivar = multivar, labelstr = "")
        )
    }
  }

  if ("covariates" %in% names(variables)) { # covariate main effects
    # One row group per covariate, not nested under the treatment section; uses the last section
    # divider. Group labels are hidden only in the univariate, no-interaction case with an arm.
    lyt <- lyt %>%
      split_rows_by_multivar(
        vars = variables$covariates,
        varlabels = varlabels,
        split_label = "Covariate:",
        nested = FALSE,
        child_labels = if (multivar || control$interaction || !"arm" %in% names(variables)) "default" else "hidden",
        section_div = tail(.section_div, 1)
      )
    if (multivar || control$interaction || !"arm" %in% names(variables)) {
      # Main-effect information goes into a content row per covariate.
      lyt <- lyt %>%
        summarize_row_groups(
          cfun = a_coxreg,
          extra_args = list(
            variables = variables, at = at, control = control, multivar = multivar,
            var_main = if (multivar) multivar else control$interaction
          )
        )
    } else {
      # Otherwise the covariate rows are plain analysis rows. Custom labels are named by covariate
      # and handed over through `labelstr`.
      if (!is.null(varlabels)) names(varlabels) <- variables$covariates
      lyt <- lyt %>%
        analyze_colvars(
          afun = a_coxreg,
          extra_args = list(
            variables = variables, at = at, control = control, multivar = multivar,
            var_main = if (multivar) multivar else control$interaction,
            labelstr = if (is.null(varlabels)) "" else varlabels
          )
        )
    }

    # Without an arm the flag is switched on locally so that the level-effect rows below are added.
    # This reassignment must stay after the calls above.
    if (!"arm" %in% names(variables)) control$interaction <- TRUE # special case: univar no arm
    if (multivar || control$interaction) { # covariate level effects
      lyt <- lyt %>%
        analyze_colvars(
          afun = a_coxreg,
          extra_args = list(variables = variables, at = at, control = control, multivar = multivar, labelstr = "")
        )
    }
  }

  lyt
}

#' Convert Table into Matrix of Strings
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to use mostly within tests. The `with_spaces` parameter allows
#' testing not only the content but also the indentation and table structure.
#' `print_txt_to_copy` instead facilitates test development by printing well
#' formatted text that only needs to be copied and pasted into the expected output.
#'
#' @param x `rtables` table.
#' @param with_spaces Should the tested table keep the indentation and other relevant spaces?
#' @param print_txt_to_copy Utility to have a way to copy the input table directly
#'   into the expected variable instead of copying it too manually.
#'
#' @return A `matrix` of `string`s.
#'
#' @export
to_string_matrix <- function(x, with_spaces = FALSE, print_txt_to_copy = FALSE) {
  checkmate::assert_flag(with_spaces)
  checkmate::assert_flag(print_txt_to_copy)

  # Either the rendered table split into its text lines (indentation kept), or the
  # matrix of cell strings taken from the matrix form.
  out <- if (with_spaces) {
    strsplit(toString(matrix_form(x, TRUE)), "\\n")[[1]]
  } else {
    matrix_form(x)$string
  }

  # Optionally print an R expression that can be pasted into a test as the expected value.
  if (print_txt_to_copy) {
    lines_to_print <- if (with_spaces) {
      out
    } else {
      # Collapse each matrix row so its cells become consecutive quoted strings.
      apply(out, 1, paste0, collapse = '", "')
    }
    cat(paste0('c(\n  "', paste0(lines_to_print, collapse = '",\n  "'), '"\n)'))
  }

  out
}

#' Blank for Missing Input
#'
#' Helper function to use in tabulating model results.
#'
#' @param x (`vector`)\cr input for a cell.
#'
#' @return An empty `character` vector if all entries in `x` are missing (`NA`), otherwise
#'   the unlisted version of `x`.
#'
#' @keywords internal
unlist_and_blank_na <- function(x) {
  flat <- unlist(x)
  # Nothing but missing values (this also covers empty input): hand back a blank.
  if (all(is.na(flat))) {
    return(character())
  }
  flat
}

#' Constructor for Content Functions given Data Frame with Flag Input
#'
#' This can be useful for tabulating model results.
#'
#' @param analysis_var (`string`)\cr variable name for the column containing values to be returned by the
#'   content function.
#' @param flag_var (`string`)\cr variable name for the logical column identifying which row should be returned.
#' @param format (`string`)\cr `rtables` format to use.
#'
#' @return A content function which gives `df[[analysis_var]]` at the row(s) where
#'   `df[[flag_var]]` is `TRUE`, in the given format.
#'
#' @keywords internal
cfun_by_flag <- function(analysis_var,
                         flag_var,
                         format = "xx",
                         .indent_mods = NULL) {
  checkmate::assert_string(analysis_var)
  checkmate::assert_string(flag_var)
  # The returned closure keeps the column names, format and indent modifier.
  function(df, labelstr) {
    # Values of the analysis column in the rows where the flag column is TRUE.
    flagged <- df[[analysis_var]][which(df[[flag_var]])]
    cell <- rcell(unlist_and_blank_na(flagged), format = format, indent_mod = .indent_mods)
    formatters::with_label(cell, labelstr)
  }
}

#' Content Row Function to Add Row Total to Labels
#'
#' This takes the label of the latest row split level and adds the row total in parentheses.
#'
#' @inheritParams argument_convention
#'
#' @return A `list` containing "row_count" with the row count value and the correct label.
#'
#' @note It is important here to not use `df` but rather `.N_row` in the implementation, because
#'   the former is already split by columns and will refer to the first column of the data only.
#'
#' @keywords internal
c_label_n <- function(df,
                      labelstr,
                      .N_row) { # nolint
  # The row total is stored twice in the cell and also shown in the label as "(N=...)".
  row_count <- formatters::with_label(
    c(.N_row, .N_row),
    paste0(labelstr, " (N=", .N_row, ")")
  )
  list(row_count = row_count)
}

#' Layout Creating Function to Add Row Total Counts
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This works analogously to [rtables::add_colcounts()] but on the rows. This function
#'  is a wrapper for [rtables::summarize_row_groups()].
#'
#' @inheritParams argument_convention
#'
#' @return A modified layout where the latest row split labels now have the row-wise
#'   total counts (i.e. without column-based subsetting) attached in parentheses.
#'
#' @note Row count values are contained in these row count rows but are not displayed
#'   so that they are not considered zero rows by default when pruning.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   split_rows_by("RACE", split_fun = drop_split_levels) %>%
#'   add_rowcounts() %>%
#'   analyze("AGE", afun = list_wrap_x(summary), format = "xx.xx") %>%
#'   build_table(DM)
#'
#' @export
add_rowcounts <- function(lyt) {
  # The count values stay in the cells but are rendered as empty strings, so only the
  # label with "(N=...)" is visible.
  blank_format <- function(x, ...) ""
  summarize_row_groups(
    lyt,
    cfun = make_afun(
      c_label_n,
      .stats = c("row_count"),
      .formats = c(row_count = blank_format)
    )
  )
}

#' Obtain Column Indices
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to extract column indices from a `VTableTree` for a given
#' vector of column names.
#'
#' @param table_tree (`VTableTree`)\cr table to extract the indices from.
#' @param col_names (`character`)\cr vector of column names.
#'
#' @return A vector of column indices.
#'
#' @export
h_col_indices <- function(table_tree, col_names) {
  checkmate::assert_class(table_tree, "VTableNodeInfo")
  # Column names are read from the names of the "cextra_args" attribute of the column info.
  available <- names(attr(col_info(table_tree), "cextra_args"))
  checkmate::assert_subset(col_names, available, empty.ok = FALSE)
  match(col_names, available)
}

#' Labels or Names of List Elements
#'
#' Internal helper function for working with nested statistic function results which typically
#' don't have labels but names that we can use.
#'
#' @param x a list
#'
#' @return A `character` vector with the labels or names for the list elements.
#'
#' @keywords internal
labels_or_names <- function(x) {
  checkmate::assert_multi_class(x, c("data.frame", "list"))
  # Empty input: return an empty character vector, as documented.
  if (length(x) == 0) {
    return(character(0))
  }
  # `lapply()` always yields a list, whether or not some elements lack a label, so the
  # `NULL` check below is type-stable.
  labs <- lapply(x, obj_label)
  has_no_label <- vapply(labs, is.null, logical(1))
  # Fall back to the element name (or "" if unnamed) where no label is available.
  labs[has_no_label] <- as.list(rlang::names2(x)[has_no_label])
  unlist(labs)
}

#' Convert to `rtable`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is a new generic function to convert objects to `rtable` tables.
#'
#' @param x the object which should be converted to an `rtable`.
#' @param ... additional arguments for methods.
#'
#' @return An `rtables` table object. Note that the concrete class will depend on the method used.
#'
#' @export
as.rtable <- function(x, ...) { # nolint
  # S3 generic: dispatch is on the class of the first argument, `x`.
  UseMethod("as.rtable")
}

#' @describeIn as.rtable method for converting `data.frame` that contain numeric columns to `rtable`.
#'
#' @param format the format which should be used for the columns.
#'
#' @method as.rtable data.frame
#'
#' @examples
#' x <- data.frame(
#'   a = 1:10,
#'   b = rnorm(10)
#' )
#' as.rtable(x)
#'
#' @export
as.rtable.data.frame <- function(x, format = "xx.xx", ...) {
  checkmate::assert_numeric(unlist(x))

  # Header and format arguments come first in the final `rtable()` call.
  table_args <- list(
    header = labels_or_names(x),
    format = format
  )

  # Turns one row of values plus its row name into an `rrow()`.
  row_to_rrow <- function(row, row_name) {
    do.call(rrow, c(as.list(unname(row)), row.name = row_name))
  }

  # After transposing, each column of the data frame holds one row of `x`.
  table_rows <- Map(
    row_to_rrow,
    row = as.data.frame(t(x)),
    row_name = rownames(x)
  )

  do.call(rtable, c(table_args, table_rows))
}

#' Split parameters
#'
#' @description `r lifecycle::badge("stable")`
#'
#' It divides the data in the vector `param` into the groups defined by `f` based on the specified `value`. It is
#' relevant in `rtables` layers so as to distribute parameters `.stats` or `.formats` into lists with items
#' corresponding to specific analysis functions.
#'
#' @param param (`vector`)\cr the parameter to be split.
#' @param value (`vector`)\cr the value used to split.
#' @param f (`list` of `vectors`)\cr the reference to make the split
#'
#' @return A named `list` with the same element names as `f`, each containing the elements specified in `.stats`.
#'
#' @examples
#' f <- list(
#'   surv = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci"),
#'   surv_diff = c("rate_diff", "rate_diff_ci", "ztest_pval")
#' )
#'
#' .stats <- c("pt_at_risk", "rate_diff")
#' h_split_param(.stats, .stats, f = f)
#'
#' # $surv
#' # [1] "pt_at_risk"
#' #
#' # $surv_diff
#' # [1] "rate_diff"
#'
#' .formats <- c("pt_at_risk" = "xx", "event_free_rate" = "xxx")
#' h_split_param(.formats, names(.formats), f = f)
#'
#' # $surv
#' # pt_at_risk event_free_rate
#' # "xx"           "xxx"
#' #
#' # $surv_diff
#' # NULL
#'
#' @export
h_split_param <- function(param,
                          value,
                          f) {
  lapply(f, function(group) {
    # Entries of `param` whose matching `value` belongs to this group.
    picked <- param[value %in% group]
    # Groups without any match are represented by `NULL`.
    if (length(picked) == 0) {
      return(NULL)
    }
    picked
  })
}

#' Get Selected Statistics Names
#'
#' Helper function to be used for creating `afun`.
#'
#' @param .stats (`vector` or `NULL`)\cr input to the layout creating function. Note that `NULL` means
#'   in this context that all default statistics should be used.
#' @param all_stats (`character`)\cr all statistics which can be selected here potentially.
#'
#' @return A `character` vector with the selected statistics.
#'
#' @keywords internal
afun_selected_stats <- function(.stats, all_stats) {
  checkmate::assert_character(.stats, null.ok = TRUE)
  checkmate::assert_character(all_stats)
  # `NULL` selects every available statistic.
  if (is.null(.stats)) {
    return(all_stats)
  }
  # Otherwise keep the requested statistics that exist, in the requested order.
  intersect(.stats, all_stats)
}

#' Add Variable Labels to Top Left Corner in Table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper layout creating function to just append the variable labels of a given variables vector
#' from a given dataset in the top left corner. If a variable label is not found then the
#' variable name itself is used instead. Multiple variable labels are concatenated with slashes.
#'
#' @inheritParams argument_convention
#' @param vars (`character`)\cr variable names of which the labels are to be looked up in `df`.
#' @param indent (`integer`)\cr non-negative number of nested indent space, default to 0L which means no indent.
#'   1L means two spaces indent, 2L means four spaces indent and so on.
#'
#' @return A modified layout with the new variable label(s) added to the top-left material.
#'
#' @note This is not an optimal implementation of course, since we are using here the data set
#'   itself during the layout creation. When we have a more mature `rtables` implementation then
#'   this will also be improved or not necessary anymore.
#'
#' @examples
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   split_rows_by("SEX") %>%
#'   append_varlabels(DM, "SEX") %>%
#'   analyze("AGE", afun = mean) %>%
#'   append_varlabels(DM, "AGE", indent = 1)
#' build_table(lyt, DM)
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("SEX") %>%
#'   analyze("AGE", afun = mean) %>%
#'   append_varlabels(DM, c("SEX", "AGE"))
#' build_table(lyt, DM)
#'
#' @export
append_varlabels <- function(lyt, df, vars, indent = 0L) {
  # A logical flag is still accepted: it is converted to 0L / 1L with a warning.
  if (checkmate::test_flag(indent)) {
    warning("indent argument is now accepting integers. Boolean indent will be converted to integers.")
    indent <- as.integer(indent)
  }

  checkmate::assert_data_frame(df)
  checkmate::assert_character(vars)
  checkmate::assert_count(indent)

  # Labels of all requested variables, joined with slashes.
  joined_labels <- paste(formatters::var_labels(df[vars], fill = TRUE), collapse = " / ")
  # Two spaces of padding per indent level.
  padding <- strrep(" ", indent * 2)

  append_topleft(lyt, paste0(padding, joined_labels))
}

#' Proportion Difference
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#'
#' @seealso [d_proportion_diff()]
#'
#' @name prop_diff
NULL

#' @describeIn prop_diff Statistics function estimating the difference
#'   in terms of responder proportion.
#'
#' @inheritParams prop_diff_strat_nc
#' @param method (`string`)\cr the method used for the confidence interval estimation.
#'
#' @return
#' * `s_proportion_diff()` returns a named list of elements `diff` and `diff_ci`.
#'
#' @note When performing an unstratified analysis, methods `"cmh"`, `"strat_newcombe"`, and `"strat_newcombecc"` are
#'   not permitted.
#'
#' @examples
#' # Summary
#'
#' ## Simulated data: random responses in groups A and B with two stratification factors.
#' nex <- 100 # Number of example rows
#' dta <- data.frame(
#'   "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
#'   "grp" = sample(c("A", "B"), nex, TRUE),
#'   "f1" = sample(c("a1", "a2"), nex, TRUE),
#'   "f2" = sample(c("x", "y", "z"), nex, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' s_proportion_diff(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   conf_level = 0.90,
#'   method = "ha"
#' )
#'
#' # CMH example with strata
#' s_proportion_diff(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   variables = list(strata = c("f1", "f2")),
#'   conf_level = 0.90,
#'   method = "cmh"
#' )
#'
#' @export
s_proportion_diff <- function(df,
                              .var,
                              .ref_group,
                              .in_ref_col,
                              variables = list(strata = NULL),
                              conf_level = 0.95,
                              method = c(
                                "waldcc", "wald", "cmh",
                                "ha", "newcombe", "newcombecc",
                                "strat_newcombe", "strat_newcombecc"
                              ),
                              weights_method = "cmh") {
  method <- match.arg(method)
  # Stratified methods cannot be used without stratification variables.
  if (is.null(variables$strata) && checkmate::test_subset(method, c("cmh", "strat_newcombe", "strat_newcombecc"))) {
    stop(paste(
      "When performing an unstratified analysis, methods 'cmh', 'strat_newcombe', and 'strat_newcombecc' are not",
      "permitted. Please choose a different method."
    ))
  }
  # Blank results are returned as-is for the reference column.
  y <- list(diff = "", diff_ci = "")

  if (!.in_ref_col) {
    # Stack reference and comparison data; "ref" is the first factor level.
    rsp <- c(.ref_group[[.var]], df[[.var]])
    grp <- factor(
      rep(
        c("ref", "Not-ref"),
        c(nrow(.ref_group), nrow(df))
      ),
      levels = c("ref", "Not-ref")
    )

    if (!is.null(variables$strata)) {
      strata_colnames <- variables$strata
      checkmate::assert_character(strata_colnames, null.ok = FALSE)
      strata_vars <- stats::setNames(as.list(strata_colnames), strata_colnames)

      assert_df_with_variables(df, strata_vars)
      assert_df_with_variables(.ref_group, strata_vars)

      # Merging interaction strata for reference group rows data and remaining
      strata <- c(
        interaction(.ref_group[strata_colnames]),
        interaction(df[strata_colnames])
      )
      strata <- as.factor(strata)
    }

    # Weights for the stratified Newcombe methods: `variables$weights_method` takes precedence
    # when supplied; otherwise the `weights_method` argument (default "cmh") is used.
    if (!is.null(variables$weights_method)) {
      weights_method <- variables$weights_method
    }

    y <- switch(method,
      "wald" = prop_diff_wald(rsp, grp, conf_level, correct = FALSE),
      "waldcc" = prop_diff_wald(rsp, grp, conf_level, correct = TRUE),
      "ha" = prop_diff_ha(rsp, grp, conf_level),
      "newcombe" = prop_diff_nc(rsp, grp, conf_level, correct = FALSE),
      "newcombecc" = prop_diff_nc(rsp, grp, conf_level, correct = TRUE),
      "strat_newcombe" = prop_diff_strat_nc(rsp,
        grp,
        strata,
        weights_method,
        conf_level,
        correct = FALSE
      ),
      "strat_newcombecc" = prop_diff_strat_nc(rsp,
        grp,
        strata,
        weights_method,
        conf_level,
        correct = TRUE
      ),
      "cmh" = prop_diff_cmh(rsp, grp, strata, conf_level)[c("diff", "diff_ci")]
    )

    # Report proportions as percentages.
    y$diff <- y$diff * 100
    y$diff_ci <- y$diff_ci * 100
  }

  attr(y$diff, "label") <- "Difference in Response rate (%)"
  attr(y$diff_ci, "label") <- d_proportion_diff(
    conf_level, method,
    long = FALSE
  )

  y
}

#' @describeIn prop_diff Formatted analysis function which is used as `afun` in `estimate_proportion_diff()`.
#'
#' @return
#' * `a_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_proportion_diff(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   conf_level = 0.90,
#'   method = "ha"
#' )
#'
#' @export
a_proportion_diff <- make_afun(
  s_proportion_diff,
  # Default cell formats for the two statistics returned by `s_proportion_diff()`.
  .formats = c(diff = "xx.x", diff_ci = "(xx.x, xx.x)"),
  # The confidence interval row is indented one level relative to the estimate row.
  .indent_mods = c(diff = 0L, diff_ci = 1L)
)

#' @describeIn prop_diff Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param ... arguments passed to `s_proportion_diff()`.
#'
#' @return
#' * `estimate_proportion_diff()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_proportion_diff()` to the table layout.
#'
#' @examples
#' l <- basic_table() %>%
#'   split_cols_by(var = "grp", ref_group = "B") %>%
#'   estimate_proportion_diff(
#'     vars = "rsp",
#'     conf_level = 0.90,
#'     method = "ha"
#'   )
#'
#' build_table(l, df = dta)
#'
#' @export
estimate_proportion_diff <- function(lyt,
                                     vars,
                                     ...,
                                     var_labels = vars,
                                     show_labels = "hidden",
                                     table_names = vars,
                                     .stats = NULL,
                                     .formats = NULL,
                                     .labels = NULL,
                                     .indent_mods = NULL) {
  # Customise the default analysis function with the caller's statistics, formats, labels and indents.
  custom_afun <- make_afun(
    a_proportion_diff,
    .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods
  )

  # Everything passed through `...` is forwarded to the statistics function via `extra_args`.
  analyze(
    lyt,
    vars,
    afun = custom_afun,
    var_labels = var_labels,
    extra_args = list(...),
    show_labels = show_labels,
    table_names = table_names
  )
}

#' Check: Proportion Difference Arguments
#'
#' Verifies that the arguments are valid for use in the estimation of the
#' difference in responder proportions.
#'
#' @inheritParams prop_diff
#' @inheritParams prop_diff_wald
#'
#' @keywords internal
check_diff_prop_ci <- function(rsp,
                               grp,
                               strata = NULL,
                               conf_level,
                               correct = NULL) {
  n_obs <- length(rsp)

  # Response: logical without missing values. Group: two-level factor of matching length.
  checkmate::assert_logical(rsp, any.missing = FALSE)
  checkmate::assert_factor(grp, len = n_obs, any.missing = FALSE, n.levels = 2)
  checkmate::assert_number(conf_level, lower = 0, upper = 1)
  checkmate::assert_flag(correct, null.ok = TRUE)

  # Strata are optional; when given they must be a factor of matching length.
  if (!is.null(strata)) {
    checkmate::assert_factor(strata, len = n_obs)
  }

  invisible(NULL)
}

#' Description of Method Used for Proportion Comparison
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function that describes the analysis in
#' `s_proportion_diff`.
#'
#' @inheritParams s_proportion_diff
#' @param long (`logical`)\cr Whether a long or a short (default) description is required.
#'
#' @return A `string` describing the analysis.
#'
#' @seealso [prop_diff]
#'
#' @export
d_proportion_diff <- function(conf_level,
                              method,
                              long = FALSE) {
  label <- paste0(conf_level * 100, "% CI")
  if (long) {
    # `method` is a single string, so a plain `if`/`else` is enough.
    target <- if (isTRUE(method == "cmh")) {
      "for adjusted difference"
    } else {
      "for difference"
    }
    label <- paste(label, target)
  }

  # Human-readable method name; unknown methods raise an error.
  method_part <- switch(method,
    "cmh" = "CMH, without correction",
    "waldcc" = "Wald, with correction",
    "wald" = "Wald, without correction",
    "ha" = "Anderson-Hauck",
    "newcombe" = "Newcombe, without correction",
    "newcombecc" = "Newcombe, with correction",
    "strat_newcombe" = "Stratified Newcombe, without correction",
    "strat_newcombecc" = "Stratified Newcombe, with correction",
    stop(paste(method, "does not have a description"))
  )
  paste0(label, " (", method_part, ")")
}

#' Helper Functions to Calculate Proportion Difference
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @inheritParams prop_diff
#' @param grp (`factor`)\cr vector assigning observations to one out of two groups
#'   (e.g. reference and treatment group).
#'
#' @return A named `list` of elements `diff` (proportion difference) and `diff_ci`
#'   (proportion difference confidence interval).
#'
#' @seealso [prop_diff()] for implementation of these helper functions.
#'
#' @name h_prop_diff
NULL

#' @describeIn h_prop_diff The Wald interval follows the usual textbook
#'   definition for a single proportion confidence interval using the normal
#'   approximation. It is possible to include a continuity correction for Wald's
#'   interval.
#'
#' @param correct (`logical`)\cr whether to include the continuity correction. For further
#'   information, see [stats::prop.test()].
#'
#' @examples
#' # Wald confidence interval
#' set.seed(2)
#' rsp <- sample(c(TRUE, FALSE), replace = TRUE, size = 20)
#' grp <- factor(c(rep("A", 10), rep("B", 10)))
#' prop_diff_wald(rsp = rsp, grp = grp, conf_level = 0.95, correct = FALSE)
#'
#' @export
prop_diff_wald <- function(rsp,
                           grp,
                           conf_level = 0.95,
                           correct = FALSE) {
  # Select the Wald variant with or without continuity correction.
  mthd <- if (isTRUE(correct)) "waldcc" else "wald"
  grp <- as_factor_keep_attributes(grp)
  # Validates `rsp` (logical, no missing values) and `grp` (two-level factor of matching
  # length), so no further assertions on them are needed here.
  check_diff_prop_ci(
    rsp = rsp, grp = grp, conf_level = conf_level, correct = correct
  )

  tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE)))
  # x1 and n1 are non-reference groups.
  diff_ci <- desctools_binom(
    x1 = tbl[2], n1 = sum(tbl[2], tbl[4]),
    x2 = tbl[1], n2 = sum(tbl[1], tbl[3]),
    conf.level = conf_level,
    method = mthd
  )

  list(
    "diff" = unname(diff_ci[, "est"]),
    "diff_ci" = unname(diff_ci[, c("lwr.ci", "upr.ci")])
  )
}

#' @describeIn h_prop_diff Anderson-Hauck confidence interval.
#'
#' @examples
#' # Anderson-Hauck confidence interval
#' ## "Mid" case: 3/4 respond in group A, 1/2 respond in group B.
#' rsp <- c(TRUE, FALSE, FALSE, TRUE, TRUE, TRUE)
#' grp <- factor(c("A", "B", "A", "B", "A", "A"), levels = c("B", "A"))
#' prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.90)
#'
#' ## Edge case: Same proportion of response in A and B.
#' rsp <- c(TRUE, FALSE, TRUE, FALSE)
#' grp <- factor(c("A", "A", "B", "B"), levels = c("A", "B"))
#' prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.6)
#'
#' @export
prop_diff_ha <- function(rsp,
                         grp,
                         conf_level) {
  grp <- as_factor_keep_attributes(grp)
  check_diff_prop_ci(rsp = rsp, grp = grp, conf_level = conf_level)

  # 2 x 2 table of group by response, with the TRUE (responder) column first.
  counts <- table(grp, factor(rsp, levels = c(TRUE, FALSE)))
  # Column-major positions: 1 and 2 are the responders of the first and second group level,
  # 3 and 4 the corresponding non-responders.
  responders <- counts[1:2]
  totals <- responders + counts[3:4]

  # x1 and n1 are non-reference groups.
  ha <- desctools_binom(
    x1 = responders[2], n1 = totals[2],
    x2 = responders[1], n2 = totals[1],
    conf.level = conf_level,
    method = "ha"
  )

  estimate <- unname(ha[, "est"])
  limits <- unname(ha[, c("lwr.ci", "upr.ci")])
  list("diff" = estimate, "diff_ci" = limits)
}

#' @describeIn h_prop_diff Newcombe confidence interval. It is based on
#'   the Wilson score confidence interval for a single binomial proportion.
#'
#' @examples
#' # Newcombe confidence interval
#'
#' set.seed(1)
#' rsp <- c(
#'   sample(c(TRUE, FALSE), size = 40, prob = c(3 / 4, 1 / 4), replace = TRUE),
#'   sample(c(TRUE, FALSE), size = 40, prob = c(1 / 2, 1 / 2), replace = TRUE)
#' )
#' grp <- factor(rep(c("A", "B"), each = 40), levels = c("B", "A"))
#' table(rsp, grp)
#' prop_diff_nc(rsp = rsp, grp = grp, conf_level = 0.9)
#'
#' @export
prop_diff_nc <- function(rsp,
                         grp,
                         conf_level,
                         correct = FALSE) {
  # Select the score (Newcombe) variant with or without continuity correction.
  mthd <- if (isTRUE(correct)) "scorecc" else "score"
  grp <- as_factor_keep_attributes(grp)
  check_diff_prop_ci(rsp = rsp, grp = grp, conf_level = conf_level)

  # The estimate and its interval both come from `desctools_binom()`; nothing else is computed here.
  tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE)))
  ci <- desctools_binom(
    # x1 and n1 are non-reference groups.
    x1 = tbl[2], n1 = sum(tbl[2], tbl[4]),
    x2 = tbl[1], n2 = sum(tbl[1], tbl[3]),
    conf.level = conf_level,
    method = mthd
  )
  list(
    "diff" = unname(ci[, "est"]),
    "diff_ci" = unname(ci[, c("lwr.ci", "upr.ci")])
  )
}

#' @describeIn h_prop_diff Calculates the weighted difference. This is defined as the difference in
#'   response rates between the experimental treatment group and the control treatment group, adjusted
#'   for stratification factors by applying Cochran-Mantel-Haenszel (CMH) weights. For the CMH chi-squared
#'   test, use [stats::mantelhaen.test()].
#'
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
#'
#' @examples
#' # Cochran-Mantel-Haenszel confidence interval
#'
#' set.seed(2)
#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
#' grp <- sample(c("Placebo", "Treatment"), 100, TRUE)
#' grp <- factor(grp, levels = c("Placebo", "Treatment"))
#' strata_data <- data.frame(
#'   "f1" = sample(c("a", "b"), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' prop_diff_cmh(
#'   rsp = rsp, grp = grp, strata = interaction(strata_data),
#'   conf_level = 0.90
#' )
#'
#' @export
prop_diff_cmh <- function(rsp,
                          grp,
                          strata,
                          conf_level = 0.95) {
  grp <- as_factor_keep_attributes(grp)
  strata <- as_factor_keep_attributes(strata)
  check_diff_prop_ci(
    rsp = rsp, grp = grp, conf_level = conf_level, strata = strata
  )

  if (any(tapply(rsp, strata, length) < 5)) {
    warning("Less than 5 observations in some strata.")
  }

  # Three-way counts:
  #   dim 1: response (FALSE, TRUE) - fixed levels so that a response value
  #          that never occurs still gets a (zero) row,
  #   dim 2: levels of `grp` (first level, second level),
  #   dim 3: levels of `strata`.
  counts <- table(
    factor(rsp, levels = c("FALSE", "TRUE")),
    grp,
    strata
  )

  # Per-stratum group sizes and response proportions.
  size_1 <- colSums(counts[1:2, 1, ])
  size_2 <- colSums(counts[1:2, 2, ])
  rate_1 <- counts[2, 1, ] / size_1
  rate_2 <- counts[2, 2, ] / size_2

  # Only strata in which both groups are observed contribute.
  keep <- (size_1 > 0) & (size_2 > 0)
  n1 <- size_1[keep]
  n2 <- size_2[keep]
  p1 <- rate_1[keep]
  p2 <- rate_2[keep]

  # CMH weights, normalized to sum to one.
  wt <- (n1 * n2 / (n1 + n2))
  wt_normalized <- wt / sum(wt)

  z <- stats::qnorm((1 + conf_level) / 2)

  # Weighted proportion per group with a normal-approximation interval.
  est1 <- sum(wt_normalized * p1)
  est2 <- sum(wt_normalized * p2)
  se1 <- sqrt(sum(wt_normalized^2 * p1 * (1 - p1) / n1))
  se2 <- sqrt(sum(wt_normalized^2 * p2 * (1 - p2) / n2))
  err1 <- z * se1
  err2 <- z * se2

  grp_levels <- levels(grp)
  estimate <- c(est1, est2)
  names(estimate) <- grp_levels
  estimate_ci <- list(
    c((est1 - err1), (est1 + err1)),
    c((est2 - err2), (est2 + err2))
  )
  names(estimate_ci) <- grp_levels

  # Weighted difference (second level minus first level) and its interval.
  diff_est <- est2 - est1
  se_diff <- sqrt(sum(((p1 * (1 - p1) / n1) + (p2 * (1 - p2) / n2)) * wt_normalized^2))
  diff_ci <- c(diff_est - z * se_diff, diff_est + z * se_diff)

  list(
    prop = estimate,
    prop_ci = estimate_ci,
    diff = diff_est,
    diff_ci = diff_ci,
    weights = wt_normalized,
    n1 = n1,
    n2 = n2
  )
}

#' @describeIn h_prop_diff Calculates the stratified Newcombe confidence interval and difference in response
#'   rates between the experimental treatment group and the control treatment group, adjusted for stratification
#'   factors. This implementation follows closely the one proposed by \insertCite{Yan2010-jt;textual}{tern}.
#'   Weights can be estimated from the heuristic proposed in [prop_strat_wilson()] or from CMH-derived weights
#'   (see [prop_diff_cmh()]).
#'
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
#' @param weights_method (`string`)\cr weights method. Can be either `"cmh"` or `"wilson_h"`
#'   and directs the way weights are estimated.
#'
#' @references
#' \insertRef{Yan2010-jt}{tern}
#'
#' @examples
#' # Stratified Newcombe confidence interval
#'
#' set.seed(2)
#' data_set <- data.frame(
#'   "rsp" = sample(c(TRUE, FALSE), 100, TRUE),
#'   "f1" = sample(c("a", "b"), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   "grp" = sample(c("Placebo", "Treatment"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' prop_diff_strat_nc(
#'   rsp = data_set$rsp, grp = data_set$grp, strata = interaction(data_set[2:3]),
#'   weights_method = "cmh",
#'   conf_level = 0.90
#' )
#'
#' prop_diff_strat_nc(
#'   rsp = data_set$rsp, grp = data_set$grp, strata = interaction(data_set[2:3]),
#'   weights_method = "wilson_h",
#'   conf_level = 0.90
#' )
#'
#' @export
prop_diff_strat_nc <- function(rsp,
                               grp,
                               strata,
                               weights_method = c("cmh", "wilson_h"),
                               conf_level = 0.95,
                               correct = FALSE) {
  weights_method <- match.arg(weights_method)
  grp <- as_factor_keep_attributes(grp)
  strata <- as_factor_keep_attributes(strata)
  check_diff_prop_ci(
    rsp = rsp, grp = grp, conf_level = conf_level, strata = strata
  )
  checkmate::assert_number(conf_level, lower = 0, upper = 1)
  checkmate::assert_flag(correct)
  if (any(tapply(rsp, strata, length) < 5)) {
    warning("Less than 5 observations in some strata.")
  }

  rsp_by_grp <- split(rsp, f = grp)
  strata_by_grp <- split(strata, f = grp)

  # Finding the weights
  weights <- if (identical(weights_method, "cmh")) {
    prop_diff_cmh(rsp = rsp, grp = grp, strata = strata)$weights
  } else if (identical(weights_method, "wilson_h")) {
    prop_strat_wilson(rsp, strata, conf_level = conf_level, correct = correct)$weights
  }
  # Strata without a weight (e.g. dropped by `prop_diff_cmh()` because one
  # group is empty there) get weight zero. Assigning by name appends the new
  # entries at the end, so the vector is put back into the order of
  # `levels(strata)`; every positional use below relies on that order.
  # NOTE(review): assumes the weights are named by stratum level, as the
  # name-based lookup already did - confirm for `prop_strat_wilson()`.
  missing_strata <- setdiff(levels(strata), names(weights))
  weights[missing_strata] <- 0
  weights <- weights[levels(strata)]

  # Calculating lower (`l`) and upper (`u`) confidence bounds per group.
  strat_wilson_by_grp <- Map(
    prop_strat_wilson,
    rsp = rsp_by_grp,
    strata = strata_by_grp,
    weights = list(weights, weights),
    conf_level = conf_level,
    correct = correct
  )

  ci_ref <- strat_wilson_by_grp[[1]]
  ci_trt <- strat_wilson_by_grp[[2]]
  l_ref <- as.numeric(ci_ref$conf_int[1])
  u_ref <- as.numeric(ci_ref$conf_int[2])
  l_trt <- as.numeric(ci_trt$conf_int[1])
  u_trt <- as.numeric(ci_trt$conf_int[2])

  # Estimating the diff and n_ref, n_trt (it allows different weights to be used)
  # dim 1: response (FALSE, TRUE); dim 2: levels of `grp`; dim 3: strata.
  t_tbl <- table(
    factor(rsp, levels = c("FALSE", "TRUE")),
    grp,
    strata
  )
  n_ref <- colSums(t_tbl[1:2, 1, ])
  n_trt <- colSums(t_tbl[1:2, 2, ])
  # Only strata in which both groups are observed enter the estimate.
  use_stratum <- (n_ref > 0) & (n_trt > 0)
  n_ref <- n_ref[use_stratum]
  n_trt <- n_trt[use_stratum]
  p_ref <- t_tbl[2, 1, use_stratum] / n_ref
  p_trt <- t_tbl[2, 2, use_stratum] / n_trt

  # Restrict the weights to the same strata so that every product below pairs
  # a weight with the proportion / size of its own stratum instead of relying
  # on recycling of vectors of different lengths.
  wt_used <- weights[use_stratum]

  est1 <- sum(wt_used * p_ref)
  est2 <- sum(wt_used * p_trt)
  diff_est <- est2 - est1

  lambda1 <- sum(wt_used^2 / n_ref)
  lambda2 <- sum(wt_used^2 / n_trt)
  z <- stats::qnorm((1 + conf_level) / 2)

  # Newcombe-type combination of the per-group bounds.
  lower <- diff_est - z * sqrt(lambda2 * l_trt * (1 - l_trt) + lambda1 * u_ref * (1 - u_ref))
  upper <- diff_est + z * sqrt(lambda1 * l_ref * (1 - l_ref) + lambda2 * u_trt * (1 - u_trt))

  list(
    "diff" = diff_est,
    "diff_ci" = c("lower" = lower, "upper" = upper)
  )
}

#' Create a Forest Plot based on a Table
#'
#' Create a forest plot from any [rtables::rtable()] object that has a
#' column with a single value and a column with 2 values.
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @param tbl (`rtable`)
#' @param col_x (`integer`)\cr column index with estimator. By default tries to get this from
#'   `tbl` attribute `col_x`, otherwise needs to be manually specified.
#' @param col_ci (`integer`)\cr column index with confidence intervals. By default tries
#'   to get this from `tbl` attribute `col_ci`, otherwise needs to be manually specified.
#' @param vline (`number`)\cr x coordinate for vertical line, if `NULL` then the line is omitted.
#' @param forest_header (`character`, length 2)\cr text displayed to the left and right of `vline`, respectively.
#'   If `vline = NULL` then `forest_header` needs to be `NULL` too.
#'   By default tries to get this from `tbl` attribute `forest_header`.
#' @param xlim (`numeric`)\cr limits for x axis.
#' @param logx (`flag`)\cr show the x-values on logarithm scale.
#' @param x_at (`numeric`)\cr x-tick locations, if `NULL` they get automatically chosen.
#' @param width_row_names (`unit`)\cr width for row names.
#'   If `NULL` the widths get automatically calculated. See [grid::unit()].
#' @param width_columns (`unit`)\cr widths for the table columns.
#'   If `NULL` the widths get automatically calculated. See [grid::unit()].
#' @param width_forest (`unit`)\cr width for the forest column.
#'   If `NULL` the widths get automatically calculated. See [grid::unit()].
#' @param col_symbol_size (`integer`)\cr column index from `tbl` containing data to be used
#'   to determine relative size for estimator plot symbol. Typically, the symbol size is proportional
#'   to the sample size used to calculate the estimator. If `NULL`, the same symbol size is used for all subgroups.
#'   By default tries to get this from `tbl` attribute `col_symbol_size`, otherwise needs to be manually specified.
#' @param col (`character`)\cr color(s).
#'
#' @return `gTree` object containing the forest plot and table.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(forcats)
#' library(nestcolor)
#'
#' adrs <- tern_ex_adrs
#' n_records <- 20
#' adrs_labels <- formatters::var_labels(adrs, fill = TRUE)
#' adrs <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
#'   slice(seq_len(n_records)) %>%
#'   droplevels() %>%
#'   mutate(
#'     # Reorder levels of factor to make the placebo group the reference arm.
#'     ARM = fct_relevel(ARM, "B: Placebo"),
#'     rsp = AVALC == "CR"
#'   )
#' formatters::var_labels(adrs) <- c(adrs_labels, "Response")
#' df <- extract_rsp_subgroups(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "STRATA2")),
#'   data = adrs
#' )
#' # Full commonly used response table.
#'
#' tbl <- basic_table() %>%
#'   tabulate_rsp_subgroups(df)
#' p <- g_forest(tbl)
#'
#' draw_grob(p)
#'
#' # Odds ratio only table.
#'
#' tbl_or <- basic_table() %>%
#'   tabulate_rsp_subgroups(df, vars = c("n_tot", "or", "ci"))
#' tbl_or
#' p <- g_forest(
#'   tbl_or,
#'   forest_header = c("Comparison\nBetter", "Treatment\nBetter")
#' )
#'
#' draw_grob(p)
#'
#' # Survival forest plot example.
#' adtte <- tern_ex_adtte
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte, fill = TRUE)
#' adtte_f <- adtte %>%
#'   filter(
#'     PARAMCD == "OS",
#'     ARM %in% c("B: Placebo", "A: Drug X"),
#'     SEX %in% c("M", "F")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to display reference arm before treatment arm.
#'     ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
#'     SEX = droplevels(SEX),
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- list(
#'   "ARM" = adtte_labels["ARM"],
#'   "SEX" = adtte_labels["SEX"],
#'   "AVALU" = adtte_labels["AVALU"],
#'   "is_event" = "Event Flag"
#' )
#' formatters::var_labels(adtte_f)[names(labels)] <- as.character(labels)
#' df <- extract_survival_subgroups(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM", subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f
#' )
#' table_hr <- basic_table() %>%
#'   tabulate_survival_subgroups(df, time_unit = adtte_f$AVALU[1])
#' g_forest(table_hr)
#' # Works with any `rtable`.
#' tbl <- rtable(
#'   header = c("E", "CI", "N"),
#'   rrow("", 1, c(.8, 1.2), 200),
#'   rrow("", 1.2, c(1.1, 1.4), 50)
#' )
#' g_forest(
#'   tbl = tbl,
#'   col_x = 1,
#'   col_ci = 2,
#'   xlim = c(0.5, 2),
#'   x_at = c(0.5, 1, 2),
#'   col_symbol_size = 3
#' )
#' tbl <- rtable(
#'   header = rheader(
#'     rrow("", rcell("A", colspan = 2)),
#'     rrow("", "c1", "c2")
#'   ),
#'   rrow("row 1", 1, c(.8, 1.2)),
#'   rrow("row 2", 1.2, c(1.1, 1.4))
#' )
#' g_forest(
#'   tbl = tbl,
#'   col_x = 1,
#'   col_ci = 2,
#'   xlim = c(0.5, 2),
#'   x_at = c(0.5, 1, 2),
#'   vline = 1,
#'   forest_header = c("Hello", "World")
#' )
#' }
#'
#' @export
g_forest <- function(tbl,
                     col_x = attr(tbl, "col_x"),
                     col_ci = attr(tbl, "col_ci"),
                     vline = 1,
                     forest_header = attr(tbl, "forest_header"),
                     xlim = c(0.1, 10),
                     logx = TRUE,
                     x_at = c(0.1, 1, 10),
                     width_row_names = NULL,
                     width_columns = NULL,
                     width_forest = grid::unit(1, "null"),
                     col_symbol_size = attr(tbl, "col_symbol_size"),
                     col = getOption("ggplot2.discrete.colour")[1],
                     draw = TRUE,
                     newpage = TRUE) {
  checkmate::assert_class(tbl, "VTableTree")

  nr <- nrow(tbl)
  nc <- ncol(tbl)
  # Fall back to a fixed color when the option is not set.
  if (is.null(col)) {
    col <- "blue"
  }

  checkmate::assert_number(col_x, lower = 0, upper = nc, null.ok = FALSE)
  checkmate::assert_number(col_ci, lower = 0, upper = nc, null.ok = FALSE)
  checkmate::assert_number(col_symbol_size, lower = 0, upper = nc, null.ok = TRUE)
  checkmate::assert_true(col_x > 0)
  checkmate::assert_true(col_ci > 0)
  checkmate::assert_character(col)
  if (!is.null(col_symbol_size)) {
    checkmate::assert_true(col_symbol_size > 0)
  }

  # Raw content of one table cell. Selecting a label row yields NULL together
  # with a warning, which is deliberately silenced.
  cell_content <- function(i, j) {
    suppressWarnings(as.vector(tbl[i, j, drop = TRUE]))
  }
  # A cell is usable when it holds at least one numeric value.
  has_numbers <- function(v) {
    !is.null(v) && length(v) > 0 && is.numeric(v)
  }
  # Column `j` as one number per row; unusable cells become NA.
  scalar_column <- function(j) {
    vapply(seq_len(nr), function(i) {
      v <- cell_content(i, j)
      if (has_numbers(v)) v else NA_real_
    }, numeric(1))
  }

  # Point estimates.
  x_e <- scalar_column(col_x)

  # Confidence intervals: exactly two numbers per usable cell.
  x_ci <- lapply(seq_len(nr), function(i) {
    v <- cell_content(i, col_ci)
    if (!has_numbers(v)) {
      return(c(NA_real_, NA_real_))
    }
    if (length(v) != 2) {
      stop("ci column needs two elements")
    }
    v
  })
  lower <- vapply(x_ci, function(ci) ci[1], numeric(1))
  upper <- vapply(x_ci, function(ci) ci[2], numeric(1))

  # Relative symbol sizes (optional).
  symbol_size <- NULL
  if (!is.null(col_symbol_size)) {
    root_size <- sqrt(scalar_column(col_symbol_size))
    # The largest symbol is scaled to 2, i.e. a radius of 2 * (1 / 3.5) lines
    # (see `forest_dot_line()`), so that symbols do not overlap.
    symbol_size <- 2 * root_size / max(root_size, na.rm = TRUE)
  }

  grob_forest <- forest_grob(
    tbl = tbl,
    x = x_e,
    lower = lower,
    upper = upper,
    vline = vline,
    forest_header = forest_header,
    xlim = xlim,
    logx = logx,
    x_at = x_at,
    width_row_names = width_row_names,
    width_columns = width_columns,
    width_forest = width_forest,
    symbol_size = symbol_size,
    col = col,
    vp = grid::plotViewport(margins = rep(1, 4))
  )

  if (draw) {
    if (newpage) {
      grid::grid.newpage()
    }
    grid::grid.draw(grob_forest)
  }

  invisible(grob_forest)
}

#' Forest Plot Grob
#'
#' @inheritParams g_forest
#' @param tbl ([rtables::rtable()])
#' @param x (`numeric`)\cr coordinate of point.
#' @param lower,upper (`numeric`)\cr lower/upper bound of the confidence interval.
#' @param symbol_size (`numeric`)\cr vector with relative size for plot symbol.
#' If `NULL`, the same symbol size is used.
#'
#' @details
#' The heights get automatically determined.
#'
#' @noRd
#'
#' @examples
#' tbl <- rtable(
#'   header = rheader(
#'     rrow("", "E", rcell("CI", colspan = 2), "N"),
#'     rrow("", "A", "B", "C", "D")
#'   ),
#'   rrow("row 1", 1, 0.8, 1.1, 16),
#'   rrow("row 2", 1.4, 0.8, 1.6, 25),
#'   rrow("row 3", 1.2, 0.8, 1.6, 36)
#' )
#'
#' x <- c(1, 1.4, 1.2)
#' lower <- c(0.8, 0.8, 0.8)
#' upper <- c(1.1, 1.6, 1.6)
#' # numeric vector with multiplication factor to scale each circle radius
#' # default radius is 1/3.5 lines
#' symbol_scale <- c(1, 1.25, 1.5)
#'
#' # Internal function - forest_grob
#' \dontrun{
#' p <- forest_grob(tbl, x, lower, upper,
#'   vline = 1, forest_header = c("A", "B"),
#'   x_at = c(.1, 1, 10), xlim = c(0.1, 10), logx = TRUE, symbol_size = symbol_scale,
#'   vp = grid::plotViewport(margins = c(1, 1, 1, 1))
#' )
#'
#' draw_grob(p)
#' }
forest_grob <- function(tbl,
                        x,
                        lower,
                        upper,
                        vline,
                        forest_header,
                        xlim = NULL,
                        logx = FALSE,
                        x_at = NULL,
                        width_row_names = NULL,
                        width_columns = NULL,
                        width_forest = grid::unit(1, "null"),
                        symbol_size = NULL,
                        col = "blue",
                        name = NULL,
                        gp = NULL,
                        vp = NULL) {
  # Builds a single `gTree` holding the table header, the table body, a
  # separating line and the forest column (axis, optional reference line and
  # one dot-and-line per table row). Nothing is drawn here.
  nr <- nrow(tbl)
  # Without a reference line there is nothing to place a header around.
  if (is.null(vline)) {
    checkmate::assert_true(is.null(forest_header))
  } else {
    checkmate::assert_number(vline)
    checkmate::assert_character(forest_header, len = 2, null.ok = TRUE)
  }

  # Estimates, bounds and symbol sizes need one entry per table row.
  checkmate::assert_numeric(x, len = nr)
  checkmate::assert_numeric(lower, len = nr)
  checkmate::assert_numeric(upper, len = nr)
  checkmate::assert_numeric(symbol_size, len = nr, null.ok = TRUE)
  checkmate::assert_character(col)

  # Default: the same symbol size for every row.
  if (is.null(symbol_size)) {
    symbol_size <- rep(1, nr)
  }

  # Default x limits: range of all values, widened by 5% on each side.
  if (is.null(xlim)) {
    r <- range(c(x, lower, upper), na.rm = TRUE)
    xlim <- r + c(-0.05, 0.05) * diff(r)
  }

  # On the log scale all coordinates (limits, values, ticks, reference line)
  # are log-transformed, while the tick labels stay on the original scale.
  if (logx) {
    if (is.null(x_at)) {
      x_at <- pretty(log(stats::na.omit(c(x, lower, upper))))
      x_labels <- exp(x_at)
    } else {
      x_labels <- x_at
      x_at <- log(x_at)
    }
    xlim <- log(xlim)
    x <- log(x)
    lower <- log(lower)
    upper <- log(upper)
    if (!is.null(vline)) {
      vline <- log(vline)
    }
  } else {
    # `label = TRUE` is passed on to `grid::xaxisGrob()` below.
    x_labels <- TRUE
  }

  # Viewport providing the native x scale of the forest column.
  data_forest_vp <- grid::dataViewport(xlim, c(0, 1))

  # Get table content as matrix form.
  mf <- matrix_form(tbl)

  # Use `rtables` indent_string eventually.
  # Row names are indented by four spaces per indent level; header rows get
  # no indentation.
  mf$strings[, 1] <- paste0(
    strrep("    ", c(rep(0, attr(mf, "nrow_header")), mf$row_info$indent)),
    mf$strings[, 1]
  )

  n_header <- attr(mf, "nrow_header")

  if (any(mf$display[, 1] == FALSE)) stop("row names need to be always displayed")

  # Pre-process the data to be used in lapply and cell_in_rows.
  # Returns one argument list for `cell_in_rows()` per row of the requested
  # part. Body rows follow the header rows in `mf`, hence the offsets: the
  # resulting `row_index` always counts from 1 within its part.
  to_args_for_cell_in_rows_fun <- function(part = c("body", "header"),
                                           underline_colspan = FALSE) {
    part <- match.arg(part)
    if (part == "body") {
      mat_row_indices <- seq_len(nrow(tbl)) + n_header
      row_ind_offset <- -n_header
    } else {
      mat_row_indices <- seq_len(n_header)
      row_ind_offset <- 0
    }

    lapply(mat_row_indices, function(i) {
      # Only cells flagged for display are handed over (first column holds
      # the row names and is treated separately).
      disp <- mf$display[i, -1]
      list(
        row_name = mf$strings[i, 1],
        cells = mf$strings[i, -1][disp],
        cell_spans = mf$spans[i, -1][disp],
        row_index = i + row_ind_offset,
        underline_colspan = underline_colspan
      )
    })
  }

  args_header <- to_args_for_cell_in_rows_fun("header", underline_colspan = TRUE)
  args_body <- to_args_for_cell_in_rows_fun("body", underline_colspan = FALSE)

  grid::gTree(
    name = name,
    children = grid::gList(
      # table header cells
      grid::gTree(
        children = do.call(grid::gList, lapply(args_header, do.call, what = cell_in_rows)),
        vp = grid::vpPath("vp_table_layout", "vp_header")
      ),
      # table body cells
      grid::gTree(
        children = do.call(grid::gList, lapply(args_body, do.call, what = cell_in_rows)),
        vp = grid::vpPath("vp_table_layout", "vp_body")
      ),
      # horizontal line in the spacer between header and body
      grid::linesGrob(
        grid::unit(c(0, 1), "npc"),
        y = grid::unit(c(.5, .5), "npc"),
        vp = grid::vpPath("vp_table_layout", "vp_spacer")
      ),
      # forest part
      # header texts, one line to the left and right of the reference line
      if (is.null(vline)) {
        NULL
      } else {
        grid::gTree(
          children = grid::gList(
            grid::gTree(
              children = grid::gList(
                # this may overflow, to fix, look here
                # https://stackoverflow.com/questions/33623169/add-multi-line-footnote-to-tablegrob-while-using-gridextra-in-r #nolintr
                grid::textGrob(
                  forest_header[1],
                  x = grid::unit(vline, "native") - grid::unit(1, "lines"),
                  just = c("right", "center")
                ),
                grid::textGrob(
                  forest_header[2],
                  x = grid::unit(vline, "native") + grid::unit(1, "lines"),
                  just = c("left", "center")
                )
              ),
              # last layout column (row names + table columns + forest)
              vp = grid::vpStack(grid::viewport(layout.pos.col = ncol(tbl) + 2), data_forest_vp)
            )
          ),
          vp = grid::vpPath("vp_table_layout", "vp_header")
        )
      },
      # forest background, reference line and x axis in the body
      grid::gTree(
        children = grid::gList(
          grid::gTree(
            children = grid::gList(
              grid::rectGrob(gp = grid::gpar(col = "gray90", fill = "gray90")),
              if (is.null(vline)) {
                NULL
              } else {
                grid::linesGrob(
                  x = grid::unit(rep(vline, 2), "native"),
                  y = grid::unit(c(0, 1), "npc"),
                  gp = grid::gpar(lwd = 2),
                  vp = data_forest_vp
                )
              },
              grid::xaxisGrob(at = x_at, label = x_labels, vp = data_forest_vp)
            ),
            vp = grid::viewport(layout.pos.col = ncol(tbl) + 2)
          )
        ),
        vp = grid::vpPath("vp_table_layout", "vp_body")
      ),
      # one dot-and-line per table row; `col` is recycled over the rows by Map
      grid::gTree(
        children = do.call(
          grid::gList,
          Map(
            function(xi, li, ui, row_index, size_i, col) {
              forest_dot_line(
                xi,
                li,
                ui,
                row_index,
                xlim,
                symbol_size = size_i,
                col = col,
                datavp = data_forest_vp
              )
            },
            x,
            lower,
            upper,
            seq_along(x),
            symbol_size,
            col,
            USE.NAMES = FALSE
          )
        ),
        vp = grid::vpPath("vp_table_layout", "vp_body")
      )
    ),
    # viewport tree the `vpPath()`s above refer to
    childrenvp = forest_viewport(tbl, width_row_names, width_columns, width_forest),
    vp = vp,
    gp = gp
  )
}


# Builds the grobs of one table row: the row name plus one text grob per
# displayed cell. Cells spanning several columns get their own viewport and,
# on request, an underline.
cell_in_rows <- function(row_name,
                         cells,
                         cell_spans,
                         row_index,
                         underline_colspan = FALSE) {
  checkmate::assert_string(row_name)
  checkmate::assert_character(cells, min.len = 1, any.missing = FALSE)
  checkmate::assert_numeric(cell_spans, len = length(cells), any.missing = FALSE)
  checkmate::assert_number(row_index)
  checkmate::assert_flag(underline_colspan)

  # Row name, left aligned in the viewport of the same name.
  rowname_id <- paste0("rowname-", row_index)
  g_rowname <- NULL
  if (!is.null(row_name) && row_name != "") {
    g_rowname <- grid::textGrob(
      label = row_name,
      x = grid::unit(0, "npc"),
      just = c("left", "center"),
      name = rowname_id,
      vp = grid::vpPath(rowname_id)
    )
  }

  if (length(cells) == 0) {
    gl_cols <- list(NULL)
  } else {
    # Table column in which each cell starts: the first cell starts in
    # column 1, every following one after the spans of its predecessors.
    start_cols <- cumsum(c(1, cell_spans))[seq_along(cells)]

    make_cell <- function(k) {
      label <- cells[[k]]
      span <- cell_spans[[k]]
      first_col <- start_cols[[k]]

      if (is.na(label) || is.null(label)) {
        label <- "NA"
      }
      # Empty cells produce no grob.
      if (identical(label, "")) {
        return(NULL)
      }

      cell_name <- paste0("g-cell-", row_index, "-", first_col)

      if (span == 1) {
        return(
          grid::textGrob(
            label = label,
            name = cell_name,
            vp = grid::vpPath(paste0("cell-", row_index, "-", first_col))
          )
        )
      }

      # Cell spanning several columns; +1 because of rowname.
      vp_span <- grid::viewport(
        layout.pos.row = row_index,
        layout.pos.col = seq(first_col + 1, first_col + span)
      )
      g_label <- grid::textGrob(
        label = label,
        name = cell_name,
        vp = vp_span
      )

      wants_underline <- underline_colspan && !grepl("^[[:space:]]*$", label)
      if (!wants_underline) {
        return(g_label)
      }

      grid::gList(
        g_label,
        grid::linesGrob(
          x = grid::unit.c(grid::unit(.2, "lines"), grid::unit(1, "npc") - grid::unit(.2, "lines")),
          y = grid::unit(c(0, 0), "npc"),
          vp = vp_span
        )
      )
    }

    gl_cols <- lapply(seq_along(cells), make_cell)
  }

  grid::gList(
    g_rowname,
    do.call(grid::gList, gl_cols)
  )
}

#' Graphic Object: Forest Dot Line
#'
#' Calculate the `grob` corresponding to the dot line within the forest plot.
#'
#' @noRd
forest_dot_line <- function(x,
                            lower,
                            upper,
                            row_index,
                            xlim,
                            symbol_size = 1,
                            col = "blue",
                            datavp) {
  ci <- c(lower, upper)

  # Nothing to draw when estimate and both bounds are missing.
  if (all(is.na(c(x, ci)))) {
    return(NULL)
  }

  # Everything is vertically centered in the row.
  y <- grid::unit(c(0.5, 0.5), "npc")

  # Confidence interval: drawn only when complete and overlapping `xlim`.
  # A bound outside `xlim` is cut at the limit and marked with an arrow head.
  g_line <- NULL
  if (all(!is.na(ci)) && ci[2] > xlim[1] && ci[1] < xlim[2]) {
    cut_left <- ci[1] < xlim[1]
    cut_right <- ci[2] > xlim[2]

    g_line <- if (!cut_left && !cut_right) {
      # -
      grid::linesGrob(x = grid::unit(c(ci[1], ci[2]), "native"), y = y)
    } else {
      # "both": <->, "first": <-, "last": ->
      ends <- if (cut_left && cut_right) {
        "both"
      } else if (cut_left) {
        "first"
      } else {
        "last"
      }
      segment <- switch(ends,
        both = xlim,
        first = c(xlim[1], ci[2]),
        last = c(ci[1], xlim[2])
      )
      grid::linesGrob(
        x = grid::unit(segment, "native"),
        y = y,
        arrow = grid::arrow(angle = 30, length = grid::unit(0.5, "lines"), ends = ends)
      )
    }
  }

  # Point estimate: drawn only when it lies within `xlim`.
  g_circle <- NULL
  if (!is.na(x) && x >= xlim[1] && x <= xlim[2]) {
    g_circle <- grid::circleGrob(
      x = grid::unit(x, "native"),
      y = y,
      r = grid::unit(1 / 3.5 * symbol_size, "lines"),
      name = "point"
    )
  }

  # Inner tree: data scale and color; outer tree: forest cell of the row.
  g_data <- grid::gTree(
    children = grid::gList(
      grid::gList(
        g_line,
        g_circle
      )
    ),
    vp = datavp,
    gp = grid::gpar(col = col, fill = col)
  )

  grid::gTree(
    children = grid::gList(g_data),
    vp = grid::vpPath(paste0("forest-", row_index))
  )
}

#' Create a Viewport Tree for the Forest Plot
#'
#' @return A viewport tree.
#'
#' @examples
#' library(grid)
#'
#' tbl <- rtable(
#'   header = rheader(
#'     rrow("", "E", rcell("CI", colspan = 2)),
#'     rrow("", "A", "B", "C")
#'   ),
#'   rrow("row 1", 1, 0.8, 1.1),
#'   rrow("row 2", 1.4, 0.8, 1.6),
#'   rrow("row 3", 1.2, 0.8, 1.2)
#' )
#'
#' # Internal function - forest_viewport
#' \dontrun{
#' v <- forest_viewport(tbl)
#'
#' grid::grid.newpage()
#' showViewport(v)
#' }
#'
#' @keywords internal
forest_viewport <- function(tbl,
                            width_row_names = NULL,
                            width_columns = NULL,
                            width_forest = grid::unit(1, "null"),
                            gap_column = grid::unit(1, "lines"),
                            gap_header = grid::unit(1, "lines"),
                            mat_form = NULL) {
  # Builds the viewport tree "vp_table_layout" with the children "vp_header",
  # "vp_spacer" and "vp_body" that the forest plot grobs are placed in.
  checkmate::assert_class(tbl, "VTableTree")
  checkmate::assert_true(grid::is.unit(width_forest))
  if (!is.null(width_row_names)) {
    checkmate::assert_true(grid::is.unit(width_row_names))
  }
  if (!is.null(width_columns)) {
    checkmate::assert_true(grid::is.unit(width_columns))
  }

  if (is.null(mat_form)) mat_form <- matrix_form(tbl)

  # Cells that are not displayed must not influence widths or heights.
  mat_form$strings[!mat_form$display] <- ""

  nr <- nrow(tbl)
  nc <- ncol(tbl)
  nr_h <- attr(mat_form, "nrow_header")

  # Widths that were not supplied are derived from the proposed column widths
  # (in characters) of the table.
  if (is.null(width_row_names) || is.null(width_columns)) {
    tbl_widths <- formatters::propose_column_widths(mat_form)
    strs_with_width <- strrep("x", tbl_widths) # that works for mono spaced fonts
    if (is.null(width_row_names)) width_row_names <- grid::stringWidth(strs_with_width[1])
    if (is.null(width_columns)) width_columns <- grid::stringWidth(strs_with_width[-1])
  }

  # Widths for row name, cols, forest.
  widths <- grid::unit.c(
    width_row_names + gap_column,
    width_columns + gap_column,
    width_forest
  )

  # Number of text lines needed per row: one more than the largest number of
  # line breaks found in any cell of the row. The breaks are counted per cell
  # with `regmatches()`, which yields one entry per match (none when there is
  # no match), so cells with several line breaks are handled as well.
  n_lines_per_row <- apply(
    X = mat_form$strings,
    MARGIN = 1,
    FUN = function(row) {
      n_breaks <- lengths(regmatches(row, gregexpr("\n", row, fixed = TRUE)))
      max(c(n_breaks + 1, 1))
    }
  )

  i_header <- seq_len(nr_h)

  # Each text line is given a height of 1.2 lines.
  height_body_rows <- grid::unit(n_lines_per_row[-i_header] * 1.2, "lines")
  height_header_rows <- grid::unit(n_lines_per_row[i_header] * 1.2, "lines")

  height_body <- grid::unit(sum(n_lines_per_row[-i_header]) * 1.2, "lines")
  height_header <- grid::unit(sum(n_lines_per_row[i_header]) * 1.2, "lines")

  nc_g <- nc + 2 # number of columns incl. row names and forest

  # Three stacked layout rows: header, gap, body.
  vp_tbl <- grid::vpTree(
    parent = grid::viewport(
      name = "vp_table_layout",
      layout = grid::grid.layout(
        nrow = 3, ncol = 1,
        heights = grid::unit.c(height_header, gap_header, height_body)
      )
    ),
    children = grid::vpList(
      vp_forest_table_part(nr_h, nc_g, 1, 1, widths, height_header_rows, "vp_header"),
      vp_forest_table_part(nr, nc_g, 3, 1, widths, height_body_rows, "vp_body"),
      grid::viewport(name = "vp_spacer", layout.pos.row = 2, layout.pos.col = 1)
    )
  )
  vp_tbl
}

#' Viewport Forest Plot: Table Part
#'
#' Prepares a viewport for the table included in the forest plot.
#'
#' @noRd
vp_forest_table_part <- function(nrow,
                                 ncol,
                                 l_row,
                                 l_col,
                                 widths,
                                 heights,
                                 name) {
  row_ids <- seq_len(nrow)

  # Parent viewport carrying the nrow x ncol layout of this table part.
  vp_parent <- grid::viewport(
    name = name,
    layout.pos.row = l_row,
    layout.pos.col = l_col,
    layout = grid::grid.layout(nrow = nrow, ncol = ncol, widths = widths, heights = heights)
  )

  # First layout column: one viewport per row name.
  vps_rownames <- lapply(row_ids, function(i) {
    grid::viewport(layout.pos.row = i, layout.pos.col = 1, name = paste0("rowname-", i))
  })

  # Layout columns 2, ..., ncol - 1: one viewport per table cell. Rows vary
  # fastest, i.e. the cells are created column by column.
  cell_pos <- expand.grid(i = row_ids, j = seq_len(ncol - 2))
  vps_cells <- Map(
    function(i, j) {
      grid::viewport(layout.pos.row = i, layout.pos.col = j + 1, name = paste0("cell-", i, "-", j))
    },
    cell_pos$i,
    cell_pos$j
  )

  # Last layout column: one viewport per row of the forest.
  vps_forest <- lapply(row_ids, function(i) {
    grid::viewport(layout.pos.row = i, layout.pos.col = ncol, name = paste0("forest-", i))
  })

  grid::vpTree(
    vp_parent,
    children = grid::vpList(
      do.call(grid::vpList, vps_rownames),
      do.call(grid::vpList, vps_cells),
      do.call(grid::vpList, vps_forest)
    )
  )
}

#' Forest Rendering
#'
#' Renders the forest grob.
#'
#' @noRd
grid.forest <- function(...) { # nolint
  # All arguments are forwarded unchanged to `forest_grob()`; the resulting grob is then drawn.
  forest <- forest_grob(...)
  grid::grid.draw(forest)
}

#' Control Function for Descriptive Statistics
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Sets a list of parameters for summaries of descriptive statistics. Typically used internally to specify
#' details for [s_summary()].
#'
#' @inheritParams argument_convention
#' @param quantiles (`numeric`)\cr of length two to specify the quantiles to calculate.
#' @param quantile_type (`numeric`)\cr between 1 and 9 selecting quantile algorithms to be used.
#'   Default is set to 2 as this matches the default quantile algorithm in SAS `proc univariate` set by `QNTLDEF=5`.
#'   This differs from R's default. See more about `type` in [stats::quantile()].
#' @param test_mean (`numeric`)\cr to test against the mean under the null hypothesis when calculating p-value.
#'
#' @return A list of components with the same names as the arguments.
#'
#' @export
control_summarize_vars <- function(conf_level = 0.95,
                                   quantiles = c(0.25, 0.75),
                                   quantile_type = 2,
                                   test_mean = 0) {
  # Structural checks first: exactly two quantiles, an algorithm id within 1..9, numeric test mean.
  checkmate::assert_vector(quantiles, len = 2)
  checkmate::assert_int(quantile_type, lower = 1, upper = 9)
  checkmate::assert_numeric(test_mean)

  # Each quantile and the confidence level go through `assert_proportion_value()`;
  # only the checks matter, the return values are discarded.
  invisible(lapply(quantiles, assert_proportion_value))
  assert_proportion_value(conf_level)

  # The result mirrors the arguments one-to-one.
  list(
    conf_level = conf_level,
    quantiles = quantiles,
    quantile_type = quantile_type,
    test_mean = test_mean
  )
}

#' Format Function for Descriptive Statistics
#'
#' Returns format patterns for descriptive statistics. The format is understood by the `rtables`.
#'
#' @param type (`string`)\cr choice of a summary data type. Only `counts` and `numeric` types are currently supported.
#'
#' @return A named `vector` of default statistic formats for the given data type.
#'
#' @keywords internal
summary_formats <- function(type = "numeric") {
  if (type == "counts") {
    # Formats for count-type summaries. `format_count_fraction` is an object defined elsewhere
    # (presumably a formatting function); since it is not a string, `c()` returns a list here
    # rather than a character vector.
    c(
      n = "xx.",
      count = "xx.",
      count_fraction = format_count_fraction,
      n_blq = "xx."
    )
  } else {
    # Any other `type` value falls back to the numeric formats.
    # The names listed here define which numeric statistics have a default format; every
    # statistic returned by `s_summary.numeric()` needs an entry, including `geom_mean_ci`.
    c(
      n = "xx.",
      sum = "xx.x",
      mean = "xx.x",
      sd = "xx.x",
      se = "xx.x",
      mean_sd = "xx.x (xx.x)",
      mean_se = "xx.x (xx.x)",
      mean_ci = "(xx.xx, xx.xx)",
      mean_sei = "(xx.xx, xx.xx)",
      mean_sdi = "(xx.xx, xx.xx)",
      mean_pval = "xx.xx",
      median = "xx.x",
      mad = "xx.x",
      median_ci = "(xx.xx, xx.xx)",
      quantiles = "xx.x - xx.x",
      iqr = "xx.x",
      range = "xx.x - xx.x",
      cv = "xx.x",
      min = "xx.x",
      max = "xx.x",
      median_range = "xx.x (xx.x - xx.x)",
      geom_mean = "xx.x",
      # Same two-value interval pattern as the other confidence intervals.
      geom_mean_ci = "(xx.xx, xx.xx)",
      geom_cv = "xx.x"
    )
  }
}

#' Label Function for Descriptive Statistics
#'
#' Returns labels of descriptive statistics for numeric variables.
#'
#' @return A named `vector` of default statistic labels.
#'
#' @keywords internal
summary_labels <- function() {
  # Each entry is a (statistic name, display label) pair; the order of the pairs is the
  # order of the returned vector.
  stat_label_pairs <- list(
    c("mean", "Mean"),
    c("sum", "Sum"),
    c("sd", "SD"),
    c("se", "SE"),
    c("mean_sd", "Mean (SD)"),
    c("mean_se", "Mean (SE)"),
    c("median", "Median"),
    c("mad", "Median Absolute Deviation"),
    c("iqr", "IQR"),
    c("range", "Min - Max"),
    c("median_range", "Median (Min - Max)"),
    c("cv", "CV (%)"),
    c("min", "Minimum"),
    c("max", "Maximum"),
    c("geom_mean", "Geometric Mean"),
    c("geom_cv", "CV % Geometric Mean"),
    c("n", "n")
  )

  stat_names <- vapply(stat_label_pairs, `[[`, character(1), 1L)
  stat_labels <- vapply(stat_label_pairs, `[[`, character(1), 2L)

  # Named character vector: names are the statistics, values are their labels.
  stats::setNames(stat_labels, stat_names)
}

#' Summarize Variables
#'
#' @description `r lifecycle::badge("stable")`
#'
#' We use the S3 generic function [s_summary()] to implement summaries for different `x` objects. This
#' is used as a statistics function in combination with the analyze function [summarize_vars()].
#'
#' @inheritParams argument_convention
#'
#' @name summarize_variables
NULL

#' @describeIn summarize_variables S3 generic function to produce a variable summary.
#'
#' @return
#' * `s_summary()` returns different statistics depending on the class of `x`.
#'
#' @export
s_summary <- function(x,
                      na.rm = TRUE, # nolint
                      denom,
                      .N_row, # nolint
                      .N_col, # nolint
                      .var,
                      ...) {
  # `na.rm` is validated once here, before control is handed to the class-specific method.
  checkmate::assert_flag(na.rm)

  # Dispatch on the class of the first argument, `x`.
  UseMethod("s_summary")
}

#' @describeIn summarize_variables Method for `numeric` class.
#'
#' @param control (`list`)\cr parameters for descriptive statistics details, specified by using
#'   the helper function [control_summarize_vars()]. Some possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for mean and median.
#'   * `quantiles` (`numeric`)\cr vector of length two to specify the quantiles.
#'   * `quantile_type` (`numeric`)\cr between 1 and 9 selecting quantile algorithms to be used.
#'     See more about `type` in [stats::quantile()].
#'   * `test_mean` (`numeric`)\cr value to test against the mean under the null hypothesis when calculating p-value.
#'
#' @return
#'   * If `x` is of class `numeric`, returns a `list` with the following named `numeric` items:
#'     * `n`: The [length()] of `x`.
#'     * `sum`: The [sum()] of `x`.
#'     * `mean`: The [mean()] of `x`.
#'     * `sd`: The [stats::sd()] of `x`.
#'     * `se`: The standard error of `x` mean, i.e.: (`sd(x) / sqrt(length(x))`).
#'     * `mean_sd`: The [mean()] and [stats::sd()] of `x`.
#'     * `mean_se`: The [mean()] of `x` and its standard error (see above).
#'     * `mean_ci`: The CI for the mean of `x` (from [stat_mean_ci()]).
#'     * `mean_sei`: The SE interval for the mean of `x`, i.e.: ([mean()] -/+ [stats::sd()] / [sqrt()]).
#'     * `mean_sdi`: The SD interval for the mean of `x`, i.e.: ([mean()] -/+ [stats::sd()]).
#'     * `mean_pval`: The two-sided p-value of the mean of `x` (from [stat_mean_pval()]).
#'     * `median`: The [stats::median()] of `x`.
#'     * `mad`: The median absolute deviation of `x`, i.e.: ([stats::median()] of `xc`,
#'       where `xc` = `x` - [stats::median()]).
#'     * `median_ci`: The CI for the median of `x` (from [stat_median_ci()]).
#'     * `quantiles`: Two sample quantiles of `x` (from [stats::quantile()]).
#'     * `iqr`: The [stats::IQR()] of `x`.
#'     * `range`: The [range_noinf()] of `x`.
#'     * `min`: The [min()] of `x`.
#'     * `max`: The [max()] of `x`.
#'     * `median_range`: The [median()] and [range_noinf()] of `x`.
#'     * `cv`: The coefficient of variation of `x`, i.e.: ([stats::sd()] / [mean()] * 100).
#'     * `geom_mean`: The geometric mean of `x`, i.e.: (`exp(mean(log(x)))`).
#'     * `geom_mean_ci`: The CI for the geometric mean of `x` (from [stat_mean_ci()]).
#'     * `geom_cv`: The geometric coefficient of variation of `x`, i.e.: (`sqrt(exp(sd(log(x)) ^ 2) - 1) * 100`).
#'
#' @note
#' * If `x` is an empty vector, `NA` is returned. This is the expected feature so as to return `rcell` content in
#'   `rtables` when the intersection of a column and a row delimits an empty data selection.
#' * When the `mean` function is applied to an empty vector, `NA` will be returned instead of `NaN`, the latter
#'   being standard behavior in R.
#'
#' @method s_summary numeric
#'
#' @examples
#' # `s_summary.numeric`
#'
#' ## Basic usage: empty numeric returns NA-filled items.
#' s_summary(numeric())
#'
#' ## Management of NA values.
#' x <- c(NA_real_, 1)
#' s_summary(x, na.rm = TRUE)
#' s_summary(x, na.rm = FALSE)
#'
#' x <- c(NA_real_, 1, 2)
#' s_summary(x, stats = NULL)
#'
#' ## Benefits in `rtables` constructions:
#' require(rtables)
#' dta_test <- data.frame(
#'   Group = rep(LETTERS[1:3], each = 2),
#'   sub_group = rep(letters[1:2], each = 3),
#'   x = 1:6
#' )
#'
#' ## The summary obtained with `rtables`:
#' basic_table() %>%
#'   split_cols_by(var = "Group") %>%
#'   split_rows_by(var = "sub_group") %>%
#'   analyze(vars = "x", afun = s_summary) %>%
#'   build_table(df = dta_test)
#'
#' ## By comparison with `lapply`:
#' X <- split(dta_test, f = with(dta_test, interaction(Group, sub_group)))
#' lapply(X, function(x) s_summary(x$x))
#'
#' @export
s_summary.numeric <- function(x,
                              na.rm = TRUE, # nolint
                              denom,
                              .N_row, # nolint
                              .N_col, # nolint
                              .var,
                              control = control_summarize_vars(),
                              ...) {
  checkmate::assert_numeric(x)

  # With `na.rm = TRUE` the missing values are dropped once, up front; every statistic below is
  # then computed with `na.rm = FALSE` on whatever `x` contains at that point.
  if (na.rm) {
    x <- x[!is.na(x)]
  }

  n_obs <- length(x)
  is_empty <- n_obs == 0

  stat_n <- c("n" = n_obs)

  # Sum and mean of an empty vector are reported as NA.
  stat_sum <- c("sum" = if (is_empty) NA_real_ else sum(x, na.rm = FALSE))
  stat_mean <- c("mean" = if (is_empty) NA_real_ else mean(x, na.rm = FALSE))

  sd_x <- stats::sd(x, na.rm = FALSE)
  stat_sd <- c("sd" = sd_x)
  stat_se <- c("se" = sd_x / sqrt(length(stats::na.omit(x))))
  stat_mean_sd <- c(stat_mean, "sd" = sd_x)
  stat_mean_se <- c(stat_mean, stat_se)

  mean_ci <- stat_mean_ci(x, conf_level = control$conf_level, na.rm = FALSE, gg_helper = FALSE)
  stat_mean_ci_lbl <- formatters::with_label(mean_ci, paste("Mean", f_conf_level(control$conf_level)))

  # Mean -/+ one standard error, then mean -/+ one standard deviation.
  mean_sei <- stats::setNames(
    stat_mean[[1]] + c(-1, 1) * sd_x / sqrt(stat_n),
    c("mean_sei_lwr", "mean_sei_upr")
  )
  stat_mean_sei <- formatters::with_label(mean_sei, "Mean -/+ 1xSE")

  mean_sdi <- stats::setNames(
    stat_mean[[1]] + c(-1, 1) * sd_x,
    c("mean_sdi_lwr", "mean_sdi_upr")
  )
  stat_mean_sdi <- formatters::with_label(mean_sdi, "Mean -/+ 1xSD")

  mean_pval <- stat_mean_pval(x, test_mean = control$test_mean, na.rm = FALSE, n_min = 2)
  stat_mean_pval_lbl <- formatters::with_label(mean_pval, paste("Mean", f_pval(control$test_mean)))

  stat_median <- c("median" = stats::median(x, na.rm = FALSE))

  # NOTE(review): this is the median of the signed deviations from the median (no `abs()` is
  # applied), as described in the roxygen text for `mad` - confirm this is the intended definition.
  stat_mad <- c("mad" = stats::median(x - stat_median, na.rm = FALSE))

  median_ci <- stat_median_ci(x, conf_level = control$conf_level, na.rm = FALSE, gg_helper = FALSE)
  stat_median_ci_lbl <- formatters::with_label(median_ci, paste("Median", f_conf_level(control$conf_level)))

  # Quantiles and IQR are set to NA when `x` still holds missing values, instead of calling
  # `stats::quantile()` / `stats::IQR()` on them with `na.rm = FALSE`.
  has_na <- any(is.na(x))
  probs <- control$quantiles
  quantile_vals <- if (has_na) {
    rep(NA_real_, length(probs))
  } else {
    stats::quantile(x, probs = probs, type = control$quantile_type, na.rm = FALSE)
  }
  names(quantile_vals) <- paste("quantile", probs, sep = "_")
  stat_quantiles <- formatters::with_label(
    quantile_vals,
    paste0(paste(paste0(probs * 100, "%"), collapse = " and "), "-ile")
  )

  stat_iqr <- c(
    "iqr" = if (has_na) NA_real_ else stats::IQR(x, na.rm = FALSE, type = control$quantile_type)
  )

  stat_range <- stats::setNames(range_noinf(x, na.rm = FALSE), c("min", "max"))

  stat_median_range <- formatters::with_label(c(stat_median, stat_range), "Median (Min - Max)")

  stat_cv <- c("cv" = unname(stat_sd) / unname(stat_mean) * 100)

  # Geometric statistics work on the log scale: values <= 0 are replaced by NA beforehand.
  x_positive <- x
  x_positive[x_positive <= 0] <- NA
  log_x <- log(x_positive)

  stat_geom_mean <- c("geom_mean" = exp(mean(log_x, na.rm = FALSE)))
  # The CI helper receives the original `x`, not the NA-replaced copy.
  geom_mean_ci <- stat_mean_ci(x, conf_level = control$conf_level, na.rm = FALSE, gg_helper = FALSE, geom_mean = TRUE)
  stat_geom_mean_ci <- formatters::with_label(
    geom_mean_ci,
    paste("Geometric Mean", f_conf_level(control$conf_level))
  )
  stat_geom_cv <- c("geom_cv" = sqrt(exp(stats::sd(log_x, na.rm = FALSE)^2) - 1) * 100)

  # Assemble the result; the element order is part of the output.
  list(
    n = stat_n,
    sum = stat_sum,
    mean = stat_mean,
    sd = stat_sd,
    se = stat_se,
    mean_sd = stat_mean_sd,
    mean_se = stat_mean_se,
    mean_ci = stat_mean_ci_lbl,
    mean_sei = stat_mean_sei,
    mean_sdi = stat_mean_sdi,
    mean_pval = stat_mean_pval_lbl,
    median = stat_median,
    mad = stat_mad,
    median_ci = stat_median_ci_lbl,
    quantiles = stat_quantiles,
    iqr = stat_iqr,
    range = stat_range,
    min = stat_range[1],
    max = stat_range[2],
    median_range = stat_median_range,
    cv = stat_cv,
    geom_mean = stat_geom_mean,
    geom_mean_ci = stat_geom_mean_ci,
    geom_cv = stat_geom_cv
  )
}

#' @describeIn summarize_variables Method for `factor` class.
#'
#' @param denom (`string`)\cr choice of denominator for factor proportions. Options are:
#'   * `n`: number of values in this row and column intersection.
#'   * `N_row`: total number of values in this row across columns.
#'   * `N_col`: total number of values in this column across rows.
#'
#' @return
#'   * If `x` is of class `factor` or converted from `character`, returns a `list` with named `numeric` items:
#'     * `n`: The [length()] of `x`.
#'     * `count`: A list with the number of cases for each level of the factor `x`.
#'     * `count_fraction`: Similar to `count` but also includes the proportion of cases for each level of the
#'       factor `x` relative to the denominator, or `0` if the denominator is zero.
#'
#' @note
#' * If `x` is an empty `factor`, a list is still returned for `counts` with one element
#'   per factor level. If there are no levels in `x`, the function fails.
#' * If factor variables contain `NA`, these `NA` values are excluded by default. To include `NA` values
#'   set `na.rm = FALSE` and missing values will be displayed as an `NA` level. Alternatively, an explicit
#'   factor level can be defined for `NA` values during pre-processing via [df_explicit_na()] - the
#'   default `na_level` (`"<Missing>"`) will also be excluded when `na.rm` is set to `TRUE`.
#'
#' @method s_summary factor
#'
#' @examples
#' # `s_summary.factor`
#'
#' ## Basic usage:
#' s_summary(factor(c("a", "a", "b", "c", "a")))
#' # Empty factor returns zero counts for each level.
#' s_summary(factor(levels = c("a", "b", "c")))
#'
#' ## Management of NA values.
#' x <- factor(c(NA, "Female"))
#' x <- explicit_na(x)
#' s_summary(x, na.rm = TRUE)
#' s_summary(x, na.rm = FALSE)
#'
#' ## Different denominators.
#' x <- factor(c("a", "a", "b", "c", "a"))
#' s_summary(x, denom = "N_row", .N_row = 10L)
#' s_summary(x, denom = "N_col", .N_col = 20L)
#'
#' @export
s_summary.factor <- function(x,
                             na.rm = TRUE, # nolint
                             denom = c("n", "N_row", "N_col"),
                             .N_row, # nolint
                             .N_col, # nolint
                             ...) {
  assert_valid_factor(x)
  denom <- match.arg(denom)

  # Either drop missing values (plus the "<Missing>" level), or turn them into an explicit
  # "NA" level so that they are counted like any other level.
  x <- if (na.rm) {
    fct_discard(x[!is.na(x)], "<Missing>")
  } else {
    explicit_na(x, label = "NA")
  }

  n_obs <- length(x)
  level_counts <- as.list(table(x, useNA = "ifany"))

  # Denominator for the fractions; `.N_row` / `.N_col` are only touched when selected.
  dn <- switch(denom,
    n = n_obs,
    N_row = .N_row,
    N_col = .N_col
  )

  # Per level: the count followed by its fraction of `dn` (0 when `dn` is not positive).
  level_count_fractions <- lapply(
    level_counts,
    function(cnt) {
      c(cnt, ifelse(dn > 0, cnt / dn, 0))
    }
  )

  # Number of values matching "BLQ", "LTR" or "<" followed by a digit 1-9.
  n_blq <- sum(grepl("BLQ|LTR|<[1-9]", x))

  list(
    n = n_obs,
    count = level_counts,
    count_fraction = level_count_fractions,
    n_blq = n_blq
  )
}

#' @describeIn summarize_variables Method for `character` class. This makes an automatic
#'   conversion to factor (with a warning) and then forwards to the method for factors.
#'
#' @param verbose (`logical`)\cr Defaults to `TRUE`, which prints out warnings and messages. It is mainly used
#'   to print out information about factor casting.
#'
#' @note
#' * Automatic conversion of character to factor does not guarantee that the table
#'   can be generated correctly. In particular for sparse tables this very likely can fail.
#'   It is therefore better to always pre-process the dataset such that factors are manually
#'   created from character variables before passing the dataset to [rtables::build_table()].
#'
#' @method s_summary character
#'
#' @examples
#' # `s_summary.character`
#'
#' ## Basic usage:
#' s_summary(c("a", "a", "b", "c", "a"), .var = "x", verbose = FALSE)
#' s_summary(c("a", "a", "b", "c", "a", ""), .var = "x", na.rm = FALSE, verbose = FALSE)
#'
#' @export
s_summary.character <- function(x,
                                na.rm = TRUE, # nolint
                                denom = c("n", "N_row", "N_col"),
                                .N_row, # nolint
                                .N_col, # nolint
                                .var,
                                verbose = TRUE,
                                ...) {
  # Convert to factor first. When missing values are kept, `na_level = "NA"` is passed on as well
  # (presumably the level assigned to missing values - see `as_factor_keep_attributes()`).
  x_fct <- if (na.rm) {
    as_factor_keep_attributes(x, x_name = .var, verbose = verbose)
  } else {
    as_factor_keep_attributes(x, x_name = .var, verbose = verbose, na_level = "NA")
  }

  # Re-dispatch on the converted object; `.var` and `verbose` are not forwarded.
  s_summary(x = x_fct, na.rm = na.rm, denom = denom, .N_row = .N_row, .N_col = .N_col, ...)
}

#' @describeIn summarize_variables Method for `logical` class.
#'
#' @param denom (`string`)\cr choice of denominator for proportion. Options are:
#'   * `n`: number of values in this row and column intersection.
#'   * `N_row`: total number of values in this row across columns.
#'   * `N_col`: total number of values in this column across rows.
#'
#' @return
#'   * If `x` is of class `logical`, returns a `list` with named `numeric` items:
#'     * `n`: The [length()] of `x` (possibly after removing `NA`s).
#'     * `count`: Count of `TRUE` in `x`.
#'     * `count_fraction`: Count and proportion of `TRUE` in `x` relative to the denominator, or `NA` if the
#'       denominator is zero. Note that `NA`s in `x` are never counted or leading to `NA` here.
#'
#' @method s_summary logical
#'
#' @examples
#' # `s_summary.logical`
#'
#' ## Basic usage:
#' s_summary(c(TRUE, FALSE, TRUE, TRUE))
#'
#' ## Management of NA values.
#' x <- c(NA, TRUE, FALSE)
#' s_summary(x, na.rm = TRUE)
#' s_summary(x, na.rm = FALSE)
#'
#' ## Different denominators.
#' x <- c(TRUE, FALSE, TRUE, TRUE)
#' s_summary(x, denom = "N_row", .N_row = 10L)
#' s_summary(x, denom = "N_col", .N_col = 20L)
#'
#' @export
s_summary.logical <- function(x,
                              na.rm = TRUE, # nolint
                              denom = c("n", "N_row", "N_col"),
                              .N_row, # nolint
                              .N_col, # nolint
                              ...) {
  denom <- match.arg(denom)

  if (na.rm) {
    x <- x[!is.na(x)]
  }

  n_obs <- length(x)
  # `NA`s never contribute to the count of `TRUE`, whether or not they were removed above.
  n_true <- sum(x, na.rm = TRUE)

  # Denominator for the fraction; `.N_row` / `.N_col` are only touched when selected.
  dn <- switch(denom,
    n = n_obs,
    N_row = .N_row,
    N_col = .N_col
  )

  # The fraction is NA when the denominator is not positive.
  fraction <- ifelse(dn > 0, n_true / dn, NA)

  list(
    n = n_obs,
    count = n_true,
    count_fraction = c(n_true, fraction),
    n_blq = 0L
  )
}

#' @describeIn summarize_variables Formatted analysis function which is used as `afun` in `summarize_vars()`.
#'
#' @return
#' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @export
a_summary <- function(x,
                      ...,
                      .N_row, # nolint
                      .N_col, # nolint
                      .var) {
  # S3 generic: the method is selected from the class of the first argument, `x`.
  UseMethod("a_summary")
}

# Default formats and labels for numeric summaries, computed once when this file is evaluated.
# They are used by `a_summary.numeric`, and the names of the formats also determine which
# numeric statistics `create_afun_summary()` offers.
.a_summary_numeric_formats <- summary_formats()
.a_summary_numeric_labels <- summary_labels()

#' @describeIn summarize_variables Formatted analysis function method for `numeric` class.
#'
#' @examples
#' # `a_summary.numeric`
#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla")
#'
#' @export
a_summary.numeric <- make_afun(
  # Statistics function being wrapped.
  s_summary.numeric,
  # Default numeric formats and labels (see `summary_formats()` and `summary_labels()`).
  .formats = .a_summary_numeric_formats,
  .labels = .a_summary_numeric_labels
)

# Default formats for count-type summaries; shared by the factor, character and logical methods
# of `a_summary()`.
.a_summary_counts_formats <- summary_formats(type = "counts")

#' @describeIn summarize_variables Formatted analysis function method for `factor` class.
#'
#' @examples
#' # `a_summary.factor`
#' # We need to ungroup `count` and `count_fraction` first so that the rtables formatting
#' # functions can be applied correctly.
#' afun <- make_afun(
#'   getS3method("a_summary", "factor"),
#'   .ungroup_stats = c("count", "count_fraction")
#' )
#' afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
#'
#' @export
a_summary.factor <- make_afun(
  # Statistics function being wrapped.
  s_summary.factor,
  # Count-type default formats; no default labels are supplied for this method.
  .formats = .a_summary_counts_formats
)

#' @describeIn summarize_variables Formatted analysis function method for `character` class.
#'
#' @examples
#' # `a_summary.character`
#' afun <- make_afun(
#'   getS3method("a_summary", "character"),
#'   .ungroup_stats = c("count", "count_fraction")
#' )
#' afun(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
#'
#' @export
a_summary.character <- make_afun(
  # Statistics function being wrapped (it converts to factor and forwards to the factor method).
  s_summary.character,
  # Count-type default formats; no default labels are supplied for this method.
  .formats = .a_summary_counts_formats
)

#' @describeIn summarize_variables Formatted analysis function method for `logical` class.
#'
#' @examples
#' # `a_summary.logical`
#' afun <- make_afun(
#'   getS3method("a_summary", "logical")
#' )
#' afun(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
#'
#' @export
a_summary.logical <- make_afun(
  # Statistics function being wrapped.
  s_summary.logical,
  # Count-type default formats; no default labels are supplied for this method.
  .formats = .a_summary_counts_formats
)

#' Constructor Function for [summarize_vars()] and [summarize_colvars()]
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Constructor function which creates a combined formatted analysis function.
#'
#' @inheritParams argument_convention
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#'
#' @return Combined formatted analysis function for use in [summarize_vars()].
#'
#' @note Since [a_summary()] is generic and we want customization of the formatting arguments
#'   via [rtables::make_afun()], we need to create another temporary generic function, with
#'   corresponding customized methods. Then in order for the methods to be found,
#'   we need to wrap them in a combined `afun`. Since this is required by two layout creating
#'   functions (and possibly others in the future), we provide a constructor that does this:
#'   [create_afun_summary()].
#'
#' @examples
#' # `create_afun_summary()` to create combined `afun`
#'
#' afun <- create_afun_summary(
#'   .stats = NULL,
#'   .formats = c(median = "xx."),
#'   .labels = c(median = "My median"),
#'   .indent_mods = c(median = 1L)
#' )
#' ## Fabricated dataset.
#' dta_test <- data.frame(
#'   USUBJID = rep(1:6, each = 3),
#'   PARAMCD = rep("lab", 6 * 3),
#'   AVISIT  = rep(paste0("V", 1:3), 6),
#'   ARM     = rep(LETTERS[1:3], rep(6, 3)),
#'   AVAL    = c(9:1, rep(NA, 9))
#' )
#'
#' l <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   split_rows_by(var = "AVISIT") %>%
#'   analyze(vars = "AVAL", afun = afun)
#'
#' build_table(l, df = dta_test)
#'
#' @export
create_afun_summary <- function(.stats, .formats, .labels, .indent_mods) {
  # The returned closure keeps `.stats`, `.formats`, `.labels` and `.indent_mods`; everything
  # below is re-created on each call of the closure.
  function(x,
           ...,
           .N_row, # nolint
           .N_col, # nolint
           .var) {
    # Temporary S3 generic, local to this call. The `afun.<class>` functions defined below in
    # the same environment act as its methods, so their names must keep the `afun.` prefix.
    afun <- function(x, ...) {
      UseMethod("afun", x)
    }

    # Numeric method: the candidate statistics are the names of the default numeric formats.
    # NOTE(review): `afun_selected_stats()` presumably restricts `.stats` to these candidates
    # (and returns all of them for `NULL`) - confirm against its definition.
    numeric_stats <- afun_selected_stats(
      .stats,
      all_stats = names(.a_summary_numeric_formats)
    )
    afun.numeric <- make_afun( # nolint
      a_summary.numeric,
      .stats = numeric_stats,
      .formats = extract_by_name(.formats, numeric_stats),
      .labels = extract_by_name(.labels, numeric_stats),
      .indent_mods = extract_by_name(.indent_mods, numeric_stats)
    )

    # Count-type methods (factor, character, logical) share the same candidate statistics.
    # `count` and `count_fraction` are additionally passed as `.ungroup_stats` for the factor
    # and character methods, but not for the logical method.
    factor_stats <- afun_selected_stats(.stats, c("n", "count", "count_fraction"))
    ungroup_stats <- afun_selected_stats(.stats, c("count", "count_fraction"))
    afun.factor <- make_afun( # nolint
      a_summary.factor,
      .stats = factor_stats,
      .formats = extract_by_name(.formats, factor_stats),
      .labels = extract_by_name(.labels, factor_stats),
      .indent_mods = extract_by_name(.indent_mods, factor_stats),
      .ungroup_stats = ungroup_stats
    )

    afun.character <- make_afun( # nolint
      a_summary.character,
      .stats = factor_stats,
      .formats = extract_by_name(.formats, factor_stats),
      .labels = extract_by_name(.labels, factor_stats),
      .indent_mods = extract_by_name(.indent_mods, factor_stats),
      .ungroup_stats = ungroup_stats
    )

    afun.logical <- make_afun( # nolint
      a_summary.logical,
      .stats = factor_stats,
      .formats = extract_by_name(.formats, factor_stats),
      .labels = extract_by_name(.labels, factor_stats),
      .indent_mods = extract_by_name(.indent_mods, factor_stats)
    )

    # Dispatch on the class of `x` to one of the customized methods above, forwarding all
    # arguments received by the closure.
    afun(
      x = x,
      ...,
      .N_row = .N_row,
      .N_col = .N_col,
      .var = .var
    )
  }
}

#' @describeIn summarize_variables Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param ... arguments passed to `s_summary()`.
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#'
#' @return
#' * `summarize_vars()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_summary()` to the table layout.
#'
#' @examples
#' ## Fabricated dataset.
#' dta_test <- data.frame(
#'   USUBJID = rep(1:6, each = 3),
#'   PARAMCD = rep("lab", 6 * 3),
#'   AVISIT  = rep(paste0("V", 1:3), 6),
#'   ARM     = rep(LETTERS[1:3], rep(6, 3)),
#'   AVAL    = c(9:1, rep(NA, 9))
#' )
#'
#' # `summarize_vars()` in `rtables` pipelines
#' ## Default output within a `rtables` pipeline.
#' l <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   split_rows_by(var = "AVISIT") %>%
#'   summarize_vars(vars = "AVAL")
#'
#' build_table(l, df = dta_test)
#'
#' ## Select and format statistics output.
#' l <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   split_rows_by(var = "AVISIT") %>%
#'   summarize_vars(
#'     vars = "AVAL",
#'     .stats = c("n", "mean_sd", "quantiles"),
#'     .formats = c("mean_sd" = "xx.x, xx.x"),
#'     .labels = c(n = "n", mean_sd = "Mean, SD", quantiles = c("Q1 - Q3"))
#'   )
#'
#' results <- build_table(l, df = dta_test)
#' as_html(results)
#'
#' ## Use arguments interpreted by `s_summary`.
#' l <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   split_rows_by(var = "AVISIT") %>%
#'   summarize_vars(vars = "AVAL", na.rm = FALSE)
#'
#' results <- build_table(l, df = dta_test)
#'
#' ## Handle `NA` levels first when summarizing factors.
#' dta_test$AVISIT <- NA_character_
#' dta_test <- df_explicit_na(dta_test)
#' l <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   summarize_vars(vars = "AVISIT", na.rm = FALSE)
#'
#' results <- build_table(l, df = dta_test)
#' \dontrun{
#' Viewer(results)
#' }
#'
#' @export
summarize_vars <- function(lyt,
                           vars,
                           var_labels = vars,
                           nested = TRUE,
                           ...,
                           na_level = NA_character_,
                           show_labels = "default",
                           table_names = vars,
                           section_div = NA_character_,
                           .stats = c("n", "mean_sd", "median", "range", "count_fraction"),
                           .formats = NULL,
                           .labels = NULL,
                           .indent_mods = NULL) {
  # Combined analysis function carrying the statistic selection and formatting settings.
  # NOTE(review): keep this local named `afun` and pass it as `afun = afun` - `analyze()` may
  # inspect the expression supplied for `afun`; confirm before renaming or inlining.
  afun <- create_afun_summary(.stats, .formats, .labels, .indent_mods)

  # Arguments in `...` are not used here; they are handed to `analyze()` as `extra_args`.
  # `na_level` is passed on under the name `na_str`, and `inclNAs` is always `TRUE`.
  analyze(
    lyt = lyt,
    vars = vars,
    var_labels = var_labels,
    afun = afun,
    nested = nested,
    extra_args = list(...),
    na_str = na_level,
    inclNAs = TRUE,
    show_labels = show_labels,
    table_names = table_names,
    section_div = section_div
  )
}

#' Tabulate Biomarker Effects on Binary Response by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Tabulate the estimated effects of multiple continuous biomarker variables
#' on a binary response endpoint across population subgroups.
#'
#' @inheritParams argument_convention
#' @param df (`data.frame`)\cr containing all analysis variables, as returned by
#'   [extract_rsp_biomarkers()].
#' @param vars (`character`)\cr the names of statistics to be reported among:
#'   * `n_tot`: Total number of patients per group.
#'   * `n_rsp`: Total number of responses per group.
#'   * `prop`: Total response proportion per group.
#'   * `or`: Odds ratio.
#'   * `ci`: Confidence interval of odds ratio.
#'   * `pval`: p-value of the effect.
#'   Note, the statistics `n_tot`, `or` and `ci` are required.
#'
#' @return An `rtables` table summarizing biomarker effects on binary response by subgroup.
#'
#' @details These functions create a layout starting from a data frame which contains
#'   the required statistics. The tables are then typically used as input for forest plots.
#'
#' @note In contrast to [tabulate_rsp_subgroups()] this tabulation function does
#'   not start from an input layout `lyt`. This is because internally the table is
#'   created by combining multiple subtables.
#'
#' @seealso [h_tab_rsp_one_biomarker()] which is used internally, [extract_rsp_biomarkers()].
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   mutate(rsp = AVALC == "CR")
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' df <- extract_rsp_biomarkers(
#'   variables = list(
#'     rsp = "rsp",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     subgroups = "BMRKR2"
#'   ),
#'   data = adrs_f
#' )
#'
#' \dontrun{
#' ## Table with default columns.
#' tabulate_rsp_biomarkers(df)
#'
#' ## Table with a manually chosen set of columns: leave out "pval", reorder.
#' tab <- tabulate_rsp_biomarkers(
#'   df = df,
#'   vars = c("n_rsp", "ci", "n_tot", "prop", "or")
#' )
#'
#' ## Finally produce the forest plot.
#' g_forest(tab, xlim = c(0.7, 1.4))
#' }
#'
#' @export
#' @name response_biomarkers_subgroups
tabulate_rsp_biomarkers <- function(df,
                                    vars = c("n_tot", "n_rsp", "prop", "or", "ci", "pval"),
                                    .indent_mods = 0L) {
  checkmate::assert_data_frame(df)
  checkmate::assert_character(df$biomarker)
  checkmate::assert_character(df$biomarker_label)
  checkmate::assert_subset(vars, c("n_tot", "n_rsp", "prop", "or", "ci", "pval"))

  # Builds the sub-table for the rows of a single biomarker and sets the label at the first
  # element of its first row path to that biomarker's label.
  tabulate_one_biomarker <- function(df_sub) {
    tab_sub <- h_tab_rsp_one_biomarker(
      df = df_sub,
      vars = vars,
      .indent_mods = .indent_mods
    )
    first_path <- row_paths(tab_sub)[[1]][1]
    label_at_path(tab_sub, path = first_path) <- df_sub$biomarker_label[1]
    tab_sub
  }

  # One sub-table per biomarker, stacked into a single table.
  tabs_by_biomarker <- lapply(split(df, f = df$biomarker), tabulate_one_biomarker)

  # The attributes record the positions of selected statistics within `vars`, plus a header.
  structure(
    do.call(rbind, tabs_by_biomarker),
    forest_header = paste0(c("Lower", "Higher"), "\nBetter"),
    col_x = match("or", vars),
    col_ci = match("ci", vars),
    col_symbol_size = grep("n_tot", vars)
  )
}

#' Prepares Response Data Estimates for Multiple Biomarkers in a Single Data Frame
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares estimates for number of responses, patients and overall response rate,
#' as well as odds ratio estimates, confidence intervals and p-values,
#' for multiple biomarkers across population subgroups in a single data frame.
#' `variables` corresponds to the names of variables found in `data`, passed as a
#' named list and requires elements `rsp` and `biomarkers` (vector of continuous
#' biomarker variables) and optionally `covariates`, `subgroups` and `strat`.
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @inheritParams argument_convention
#' @inheritParams response_subgroups
#' @param control (named `list`)\cr controls for the response definition and the
#'   confidence level produced by [control_logistic()].
#'
#' @return A `data.frame` with columns `biomarker`, `biomarker_label`, `n_tot`, `n_rsp`,
#'   `prop`, `or`, `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`,
#'   `var_label`, and `row_type`.
#'
#' @note You can also specify a continuous variable in `rsp` and then use the
#'   `response_definition` control to convert that internally to a logical
#'   variable reflecting binary response.
#'
#' @seealso [h_logistic_mult_cont_df()] which is used internally.
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   mutate(rsp = AVALC == "CR")
#'
#' # Typical analysis of two continuous biomarkers `BMRKR1` and `AGE`,
#' # in logistic regression models with one covariate `SEX`. The subgroups
#' # are defined by the levels of `BMRKR2`.
#' df <- extract_rsp_biomarkers(
#'   variables = list(
#'     rsp = "rsp",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     subgroups = "BMRKR2"
#'   ),
#'   data = adrs_f
#' )
#' df
#'
#' # Here we group the levels of `BMRKR2` manually, and we add a stratification
#' # variable `STRATA1`. We also here use a continuous variable `EOSDY`
#' # which is then binarized internally (response is defined as this variable
#' # being larger than 500).
#' df_grouped <- extract_rsp_biomarkers(
#'   variables = list(
#'     rsp = "EOSDY",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     subgroups = "BMRKR2",
#'     strat = "STRATA1"
#'   ),
#'   data = adrs_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   ),
#'   control = control_logistic(
#'     response_definition = "I(response > 500)"
#'   )
#' )
#' df_grouped
#'
#' @export
extract_rsp_biomarkers <- function(variables,
                                   data,
                                   groups_lists = list(),
                                   control = control_logistic(),
                                   label_all = "All Patients") {
  # Validate the inputs that are used directly by this function.
  assert_list_of_variables(variables)
  checkmate::assert_string(variables$rsp)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_string(label_all)

  # Estimates based on the complete data set, labelled as the "all patients" content row.
  overall <- h_logistic_mult_cont_df(variables = variables, data = data, control = control)
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  # Without subgroup variables there is nothing more to add.
  subgroup_vars <- variables$subgroups
  if (is.null(subgroup_vars)) {
    return(overall)
  }

  # One data subset per subgroup level (or per custom grouping from `groups_lists`).
  data_splits <- h_split_by_subgroups(data, subgroup_vars, groups_lists = groups_lists)

  per_subgroup <- lapply(data_splits, function(grp) {
    result <- h_logistic_mult_cont_df(variables = variables, data = grp$df, control = control)
    # Repeat the single row of subgroup labels once for every estimate row.
    result_labels <- grp$df_labels[rep(1, times = nrow(result)), ]
    cbind(result, result_labels)
  })

  by_subgroup <- do.call(rbind, args = c(per_subgroup, make.row.names = FALSE))
  by_subgroup$row_type <- "analysis"

  # Overall rows first, followed by the subgroup rows.
  rbind(overall, by_subgroup)
}

#' Patient Counts for Laboratory Events (Worsen From Baseline) by Highest Grade Post-Baseline
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Patient count and fraction for laboratory events (worsen from baseline) shift table.
#'
#' @inheritParams argument_convention
#'
#' @seealso Relevant helper functions [h_adlb_worsen()] and [h_worsen_counter()]
#'
#' @name abnormal_by_worst_grade_worsen
NULL

#' Helper Function to Prepare ADLB with Worst Labs
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to prepare a `df` for generating the patient count shift table.
#'
#' @param adlb (`data.frame`)\cr `ADLB` dataframe
#' @param worst_flag_low (named `vector`)\cr Worst low post-baseline lab grade flag variable
#' @param worst_flag_high (named `vector`)\cr Worst high post-baseline lab grade flag variable
#' @param direction_var (`string`)\cr Direction variable specifying the direction of the shift table of interest.
#'   Only lab records flagged by `L`, `H` or `B` are included in the shift table.
#'   * `L`: low direction only
#'   * `H`: high direction only
#'   * `B`: both low and high directions
#'
#' @return `h_adlb_worsen()` returns the `adlb` `data.frame` containing only the
#'   worst labs specified according to `worst_flag_low` or `worst_flag_high` for the
#'   direction specified according to `direction_var`. For instance, for a lab that is
#'   needed for the low direction only, only records flagged by `worst_flag_low` are
#'   selected. For a lab that is needed for both low and high directions, the worst
#'   low records are selected for the low direction, and the worst high record are selected
#'   for the high direction.
#'
#' @seealso [abnormal_by_worst_grade_worsen]
#'
#' @examples
#' library(dplyr)
#'
#' # The direction variable, GRADDR, is based on metadata
#' adlb <- tern_ex_adlb %>%
#'   mutate(
#'     GRADDR = case_when(
#'       PARAMCD == "ALT" ~ "B",
#'       PARAMCD == "CRP" ~ "L",
#'       PARAMCD == "IGA" ~ "H"
#'     )
#'   ) %>%
#'   filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
#'
#' df <- h_adlb_worsen(
#'   adlb,
#'   worst_flag_low = c("WGRLOFL" = "Y"),
#'   worst_flag_high = c("WGRHIFL" = "Y"),
#'   direction_var = "GRADDR"
#' )
#'
#' @export
h_adlb_worsen <- function(adlb,
                          worst_flag_low = NULL,
                          worst_flag_high = NULL,
                          direction_var) {
  checkmate::assert_string(direction_var)
  # Make sure the direction column exists before its values are inspected.
  assert_df_with_variables(adlb, list("Col" = direction_var))
  directions <- as.character(unique(adlb[[direction_var]]))
  checkmate::assert_subset(directions, c("B", "L", "H"))

  # Without any worst flag no record can be selected; fail with a clear message
  # instead of an "object 'out' not found" error further down.
  if (is.null(worst_flag_low) && is.null(worst_flag_high)) {
    stop("At least one of `worst_flag_low` and `worst_flag_high` must be specified.")
  }

  # The flag columns needed depend on which directions occur in the data.
  if ("H" %in% directions) {
    assert_df_with_variables(adlb, list("High" = names(worst_flag_high)))
  }

  if ("L" %in% directions) {
    assert_df_with_variables(adlb, list("Low" = names(worst_flag_low)))
  }

  if ("B" %in% directions) {
    assert_df_with_variables(
      adlb,
      list(
        "Low" = names(worst_flag_low),
        "High" = names(worst_flag_high)
      )
    )
  }

  # extract records flagged as worst post-baseline lab, either low or high or both
  worst_flag <- c(worst_flag_low, worst_flag_high)
  flagged_rows <- Map(
    function(flag_col, flag_value) which(adlb[[flag_col]] == flag_value),
    names(worst_flag),
    worst_flag
  )
  adlb_f <- adlb[Reduce(union, flagged_rows), ]

  # Select the records of one direction code ("H", "L" or "B"), relabel the
  # direction as "High" or "Low" and keep only the rows matching the worst flag.
  select_worst <- function(direction_code, direction_label, flag) {
    selected <- adlb_f[which(adlb_f[[direction_var]] == direction_code), ]
    selected[[direction_var]] <- rep(direction_label, nrow(selected))
    selected[which(selected[[names(flag)]] == flag), ]
  }

  # Labs flagged "B" are used twice: once for the high and once for the low direction.
  # The stacking order is: high-only, both (as high), low-only, both (as low).
  parts <- list()
  if (!is.null(worst_flag_high)) {
    parts <- c(
      parts,
      list(
        select_worst("H", "High", worst_flag_high),
        select_worst("B", "High", worst_flag_high)
      )
    )
  }
  if (!is.null(worst_flag_low)) {
    parts <- c(
      parts,
      list(
        select_worst("L", "Low", worst_flag_low),
        select_worst("B", "Low", worst_flag_low)
      )
    )
  }
  out <- do.call(rbind, parts)

  # restore the variable labels of the input data
  formatters::var_labels(out) <- formatters::var_labels(adlb_f, fill = FALSE)
  out
}

#' Helper Function to Analyze Patients for [s_count_abnormal_lab_worsen_by_baseline()]
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to count the number of patients and the fraction of patients according to
#' highest post-baseline lab grade variable `.var`, baseline lab grade variable `baseline_var`,
#' and the direction of interest specified in `direction_var`.
#'
#' @inheritParams argument_convention
#' @inheritParams h_adlb_worsen
#' @param baseline_var (`string`)\cr baseline lab grade variable
#'
#' @return `h_worsen_counter()` returns the counts and fraction of patients
#'   whose worst post-baseline lab grades are worse than their baseline grades, for
#'   post-baseline worst grades "1", "2", "3", "4" and "Any".
#'
#' @seealso [abnormal_by_worst_grade_worsen]
#'
#' @examples
#' library(dplyr)
#'
#' # The direction variable, GRADDR, is based on metadata
#' adlb <- tern_ex_adlb %>%
#'   mutate(
#'     GRADDR = case_when(
#'       PARAMCD == "ALT" ~ "B",
#'       PARAMCD == "CRP" ~ "L",
#'       PARAMCD == "IGA" ~ "H"
#'     )
#'   ) %>%
#'   filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
#'
#' df <- h_adlb_worsen(
#'   adlb,
#'   worst_flag_low = c("WGRLOFL" = "Y"),
#'   worst_flag_high = c("WGRHIFL" = "Y"),
#'   direction_var = "GRADDR"
#' )
#'
#' # `h_worsen_counter`
#' h_worsen_counter(
#'   df %>% filter(PARAMCD == "CRP" & GRADDR == "Low"),
#'   id = "USUBJID",
#'   .var = "ATOXGR",
#'   baseline_var = "BTOXGR",
#'   direction_var = "GRADDR"
#' )
#'
#' @export
h_worsen_counter <- function(df, id, .var, baseline_var, direction_var) {
  checkmate::assert_string(id)
  checkmate::assert_string(.var)
  checkmate::assert_string(baseline_var)
  checkmate::assert_scalar(unique(df[[direction_var]]))
  checkmate::assert_subset(unique(df[[direction_var]]), c("High", "Low"))
  assert_df_with_variables(df, list(val = c(id, .var, baseline_var, direction_var)))

  # obtain directionality before any rows are removed, so that it is still known
  # when every post-baseline grade turns out to be missing
  direction <- unique(df[[direction_var]])

  # remove post-baseline missing; `which()` also drops `NA` grades instead of
  # turning them into all-`NA` rows
  df <- df[which(df[[.var]] != "<Missing>"), ]

  if (direction == "Low") {
    grade <- -1:-4
    worst_grade <- -4
  } else if (direction == "High") {
    grade <- 1:4
    worst_grade <- 4
  }

  if (nrow(df) > 0) {
    by_grade <- lapply(grade, function(i) {
      # keep baseline values that are less severe than i (in this direction) or <Missing>
      df_temp <- df[df[[baseline_var]] %in% c((i + sign(i) * -1):(-1 * worst_grade), "<Missing>"), ]
      # num: number of patients with post-baseline worst lab equal to i
      num <- length(unique(df_temp[df_temp[[.var]] %in% i, id, drop = TRUE]))
      # denom: number of patients remaining after the baseline filter
      denom <- length(unique(df_temp[[id]]))
      c(num = num, denom = denom)
    })
  } else {
    # no evaluable records: a single empty count
    by_grade <- list(c(num = 0, denom = 0))
  }

  names(by_grade) <- as.character(seq_along(by_grade))

  # "Any" row: exclude patients whose baseline is already the worst grade
  df_temp <- df[!df[[baseline_var]] %in% worst_grade, ]

  # denom: number of patients with baseline less severe than the worst grade or <Missing>
  denom <- length(unique(df_temp[, id, drop = TRUE]))

  post_in_direction <- df_temp[[.var]] %in% grade
  baseline_missing <- df_temp[[baseline_var]] == "<Missing>"

  # condition 1: missing baseline and post-baseline in the direction of abnormality
  con1 <- which(baseline_missing & post_in_direction)

  # condition 2: baseline present and post-baseline worse than baseline.
  # `rows_nm` holds row positions within `df_temp`, so `con2` can be combined with `con1`.
  rows_nm <- which(!baseline_missing & post_in_direction)
  post_num <- as.numeric(as.character(df_temp[[.var]][rows_nm]))
  base_num <- as.numeric(as.character(df_temp[[baseline_var]][rows_nm]))
  is_worse <- if (direction == "Low") post_num < base_num else post_num > base_num
  con2 <- rows_nm[which(is_worse)]

  # number of patients satisfying either condition 1 or 2
  num <- length(unique(df_temp[union(con1, con2), id, drop = TRUE]))

  list(fraction = c(by_grade, list("Any" = c(num = num, denom = denom))))
}

#' @describeIn abnormal_by_worst_grade_worsen Statistics function for patients whose worst post-baseline
#'   lab grades are worse than their baseline grades.
#'
#' @param variables (named `list` of `string`)\cr list of additional analysis variables including:
#'   * `id` (`string`)\cr subject variable name.
#'   * `baseline_var` (`string`)\cr name of the data column containing baseline toxicity variable.
#'   * `direction_var` (`string`)\cr name of the data column containing the direction of abnormality
#'     (`"High"` or `"Low"`, as produced by [h_adlb_worsen()]).
#'
#' @return
#' * `s_count_abnormal_lab_worsen_by_baseline()` returns the counts and fraction of patients whose worst
#'   post-baseline lab grades are worse than their baseline grades, for post-baseline worst grades
#'   "1", "2", "3", "4" and "Any".
#'
#' @examples
#' library(dplyr)
#'
#' # The direction variable, GRADDR, is based on metadata
#' adlb <- tern_ex_adlb %>%
#'   mutate(
#'     GRADDR = case_when(
#'       PARAMCD == "ALT" ~ "B",
#'       PARAMCD == "CRP" ~ "L",
#'       PARAMCD == "IGA" ~ "H"
#'     )
#'   ) %>%
#'   filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
#'
#' df <- h_adlb_worsen(
#'   adlb,
#'   worst_flag_low = c("WGRLOFL" = "Y"),
#'   worst_flag_high = c("WGRHIFL" = "Y"),
#'   direction_var = "GRADDR"
#' )
#' # Internal function - s_count_abnormal_lab_worsen_by_baseline
#' \dontrun{
#' # Patients with worsening lab grade for CRP in the direction of low
#' s_count_abnormal_lab_worsen_by_baseline(
#'   df = df %>% filter(ARMCD == "ARM A" & PARAMCD == "CRP"),
#'   .var = "ATOXGR",
#'   variables = list(
#'     id = "USUBJID",
#'     baseline_var = "BTOXGR",
#'     direction_var = "GRADDR"
#'   )
#' )
#' }
#'
#' @keywords internal
s_count_abnormal_lab_worsen_by_baseline <- function(df, # nolint
                                                    .var = "ATOXGR",
                                                    variables = list(
                                                      id = "USUBJID",
                                                      baseline_var = "BTOXGR",
                                                      direction_var = "GRADDR"
                                                    )) {
  # `.var` and each of the three required `variables` entries must be a single column name.
  checkmate::assert_string(.var)
  checkmate::assert_set_equal(names(variables), c("id", "baseline_var", "direction_var"))
  checkmate::assert_string(variables$id)
  checkmate::assert_string(variables$baseline_var)
  checkmate::assert_string(variables$direction_var)

  # All referenced columns have to be present in `df`.
  required_columns <- c(aval = .var, variables[1:3])
  assert_df_with_variables(df, required_columns)
  assert_list_of_variables(variables)

  # The counting itself is delegated to the helper.
  h_worsen_counter(
    df = df,
    id = variables$id,
    .var = .var,
    baseline_var = variables$baseline_var,
    direction_var = variables$direction_var
  )
}


#' @describeIn abnormal_by_worst_grade_worsen Formatted analysis function which is used as `afun`
#'   in `count_abnormal_lab_worsen_by_baseline()`.
#'
#' @return
#' * `a_count_abnormal_lab_worsen_by_baseline()` returns the corresponding list with
#'   formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_count_abnormal_lab_worsen_by_baseline
#' \dontrun{
#' a_count_abnormal_lab_worsen_by_baseline(
#'   df = df %>% filter(ARMCD == "ARM A" & PARAMCD == "CRP"),
#'   .var = "ATOXGR",
#'   variables = list(id = "USUBJID", baseline_var = "BTOXGR", direction_var = "GRADDR")
#' )
#' }
#'
#' @keywords internal
a_count_abnormal_lab_worsen_by_baseline <- make_afun( # nolint
  # Statistics function whose results are formatted.
  s_count_abnormal_lab_worsen_by_baseline,
  # The `fraction` statistic is formatted with `format_fraction`.
  .formats = c(fraction = format_fraction),
  # NOTE(review): presumably this splits the `fraction` list (grades and "Any") into
  # separate rows - confirm against `make_afun()`.
  .ungroup_stats = "fraction"
)

#' @describeIn abnormal_by_worst_grade_worsen Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal_lab_worsen_by_baseline()` returns a layout object suitable for passing to further layouting
#'   functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted
#'   rows containing the statistics from `s_count_abnormal_lab_worsen_by_baseline()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by("ARMCD") %>%
#'   add_colcounts() %>%
#'   split_rows_by("PARAMCD") %>%
#'   split_rows_by("GRADDR") %>%
#'   count_abnormal_lab_worsen_by_baseline(
#'     var = "ATOXGR",
#'     variables = list(
#'       id = "USUBJID",
#'       baseline_var = "BTOXGR",
#'       direction_var = "GRADDR"
#'     )
#'   ) %>%
#'   append_topleft("Direction of Abnormality") %>%
#'   build_table(df = df, alt_counts_df = tern_ex_adsl)
#'
#' @export
count_abnormal_lab_worsen_by_baseline <- function(lyt, # nolint
                                                  var,
                                                  ...,
                                                  table_names = NULL,
                                                  .stats = NULL,
                                                  .formats = NULL,
                                                  .labels = NULL,
                                                  .indent_mods = NULL) {
  checkmate::assert_string(var)

  # `table_names` used to be accepted but silently ignored. When it is not given,
  # fall back to `var`, mirroring the `table_names = vars` default of `estimate_proportion()`.
  if (is.null(table_names)) {
    table_names <- var
  }

  # Apply the user-supplied statistics selection, formats, labels and indentation.
  afun <- make_afun(
    a_count_abnormal_lab_worsen_by_baseline,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Additional arguments in `...` are handed to the analysis function via `extra_args`.
  analyze(
    lyt = lyt,
    vars = var,
    afun = afun,
    extra_args = list(...),
    show_labels = "hidden",
    table_names = table_names
  )
}

#' Estimation of Proportions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Estimate the proportion of responders within a studied population.
#'
#' @inheritParams argument_convention
#'
#' @seealso [h_proportions]
#'
#' @name estimate_proportions
NULL

#' @describeIn estimate_proportions Statistics function estimating a
#'   proportion along with its confidence interval.
#'
#' @inheritParams prop_strat_wilson
#' @param df (`logical` or `data.frame`)\cr if only a logical vector is used,
#'   it indicates whether each subject is a responder or not. `TRUE` represents
#'   a successful outcome. If a `data.frame` is provided, also the `strata` variable
#'   names must be provided in `variables` as a list element with the strata strings.
#'   In the case of `data.frame`, the logical vector of responses must be indicated as a
#'   variable name in `.var`.
#' @param method (`string`)\cr the method used to construct the confidence interval
#'   for proportion of successful outcomes; one of `waldcc`, `wald`, `clopper-pearson`,
#'   `wilson`, `wilsonc`, `strat_wilson`, `strat_wilsonc`, `agresti-coull` or `jeffreys`.
#' @param long (`flag`)\cr whether a long (`TRUE`) or a short (`FALSE`, the default) description is required.
#'
#' @return
#' * `s_proportion()` returns statistics `n_prop` (`n` and proportion) and `prop_ci` (proportion CI) for a
#'   given variable.
#'
#' @examples
#' # Case with only logical vector.
#' rsp_v <- c(1, 0, 1, 0, 1, 1, 0, 0)
#' s_proportion(rsp_v)
#'
#' # Example for Stratified Wilson CI
#' nex <- 100 # Number of example rows
#' dta <- data.frame(
#'   "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
#'   "grp" = sample(c("A", "B"), nex, TRUE),
#'   "f1" = sample(c("a1", "a2"), nex, TRUE),
#'   "f2" = sample(c("x", "y", "z"), nex, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' s_proportion(
#'   df = dta,
#'   .var = "rsp",
#'   variables = list(strata = c("f1", "f2")),
#'   conf_level = 0.90,
#'   method = "strat_wilson"
#' )
#'
#' @export
s_proportion <- function(df,
                         .var,
                         conf_level = 0.95,
                         method = c(
                           "waldcc", "wald", "clopper-pearson",
                           "wilson", "wilsonc", "strat_wilson", "strat_wilsonc",
                           "agresti-coull", "jeffreys"
                         ),
                         weights = NULL,
                         max_iterations = 50,
                         variables = list(strata = NULL),
                         long = FALSE) {
  method <- match.arg(method)
  checkmate::assert_flag(long)
  assert_proportion_value(conf_level)

  strata_colnames <- variables$strata
  if (is.null(strata_colnames)) {
    # Stratified methods cannot work without strata variables.
    if (method %in% c("strat_wilson", "strat_wilsonc")) {
      stop("To use stratified methods you need to specify the strata variables.")
    }
  } else {
    # Strata columns must exist in `df`; further checks are left to `prop_strat_wilson()`.
    if (missing(df)) stop("When doing stratified analysis a data.frame with specific columns is needed.")
    checkmate::assert_character(strata_colnames, null.ok = FALSE)
    assert_df_with_variables(df, stats::setNames(as.list(strata_colnames), strata_colnames))

    # One stratum per combination of the strata variables.
    strata <- as.factor(interaction(df[strata_colnames]))
  }

  # The responses are either `df` itself (atomic vector) or the `.var` column of `df`.
  rsp <- as.logical(if (checkmate::test_atomic_vector(df)) df else df[[.var]])
  n <- sum(rsp)
  p_hat <- mean(rsp)

  # Methods with and without continuity correction share one call each;
  # the correction flag is derived from the method name.
  prop_ci <- switch(method,
    "clopper-pearson" = prop_clopper_pearson(rsp, conf_level),
    "wilson" = ,
    "wilsonc" = prop_wilson(rsp, conf_level, correct = (method == "wilsonc")),
    "strat_wilson" = ,
    "strat_wilsonc" = prop_strat_wilson(
      rsp,
      strata,
      weights,
      conf_level,
      max_iterations,
      correct = (method == "strat_wilsonc")
    )$conf_int,
    "wald" = ,
    "waldcc" = prop_wald(rsp, conf_level, correct = (method == "waldcc")),
    "agresti-coull" = prop_agresti_coull(rsp, conf_level),
    "jeffreys" = prop_jeffreys(rsp, conf_level)
  )

  # The confidence interval is reported in percent and labelled with the method description.
  ci_label <- d_proportion(conf_level, method, long = long)
  list(
    "n_prop" = formatters::with_label(c(n, p_hat), "Responders"),
    "prop_ci" = formatters::with_label(x = 100 * prop_ci, label = ci_label)
  )
}

#' @describeIn estimate_proportions Formatted analysis function which is used as `afun`
#'   in `estimate_proportion()`.
#'
#' @return
#' * `a_proportion()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @export
a_proportion <- make_afun(
  # Statistics function whose results are formatted.
  s_proportion,
  # Default formats for the two statistics returned by `s_proportion()`:
  # `n_prop` (count and proportion) and `prop_ci` (confidence interval bounds).
  .formats = c(n_prop = "xx (xx.x%)", prop_ci = "(xx.x, xx.x)")
)

#' @describeIn estimate_proportions Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param ... other arguments are ultimately conveyed to [s_proportion()].
#'
#' @return
#' * `estimate_proportion()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_proportion()` to the table layout.
#'
#' @examples
#' dta_test <- data.frame(
#'   USUBJID = paste0("S", 1:12),
#'   ARM     = rep(LETTERS[1:3], each = 4),
#'   AVAL    = c(A = c(1, 1, 1, 1), B = c(0, 0, 1, 1), C = c(0, 0, 0, 0))
#' )
#'
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   estimate_proportion(vars = "AVAL") %>%
#'   build_table(df = dta_test)
#'
#' @export
estimate_proportion <- function(lyt,
                                vars,
                                ...,
                                show_labels = "hidden",
                                table_names = vars,
                                .stats = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # Customize the formatted analysis function with the user-supplied settings.
  custom_afun <- make_afun(
    a_proportion,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Arguments in `...` are handed to the analysis function via `extra_args`.
  analyze(
    lyt = lyt,
    vars = vars,
    afun = custom_afun,
    extra_args = list(...),
    show_labels = show_labels,
    table_names = table_names
  )
}

#' Helper Functions for Calculating Proportion Confidence Intervals
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Functions to calculate different proportion confidence intervals for use in [estimate_proportion()].
#'
#' @inheritParams argument_convention
#' @inheritParams estimate_proportions
#'
#' @return Confidence interval of a proportion.
#'
#' @seealso [estimate_proportions], descriptive function [d_proportion()],
#'  and helper functions [strata_normal_quantile()] and [update_weights_strat_wilson()].
#'
#' @name h_proportions
NULL

#' @describeIn h_proportions Calculates the Wilson interval by calling [stats::prop.test()].
#'  Also referred to as Wilson score interval.
#'
#' @examples
#' rsp <- c(
#'   TRUE, TRUE, TRUE, TRUE, TRUE,
#'   FALSE, FALSE, FALSE, FALSE, FALSE
#' )
#' prop_wilson(rsp, conf_level = 0.9)
#'
#' @export
prop_wilson <- function(rsp, conf_level, correct = FALSE) {
  # Number of responders and number of observations.
  n_rsp <- sum(rsp)
  n_obs <- length(rsp)

  # `stats::prop.test()` provides the Wilson score interval, optionally with continuity correction.
  score_test <- stats::prop.test(
    x = n_rsp,
    n = n_obs,
    conf.level = conf_level,
    correct = correct
  )

  # Return the plain bounds without the `conf.level` attribute.
  as.numeric(score_test[["conf.int"]])
}

#' @describeIn h_proportions Calculates the stratified Wilson confidence
#'   interval for unequal proportions as described in \insertCite{Yan2010-jt;textual}{tern}
#'
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
#' @param weights (`numeric` or `NULL`)\cr weights for each level of the strata. If `NULL`, they are
#'   estimated using the iterative algorithm proposed in \insertCite{Yan2010-jt;textual}{tern} that
#'   minimizes the weighted squared length of the confidence interval.
#' @param max_iterations (`count`)\cr maximum number of iterations for the iterative procedure used
#'   to find estimates of optimal weights.
#' @param correct (`flag`)\cr include the continuity correction. For further information, see for example
#'   [stats::prop.test()].
#'
#' @references
#' \insertRef{Yan2010-jt}{tern}
#'
#' @examples
#' # Stratified Wilson confidence interval with unequal probabilities
#'
#' set.seed(1)
#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
#' strata_data <- data.frame(
#'   "f1" = sample(c("a", "b"), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' )
#' strata <- interaction(strata_data)
#' n_strata <- ncol(table(rsp, strata)) # Number of strata
#'
#' prop_strat_wilson(
#'   rsp = rsp, strata = strata,
#'   conf_level = 0.90
#' )
#'
#' # Not automatic setting of weights
#' prop_strat_wilson(
#'   rsp = rsp, strata = strata,
#'   weights = rep(1 / n_strata, n_strata),
#'   conf_level = 0.90
#' )
#'
#' @export
prop_strat_wilson <- function(rsp,
                              strata,
                              weights = NULL,
                              conf_level = 0.95,
                              max_iterations = NULL,
                              correct = FALSE) {
  checkmate::assert_logical(rsp, any.missing = FALSE)
  checkmate::assert_factor(strata, len = length(rsp))
  assert_proportion_value(conf_level)

  tbl <- table(rsp, strata)
  n_strata <- ncol(tbl)

  # Strata without any observation are excluded from the calculation.
  ns <- colSums(tbl)
  use_stratum <- (ns > 0)

  # Checking the weights and maximum number of iterations.
  do_iter <- is.null(weights)
  if (do_iter) {
    # Iteration parameters
    if (is.null(max_iterations)) max_iterations <- 10
    checkmate::assert_int(max_iterations, na.ok = FALSE, null.ok = FALSE, lower = 1)

    # Initialization for the iterative procedure: equal weights over the strata that are
    # actually used, so that the weights always match `vars` in length.
    n_used <- sum(use_stratum)
    weights <- rep(1 / n_used, n_used)
  } else {
    checkmate::assert_numeric(weights, lower = 0, upper = 1, any.missing = FALSE, len = n_strata)
    # Compare with a numerical tolerance: a sum of doubles such as 0.7 + 0.2 + 0.1 is not
    # guaranteed to be exactly 1.
    if (!isTRUE(all.equal(sum(weights), 1))) {
      stop("Sum of weights must be 1.")
    }
    # NOTE(review): user-supplied weights are given per factor level; if some strata are
    # empty they no longer line up with the non-empty strata used below - confirm how
    # this case should be handled.
  }

  xs <- tbl["TRUE", ]
  ns <- ns[use_stratum]
  xs <- xs[use_stratum]
  ests <- xs / ns
  vars <- ests * (1 - ests) / ns

  strata_qnorm <- strata_normal_quantile(vars, weights, conf_level)

  # Iterative setting of weights if they were not set externally
  weights_new <- if (do_iter) {
    update_weights_strat_wilson(vars, strata_qnorm, weights, ns, max_iterations, conf_level)$weights
  } else {
    weights
  }

  # Confidence level for the single strata that corresponds to the stratified quantile.
  strata_conf_level <- 2 * stats::pnorm(strata_qnorm) - 1

  ci_by_strata <- Map(
    function(x, n) {
      # Classic Wilson's confidence interval; warnings raised by `prop.test()` are suppressed.
      suppressWarnings(stats::prop.test(x, n, correct = correct, conf.level = strata_conf_level)$conf.int)
    },
    x = xs,
    n = ns
  )
  # `vapply()` guarantees one numeric bound per stratum.
  lower_by_strata <- vapply(ci_by_strata, "[", numeric(1), 1L)
  upper_by_strata <- vapply(ci_by_strata, "[", numeric(1), 2L)

  # The overall bounds are the weighted sums of the per-stratum bounds.
  conf_int <- c(
    lower = sum(weights_new * lower_by_strata),
    upper = sum(weights_new * upper_by_strata)
  )

  # The estimated weights are only returned when they were not supplied by the caller.
  if (do_iter) {
    list(conf_int = conf_int, weights = weights_new)
  } else {
    list(conf_int = conf_int)
  }
}

#' @describeIn h_proportions Calculates the Clopper-Pearson interval by calling [stats::binom.test()].
#'   Also referred to as the `exact` method.
#'
#' @examples
#' prop_clopper_pearson(rsp, conf_level = .95)
#'
#' @export
prop_clopper_pearson <- function(rsp,
                                 conf_level) {
  # Number of responders and number of observations.
  n_rsp <- sum(rsp)
  n_obs <- length(rsp)

  # The interval reported by the exact binomial test is the Clopper-Pearson interval.
  exact_test <- stats::binom.test(x = n_rsp, n = n_obs, conf.level = conf_level)

  # Return the plain bounds without the `conf.level` attribute.
  as.numeric(exact_test[["conf.int"]])
}

#' @describeIn h_proportions Calculates the Wald interval by following the usual textbook definition
#'   for a single proportion confidence interval using the normal approximation.
#'
#' @param correct (`flag`)\cr apply continuity correction.
#'
#' @examples
#' prop_wald(rsp, conf_level = 0.95)
#' prop_wald(rsp, conf_level = 0.95, correct = TRUE)
#'
#' @export
prop_wald <- function(rsp, conf_level, correct = FALSE) {
  n <- length(rsp)
  p_hat <- mean(rsp)
  q_hat <- 1 - p_hat

  # Two-sided normal quantile for the requested confidence level.
  z <- stats::qnorm((1 + conf_level) / 2)

  # The continuity correction widens the interval by 1 / (2n) on each side.
  continuity <- if (correct) {
    1 / (2 * n)
  } else {
    0
  }
  half_width <- z * sqrt(p_hat * q_hat) / sqrt(n) + continuity

  # Bounds are truncated to the unit interval.
  c(
    max(0, p_hat - half_width),
    min(1, p_hat + half_width)
  )
}

#' @describeIn h_proportions Calculates the Agresti-Coull interval (created by Alan Agresti and Brent Coull) by
#'   (for 95% CI) adding two successes and two failures to the data and then using the Wald formula to construct a CI.
#'
#' @examples
#' prop_agresti_coull(rsp, conf_level = 0.95)
#'
#' @export
prop_agresti_coull <- function(rsp, conf_level) {
  # Two-sided normal quantile for the requested confidence level.
  z <- stats::qnorm((1 + conf_level) / 2)

  # Adjusted counts: z^2 / 2 pseudo-successes and z^2 / 2 pseudo-failures are added.
  n_tilde <- length(rsp) + z^2
  p_tilde <- (sum(rsp) + z^2 / 2) / n_tilde

  # Wald interval based on the adjusted proportion and sample size.
  half_width <- z * sqrt(p_tilde * (1 - p_tilde)) / sqrt(n_tilde)

  # Bounds are truncated to the unit interval.
  c(
    max(0, p_tilde - half_width),
    min(1, p_tilde + half_width)
  )
}

#' @describeIn h_proportions Calculates the Jeffreys interval, an equal-tailed interval based on the
#'   non-informative Jeffreys prior for a binomial proportion.
#'
#' @examples
#' prop_jeffreys(rsp, conf_level = 0.95)
#'
#' @export
prop_jeffreys <- function(rsp,
                          conf_level) {
  n <- length(rsp)
  x_sum <- sum(rsp)
  alpha <- 1 - conf_level

  # Parameters of the Beta distribution whose quantiles give the interval bounds.
  shape_success <- x_sum + 0.5
  shape_failure <- n - x_sum + 0.5

  # The lower bound is set to 0 when there are no responders.
  lower <- ifelse(
    x_sum == 0,
    0,
    stats::qbeta(alpha / 2, shape_success, shape_failure)
  )

  # The upper bound is set to 1 when all observations are responders.
  upper <- ifelse(
    x_sum == n,
    1,
    stats::qbeta(1 - alpha / 2, shape_success, shape_failure)
  )

  c(lower, upper)
}

#' Description of the Proportion Summary
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is a helper function that describes the analysis in [s_proportion()].
#'
#' @inheritParams s_proportion
#' @param long (`flag`)\cr whether a long or a short (default) description is required.
#'
#' @return String describing the analysis.
#'
#' @export
d_proportion <- function(conf_level,
                         method,
                         long = FALSE) {
  # Confidence level in percent, optionally followed by the long description.
  ci_text <- paste0(conf_level * 100, "% CI")
  label <- if (long) {
    paste(ci_text, "for Response Rates")
  } else {
    ci_text
  }

  # Human-readable name of the method; unknown methods are an error.
  method_part <- switch(method,
    "waldcc" = "Wald, with correction",
    "wald" = "Wald, without correction",
    "clopper-pearson" = "Clopper-Pearson",
    "wilson" = "Wilson, without correction",
    "wilsonc" = "Wilson, with correction",
    "strat_wilson" = "Stratified Wilson, without correction",
    "strat_wilsonc" = "Stratified Wilson, with correction",
    "agresti-coull" = "Agresti-Coull",
    "jeffreys" = "Jeffreys",
    stop(paste(method, "does not have a description"))
  )

  paste0(label, " (", method_part, ")")
}

#' Helper Function for the Estimation of Stratified Quantiles
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This function wraps the estimation of stratified percentiles when we assume
#' the approximation for large numbers. This is necessary only in the case
#' proportions for each strata are unequal.
#'
#' @inheritParams argument_convention
#' @inheritParams prop_strat_wilson
#'
#' @return Stratified quantile.
#'
#' @seealso [prop_strat_wilson()]
#'
#' @examples
#' strata_data <- table(data.frame(
#'   "f1" = sample(c(TRUE, FALSE), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' ))
#' ns <- colSums(strata_data)
#' ests <- strata_data["TRUE", ] / ns
#' vars <- ests * (1 - ests) / ns
#' weights <- rep(1 / length(ns), length(ns))
#' strata_normal_quantile(vars, weights, 0.95)
#'
#' @export
strata_normal_quantile <- function(vars, weights, conf_level) {
  # Per-stratum term: squared weight times the stratum's `vars` entry.
  weighted_vars <- weights^2 * vars

  # Square root of the summed terms versus the sum of the per-stratum square roots.
  root_of_sum <- sqrt(sum(weighted_vars))
  sum_of_roots <- sum(sqrt(weighted_vars))

  # Two-sided standard normal quantile for the requested confidence level.
  z_two_sided <- stats::qnorm((1 + conf_level) / 2)

  # Stratified quantile: the normal quantile scaled by the ratio of the two aggregates.
  root_of_sum / sum_of_roots * z_two_sided
}

#' Helper Function for the Estimation of Weights for `prop_strat_wilson`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This function wraps the iteration procedure that allows you to estimate
#' the weights for each proportional strata. This assumes to minimize the
#' weighted squared length of the confidence interval.
#'
#' @inheritParams prop_strat_wilson
#' @param vars (`numeric`)\cr normalized proportions for each strata.
#' @param strata_qnorm (`numeric`)\cr initial estimation with identical weights of the quantiles.
#' @param initial_weights (`numeric`)\cr initial weights used to calculate `strata_qnorm`. This can
#'   be optimized in the future if we need to estimate better initial weights.
#' @param n_per_strata (`numeric`)\cr number of elements in each strata.
#' @param max_iterations (`count`)\cr maximum number of iterations to be tried. Convergence is always checked.
#' @param tol (`number`)\cr tolerance threshold for convergence.
#'
#' @return A `list` of 3 elements: `n_it`, `weights`, and `diff_v`.
#'
#' @seealso For references and details see [prop_strat_wilson()].
#'
#' @examples
#' vs <- c(0.011, 0.013, 0.012, 0.014, 0.017, 0.018)
#' sq <- 0.674
#' ws <- rep(1 / length(vs), length(vs))
#' ns <- c(22, 18, 17, 17, 14, 12)
#'
#' update_weights_strat_wilson(vs, sq, ws, ns, 100, 0.95, 0.001)
#'
#' @export
update_weights_strat_wilson <- function(vars,
                                        strata_qnorm,
                                        initial_weights,
                                        n_per_strata,
                                        max_iterations = 50,
                                        conf_level = 0.95,
                                        tol = 0.001) {
  it <- 0
  # Sum of absolute weight changes, one entry per iteration that was run.
  diff_v <- numeric(0)
  # Convergence is tracked explicitly: comparing `it` with `max_iterations` cannot tell
  # "converged on the last allowed iteration" apart from "ran out of iterations".
  converged <- FALSE
  # If no iteration is run at all (`max_iterations = 0`) the starting weights are returned.
  weights_new <- initial_weights

  while (it < max_iterations) {
    it <- it + 1

    # Candidate weights: ratio of the two terms below, rescaled so that they sum to 1.
    weights_new_t <- (1 + strata_qnorm^2 / n_per_strata)^2
    weights_new_b <- (vars + strata_qnorm^2 / (4 * n_per_strata^2))
    weights_new <- weights_new_t / weights_new_b
    weights_new <- weights_new / sum(weights_new)

    # The quantile depends on the weights, so it is re-estimated for the next iteration.
    strata_qnorm <- strata_normal_quantile(vars, weights_new, conf_level)

    # Stop as soon as the weights move by less than `tol` in total.
    diff_v[it] <- sum(abs(weights_new - initial_weights))
    if (diff_v[it] < tol) {
      converged <- TRUE
      break
    }
    initial_weights <- weights_new
  }

  if (!converged) {
    warning("The heuristic to find weights did not converge with max_iterations = ", max_iterations)
  }

  list(
    "n_it" = it,
    "weights" = weights_new,
    "diff_v" = diff_v
  )
}

#' Multivariate Logistic Regression Table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Layout-creating function which summarizes a logistic variable regression for binary outcome with
#' categorical/continuous covariates in model statement. For each covariate category (if categorical)
#' or specified values (if continuous), present degrees of freedom, regression parameter estimate and
#' standard error (SE) relative to reference group or category. Report odds ratios for each covariate
#' category or specified values and corresponding Wald confidence intervals as default but allow user
#' to specify other confidence levels. Report p-value for Wald chi-square test of the null hypothesis
#' that covariate has no effect on response in model containing all specified covariates.
#' Allow option to include one two-way interaction and present similar output for
#' each interaction degree of freedom.
#'
#' @inheritParams argument_convention
#' @param drop_and_remove_str (`character`)\cr string to be dropped and removed.
#'
#' @return A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
#'   Adding this function to an `rtable` layout will add a logistic regression variable summary to the table layout.
#'
#' @note For the formula, the variable names need to be standard `data.frame` column names without
#'   special characters.
#'
#' @examples
#' library(dplyr)
#' library(broom)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
#'   mutate(
#'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     RACE = factor(RACE),
#'     SEX = factor(SEX)
#'   )
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
#' mod1 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE")
#'   )
#' )
#' mod2 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE"),
#'     interaction = "AGE"
#'   )
#' )
#'
#' df <- tidy(mod1, conf_level = 0.99)
#' df2 <- tidy(mod2, conf_level = 0.99)
#'
#' # flagging empty strings with "_"
#' df <- df_explicit_na(df, na_level = "_")
#' df2 <- df_explicit_na(df2, na_level = "_")
#'
#' result1 <- basic_table() %>%
#'   summarize_logistic(
#'     conf_level = 0.95,
#'     drop_and_remove_str = "_"
#'   ) %>%
#'   build_table(df = df)
#' result1
#'
#' result2 <- basic_table() %>%
#'   summarize_logistic(
#'     conf_level = 0.95,
#'     drop_and_remove_str = "_"
#'   ) %>%
#'   build_table(df = df2)
#' result2
#'
#' @export
summarize_logistic <- function(lyt,
                               conf_level,
                               drop_and_remove_str = "",
                               .indent_mods = NULL) {
  checkmate::assert_string(drop_and_remove_str)

  # Content-row writers, one per kind of summary row flagged in the tidied model data.
  add_variable_test <- logistic_summary_by_flag("is_variable_summary")
  add_term_estimates <- logistic_summary_by_flag("is_term_summary", .indent_mods = .indent_mods)
  add_odds_ratios <- logistic_summary_by_flag("is_reference_summary", .indent_mods = .indent_mods)

  # Every row split has the same shape: split by `<var>`, label by `<var>_label`, and use the
  # split function built from `drop_and_remove_str`.
  split_fun <- drop_and_remove_levels(drop_and_remove_str)
  split_rows <- function(layout, var) {
    split_rows_by(layout, var = var, labels_var = paste0(var, "_label"), split_fun = split_fun)
  }

  # Columns first, then the nested row structure: variable > term > interaction > reference.
  lyt <- logistic_regression_cols(lyt, conf_level = conf_level)
  lyt <- add_variable_test(split_rows(lyt, "variable"))
  lyt <- add_term_estimates(split_rows(lyt, "term"))
  lyt <- split_rows(lyt, "interaction")
  add_odds_ratios(split_rows(lyt, "reference"))
}

#' Fit for Logistic Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Fit a (conditional) logistic regression model.
#'
#' @inheritParams argument_convention
#' @param data (`data.frame`)\cr the data frame on which the model was fit.
#' @param response_definition (`string`)\cr the definition of what an event is in terms of `response`.
#'   This will be used when fitting the (conditional) logistic regression model on the left hand
#'   side of the formula.
#'
#' @return A fitted logistic regression model.
#'
#' @section Model Specification:
#'
#' The `variables` list needs to include the following elements:
#'   * `arm`: Treatment arm variable name.
#'   * `response`: The response variable name. Usually this is a 0/1 variable.
#'   * `covariates`: This is either `NULL` (no covariates) or a character vector of covariate variable names.
#'   * `interaction`: This is either `NULL` (no interaction) or a string of a single covariate variable name already
#'     included in `covariates`. Then the interaction with the treatment arm is included in the model.
#'
#' @examples
#' library(dplyr)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
#'   mutate(
#'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     RACE = factor(RACE),
#'     SEX = factor(SEX)
#'   )
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
#' mod1 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE")
#'   )
#' )
#' mod2 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE"),
#'     interaction = "AGE"
#'   )
#' )
#'
#' @export
fit_logistic <- function(data,
                         variables = list(
                           response = "Response",
                           arm = "ARMCD",
                           covariates = NULL,
                           interaction = NULL,
                           strata = NULL
                         ),
                         response_definition = "response") {
  assert_df_with_variables(data, variables)
  checkmate::assert_subset(names(variables), c("response", "arm", "covariates", "interaction", "strata"))
  checkmate::assert_string(response_definition)
  checkmate::assert_true(grepl("response", response_definition))

  # A stratified model is requested whenever strata variables are supplied.
  stratified <- !is.null(variables$strata)

  # Left-hand side: the first literal "response" in the definition is replaced by the
  # actual response variable name.
  lhs <- sub(
    pattern = "response",
    replacement = variables$response,
    x = response_definition,
    fixed = TRUE
  )

  # Right-hand side is assembled as text: arm, then covariates, then the optional
  # arm-by-covariate interaction, then the optional strata term.
  model_text <- paste0(lhs, " ~ ", variables$arm)
  if (!is.null(variables$covariates)) {
    model_text <- paste0(model_text, " + ", paste(variables$covariates, collapse = " + "))
  }
  if (!is.null(variables$interaction)) {
    # Only a single interaction variable is allowed, and it must be one of the covariates.
    checkmate::assert_string(variables$interaction)
    checkmate::assert_subset(variables$interaction, variables$covariates)
    model_text <- paste0(model_text, " + ", variables$arm, ":", variables$interaction)
  }
  if (stratified) {
    # Several strata variables are combined into one factor via `interaction()`.
    strata_arg <- if (length(variables$strata) > 1) {
      paste0("I(interaction(", paste0(variables$strata, collapse = ", "), "))")
    } else {
      variables$strata
    }
    model_text <- paste0(model_text, "+ strata(", strata_arg, ")")
  }
  formula <- stats::as.formula(model_text)

  # With strata the model goes to the conditional logistic regression wrapper,
  # otherwise an ordinary binomial GLM with logit link is fitted.
  if (stratified) {
    return(
      clogit_with_tryCatch(
        formula = formula,
        data = data,
        x = TRUE
      )
    )
  }
  stats::glm(
    formula = formula,
    data = data,
    family = stats::binomial("logit")
  )
}

#' Custom Tidy Method for Binomial GLM Results
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper method (for [broom::tidy()]) to prepare a data frame from a `glm` object
#' with `binomial` family.
#'
#' @inheritParams argument_convention
#' @param at (`NULL` or `numeric`)\cr optional values for the interaction variable. Otherwise the median is used.
#' @param fit_glm logistic regression model fitted by [stats::glm()] with "binomial" family.
#'
#' @return A `data.frame` containing the tidied model.
#'
#' @method tidy glm
#'
#' @seealso [h_logistic_regression] for relevant helper functions.
#'
#' @examples
#' library(dplyr)
#' library(broom)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
#'   mutate(
#'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     RACE = factor(RACE),
#'     SEX = factor(SEX)
#'   )
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
#' mod1 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE")
#'   )
#' )
#' mod2 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE"),
#'     interaction = "AGE"
#'   )
#' )
#'
#' df <- tidy(mod1, conf_level = 0.99)
#' df2 <- tidy(mod2, conf_level = 0.99)
#'
#' @export
tidy.glm <- function(fit_glm, # nolint
                     conf_level = 0.95,
                     at = NULL) {
  checkmate::assert_class(fit_glm, "glm")
  checkmate::assert_set_equal(fit_glm$family$family, "binomial")

  term_labels <- attr(stats::terms(fit_glm), "term.labels")

  # Term labels that are not names of the `dataClasses` attribute are taken to be interaction terms.
  plain_variables <- names(attr(fit_glm$terms, "dataClasses"))
  has_interaction <- !all(term_labels %in% plain_variables)

  # Models with an interaction need the dedicated helper, which also receives `at`.
  df <- if (has_interaction) {
    h_logistic_inter_terms(
      x = term_labels,
      fit_glm = fit_glm,
      conf_level = conf_level,
      at = at
    )
  } else {
    h_logistic_simple_terms(
      x = term_labels,
      fit_glm = fit_glm,
      conf_level = conf_level
    )
  }

  # Turn the identifying columns into factors whose levels follow the order of first appearance.
  for (id_col in c("variable", "term", "interaction", "reference")) {
    values <- df[[id_col]]
    df[[id_col]] <- factor(values, levels = unique(values))
  }
  df
}

#' Logistic Regression Multivariate Column Layout Function
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Layout-creating function which creates a multivariate column layout summarizing logistic
#' regression results. This function is a wrapper for [rtables::split_cols_by_multivar()].
#'
#' @inheritParams argument_convention
#'
#' @return A layout object suitable for passing to further layouting functions. Adding this
#'   function to an `rtable` layout will split the table into columns corresponding to
#'   statistics `df`, `estimate`, `std_error`, `odds_ratio`, `ci`, and `pvalue`.
#'
#' @export
logistic_regression_cols <- function(lyt,
                                     conf_level = 0.95) {
  # Statistics shown as columns, in display order.
  stat_vars <- c("df", "estimate", "std_error", "odds_ratio", "ci", "pvalue")

  # The confidence interval header carries the confidence level text from `f_conf_level()`.
  ci_header <- paste("Wald", f_conf_level(conf_level))

  # Column headers, named by the statistic they belong to.
  headers <- c(
    df = "Degrees of Freedom",
    estimate = "Parameter Estimate",
    std_error = "Standard Error",
    odds_ratio = "Odds Ratio",
    ci = ci_header,
    pvalue = "p-value"
  )

  split_cols_by_multivar(lyt = lyt, vars = stat_vars, varlabels = headers)
}

#' Logistic Regression Summary Table Constructor Function
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Constructor for content functions to be used in [`summarize_logistic()`] to summarize
#' logistic regression results. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @inheritParams argument_convention
#' @param flag_var (`string`)\cr variable name identifying which row should be used in this
#'   content function.
#'
#' @return A content function.
#'
#' @export
logistic_summary_by_flag <- function(flag_var, .indent_mods = NULL) {
  checkmate::assert_string(flag_var)

  # Content function for a single statistic column, tied to this constructor's `flag_var`
  # and `.indent_mods`.
  stat_cfun <- function(stat, fmt) {
    cfun_by_flag(stat, flag_var, format = fmt, .indent_mods = .indent_mods)
  }

  # The returned function adds one content row per row group, with one content function
  # (and format) per statistic column.
  function(lyt) {
    summarize_row_groups(
      lyt = lyt,
      cfun = list(
        df = stat_cfun("df", "xx."),
        estimate = stat_cfun("estimate", "xx.xxx"),
        std_error = stat_cfun("std_error", "xx.xxx"),
        odds_ratio = stat_cfun("odds_ratio", ">999.99"),
        ci = stat_cfun("ci", format_extreme_values_ci(2L)),
        pvalue = stat_cfun("pvalue", "x.xxxx | (<0.0001)")
      )
    )
  }
}

#' Line plot with the optional table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Line plot with the optional table.
#'
#' @param df (`data.frame`)\cr data set containing all analysis variables.
#' @param alt_counts_df (`data.frame` or `NULL`)\cr data set that will be used (only) to count objects in strata.
#' @param variables (named `character` vector) of variable names in `df` data set. Details are:
#'   * `x` (`character`)\cr name of x-axis variable.
#'   * `y` (`character`)\cr name of y-axis variable.
#'   * `strata` (`character`)\cr name of grouping variable, i.e. treatment arm. Can be `NA` to indicate lack of groups.
#'   * `paramcd` (`character`)\cr name of the variable for parameter's code. Used for y-axis label and plot's subtitle.
#'     Can be `NA` if paramcd is not to be added to the y-axis label or subtitle.
#'   * `y_unit` (`character`)\cr name of variable with units of `y`. Used for y-axis label and plot's subtitle.
#'     Can be `NA` if y unit is not to be added to the y-axis label or subtitle.
#' @param mid (`character` or `NULL`)\cr names of the statistics that will be plotted as midpoints.
#'   All the statistics indicated in `mid` variable must be present in the object returned by `sfun`,
#'   and be of a `double` or `numeric` type vector of length one.
#' @param interval (`character` or `NULL`)\cr names of the statistics that will be plotted as intervals.
#'   All the statistics indicated in `interval` variable must be present in the object returned by `sfun`,
#'   and be of a `double` or `numeric` type vector of length two.
#' @param whiskers (`character`)\cr names of the interval whiskers that will be plotted. Must match the `names`
#'   attribute of the `interval` element in the list returned by `sfun`. It is possible to specify one whisker only,
#'   lower or upper.
#' @param table (`character` or `NULL`)\cr names of the statistics that will be displayed in the table below the plot.
#'   All the statistics indicated in `table` variable must be present in the object returned by `sfun`.
#' @param sfun (`closure`)\cr the function to compute the values of required statistics. It must return a named `list`
#'   with atomic vectors. The names of the `list` elements refer to the names of the statistics and are used by `mid`,
#'   `interval`, `table`. It must be able to accept as input a vector with data for which statistics are computed.
#' @param ... optional arguments to `sfun`.
#' @param mid_type (`character`)\cr controls the type of the `mid` plot, it can be point (`p`), line (`l`),
#'   or point and line (`pl`).
#' @param mid_point_size (`integer` or `double`)\cr controls the font size of the point for `mid` plot.
#' @param position (`character` or `call`)\cr geom element position adjustment, either as a string, or the result of
#'   a call to a position adjustment function.
#' @param legend_title (`character` string)\cr legend title.
#' @param legend_position (`character`)\cr the position of the plot legend (`none`, `left`, `right`, `bottom`, `top`,
#'   or two-element numeric vector).
#' @param ggtheme (`theme`)\cr a graphical theme as provided by `ggplot2` to control styling of the plot.
#' @param y_lab (`character`)\cr y-axis label. If equal to `NULL`, then no label will be added.
#' @param y_lab_add_paramcd (`logical`)\cr should paramcd, i.e. `unique(df[[variables["paramcd"]]])` be added to the
#'   y-axis label `y_lab`?
#' @param y_lab_add_unit (`logical`)\cr should y unit, i.e. `unique(df[[variables["y_unit"]]])` be added to the y-axis
#'   label `y_lab`?
#' @param title (`character`)\cr plot title.
#' @param subtitle (`character`)\cr plot subtitle.
#' @param subtitle_add_paramcd (`logical`)\cr should paramcd, i.e. `unique(df[[variables["paramcd"]]])` be added to
#'   the plot's subtitle `subtitle`?
#' @param subtitle_add_unit (`logical`)\cr should y unit, i.e. `unique(df[[variables["y_unit"]]])` be added to the
#'   plot's subtitle `subtitle`?
#' @param caption (`character`)\cr optional caption below the plot.
#' @param table_format (named `character` or `NULL`)\cr format patterns for descriptive statistics used in the
#'   (optional) table appended to the plot. It is passed directly to the `h_format_row` function through the `format`
#'   parameter. Names of `table_format` must match the names of statistics returned by `sfun` function.
#' @param table_labels (named `character` or `NULL`)\cr labels for descriptive statistics used in the (optional) table
#'   appended to the plot. Names of `table_labels` must match the names of statistics returned by `sfun` function.
#' @param table_font_size (`integer` or `double`)\cr controls the font size of values in the table.
#' @param newpage (`logical`)\cr should plot be drawn on new page?
#' @param col (`character`)\cr colors.
#'
#' @return A `ggplot` line plot (and statistics table if applicable).
#'
#' @examples
#' library(nestcolor)
#'
#' adsl <- tern_ex_adsl
#' adlb <- tern_ex_adlb %>% dplyr::filter(ANL01FL == "Y", PARAMCD == "ALT", AVISIT != "SCREENING")
#' adlb$AVISIT <- droplevels(adlb$AVISIT)
#' adlb <- dplyr::mutate(adlb, AVISIT = forcats::fct_reorder(AVISIT, AVISITN, min))
#'
#' # Mean with CI
#' g_lineplot(adlb, adsl, subtitle = "Laboratory Test:")
#'
#' # Mean with CI, no stratification
#' g_lineplot(adlb, variables = control_lineplot_vars(strata = NA))
#'
#' # Mean, upper whisker of CI, no strata counts N
#' g_lineplot(
#'   adlb,
#'   whiskers = "mean_ci_upr",
#'   title = "Plot of Mean and Upper 95% Confidence Limit by Visit"
#' )
#'
#' # Median with CI
#' g_lineplot(
#'   adlb,
#'   adsl,
#'   mid = "median",
#'   interval = "median_ci",
#'   whiskers = c("median_ci_lwr", "median_ci_upr"),
#'   title = "Plot of Median and 95% Confidence Limits by Visit"
#' )
#'
#' # Mean, +/- SD
#' g_lineplot(adlb, adsl,
#'   interval = "mean_sdi",
#'   whiskers = c("mean_sdi_lwr", "mean_sdi_upr"),
#'   title = "Plot of Mean +/- SD by Visit"
#' )
#'
#' # Mean with CI plot with stats table
#' g_lineplot(adlb, adsl, table = c("n", "mean", "mean_ci"))
#'
#' # Mean with CI, table and customized confidence level
#' g_lineplot(
#'   adlb,
#'   adsl,
#'   table = c("n", "mean", "mean_ci"),
#'   control = control_summarize_vars(conf_level = 0.80),
#'   title = "Plot of Mean and 80% Confidence Limits by Visit"
#' )
#'
#' # Mean with CI, table, filtered data
#' adlb_f <- dplyr::filter(adlb, ARMCD != "ARM A" | AVISIT == "BASELINE")
#' g_lineplot(adlb_f, table = c("n", "mean"))
#'
#' @export
g_lineplot <- function(df,
                       alt_counts_df = NULL,
                       variables = control_lineplot_vars(),
                       mid = "mean",
                       interval = "mean_ci",
                       whiskers = c("mean_ci_lwr", "mean_ci_upr"),
                       table = NULL,
                       sfun = tern::s_summary,
                       ...,
                       mid_type = "pl",
                       mid_point_size = 2,
                       position = ggplot2::position_dodge(width = 0.4),
                       legend_title = NULL,
                       legend_position = "bottom",
                       ggtheme = nestcolor::theme_nest(),
                       y_lab = NULL,
                       y_lab_add_paramcd = TRUE,
                       y_lab_add_unit = TRUE,
                       title = "Plot of Mean and 95% Confidence Limits by Visit",
                       subtitle = "",
                       subtitle_add_paramcd = TRUE,
                       subtitle_add_unit = TRUE,
                       caption = NULL,
                       table_format = summary_formats(),
                       table_labels = summary_labels(),
                       table_font_size = 3,
                       newpage = TRUE,
                       col = NULL) {
  # NOTE(review): `newpage` is accepted but not referenced anywhere in this function body.
  checkmate::assert_character(variables, any.missing = TRUE)
  checkmate::assert_character(mid, null.ok = TRUE)
  checkmate::assert_character(interval, null.ok = TRUE)
  checkmate::assert_character(col, null.ok = TRUE)

  checkmate::assert_string(title, null.ok = TRUE)
  checkmate::assert_string(subtitle, null.ok = TRUE)

  if (is.character(interval)) {
    checkmate::assert_vector(whiskers, min.len = 0, max.len = 2)
  }

  # A single whisker is drawn from the midpoint, so `mid` is required in that case.
  if (length(whiskers) == 1) {
    checkmate::assert_character(mid)
  }

  if (is.character(mid)) {
    checkmate::assert_scalar(mid_type)
    checkmate::assert_subset(mid_type, c("pl", "p", "l"))
  }

  x <- variables[["x"]]
  y <- variables[["y"]]
  paramcd <- variables["paramcd"] # NA if paramcd == NA or it is not in variables
  y_unit <- variables["y_unit"] # NA if y_unit == NA or it is not in variables
  if (is.na(variables["strata"])) {
    strata <- NULL # NULL if strata == NA or it is not in variables
  } else {
    strata <- variables[["strata"]]
  }
  checkmate::assert_flag(y_lab_add_paramcd, null.ok = TRUE)
  checkmate::assert_flag(subtitle_add_paramcd, null.ok = TRUE)
  # `NULL` passes the assertions above but is not a valid operand for `&&` / `if`;
  # treat it as "do not add". `TRUE` / `FALSE` are left unchanged.
  y_lab_add_paramcd <- isTRUE(y_lab_add_paramcd)
  subtitle_add_paramcd <- isTRUE(subtitle_add_paramcd)
  if ((!is.null(y_lab) && y_lab_add_paramcd) || (!is.null(subtitle) && subtitle_add_paramcd)) {
    checkmate::assert_false(is.na(paramcd))
    checkmate::assert_scalar(unique(df[[paramcd]]))
  }

  checkmate::assert_flag(y_lab_add_unit, null.ok = TRUE)
  checkmate::assert_flag(subtitle_add_unit, null.ok = TRUE)
  # Same normalization as for the paramcd flags: `NULL` means "do not add".
  y_lab_add_unit <- isTRUE(y_lab_add_unit)
  subtitle_add_unit <- isTRUE(subtitle_add_unit)
  if ((!is.null(y_lab) && y_lab_add_unit) || (!is.null(subtitle) && subtitle_add_unit)) {
    checkmate::assert_false(is.na(y_unit))
    checkmate::assert_scalar(unique(df[[y_unit]]))
  }

  # The strata found in the counts data must be the same set as in the analysis data.
  if (!is.null(strata) && !is.null(alt_counts_df)) {
    checkmate::assert_set_equal(unique(alt_counts_df[[strata]]), unique(df[[strata]]))
  }

  ####################################### |
  # ---- Compute required statistics ----
  ####################################### |
  if (!is.null(strata)) {
    df_grp <- tidyr::expand(df, .data[[strata]], .data[[x]]) # expand based on levels of factors
  } else {
    df_grp <- tidyr::expand(df, NULL, .data[[x]])
  }
  # Attach the observations to the expanded strata/x grid and group by that grid.
  df_grp <- df_grp %>%
    dplyr::full_join(y = df[, c(strata, x, y)], by = c(strata, x), multiple = "all") %>%
    dplyr::group_by_at(c(strata, x))

  # One row per group, holding the `mid` and `interval` statistics returned by `sfun`.
  df_stats <- df_grp %>%
    dplyr::summarise(
      data.frame(t(do.call(c, unname(sfun(.data[[y]], ...)[c(mid, interval)])))),
      .groups = "drop"
    )

  # Groups without a midpoint value are not plotted.
  df_stats <- df_stats[!is.na(df_stats[[mid]]), ]

  # add number of objects N in strata
  if (!is.null(strata) && !is.null(alt_counts_df)) {
    strata_N <- paste0(strata, "_N") # nolint

    df_N <- as.data.frame(table(alt_counts_df[[strata]], exclude = c(NA, NaN, Inf))) # nolint
    colnames(df_N) <- c(strata, "N") # nolint
    df_N[[strata_N]] <- paste0(df_N[[strata]], " (N = ", df_N$N, ")") # nolint

    # strata_N should not be in colnames(df_stats)
    checkmate::assert_disjunct(strata_N, colnames(df_stats))

    df_stats <- merge(x = df_stats, y = df_N[, c(strata, strata_N)], by = strata)
  } else if (!is.null(strata)) {
    strata_N <- strata # nolint
  } else {
    strata_N <- NULL # nolint
  }

  ############################################### |
  # ---- Prepare certain plot's properties. ----
  ############################################### |
  # legend title
  # `identical()` keeps the condition scalar when `legend_position` is a two-element numeric vector.
  if (is.null(legend_title) && !is.null(strata) && !identical(legend_position, "none")) {
    legend_title <- attr(df[[strata]], "label")
  }

  # y label
  if (!is.null(y_lab)) {
    if (y_lab_add_paramcd) {
      y_lab <- paste(y_lab, unique(df[[paramcd]]))
    }

    if (y_lab_add_unit) {
      y_lab <- paste0(y_lab, " (", unique(df[[y_unit]]), ")")
    }

    y_lab <- trimws(y_lab)
  }

  # subtitle
  if (!is.null(subtitle)) {
    if (subtitle_add_paramcd) {
      subtitle <- paste(subtitle, unique(df[[paramcd]]))
    }

    if (subtitle_add_unit) {
      subtitle <- paste0(subtitle, " (", unique(df[[y_unit]]), ")")
    }

    subtitle <- trimws(subtitle)
  }

  ############################### |
  # ---- Build plot object. ----
  ############################### |
  # All group aesthetics map to the same (optionally N-annotated) strata column.
  p <- ggplot2::ggplot(
    data = df_stats,
    mapping = ggplot2::aes(
      x = .data[[x]], y = .data[[mid]],
      color = if (is.null(strata_N)) NULL else .data[[strata_N]],
      shape = if (is.null(strata_N)) NULL else .data[[strata_N]],
      lty = if (is.null(strata_N)) NULL else .data[[strata_N]],
      group = if (is.null(strata_N)) NULL else .data[[strata_N]]
    )
  )

  if (!is.null(mid)) {
    # points
    if (grepl("p", mid_type, fixed = TRUE)) {
      p <- p + ggplot2::geom_point(position = position, size = mid_point_size, na.rm = TRUE)
    }

    # lines
    # further conditions in if are to ensure that not all of the groups consist of only one observation
    if (grepl("l", mid_type, fixed = TRUE) &&
      !is.null(strata) &&
      !all(dplyr::summarise(df_grp, count_n = dplyr::n())[["count_n"]] == 1L)) {
      p <- p + ggplot2::geom_line(position = position, na.rm = TRUE)
    }
  }

  # interval
  if (!is.null(interval)) {
    # With a single whisker both `ymin` and `ymax` map to it; the line range below then
    # connects it to the midpoint.
    p <- p +
      ggplot2::geom_errorbar(
        ggplot2::aes(ymin = .data[[whiskers[1]]], ymax = .data[[whiskers[max(1, length(whiskers))]]]),
        width = 0.45,
        position = position
      )

    if (length(whiskers) == 1) { # lwr or upr only; mid is then required
      # workaround as geom_errorbar does not provide single-direction whiskers
      p <- p +
        ggplot2::geom_linerange(
          data = df_stats[!is.na(df_stats[[whiskers]]), ], # as na.rm =TRUE does not suppress warnings
          ggplot2::aes(ymin = .data[[mid]], ymax = .data[[whiskers]]),
          position = position,
          na.rm = TRUE,
          show.legend = FALSE
        )
    }
  }

  p <- p +
    ggplot2::scale_y_continuous(labels = scales::comma, expand = ggplot2::expansion(c(0.25, .25))) +
    ggplot2::labs(
      title = title,
      subtitle = subtitle,
      caption = caption,
      color = legend_title,
      lty = legend_title,
      shape = legend_title,
      x = attr(df[[x]], "label"),
      y = y_lab
    )

  if (!is.null(col)) {
    p <- p +
      ggplot2::scale_color_manual(values = col)
  }

  # A supplied theme is applied as is; `legend_position` is only used by the fallback theme below.
  if (!is.null(ggtheme)) {
    p <- p + ggtheme
  } else {
    p <- p +
      ggplot2::theme_bw() +
      ggplot2::theme(
        legend.key.width = grid::unit(1, "cm"),
        legend.position = legend_position,
        # Scalar result also for a two-element numeric `legend_position` (then "vertical").
        legend.direction = if (any(legend_position %in% c("top", "bottom"))) "horizontal" else "vertical"
      )
  }

  ############################################################# |
  # ---- Optionally, add table to the bottom of the plot. ----
  ############################################################# |
  if (!is.null(table)) {
    # One formatted row of the requested statistics per strata/x group.
    df_stats_table <- df_grp %>%
      dplyr::summarise(
        h_format_row(
          x = sfun(.data[[y]], ...)[table],
          format = table_format,
          labels = table_labels
        ),
        .groups = "drop"
      )

    # Reversed so that the first statistic ends up at the top of the (bottom-up) y axis.
    stats_lev <- rev(setdiff(colnames(df_stats_table), c(strata, x)))

    df_stats_table <- df_stats_table %>%
      tidyr::pivot_longer(
        cols = -dplyr::all_of(c(strata, x)),
        names_to = "stat",
        values_to = "value",
        names_ptypes = list(stat = factor(levels = stats_lev))
      )

    # The table is itself a ggplot: text labels on an x (visit) by statistic grid, without axes.
    tbl <- ggplot2::ggplot(
      df_stats_table,
      ggplot2::aes(x = .data[[x]], y = .data[["stat"]], label = .data[["value"]])
    ) +
      ggplot2::geom_text(size = table_font_size) +
      ggplot2::theme_bw() +
      ggplot2::theme(
        panel.border = ggplot2::element_blank(),
        panel.grid.major = ggplot2::element_blank(),
        panel.grid.minor = ggplot2::element_blank(),
        axis.ticks = ggplot2::element_blank(),
        axis.title = ggplot2::element_blank(),
        axis.text.x = ggplot2::element_blank(),
        axis.text.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 0, b = 0, l = 5)),
        strip.text = ggplot2::element_text(hjust = 0),
        strip.text.x = ggplot2::element_text(margin = ggplot2::margin(1.5, 0, 1.5, 0, "pt")),
        strip.background = ggplot2::element_rect(fill = "grey95", color = NA),
        legend.position = "none"
      )

    # One table panel per stratum.
    if (!is.null(strata)) {
      tbl <- tbl + ggplot2::facet_wrap(facets = strata, ncol = 1)
    }

    # align plot and table
    cowplot::plot_grid(p, tbl, ncol = 1)
  } else {
    p
  }
}

#' Helper function to get the right formatting in the optional table in g_lineplot.
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param x (named `list`)\cr list of numerical values to be formatted and optionally labeled.
#'   Elements of `x` must be `numeric` vectors.
#' @param format (named `character` or `NULL`)\cr format patterns for `x`. Names of the `format` must
#'   match the names of `x`. This parameter is passed directly to the `rtables::format_rcell`
#'   function through the `format` parameter.
#' @param labels (named `character` or `NULL`)\cr optional labels for `x`. Names of the `labels` must
#'   match the names of `x`. When a label is not specified for an element of `x`,
#'   then this function tries to use `label` or `names` (in this order) attribute of that element
#'   (depending on which one exists and it is not `NULL` or `NA` or `NaN`). If none of these attributes
#'   are attached to a given element of `x`, then the label is automatically generated.
#'
#' @return A single row `data.frame` object.
#'
#' @examples
#' mean_ci <- c(48, 51)
#' x <- list(mean = 50, mean_ci = mean_ci)
#' format <- c(mean = "xx.x", mean_ci = "(xx.xx, xx.xx)")
#' labels <- c(mean = "My Mean")
#' h_format_row(x, format, labels)
#'
#' attr(mean_ci, "label") <- "Mean 95% CI"
#' x <- list(mean = 50, mean_ci = mean_ci)
#' h_format_row(x, format, labels)
#'
#' @export
h_format_row <- function(x, format, labels = NULL) {
  # Formats a single statistic into a one-row, one-column data.frame whose
  # column name is the label selected for that statistic.
  format_cell <- function(x, format, label = NULL) {
    cell <- format_rcell(x = x, format = format)
    if (is.na(cell)) {
      cell <- "NA"
    }

    # Label precedence: explicit `label`, then the "label" attribute of `x`,
    # then the names of `x` (only when they line up with the formatted output).
    attr_label <- attr(x, "label")
    if (!is.null(label) && !is.na(label)) {
      names(cell) <- label
    } else if (!is.null(attr_label) && !is.na(attr_label)) {
      names(cell) <- attr_label
    } else if (length(x) == length(cell)) {
      names(cell) <- names(x)
    }

    as.data.frame(t(cell))
  }

  # One formatted cell per named element of `x`; `format` and `labels` are looked up by the same name.
  cells <- lapply(names(x), function(stat_name) {
    format_cell(x[[stat_name]], format = format[stat_name], label = labels[stat_name])
  })

  # Bind the single-column cells side by side into one row.
  do.call(cbind, cells)
}

#' Control Function for g_lineplot Function
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Default values for `variables` parameter in `g_lineplot` function.
#' A variable's default value can be overwritten for any variable.
#'
#' @param x (`character`)\cr x variable name.
#' @param y (`character`)\cr y variable name.
#' @param strata (`character` or `NA`)\cr strata variable name.
#' @param paramcd (`character` or `NA`)\cr paramcd variable name.
#' @param y_unit (`character` or `NA`)\cr y_unit variable name.
#'
#' @return A named character vector of variable names.
#'
#' @examples
#' control_lineplot_vars()
#' control_lineplot_vars(strata = NA)
#'
#' @export
control_lineplot_vars <- function(x = "AVISIT", y = "AVAL", strata = "ARM", paramcd = "PARAMCD", y_unit = "AVALU") {
  # `x` and `y` must be real variable names; the remaining entries may be set to `NA`.
  checkmate::assert_string(x)
  checkmate::assert_string(y)
  checkmate::assert_string(strata, na.ok = TRUE)
  checkmate::assert_string(paramcd, na.ok = TRUE)
  checkmate::assert_string(y_unit, na.ok = TRUE)

  # Named character vector, one entry per role.
  c(x = x, y = y, strata = strata, paramcd = paramcd, y_unit = y_unit)
}

#' Control Function for Subgroup Treatment Effect Pattern (STEP) Calculations
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for STEP calculations.
#'
#' @param biomarker (`numeric` or `NULL`)\cr optional provision of the numeric biomarker variable, which
#'   could be used to infer `bandwidth`, see below.
#' @param use_percentile (`flag`)\cr if `TRUE`, the running windows are created according to
#'   quantiles rather than actual values, i.e. the bandwidth refers to the percentage of data
#'   covered in each window. Suggest `TRUE` if the biomarker variable is not uniformly
#'   distributed.
#' @param bandwidth (`number` or `NULL`)\cr indicating the bandwidth of each window.
#'   Depending on the argument `use_percentile`, it can be either the length of actual-value
#'   windows on the real biomarker scale, or percentage windows.
#'   If `use_percentile = TRUE`, it should be a number between 0 and 1.
#'   If `NULL`, treat the bandwidth to be infinity, which means only one global model will be fitted.
#'   By default, `0.25` is used for percentage windows and one quarter of the range of the `biomarker`
#'   variable for actual-value windows.
#' @param degree (`count`)\cr the degree of polynomial function of the biomarker as an interaction term
#'   with the treatment arm fitted at each window. If 0 (default), then the biomarker variable
#'   is not included in the model fitted in each biomarker window.
#' @param num_points (`count`)\cr the number of points at which the hazard ratios are estimated. The
#'   smallest number is 2.
#'
#' @return A list of components with the same names as the arguments, except `biomarker` which is
#'   just used to calculate the `bandwidth` in case that actual biomarker windows are requested.
#'
#' @examples
#' # Provide biomarker values and request actual values to be used,
#' # so that bandwidth is chosen from range.
#' control_step(biomarker = 1:10, use_percentile = FALSE)
#'
#' # Use a global model with quadratic biomarker interaction term.
#' control_step(bandwidth = NULL, degree = 2)
#'
#' # Reduce number of points to be used.
#' control_step(num_points = 10)
#'
#' @export
control_step <- function(biomarker = NULL,
                         use_percentile = TRUE,
                         bandwidth,
                         degree = 0L,
                         num_points = 39L) {
  checkmate::assert_numeric(biomarker, null.ok = TRUE)
  checkmate::assert_flag(use_percentile)
  checkmate::assert_int(num_points, lower = 2)
  checkmate::assert_count(degree)

  if (missing(bandwidth)) {
    # No bandwidth supplied: derive the default from the window type.
    if (use_percentile) {
      bandwidth <- 0.25
    } else if (is.null(biomarker)) {
      # Without biomarker values there is nothing to derive a bandwidth from.
      bandwidth <- NULL
    } else {
      # One quarter of the observed biomarker range.
      bandwidth <- diff(range(biomarker, na.rm = TRUE)) / 4
    }
  } else if (!is.null(bandwidth)) {
    # User-supplied bandwidth: validate it (an explicit `NULL` is passed through unchecked).
    if (use_percentile) {
      assert_proportion_value(bandwidth)
    } else {
      checkmate::assert_scalar(bandwidth)
      checkmate::assert_true(bandwidth > 0)
    }
  }

  list(
    use_percentile = use_percentile,
    bandwidth = bandwidth,
    degree = as.integer(degree),
    num_points = as.integer(num_points)
  )
}

#' Helper Functions for Tabulating Survival Duration by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions that tabulate in a data frame statistics such as median survival
#' time and hazard ratio for population subgroups.
#'
#' @inheritParams argument_convention
#' @inheritParams survival_coxph_pairwise
#' @inheritParams survival_duration_subgroups
#' @param arm (`factor`)\cr the treatment group variable.
#'
#' @details Main functionality is to prepare data for use in a layout-creating function.
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adtte <- tern_ex_adtte
#'
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte)
#'
#' adtte_f <- adtte %>%
#'   filter(
#'     PARAMCD == "OS",
#'     ARM %in% c("B: Placebo", "A: Drug X"),
#'     SEX %in% c("M", "F")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to display reference arm before treatment arm.
#'     ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
#'     SEX = droplevels(SEX),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("ARM" = adtte_labels[["ARM"]], "SEX" = adtte_labels[["SEX"]], "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' @name h_survival_duration_subgroups
NULL

#' @describeIn h_survival_duration_subgroups helper to prepare a data frame of median survival times by arm.
#'
#' @return
#' * `h_survtime_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, and `median`.
#'
#' @examples
#' # Extract median survival time for one group.
#' h_survtime_df(
#'   tte = adtte_f$AVAL,
#'   is_event = adtte_f$is_event,
#'   arm = adtte_f$ARM
#' )
#'
#' @export
h_survtime_df <- function(tte, is_event, arm) {
  checkmate::assert_numeric(tte)
  checkmate::assert_logical(is_event, len = length(tte))
  assert_valid_factor(arm, len = length(tte))

  df_tte <- data.frame(tte = tte, is_event = is_event, stringsAsFactors = FALSE)

  # Keep only observations where both the time and the event flag are available.
  keep <- stats::complete.cases(df_tte)
  df_tte <- df_tte[keep, ]
  arm <- arm[keep]

  # Builds the one-row summary for a single arm.
  summarize_arm <- function(df_arm, arm_name) {
    n_obs <- nrow(df_arm)
    if (n_obs > 0) {
      surv <- s_surv_time(df_arm, .var = "tte", is_event = "is_event")
      median_est <- unname(as.numeric(surv$median))
      n_events <- sum(df_arm$is_event)
    } else {
      # Arm level without any observation: nothing can be estimated.
      median_est <- NA
      n_events <- NA
    }

    data.frame(
      arm = arm_name,
      n = n_obs,
      n_events = n_events,
      median = median_est,
      stringsAsFactors = FALSE
    )
  }

  by_arm <- split(df_tte, arm)
  rows <- Map(summarize_arm, by_arm, names(by_arm))

  df <- do.call(rbind, args = c(rows, make.row.names = FALSE))
  # Restore the factor with the level order of the input `arm`.
  df$arm <- factor(df$arm, levels = levels(arm))
  df
}

#' @describeIn h_survival_duration_subgroups summarizes median survival times by arm and across subgroups
#'    in a data frame. `variables` corresponds to the names of variables found in `data`, passed as a named list and
#'    requires elements `tte`, `is_event`, `arm` and optionally `subgroups`. `groups_lists` optionally specifies
#'    groupings for `subgroups` variables.
#'
#' @return
#' * `h_survtime_subgroups_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, `median`, `subgroup`,
#'   `var`, `var_label`, and `row_type`.
#'
#' @examples
#' # Extract median survival time for multiple groups.
#' h_survtime_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f
#' )
#'
#' # Define groupings for BMRKR2 levels.
#' h_survtime_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#'
#' @export
h_survtime_subgroups_df <- function(variables,
                                    data,
                                    groups_lists = list(),
                                    label_all = "All Patients") {
  checkmate::assert_character(variables$tte)
  checkmate::assert_character(variables$is_event)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)

  assert_df_with_variables(data, variables)

  checkmate::assert_string(label_all)

  # Survival time summary by arm for one data subset.
  survtime_for <- function(df) {
    h_survtime_df(df[[variables$tte]], df[[variables$is_event]], df[[variables$arm]])
  }

  # Overall rows, computed on the full data set.
  result_all <- survtime_for(data)
  result_all$subgroup <- label_all
  result_all$var <- "ALL"
  result_all$var_label <- label_all
  result_all$row_type <- "content"

  # Without subgroup variables the overall rows are the whole result.
  if (is.null(variables$subgroups)) {
    return(result_all)
  }

  # One block of rows per subgroup level, each carrying its subgroup labels.
  subsets <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  per_subgroup <- lapply(subsets, function(grp) {
    result <- survtime_for(grp$df)
    # Repeat the single label row so it lines up with every result row.
    result_labels <- grp$df_labels[rep(1, times = nrow(result)), ]
    cbind(result, result_labels)
  })

  result_subgroups <- do.call(rbind, args = c(per_subgroup, make.row.names = FALSE))
  result_subgroups$row_type <- "analysis"

  rbind(result_all, result_subgroups)
}

#' @describeIn h_survival_duration_subgroups helper to prepare a data frame with estimates of
#'   treatment hazard ratio.
#'
#' @param strata_data (`factor`, `data.frame` or `NULL`)\cr required if stratified analysis is performed.
#'
#' @return
#' * `h_coxph_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`, `lcl`, `ucl`,
#'   `conf_level`, `pval` and `pval_label`.
#'
#' @examples
#' # Extract hazard ratio for one group.
#' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM)
#'
#' # Extract hazard ratio for one group with stratification factor.
#' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM, strata_data = adtte_f$STRATA1)
#'
#' @export
h_coxph_df <- function(tte, is_event, arm, strata_data = NULL, control = control_coxph()) {
  checkmate::assert_numeric(tte)
  checkmate::assert_logical(is_event, len = length(tte))
  assert_valid_factor(arm, n.levels = 2, len = length(tte))

  df_tte <- data.frame(tte = tte, is_event = is_event)
  strata_vars <- NULL

  # Attach the stratification factor(s), if any, as extra columns of `df_tte`.
  if (!is.null(strata_data)) {
    if (is.data.frame(strata_data)) {
      strata_vars <- names(strata_data)
      checkmate::assert_data_frame(strata_data, nrows = nrow(df_tte))
      assert_df_with_factors(strata_data, as.list(stats::setNames(strata_vars, strata_vars)))
    } else {
      assert_valid_factor(strata_data, len = nrow(df_tte))
      strata_vars <- "strata_data"
    }
    df_tte[strata_vars] <- strata_data
  }

  # Result row used when no model can be fitted: only the counts are reported.
  unestimable_row <- function(n_tot, n_tot_events) {
    data.frame(
      # Dummy column needed downstream to create a nested header.
      arm = " ",
      n_tot = n_tot,
      n_tot_events = n_tot_events,
      hr = NA,
      lcl = NA,
      ucl = NA,
      conf_level = control[["conf_level"]],
      pval = NA,
      pval_label = NA,
      stringsAsFactors = FALSE
    )
  }

  # First level of `arm` is the reference group, second level the comparison group.
  arm_dfs <- split(df_tte, arm)
  n_ref <- nrow(arm_dfs[[1]])
  n_trt <- nrow(arm_dfs[[2]])

  if (n_ref > 0 && n_trt > 0) {
    # Both arms have data: hazard ratio and CI.
    fit <- s_coxph_pairwise(
      df = arm_dfs[[2]],
      .ref_group = arm_dfs[[1]],
      .in_ref_col = FALSE,
      .var = "tte",
      is_event = "is_event",
      strat = strata_vars,
      control = control
    )

    data.frame(
      # Dummy column needed downstream to create a nested header.
      arm = " ",
      n_tot = unname(as.numeric(fit$n_tot)),
      n_tot_events = unname(as.numeric(fit$n_tot_events)),
      hr = unname(as.numeric(fit$hr)),
      lcl = unname(fit$hr_ci[1]),
      ucl = unname(fit$hr_ci[2]),
      conf_level = control[["conf_level"]],
      pval = as.numeric(fit$pvalue),
      pval_label = obj_label(fit$pvalue),
      stringsAsFactors = FALSE
    )
  } else if (n_ref > 0 || n_trt > 0) {
    # Exactly one arm has data: report counts over the complete cases only.
    complete <- df_tte[stats::complete.cases(df_tte), ]
    unestimable_row(nrow(complete), sum(complete$is_event))
  } else {
    # Neither arm has data.
    unestimable_row(0L, 0L)
  }
}

#' @describeIn h_survival_duration_subgroups summarizes estimates of the treatment hazard ratio
#'   across subgroups in a data frame. `variables` corresponds to the names of variables found in
#'   `data`, passed as a named list and requires elements `tte`, `is_event`, `arm` and
#'   optionally `subgroups` and `strat`. `groups_lists` optionally specifies
#'   groupings for `subgroups` variables.
#'
#' @return
#' * `h_coxph_subgroups_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`,
#'   `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`, `var_label`, and `row_type`.
#'
#' @examples
#' # Extract hazard ratio for multiple groups.
#' h_coxph_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f
#' )
#'
#' # Define groupings of BMRKR2 levels.
#' h_coxph_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#'
#' # Extract hazard ratio for multiple groups with stratification factors.
#' h_coxph_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2"),
#'     strat = c("STRATA1", "STRATA2")
#'   ),
#'   data = adtte_f
#' )
#'
#' @export
h_coxph_subgroups_df <- function(variables,
                                 data,
                                 groups_lists = list(),
                                 control = control_coxph(),
                                 label_all = "All Patients") {
  checkmate::assert_character(variables$tte)
  checkmate::assert_character(variables$is_event)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_character(variables$strat, null.ok = TRUE)
  assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  assert_df_with_variables(data, variables)
  checkmate::assert_string(label_all)

  # Hazard ratio estimate for one data subset, stratified when `variables$strat` is given.
  coxph_for <- function(df) {
    h_coxph_df(
      tte = df[[variables$tte]],
      is_event = df[[variables$is_event]],
      arm = df[[variables$arm]],
      strata_data = if (is.null(variables$strat)) NULL else df[variables$strat],
      control = control
    )
  }

  # Overall row, computed on the full data set.
  result_all <- coxph_for(data)
  result_all$subgroup <- label_all
  result_all$var <- "ALL"
  result_all$var_label <- label_all
  result_all$row_type <- "content"

  # Without subgroup variables the overall row is the whole result.
  if (is.null(variables$subgroups)) {
    return(result_all)
  }

  # One row per subgroup level, each carrying its subgroup labels.
  subsets <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  per_subgroup <- lapply(subsets, function(grp) {
    result <- coxph_for(grp$df)
    # Repeat the single label row so it lines up with every result row.
    result_labels <- grp$df_labels[rep(1, times = nrow(result)), ]
    cbind(result, result_labels)
  })

  result_subgroups <- do.call(rbind, args = c(per_subgroup, make.row.names = FALSE))
  result_subgroups$row_type <- "analysis"

  rbind(result_all, result_subgroups)
}

#' Split Dataframe by Subgroups
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Split a dataframe into a non-nested list of subsets.
#'
#' @inheritParams survival_duration_subgroups
#' @param data (`data.frame`)\cr dataset to split.
#' @param subgroups (`character`)\cr names of factor variables from `data` used to create subsets.
#'   Unused levels not present in `data` are dropped. Note that the order in this vector
#'   determines the order in the downstream table.
#'
#' @return A list with subset data (`df`) and metadata about the subset (`df_labels`).
#'
#' @details Main functionality is to prepare data for use in forest plot layouts.
#'
#' @examples
#' df <- data.frame(
#'   x = c(1:5),
#'   y = factor(c("A", "B", "A", "B", "A"), levels = c("A", "B", "C")),
#'   z = factor(c("C", "C", "D", "D", "D"), levels = c("D", "C"))
#' )
#' formatters::var_labels(df) <- paste("label for", names(df))
#'
#' h_split_by_subgroups(
#'   data = df,
#'   subgroups = c("y", "z")
#' )
#'
#' h_split_by_subgroups(
#'   data = df,
#'   subgroups = c("y", "z"),
#'   groups_lists = list(
#'     y = list("AB" = c("A", "B"), "C" = "C")
#'   )
#' )
#'
#' @export
h_split_by_subgroups <- function(data,
                                 subgroups,
                                 groups_lists = list()) {
  checkmate::assert_character(subgroups, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(groups_lists, names = "named")
  checkmate::assert_subset(names(groups_lists), subgroups)
  assert_df_with_factors(data, as.list(stats::setNames(subgroups, subgroups)))

  # Labels of all columns, re-applied to every subset because row subsetting may drop them.
  data_labels <- unname(formatters::var_labels(data))
  df_subgroups <- data[, subgroups, drop = FALSE]
  subgroup_labels <- formatters::var_labels(df_subgroups, fill = TRUE)

  # For each subgroup variable, list the subgroup levels (or named groupings) to report.
  l_labels <- Map(function(grp_i, name_i) {
    existing_levels <- levels(droplevels(grp_i))
    grp_levels <- if (name_i %in% names(groups_lists)) {
      # For this variable groupings are defined. We check which groups are contained in the data.
      group_list_i <- groups_lists[[name_i]]
      group_has_levels <- vapply(group_list_i, function(lvls) any(lvls %in% existing_levels), TRUE)
      names(which(group_has_levels))
    } else {
      existing_levels
    }
    data.frame(
      subgroup = grp_levels,
      var = name_i,
      var_label = unname(subgroup_labels[name_i]),
      stringsAsFactors = FALSE # Rationale is that subgroups may not be unique.
    )
  }, df_subgroups, names(df_subgroups))

  # Create a dataframe with one row per subgroup.
  df_labels <- do.call(rbind, args = c(l_labels, make.row.names = FALSE))
  row_label <- paste0(df_labels$var, ".", df_labels$subgroup)
  # Levels in order of appearance so that `split()` keeps the requested ordering.
  row_split_var <- factor(row_label, levels = row_label)

  # Create a list of data subsets.
  lapply(split(df_labels, row_split_var), function(row_i) {
    wanted_levels <- if (row_i$var %in% names(groups_lists)) {
      groups_lists[[row_i$var]][[row_i$subgroup]]
    } else {
      row_i$subgroup
    }
    # `%in%` never returns `NA`, so missing values of the subgroup variable are simply
    # excluded. Comparing with `==` would put `NA` into the logical index and make
    # `data[which_row, ]` return spurious all-`NA` rows.
    which_row <- data[[row_i$var]] %in% wanted_levels
    df <- data[which_row, ]
    rownames(df) <- NULL
    formatters::var_labels(df) <- data_labels

    list(
      df = df,
      df_labels = data.frame(row_i, row.names = NULL)
    )
  })
}

#' Confidence Interval for Mean
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Convenient function for calculating the mean confidence interval. It calculates the arithmetic as well as the
#' geometric mean. It can be used as a `ggplot` helper function for plotting.
#'
#' @inheritParams argument_convention
#' @param n_min (`number`)\cr a minimum number of non-missing `x` to estimate the confidence interval for mean.
#' @param gg_helper (`logical`)\cr `TRUE` when output should be aligned for the use with `ggplot`.
#' @param geom_mean (`logical`)\cr `TRUE` when the geometric mean should be calculated.
#'
#' @return A named `vector` of values `mean_ci_lwr` and `mean_ci_upr`.
#'
#' @examples
#' stat_mean_ci(sample(10), gg_helper = FALSE)
#'
#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) +
#'   ggplot2::geom_point()
#'
#' p + ggplot2::stat_summary(
#'   fun.data = stat_mean_ci,
#'   geom = "errorbar"
#' )
#'
#' p + ggplot2::stat_summary(
#'   fun.data = stat_mean_ci,
#'   fun.args = list(conf_level = 0.5),
#'   geom = "errorbar"
#' )
#'
#' p + ggplot2::stat_summary(
#'   fun.data = stat_mean_ci,
#'   fun.args = list(conf_level = 0.5, geom_mean = TRUE),
#'   geom = "errorbar"
#' )
#'
#' @export
stat_mean_ci <- function(x,
                         conf_level = 0.95,
                         na.rm = TRUE, # nolint
                         n_min = 2,
                         gg_helper = TRUE,
                         geom_mean = FALSE) {
  if (na.rm) {
    x <- stats::na.omit(x)
  }
  n <- length(x)

  if (!geom_mean) {
    m <- mean(x)
  } else {
    # The geometric mean is computed on the log scale, so it is only defined when all
    # non-missing values are strictly positive.
    has_non_positive <- any(x[!is.na(x)] <= 0)
    if (has_non_positive) {
      m <- NA_real_
    } else {
      # From here on `x` and `m` live on the log scale; results are back-transformed below.
      x <- log(x)
      m <- mean(x)
    }
  }

  if (n < n_min || is.na(m)) {
    # Too few observations or no usable mean: the interval is not estimable.
    ci <- c(mean_ci_lwr = NA_real_, mean_ci_upr = NA_real_)
  } else {
    # Half-width of the t-based interval.
    hci <- stats::qt((1 + conf_level) / 2, df = n - 1) * stats::sd(x) / sqrt(n)
    ci <- c(mean_ci_lwr = m - hci, mean_ci_upr = m + hci)
    if (geom_mean) {
      ci <- exp(ci)
    }
  }

  if (gg_helper) {
    # Normalize any missing mean (including `NaN` from empty input) to `NA_real_`.
    if (is.na(m)) {
      m <- NA_real_
    }
    # Column names expected by `ggplot2::stat_summary(fun.data = ...)`.
    ci <- data.frame(y = if (geom_mean) exp(m) else m, ymin = ci[[1]], ymax = ci[[2]])
  }

  return(ci)
}

#' Confidence Interval for Median
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Convenient function for calculating the median confidence interval. It can be used as a `ggplot` helper
#' function for plotting.
#'
#' @inheritParams argument_convention
#' @param gg_helper (`logical`)\cr `TRUE` when output should be aligned for the use with `ggplot`.
#'
#' @details The function was adapted from `DescTools/versions/0.99.35/source`
#'
#' @return A named `vector` of values `median_ci_lwr` and `median_ci_upr`.
#'
#' @examples
#' stat_median_ci(sample(10), gg_helper = FALSE)
#'
#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) +
#'   ggplot2::geom_point()
#' p + ggplot2::stat_summary(
#'   fun.data = stat_median_ci,
#'   geom = "errorbar"
#' )
#'
#' @export
stat_median_ci <- function(x,
                           conf_level = 0.95,
                           na.rm = TRUE, # nolint
                           gg_helper = TRUE) {
  x <- unname(x)
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  n <- length(x)
  med <- stats::median(x)

  # Rank of the order statistic used as lower bound, from the Binomial(n, 0.5) distribution.
  k <- stats::qbinom(p = (1 - conf_level) / 2, size = n, prob = 0.5, lower.tail = TRUE)

  # k == 0 - for small samples (e.g. n <= 5) ci can be outside the observed range
  estimable <- !(k == 0 || is.na(med))

  if (estimable) {
    # The k-th smallest and k-th largest observations bound the interval.
    bounds <- sort(x)[c(k, n - k + 1)]
    ci <- c(median_ci_lwr = bounds[1], median_ci_upr = bounds[2])
    # Coverage actually achieved by these order statistics.
    empir_conf_level <- 1 - 2 * stats::pbinom(k - 1, size = n, prob = 0.5)
  } else {
    ci <- c(median_ci_lwr = NA_real_, median_ci_upr = NA_real_)
    empir_conf_level <- NA_real_
  }

  if (gg_helper) {
    # Column names expected by `ggplot2::stat_summary(fun.data = ...)`.
    ci <- data.frame(y = med, ymin = ci[[1]], ymax = ci[[2]])
  }

  attr(ci, "conf_level") <- empir_conf_level

  return(ci)
}

#' p-Value of the Mean
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Convenient function for calculating the two-sided p-value of the mean.
#'
#' @inheritParams argument_convention
#' @param n_min (`numeric`)\cr a minimum number of non-missing `x` to estimate the p-value of the mean.
#' @param test_mean (`numeric`)\cr mean value to test under the null hypothesis.
#'
#' @return A p-value.
#'
#' @examples
#' stat_mean_pval(sample(10))
#'
#' stat_mean_pval(rnorm(10), test_mean = 0.5)
#'
#' @export
stat_mean_pval <- function(x,
                           na.rm = TRUE, # nolint
                           n_min = 2,
                           test_mean = 0) {
  if (na.rm) {
    x <- stats::na.omit(x)
  }
  n <- length(x)

  x_mean <- mean(x)
  x_sd <- stats::sd(x)

  if (n < n_min) {
    # Too few observations to test the mean.
    pv <- c(p_value = NA_real_)
  } else {
    # One-sample t statistic against `test_mean`, reusing the standard deviation computed above.
    x_se <- x_sd / sqrt(n)
    ttest <- (x_mean - test_mean) / x_se
    # Two-sided p-value from the t distribution with n - 1 degrees of freedom.
    pv <- c(p_value = 2 * stats::pt(-abs(ttest), df = n - 1))
  }

  return(pv)
}

#' Re-implemented [range()] Default S3 method for numerical objects
#'
#' This function returns `c(NA, NA)` instead of `c(-Inf, Inf)` for zero-length data
#' without any warnings.
#'
#' @param x (`numeric`)\cr a sequence of numbers for which the range is computed.
#' @param na.rm (`logical`)\cr indicating if `NA` should be omitted.
#' @param finite (`logical`)\cr indicating if non-finite elements should be removed.
#'
#' @return A 2-element vector of class `numeric`.
#'
#' @examples
#' # Internal function - range_noinf
#' \dontrun{
#' range_noinf(1:5)
#' range_noinf(c(1:5, NA, NA), na.rm = TRUE)
#' range_noinf(numeric(), na.rm = TRUE)
#' range_noinf(c(1:5, NA, NA, Inf), na.rm = TRUE, finite = TRUE)
#' range_noinf(Inf)
#' range_noinf(Inf, na.rm = TRUE, finite = TRUE)
#' range_noinf(c(Inf, NA), na.rm = FALSE, finite = TRUE)
#' range_noinf(c(1, Inf, NA), na.rm = FALSE, finite = TRUE)
#' }
#'
#' @keywords internal
range_noinf <- function(x, na.rm = FALSE, finite = FALSE) { # nolint

  checkmate::assert_numeric(x)

  # `finite` takes precedence over `na.rm`.
  if (finite) {
    x <- x[is.finite(x)] # removes NAs too
  } else if (na.rm) {
    x <- x[!is.na(x)]
  }

  if (length(x) > 0) {
    return(c(min(x, na.rm = FALSE), max(x, na.rm = FALSE)))
  }

  # Nothing left to summarize: return a pair of `NA`s of the same type as `x`.
  empty_range <- c(NA, NA)
  mode(empty_range) <- typeof(x)
  return(empty_range)
}

#' Utility function to create label for confidence interval
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#'
#' @return A `string`.
#'
#' @export
f_conf_level <- function(conf_level) {
  assert_proportion_value(conf_level)
  # Express the level as a percentage, e.g. 0.95 -> "95% CI".
  percent <- conf_level * 100
  paste0(percent, "% CI")
}

#' Utility function to create label for p-value
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param test_mean (`number`)\cr mean value to test under the null hypothesis.
#'
#' @return A `string`.
#'
#' @export
f_pval <- function(test_mean) {
  checkmate::assert_numeric(test_mean, len = 1)
  # Label states the null hypothesis value being tested.
  null_hypothesis <- paste0("H0: mean = ", test_mean)
  paste0("p-value (", null_hypothesis, ")")
}

#' Utility function to return a named list of covariate names.
#'
#' @param covariates (`character`)\cr a vector that can contain single variable names (such as
#'   `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
#'
#' @return A named `list` of `character` strings, one element per covariate name.
#'
#' @keywords internal
get_covariates <- function(covariates) {
  checkmate::assert_character(covariates)
  # Break interaction terms such as "X1 * X2" into their individual variables.
  pieces <- strsplit(covariates, "\\*")
  cov_vars <- unique(trimws(unlist(pieces)))
  # Each variable name is both the list element and its name.
  out <- as.list(cov_vars)
  names(out) <- cov_vars
  out
}

#' Replicate Entries of a Vector if Required
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Replicate entries of a vector if required.
#'
#' @inheritParams argument_convention
#' @param n (`count`)\cr how many entries we need.
#'
#' @return `x` if it has the required length already or is `NULL`,
#'   otherwise if it is scalar the replicated version of it with `n` entries.
#'
#' @note This function will fail if `x` is neither a scalar nor of length `n`.
#'
#' @export
to_n <- function(x, n) {
  # `NULL` is passed through untouched.
  if (is.null(x)) {
    return(NULL)
  }

  len <- length(x)
  # A scalar is recycled to `n` entries.
  if (len == 1) {
    return(rep(x, n))
  }
  # Anything else must already have exactly `n` entries.
  if (len != n) {
    stop("dimension mismatch")
  }
  x
}

#' Check Element Dimension
#'
#' Checks if the elements in `...` have the same dimension.
#'
#' @param ... (`data.frame`s or `vector`s)\cr any data frames/vectors.
#' @param omit_null (`logical`)\cr whether `NULL` elements in `...` should be omitted from the check.
#'
#' @return A `logical` value.
#'
#' @keywords internal
check_same_n <- function(..., omit_null = TRUE) {
  dots <- list(...)

  # Every argument needs a label for the error messages. Arguments passed without a name
  # are labelled by position (`..1`, `..2`, ...), so fully unnamed calls work as well.
  arg_names <- names(dots)
  if (is.null(arg_names)) {
    arg_names <- rep("", length(dots))
  }
  unnamed <- is.na(arg_names) | !nzchar(arg_names)
  arg_names[unnamed] <- paste0("..", which(unnamed))
  names(dots) <- arg_names

  # Size of each element: number of rows for data frames, length for atomic vectors,
  # `NA` for `NULL` elements that are to be ignored.
  n_list <- Map(
    function(x, name) {
      if (is.null(x)) {
        if (omit_null) {
          NA_integer_
        } else {
          stop("arg ", name, " is not supposed to be NULL")
        }
      } else if (is.data.frame(x)) {
        nrow(x)
      } else if (is.atomic(x)) {
        length(x)
      } else {
        stop("data structure for ", name, " is currently not supported")
      }
    },
    dots, names(dots)
  )

  # Drop the ignored `NULL` elements; names of the remaining elements are kept.
  n <- stats::na.omit(unlist(n_list))

  if (length(unique(n)) > 1) {
    # Report every element whose size differs from that of the first one.
    sel <- which(n != n[1])
    stop("dimension mismatch: ", paste(names(n)[sel], collapse = ", "), " do not have N=", n[1])
  }

  TRUE
}

#' Make Names Without Dots
#'
#' @param nams (`character`)\cr vector of original names.
#'
#' @return A `character` `vector` of proper names, which does not use dots in contrast to [make.names()].
#'
#' @examples
#' # Internal function - make_names
#' \dontrun{
#' make_names(c("foo Bar", "1 2 3 bla"))
#' }
#'
#' @keywords internal
make_names <- function(nams) {
  # Let base R build syntactically valid names, then strip the dots it introduces.
  valid_names <- make.names(nams)
  gsub(pattern = ".", replacement = "", x = valid_names, fixed = TRUE)
}

#' Conversion of Months to Days
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Conversion of Months to Days. This is an approximate calculation because it
#' considers each month as having an average of 30.4375 days.
#'
#' @param x (`numeric`)\cr time in months.
#'
#' @return A `numeric` vector with the time in days.
#'
#' @examples
#' x <- c(13.25, 8.15, 1, 2.834)
#' month2day(x)
#'
#' @export
month2day <- function(x) {
  checkmate::assert_numeric(x)
  # Average month length in days used for the conversion.
  days_per_month <- 30.4375
  x * days_per_month
}

#' Conversion of Days to Months
#'
#' @param x (`numeric`)\cr time in days.
#'
#' @return A `numeric` vector with the time in months.
#'
#' @examples
#' x <- c(403, 248, 30, 86)
#' day2month(x)
#'
#' @export
day2month <- function(x) {
  # Conversion factor used by this function: days per month.
  days_per_month <- 30.4375
  checkmate::assert_numeric(x)
  x / days_per_month
}

#' Return an empty numeric if all elements are `NA`.
#'
#' @param x (`numeric`)\cr vector.
#'
#' @return An empty `numeric` if all elements of `x` are `NA`, otherwise `x`.
#'
#' @examples
#' x <- c(NA, NA, NA)
#' # Internal function - empty_vector_if_na
#' \dontrun{
#' empty_vector_if_na(x)
#' }
#'
#' @keywords internal
empty_vector_if_na <- function(x) {
  # Return the input untouched as soon as it holds at least one non-missing value.
  has_observed_value <- !all(is.na(x))
  if (has_observed_value) {
    return(x)
  }
  # Everything is missing (or `x` is empty): return a zero-length numeric.
  numeric()
}

#' Combine Two Vectors Element Wise
#'
#' @param x (`vector`)\cr first vector to combine.
#' @param y (`vector`)\cr second vector to combine.
#'
#' @return A `list` where each element combines corresponding elements of `x` and `y`.
#'
#' @examples
#' combine_vectors(1:3, 4:6)
#'
#' @export
combine_vectors <- function(x, y) {
  checkmate::assert_vector(x)
  checkmate::assert_vector(y, len = length(x))

  # Stack the two vectors as rows so that each column holds one pair, then
  # turn the columns into list elements.
  stacked <- rbind(x, y)
  pairs <- lapply(as.data.frame(stacked), `c`)
  # Drop the automatically generated column names.
  unname(pairs)
}

#' Extract Elements by Name
#'
#' This utility function extracts elements from a vector `x` by `names`.
#' Differences to the standard `[` function are:
#'
#' - If `x` is `NULL`, then `NULL` is always returned (same as in the base function).
#' - If `x` is not `NULL`, then the intersection of its names is made with `names` and those
#'   elements are returned. That is, `names` which don't appear in `x` are not returned as `NA`s.
#'
#' @param x (named `vector`)\cr where to extract named elements from.
#' @param names (`character`)\cr vector of names to extract.
#'
#' @return `NULL` if `x` is `NULL`, otherwise the extracted elements from `x`.
#'
#' @keywords internal
extract_by_name <- function(x, names) {
  # `NULL` in, `NULL` out - no validation is done in that case.
  if (is.null(x)) {
    return(NULL)
  }
  checkmate::assert_named(x)
  checkmate::assert_character(names)

  # Keep only the requested names that actually exist in `x`, in the order
  # in which they appear in `x`.
  shared <- intersect(names(x), names)
  if (length(shared) == 0) {
    return(NULL)
  }
  x[shared]
}

#' Labels for Adverse Event Baskets
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param aesi (`character`)\cr with standardized MedDRA query name (e.g. `SMQzzNAM`) or customized query
#'   name (e.g. `CQzzNAM`).
#' @param scope (`character`)\cr with scope of query (e.g. `SMQzzSC`).
#'
#' @return A `string` with the standard label for the AE basket.
#'
#' @examples
#' adae <- tern_ex_adae
#'
#' # Standardized query label includes scope.
#' aesi_label(adae$SMQ01NAM, scope = adae$SMQ01SC)
#'
#' # Customized query label.
#' aesi_label(adae$CQ01NAM)
#'
#' @export
aesi_label <- function(aesi, scope = NULL) {
  checkmate::assert_character(aesi)
  checkmate::assert_character(scope, null.ok = TRUE)

  # Label attached to the input, used when no single query name can be identified.
  fallback_label <- obj_label(aesi)

  # Distinct non-missing query names after `sas_na()` has been applied.
  aesi_values <- unique(sas_na(aesi))
  aesi_values <- aesi_values[!is.na(aesi_values)]

  if (length(aesi_values) != 1) {
    return(fallback_label)
  }
  if (is.null(scope)) {
    return(aesi_values)
  }

  # Exactly one query name and a scope: the scope must also reduce to a single value.
  scope <- unique(sas_na(scope))
  scope <- scope[!is.na(scope)]
  checkmate::assert_string(scope)
  paste0(aesi_values, " (", scope, ")")
}

#' Indicate Study Arm Variable in Formula
#'
#' We use `study_arm` to indicate the study arm variable in `tern` formulas.
#'
#' @param x arm information
#'
#' @return `x`
#'
#' @keywords internal
study_arm <- function(x) {
  # Capture the expression the caller supplied for `x` and store its text
  # in the `varname` attribute of the returned value.
  arm_expr <- substitute(x)
  structure(x, varname = deparse(arm_expr))
}

#' Smooth Function with Optional Grouping
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This produces `loess` smoothed estimates of `y` with Student confidence intervals.
#'
#' @param df (`data.frame`)\cr data set containing all analysis variables.
#' @param x (`character`)\cr value with x column name.
#' @param y (`character`)\cr value with y column name.
#' @param groups (`character`)\cr vector with optional grouping variables names.
#' @param level (`numeric`)\cr level of confidence interval to use (0.95 by default).
#'
#' @return A `data.frame` with original `x`, smoothed `y`, `ylow`, and `yhigh`, and
#'   optional `groups` variables formatted as `factor` type.
#'
#' @export
get_smooths <- function(df, x, y, groups = NULL, level = 0.95) {
  checkmate::assert_data_frame(df)
  df_cols <- colnames(df)
  checkmate::assert_string(x)
  checkmate::assert_subset(x, df_cols)
  checkmate::assert_numeric(df[[x]])
  checkmate::assert_string(y)
  checkmate::assert_subset(y, df_cols)
  checkmate::assert_numeric(df[[y]])

  if (!is.null(groups)) {
    checkmate::assert_character(groups)
    checkmate::assert_subset(groups, df_cols)
  }

  # Loess fit of `y` on `x`, evaluated at the observed `x` with standard errors.
  smooths <- function(x, y) {
    stats::predict(stats::loess(y ~ x), se = TRUE)
  }

  # Smoothed estimate and interval bounds for one set of observations.
  # NOTE(review): the quantile is taken at `level` rather than `(1 + level) / 2`,
  # so the band is a two-sided interval with coverage `2 * level - 1`; kept as is
  # to preserve existing output - confirm the intended definition.
  smooth_frame <- function(x_val, y_val) {
    plx <- smooths(x_val, y_val)
    # `se.fit` is spelled out instead of relying on `$` partial matching of `se`.
    half_width <- stats::qt(level, plx$df) * plx$se.fit
    data.frame(
      x = x_val,
      y = plx$fit,
      ylow = plx$fit - half_width,
      yhigh = plx$fit + half_width
    )
  }

  if (!is.null(groups)) {
    cc <- stats::complete.cases(df[c(x, y, groups)])
    df_c <- df[cc, c(x, y, groups)]
    df_c_ordered <- df_c[do.call("order", as.list(df_c[, groups, drop = FALSE])), , drop = FALSE]
    df_c_g <- data.frame(Map(as.factor, df_c_ordered[groups]))

    # Split row indices rather than the data itself so that every smoothed row
    # remembers which input row it came from. With several grouping variables
    # `by()` returns the cells with the first factor varying fastest, which is
    # not the row order of `df_c_g`; the indices are used to attach the right
    # group labels afterwards.
    row_index <- data.frame(.row = seq_len(nrow(df_c_ordered)))
    df_smooth_raw <-
      by(row_index, df_c_g, function(d) {
        rows <- d[[".row"]]
        res <- smooth_frame(df_c_ordered[[x]][rows], df_c_ordered[[y]][rows])
        res[[".row"]] <- rows
        res
      })

    df_smooth <- do.call(rbind, df_smooth_raw)
    rows <- df_smooth[[".row"]]
    df_smooth[[".row"]] <- NULL
    df_smooth[groups] <- df_c_g[rows, , drop = FALSE]

    df_smooth
  } else {
    cc <- stats::complete.cases(df[c(x, y)])
    df_c <- df[cc, ]

    smooth_frame(df_c[[x]], df_c[[y]])
  }
}

#' Number of Available (Non-Missing Entries) in a Vector
#'
#' Small utility function for better readability.
#'
#' @param x (`any`)\cr vector in which to count non-missing values.
#'
#' @return Number of non-missing values.
#'
#' @examples
#' # Internal function - n_available
#' \dontrun{
#' n_available(c(1, NA, 2))
#' }
#'
#' @keywords internal
n_available <- function(x) {
  # Flag the non-missing entries, then count them.
  is_present <- !is.na(x)
  sum(is_present)
}

#' Reapply Variable Labels
#'
#' This is a helper function that is used in tests.
#'
#' @param x (`vector`)\cr vector of elements that needs new labels.
#' @param varlabels (`character`)\cr vector of labels for `x`.
#' @param ... further parameters to be added to the list.
#'
#' @return `x` with variable labels reapplied.
#'
#' @export
reapply_varlabels <- function(x, varlabels, ...) {
  # Labels passed through `...` are appended to those given in `varlabels`.
  extra_labels <- list(...)
  named_labels <- c(as.list(varlabels), extra_labels)

  # Overwrite the labels of the named variables and write them back to `x`.
  current_labels <- formatters::var_labels(x)
  current_labels[names(named_labels)] <- as.character(named_labels)
  formatters::var_labels(x) <- current_labels
  x
}

# Wrapper around survival::clogit that replaces any fitting error by a message
# naming the failing step. The original error text is appended so that the
# underlying cause is not lost.
clogit_with_tryCatch <- function(formula, data, ...) { # nolint
  tryCatch(
    survival::clogit(formula = formula, data = data, ...),
    error = function(e) {
      stop("model not built successfully with survival::clogit: ", conditionMessage(e))
    }
  )
}

#' Kaplan-Meier Plot
#'
#' @description `r lifecycle::badge("stable")`
#'
#' From a survival model, a graphic is rendered along with tabulated annotation
#' including the number of patients at risk at given times and the median survival
#' per group.
#'
#' @inheritParams grid::gTree
#' @inheritParams argument_convention
#' @param df (`data.frame`)\cr data set containing all analysis variables.
#' @param variables (named `list`)\cr variable names. Details are:
#'   * `tte` (`numeric`)\cr variable indicating time-to-event duration values.
#'   * `is_event` (`logical`)\cr event variable. `TRUE` if event, `FALSE` if time to event is censored.
#'   * `arm` (`factor`)\cr the treatment group variable.
#'   * `strat` (`character` or `NULL`)\cr variable names indicating stratification factors.
#' @param control_surv (`list`)\cr parameters for comparison details, specified by using
#'   the helper function [control_surv_timepoint()]. Some possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for survival rate.
#'   * `conf_type` (`string`)\cr "plain" (default), "log", "log-log" for confidence interval type,
#'     see more in [survival::survfit()]. Note that the option "none" is no longer supported.
#' @param xticks (`numeric`, `number`, or `NULL`)\cr numeric vector of ticks or single number with spacing
#'   between ticks on the x axis. If `NULL` (default), [labeling::extended()] is used to determine
#'   an optimal tick position on the x axis.
#' @param yval (`string`)\cr value of y-axis. Options are `Survival` (default) and `Failure` probability.
#' @param censor_show (`flag`)\cr whether to show censored.
#' @param xlab (`string`)\cr label of x-axis.
#' @param ylab (`string`)\cr label of y-axis.
#' @param title (`string`)\cr title for plot.
#' @param footnotes (`string`)\cr footnotes for plot.
#' @param col (`character`)\cr lines colors. Length of a vector should be equal
#'   to number of strata from [survival::survfit()].
#' @param lty (`numeric`)\cr line type. Length of a vector should be equal
#'   to number of strata from [survival::survfit()].
#' @param lwd (`numeric`)\cr line width. Length of a vector should be equal
#'   to number of strata from [survival::survfit()].
#' @param pch (`numeric`, `string`)\cr value or character of points symbol to indicate censored cases.
#' @param size (`numeric`)\cr size of censored point, a class of `unit`.
#' @param max_time (`numeric`)\cr maximum value to show on X axis. Only data values less than or up to
#'   this threshold value will be plotted (defaults to `NULL`).
#' @param font_size (`number`)\cr font size to be used.
#' @param ci_ribbon (`flag`)\cr draw the confidence interval around the Kaplan-Meier curve.
#' @param ggtheme (`theme`)\cr a graphical theme as provided by `ggplot2` to control outlook of the Kaplan-Meier curve.
#' @param annot_at_risk (`flag`)\cr compute and add the annotation table reporting the number of patients at risk
#'   matching the main grid of the Kaplan-Meier curve.
#' @param annot_surv_med (`flag`)\cr compute and add the annotation table on the Kaplan-Meier curve estimating the
#'   median survival time per group.
#' @param annot_coxph (`flag`)\cr add the annotation table from a [survival::coxph()] model.
#' @param annot_stats (`string`)\cr statistics annotations to add to the plot. Options are
#'   `median` (median survival follow-up time) and `min` (minimum survival follow-up time).
#' @param annot_stats_vlines (`flag`)\cr add vertical lines corresponding to each of the statistics
#'   specified by `annot_stats`. If `annot_stats` is `NULL` no lines will be added.
#' @param control_coxph_pw (`list`)\cr parameters for comparison details, specified by using
#'   the helper function [control_coxph()]. Some possible parameter options are:
#'   * `pval_method` (`string`)\cr p-value method for testing hazard ratio = 1.
#'     Default method is "log-rank", can also be set to "wald" or "likelihood".
#'   * `ties` (`string`)\cr method for tie handling. Default is "efron",
#'     can also be set to "breslow" or "exact". See more in [survival::coxph()]
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for HR.
#' @param position_coxph (`numeric`)\cr x and y positions for plotting [survival::coxph()] model.
#' @param position_surv_med (`numeric`)\cr x and y positions for plotting annotation table estimating median survival
#'   time per group.
#'
#' @return A `grob` of class `gTree`.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(ggplot2)
#' library(survival)
#' library(grid)
#' library(nestcolor)
#'
#' df <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(is_event = CNSR == 0)
#' variables <- list(tte = "AVAL", is_event = "is_event", arm = "ARMCD")
#'
#' # 1. Example - basic option
#'
#' res <- g_km(df = df, variables = variables)
#' res <- g_km(df = df, variables = variables, yval = "Failure")
#' res <- g_km(
#'   df = df,
#'   variables = variables,
#'   control_surv = control_surv_timepoint(conf_level = 0.9),
#'   col = c("grey25", "grey50", "grey75")
#' )
#' res <- g_km(df = df, variables = variables, ggtheme = theme_minimal())
#' res <- g_km(df = df, variables = variables, ggtheme = theme_minimal(), lty = 1:3)
#' res <- g_km(df = df, variables = variables, max = 2000)
#' res <- g_km(
#'   df = df,
#'   variables = variables,
#'   annot_stats = c("min", "median"),
#'   annot_stats_vlines = TRUE
#' )
#'
#' # 2. Example - Arrange several KM curve on a single graph device
#'
#' # 2.1 Use case: A general graph on the top, a zoom on the bottom.
#' grid.newpage()
#' lyt <- grid.layout(nrow = 2, ncol = 1) %>%
#'   viewport(layout = .) %>%
#'   pushViewport()
#'
#' res <- g_km(
#'   df = df, variables = variables, newpage = FALSE, annot_surv_med = FALSE,
#'   vp = viewport(layout.pos.row = 1, layout.pos.col = 1)
#' )
#' res <- g_km(
#'   df = df, variables = variables, max = 1000, newpage = FALSE, annot_surv_med = FALSE,
#'   ggtheme = theme_dark(),
#'   vp = viewport(layout.pos.row = 2, layout.pos.col = 1)
#' )
#'
#' # 2.2 Use case: No annotations on top, annotated graph on bottom
#' grid.newpage()
#' lyt <- grid.layout(nrow = 2, ncol = 1) %>%
#'   viewport(layout = .) %>%
#'   pushViewport()
#'
#' res <- g_km(
#'   df = df, variables = variables, newpage = FALSE,
#'   annot_surv_med = FALSE, annot_at_risk = FALSE,
#'   vp = viewport(layout.pos.row = 1, layout.pos.col = 1)
#' )
#' res <- g_km(
#'   df = df, variables = variables, max = 2000, newpage = FALSE, annot_surv_med = FALSE,
#'   annot_at_risk = TRUE,
#'   ggtheme = theme_dark(),
#'   vp = viewport(layout.pos.row = 2, layout.pos.col = 1)
#' )
#'
#' # Add annotation from a pairwise coxph analysis
#' g_km(
#'   df = df, variables = variables,
#'   annot_coxph = TRUE
#' )
#'
#' g_km(
#'   df = df, variables = c(variables, list(strat = "SEX")),
#'   font_size = 15,
#'   annot_coxph = TRUE,
#'   control_coxph = control_coxph(pval_method = "wald", ties = "exact", conf_level = 0.99),
#'   position_coxph = c(0.4, 0.5)
#' )
#'
#' # Change position of the treatment group annotation table.
#' g_km(
#'   df = df, variables = c(variables, list(strat = "SEX")),
#'   font_size = 15,
#'   annot_coxph = TRUE,
#'   control_coxph = control_coxph(pval_method = "wald", ties = "exact", conf_level = 0.99),
#'   position_surv_med = c(1, 0.7)
#' )
#' }
#'
#' @export
g_km <- function(df,
                 variables,
                 control_surv = control_surv_timepoint(),
                 col = NULL,
                 lty = NULL,
                 lwd = .5,
                 censor_show = TRUE,
                 pch = 3,
                 size = 2,
                 max_time = NULL,
                 xticks = NULL,
                 xlab = "Days",
                 yval = c("Survival", "Failure"),
                 ylab = paste(yval, "Probability"),
                 title = NULL,
                 footnotes = NULL,
                 draw = TRUE,
                 newpage = TRUE,
                 gp = NULL,
                 vp = NULL,
                 name = NULL,
                 font_size = 12,
                 ci_ribbon = FALSE,
                 ggtheme = nestcolor::theme_nest(),
                 annot_at_risk = TRUE,
                 annot_surv_med = TRUE,
                 annot_coxph = FALSE,
                 annot_stats = NULL,
                 annot_stats_vlines = FALSE,
                 control_coxph_pw = control_coxph(),
                 position_coxph = c(0, 0.05),
                 position_surv_med = c(0.9, 0.9)) {
  # Validate the arguments that are used directly in this function.
  checkmate::assert_list(variables)
  checkmate::assert_subset(c("tte", "arm", "is_event"), names(variables))
  checkmate::assert_string(title, null.ok = TRUE)
  checkmate::assert_string(footnotes, null.ok = TRUE)
  checkmate::assert_character(col, null.ok = TRUE)
  checkmate::assert_subset(annot_stats, c("median", "min"))
  checkmate::assert_logical(annot_stats_vlines)

  tte <- variables$tte
  is_event <- variables$is_event
  arm <- variables$arm

  assert_valid_factor(df[[arm]])
  assert_df_with_variables(df, list(tte = tte, is_event = is_event, arm = arm))
  checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE)
  checkmate::assert_numeric(df[[tte]], min.len = 1, any.missing = FALSE)

  # `armval` is only kept when the arm variable has a single distinct value; it
  # is then used as the stratum label. Otherwise it is reset to `NULL`.
  armval <- as.character(unique(df[[arm]]))
  if (length(armval) > 1) {
    armval <- NULL
  }
  yval <- match.arg(yval)

  # Kaplan-Meier fit by arm and its plotting data.
  formula <- stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", arm))
  fit_km <- survival::survfit(
    formula = formula,
    data = df,
    conf.int = control_surv$conf_level,
    conf.type = control_surv$conf_type
  )
  data_plot <- h_data_plot(
    fit_km = fit_km,
    armval = armval,
    max_time = max_time
  )

  xticks <- h_xticks(data = data_plot, xticks = xticks, max_time = max_time)
  gg <- h_ggkm(
    data = data_plot,
    censor_show = censor_show,
    pch = pch,
    size = size,
    xticks = xticks,
    xlab = xlab,
    yval = yval,
    ylab = ylab,
    title = title,
    footnotes = footnotes,
    max_time = max_time,
    lwd = lwd,
    lty = lty,
    col = col,
    ggtheme = ggtheme,
    ci_ribbon = ci_ribbon
  )

  # Optional follow-up statistics drawn on the curve. All `ggplot2` functions
  # are namespace-qualified, as everywhere else in this file.
  if (!is.null(annot_stats)) {
    if ("median" %in% annot_stats) {
      # Fit without arm so that the median refers to all observations together.
      fit_km_all <- survival::survfit(
        formula = stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", 1)),
        data = df,
        conf.int = control_surv$conf_level,
        conf.type = control_surv$conf_type
      )
      median_fu <- stats::median(fit_km_all)
      gg <- gg +
        ggplot2::geom_text(
          size = 8 / ggplot2::.pt, col = 1,
          x = median_fu + 0.065 * max(data_plot$time),
          y = if (yval == "Survival") 0.62 else 0.38,
          label = paste("Median F/U:\n", round(median_fu, 1), tolower(df$AVALU[1]))
        )
      if (annot_stats_vlines) {
        gg <- gg +
          ggplot2::geom_segment(
            ggplot2::aes(x = median_fu, xend = median_fu, y = -Inf, yend = Inf),
            linetype = 2, col = "darkgray"
          )
      }
    }
    if ("min" %in% annot_stats) {
      min_fu <- min(df[[tte]])
      gg <- gg +
        ggplot2::geom_text(
          size = 8 / ggplot2::.pt, col = 1,
          x = min_fu + max(data_plot$time) * (if (yval == "Survival") 0.05 else 0.07),
          y = if (yval == "Survival") 1.0 else 0.05,
          label = paste("Min. F/U:\n", round(min_fu, 1), tolower(df$AVALU[1]))
        )
      if (annot_stats_vlines) {
        gg <- gg +
          ggplot2::geom_segment(
            ggplot2::aes(x = min_fu, xend = min_fu, y = Inf, yend = -Inf),
            linetype = 2, col = "darkgray"
          )
      }
    }
    gg <- gg + ggplot2::guides(fill = ggplot2::guide_legend(override.aes = list(shape = NA, label = "")))
  }

  g_el <- h_decompose_gg(gg)

  if (annot_at_risk) {
    # This is the content of the table that will be below the graph.
    annot_tbl <- summary(fit_km, time = xticks)
    annot_tbl <- if (is.null(fit_km$strata)) {
      data.frame(
        n.risk = annot_tbl$n.risk,
        time = annot_tbl$time,
        strata = as.factor(armval)
      )
    } else {
      # Strata are labelled "<variable>=<level>": keep what follows the first
      # "=". A plain regular expression is used so that levels which themselves
      # contain "=" or the word "equals" are left intact.
      levels(annot_tbl$strata) <- sub("^[^=]*=", "", levels(annot_tbl$strata))
      data.frame(
        n.risk = annot_tbl$n.risk,
        time = annot_tbl$time,
        strata = annot_tbl$strata
      )
    }

    grobs_patient <- h_grob_tbl_at_risk(
      data = data_plot,
      annot_tbl = annot_tbl,
      xlim = max(max_time, data_plot$time, xticks)
    )
  }

  if (annot_at_risk || annot_surv_med || annot_coxph) {
    lyt <- h_km_layout(
      data = data_plot, g_el = g_el, title = title, footnotes = footnotes, annot_at_risk = annot_at_risk
    )
    # A title occupies the first layout row and shifts every other row by one.
    ttl_row <- as.numeric(!is.null(title))
    foot_row <- as.numeric(!is.null(footnotes))
    km_grob <- grid::gTree(
      vp = grid::viewport(layout = lyt, height = .95, width = .95),
      children = grid::gList(
        # Title.
        if (ttl_row == 1) {
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 2),
            children = grid::gList(grid::textGrob(label = title, x = grid::unit(0, "npc"), hjust = 0))
          )
        },

        # The Kaplan - Meier curve (top-right corner).
        grid::gTree(
          vp = grid::viewport(layout.pos.row = 1 + ttl_row, layout.pos.col = 2),
          children = grid::gList(g_el$panel)
        ),

        # Survfit summary table (top-right corner).
        if (annot_surv_med) {
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 1 + ttl_row, layout.pos.col = 2),
            children = h_grob_median_surv(
              fit_km = fit_km,
              armval = armval,
              x = position_surv_med[1],
              y = position_surv_med[2],
              ttheme = gridExtra::ttheme_default(base_size = font_size)
            )
          )
        },

        # Cox proportional hazards table (same cell as the curve).
        if (annot_coxph) {
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 1 + ttl_row, layout.pos.col = 2),
            children = h_grob_coxph(
              df = df,
              variables = variables,
              control_coxph_pw = control_coxph_pw,
              x = position_coxph[1],
              y = position_coxph[2],
              ttheme = gridExtra::ttheme_default(
                base_size = font_size,
                padding = grid::unit(c(1, .5), "lines"),
                core = list(bg_params = list(fill = c("grey95", "grey90"), alpha = .5))
              )
            )
          )
        },

        # Add the y-axis annotation (top-left corner).
        grid::gTree(
          vp = grid::viewport(layout.pos.row = 1 + ttl_row, layout.pos.col = 1),
          children = h_grob_y_annot(ylab = g_el$ylab, yaxis = g_el$yaxis)
        ),

        # Add the x-axis annotation (second row below the Kaplan Meier Curve).
        grid::gTree(
          vp = grid::viewport(layout.pos.row = 2 + ttl_row, layout.pos.col = 2),
          children = grid::gList(rbind(g_el$xaxis, g_el$xlab))
        ),

        # Add the legend.
        grid::gTree(
          vp = grid::viewport(layout.pos.row = 3 + ttl_row, layout.pos.col = 2),
          children = grid::gList(g_el$guide)
        ),

        # Add the table with patient-at-risk numbers.
        if (annot_at_risk) {
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 4 + ttl_row, layout.pos.col = 2),
            children = grobs_patient$at_risk
          )
        },
        if (annot_at_risk) {
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 4 + ttl_row, layout.pos.col = 1),
            children = grobs_patient$label
          )
        },
        if (annot_at_risk) {
          # Add the x-axis for the table.
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 5 + ttl_row, layout.pos.col = 2),
            children = grid::gList(rbind(g_el$xaxis, g_el$xlab))
          )
        },

        # Footnotes.
        if (foot_row == 1) {
          grid::gTree(
            vp = grid::viewport(
              layout.pos.row = if (annot_at_risk) 6 + ttl_row else 4 + ttl_row,
              layout.pos.col = 2
            ),
            children = grid::gList(grid::textGrob(label = footnotes, x = grid::unit(0, "npc"), hjust = 0))
          )
        }
      )
    )

    result <- grid::gTree(
      vp = vp,
      gp = gp,
      name = name,
      children = grid::gList(km_grob)
    )
  } else {
    # No annotation requested: wrap the plain ggplot grob.
    result <- grid::gTree(
      vp = vp,
      gp = gp,
      name = name,
      children = grid::gList(ggplot2::ggplotGrob(gg))
    )
  }

  if (newpage && draw) grid::grid.newpage()
  if (draw) grid::grid.draw(result)
  invisible(result)
}

#' Helper function: tidy survival fit
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Convert the survival fit data into a data frame designed for plotting
#' within `g_km`.
#'
#' This starts from the [broom::tidy()] result, and then:
#'   * Post-processes the `strata` column into a factor.
#'   * Extends each stratum by an additional first row with time 0 and probability 1 so that
#'     downstream plot lines start at those coordinates.
#'   * Adds a `censor` column.
#'   * Filters the rows before `max_time`.
#'
#' @inheritParams g_km
#' @param fit_km (`survfit`)\cr result of [survival::survfit()].
#' @param armval (`string`)\cr used as strata name when treatment arm variable only has one level. Default is "All".
#'
#' @return A `tibble` with columns `time`, `n.risk`, `n.event`, `n.censor`, `estimate`, `std.error`, `conf.high`,
#'   `conf.low`, `strata`, and `censor`.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(survival)
#'
#' # Test with multiple arms
#' tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
#'   h_data_plot()
#'
#' # Test with single arm
#' tern_ex_adtte %>%
#'   filter(PARAMCD == "OS", ARMCD == "ARM B") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
#'   h_data_plot(armval = "ARM B")
#' }
#'
#' @export
h_data_plot <- function(fit_km,
                        armval = "All",
                        max_time = NULL) {
  y <- broom::tidy(fit_km)

  if (!is.null(fit_km$strata)) {
    # Strata are labelled "<variable>=<level>": keep what follows the first "=".
    # A plain regular expression is used so that levels which themselves contain
    # "=" or the word "equals" are left intact.
    strip_variable <- function(labels) sub("^[^=]*=", "", labels)
    strata_levels <- strip_variable(names(fit_km$strata))
    y$strata <- factor(strip_variable(y$strata), levels = strata_levels)
  } else {
    # Unstratified fit: every row belongs to the single stratum `armval`.
    y$strata <- armval
  }

  # Prepend to each stratum a row at time 0 with probability 1 so that the
  # plotted curves start at these coordinates.
  y_by_strata <- split(y, y$strata)
  y_by_strata_extended <- lapply(
    y_by_strata,
    FUN = function(tbl) {
      first_row <- tbl[1L, ]
      first_row$time <- 0
      # Everyone counted in the first observed row is at risk at time 0.
      first_row$n.risk <- sum(first_row[, c("n.risk", "n.event", "n.censor")])
      first_row$n.event <- first_row$n.censor <- 0
      first_row$estimate <- first_row$conf.high <- first_row$conf.low <- 1
      first_row$std.error <- 0
      rbind(
        first_row,
        tbl
      )
    }
  )
  y <- do.call(rbind, y_by_strata_extended)

  # `censor` holds the estimate at times with at least one censored
  # observation and `NA` elsewhere.
  y$censor <- ifelse(y$n.censor > 0, y$estimate, NA)
  if (!is.null(max_time)) {
    y <- y[y$time <= max(max_time), ]
  }
  y
}

#' Helper function: x tick positions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Calculate the positions of ticks on the x-axis. However, if `xticks` already
#' exists it is kept as is. It is based on the same function `ggplot2` relies on,
#' and is required in the graphic and the patient-at-risk annotation table.
#'
#' @inheritParams g_km
#' @inheritParams h_ggkm
#'
#' @return A vector of positions to use for x-axis ticks on a `ggplot` object.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(survival)
#'
#' data <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
#'   h_data_plot()
#'
#' h_xticks(data)
#' h_xticks(data, xticks = seq(0, 3000, 500))
#' h_xticks(data, xticks = 500)
#' h_xticks(data, xticks = 500, max_time = 6000)
#' h_xticks(data, xticks = c(0, 500), max_time = 300)
#' h_xticks(data, xticks = 500, max_time = 300)
#' }
#'
#' @export
h_xticks <- function(data, xticks = NULL, max_time = NULL) {
  # No ticks supplied: let `labeling::extended()` choose them over the observed
  # time range, extended to `max_time` when given (`max()` ignores a `NULL`).
  if (is.null(xticks)) {
    time_range <- range(data$time)
    upper <- max(time_range[2], max_time)
    return(labeling::extended(time_range[1], upper, m = 5))
  }

  # Single number: regular spacing starting at 0.
  if (checkmate::test_number(xticks)) {
    upper <- max(data$time, max_time)
    return(seq(0, upper, xticks))
  }

  # Numeric vector: explicit positions, used as they are.
  if (is.numeric(xticks)) {
    return(xticks)
  }

  stop(
    paste(
      "xticks should be either `NULL`",
      "or a single number (interval between x ticks)",
      "or a numeric vector (position of ticks on the x axis)"
    )
  )
}

#' Helper function: KM plot
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Draw the Kaplan-Meier plot using `ggplot2`.
#'
#' @inheritParams g_km
#' @param data (`data.frame`)\cr survival data as pre-processed by `h_data_plot`.
#'
#' @return A `ggplot` object.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(survival)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#' data_plot <- h_data_plot(fit_km = fit_km)
#' xticks <- h_xticks(data = data_plot)
#' gg <- h_ggkm(
#'   data = data_plot,
#'   censor_show = TRUE,
#'   xticks = xticks,
#'   xlab = "Days",
#'   yval = "Survival",
#'   ylab = "Survival Probability",
#'   title = "Survival"
#' )
#' gg
#' }
#'
#' @export
h_ggkm <- function(data,
                   xticks = NULL,
                   yval = "Survival",
                   censor_show,
                   xlab,
                   ylab,
                   title,
                   footnotes = NULL,
                   max_time = NULL,
                   lwd = 1,
                   lty = NULL,
                   pch = 3,
                   size = 2,
                   col = NULL,
                   ci_ribbon = FALSE,
                   ggtheme = nestcolor::theme_nest()) {
  checkmate::assert_numeric(lty, null.ok = TRUE)
  checkmate::assert_character(col, null.ok = TRUE)

  # Change estimates of survival to estimates of failure (1 - survival).
  # The confidence bounds swap roles: the new upper bound is 1 - lower bound
  # and vice versa.
  if (yval == "Failure") {
    data$estimate <- 1 - data$estimate
    data[c("conf.high", "conf.low")] <- list(1 - data$conf.low, 1 - data$conf.high)
    data$censor <- 1 - data$censor
  }

  # Base plot: time against estimate, colored and filled by stratum, with a
  # horizontal reference line at 0.
  gg <- {
    ggplot2::ggplot(
      data = data,
      mapping = ggplot2::aes(
        x = .data[["time"]],
        y = .data[["estimate"]],
        ymin = .data[["conf.low"]],
        ymax = .data[["conf.high"]],
        color = .data[["strata"]],
        fill = .data[["strata"]]
      )
    ) +
      ggplot2::geom_hline(yintercept = 0)
  }

  # Optional confidence band between `conf.low` and `conf.high`.
  if (ci_ribbon) {
    gg <- gg + ggplot2::geom_ribbon(alpha = .3, lty = 0)
  }

  # Step curves. `lty` can be `NULL` (default line type), a single number (same
  # line type for all strata) or a numeric vector (one line type per stratum,
  # applied through a manual linetype scale).
  gg <- if (is.null(lty)) {
    gg +
      ggplot2::geom_step(linewidth = lwd)
  } else if (checkmate::test_number(lty)) {
    gg +
      ggplot2::geom_step(linewidth = lwd, lty = lty)
  } else if (is.numeric(lty)) {
    gg +
      ggplot2::geom_step(mapping = ggplot2::aes(linetype = .data[["strata"]]), linewidth = lwd) +
      ggplot2::scale_linetype_manual(values = lty)
  }

  # The y axis is displayed on [0, 1]; labels, title and caption are set here.
  gg <- gg +
    ggplot2::coord_cartesian(ylim = c(0, 1)) +
    ggplot2::labs(x = xlab, y = ylab, title = title, caption = footnotes)

  # User-supplied colors apply to both the line color and the fill.
  if (!is.null(col)) {
    gg <- gg +
      ggplot2::scale_color_manual(values = col) +
      ggplot2::scale_fill_manual(values = col)
  }
  # Censoring marks: one point per row with a non-zero `n.censor`, with a
  # dedicated "Censored" legend entry for the point shape.
  if (censor_show) {
    dt <- data[data$n.censor != 0, ]
    dt$censor_lbl <- factor("Censored")

    gg <- gg + ggplot2::geom_point(
      data = dt,
      ggplot2::aes(
        x = .data[["time"]],
        y = .data[["censor"]],
        shape = .data[["censor_lbl"]]
      ),
      size = size,
      show.legend = TRUE,
      inherit.aes = TRUE
    ) +
      ggplot2::scale_shape_manual(name = NULL, values = pch) +
      ggplot2::guides(
        shape = ggplot2::guide_legend(override.aes = list(linetype = NA)),
        fill = ggplot2::guide_legend(override.aes = list(shape = NA))
      )
  }

  # x axis breaks and limits, depending on which of `xticks` and `max_time`
  # are supplied. With only `xticks`, limits are set only when the ticks cover
  # the whole observed time range.
  if (!is.null(max_time) && !is.null(xticks)) {
    gg <- gg + ggplot2::scale_x_continuous(breaks = xticks, limits = c(min(0, xticks), max(c(xticks, max_time))))
  } else if (!is.null(xticks)) {
    if (max(data$time) <= max(xticks)) {
      gg <- gg + ggplot2::scale_x_continuous(breaks = xticks, limits = c(min(0, min(xticks)), max(xticks)))
    } else {
      gg <- gg + ggplot2::scale_x_continuous(breaks = xticks)
    }
  } else if (!is.null(max_time)) {
    gg <- gg + ggplot2::scale_x_continuous(limits = c(0, max_time))
  }

  if (!is.null(ggtheme)) {
    gg <- gg + ggtheme
  }

  # These theme settings are added last, after `ggtheme`.
  gg + ggplot2::theme(
    legend.position = "bottom",
    legend.title = ggplot2::element_blank(),
    panel.grid.major.x = ggplot2::element_line(linewidth = 2)
  )
}

#' `ggplot` Decomposition
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The elements composing the `ggplot` are extracted and organized in a `list`.
#'
#' @param gg (`ggplot`)\cr a graphic to decompose.
#'
#' @return A named `list` with elements:
#'   * `panel`: The panel.
#'   * `yaxis`: The y-axis.
#'   * `xaxis`: The x-axis.
#'   * `xlab`: The x-axis label.
#'   * `ylab`: The y-axis label.
#'   * `guide`: The legend.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#' data_plot <- h_data_plot(fit_km = fit_km)
#' xticks <- h_xticks(data = data_plot)
#' gg <- h_ggkm(
#'   data = data_plot,
#'   yval = "Survival",
#'   censor_show = TRUE,
#'   xticks = xticks, xlab = "Days", ylab = "Survival Probability",
#'   title = "tt",
#'   footnotes = "ff"
#' )
#'
#' g_el <- h_decompose_gg(gg)
#' grid::grid.newpage()
#' grid.rect(gp = grid::gpar(lty = 1, col = "red", fill = "gray85", lwd = 5))
#' grid::grid.draw(g_el$panel)
#'
#' grid::grid.newpage()
#' grid.rect(gp = grid::gpar(lty = 1, col = "royalblue", fill = "gray85", lwd = 5))
#' grid::grid.draw(with(g_el, cbind(ylab, yaxis)))
#' }
#'
#' @export
h_decompose_gg <- function(gg) {
  # Convert the plot to its grob representation so single elements can be picked out.
  plot_grob <- ggplot2::ggplotGrob(gg)

  # Name of the returned element -> pattern handed to `gtable::gtable_filter()`.
  element_patterns <- c(
    panel = "panel",
    yaxis = "axis-l",
    xaxis = "axis-b",
    xlab = "xlab-b",
    ylab = "ylab-l",
    guide = "guide"
  )

  # `lapply()` over a named vector keeps the names, giving the named list of elements.
  lapply(
    X = element_patterns,
    FUN = function(pattern) gtable::gtable_filter(plot_grob, pattern)
  )
}

#' Helper: KM Layout
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares a (5 rows) x (2 cols) layout for the Kaplan-Meier curve.
#'
#' @inheritParams g_km
#' @inheritParams h_ggkm
#' @param g_el (`list` of `gtable`)\cr list as obtained by `h_decompose_gg()`.
#' @param annot_at_risk (`flag`)\cr compute and add the annotation table reporting the number of
#'   patients at risk matching the main grid of the Kaplan-Meier curve.
#'
#' @return A grid layout.
#'
#' @details The layout corresponds to a grid of two columns and five rows of unequal dimensions (plus one
#'   optional row each for the title and the footnotes). Most of the dimensions are fixed; only the curve is
#'   flexible and takes up the remaining free space.
#'   * The left column gets the annotation of the `ggplot` (y-axis) and the names of the strata for the patients
#'     at risk tabulation. The main constraint is the width of the column, which must be large enough to write
#'     the strata names.
#'   * The right column receives the `ggplot`, the legend, the x-axis and the patients at risk table.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(survival)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#' data_plot <- h_data_plot(fit_km = fit_km)
#' xticks <- h_xticks(data = data_plot)
#' gg <- h_ggkm(
#'   data = data_plot,
#'   censor_show = TRUE,
#'   xticks = xticks, xlab = "Days", ylab = "Survival Probability",
#'   title = "tt", footnotes = "ff", yval = "Survival"
#' )
#' g_el <- h_decompose_gg(gg)
#' lyt <- h_km_layout(data = data_plot, g_el = g_el, title = "t", footnotes = "f")
#' grid.show.layout(lyt)
#' }
#'
#' @export
h_km_layout <- function(data, g_el, title, footnotes, annot_at_risk = TRUE) {
  strata_fct <- as.factor(data$strata)
  strata_names <- levels(strata_fct)
  n_strata <- nlevels(strata_fct)

  # Left column width (in points): the larger of the y-axis annotation and the
  # widest strata label (plus 7pt).
  y_annot_pt <- as.numeric(grid::convertX(g_el$yaxis$width + g_el$ylab$width, "pt"))
  strata_pt <- as.numeric(
    grid::convertX(grid::stringWidth(strata_names) + grid::unit(7, "pt"), "pt")
  )
  col_annot_width <- max(c(y_annot_pt, strata_pt))

  has_title <- !is.null(title)
  has_footnotes <- !is.null(footnotes)

  # Height shared by the x-axis row and the last row of the at-risk table.
  axis_ht <- grid::convertX(with(g_el, xaxis$height + ylab$width), "pt")
  guide_ht <- grid::convertX(g_el$guide$heights, "pt")

  # Rows, top to bottom: [title], curve, x-axis, legend, at-risk table (2 rows), [footnotes].
  # `if (cond) value` yields NULL when `cond` is FALSE, which `c()` drops.
  row_values <- c(
    if (has_title) 2,
    1,
    axis_ht,
    guide_ht,
    n_strata + 1,
    axis_ht,
    if (has_footnotes) 1
  )
  row_units <- c(
    if (has_title) "lines",
    "null",
    "pt",
    "pt",
    "lines",
    "pt",
    if (has_footnotes) "lines"
  )
  # Rows that remain when the at-risk table is not requested.
  rows_without_tbl <- c(
    if (has_title) TRUE,
    rep(TRUE, 3),
    rep(FALSE, 2),
    if (has_footnotes) TRUE
  )

  rows_kept <- if (annot_at_risk) {
    rep(TRUE, 5 + has_title + has_footnotes)
  } else {
    rows_without_tbl
  }

  grid::grid.layout(
    nrow = sum(rows_kept), ncol = 2,
    widths = grid::unit(c(col_annot_width, 1), c("pt", "null")),
    heights = grid::unit(
      x = row_values[rows_kept],
      units = row_units[rows_kept]
    )
  )
}

#' Helper: Patient-at-Risk Grobs
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Two graphical objects are obtained, one corresponding to row labeling and
#' the second to the number of patients at risk.
#'
#' @inheritParams g_km
#' @inheritParams h_ggkm
#' @param annot_tbl (`data.frame`)\cr annotation as prepared by [survival::summary.survfit()] which
#'   includes the number of patients at risk at given time points.
#' @param xlim (`numeric`)\cr the maximum value on the x-axis (used to
#'   ensure the at risk table aligns with the KM graph).
#'
#' @return A named `list` of two `gTree` objects: `at_risk` and `label`.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#'
#' data_plot <- h_data_plot(fit_km = fit_km)
#'
#' xticks <- h_xticks(data = data_plot)
#'
#' gg <- h_ggkm(
#'   data = data_plot,
#'   censor_show = TRUE,
#'   xticks = xticks, xlab = "Days", ylab = "Survival Probability",
#'   title = "tt", footnotes = "ff", yval = "Survival"
#' )
#'
#' # The annotation table reports the patient at risk for a given strata and
#' # time (`xticks`).
#' annot_tbl <- summary(fit_km, time = xticks)
#' if (is.null(fit_km$strata)) {
#'   annot_tbl <- with(annot_tbl, data.frame(n.risk = n.risk, time = time, strata = "All"))
#' } else {
#'   strata_lst <- strsplit(sub("=", "equals", levels(annot_tbl$strata)), "equals")
#'   levels(annot_tbl$strata) <- matrix(unlist(strata_lst), ncol = 2, byrow = TRUE)[, 2]
#'   annot_tbl <- data.frame(
#'     n.risk = annot_tbl$n.risk,
#'     time = annot_tbl$time,
#'     strata = annot_tbl$strata
#'   )
#' }
#'
#' # The annotation table is transformed into a grob.
#' tbl <- h_grob_tbl_at_risk(data = data_plot, annot_tbl = annot_tbl, xlim = max(xticks))
#'
#' # For the representation, the layout is estimated for which the decomposition
#' # of the graphic element is necessary.
#' g_el <- h_decompose_gg(gg)
#' lyt <- h_km_layout(data = data_plot, g_el = g_el, title = "t", footnotes = "f")
#'
#' grid::grid.newpage()
#' pushViewport(viewport(layout = lyt, height = .95, width = .95))
#' grid.rect(gp = grid::gpar(lty = 1, col = "purple", fill = "gray85", lwd = 1))
#' pushViewport(viewport(layout.pos.row = 4, layout.pos.col = 2))
#' grid.rect(gp = grid::gpar(lty = 1, col = "orange", fill = "gray85", lwd = 1))
#' grid::grid.draw(tbl$at_risk)
#' popViewport()
#' pushViewport(viewport(layout.pos.row = 4, layout.pos.col = 1))
#' grid.rect(gp = grid::gpar(lty = 1, col = "green3", fill = "gray85", lwd = 1))
#' grid::grid.draw(tbl$label)
#' }
#'
#' @export
h_grob_tbl_at_risk <- function(data, annot_tbl, xlim) {
  strata_fct <- as.factor(data$strata)
  strata_names <- levels(strata_fct)
  n_strata <- nlevels(strata_fct)

  # Step of the time grid, taken from the first two time points of `annot_tbl`.
  time_step <- annot_tbl$time[2] - annot_tbl$time[1]

  # Complete time-by-strata grid from 0 to `xlim`; every value missing after the
  # join is set to 0.
  full_grid <- expand.grid(
    time = seq(0, xlim, time_step),
    strata = unique(annot_tbl$strata)
  )
  annot_tbl <- dplyr::left_join(full_grid, annot_tbl, by = c("time", "strata"))
  annot_tbl[is.na(annot_tbl)] <- 0

  # Numeric strata codes drive the vertical position of each text row.
  strata_idx <- as.numeric(annot_tbl$strata)
  vp_table <- grid::plotViewport(margins = grid::unit(c(0, 0, 0, 0), "lines"))

  # Alternating grey background rows, one per stratum; used by both grobs.
  striped_rows <- function() {
    grid::rectGrob(
      x = 0, y = grid::unit(c(1:n_strata) - 1, "lines"),
      gp = grid::gpar(fill = c("gray95", "gray90"), alpha = 1, col = "white"),
      height = grid::unit(1, "lines"), just = "bottom", hjust = 0
    )
  }

  # Left part: strata names, right-aligned.
  label_children <- grid::gList(
    striped_rows(),
    grid::textGrob(
      label = unique(annot_tbl$strata),
      x = .95,
      y = grid::unit(
        (max(unique(strata_idx)) - unique(strata_idx)) + .5,
        "native"
      ),
      hjust = 1,
      gp = grid::gpar(fontface = "italic", fontsize = 10)
    )
  )

  # Right part: numbers at risk, placed at their time on the native x scale.
  at_risk_children <- grid::gList(
    striped_rows(),
    grid::textGrob(
      label = annot_tbl$n.risk,
      x = grid::unit(annot_tbl$time, "native"),
      y = grid::unit(
        (max(strata_idx) - strata_idx) + .5,
        "line"
      ) # maybe native
    )
  )

  row_scale <- c(0, n_strata + 1)

  list(
    at_risk = grid::gList(
      grid::gTree(
        vp = vp_table,
        children = grid::gList(
          grid::gTree(
            vp = grid::dataViewport(
              xscale = c(0, xlim) + c(-0.05, 0.05) * xlim,
              yscale = row_scale,
              extension = c(0.05, 0)
            ),
            children = grid::gList(at_risk_children)
          )
        )
      )
    ),
    label = grid::gList(
      grid::gTree(
        vp = grid::viewport(width = max(grid::stringWidth(strata_names))),
        children = grid::gList(
          grid::gTree(
            vp = grid::dataViewport(
              xscale = 0:1,
              yscale = row_scale,
              extension = c(0.0, 0)
            ),
            children = grid::gList(label_children)
          )
        )
      )
    )
  )
}

#' Helper Function: Survival Estimations
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Transform a survival fit to a table with groups in rows characterized by N, median and confidence interval.
#'
#' @inheritParams h_data_plot
#'
#' @return A summary table with statistics `N`, `Median`, and `XX% CI` (`XX` taken from `fit_km`).
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(survival)
#'
#' adtte <- tern_ex_adtte %>% filter(PARAMCD == "OS")
#' fit <- survfit(
#'   form = Surv(AVAL, 1 - CNSR) ~ ARMCD,
#'   data = adtte
#' )
#' h_tbl_median_surv(fit_km = fit)
#' }
#'
#' @export
h_tbl_median_surv <- function(fit_km, armval = "All") {
  # Summarise the fit once; the table and the confidence level are both read from it.
  fit_summary <- summary(fit_km)

  y <- if (is.null(fit_km$strata)) {
    # No strata: the summary table is transposed into a single row named `armval`.
    as.data.frame(t(fit_summary$table), row.names = armval)
  } else {
    tbl <- fit_summary$table
    # Row names have the form "<variable>=<level>". Strip everything up to and
    # including the first "=" so only the level remains. Doing this with a single
    # anchored substitution keeps labels intact even when they contain further
    # "=" characters, the literal text "equals", or an empty level.
    rownames(tbl) <- sub("^[^=]*=", "", rownames(tbl))
    as.data.frame(tbl)
  }

  # Confidence level of the fit; it is also the prefix of the "<level>LCL" and
  # "<level>UCL" columns of the summary table.
  conf.int <- fit_summary$conf.int # nolint
  y$records <- round(y$records)
  y$median <- signif(y$median, 4)
  y$CI <- paste0(
    "(", signif(y[[paste0(conf.int, "LCL")]], 4), ", ", signif(y[[paste0(conf.int, "UCL")]], 4), ")"
  )
  stats::setNames(
    y[c("records", "median", "CI")],
    c("N", "Median", f_conf_level(conf.int))
  )
}

#' Helper Function: Survival Estimation Grob
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The survival fit is transformed into a grob containing a table with groups in
#' rows characterized by N, median and confidence interval (confidence level taken from `fit_km`).
#'
#' @inheritParams g_km
#' @inheritParams h_data_plot
#' @param ttheme (`list`)\cr see [gridExtra::ttheme_default()].
#' @param x (`numeric`)\cr a value between 0 and 1 specifying x-location.
#' @param y (`numeric`)\cr a value between 0 and 1 specifying y-location.
#'
#' @return A `grob` of a table containing statistics `N`, `Median`, and `XX% CI` (`XX` taken from `fit_km`).
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' grid::grid.newpage()
#' grid.rect(gp = grid::gpar(lty = 1, col = "pink", fill = "gray85", lwd = 1))
#' tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
#'   h_grob_median_surv() %>%
#'   grid::grid.draw()
#' }
#'
#' @export
h_grob_median_surv <- function(fit_km,
                               armval = "All",
                               x = 0.9,
                               y = 0.9,
                               ttheme = gridExtra::ttheme_default()) {
  # Tabulate the fit, then turn the table into a grob.
  median_tbl <- h_tbl_median_surv(fit_km, armval = armval)
  tbl_grob <- gridExtra::tableGrob(d = median_tbl, theme = ttheme)

  # Viewport sized to the table; its top-right corner sits at (x, y) in npc,
  # shifted by 1 line horizontally and 1.5 lines vertically.
  tbl_vp <- grid::viewport(
    x = grid::unit(x, "npc") + grid::unit(1, "lines"),
    y = grid::unit(y, "npc") + grid::unit(1.5, "lines"),
    height = sum(tbl_grob$heights),
    width = sum(tbl_grob$widths),
    just = c("right", "top")
  )

  grid::gList(
    grid::gTree(vp = tbl_vp, children = grid::gList(tbl_grob))
  )
}

#' Helper: Grid Object with y-axis Annotation
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Build the y-axis annotation from a decomposed `ggplot`.
#'
#' @param ylab (`gtable`)\cr the y-lab as a graphical object derived from a `ggplot`.
#' @param yaxis (`gtable`)\cr the y-axis as a graphical object derived from a `ggplot`.
#'
#' @return a `gTree` object containing the y-axis annotation from a `ggplot`.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#' data_plot <- h_data_plot(fit_km = fit_km)
#' xticks <- h_xticks(data = data_plot)
#' gg <- h_ggkm(
#'   data = data_plot,
#'   censor_show = TRUE,
#'   xticks = xticks, xlab = "Days", ylab = "Survival Probability",
#'   title = "title", footnotes = "footnotes", yval = "Survival"
#' )
#'
#' g_el <- h_decompose_gg(gg)
#'
#' grid::grid.newpage()
#' pvp <- grid::plotViewport(margins = c(5, 4, 2, 20))
#' pushViewport(pvp)
#' grid::grid.draw(h_grob_y_annot(ylab = g_el$ylab, yaxis = g_el$yaxis))
#' grid.rect(gp = grid::gpar(lty = 1, col = "gray35", fill = NA))
#' }
#'
#' @export
h_grob_y_annot <- function(ylab, yaxis) {
  # Viewport as wide as label plus axis, anchored to the right edge of its parent.
  annot_vp <- grid::viewport(
    width = grid::convertX(yaxis$width + ylab$width, "pt"),
    x = grid::unit(1, "npc"),
    just = "right"
  )

  # The label is placed to the left of the axis inside that viewport.
  grid::gList(
    grid::gTree(
      vp = annot_vp,
      children = grid::gList(cbind(ylab, yaxis))
    )
  )
}

#' Helper Function: Pairwise CoxPH table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Create a `data.frame` of pairwise stratified or unstratified CoxPH analysis results.
#'
#' @inheritParams g_km
#'
#' @return A `data.frame` containing statistics `HR`, `XX% CI` (`XX` taken from `control_coxph_pw`),
#'   and `p-value (log-rank)`.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#'
#' adtte <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(is_event = CNSR == 0)
#'
#' h_tbl_coxph_pairwise(
#'   df = adtte,
#'   variables = list(tte = "AVAL", is_event = "is_event", arm = "ARM"),
#'   control_coxph_pw = control_coxph(conf_level = 0.9)
#' )
#' }
#'
#' @export
h_tbl_coxph_pairwise <- function(df,
                                 variables,
                                 control_coxph_pw = control_coxph()) {
  assert_df_with_variables(df, variables)
  arm <- variables$arm
  df[[arm]] <- factor(df[[arm]])

  # The first arm level is the reference; every other level is compared against it.
  arm_levels <- levels(df[[arm]])
  ref_group <- arm_levels[1]
  comp_group <- arm_levels[-1]

  # The reference subset is identical for every comparison, so build it once.
  ref_df <- df[df[[arm]] == ref_group, , drop = FALSE]

  # Fixed two-decimal formatting used for the hazard ratio and its interval bounds.
  fmt_2dp <- function(value) format(round(value, 2), nsmall = 2)

  results <- Map(function(comp) {
    res <- s_coxph_pairwise(
      df = df[df[[arm]] == comp, , drop = FALSE],
      .ref_group = ref_df,
      .in_ref_col = FALSE,
      .var = variables$tte,
      is_event = variables$is_event,
      strat = variables$strat,
      control = control_coxph_pw
    )
    res_df <- data.frame(
      hr = fmt_2dp(res$hr),
      hr_ci = paste0("(", fmt_2dp(res$hr_ci[1]), ", ", fmt_2dp(res$hr_ci[2]), ")"),
      # Four fixed decimals, never scientific notation. The arguments are named on
      # purpose: the second positional argument of `format()` is `trim`, so a bare
      # `4` there does not request four decimals.
      pvalue = if (res$pvalue < 0.0001) {
        "<0.0001"
      } else {
        format(round(res$pvalue, 4), nsmall = 4, scientific = FALSE)
      },
      stringsAsFactors = FALSE
    )
    # Column headers for the interval and the p-value come from the labels of the results.
    colnames(res_df) <- c("HR", vapply(res[c("hr_ci", "pvalue")], obj_label, FUN.VALUE = "character"))
    row.names(res_df) <- comp
    res_df
  }, comp_group)

  # One row per comparison arm; `NULL` when there is no arm to compare with the reference.
  do.call(rbind, results)
}

#' Helper Function: CoxPH Grob
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Grob of the `data.frame` output from [h_tbl_coxph_pairwise()].
#'
#' @inheritParams h_grob_median_surv
#' @param ... arguments will be passed to [h_tbl_coxph_pairwise()].
#' @param x (`numeric`)\cr a value between 0 and 1 specifying x-location.
#' @param y (`numeric`)\cr a value between 0 and 1 specifying y-location.
#'
#' @return A `grob` of a table containing statistics `HR`, `XX% CI` (`XX` taken from `control_coxph_pw`),
#'   and `p-value (log-rank)`.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' grid::grid.newpage()
#' grid.rect(gp = grid::gpar(lty = 1, col = "pink", fill = "gray85", lwd = 1))
#' data <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(is_event = CNSR == 0)
#' tbl_grob <- h_grob_coxph(
#'   df = data,
#'   variables = list(tte = "AVAL", is_event = "is_event", arm = "ARMCD"),
#'   control_coxph_pw = control_coxph(conf_level = 0.9), x = 0.5, y = 0.5
#' )
#' grid::grid.draw(tbl_grob)
#' }
#'
#' @export
h_grob_coxph <- function(...,
                         x = 0,
                         y = 0,
                         ttheme = gridExtra::ttheme_default(
                           base_size = 12,
                           padding = grid::unit(c(1, .5), "lines"),
                           core = list(bg_params = list(fill = c("grey95", "grey90"), alpha = .5))
                         )) {
  coxph_tbl <- h_tbl_coxph_pairwise(...)

  # Table grob in a viewport sized to the table, bottom-left corner at (x, y) in
  # npc, shifted by 1 line horizontally and 1.5 lines vertically.
  build_table_grob <- function() {
    # Fails with "'data' must be of a vector type, was 'NULL'" when the table is NULL.
    tbl_grob <- gridExtra::tableGrob(d = coxph_tbl, theme = ttheme)
    tbl_vp <- grid::viewport(
      x = grid::unit(x, "npc") + grid::unit(1, "lines"),
      y = grid::unit(y, "npc") + grid::unit(1.5, "lines"),
      height = sum(tbl_grob$heights),
      width = sum(tbl_grob$widths),
      just = c("left", "bottom")
    )
    grid::gList(
      grid::gTree(vp = tbl_vp, children = grid::gList(tbl_grob))
    )
  }

  # Any error while building the grob is reported as a message and an empty
  # grob is returned instead.
  empty_grob_with_note <- function(cond) {
    message(paste(
      "Warning: Cox table will not be displayed as there is",
      "not any level to be compared in the arm variable."
    ))
    grid::gList(
      grid::gTree(vp = NULL, children = NULL)
    )
  }

  tryCatch(
    expr = build_table_grob(),
    error = empty_grob_with_note
  )
}

#' Survival Time Point Analysis
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Summarize patients' survival rate and difference of survival rates between groups at a time point.
#'
#' @inheritParams argument_convention
#' @inheritParams s_surv_time
#' @param time_point (`number`)\cr survival time point of interest.
#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function
#'   [control_surv_timepoint()]. Some possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for survival rate.
#'   * `conf_type` (`string`)\cr confidence interval type. Options are "plain" (default), "log", "log-log",
#'     see more in [survival::survfit()]. Note option "none" is no longer supported.
#'   * `time_point` (`number`)\cr survival time point of interest.
#'
#' @name survival_timepoint
NULL

#' @describeIn survival_timepoint Statistics function which analyzes survival rate.
#'
#' @return
#' * `s_surv_timepoint()` returns the statistics:
#'   * `pt_at_risk`: Patients remaining at risk.
#'   * `event_free_rate`: Event-free rate (%).
#'   * `rate_se`: Standard error of event free rate.
#'   * `rate_ci`: Confidence interval for event free rate.
#'
#' @examples
#' library(dplyr)
#'
#' adtte_f <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVAL = day2month(AVAL),
#'     is_event = CNSR == 0
#'   )
#' df <- adtte_f %>%
#'   filter(ARMCD == "ARM A")
#'
#' # Internal function - s_surv_timepoint
#' \dontrun{
#' s_surv_timepoint(df, .var = "AVAL", time_point = 7, is_event = "is_event")
#' }
#'
#' @keywords internal
s_surv_timepoint <- function(df,
                             .var,
                             time_point,
                             is_event,
                             control = control_surv_timepoint()) {
  # Input checks: time variable numeric, event flag logical, both non-empty and complete.
  checkmate::assert_string(.var)
  assert_df_with_variables(df, list(tte = .var, is_event = is_event))
  checkmate::assert_numeric(df[[.var]], min.len = 1, any.missing = FALSE)
  checkmate::assert_number(time_point)
  checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE)

  conf_level <- control$conf_level

  # Intercept-only fit on the supplied data.
  km_formula <- stats::as.formula(paste0("survival::Surv(", .var, ", ", is_event, ") ~ 1"))
  km_fit <- survival::survfit(
    formula = km_formula,
    data = df,
    conf.int = conf_level,
    conf.type = control$conf_type
  )

  # Estimates at the requested time; `extend = TRUE` is passed so the summary is
  # also requested for times past the last observation.
  km_at_time <- summary(km_fit, times = time_point, extend = TRUE)
  est <- as.data.frame(km_at_time[c("time", "n.risk", "surv", "lower", "upper", "std.err")])

  # Everything stays missing unless somebody is still at risk at `time_point`.
  pt_at_risk <- event_free_rate <- rate_se <- NA_real_
  rate_ci <- c(NA_real_, NA_real_)
  if (est[["n.risk"]] != 0) {
    pt_at_risk <- est$n.risk
    event_free_rate <- est$surv
    rate_se <- est$std.err
    rate_ci <- c(est$lower, est$upper)
  }

  # Rates, standard error and interval are reported on the percentage scale.
  list(
    pt_at_risk = formatters::with_label(pt_at_risk, "Patients remaining at risk"),
    event_free_rate = formatters::with_label(event_free_rate * 100, "Event Free Rate (%)"),
    rate_se = formatters::with_label(rate_se * 100, "Standard Error of Event Free Rate"),
    rate_ci = formatters::with_label(rate_ci * 100, f_conf_level(conf_level))
  )
}

#' @describeIn survival_timepoint Formatted analysis function which is used as `afun` in `surv_timepoint()`
#'   when `method = "surv"`.
#'
#' @return
#' * `a_surv_timepoint()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_surv_timepoint
#' \dontrun{
#' a_surv_timepoint(df, .var = "AVAL", time_point = 7, is_event = "is_event")
#' }
#'
#' @keywords internal
a_surv_timepoint <- make_afun(
  s_surv_timepoint,
  # Cell format for each statistic returned by `s_surv_timepoint()`.
  .formats = c(
    pt_at_risk = "xx",
    event_free_rate = "xx.xx",
    rate_se = "xx.xx",
    rate_ci = "(xx.xx, xx.xx)"
  ),
  # Indent modifier for each statistic's row label: standard error and interval
  # get 1, the other two get 0.
  .indent_mods = c(
    pt_at_risk = 0L,
    event_free_rate = 0L,
    rate_se = 1L,
    rate_ci = 1L
  )
)

#' @describeIn survival_timepoint Statistics function which analyzes difference between two survival rates.
#'
#' @return
#' * `s_surv_timepoint_diff()` returns the statistics:
#'   * `rate_diff`: Event-free rate difference between two groups.
#'   * `rate_diff_ci`: Confidence interval for the difference.
#'   * `ztest_pval`: p-value to test the difference is 0.
#'
#' @examples
#' df_ref_group <- adtte_f %>%
#'   filter(ARMCD == "ARM B")
#'
#' # Internal function - s_surv_timepoint_diff
#' \dontrun{
#' s_surv_timepoint_diff(df, df_ref_group, .in_ref_col = TRUE, .var = "AVAL", is_event = "is_event")
#' s_surv_timepoint_diff(
#'   df,
#'   df_ref_group,
#'   .in_ref_col = FALSE,
#'   .var = "AVAL",
#'   time_point = 7,
#'   is_event = "is_event"
#' )
#' }
#'
#' @keywords internal
s_surv_timepoint_diff <- function(df,
                                  .var,
                                  .ref_group,
                                  .in_ref_col,
                                  time_point,
                                  control = control_surv_timepoint(),
                                  ...) {
  # Nothing to compare in the reference column itself: return labelled empty strings.
  if (.in_ref_col) {
    return(
      list(
        rate_diff = formatters::with_label("", "Difference in Event Free Rate"),
        rate_diff_ci = formatters::with_label("", f_conf_level(control$conf_level)),
        ztest_pval = formatters::with_label("", "p-value (Z-test)")
      )
    )
  }

  # Estimate each group on its own data (reference first). Remaining arguments in
  # `...` (e.g. `is_event`) are forwarded to `s_surv_timepoint()`.
  res_ref <- s_surv_timepoint(df = .ref_group, .var = .var, time_point = time_point, control = control, ...)
  res_x <- s_surv_timepoint(df = df, .var = .var, time_point = time_point, control = control, ...)

  # Difference of the two rates; its standard error combines the two group
  # standard errors as the square root of the sum of their squares.
  rate_diff <- res_x$event_free_rate - res_ref$event_free_rate
  se_diff <- sqrt(res_x$rate_se^2 + res_ref$rate_se^2)

  # Normal-approximation interval at the requested confidence level.
  qs <- c(-1, 1) * stats::qnorm(1 - (1 - control$conf_level) / 2)
  rate_diff_ci <- rate_diff + qs * se_diff

  # Two-sided p-value. The upper tail is requested directly from `pnorm()`:
  # computing it as `1 - pnorm(z)` cancels to exactly 0 for large `z`.
  ztest_pval <- if (is.na(rate_diff)) {
    NA
  } else {
    2 * stats::pnorm(abs(rate_diff) / se_diff, lower.tail = FALSE)
  }

  list(
    rate_diff = formatters::with_label(rate_diff, "Difference in Event Free Rate"),
    rate_diff_ci = formatters::with_label(rate_diff_ci, f_conf_level(control$conf_level)),
    ztest_pval = formatters::with_label(ztest_pval, "p-value (Z-test)")
  )
}

#' @describeIn survival_timepoint Formatted analysis function which is used as `afun` in `surv_timepoint()`
#'   when `method = "surv_diff"`.
#'
#' @return
#' * `a_surv_timepoint_diff()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_surv_timepoint_diff
#' \dontrun{
#' a_surv_timepoint_diff(
#'   df,
#'   df_ref_group,
#'   .in_ref_col = FALSE,
#'   .var = "AVAL",
#'   time_point = 7,
#'   is_event = "is_event"
#' )
#' }
#'
#' @keywords internal
# Built from `s_surv_timepoint_diff()` via `make_afun()`; only cell formats are set here.
a_surv_timepoint_diff <- make_afun(
  s_surv_timepoint_diff,
  # One format string per statistic returned by `s_surv_timepoint_diff()`.
  .formats = c(
    rate_diff = "xx.xx",
    rate_diff_ci = "(xx.xx, xx.xx)",
    ztest_pval = "x.xxxx | (<0.0001)"
  )
)

#' @describeIn survival_timepoint Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param method (`string`)\cr either `surv` (survival estimations),
#'   `surv_diff` (difference in survival with the control) or `both`.
#' @param table_names_suffix (`string`)\cr optional suffix for the `table_names` used for the `rtables` to
#'   avoid warnings from duplicate table names.
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#'
#' @return
#' * `surv_timepoint()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_surv_timepoint()` and/or `s_surv_timepoint_diff()` to the table layout depending on
#'   the value of `method`.
#'
#' @examples
#' # Survival at given time points.
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   surv_timepoint(
#'     vars = "AVAL",
#'     var_labels = "Months",
#'     is_event = "is_event",
#'     time_point = 7
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' # Difference in survival at given time points.
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   surv_timepoint(
#'     vars = "AVAL",
#'     var_labels = "Months",
#'     is_event = "is_event",
#'     time_point = 9,
#'     method = "surv_diff",
#'     .indent_mods = c("rate_diff" = 0L, "rate_diff_ci" = 2L, "ztest_pval" = 2L)
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' # Survival and difference in survival at given time points.
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   surv_timepoint(
#'     vars = "AVAL",
#'     var_labels = "Months",
#'     is_event = "is_event",
#'     time_point = 9,
#'     method = "both"
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' @export
surv_timepoint <- function(lyt,
                           vars,
                           ...,
                           table_names_suffix = "",
                           var_labels = "Time",
                           show_labels = "visible",
                           method = c("surv", "surv_diff", "both"),
                           .stats = c(
                             "pt_at_risk", "event_free_rate", "rate_ci",
                             "rate_diff", "rate_diff_ci", "ztest_pval"
                           ),
                           .formats = NULL,
                           .labels = NULL,
                           .indent_mods = if (method == "both") {
                             c(rate_diff = 1L, rate_diff_ci = 2L, ztest_pval = 2L)
                           } else {
                             c(rate_diff_ci = 1L, ztest_pval = 1L)
                           }) {
  # `method` must be resolved before `.indent_mods` is first used: its default
  # expression reads the matched value.
  method <- match.arg(method)
  checkmate::assert_string(table_names_suffix)

  # Statistics belonging to each of the two analysis functions.
  stat_groups <- list(
    surv = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci"),
    surv_diff = c("rate_diff", "rate_diff_ci", "ztest_pval")
  )
  .stats <- h_split_param(.stats, .stats, f = stat_groups)
  .formats <- h_split_param(.formats, names(.formats), f = stat_groups)
  .labels <- h_split_param(.labels, names(.labels), f = stat_groups)
  .indent_mods <- h_split_param(.indent_mods, names(.indent_mods), f = stat_groups)

  afun_surv <- make_afun(
    a_surv_timepoint,
    .stats = .stats$surv,
    .formats = .formats$surv,
    .labels = .labels$surv,
    .indent_mods = .indent_mods$surv
  )

  afun_surv_diff <- make_afun(
    a_surv_timepoint_diff,
    .stats = .stats$surv_diff,
    .formats = .formats$surv_diff,
    .labels = .labels$surv_diff,
    .indent_mods = .indent_mods$surv_diff
  )

  # `time_point`, `is_event` and `control` are picked out of `...`.
  dots <- list(...)
  time_point <- dots$time_point

  # One analysis (or two, for `method = "both"`) is added per requested time point.
  for (i in seq_along(time_point)) {
    tpt <- time_point[i]
    row_label <- paste(tpt, var_labels)
    stat_args <- list(
      is_event = dots$is_event,
      control = dots$control,
      time_point = tpt
    )

    # `method` is one of "surv", "surv_diff" or "both" after `match.arg()`.
    if (method != "surv_diff") {
      lyt <- analyze(
        lyt,
        vars,
        var_labels = row_label,
        table_names = paste0("surv_", tpt, table_names_suffix),
        show_labels = show_labels,
        afun = afun_surv,
        extra_args = stat_args
      )
    }

    if (method != "surv") {
      lyt <- analyze(
        lyt,
        vars,
        var_labels = row_label,
        table_names = paste0("surv_diff_", tpt, table_names_suffix),
        # With both analyses the label is already shown above the first one.
        show_labels = ifelse(method == "both", "hidden", show_labels),
        afun = afun_surv_diff,
        extra_args = stat_args
      )
    }
  }

  lyt
}

#' Subgroup Treatment Effect Pattern (STEP) Fit for Survival Outcome
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This fits the Subgroup Treatment Effect Pattern models for a survival outcome. The treatment arm
#' variable must have exactly 2 levels, where the first one is taken as reference and the estimated
#' hazard ratios are for the comparison of the second level vs. the first one.
#'
#' The model which is fit is:
#'
#' `Surv(time, event) ~ arm * poly(biomarker, degree) + covariates + strata(strata)`
#'
#' where `degree` is specified by `control_step()`.
#'
#' @inheritParams argument_convention
#' @param variables (named `list` of `character`)\cr list of analysis variables: needs `time`, `event`,
#'   `arm`, `biomarker`, and optional `covariates` and `strata`.
#' @param control (named `list`)\cr combined control list from [control_step()] and [control_coxph()].
#'
#' @return A matrix of class `step`. The first part of the columns describe the subgroup intervals used
#'   for the biomarker variable, including where the center of the intervals are and their bounds. The
#'   second part of the columns contain the estimates for the treatment arm comparison.
#'
#' @note For the default degree 0 the `biomarker` variable is not included in the model.
#'
#' @seealso [control_step()] and [control_coxph()] for the available customization options.
#'
#' @examples
#' # Testing dataset with just two treatment arms.
#' library(dplyr)
#'
#' adtte_f <- tern_ex_adtte %>%
#'   filter(
#'     PARAMCD == "OS",
#'     ARM %in% c("B: Placebo", "A: Drug X")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to display reference arm before treatment arm.
#'     ARM = droplevels(forcats::fct_relevel(ARM, "B: Placebo")),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("ARM" = "Treatment Arm", "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' variables <- list(
#'   arm = "ARM",
#'   biomarker = "BMRKR1",
#'   covariates = c("AGE", "BMRKR2"),
#'   event = "is_event",
#'   time = "AVAL"
#' )
#'
#' # Fit default STEP models: Here a constant treatment effect is estimated in each subgroup.
#' step_matrix <- fit_survival_step(
#'   variables = variables,
#'   data = adtte_f
#' )
#' dim(step_matrix)
#' head(step_matrix)
#'
#' # Specify different polynomial degree for the biomarker interaction to use more flexible local
#' # models. Or specify different Cox regression options.
#' step_matrix2 <- fit_survival_step(
#'   variables = variables,
#'   data = adtte_f,
#'   control = c(control_coxph(conf_level = 0.9), control_step(degree = 2))
#' )
#'
#' # Use a global model with cubic interaction and only 5 points.
#' step_matrix3 <- fit_survival_step(
#'   variables = variables,
#'   data = adtte_f,
#'   control = c(control_coxph(), control_step(bandwidth = NULL, degree = 3, num_points = 5L))
#' )
#'
#' @export
fit_survival_step <- function(variables,
                              data,
                              control = c(control_step(), control_coxph())) {
  # Validate the inputs before touching the data.
  checkmate::assert_list(control)
  assert_df_with_variables(data, variables)

  # Rows with a missing biomarker value are dropped up front.
  biomarker_known <- !is.na(data[[variables$biomarker]])
  data <- data[biomarker_known, ]

  # Windows along the biomarker; estimates are reported at the interval centers.
  windows <- h_step_window(x = data[[variables$biomarker]], control = control)
  centers <- windows$interval[, "Interval Center"]
  model_formula <- h_step_survival_formula(variables = variables, control = control)

  if (is.null(control$bandwidth)) {
    # No bandwidth: a single estimation call on all remaining rows, evaluated at every center.
    estimates <- h_step_survival_est(
      formula = model_formula,
      data = data,
      variables = variables,
      x = centers,
      control = control
    )
  } else {
    # Bandwidth given: one estimation call per window, each restricted to that window's subset.
    window_subsets <- as.list(as.data.frame(windows$sel))
    per_window <- mapply(
      FUN = h_step_survival_est,
      x = centers,
      subset = window_subsets,
      MoreArgs = list(
        formula = model_formula,
        data = data,
        variables = variables,
        control = control
      )
    )
    # `mapply()` returns one column per window; label the statistics and put windows in rows.
    rownames(per_window) <- c("n", "events", "loghr", "se", "ci_lower", "ci_upper")
    estimates <- t(per_window)
  }

  # Interval description columns first, then the estimates; inputs are kept as attributes.
  structure(
    cbind(windows$interval, estimates),
    class = c("step", "matrix"),
    variables = variables,
    control = control
  )
}

#' Tabulate Binary Response by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Tabulate statistics such as response rate and odds ratio for population subgroups.
#'
#' @inheritParams argument_convention
#' @param data (`data.frame`)\cr the dataset containing the variables to summarize.
#' @param groups_lists (named `list` of `list`)\cr optionally contains for each `subgroups` variable a
#'   list, which specifies the new group levels via the names and the
#'   levels that belong to it in the character vectors that are elements of the list.
#' @param label_all (`string`)\cr label for the total population analysis.
#' @param method (`string`)\cr specifies the test used to calculate the p-value for the difference between
#'   two proportions. For options, see [s_test_proportion_diff()]. Default is `NULL` so no test is performed.
#'
#' @details These functions create a layout starting from a data frame which contains
#'   the required statistics. The resulting tables are typically used as part of a forest plot.
#'
#' @seealso [extract_rsp_subgroups()]
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
#'   droplevels() %>%
#'   mutate(
#'     # Reorder levels of factor to make the placebo group the reference arm.
#'     ARM = fct_relevel(ARM, "B: Placebo"),
#'     rsp = AVALC == "CR"
#'   )
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' # Unstratified analysis.
#' df <- extract_rsp_subgroups(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f
#' )
#' df
#'
#' @name response_subgroups
NULL

#' Prepares Response Data for Population Subgroups in Data Frames
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares response rates and odds ratios for population subgroups in data frames. Simple wrapper
#' for [h_odds_ratio_subgroups_df()] and [h_proportion_subgroups_df()]. Result is a list of two
#' `data.frames`: `prop` and `or`. `variables` corresponds to the names of variables found in `data`,
#' passed as a named `list` and requires elements `rsp`, `arm` and optionally `subgroups` and `strat`.
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @inheritParams argument_convention
#' @inheritParams response_subgroups
#' @param label_all (`string`)\cr label for the total population analysis.
#'
#' @return A named list of two elements:
#'   * `prop`: A `data.frame` containing columns `arm`, `n`, `n_rsp`, `prop`, `subgroup`, `var`,
#'     `var_label`, and `row_type`.
#'   * `or`: A `data.frame` containing columns `arm`, `n_tot`, `or`, `lcl`, `ucl`, `conf_level`,
#'     `subgroup`, `var`, `var_label`, and `row_type`.
#'
#' @seealso [response_subgroups]
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
#'   droplevels() %>%
#'   mutate(
#'     # Reorder levels of factor to make the placebo group the reference arm.
#'     ARM = fct_relevel(ARM, "B: Placebo"),
#'     rsp = AVALC == "CR"
#'   )
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' # Unstratified analysis.
#' df <- extract_rsp_subgroups(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f
#' )
#' df
#'
#' # Stratified analysis.
#' df_strat <- extract_rsp_subgroups(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2"), strat = "STRATA1"),
#'   data = adrs_f
#' )
#' df_strat
#'
#' # Grouping of the BMRKR2 levels.
#' df_grouped <- extract_rsp_subgroups(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#' df_grouped
#'
#' @export
extract_rsp_subgroups <- function(variables,
                                  data,
                                  groups_lists = list(),
                                  conf_level = 0.95,
                                  method = NULL,
                                  label_all = "All Patients") {
  # Each helper yields one data frame; both are returned together as a named list.
  list(
    prop = h_proportion_subgroups_df(
      variables,
      data,
      groups_lists = groups_lists,
      label_all = label_all
    ),
    or = h_odds_ratio_subgroups_df(
      variables,
      data,
      groups_lists = groups_lists,
      conf_level = conf_level,
      method = method,
      label_all = label_all
    )
  )
}

#' @describeIn response_subgroups Formatted analysis function which is used as `afun` in `tabulate_rsp_subgroups()`.
#'
#' @return
#' * `a_response_subgroups()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_response_subgroups
#' \dontrun{
#' a_response_subgroups(.formats = list("n" = "xx", "prop" = "xx.xx%"))
#' }
#'
#' @keywords internal
a_response_subgroups <- function(.formats = list(
                                   n = "xx",
                                   n_rsp = "xx",
                                   prop = "xx.x%",
                                   n_tot = "xx",
                                   or = list(format_extreme_values(2L)),
                                   ci = list(format_extreme_values_ci(2L)),
                                   pval = "x.xxxx | (<0.0001)"
                                 )) {
  checkmate::assert_list(.formats)
  checkmate::assert_subset(
    names(.formats),
    c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval")
  )

  # Builds the row-generating function for one statistic and its format.
  make_stat_fun <- function(stat, fmt) {
    if (stat == "ci") {
      # The confidence interval is assembled from the `lcl` and `ucl` columns.
      return(
        function(df, labelstr = "", ...) {
          in_rows(.list = combine_vectors(df$lcl, df$ucl), .labels = as.character(df$subgroup), .formats = fmt)
        }
      )
    }
    # Every other statistic is read from the column of the same name.
    function(df, labelstr = "", ...) {
      in_rows(.list = as.list(df[[stat]]), .labels = as.character(df$subgroup), .formats = fmt)
    }
  }

  # One function per entry of `.formats`, named after the statistic.
  Map(make_stat_fun, stat = names(.formats), fmt = .formats)
}

#' @describeIn response_subgroups Table-creating function which creates a table
#'   summarizing binary response by subgroup. This function is a wrapper for [rtables::analyze_colvars()]
#'   and [rtables::summarize_row_groups()].
#'
#' @param df (`list`)\cr of data frames containing all analysis variables. List should be
#'   created using [extract_rsp_subgroups()].
#' @param vars (`character`)\cr the names of statistics to be reported among:
#'   * `n`: Total number of observations per group.
#'   * `n_rsp`: Number of responders per group.
#'   * `prop`: Proportion of responders.
#'   * `n_tot`: Total number of observations.
#'   * `or`: Odds ratio.
#'   * `ci` : Confidence interval of odds ratio.
#'   * `pval`: p-value of the effect.
#'   Note, the statistics `n_tot`, `or` and `ci` are required.
#'
#' @return An `rtables` table summarizing binary response by subgroup.
#'
#' @examples
#' ## Table with default columns.
#' basic_table() %>%
#'   tabulate_rsp_subgroups(df)
#'
#' ## Table with selected columns.
#' basic_table() %>%
#'   tabulate_rsp_subgroups(
#'     df = df,
#'     vars = c("n_tot", "n", "n_rsp", "prop", "or", "ci")
#'   )
#'
#' @export
tabulate_rsp_subgroups <- function(lyt,
                                   df,
                                   vars = c("n_tot", "n", "prop", "or", "ci")) {
  # Builds a proportion table (from `df$prop`) and an odds ratio table (from `df$or`) on top of `lyt`,
  # binds their columns together and attaches the attributes `forest_header`, `col_x`, `col_ci`
  # and `col_symbol_size` to the result.

  # The confidence level and, if the column exists, the p-value label are read from the first row of `df$or`.
  conf_level <- df$or$conf_level[1]
  method <- if ("pval_label" %in% names(df$or)) {
    df$or$pval_label[1]
  } else {
    NULL
  }

  afun_lst <- a_response_subgroups()
  colvars <- d_rsp_subgroups_colvars(vars, conf_level = conf_level, method = method)

  # Split the requested columns by the table they belong to: `n`, `prop`, `n_rsp` go to the
  # proportion table, `n_tot`, `or`, `ci`, `pval` go to the odds ratio table.
  colvars_prop <- list(
    vars = colvars$vars[names(colvars$labels) %in% c("n", "prop", "n_rsp")],
    labels = colvars$labels[names(colvars$labels) %in% c("n", "prop", "n_rsp")]
  )
  colvars_or <- list(
    vars = colvars$vars[names(colvars$labels) %in% c("n_tot", "or", "ci", "pval")],
    labels = colvars$labels[names(colvars$labels) %in% c("n_tot", "or", "ci", "pval")]
  )

  # Columns from table_prop are optional.
  if (length(colvars_prop$vars) > 0) {
    # Rows with `row_type == "content"` are summarized as content rows labelled by `var_label`.
    lyt_prop <- split_cols_by(lyt = lyt, var = "arm")
    lyt_prop <- split_rows_by(
      lyt = lyt_prop,
      var = "row_type",
      split_fun = keep_split_levels("content"),
      nested = FALSE
    )
    lyt_prop <- summarize_row_groups(
      lyt = lyt_prop,
      var = "var_label",
      cfun = afun_lst[names(colvars_prop$labels)]
    )
    lyt_prop <- split_cols_by_multivar(
      lyt = lyt_prop,
      vars = colvars_prop$vars,
      varlabels = colvars_prop$labels
    )

    # The analysis rows are only added to the layout when the data contains such rows.
    if ("analysis" %in% df$prop$row_type) {
      lyt_prop <- split_rows_by(
        lyt = lyt_prop,
        var = "row_type",
        split_fun = keep_split_levels("analysis"),
        nested = FALSE,
        child_labels = "hidden"
      )
      lyt_prop <- split_rows_by(lyt = lyt_prop, var = "var_label", nested = TRUE)
      lyt_prop <- analyze_colvars(
        lyt = lyt_prop,
        afun = afun_lst[names(colvars_prop$labels)],
        inclNAs = TRUE
      )
    }

    table_prop <- build_table(lyt_prop, df = df$prop)
  } else {
    table_prop <- NULL
  }

  # Columns "n_tot", "or", "ci" in table_or are required.
  lyt_or <- split_cols_by(lyt = lyt, var = "arm")
  lyt_or <- split_rows_by(
    lyt = lyt_or,
    var = "row_type",
    split_fun = keep_split_levels("content"),
    nested = FALSE
  )
  lyt_or <- split_cols_by_multivar(
    lyt = lyt_or,
    vars = colvars_or$vars,
    varlabels = colvars_or$labels
  )
  lyt_or <- summarize_row_groups(
    lyt = lyt_or,
    var = "var_label",
    cfun = afun_lst[names(colvars_or$labels)]
  ) %>%
    append_topleft("Baseline Risk Factors")

  # As for the proportion table, analysis rows are only added when present in the data.
  if ("analysis" %in% df$or$row_type) {
    lyt_or <- split_rows_by(
      lyt = lyt_or,
      var = "row_type",
      split_fun = keep_split_levels("analysis"),
      nested = FALSE,
      child_labels = "hidden"
    )
    lyt_or <- split_rows_by(lyt = lyt_or, var = "var_label", nested = TRUE)
    lyt_or <- analyze_colvars(
      lyt = lyt_or,
      afun = afun_lst[names(colvars_or$labels)],
      inclNAs = TRUE
    )
  }
  table_or <- build_table(lyt_or, df = df$or)

  # Column positions of `n_tot`, `or` and the CI placeholder `lcl` in the final table.
  n_tot_id <- match("n_tot", colvars_or$vars)
  if (is.null(table_prop)) {
    # Only the odds ratio table is returned, so positions within `colvars_or$vars` apply directly.
    result <- table_or
    or_id <- match("or", colvars_or$vars)
    ci_id <- match("lcl", colvars_or$vars)
  } else {
    # Column order of the combined table: `n_tot`, then all of `table_prop`, then the remaining
    # odds ratio columns. Positions of `or` and `lcl` are shifted accordingly.
    result <- cbind_rtables(table_or[, n_tot_id], table_prop, table_or[, -n_tot_id])
    or_id <- 1L + ncol(table_prop) + match("or", colvars_or$vars[-n_tot_id])
    ci_id <- 1L + ncol(table_prop) + match("lcl", colvars_or$vars[-n_tot_id])
    n_tot_id <- 1L
  }
  structure(
    result,
    forest_header = paste0(levels(df$prop$arm), "\nBetter"),
    col_x = or_id,
    col_ci = ci_id,
    col_symbol_size = n_tot_id
  )
}

#' Labels for Column Variables in Binary Response by Subgroup Table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Internal function to check variables included in [tabulate_rsp_subgroups()] and create column labels.
#'
#' @inheritParams argument_convention
#' @inheritParams tabulate_rsp_subgroups
#'
#' @return A `list` of variables to tabulate and their labels.
#'
#' @export
d_rsp_subgroups_colvars <- function(vars,
                                    conf_level = NULL,
                                    method = NULL) {
  # `vars` must contain the required statistics and nothing outside the supported set.
  checkmate::assert_character(vars)
  checkmate::assert_subset(c("n_tot", "or", "ci"), vars)
  checkmate::assert_subset(
    vars,
    c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval")
  )

  # Labels that do not depend on `conf_level` or `method`.
  label_map <- c(
    n = "n",
    n_rsp = "Responders",
    prop = "Response (%)",
    n_tot = "Total n",
    or = "Odds Ratio"
  )
  tab_vars <- vars

  if ("ci" %in% tab_vars) {
    checkmate::assert_false(is.null(conf_level))

    ci_label <- paste0(100 * conf_level, "% CI")
    label_map <- c(label_map, ci = ci_label)

    # The `lcl` variable is just a placeholder available in the analysis data,
    # it is not actually used in the tabulation.
    # Variables used in the tabulation are lcl and ucl, see `a_response_subgroups` for details.
    tab_vars[tab_vars == "ci"] <- "lcl"
  }

  if ("pval" %in% tab_vars) {
    # The p-value column is labelled with the description of the test method.
    label_map <- c(label_map, pval = method)
  }

  # Labels are returned in the order of, and named by, the requested `vars`.
  list(
    vars = tab_vars,
    labels = label_map[vars]
  )
}

#' Control function for incidence rate
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for the incidence rate, used
#' internally to specify details in `s_incidence_rate()`.
#'
#' @inheritParams argument_convention
#' @param time_unit_input (`string`)\cr `day`, `week`, `month`, or `year` (default)
#'   indicating time unit for data input.
#' @param time_unit_output (`numeric`)\cr time unit for desired output (in person-years).
#' @param conf_type (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar`
#'   for confidence interval type.
#'
#' @return A list of components with the same names as the arguments.
#'
#' @seealso [incidence_rate]
#'
#' @examples
#' control_incidence_rate(0.9, "exact", "month", 100)
#'
#' @export
control_incidence_rate <- function(conf_level = 0.95,
                                   conf_type = c("normal", "normal_log", "exact", "byar"),
                                   time_unit_input = c("year", "day", "week", "month"),
                                   time_unit_output = 1) {
  # Resolve the enumerated choices first; the first listed value is the default.
  resolved_type <- match.arg(conf_type)
  resolved_unit <- match.arg(time_unit_input)

  # Then validate the numeric settings.
  checkmate::assert_number(time_unit_output)
  assert_proportion_value(conf_level)

  # Components carry the same names as the arguments.
  list(
    conf_level = conf_level,
    conf_type = resolved_type,
    time_unit_input = resolved_unit,
    time_unit_output = time_unit_output
  )
}

#' Difference Test for Two Proportions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Various tests were implemented to test the difference between two proportions.
#'
#' @inheritParams argument_convention
#' @param tbl (`matrix`)\cr matrix with two groups in rows and the binary response (`TRUE`/`FALSE`) in columns.
#'
#' @seealso [h_prop_diff_test]
#'
#' @name prop_diff_test
NULL

#' @describeIn prop_diff_test Statistics function which tests the difference between two proportions.
#'
#' @param method (`string`)\cr one of `chisq`, `cmh`, `fisher`, or `schouten`; specifies the test used
#'   to calculate the p-value.
#'
#' @return
#' * `s_test_proportion_diff()` returns a named `list` with a single item `pval` with an attribute `label`
#'   describing the method used. The p-value tests the null hypothesis that proportions in two groups are the same.
#'
#' @examples
#' # Statistics function
#' dta <- data.frame(
#'   rsp = sample(c(TRUE, FALSE), 100, TRUE),
#'   grp = factor(rep(c("A", "B"), each = 50)),
#'   strat = factor(rep(c("V", "W", "X", "Y", "Z"), each = 20))
#' )
#'
#' # Internal function - s_test_proportion_diff
#' \dontrun{
#' s_test_proportion_diff(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   variables = list(strata = "strat"),
#'   method = "cmh"
#' )
#' }
#'
#' @keywords internal
s_test_proportion_diff <- function(df,
                                   .var,
                                   .ref_group,
                                   .in_ref_col,
                                   variables = list(strata = NULL),
                                   method = c("chisq", "schouten", "fisher", "cmh")) {
  method <- match.arg(method)

  # In the reference column no test is run and the p-value stays an empty string.
  p_value <- ""

  if (!.in_ref_col) {
    assert_df_with_variables(df, list(rsp = .var))
    assert_df_with_variables(.ref_group, list(rsp = .var))

    # Stack reference and comparison observations, reference group first.
    rsp <- factor(
      c(.ref_group[[.var]], df[[.var]]),
      levels = c("TRUE", "FALSE")
    )
    group_levels <- c("ref", "Not-ref")
    grp <- factor(
      rep(group_levels, c(nrow(.ref_group), nrow(df))),
      levels = group_levels
    )

    # Strata are prepared when supplied, and are mandatory for the CMH test.
    needs_strata <- method == "cmh" || !is.null(variables$strata)
    if (needs_strata) {
      strata_names <- variables$strata
      checkmate::assert_false(is.null(strata_names))
      strata_vars <- stats::setNames(as.list(strata_names), strata_names)
      assert_df_with_variables(df, strata_vars)
      assert_df_with_variables(.ref_group, strata_vars)
      strata <- c(interaction(.ref_group[strata_names]), interaction(df[strata_names]))
    }

    # Only the CMH test uses the strata as a third table dimension.
    tbl <- if (method == "cmh") {
      table(grp, rsp, strata)
    } else {
      table(grp, rsp)
    }

    test_fun <- switch(method,
      chisq = prop_chisq,
      cmh = prop_cmh,
      fisher = prop_fisher,
      schouten = prop_schouten
    )
    p_value <- test_fun(tbl)
  }

  # The label describes the method, whether or not a test was run.
  list(pval = formatters::with_label(p_value, d_test_proportion_diff(method)))
}

#' Description of the Difference Test Between Two Proportions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function that describes the analysis in `s_test_proportion_diff`.
#'
#' @inheritParams s_test_proportion_diff
#'
#' @return `string` describing the test from which the p-value is derived.
#'
#' @export
d_test_proportion_diff <- function(method) {
  checkmate::assert_string(method)

  # Lookup table from method name to the test description.
  descriptions <- c(
    schouten = "Chi-Squared Test with Schouten Correction",
    chisq = "Chi-Squared Test",
    cmh = "Cochran-Mantel-Haenszel Test",
    fisher = "Fisher's Exact Test"
  )

  if (!method %in% names(descriptions)) {
    stop(paste(method, "does not have a description"))
  }

  paste0("p-value (", descriptions[[method]], ")")
}

#' @describeIn prop_diff_test Formatted analysis function which is used as `afun` in `test_proportion_diff()`.
#'
#' @return
#' * `a_test_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_test_proportion_diff
#' \dontrun{
#' a_test_proportion_diff(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   variables = list(strata = "strat"),
#'   method = "cmh"
#' )
#' }
#'
#' @keywords internal
a_test_proportion_diff <- make_afun(
  # Wraps the statistics function below with defaults for its single statistic `pval`.
  s_test_proportion_diff,
  # Default format string for `pval`.
  .formats = c(pval = "x.xxxx | (<0.0001)"),
  # Default indent modifier of 1 for `pval`.
  .indent_mods = c(pval = 1L)
)

#' @describeIn prop_diff_test Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param ... other arguments are passed to [s_test_proportion_diff()].
#'
#' @return
#' * `test_proportion_diff()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_test_proportion_diff()` to the table layout.
#'
#' @examples
#' # With `rtables` pipelines.
#' l <- basic_table() %>%
#'   split_cols_by(var = "grp", ref_group = "B") %>%
#'   test_proportion_diff(
#'     vars = "rsp",
#'     method = "cmh", variables = list(strata = "strat")
#'   )
#'
#' build_table(l, df = dta)
#'
#' @export
test_proportion_diff <- function(lyt,
                                 vars,
                                 ...,
                                 var_labels = vars,
                                 show_labels = "hidden",
                                 table_names = vars,
                                 .stats = NULL,
                                 .formats = NULL,
                                 .labels = NULL,
                                 .indent_mods = NULL) {
  # Customize the formatted analysis function with the user-supplied overrides.
  customized_afun <- make_afun(
    a_test_proportion_diff,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Everything passed through `...` is handed to the analysis function as extra arguments.
  analyze(
    lyt,
    vars,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    afun = customized_afun,
    extra_args = list(...)
  )
}

#' Helper Functions to Test Proportion Differences
#'
#' Helper functions to implement various tests on the difference between two proportions.
#'
#' @param tbl (`matrix`)\cr matrix with two groups in rows and the binary response (`TRUE`/`FALSE`) in columns.
#'
#' @return A p-value.
#'
#' @seealso [prop_diff_test()] for implementation of these helper functions.
#'
#' @name h_prop_diff_test
NULL

#' @describeIn h_prop_diff_test performs Chi-Squared test. Internally calls [stats::prop.test()].
#'
#' @examples
#' # Non-stratified proportion difference test
#'
#' ## Data
#' A <- 20
#' B <- 20
#' set.seed(1)
#' rsp <- c(
#'   sample(c(TRUE, FALSE), size = A, prob = c(3 / 4, 1 / 4), replace = TRUE),
#'   sample(c(TRUE, FALSE), size = A, prob = c(1 / 2, 1 / 2), replace = TRUE)
#' )
#' grp <- c(rep("A", A), rep("B", B))
#' tbl <- table(grp, rsp)
#'
#' ## Chi-Squared test
#' # Internal function - prop_chisq
#' \dontrun{
#' prop_chisq(tbl)
#' }
#'
#' @keywords internal
prop_chisq <- function(tbl) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)

  # Put the `TRUE` column first, then the `FALSE` column.
  ordered_tbl <- tbl[, c("TRUE", "FALSE")]

  # With an empty response column the p-value is set to 1 without running the test.
  has_empty_column <- any(colSums(ordered_tbl) == 0)
  if (has_empty_column) {
    1
  } else {
    stats::prop.test(ordered_tbl, correct = FALSE)$p.value
  }
}

#' @describeIn h_prop_diff_test performs stratified Cochran-Mantel-Haenszel test. Internally calls
#'   [stats::mantelhaen.test()]. Note that strata with less than two observations are automatically discarded.
#'
#' @param ary (`array`, 3 dimensions)\cr array with two groups in rows, the binary response
#'   (`TRUE`/`FALSE`) in columns, and the strata in the third dimension.
#'
#' @examples
#' # Stratified proportion difference test
#'
#' ## Data
#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
#' grp <- factor(rep(c("A", "B"), each = 50))
#' strata <- factor(rep(c("V", "W", "X", "Y", "Z"), each = 20))
#' tbl <- table(grp, rsp, strata)
#'
#' ## Cochran-Mantel-Haenszel test
#' # Internal function - prop_cmh
#' \dontrun{
#' prop_cmh(tbl)
#' }
#'
#' @keywords internal
prop_cmh <- function(ary) {
  checkmate::assert_array(ary)
  checkmate::assert_integer(c(ncol(ary), nrow(ary)), lower = 2, upper = 2)
  checkmate::assert_integer(length(dim(ary)), lower = 3, upper = 3)

  # Number of observations in each stratum (third dimension).
  n_per_stratum <- apply(ary, MARGIN = 3, sum)
  is_sparse <- n_per_stratum < 5

  if (any(is_sparse)) {
    warning("<5 data points in some strata. CMH test may be incorrect.")
    # Only strata with more than one observation are kept for the test.
    keep <- n_per_stratum > 1
    ary <- ary[, , keep]
  }

  cmh_result <- stats::mantelhaen.test(ary, correct = FALSE)
  cmh_result$p.value
}

#' @describeIn h_prop_diff_test performs the Chi-Squared test with Schouten correction.
#'
#' @seealso For information on the Schouten correction (Schouten, 1980),
#'   visit https://onlinelibrary.wiley.com/doi/abs/10.1002/bimj.4710220305.
#'
#' @examples
#' ## Chi-Squared test + Schouten correction.
#' # Internal function - prop_schouten
#' \dontrun{
#' prop_schouten(tbl)
#' }
#'
#' @keywords internal
prop_schouten <- function(tbl) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)
  tbl <- tbl[, c("TRUE", "FALSE")]

  # Margins are taken with `colSums()`/`rowSums()`, which return doubles. Counts coming from
  # `table()` are integers, and multiplying four integer margins overflows to `NA` (with a
  # warning) as soon as the product exceeds `.Machine$integer.max`.
  col_totals <- colSums(tbl)

  # With an empty response column the p-value is set to 1 without computing the statistic.
  if (any(col_totals == 0)) {
    return(1)
  }

  row_totals <- rowSums(tbl)
  n1 <- row_totals[[1]]
  n2 <- row_totals[[2]]
  n <- n1 + n2

  # Cross products of the 2x2 table: main diagonal and anti-diagonal.
  # `prod()` always returns a double, so these cannot overflow either.
  ad <- prod(diag(tbl))
  bc <- prod(tbl[1, 2], tbl[2, 1])

  # Chi-squared statistic with the Schouten correction term `0.5 * min(n1, n2)`.
  t_schouten <- (n - 1) *
    (abs(ad - bc) - 0.5 * min(n1, n2))^2 /
    (n1 * n2 * col_totals[[1]] * col_totals[[2]])

  # Upper tail probability of the chi-squared distribution with 1 degree of freedom.
  1 - stats::pchisq(t_schouten, df = 1)
}

#' @describeIn h_prop_diff_test performs the Fisher's exact test. Internally calls [stats::fisher.test()].
#'
#' @examples
#' ## Fisher's exact test
#' # Internal function - prop_fisher
#' \dontrun{
#' prop_fisher(tbl)
#' }
#'
#' @keywords internal
prop_fisher <- function(tbl) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)

  # Put the `TRUE` column first, then the `FALSE` column, before running the test.
  ordered_tbl <- tbl[, c("TRUE", "FALSE")]
  fisher_result <- stats::fisher.test(ordered_tbl)
  fisher_result$p.value
}

#' Patient Counts with Abnormal Range Values by Baseline Status
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Primary analysis variable `.var` indicates the abnormal range result (`character` or `factor`), and additional
#' analysis variables are `id` (`character` or `factor`) and `baseline` (`character` or `factor`). For each
#' direction specified in `abnormal` (e.g. high or low) we condition on baseline range result and count
#' patients in the numerator and denominator as follows:
#'   * `Not <Abnormal>`
#'     * `denom`: the number of patients without abnormality at baseline (excluding those with missing baseline)
#'     * `num`:  the number of patients in `denom` who also have at least one abnormality post-baseline
#'   * `<Abnormal>`
#'     * `denom`: the number of patients with abnormality at baseline
#'     * `num`: the number of patients in `denom` who also have at least one abnormality post-baseline
#'   * `Total`
#'     * `denom`: the number of patients with at least one valid measurement post-baseline
#'     * `num`: the number of patients in `denom` who also have at least one abnormality post-baseline
#'
#' @inheritParams argument_convention
#' @param abnormal (`character`)\cr identifying the abnormal range level(s) in `.var`.
#'
#' @note
#' * `df` should be filtered to include only post-baseline records.
#' * If the baseline variable or analysis variable contains `NA`, it is expected that `NA` has been
#'   conveyed to `na_level` appropriately beforehand with [df_explicit_na()] or [explicit_na()].
#'
#' @seealso Relevant description function [d_count_abnormal_by_baseline()].
#'
#' @name abnormal_by_baseline
NULL

#' Description Function for [s_count_abnormal_by_baseline()]
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Description function that produces the labels for [s_count_abnormal_by_baseline()].
#'
#' @inheritParams abnormal_by_baseline
#'
#' @return Abnormal category labels for [s_count_abnormal_by_baseline()].
#'
#' @examples
#' d_count_abnormal_by_baseline("LOW")
#'
#' @export
d_count_abnormal_by_baseline <- function(abnormal) {
  # Sentence case for the abnormal label: first character upper case, the rest lower case.
  first_char <- toupper(substr(abnormal, 1, 1))
  remainder <- tolower(substring(abnormal, 2))

  list(
    not_abnormal = paste("Not", tolower(abnormal)),
    abnormal = paste0(first_char, remainder),
    total = "Total"
  )
}

#' @describeIn abnormal_by_baseline Statistics function for a single `abnormal` level.
#'
#' @param na_level (`string`)\cr the explicit `na_level` argument you used in the pre-processing steps (maybe with
#'   [df_explicit_na()]). The default is `"<Missing>"`.
#'
#' @return
#' * `s_count_abnormal_by_baseline()` returns statistic `fraction` which is a named list with 3 labeled elements:
#'   `not_abnormal`, `abnormal`, and `total`. Each element contains a vector with `num` and `denom` patient counts.
#'
#' @examples
#' df <- data.frame(
#'   USUBJID = as.character(c(1:6)),
#'   ANRIND = factor(c(rep("LOW", 4), "NORMAL", "HIGH")),
#'   BNRIND = factor(c("LOW", "NORMAL", "HIGH", NA, "LOW", "NORMAL"))
#' )
#' df <- df_explicit_na(df)
#'
#' # Internal function - s_count_abnormal_by_baseline
#' \dontrun{
#' # Just for one abnormal level.
#' s_count_abnormal_by_baseline(df, .var = "ANRIND", abnormal = "HIGH")
#' }
#'
#' @keywords internal
s_count_abnormal_by_baseline <- function(df,
                                         .var,
                                         abnormal,
                                         na_level = "<Missing>",
                                         variables = list(id = "USUBJID", baseline = "BNRIND")) {
  checkmate::assert_string(.var)
  checkmate::assert_string(abnormal)
  checkmate::assert_string(na_level)
  assert_df_with_variables(df, c(range = .var, variables))
  checkmate::assert_subset(names(variables), c("id", "baseline"))

  id_var <- variables$id
  baseline_var <- variables$baseline
  allowed_classes <- c("factor", "character")
  checkmate::assert_multi_class(df[[id_var]], classes = allowed_classes)
  checkmate::assert_multi_class(df[[baseline_var]], classes = allowed_classes)
  checkmate::assert_multi_class(df[[.var]], classes = allowed_classes)

  # Work with factors throughout, whatever type was supplied.
  df[[.var]] <- as_factor_keep_attributes(df[[.var]], na_level = na_level)
  df[[baseline_var]] <- as_factor_keep_attributes(df[[baseline_var]], na_level = na_level)

  assert_valid_factor(df[[.var]], any.missing = FALSE)
  assert_valid_factor(df[[baseline_var]], any.missing = FALSE)

  # Records whose analysis value equals `na_level` are excluded from all counts.
  has_value <- df[[.var]] != na_level
  df <- df[has_value, ]

  anl <- data.frame(
    id = df[[id_var]],
    var = df[[.var]],
    baseline = df[[baseline_var]],
    stringsAsFactors = FALSE
  )

  # Patients are counted once, however many records they have.
  n_patients <- function(ids) {
    length(unique(ids))
  }

  # Total: all patients with a valid post-baseline value (denominator) and those among them
  # with at least one abnormal post-baseline value (numerator).
  total_denom <- n_patients(anl$id)
  total_num <- n_patients(anl$id[anl$var == abnormal])

  # Records with missing baseline contribute to the total row only.
  anl <- anl[anl$baseline != na_level, ]

  abn_at_baseline <- anl$baseline == abnormal
  abn_post_baseline <- anl$var == abnormal

  # Abnormal: patients abnormal at baseline, and those of them abnormal post-baseline.
  abn_denom <- n_patients(anl$id[abn_at_baseline])
  abn_num <- n_patients(anl$id[abn_at_baseline & abn_post_baseline])

  # Not abnormal: patients not abnormal at baseline, and those of them abnormal post-baseline.
  not_abn_denom <- n_patients(anl$id[!abn_at_baseline])
  not_abn_num <- n_patients(anl$id[!abn_at_baseline & abn_post_baseline])

  labels <- d_count_abnormal_by_baseline(abnormal)
  fraction <- list(
    not_abnormal = formatters::with_label(c(num = not_abn_num, denom = not_abn_denom), labels$not_abnormal),
    abnormal = formatters::with_label(c(num = abn_num, denom = abn_denom), labels$abnormal),
    total = formatters::with_label(c(num = total_num, denom = total_denom), labels$total)
  )
  list(fraction = fraction)
}

#' @describeIn abnormal_by_baseline Formatted analysis function which is used as `afun`
#'   in `count_abnormal_by_baseline()`.
#'
#' @return
#' * `a_count_abnormal_by_baseline()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_count_abnormal_by_baseline
#' \dontrun{
#' # Use the Formatted Analysis function for `analyze()`. We need to ungroup `fraction` first
#' # so that the `rtables` formatting function `format_fraction()` can be applied correctly.
#' afun <- make_afun(a_count_abnormal_by_baseline, .ungroup_stats = "fraction")
#' afun(df, .var = "ANRIND", abnormal = "LOW")
#' }
#'
#' @keywords internal
a_count_abnormal_by_baseline <- make_afun(
  # Statistics function whose results are being formatted.
  s_count_abnormal_by_baseline,
  # The `fraction` statistic is rendered with `format_fraction`.
  .formats = c(fraction = format_fraction)
)

#' @describeIn abnormal_by_baseline Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal_by_baseline()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_abnormal_by_baseline()` to the table layout.
#'
#' @examples
#' # Layout creating function.
#' basic_table() %>%
#'   count_abnormal_by_baseline(var = "ANRIND", abnormal = c(High = "HIGH")) %>%
#'   build_table(df)
#'
#' # Passing of statistics function and formatting arguments.
#' df2 <- data.frame(
#'   ID = as.character(c(1, 2, 3, 4)),
#'   RANGE = factor(c("NORMAL", "LOW", "HIGH", "HIGH")),
#'   BLRANGE = factor(c("LOW", "HIGH", "HIGH", "NORMAL"))
#' )
#'
#' basic_table() %>%
#'   count_abnormal_by_baseline(
#'     var = "RANGE",
#'     abnormal = c(Low = "LOW"),
#'     variables = list(id = "ID", baseline = "BLRANGE"),
#'     .formats = c(fraction = "xx / xx"),
#'     .indent_mods = c(fraction = 2L)
#'   ) %>%
#'   build_table(df2)
#'
#' @export
count_abnormal_by_baseline <- function(lyt,
                                       var,
                                       abnormal,
                                       ...,
                                       table_names = abnormal,
                                       .stats = NULL,
                                       .formats = NULL,
                                       .labels = NULL,
                                       .indent_mods = NULL) {
  # `abnormal` has to be a named character vector with as many entries as `table_names`.
  checkmate::assert_character(abnormal, len = length(table_names), names = "named")
  checkmate::assert_string(var)

  # Customise the formatted analysis function once; every abnormality level reuses it.
  fraction_afun <- make_afun(
    a_count_abnormal_by_baseline,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .ungroup_stats = "fraction"
  )

  # Adds the `analyze()` step for the abnormality level at position `idx` to `layout`.
  add_level <- function(layout, idx) {
    level <- abnormal[idx]
    analyze(
      lyt = layout,
      vars = var,
      var_labels = names(level),
      afun = fraction_afun,
      table_names = table_names[idx],
      extra_args = c(list(abnormal = level), list(...)),
      show_labels = "visible"
    )
  }

  # Thread the layout through one `analyze()` call per abnormality level, in order.
  Reduce(add_level, seq_along(abnormal), lyt)
}

#' Sort Data by `PK PARAM` Variable
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param pk_data (`data.frame`)\cr Pharmacokinetics dataframe
#' @param key_var (`character`)\cr key variable used to merge pk_data and metadata created by `d_pkparam()`
#'
#' @return A PK `data.frame` sorted by a `PARAM` variable.
#'
#' @examples
#' library(dplyr)
#'
#' adpp <- tern_ex_adpp %>% mutate(PKPARAM = factor(paste0(PARAM, " (", AVALU, ")")))
#' pk_ordered_data <- h_pkparam_sort(adpp)
#'
#' @export
h_pkparam_sort <- function(pk_data, key_var = "PARAMCD") {
  assert_df_with_variables(pk_data, list(key_var = key_var))
  # Expose the key under the column name used for the merge with the `d_pkparam()` metadata.
  pk_data$PARAMCD <- pk_data[[key_var]]

  ordered_pk_data <- d_pkparam()

  # Add the metadata (including `TLG_ORDER`) to `pk_data`. Metadata columns whose names clash
  # with columns of `pk_data` receive a ".y" suffix.
  joined_data <- merge(pk_data, ordered_pk_data, by = "PARAMCD", suffixes = c("", ".y"))

  # Remove only the suffixed duplicates. The dot is escaped so that columns that merely end
  # in "y" are kept, and a logical mask is used so that all columns are kept when nothing
  # matches (negating an empty `grep()` result would select zero columns).
  is_duplicate <- grepl("\\.y$", colnames(joined_data))
  joined_data <- joined_data[, !is_duplicate, drop = FALSE]

  joined_data$TLG_ORDER <- as.numeric(joined_data$TLG_ORDER)

  # Order the levels of `PARAM` and `TLG_DISPLAY` by the numeric `TLG_ORDER` column.
  row_order <- order(joined_data$TLG_ORDER)
  joined_data$PARAM <- factor(joined_data$PARAM,
    levels = unique(joined_data$PARAM[row_order]),
    ordered = TRUE
  )

  joined_data$TLG_DISPLAY <- factor(joined_data$TLG_DISPLAY,
    levels = unique(joined_data$TLG_DISPLAY[row_order]),
    ordered = TRUE
  )

  joined_data
}

#' Horizontal Waterfall Plot
#'
#' This basic waterfall plot visualizes a quantity `height` ordered by value with some markup.
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param height (`numeric`)\cr vector containing values to be plotted as the waterfall bars.
#' @param id (`character`)\cr vector containing IDs to use as the x-axis label for the waterfall bars.
#' @param col (`character`)\cr colors.
#' @param col_var (`factor`, `character` or `NULL`)\cr categorical variable for bar coloring. `NULL` by default.
#' @param xlab (`character`)\cr x label. Default is `"ID"`.
#' @param ylab (`character`)\cr y label. Default is `"Value"`.
#' @param title (`character`)\cr text to be displayed as plot title.
#' @param col_legend_title (`character`)\cr text to be displayed as legend title.
#'
#' @return A `ggplot` waterfall plot.
#'
#' @examples
#' library(dplyr)
#' library(nestcolor)
#'
#' g_waterfall(height = c(3, 5, -1), id = letters[1:3])
#'
#' g_waterfall(
#'   height = c(3, 5, -1),
#'   id = letters[1:3],
#'   col_var = letters[1:3]
#' )
#'
#' adsl_f <- tern_ex_adsl %>%
#'   select(USUBJID, STUDYID, ARM, ARMCD, SEX)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "OVRINV") %>%
#'   mutate(pchg = rnorm(n(), 10, 50))
#'
#' adrs_f <- head(adrs_f, 30)
#' adrs_f <- adrs_f[!duplicated(adrs_f$USUBJID), ]
#' head(adrs_f)
#'
#' g_waterfall(
#'   height = adrs_f$pchg,
#'   id = adrs_f$USUBJID,
#'   col_var = adrs_f$AVALC
#' )
#'
#' g_waterfall(
#'   height = adrs_f$pchg,
#'   id = paste("asdfdsfdsfsd", adrs_f$USUBJID),
#'   col_var = adrs_f$SEX
#' )
#'
#' g_waterfall(
#'   height = adrs_f$pchg,
#'   id = paste("asdfdsfdsfsd", adrs_f$USUBJID),
#'   xlab = "ID",
#'   ylab = "Percentage Change",
#'   title = "Waterfall plot"
#' )
#'
#' @export
g_waterfall <- function(height,
                        id,
                        col_var = NULL,
                        col = getOption("ggplot2.discrete.colour"),
                        xlab = NULL,
                        ylab = NULL,
                        col_legend_title = NULL,
                        title = NULL) {
  colored <- !is.null(col_var)

  # Validate the plotted vectors jointly; `col_var` only takes part when it is supplied.
  if (colored) {
    check_same_n(height = height, id = id, col_var = col_var)
  } else {
    check_same_n(height = height, id = id)
  }

  checkmate::assert_multi_class(col_var, c("character", "factor"), null.ok = TRUE)
  checkmate::assert_character(col, null.ok = TRUE)

  # Titles that were not supplied fall back to the expressions passed by the caller.
  if (is.null(xlab)) {
    xlab <- deparse(substitute(id))
  }
  if (is.null(ylab)) {
    ylab <- deparse(substitute(height))
  }
  if (is.null(col_legend_title) && !missing(col_var)) {
    col_legend_title <- deparse(substitute(col_var))
  }

  bars <- data.frame(
    height = height,
    id = as.character(id),
    col_var = if (colored) to_n(col_var, length(height)) else "x",
    stringsAsFactors = FALSE
  )
  # Largest value first, so that the bars step down from left to right.
  bars <- bars[order(bars$height, decreasing = TRUE), ]

  # Value labels sit above non-negative bars and below negative ones.
  bar_labels <- format(bars$height, digits = 2)
  label_vjust <- ifelse(bars$height >= 0, -0.5, 1.5)

  p <- ggplot2::ggplot(bars, ggplot2::aes(x = factor(id, levels = id), y = height)) +
    ggplot2::geom_col() +
    ggplot2::geom_text(label = bar_labels, vjust = label_vjust) +
    ggplot2::xlab(xlab) +
    ggplot2::ylab(ylab) +
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, hjust = 0, vjust = .5))

  if (colored) {
    legend_theme <- ggplot2::theme(
      legend.position = "bottom",
      legend.background = ggplot2::element_blank(),
      legend.title = ggplot2::element_text(face = "bold"),
      legend.box.background = ggplot2::element_rect(colour = "black")
    )
    p <- p +
      ggplot2::aes(fill = col_var) +
      ggplot2::labs(fill = col_legend_title) +
      legend_theme
  }

  if (!is.null(col)) {
    p <- p + ggplot2::scale_fill_manual(values = col)
  }

  if (!is.null(title)) {
    p <- p +
      ggplot2::labs(title = title) +
      ggplot2::theme(plot.title = ggplot2::element_text(face = "bold"))
  }

  p
}

#' Pairwise Formula Special Term
#'
#' @description `r lifecycle::badge("deprecated")`
#'
#' The special term `pairwise` indicates that the model should be fitted individually for
#' every tested level in comparison to the reference level.
#'
#' @param x the variable for which pairwise result is expected.
#'
#' @return Variable "paired".
#'
#' @details Let `ARM` be a factor with levels A, B, C, and let B be the reference level.
#'   A model formula including `pairwise(ARM)` will then result in two models:
#'   * A model including only levels A and B, and effect of A estimated in reference to B.
#'   * A model including only levels C and B, the effect of C estimated in reference to B.
#'
#' @export
pairwise <- function(x) {
  lifecycle::deprecate_warn("0.8.1.9013", "pairwise()", "univariate()")
  # Attach the text of the caller's expression as the `varname` attribute.
  term_name <- deparse(substitute(x))
  structure(x, varname = term_name)
}

#' Univariate Formula Special Term
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The special term `univariate` indicates that the model should be fitted individually for
#' every variable included in `univariate()`.
#'
#' @param x A vector of variable names separated by commas.
#'
#' @return When used within a model formula, produces univariate models for each variable provided.
#'
#' @details
#' If provided alongside a pairwise specification, the model
#' `y ~ ARM + univariate(SEX, AGE, RACE)` leads to the study and comparison of the models
#' + `y ~ ARM`
#' + `y ~ ARM + SEX`
#' + `y ~ ARM + AGE`
#' + `y ~ ARM + RACE`
#'
#' @export
univariate <- function(x) {
  # Attach the text of the caller's expression as the `varname` attribute.
  term_name <- deparse(substitute(x))
  structure(x, varname = term_name)
}

# Extract the right-hand term of a formula as character.
rht <- function(x) {
  checkmate::assert_formula(x)
  # The right-hand side is the last component of the formula object.
  as.character(x[[length(x)]])
}

#' Hazard Ratio Estimation in Interactions
#'
#' This function estimates the hazard ratios between arms when an interaction variable is given with
#' specific values.
#'
#' @param variable,given Names of the two variables in the interaction. We seek the estimation of the levels
#'   of `variable` given the levels of `given`.
#' @param lvl_var,lvl_given Corresponding levels as given by `levels`.
#' @param mmat A named numeric vector filled with 0, used as a template to obtain the design matrix.
#' @param coef Numeric of estimated coefficients.
#' @param vcov Variance-covariance matrix of underlying model.
#' @param conf_level Single numeric for the confidence level of estimate intervals.
#'
#' @details Given the cox regression investigating the effect of Arm (A, B, C; reference A)
#'   and Sex (F, M; reference Female). The model is abbreviated: y ~ Arm + Sex + Arm x Sex.
#'   The cox regression estimates the coefficients along with a variance-covariance matrix for:
#'
#'   - b1 (arm b), b2 (arm c)
#'   - b3 (sex m)
#'   - b4 (arm b: sex m), b5 (arm c: sex m)
#'
#'   To estimate the Hazard Ratio for arm C/sex M, the estimation is given in reference
#'   to arm A/sex M by exp(b2 + b3 + b5) / exp(b3) = exp(b2 + b5). The interaction coefficient
#'   is therefore b2 + b5, and its standard error is sqrt(Var b2 + Var b5 + 2 * covariance(b2, b5)).
#'   The confidence limits are obtained from the coefficient -/+ 1.96 standard errors for a confidence level of 0.95.
#'
#' @return A list of matrices (one per level of variable) with rows corresponding to the combinations of
#'   `variable` and `given`, with columns:
#'   * `coef_hat`: Estimation of the coefficient.
#'   * `coef_se`: Standard error of the estimation.
#'   * `hr`: Hazard ratio.
#'   * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio.
#'
#' @seealso [s_cox_multivariate()].
#'
#' @examples
#' library(dplyr)
#' library(survival)
#'
#' ADSL <- tern_ex_adsl %>%
#'   filter(SEX %in% c("F", "M"))
#'
#' adtte <- tern_ex_adtte %>% filter(PARAMCD == "PFS")
#' adtte$ARMCD <- droplevels(adtte$ARMCD)
#' adtte$SEX <- droplevels(adtte$SEX)
#'
#' mod <- coxph(
#'   formula = Surv(time = AVAL, event = 1 - CNSR) ~ (SEX + ARMCD)^2,
#'   data = adtte
#' )
#'
#' mmat <- stats::model.matrix(mod)[1, ]
#' mmat[!mmat == 0] <- 0
#'
#' # Internal function - estimate_coef
#' \dontrun{
#' estimate_coef(
#'   variable = "ARMCD", given = "SEX", lvl_var = "ARM A", lvl_given = "M",
#'   coef = stats::coef(mod), mmat = mmat, vcov = stats::vcov(mod), conf_level = .95
#' )
#' }
#'
#' @keywords internal
estimate_coef <- function(variable, given,
                          lvl_var, lvl_given,
                          coef,
                          mmat,
                          vcov,
                          conf_level = 0.95) {
  # Build the names `<variable><level>` and `<given><level>`; they are matched against
  # `names(mmat)` further down.
  var_lvl <- paste0(variable, lvl_var[-1]) # [-1]: reference level
  giv_lvl <- paste0(given, lvl_given)

  # One row per combination of a non-reference `variable` level and a `given` level,
  # sorted by `variable` and then by `given`.
  design_mat <- expand.grid(variable = var_lvl, given = giv_lvl)
  design_mat <- design_mat[order(design_mat$variable, design_mat$given), ]
  # Add both spellings of the interaction name ("variable:given" and "given:variable"),
  # so that either one can be found in `names(mmat)`.
  design_mat <- within(
    data = design_mat,
    expr = {
      inter <- paste0(variable, ":", given)
      rev_inter <- paste0(given, ":", variable)
    }
  )

  # Remember the grouping and the row labels before `design_mat` is turned into a numeric matrix.
  split_by_variable <- design_mat$variable
  interaction_names <- paste(design_mat$variable, design_mat$given, sep = "/")

  # For each combination, set to 1 the entries of the `mmat` template whose names equal the
  # `variable` term or one of the interaction spellings; the bare `given` term is left out.
  # `apply()` returns one column per combination.
  design_mat <- apply(
    X = design_mat, MARGIN = 1, FUN = function(x) {
      mmat[names(mmat) %in% x[-which(names(x) == "given")]] <- 1
      return(mmat)
    }
  )
  colnames(design_mat) <- interaction_names

  betas <- as.matrix(coef)

  # Sum of the selected coefficients for each combination.
  coef_hat <- t(design_mat) %*% betas
  dimnames(coef_hat)[2] <- "coef"

  # Standard error of that sum: square root of the summed variance-covariance sub-block
  # (variances plus twice the covariances of the selected coefficients).
  coef_se <- apply(design_mat, 2, function(x) {
    vcov_el <- as.logical(x)
    y <- vcov[vcov_el, vcov_el]
    y <- sum(y)
    y <- sqrt(y)
    return(y)
  })

  # Two-sided normal quantile for the requested confidence level.
  q_norm <- stats::qnorm((1 + conf_level) / 2)
  y <- cbind(coef_hat, `se(coef)` = coef_se)

  # Add the hazard ratio and its confidence limits (exponentiated coefficient scale) to each row.
  y <- apply(y, 1, function(x) {
    x["hr"] <- exp(x["coef"])
    x["lcl"] <- exp(x["coef"] - q_norm * x["se(coef)"])
    x["ucl"] <- exp(x["coef"] + q_norm * x["se(coef)"])

    return(x)
  })

  # `apply()` over rows returns the results column-wise, hence the transpose. The rows are
  # then split into one matrix per level of `variable`.
  y <- t(y)
  y <- by(y, split_by_variable, identity)
  y <- lapply(y, as.matrix)

  # Human-readable description of what was estimated, naming the reference level.
  attr(y, "details") <- paste0(
    "Estimations of ", variable,
    " hazard ratio given the level of ", given, " compared to ",
    variable, " level ", lvl_var[1], "."
  )
  return(y)
}

#' `tryCatch` around `car::Anova`
#'
#' Captures warnings when executing [car::Anova].
#'
#' @inheritParams car::Anova
#'
#' @return A list with item `aov` for the result of the model and `warn_text` for the captured warnings.
#'
#' @examples
#' # `car::Anova` on cox regression model including strata and expected
#' # a likelihood ratio test triggers a warning as only Wald method is
#' # accepted.
#'
#' library(survival)
#'
#' mod <- coxph(
#'   formula = Surv(time = futime, event = fustat) ~ factor(rx) + strata(ecog.ps),
#'   data = ovarian
#' )
#'
#' # Internal function - try_car_anova
#' \dontrun{
#' with_wald <- try_car_anova(mod = mod, test.statistic = "Wald")
#' with_lr <- try_car_anova(mod = mod, test.statistic = "LR")
#' }
#'
#' @keywords internal
try_car_anova <- function(mod,
                          test.statistic) { # nolint
  # Text of the most recent warning raised by `car::Anova()`; stays `NULL` when there is none.
  warn_text <- c()

  # Records the warning text in `warn_text` and lets `car::Anova()` carry on.
  on_warning <- function(w) {
    warn_text <<- trimws(paste0("Warning in `try_car_anova`: ", w))

    # A warning is sometimes expected, so execution resumes without re-signalling it.
    invokeRestart("muffleWarning")
  }

  aov <- withCallingHandlers(
    car::Anova(
      mod,
      test.statistic = test.statistic,
      type = "III"
    ),
    warning = on_warning
  )

  list(aov = aov, warn_text = warn_text)
}

#' Fit the Cox Regression Model and Anova
#'
#' This function derives the effect p.values from the [survival::coxph()] results using [car::Anova()].
#' This last package introduces more flexibility to get the effect p.values.
#'
#' @inheritParams t_coxreg
#'
#' @return A list with items `mod` (results of [survival::coxph()]), `msum` (result of `summary`) and
#'   `aov` (result of [car::Anova()]).
#'
#' @noRd
fit_n_aov <- function(formula,
                      data = data,
                      conf_level = conf_level,
                      pval_method = c("wald", "likelihood"),
                      ...) {
  # NOTE(review): the defaults `data = data` and `conf_level = conf_level` refer to themselves,
  # so both arguments effectively have to be supplied by the caller.
  pval_method <- match.arg(pval_method)

  # Re-home the formula in this function's frame; presumably so that later re-evaluation of
  # the model call (e.g. inside `car::Anova()`) finds `data` here - TODO confirm.
  environment(formula) <- environment()
  suppressWarnings({
    # We expect some warnings due to coxph which fails strict programming.
    mod <- survival::coxph(formula, data = data, ...)
    msum <- summary(mod, conf.int = conf_level)
  })

  # Type III Anova with the test statistic that corresponds to `pval_method`;
  # warnings are captured by `try_car_anova()` instead of being raised.
  aov <- try_car_anova(
    mod,
    test.statistic = switch(pval_method,
      "wald" = "Wald",
      "likelihood" = "LR"
    )
  )

  # Surface a captured warning as a message and also keep it as an attribute of the result.
  warn_attr <- aov$warn_text
  if (!is.null(aov$warn_text)) message(warn_attr)

  aov <- aov$aov
  y <- list(mod = mod, msum = msum, aov = aov)
  attr(y, "message") <- warn_attr

  return(y)
}

# argument_checks
check_formula <- function(formula) {
  # Anything that does not inherit from "formula" is rejected.
  is_formula <- inherits(formula, "formula")
  if (!is_formula) {
    stop("Check `formula`. A formula should resemble `Surv(time = AVAL, event = 1 - CNSR) ~ study_arm(ARMCD)`.")
  }

  invisible()
}

check_covariate_formulas <- function(covariates) {
  # Every element has to be a formula, and `NULL` is rejected as well.
  all_formulas <- all(vapply(X = covariates, FUN = inherits, what = "formula", FUN.VALUE = TRUE))
  if (is.null(covariates) || !all_formulas) {
    stop("Check `covariates`, it should be a list of right-hand-term formulas, e.g. list(Age = ~AGE).")
  }

  invisible()
}

name_covariate_names <- function(covariates) {
  current <- names(covariates)
  if (is.null(current)) {
    # No names at all: name every element after the right-hand term of its formula.
    names(covariates) <- vapply(covariates, FUN = rht, FUN.VALUE = "name")
  } else {
    # Some names present: fill only the empty ones.
    blank <- current == ""
    if (any(blank)) {
      names(covariates)[blank] <- vapply(covariates[blank], FUN = rht, FUN.VALUE = "name")
    }
  }
  covariates
}

check_increments <- function(increments, covariates) {
  # Nothing to check when no increments are given.
  if (is.null(increments)) {
    return(invisible())
  }

  covariates <- vapply(covariates, FUN = rht, FUN.VALUE = "name")

  # Warn about every increment whose name is not among the right-hand terms of the covariates.
  lapply(
    X = names(increments), FUN = function(x) {
      if (!(x %in% covariates)) {
        warning(
          paste(
            "Check `increments`, the `increment` for ", x,
            "doesn't match any names in investigated covariate(s)."
          )
        )
      }
    }
  )

  invisible()
}

#' Multivariate Cox Model - Summarized Results
#'
#' Analyses based on multivariate Cox model are usually not performed for the Controlled Substance Reporting or
#' regulatory documents but serve exploratory purposes only (e.g., for publication). In practice, the model usually
#' includes only the main effects (without interaction terms). It produces the hazard ratio estimates for each of the
#' covariates included in the model.
#' The analysis follows the same principles (e.g., stratified vs. unstratified analysis and tie handling) as the
#' usual Cox model analysis. Since there is usually no pre-specified hypothesis testing for such analysis,
#' the p.values need to be interpreted with caution. (**Statistical Analysis of Clinical Trials Data with R**,
#' `NEST's bookdown`)
#'
#' @param formula (`formula`)\cr A formula corresponding to the investigated [survival::Surv()] survival model
#'   including covariates.
#' @param data (`data.frame`)\cr A data frame which includes the variable in formula and covariates.
#' @param conf_level (`proportion`)\cr The confidence level for the hazard ratio interval estimations. Default is 0.95.
#' @param pval_method (`character`)\cr The method used for the estimation of p-values, should be one of
#'   "wald" (default) or "likelihood".
#' @param ... Optional parameters passed to [survival::coxph()]. Can include `ties`, a character string specifying the
#'   method for tie handling, one of `exact` (default), `efron`, `breslow`.
#'
#' @return A `list` with elements `mod`, `msum`, `aov`, and `coef_inter`.
#'
#' @details The output is limited to single effect terms. Work is ongoing for estimation of interaction terms
#'   but is out of scope as defined by the  Global Data Standards Repository
#'   (**`GDS_Standard_TLG_Specs_Tables_2.doc`**).
#'
#' @seealso [estimate_coef()].
#'
#' @examples
#' library(dplyr)
#'
#' adtte <- tern_ex_adtte
#' adtte_f <- subset(adtte, PARAMCD == "OS") # _f: filtered
#' adtte_f <- filter(
#'   adtte_f,
#'   PARAMCD == "OS" &
#'     SEX %in% c("F", "M") &
#'     RACE %in% c("ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE")
#' )
#' adtte_f$SEX <- droplevels(adtte_f$SEX)
#' adtte_f$RACE <- droplevels(adtte_f$RACE)
#'
#' # Internal function - s_cox_multivariate
#' \dontrun{
#' s_cox_multivariate(
#'   formula = Surv(time = AVAL, event = 1 - CNSR) ~ (ARMCD + RACE + AGE)^2, data = adtte_f
#' )
#' }
#'
#' @keywords internal
s_cox_multivariate <- function(formula, data,
                               conf_level = 0.95,
                               pval_method = c("wald", "likelihood"),
                               ...) {
  model_terms <- stats::terms(formula, specials = c("strata"))
  # Variables of the formula, without the first entry and without the `strata()` specials.
  covariates <- rownames(attr(model_terms, "factors"))[-c(1, unlist(attr(model_terms, "specials")))]

  # Character covariates are converted to factors so that their levels are available below.
  for (covar in covariates) {
    if (is.character(data[[covar]])) {
      data[[covar]] <- as.factor(data[[covar]])
    }
  }
  pval_method <- match.arg(pval_method)

  fit <- fit_n_aov(
    formula = formula,
    data = data,
    conf_level = conf_level,
    pval_method = pval_method,
    ...
  )
  mod <- fit$mod
  aov <- fit$aov
  msum <- fit$msum

  term_labels <- attr(mod$terms, "term.labels")
  is_interaction <- attr(mod$terms, "order") > 1

  # Interaction estimates are only derived when the model has terms of order above one.
  coef_inter <- NULL
  if (any(is_interaction)) {
    inter_terms <- term_labels[is_interaction]
    names(inter_terms) <- inter_terms

    # All-zero template carrying the coefficient names of the model matrix.
    template <- stats::model.matrix(mod)[1, ]
    template[!template == 0] <- 0
    mcoef <- stats::coef(mod)
    mvcov <- stats::vcov(mod)
    factor_table <- attr(mod$terms, "factors")

    # Estimates for the levels of `variable` given the levels of `given`.
    estimate_pair <- function(variable, given) {
      estimate_coef(
        variable, given,
        coef = mcoef, mmat = template, vcov = mvcov, conf_level = conf_level,
        lvl_var = levels(data[[variable]]), lvl_given = levels(data[[given]])
      )
    }

    coef_inter <- lapply(
      inter_terms, function(term) {
        # Variables taking part in this interaction term, estimated in both directions.
        involved <- factor_table[, term]
        involved <- names(involved[involved > 0])
        Map(estimate_pair, variable = involved, given = rev(involved))
      }
    )
  }

  list(mod = mod, msum = msum, aov = aov, coef_inter = coef_inter)
}

#' Helper Functions for Cox Proportional Hazards Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions used in [fit_coxreg_univar()] and [fit_coxreg_multivar()].
#'
#' @inheritParams argument_convention
#' @inheritParams h_coxreg_univar_extract
#' @inheritParams cox_regression_inter
#' @inheritParams control_coxreg
#'
#' @seealso [cox_regression]
#'
#' @name h_cox_regression
NULL

#' @describeIn h_cox_regression Helper for Cox regression formula. Creates a list of formulas. It is used
#'   internally by [fit_coxreg_univar()] for the comparison of univariate Cox regression models.
#'
#' @return
#' * `h_coxreg_univar_formulas()` returns a `character` vector coercible into formulas (e.g [stats::as.formula()]).
#'
#' @examples
#' # `h_coxreg_univar_formulas`
#'
#' ## Simple formulas.
#' h_coxreg_univar_formulas(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd", covariates = c("X", "y")
#'   )
#' )
#'
#' ## Addition of an optional strata.
#' h_coxreg_univar_formulas(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd", covariates = c("X", "y"),
#'     strata = "SITE"
#'   )
#' )
#'
#' ## Inclusion of the interaction term.
#' h_coxreg_univar_formulas(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd", covariates = c("X", "y"),
#'     strata = "SITE"
#'   ),
#'   interaction = TRUE
#' )
#'
#' ## Only covariates fitted in separate models.
#' h_coxreg_univar_formulas(
#'   variables = list(
#'     time = "time", event = "status", covariates = c("X", "y")
#'   )
#' )
#'
#' @export
h_coxreg_univar_formulas <- function(variables,
                                     interaction = FALSE) {
  checkmate::assert_list(variables, names = "named")
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)
  checkmate::assert_flag(interaction)

  # An interaction needs both a treatment arm and at least one covariate.
  if (!has_arm || is.null(variables$covariates)) {
    checkmate::assert_false(interaction)
  }

  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # Pieces shared by all formulas: the survival response and the optional strata term.
  response <- paste0("survival::Surv(", variables$time, ", ", variables$event, ") ~ ")
  strata_term <- if (is.null(variables$strata)) {
    ""
  } else {
    paste0(" + strata(", paste0(variables$strata, collapse = ", "), ")")
  }

  # One formula per covariate; `paste0()` vectorises over `variables$covariates`.
  forms <- NULL
  if (!is.null(variables$covariates)) {
    # `ifelse()` with a scalar test yields a single value, so only one arm entry is used here.
    arm_term <- ifelse(has_arm, variables$arm, "1")
    operator <- if (interaction) " * " else " + "
    forms <- paste0(response, arm_term, operator, variables$covariates, strata_term)
  }
  nams <- variables$covariates

  # With a treatment arm, the arm-only reference model comes first under the name "ref".
  if (has_arm) {
    forms <- c(paste0(response, variables$arm, strata_term), forms)
    nams <- c("ref", nams)
  }

  stats::setNames(forms, nams)
}

#' @describeIn h_cox_regression Helper for multivariate Cox regression formula. Creates a formula
#'   string. It is used internally by [fit_coxreg_multivar()] for the comparison of multivariate Cox
#'   regression models. Interactions will not be included in multivariate Cox regression model.
#'
#' @return
#' * `h_coxreg_multivar_formula()` returns a `string` coercible into a formula (e.g [stats::as.formula()]).
#'
#' @examples
#' # `h_coxreg_multivar_formula`
#'
#' h_coxreg_multivar_formula(
#'   variables = list(
#'     time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE")
#'   )
#' )
#'
#' # Addition of an optional strata.
#' h_coxreg_multivar_formula(
#'   variables = list(
#'     time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE"),
#'     strata = "SITE"
#'   )
#' )
#'
#' # Example without treatment arm.
#' h_coxreg_multivar_formula(
#'   variables = list(
#'     time = "AVAL", event = "event", covariates = c("RACE", "AGE"),
#'     strata = "SITE"
#'   )
#' )
#'
#' @export
h_coxreg_multivar_formula <- function(variables) {
  checkmate::assert_list(variables, names = "named")
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # Right-hand side: the arm (or an intercept-only "1") followed by all covariates.
  rhs_terms <- c(ifelse(has_arm, variables$arm, "1"), variables$covariates)
  model_string <- paste0(
    "survival::Surv(", variables$time, ", ", variables$event, ") ~ ",
    paste(rhs_terms, collapse = " + ")
  )

  # Without strata the formula is complete.
  if (is.null(variables$strata)) {
    return(model_string)
  }
  paste0(model_string, " + strata(", paste0(variables$strata, collapse = ", "), ")")
}

#' @describeIn h_cox_regression Utility function to help tabulate the result of
#'   a univariate Cox regression model.
#'
#' @param effect (`string`)\cr the treatment variable.
#' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()].
#'
#' @return
#' * `h_coxreg_univar_extract()` returns a `data.frame` with variables `effect`, `term`, `term_label`, `level`,
#'   `n`, `hr`, `lcl`, `ucl`, and `pval`.
#'
#' @examples
#' library(survival)
#'
#' dta_simple <- data.frame(
#'   time = c(5, 5, 10, 10, 5, 5, 10, 10),
#'   status = c(0, 0, 1, 0, 0, 1, 1, 1),
#'   armcd = factor(LETTERS[c(1, 1, 1, 1, 2, 2, 2, 2)], levels = c("A", "B")),
#'   var1 = c(45, 55, 65, 75, 55, 65, 85, 75),
#'   var2 = c("F", "M", "F", "M", "F", "M", "F", "U")
#' )
#' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple)
#' result <- h_coxreg_univar_extract(
#'   effect = "armcd", covar = "armcd", mod = mod, data = dta_simple
#' )
#' result
#'
#' @export
h_coxreg_univar_extract <- function(effect,
                                    covar,
                                    data,
                                    mod,
                                    control = control_coxreg()) {
  checkmate::assert_string(covar)
  checkmate::assert_string(effect)
  checkmate::assert_class(mod, "coxph")
  test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method]

  anova_tbl <- muffled_car_anova(mod, test_statistic)
  cox_tbl <- broom::tidy(summary(mod, conf.int = control$conf_level))

  # p-value of the effect: the Anova column whose name contains "Pr".
  anova_row <- anova_tbl[effect, , drop = TRUE]
  pval <- anova_row[[grep(pattern = "Pr", x = names(anova_row)), drop = TRUE]]

  # Rows of the tidied model summary that belong to the effect.
  effect_rows <- cox_tbl[grepl(effect, cox_tbl$level), ]

  # The treatment row is labelled "<level 2> vs control (<level 1>)"; covariate rows use
  # the variable label or name.
  is_treatment <- effect == covar
  if (is_treatment) {
    covar_levels <- levels(data[[covar]])
    term_label <- paste0(covar_levels[2], " vs control (", covar_levels[1], ")")
  } else {
    term_label <- unname(labels_or_names(data[covar]))
  }

  data.frame(
    effect = if (is_treatment) "Treatment:" else "Covariate:",
    term = covar,
    term_label = term_label,
    level = levels(data[[effect]])[2],
    n = mod[["n"]],
    hr = unname(effect_rows["exp(coef)"]),
    lcl = unname(effect_rows[grep("lower", names(effect_rows))]),
    ucl = unname(effect_rows[grep("upper", names(effect_rows))]),
    pval = pval,
    stringsAsFactors = FALSE
  )
}

#' @describeIn h_cox_regression Tabulation of multivariate Cox regressions. Utility function to help
#'   tabulate the result of a multivariate Cox regression model for a treatment/covariate variable.
#'
#' @return
#' * `h_coxreg_multivar_extract()` returns a `data.frame` with variables `pval`, `hr`, `lcl`, `ucl`, `level`,
#'   `n`, `term`, and `term_label`.
#'
#' @examples
#' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple)
#' result <- h_coxreg_multivar_extract(
#'   var = "var1", mod = mod, data = dta_simple
#' )
#' result
#'
#' @export
h_coxreg_multivar_extract <- function(var,
                                      data,
                                      mod,
                                      control = control_coxreg()) {
  test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method]
  anova_tbl <- broom::tidy(muffled_car_anova(mod, test_statistic))
  cox_tbl <- broom::tidy(summary(mod, conf.int = control$conf_level))

  values <- data[[var]]

  # Overall p-value of the term from the Anova table.
  ret_anova <- anova_tbl[anova_tbl$term == var, c("term", "p.value")]
  names(ret_anova)[2] <- "pval"

  # Coefficient rows: prefix match for factors (one row per level), exact match otherwise.
  row_mask <- if (is.factor(values)) {
    startsWith(cox_tbl$level, var)
  } else {
    var == cox_tbl$level
  }
  ret_cox <- cox_tbl[row_mask, !(names(cox_tbl) %in% "exp(-coef)")]
  names(ret_cox)[1:4] <- c("pval", "hr", "lcl", "ucl")
  varlab <- unname(labels_or_names(data[var]))
  ret_cox$term <- varlab

  # Numeric covariates have a single coefficient row and no header row.
  if (is.numeric(values)) {
    ret_cox$term_label <- ret_cox$term
    return(as.data.frame(ret_cox))
  }

  # With at most two levels the header row carries no p-value of its own.
  if (length(levels(values)) <= 2) {
    ret_anova$pval <- NA
  }
  ret_anova$term_label <- paste0(varlab, " (reference = ", levels(values)[1], ")")

  # Level rows are labelled by the level name, i.e. the coefficient name without the variable name.
  ret_cox$level <- gsub(var, "", ret_cox$level)
  ret_cox$term_label <- ret_cox$level

  as.data.frame(dplyr::bind_rows(ret_anova, ret_cox))
}

#' Number of Patients
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Count the number of unique and non-unique patients in a column (variable).
#'
#' @inheritParams argument_convention
#' @param x (`character` or `factor`)\cr vector of patient IDs.
#' @param count_by (`character` or `factor`)\cr optional vector to be combined with `x` when counting
#'   `nonunique` records.
#' @param unique_count_suffix (`logical`)\cr should `"(n)"` suffix be added to `unique_count` labels.
#'   Defaults to `TRUE`.
#'
#' @name summarize_num_patients
NULL

#' @describeIn summarize_num_patients Statistics function which counts the number of
#'   unique patients, the corresponding percentage taken with respect to the
#'   total number of patients, and the number of non-unique patients.
#'
#' @return
#' * `s_num_patients()` returns a named `list` of 3 statistics:
#'   * `unique`: Vector of counts and percentages.
#'   * `nonunique`: Vector of counts.
#'   * `unique_count`: Counts.
#'
#' @examples
#' # Use the statistics function to count number of unique and nonunique patients.
#' s_num_patients(x = as.character(c(1, 1, 1, 2, 4, NA)), labelstr = "", .N_col = 6L)
#' s_num_patients(
#'   x = as.character(c(1, 1, 1, 2, 4, NA)),
#'   labelstr = "",
#'   .N_col = 6L,
#'   count_by = as.character(c(1, 1, 2, 1, 1, 1))
#' )
#'
#' @export
s_num_patients <- function(x, labelstr, .N_col, count_by = NULL, unique_count_suffix = TRUE) { # nolint
  # Validate all inputs before counting anything.
  checkmate::assert_string(labelstr)
  checkmate::assert_count(.N_col)
  checkmate::assert_multi_class(x, classes = c("factor", "character"))
  checkmate::assert_flag(unique_count_suffix)

  # Number of distinct, non-missing patient IDs.
  n_unique <- n_available(unique(x))

  # Number of records: plain non-missing IDs, or distinct ID / `count_by`
  # combinations when `count_by` is supplied.
  n_records <- if (is.null(count_by)) {
    n_available(x)
  } else {
    checkmate::assert_vector(count_by, len = length(x))
    checkmate::assert_multi_class(count_by, classes = c("factor", "character"))
    n_available(unique(interaction(x, count_by)))
  }

  # Report a fraction of 0 instead of 0 / 0 when there are neither patients nor a denominator.
  fraction <- ifelse(n_unique == 0 && .N_col == 0, 0, n_unique / .N_col)
  unique_count_label <- ifelse(unique_count_suffix, paste(labelstr, "(n)"), labelstr)

  list(
    unique = formatters::with_label(c(n_unique, fraction), labelstr),
    nonunique = formatters::with_label(n_records, labelstr),
    unique_count = formatters::with_label(n_unique, unique_count_label)
  )
}

#' @describeIn summarize_num_patients Statistics function which counts the number of unique patients
#'   in a column (variable), the corresponding percentage taken with respect to the total number of
#'   patients, and the number of non-unique patients in the column.
#'
#' @param required (`character` or `NULL`)\cr optional name of a variable that is required to be non-missing.
#'
#' @return
#' * `s_num_patients_content()` returns the same values as `s_num_patients()`.
#'
#' @examples
#' # Count number of unique and non-unique patients.
#' df <- data.frame(
#'   USUBJID = as.character(c(1, 2, 1, 4, NA)),
#'   EVENT = as.character(c(10, 15, 10, 17, 8))
#' )
#' s_num_patients_content(df, .N_col = 5, .var = "USUBJID")
#'
#' df_by_event <- data.frame(
#'   USUBJID = as.character(c(1, 2, 1, 4, NA)),
#'   EVENT = as.character(c(10, 15, 10, 17, 8))
#' )
#' s_num_patients_content(df_by_event, .N_col = 5, .var = "USUBJID")
#' s_num_patients_content(df_by_event, .N_col = 5, .var = "USUBJID", count_by = "EVENT")
#'
#' @export
s_num_patients_content <- function(df,
                                   labelstr = "",
                                   .N_col, # nolint
                                   .var,
                                   required = NULL,
                                   count_by = NULL,
                                   unique_count_suffix = TRUE) {
  checkmate::assert_string(.var)
  checkmate::assert_data_frame(df)

  # The ID column is always needed; the `count_by` column only when requested.
  needed_vars <- list(id = .var)
  if (!is.null(count_by)) {
    needed_vars$count_by <- count_by
  }
  assert_df_with_variables(df, needed_vars)

  # Optionally restrict to the rows where the `required` variable is non-missing.
  if (!is.null(required)) {
    checkmate::assert_string(required)
    assert_df_with_variables(df, list(required = required))
    has_required <- !is.na(df[[required]])
    df <- df[has_required, , drop = FALSE]
  }

  ids <- df[[.var]]
  by_values <- if (is.null(count_by)) NULL else df[[count_by]]

  s_num_patients(
    x = ids,
    labelstr = labelstr,
    .N_col = .N_col,
    count_by = by_values,
    unique_count_suffix = unique_count_suffix
  )
}

# Formatted content function built on top of `s_num_patients_content()`.
# It is the function that `summarize_num_patients()` and `analyze_num_patients()`
# further customize via `make_afun()`.
c_num_patients <- make_afun(
  s_num_patients_content,
  # Statistics exposed by default.
  .stats = c("unique", "nonunique", "unique_count"),
  # Default formats: `unique` uses a formatting function, the two counts use the "xx" format string.
  .formats = c(unique = format_count_fraction_fixed_dp, nonunique = "xx", unique_count = "xx")
)

#' @describeIn summarize_num_patients Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @return
#' * `summarize_num_patients()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_num_patients_content()` to the table layout.
#'
#' @export
summarize_num_patients <- function(lyt,
                                   var,
                                   .stats = NULL,
                                   .formats = NULL,
                                   .labels = c(
                                     unique = "Number of patients with at least one event",
                                     nonunique = "Number of events"
                                   ),
                                   indent_mod = lifecycle::deprecated(),
                                   .indent_mods = 0L,
                                   ...) {
  # Backwards compatibility: honour the deprecated `indent_mod` argument, with a warning.
  if (lifecycle::is_present(indent_mod)) {
    lifecycle::deprecate_warn("0.8.2", "summarize_num_patients(indent_mod)", "summarize_num_patients(.indent_mods)")
    .indent_mods <- indent_mod
  }

  if (is.null(.stats)) .stats <- c("unique", "nonunique", "unique_count")

  # Keep only the named labels that belong to a requested statistic. Filtering by
  # name (instead of only when there are more labels than statistics) also covers
  # e.g. `.stats = c("unique", "unique_count")` combined with the default labels,
  # where the `nonunique` label would otherwise be passed on although that
  # statistic is not requested. Unnamed labels are passed through untouched.
  if (!is.null(names(.labels))) {
    .labels <- .labels[names(.labels) %in% .stats]
  }

  # Customize the content function with the requested statistics, formats and labels.
  cfun <- make_afun(
    c_num_patients,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels
  )

  # Additional arguments in `...` are forwarded to the statistics function.
  summarize_row_groups(
    lyt = lyt,
    var = var,
    cfun = cfun,
    extra_args = list(...),
    indent_mod = .indent_mods
  )
}

#' @describeIn summarize_num_patients Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `analyze_num_patients()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_num_patients_content()` to the table layout.
#'
#' @details In general, functions that start with `analyze*` are expected to
#'   work like [rtables::analyze()], while functions that start with `summarize*`
#'   are based upon [rtables::summarize_row_groups()]. The latter provides a
#'   value for each dividing split in the row and column space, but, because it
#'   is bound to the fundamental splits, it is repeated by design on every page
#'   when pagination is involved.
#'
#' @note As opposed to [summarize_num_patients()], this function does not repeat the produced rows.
#'
#' @examples
#' df_tmp <- data.frame(
#'   USUBJID = as.character(c(1, 2, 1, 4, NA, 6, 6, 8, 9)),
#'   ARM = c("A", "A", "A", "A", "A", "B", "B", "B", "B"),
#'   AGE = c(10, 15, 10, 17, 8, 11, 11, 19, 17)
#' )
#' tbl <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   analyze_num_patients("USUBJID", .stats = c("unique")) %>%
#'   build_table(df_tmp)
#' tbl
#'
#' @export
analyze_num_patients <- function(lyt,
                                 vars,
                                 .stats = NULL,
                                 .formats = NULL,
                                 .labels = c(
                                   unique = "Number of patients with at least one event",
                                   nonunique = "Number of events"
                                 ),
                                 show_labels = c("default", "visible", "hidden"),
                                 indent_mod = lifecycle::deprecated(),
                                 .indent_mods = 0L,
                                 ...) {
  # Backwards compatibility: honour the deprecated `indent_mod` argument, with a warning.
  if (lifecycle::is_present(indent_mod)) {
    lifecycle::deprecate_warn("0.8.2", "analyze_num_patients(indent_mod)", "analyze_num_patients(.indent_mods)")
    .indent_mods <- indent_mod
  }

  if (is.null(.stats)) .stats <- c("unique", "nonunique", "unique_count")

  # Keep only the named labels that belong to a requested statistic. Filtering by
  # name (instead of only when there are more labels than statistics) also covers
  # e.g. `.stats = c("unique", "unique_count")` combined with the default labels,
  # where the `nonunique` label would otherwise be passed on although that
  # statistic is not requested. Unnamed labels are passed through untouched.
  if (!is.null(names(.labels))) {
    .labels <- .labels[names(.labels) %in% .stats]
  }

  # Customize the analysis function with the requested statistics, formats and labels.
  afun <- make_afun(
    c_num_patients,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels
  )

  # Additional arguments in `...` are forwarded to the statistics function.
  analyze(
    afun = afun,
    lyt = lyt,
    vars = vars,
    extra_args = list(...),
    show_labels = show_labels,
    indent_mod = .indent_mods
  )
}

#' Odds Ratio Estimation
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Compares bivariate responses between two groups in terms of odds ratios
#' along with a confidence interval.
#'
#' @inheritParams argument_convention
#'
#' @details This function uses either logistic regression for unstratified
#'   analyses, or conditional logistic regression for stratified analyses.
#'   The Wald confidence interval with the specified confidence level is
#'   calculated.
#'
#' @note For stratified analyses, there is currently no implementation for conditional
#'   likelihood confidence intervals, therefore the likelihood confidence interval is not
#'   yet available as an option. Besides, when `rsp` contains only responders or non-responders,
#'   then the result values will be `NA`, because no odds ratio estimation is possible.
#'
#' @seealso Relevant helper function [h_odds_ratio()].
#'
#' @name odds_ratio
NULL

#' @describeIn odds_ratio Statistics function which estimates the odds ratio
#'   between a treatment and a control. A `variables` list with `arm` and `strata`
#'   variable names must be passed if a stratified analysis is required.
#'
#' @inheritParams split_cols_by_groups
#'
#' @return
#' * `s_odds_ratio()` returns a named list with the statistics `or_ci`
#'   (containing `est`, `lcl`, and `ucl`) and `n_tot`.
#'
#' @examples
#' set.seed(12)
#' dta <- data.frame(
#'   rsp = sample(c(TRUE, FALSE), 100, TRUE),
#'   grp = factor(rep(c("A", "B"), each = 50), levels = c("B", "A")),
#'   strata = factor(sample(c("C", "D"), 100, TRUE))
#' )
#'
#' # Unstratified analysis.
#' s_odds_ratio(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   .df_row = dta
#' )
#'
#' # Stratified analysis.
#' s_odds_ratio(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   .df_row = dta,
#'   variables = list(arm = "grp", strata = "strata")
#' )
#'
#' @export
s_odds_ratio <- function(df,
                         .var,
                         .ref_group,
                         .in_ref_col,
                         .df_row,
                         variables = list(arm = NULL, strata = NULL),
                         conf_level = 0.95,
                         groups_list = NULL) {
  # Empty placeholders; these are what is returned for the reference column.
  y <- list(or_ci = "", n_tot = "")

  if (!.in_ref_col) {
    assert_proportion_value(conf_level)
    assert_df_with_variables(df, list(rsp = .var))
    assert_df_with_variables(.ref_group, list(rsp = .var))

    stratified <- !is.null(variables$strata)

    if (!stratified) {
      # Unstratified: stack reference and comparison rows and fit a logistic regression.
      grp_labels <- c("ref", "Not-ref")
      grp_sizes <- c(nrow(.ref_group), nrow(df))
      data <- data.frame(
        rsp = c(.ref_group[[.var]], df[[.var]]),
        grp = factor(rep(grp_labels, grp_sizes), levels = grp_labels)
      )
      y <- or_glm(data, conf_level = conf_level)
    } else {
      assert_df_with_variables(.df_row, c(list(rsp = .var), variables))

      arm_var <- variables$arm
      ref_levels <- as.character(unique(.ref_group[[arm_var]]))
      trt_levels <- as.character(unique(df[[arm_var]]))

      # The group variable prepared for clogit must be synchronised with combination groups definition.
      if (is.null(groups_list)) {
        ref_grp <- ref_levels
        trt_grp <- trt_levels
        grp <- stats::relevel(factor(.df_row[[arm_var]]), ref = ref_grp)
      } else {
        # A column may pool several arm levels: look up the name(s) of the
        # combination group(s) that contain all of the given levels.
        groups_containing <- function(arm_levels) {
          is_match <- vapply(
            X = groups_list,
            FUN.VALUE = TRUE,
            FUN = function(x) all(arm_levels %in% x)
          )
          names(groups_list)[is_match]
        }
        ref_grp <- groups_containing(ref_levels)
        trt_grp <- groups_containing(trt_levels)

        grp <- combine_levels(.df_row[[arm_var]], levels = ref_levels, new_level = ref_grp)
        grp <- combine_levels(grp, levels = trt_levels, new_level = trt_grp)
      }

      # The reference level in `grp` must be the same as in the `rtables` column split.
      data <- data.frame(
        rsp = .df_row[[.var]],
        grp = grp,
        strata = interaction(.df_row[variables$strata])
      )
      y_all <- or_clogit(data, conf_level = conf_level)

      # Exactly one comparison group must be identified, and it must have an estimate.
      checkmate::assert_string(trt_grp)
      checkmate::assert_subset(trt_grp, names(y_all$or_ci))
      y$or_ci <- y_all$or_ci[[trt_grp]]
      y$n_tot <- y_all$n_tot
    }
  }

  # Attach the row labels to both statistics.
  ci_label <- paste0("Odds Ratio (", 100 * conf_level, "% CI)")
  y$or_ci <- formatters::with_label(x = y$or_ci, label = ci_label)
  y$n_tot <- formatters::with_label(x = y$n_tot, label = "Total n")

  y
}

#' @describeIn odds_ratio Formatted analysis function which is used as `afun` in `estimate_odds_ratio()`.
#'
#' @return
#' * `a_odds_ratio()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_odds_ratio(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   .df_row = dta
#' )
#'
#' @export
a_odds_ratio <- make_afun(
  # Statistics function being wrapped.
  s_odds_ratio,
  # Default cell format for the `or_ci` statistic (estimate with lower and upper limits).
  .formats = c(or_ci = "xx.xx (xx.xx - xx.xx)"),
  # Default indentation modifier for the `or_ci` row.
  .indent_mods = c(or_ci = 1L)
)

#' @describeIn odds_ratio Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param ... arguments passed to `s_odds_ratio()`.
#'
#' @return
#' * `estimate_odds_ratio()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_odds_ratio()` to the table layout.
#'
#' @examples
#' dta <- data.frame(
#'   rsp = sample(c(TRUE, FALSE), 100, TRUE),
#'   grp = factor(rep(c("A", "B"), each = 50))
#' )
#'
#' l <- basic_table() %>%
#'   split_cols_by(var = "grp", ref_group = "B") %>%
#'   estimate_odds_ratio(vars = "rsp")
#'
#' build_table(l, df = dta)
#'
#' @export
estimate_odds_ratio <- function(lyt,
                                vars,
                                ...,
                                show_labels = "hidden",
                                table_names = vars,
                                .stats = "or_ci",
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # Customize the formatted analysis function with the caller's statistics,
  # formats, labels and indentation.
  odds_ratio_afun <- make_afun(
    a_odds_ratio,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Arguments in `...` are handed to the statistics function via `extra_args`.
  analyze(
    lyt = lyt,
    vars = vars,
    afun = odds_ratio_afun,
    extra_args = list(...),
    show_labels = show_labels,
    table_names = table_names
  )
}

#' Helper Functions for Odds Ratio Estimation
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Functions to calculate odds ratios in [estimate_odds_ratio()].
#'
#' @inheritParams argument_convention
#' @param data (`data.frame`)\cr data frame containing at least the variables `rsp` and `grp`, and optionally
#'   `strata` for [or_clogit()].
#'
#' @return A named `list` of elements `or_ci` and `n_tot`.
#'
#' @seealso [odds_ratio]
#'
#' @name h_odds_ratio
NULL

#' @describeIn h_odds_ratio Estimates the odds ratio based on [stats::glm()]. Note that there must be
#'   exactly 2 groups in `data` as specified by the `grp` variable.
#'
#' @examples
#' # Data with 2 groups.
#' data <- data.frame(
#'   rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1)),
#'   grp = letters[c(1, 1, 1, 2, 2, 2, 1, 2)],
#'   strata = letters[c(1, 2, 1, 2, 2, 2, 1, 2)],
#'   stringsAsFactors = TRUE
#' )
#'
#' # Odds ratio based on glm.
#' or_glm(data, conf_level = 0.95)
#'
#' @export
or_glm <- function(data, conf_level) {
  checkmate::assert_logical(data$rsp)
  assert_proportion_value(conf_level)
  assert_df_with_variables(data, list(rsp = "rsp", grp = "grp"))
  checkmate::assert_multi_class(data$grp, classes = c("factor", "character"))

  # The grouping variable must be a factor with exactly two levels.
  data$grp <- as_factor_keep_attributes(data$grp)
  assert_df_with_factors(data, list(val = "grp"), min.levels = 2, max.levels = 2)

  model_formula <- stats::as.formula("rsp ~ grp")
  fit <- stats::glm(
    formula = model_formula,
    data = data,
    family = stats::binomial(link = "logit")
  )

  # Drop the intercept (first coefficient / first row) and move to the odds ratio scale.
  estimate <- exp(stats::coef(fit)[-1])
  limits <- exp(
    stats::confint.default(fit, level = conf_level)[-1, , drop = FALSE]
  )

  list(
    or_ci = stats::setNames(c(estimate, limits), c("est", "lcl", "ucl")),
    # Number of observations in the model frame of the fit.
    n_tot = stats::setNames(nrow(fit$model), "n_tot")
  )
}

#' @describeIn h_odds_ratio Estimates the odds ratio based on [survival::clogit()]. This is done for
#'   the whole data set including all groups, since the results are not the same as when doing
#'   pairwise comparisons between the groups.
#'
#' @examples
#' # Data with 3 groups.
#' data <- data.frame(
#'   rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0)),
#'   grp = letters[c(1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3)],
#'   strata = LETTERS[c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)],
#'   stringsAsFactors = TRUE
#' )
#'
#' # Odds ratio based on stratified estimation by conditional logistic regression.
#' or_clogit(data, conf_level = 0.95)
#'
#' @export
or_clogit <- function(data, conf_level) {
  checkmate::assert_logical(data$rsp)
  assert_proportion_value(conf_level)
  assert_df_with_variables(data, list(rsp = "rsp", grp = "grp", strata = "strata"))
  checkmate::assert_multi_class(data$grp, classes = c("factor", "character"))
  checkmate::assert_multi_class(data$strata, classes = c("factor", "character"))

  data$grp <- as_factor_keep_attributes(data$grp)
  data$strata <- as_factor_keep_attributes(data$strata)

  # Deviation from convention: `survival::strata` must be simply `strata`.
  model_formula <- stats::as.formula("rsp ~ grp + strata(strata)")
  model_fit <- clogit_with_tryCatch(formula = model_formula, data = data)

  estimates <- stats::coef(model_fit)
  limits <- stats::confint(model_fit, level = conf_level)

  # One entry per coefficient, i.e. per comparison of a group against the
  # reference group, holding the estimate and both confidence limits on the
  # odds ratio scale. Entries are named by the group (coefficient name
  # without the leading "grp").
  or_ci <- list()
  for (term in names(estimates)) {
    triple <- exp(c(estimates[term], limits[term, , drop = TRUE]))
    names(triple) <- c("est", "lcl", "ucl")
    or_ci[[gsub("^grp", "", x = term)]] <- triple
  }

  list(or_ci = or_ci, n_tot = c(n_tot = model_fit$n))
}

#' Tabulate Survival Duration by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Tabulate statistics such as median survival time and hazard ratio for population subgroups.
#'
#' @inheritParams argument_convention
#' @inheritParams survival_coxph_pairwise
#' @param data (`data.frame`)\cr the dataset containing the variables to summarize.
#' @param groups_lists (named `list` of `list`)\cr optionally contains for each `subgroups` variable a list, which
#'   specifies the new group levels via the names and the levels that belong to it in the character vectors that are
#'   elements of the list.
#' @param label_all (`string`)\cr label for the total population analysis.
#' @param time_unit (`string`)\cr label with unit of median survival time. Default `NULL` skips displaying unit.
#'
#' @details These functions create a layout starting from a data frame which contains
#'   the required statistics. The resulting tables are typically used as part of a forest plot.
#'
#' @seealso [extract_survival_subgroups()]
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adtte <- tern_ex_adtte
#'
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte)
#'
#' adtte_f <- adtte %>%
#'   filter(
#'     PARAMCD == "OS",
#'     ARM %in% c("B: Placebo", "A: Drug X"),
#'     SEX %in% c("M", "F")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to display reference arm before treatment arm.
#'     ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
#'     SEX = droplevels(SEX),
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- c(
#'   "ARM" = adtte_labels[["ARM"]],
#'   "SEX" = adtte_labels[["SEX"]],
#'   "AVALU" = adtte_labels[["AVALU"]],
#'   "is_event" = "Event Flag"
#' )
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' df <- extract_survival_subgroups(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM", subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f
#' )
#' df
#'
#' @name survival_duration_subgroups
NULL

#' Prepares Survival Data for Population Subgroups in Data Frames
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares estimates of median survival times and treatment hazard ratios for population subgroups in
#' data frames. Simple wrapper for [h_survtime_subgroups_df()] and [h_coxph_subgroups_df()]. Result is a `list`
#' of two `data.frame`s: `survtime` and `hr`. `variables` corresponds to the names of variables found in `data`,
#' passed as a named `list` and requires elements `tte`, `is_event`, `arm` and optionally `subgroups` and `strat`.
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @inheritParams argument_convention
#' @inheritParams survival_duration_subgroups
#' @inheritParams survival_coxph_pairwise
#'
#' @return A named `list` of two elements:
#'   * `survtime`: A `data.frame` containing columns `arm`, `n`, `n_events`, `median`, `subgroup`, `var`,
#'     `var_label`, and `row_type`.
#'   * `hr`: A `data.frame` containing columns `arm`, `n_tot`, `n_tot_events`, `hr`, `lcl`, `ucl`, `conf_level`,
#'     `pval`, `pval_label`, `subgroup`, `var`, `var_label`, and `row_type`.
#'
#' @seealso [survival_duration_subgroups]
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adtte <- tern_ex_adtte
#' adtte_labels <- formatters::var_labels(adtte)
#'
#' adtte_f <- adtte %>%
#'   filter(
#'     PARAMCD == "OS",
#'     ARM %in% c("B: Placebo", "A: Drug X"),
#'     SEX %in% c("M", "F")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to display reference arm before treatment arm.
#'     ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
#'     SEX = droplevels(SEX),
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- c(
#'   "ARM" = adtte_labels[["ARM"]],
#'   "SEX" = adtte_labels[["SEX"]],
#'   "AVALU" = adtte_labels[["AVALU"]],
#'   "is_event" = "Event Flag"
#' )
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' df <- extract_survival_subgroups(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM", subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f
#' )
#' df
#'
#' df_grouped <- extract_survival_subgroups(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM", subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#' df_grouped
#'
#' @export
extract_survival_subgroups <- function(variables,
                                       data,
                                       groups_lists = list(),
                                       control = control_coxph(),
                                       label_all = "All Patients") {
  # Both helpers receive the same variables, data, groupings and overall label;
  # only the Cox model helper additionally takes the `control` settings.
  list(
    survtime = h_survtime_subgroups_df(
      variables,
      data,
      groups_lists = groups_lists,
      label_all = label_all
    ),
    hr = h_coxph_subgroups_df(
      variables,
      data,
      groups_lists = groups_lists,
      control = control,
      label_all = label_all
    )
  )
}

#' @describeIn survival_duration_subgroups  Formatted analysis function which is used as
#'   `afun` in `tabulate_survival_subgroups()`.
#'
#' @return
#' * `a_survival_subgroups()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_survival_subgroups
#' \dontrun{
#' a_survival_subgroups(.formats = list("n" = "xx", "median" = "xx.xx"))
#' }
#'
#' @keywords internal
a_survival_subgroups <- function(.formats = list(
                                   n = "xx",
                                   n_events = "xx",
                                   n_tot_events = "xx",
                                   median = "xx.x",
                                   n_tot = "xx",
                                   hr = list(format_extreme_values(2L)),
                                   ci = list(format_extreme_values_ci(2L)),
                                   pval = "x.xxxx | (<0.0001)"
                                 )) {
  checkmate::assert_list(.formats)
  checkmate::assert_subset(
    names(.formats),
    c("n", "n_events", "median", "n_tot", "n_tot_events", "hr", "ci", "pval")
  )

  # Build one formatted analysis function per statistic. All of them emit one
  # row per subgroup; they only differ in how the cell values are pulled from
  # the data frame.
  build_afun <- function(stat, fmt) {
    cell_values <- if (stat == "ci") {
      # The confidence interval cell combines the lower and upper limit columns.
      function(df) combine_vectors(df$lcl, df$ucl)
    } else {
      function(df) as.list(df[[stat]])
    }

    function(df, labelstr = "", ...) {
      in_rows(
        .list = cell_values(df),
        .labels = as.character(df$subgroup),
        .formats = fmt
      )
    }
  }

  # The result is named by statistic.
  Map(build_afun, stat = names(.formats), fmt = .formats)
}

#' @describeIn survival_duration_subgroups Table-creating function which creates a table
#'   summarizing survival by subgroup. This function is a wrapper for [rtables::analyze_colvars()]
#'   and [rtables::summarize_row_groups()].
#'
#' @param df (`list`)\cr of data frames containing all analysis variables. List should be
#'   created using [extract_survival_subgroups()].
#' @param vars (`character`)\cr the name of statistics to be reported among:
#'   * `n_tot_events`: Total number of events per group.
#'   * `n_events`: Number of events per group.
#'   * `n_tot`: Total number of observations per group.
#'   * `n`: Number of observations per group.
#'   * `median`: Median survival time.
#'   * `hr`: Hazard ratio.
#'   * `ci`: Confidence interval of hazard ratio.
#'   * `pval`: p-value of the effect.
#'   Note, one of the statistics `n_tot` and `n_tot_events`, as well as both `hr` and `ci`
#'   are required.
#'
#' @return An `rtables` table summarizing survival by subgroup.
#'
#' @examples
#' ## Table with default columns.
#' basic_table() %>%
#'   tabulate_survival_subgroups(df, time_unit = adtte_f$AVALU[1])
#'
#' ## Table with a manually chosen set of columns: adding "pval".
#' basic_table() %>%
#'   tabulate_survival_subgroups(
#'     df = df,
#'     vars = c("n_tot_events", "n_events", "median", "hr", "ci", "pval"),
#'     time_unit = adtte_f$AVALU[1]
#'   )
#'
#' @export
tabulate_survival_subgroups <- function(lyt,
                                        df,
                                        vars = c("n_tot_events", "n_events", "median", "hr", "ci"),
                                        time_unit = NULL) {
  # Confidence level and p-value label are read from the first row of the hazard ratio data.
  conf_level <- df$hr$conf_level[1]
  method <- df$hr$pval_label[1]

  # Formatted analysis functions (default formats), one per statistic, and the
  # column variables / labels for the requested statistics.
  afun_lst <- a_survival_subgroups()
  colvars <- d_survival_subgroups_colvars(
    vars,
    conf_level = conf_level,
    method = method,
    time_unit = time_unit
  )

  # Split the requested columns into those tabulated from `df$survtime` and
  # those tabulated from `df$hr`, selecting by the names of the labels.
  colvars_survtime <- list(
    vars = colvars$vars[names(colvars$labels) %in% c("n", "n_events", "median")],
    labels = colvars$labels[names(colvars$labels) %in% c("n", "n_events", "median")]
  )
  colvars_hr <- list(
    vars = colvars$vars[names(colvars$labels) %in% c("n_tot", "n_tot_events", "hr", "ci", "pval")],
    labels = colvars$labels[names(colvars$labels) %in% c("n_tot", "n_tot_events", "hr", "ci", "pval")]
  )

  # Columns from table_survtime are optional.
  if (length(colvars_survtime$vars) > 0) {
    # Content rows first: one column set per arm, restricted to `row_type == "content"`.
    lyt_survtime <- split_cols_by(lyt = lyt, var = "arm")
    lyt_survtime <- split_rows_by(
      lyt = lyt_survtime,
      var = "row_type",
      split_fun = keep_split_levels("content"),
      nested = FALSE
    )
    lyt_survtime <- summarize_row_groups(
      lyt = lyt_survtime,
      var = "var_label",
      cfun = afun_lst[names(colvars_survtime$labels)]
    )
    lyt_survtime <- split_cols_by_multivar(
      lyt = lyt_survtime,
      vars = colvars_survtime$vars,
      varlabels = colvars_survtime$labels
    )

    # Analysis rows are only added when the data contains any.
    if ("analysis" %in% df$survtime$row_type) {
      lyt_survtime <- split_rows_by(
        lyt = lyt_survtime,
        var = "row_type",
        split_fun = keep_split_levels("analysis"),
        nested = FALSE,
        child_labels = "hidden"
      )
      lyt_survtime <- split_rows_by(lyt = lyt_survtime, var = "var_label", nested = TRUE)
      lyt_survtime <- analyze_colvars(
        lyt = lyt_survtime,
        afun = afun_lst[names(colvars_survtime$labels)],
        inclNAs = TRUE
      )
    }

    table_survtime <- build_table(lyt_survtime, df = df$survtime)
  } else {
    table_survtime <- NULL
  }

  # Columns "n_tot_events" or "n_tot", and "hr", "ci" in table_hr are required.
  lyt_hr <- split_cols_by(lyt = lyt, var = "arm")
  lyt_hr <- split_rows_by(
    lyt = lyt_hr,
    var = "row_type",
    split_fun = keep_split_levels("content"),
    nested = FALSE
  )
  lyt_hr <- summarize_row_groups(
    lyt = lyt_hr,
    var = "var_label",
    cfun = afun_lst[names(colvars_hr$labels)]
  )
  lyt_hr <- split_cols_by_multivar(
    lyt = lyt_hr,
    vars = colvars_hr$vars,
    varlabels = colvars_hr$labels
  ) %>%
    append_topleft("Baseline Risk Factors")

  # NOTE(review): this condition inspects `df$survtime$row_type` although the table
  # below is built from `df$hr`; presumably both data frames always carry the same
  # row types when created by `extract_survival_subgroups()` - confirm.
  if ("analysis" %in% df$survtime$row_type) {
    lyt_hr <- split_rows_by(
      lyt = lyt_hr,
      var = "row_type",
      split_fun = keep_split_levels("analysis"),
      nested = FALSE,
      child_labels = "hidden"
    )
    lyt_hr <- split_rows_by(lyt = lyt_hr, var = "var_label", nested = TRUE)
    lyt_hr <- analyze_colvars(
      lyt = lyt_hr,
      afun = afun_lst[names(colvars_hr$labels)],
      inclNAs = TRUE
    )
  }
  table_hr <- build_table(lyt_hr, df = df$hr)

  # There can be one or two vars starting with "n_tot".
  n_tot_ids <- grep("^n_tot", colvars_hr$vars)
  # The confidence interval column is located via the variable name "lcl"; presumably
  # `d_survival_subgroups_colvars()` represents the "ci" statistic by that variable - confirm.
  if (is.null(table_survtime)) {
    result <- table_hr
    hr_id <- match("hr", colvars_hr$vars)
    ci_id <- match("lcl", colvars_hr$vars)
  } else {
    # Reorder the table.
    result <- cbind_rtables(table_hr[, n_tot_ids], table_survtime, table_hr[, -n_tot_ids])
    # And then calculate column indices accordingly.
    hr_id <- length(n_tot_ids) + ncol(table_survtime) + match("hr", colvars_hr$vars[-n_tot_ids])
    ci_id <- length(n_tot_ids) + ncol(table_survtime) + match("lcl", colvars_hr$vars[-n_tot_ids])
    # After reordering, the "n_tot" columns are the leading columns of the result.
    n_tot_ids <- seq_along(n_tot_ids)
  }

  # Attach the column positions and header text as attributes of the returned table
  # (the attribute names suggest they are meant for a forest plot).
  structure(
    result,
    forest_header = paste0(rev(levels(df$survtime$arm)), "\nBetter"),
    col_x = hr_id,
    col_ci = ci_id,
    # Take the first one for scaling the symbol sizes in graph.
    col_symbol_size = n_tot_ids[1]
  )
}

#' Labels for Column Variables in Survival Duration by Subgroup Table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Internal function to check variables included in [tabulate_survival_subgroups()] and create column labels.
#'
#' @inheritParams tabulate_survival_subgroups
#' @inheritParams argument_convention
#' @param method (`character`)\cr p-value method for testing hazard ratio = 1.
#'
#' @return A `list` of variables and their labels to tabulate.
#'
#' @note At least one of `n_tot` and `n_tot_events` must be provided in `vars`.
#'
#' @export
d_survival_subgroups_colvars <- function(vars,
                                         conf_level,
                                         method,
                                         time_unit = NULL) {
  # Validate the requested columns before any label is built.
  allowed_vars <- c("n", "n_events", "median", "n_tot", "n_tot_events", "hr", "ci", "pval")
  checkmate::assert_character(vars)
  checkmate::assert_string(time_unit, null.ok = TRUE)
  # Hazard ratio and its confidence interval are mandatory columns.
  checkmate::assert_subset(c("hr", "ci"), vars)
  # At least one of the total count columns has to be requested.
  checkmate::assert_true(any(c("n_tot", "n_tot_events") %in% vars))
  checkmate::assert_subset(vars, allowed_vars)

  # The median label carries the time unit only when one is supplied.
  median_label <- "Median"
  if (!is.null(time_unit)) {
    median_label <- paste0("Median (", time_unit, ")")
  }

  # Lookup table from every supported variable to its column label.
  all_labels <- c(
    n = "n",
    n_events = "Events",
    median = median_label,
    n_tot = "Total n",
    n_tot_events = "Total Events",
    hr = "Hazard Ratio",
    ci = paste0(100 * conf_level, "% Wald CI"),
    pval = method
  )

  # `ci` is replaced by `lcl`, which serves as a placeholder variable that is
  # available in the analysis data. According to the original note, the interval
  # itself is tabulated from `lcl` and `ucl`, see `a_survival_subgroups` for details.
  tabulated_vars <- replace(vars, vars == "ci", "lcl")

  # Labels stay keyed by the requested names (i.e. `ci`, not `lcl`).
  list(
    vars = tabulated_vars,
    labels = all_labels[vars]
  )
}

#' Summary for analysis of covariance (ANCOVA).
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Summarize results of ANCOVA. This can be used to analyze multiple endpoints and/or
#' multiple timepoints within the same response variable `.var`.
#'
#' @inheritParams argument_convention
#'
#' @name summarize_ancova
NULL

#' Helper Function to Return Results of a Linear Model
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @param .df_row (`data.frame`)\cr data set that includes all the variables that are called in `.var` and `variables`.
#' @param variables (named `list` of `strings`)\cr list of additional analysis variables, with expected elements:
#'   * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple groups will be
#'     summarized. Specifically, the first level of `arm` variable is taken as the reference group.
#'   * `covariates` (`character`)\cr a vector that can contain single variable names (such as `"X1"`), and/or
#'     interaction terms indicated by `"X1 * X2"`.
#' @param interaction_item (`character`)\cr name of the variable that should have interactions
#'   with arm. If the interaction is not needed, the default option is `NULL`.
#'
#' @return The summary of a linear model.
#'
#' @examples
#' h_ancova(
#'   .var = "Sepal.Length",
#'   .df_row = iris,
#'   variables = list(arm = "Species", covariates = c("Petal.Length * Petal.Width", "Sepal.Width"))
#' )
#'
#' @export
h_ancova <- function(.var,
                     .df_row,
                     variables,
                     interaction_item = NULL) {
  checkmate::assert_string(.var)
  checkmate::assert_list(variables)
  checkmate::assert_subset(names(variables), c("arm", "covariates"))
  assert_df_with_variables(.df_row, list(rsp = .var))

  arm <- variables$arm
  covariates <- variables$covariates
  if (!is.null(covariates) && length(covariates) > 0) {
    # Every variable referenced by a covariate term must exist in the data.
    assert_df_with_variables(.df_row, get_covariates(covariates))
  }

  # Model formula: response ~ [covariates +] arm.
  covariates_term <- paste(covariates, collapse = " + ")
  formula <- if (covariates_term != "") {
    stats::as.formula(paste0(.var, " ~ ", covariates_term, " + ", arm))
  } else {
    stats::as.formula(paste0(.var, " ~ ", arm))
  }

  # Marginal means are requested over arm, plus the interaction variable if given.
  specs <- if (is.null(interaction_item)) arm else c(arm, interaction_item)

  lm_fit <- stats::lm(
    formula = formula,
    data = .df_row
  )

  # The data is passed again so that the factor levels of the arm variable can be inferred.
  emmeans::emmeans(
    lm_fit,
    specs = specs,
    data = .df_row
  )
}

#' @describeIn summarize_ancova Statistics function that produces a named list of results
#'   of the investigated linear model.
#'
#' @inheritParams h_ancova
#' @param interaction_y (`character`)\cr a selected item inside of the `interaction_item` column which will be used
#'   to select the specific ANCOVA results. If the interaction is not needed, the default option is `FALSE`.
#'
#' @return
#' * `s_ancova()` returns a named list of 5 statistics:
#'   * `n`: Count of complete sample size for the group.
#'   * `lsmean`: Estimated marginal means in the group.
#'   * `lsmean_diff`: Difference in estimated marginal means in comparison to the reference group.
#'     If working with the reference group, this will be empty.
#'   * `lsmean_diff_ci`: Confidence interval for difference in estimated marginal means in comparison
#'     to the reference group.
#'   * `pval`: p-value (not adjusted for multiple comparisons).
#'
#' @examples
#' library(dplyr)
#'
#' df <- iris %>% filter(Species == "virginica")
#' .df_row <- iris
#' .var <- "Petal.Length"
#' variables <- list(arm = "Species", covariates = "Sepal.Length * Sepal.Width")
#' .ref_group <- iris %>% filter(Species == "setosa")
#' conf_level <- 0.95
#'
#' # Internal function - s_ancova
#' \dontrun{
#' s_ancova(
#'   df, .var, .df_row, variables, .ref_group,
#'   .in_ref_col = FALSE,
#'   conf_level, interaction_y = FALSE, interaction_item = NULL
#' )
#' }
#'
#' @keywords internal
s_ancova <- function(df,
                     .var,
                     .df_row,
                     variables,
                     .ref_group,
                     .in_ref_col,
                     conf_level,
                     interaction_y = FALSE,
                     interaction_item = NULL) {
  # Fit the linear model on the full row data and derive the estimated marginal means.
  emmeans_fit <- h_ancova(.var = .var, variables = variables, .df_row = .df_row, interaction_item = interaction_item)

  sum_fit <- summary(
    emmeans_fit,
    level = conf_level
  )

  arm <- variables$arm

  # Arm level present in `df`.
  sum_level <- as.character(unique(df[[arm]]))

  # Ensure that there is only one element in sum_level.
  checkmate::assert_scalar(sum_level)

  # Rows of the marginal means summary that belong to this arm level.
  sum_fit_level <- sum_fit[sum_fit[[arm]] == sum_level, ]

  # Get the index of the ref arm
  if (interaction_y != FALSE) {
    # Only observations at the selected interaction level contribute to `n`.
    y <- unlist(df[(df[[interaction_item]] == interaction_y), .var])
    # convert characters selected in interaction_y into the numeric order
    interaction_y <- which(sum_fit_level[[interaction_item]] == interaction_y)
    sum_fit_level <- sum_fit_level[interaction_y, ]
    # if interaction is called, reset the index
    # NOTE(review): the reference index is taken as the last element of a sequence built from
    # the summary's arm column indexed by the reference group's arm value(s); it is then
    # shifted by the number of distinct arm values per preceding interaction level.
    # Presumably this is the position of the reference arm in the emmeans grid - confirm.
    ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])])
    ref_key <- tail(ref_key, n = 1)
    ref_key <- (interaction_y - 1) * length(unique(.df_row[[arm]])) + ref_key
  } else {
    y <- df[[.var]]
    # Get the index of the ref arm when interaction is not called
    ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])])
    ref_key <- tail(ref_key, n = 1)
  }

  if (.in_ref_col) {
    # In the reference column there is nothing to compare against, so the
    # difference, its confidence interval and the p-value are left empty.
    list(
      n = length(y[!is.na(y)]),
      lsmean = formatters::with_label(sum_fit_level$emmean, "Adjusted Mean"),
      lsmean_diff = formatters::with_label(character(), "Difference in Adjusted Means"),
      lsmean_diff_ci = formatters::with_label(character(), f_conf_level(conf_level)),
      pval = formatters::with_label(character(), "p-value")
    )
  } else {
    # Estimate the differences between the marginal means.
    emmeans_contrasts <- emmeans::contrast(
      emmeans_fit,
      # Compare all arms versus the control arm.
      method = "trt.vs.ctrl",
      # Take the arm factor from .ref_group as the control arm.
      ref = ref_key,
      level = conf_level
    )
    sum_contrasts <- summary(
      emmeans_contrasts,
      # Derive confidence intervals, t-tests and p-values.
      infer = TRUE,
      # Do not adjust the p-values for multiplicity.
      adjust = "none"
    )

    # Select the contrast rows that mention this arm level. The level is matched
    # literally (`fixed = TRUE`) so that level names containing regular expression
    # metacharacters (e.g. parentheses, `+` or `.`) are found correctly.
    # NOTE(review): this is still a substring match, so a level name that is contained
    # in another level name can select more than one row - confirm whether this can occur.
    sum_contrasts_level <- sum_contrasts[grepl(sum_level, sum_contrasts$contrast, fixed = TRUE), ]
    if (interaction_y != FALSE) {
      # `interaction_y` holds the numeric position of the selected interaction level here.
      sum_contrasts_level <- sum_contrasts_level[interaction_y, ]
    }

    list(
      n = length(y[!is.na(y)]),
      lsmean = formatters::with_label(sum_fit_level$emmean, "Adjusted Mean"),
      lsmean_diff = formatters::with_label(sum_contrasts_level$estimate, "Difference in Adjusted Means"),
      lsmean_diff_ci = formatters::with_label(
        c(sum_contrasts_level$lower.CL, sum_contrasts_level$upper.CL),
        f_conf_level(conf_level)
      ),
      pval = formatters::with_label(sum_contrasts_level$p.value, "p-value")
    )
  }
}

#' @describeIn summarize_ancova Formatted analysis function which is used as `afun` in `summarize_ancova()`.
#'
#' @return
#' * `a_ancova()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_ancova
#' \dontrun{
#' a_ancova(
#'   df, .var, .df_row, variables, .ref_group,
#'   .in_ref_col = FALSE,
#'   interaction_y = FALSE, interaction_item = NULL, conf_level
#' )
#' }
#'
#' @keywords internal
# Formatted version of `s_ancova()`: attaches the default cell formats and row
# indentation for each of the five statistics.
a_ancova <- make_afun(
  s_ancova,
  .formats = c(
    n = "xx",
    lsmean = "xx.xx",
    lsmean_diff = "xx.xx",
    lsmean_diff_ci = "(xx.xx, xx.xx)",
    pval = "x.xxxx | (<0.0001)"
  ),
  # The confidence interval and p-value rows are indented by one level.
  .indent_mods = c(n = 0L, lsmean = 0L, lsmean_diff = 0L, lsmean_diff_ci = 1L, pval = 1L),
  .null_ref_cells = FALSE
)

#' @describeIn summarize_ancova Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `summarize_ancova()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_ancova()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by("Species", ref_group = "setosa") %>%
#'   add_colcounts() %>%
#'   summarize_ancova(
#'     vars = "Petal.Length",
#'     variables = list(arm = "Species", covariates = NULL),
#'     table_names = "unadj",
#'     conf_level = 0.95, var_labels = "Unadjusted comparison",
#'     .labels = c(lsmean = "Mean", lsmean_diff = "Difference in Means")
#'   ) %>%
#'   summarize_ancova(
#'     vars = "Petal.Length",
#'     variables = list(arm = "Species", covariates = c("Sepal.Length", "Sepal.Width")),
#'     table_names = "adj",
#'     conf_level = 0.95, var_labels = "Adjusted comparison (covariates: Sepal.Length and Sepal.Width)"
#'   ) %>%
#'   build_table(iris)
#'
#' @export
summarize_ancova <- function(lyt,
                             vars,
                             var_labels,
                             ...,
                             show_labels = "visible",
                             table_names = vars,
                             .stats = NULL,
                             .formats = NULL,
                             .labels = NULL,
                             .indent_mods = NULL,
                             interaction_y = FALSE,
                             interaction_item = NULL) {
  # Everything supplied through `...` is handed to the analysis function as extra arguments.
  extra_args <- list(...)

  # Specialize the formatted analysis function with the interaction settings
  # and any user overrides for statistics, formats, labels and indentation.
  afun <- make_afun(
    a_ancova,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    interaction_y = interaction_y,
    interaction_item = interaction_item
  )

  analyze(
    lyt = lyt,
    vars = vars,
    afun = afun,
    var_labels = var_labels,
    table_names = table_names,
    show_labels = show_labels,
    extra_args = extra_args
  )
}

#' Occurrence Table Pruning
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Family of constructor and condition functions to flexibly prune occurrence tables.
#' The condition functions always return whether the row result is higher than the threshold.
#' Since they are of class [CombinationFunction()] they can be logically combined with other condition
#' functions.
#'
#' @note Since most table specifications are worded positively, we name our constructor and condition
#'   functions positively, too. However, note that the result of [keep_rows()] says what
#'   should be pruned, to conform with the [rtables::prune_table()] interface.
#'
#' @examples
#' \dontrun{
#' tab <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("RACE") %>%
#'   split_rows_by("STRATA1") %>%
#'   summarize_row_groups() %>%
#'   summarize_vars("COUNTRY", .stats = "count_fraction") %>%
#'   build_table(DM)
#' }
#'
#' @name prune_occurrences
NULL

#' @describeIn prune_occurrences Constructor for creating pruning functions based on
#'   a row condition function. This removes all analysis rows (`TableRow`) that should be
#'   pruned, i.e., don't fulfill the row condition. It removes the sub-tree if there are no
#'   children left.
#'
#' @param row_condition (`CombinationFunction`)\cr condition function which works on individual
#'   analysis rows and flags whether these should be kept in the pruned table.
#'
#' @return
#' * `keep_rows()` returns a pruning function that can be used with [rtables::prune_table()]
#'   to prune an `rtables` table.
#'
#' @examples
#' \dontrun{
#' # `keep_rows`
#' is_non_empty <- !CombinationFunction(all_zero_or_na)
#' prune_table(tab, keep_rows(is_non_empty))
#' }
#'
#' @export
keep_rows <- function(row_condition) {
  checkmate::assert_function(row_condition)
  function(table_tree) {
    if (!inherits(table_tree, "TableRow")) {
      # Anything that is not a `TableRow` is pruned only when it has no children left.
      return(length(tree_children(table_tree)) == 0L)
    }
    # A row is pruned when it does not fulfill the condition.
    !row_condition(table_tree)
  }
}

#' @describeIn prune_occurrences Constructor for creating pruning functions based on
#'   a condition for the (first) content row in leaf tables. This removes all leaf tables where
#'   the first content row does not fulfill the condition. It does not check individual rows.
#'   It then proceeds recursively by removing the sub tree if there are no children left.
#'
#' @param content_row_condition (`CombinationFunction`)\cr condition function which works on individual
#'   first content rows of leaf tables and flags whether these leaf tables should be kept in the pruned table.
#'
#' @return
#' * `keep_content_rows()` returns a pruning function that checks the condition on the first content
#'   row of leaf tables in the table.
#'
#' @examples
#' # `keep_content_rows`
#' # Internal function - has_count_in_cols
#' \dontrun{
#' more_than_twenty <- has_count_in_cols(atleast = 20L, col_names = names(tab))
#' prune_table(tab, keep_content_rows(more_than_twenty))
#' }
#'
#' @export
keep_content_rows <- function(content_row_condition) {
  checkmate::assert_function(content_row_condition)
  function(table_tree) {
    if (is_leaf_table(table_tree)) {
      # Leaf tables are judged by their first content row only.
      first_content_row <- h_content_first_row(table_tree)
      !content_row_condition(first_content_row)
    } else if (inherits(table_tree, "DataRow")) {
      # Individual data rows are never pruned by this function.
      FALSE
    } else {
      # Remaining nodes are pruned once they have no children left.
      length(tree_children(table_tree)) == 0L
    }
  }
}

#' @describeIn prune_occurrences Constructor for creating condition functions on total counts in the specified columns.
#'
#' @param atleast (`count` or `proportion`)\cr threshold which should be met in order to keep the row.
#' @param ... arguments for row or column access, see [rtables_access]: either `col_names` (`character`) including
#'   the names of the columns which should be used, or alternatively `col_indices` (`integer`) giving the indices
#'   directly instead.
#'
#' @return
#' * `has_count_in_cols()` returns a condition function that sums the counts in the specified column.
#'
#' @examples
#' # Internal function - has_count_in_cols
#' \dontrun{
#' more_than_one <- has_count_in_cols(atleast = 1L, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_one))
#' }
#'
#' @keywords internal
has_count_in_cols <- function(atleast, ...) {
  checkmate::assert_count(atleast)
  # Condition: the counts summed over the selected columns reach the threshold.
  condition <- function(table_row) {
    sum(h_row_counts(table_row, ...)) >= atleast
  }
  CombinationFunction(condition)
}

#' @describeIn prune_occurrences Constructor for creating condition functions on any of the counts in
#'   the specified columns satisfying a threshold.
#'
#' @param atleast (`count` or `proportion`)\cr threshold which should be met in order to keep the row.
#'
#' @return
#' * `has_count_in_any_col()` returns a condition function that compares the counts in the
#'   specified columns with the threshold.
#'
#' @examples
#' \dontrun{
#' # `has_count_in_any_col`
#' any_more_than_one <- has_count_in_any_col(atleast = 1L, col_names = names(tab))
#' prune_table(tab, keep_rows(any_more_than_one))
#' }
#'
#' @export
has_count_in_any_col <- function(atleast, ...) {
  checkmate::assert_count(atleast)
  # Condition: at least one of the selected columns has a count reaching the threshold.
  condition <- function(table_row) {
    any(h_row_counts(table_row, ...) >= atleast)
  }
  CombinationFunction(condition)
}

#' @describeIn prune_occurrences Constructor for creating condition functions on total fraction in
#'   the specified columns.
#'
#' @return
#' * `has_fraction_in_cols()` returns a condition function that sums the counts in the
#'   specified column, and computes the fraction by dividing by the total column counts.
#'
#' @examples
#' \dontrun{
#' # `has_fraction_in_cols`
#' more_than_five_percent <- has_fraction_in_cols(atleast = 0.05, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_five_percent))
#' }
#'
#' @export
has_fraction_in_cols <- function(atleast, ...) {
  assert_proportion_value(atleast, include_boundaries = TRUE)
  # Condition: summed row counts divided by summed column counts reach the threshold.
  condition <- function(table_row) {
    numerator <- sum(h_row_counts(table_row, ...))
    denominator <- sum(h_col_counts(table_row, ...))
    (numerator / denominator) >= atleast
  }
  CombinationFunction(condition)
}

#' @describeIn prune_occurrences Constructor for creating condition functions on any fraction in
#'   the specified columns.
#'
#' @return
#' * `has_fraction_in_any_col()` returns a condition function that looks at the fractions
#'  in the specified columns and checks whether any of them fulfill the threshold.
#'
#' @examples
#' \dontrun{
#' # `has_fraction_in_any_col`
#' any_atleast_five_percent <- has_fraction_in_any_col(atleast = 0.05, col_names = names(tab))
#' prune_table(tab, keep_rows(any_atleast_five_percent))
#' }
#'
#' @export
has_fraction_in_any_col <- function(atleast, ...) {
  assert_proportion_value(atleast, include_boundaries = TRUE)
  # Condition: at least one of the selected columns has a fraction reaching the threshold.
  condition <- function(table_row) {
    any(h_row_fractions(table_row, ...) >= atleast)
  }
  CombinationFunction(condition)
}

#' @describeIn prune_occurrences Constructor for creating condition function that checks the difference
#'   between the fractions reported in each specified column.
#'
#' @return
#' * `has_fractions_difference()` returns a condition function that extracts the fractions of each
#'   specified column, and computes the difference of the minimum and maximum.
#'
#' @examples
#' \dontrun{
#' # `has_fractions_difference`
#' more_than_five_percent_diff <- has_fractions_difference(atleast = 0.05, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_five_percent_diff))
#' }
#'
#' @export
has_fractions_difference <- function(atleast, ...) {
  assert_proportion_value(atleast, include_boundaries = TRUE)
  # Condition: the spread (maximum minus minimum) of the fractions reaches the threshold.
  condition <- function(table_row) {
    extremes <- range(h_row_fractions(table_row, ...))
    (extremes[2] - extremes[1]) >= atleast
  }
  CombinationFunction(condition)
}

#' @describeIn prune_occurrences Constructor for creating condition function that checks the difference
#'   between the counts reported in each specified column.
#'
#' @return
#' * `has_counts_difference()` returns a condition function that extracts the counts of each
#'   specified column, and computes the difference of the minimum and maximum.
#'
#' @examples
#' # Internal function - has_counts_difference
#' \dontrun{
#' more_than_one_diff <- has_counts_difference(atleast = 1L, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_one_diff))
#' }
#'
#' @keywords internal
has_counts_difference <- function(atleast, ...) {
  checkmate::assert_count(atleast)
  # Condition: the spread (maximum minus minimum) of the counts reaches the threshold.
  condition <- function(table_row) {
    extremes <- range(h_row_counts(table_row, ...))
    (extremes[2] - extremes[1]) >= atleast
  }
  CombinationFunction(condition)
}

#' Helper Functions for Subgroup Treatment Effect Pattern (STEP) Calculations
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions that are used internally for the STEP calculations.
#'
#' @inheritParams argument_convention
#'
#' @name h_step
#' @include control_step.R
NULL

#' @describeIn h_step creates the windows for STEP, based on the control settings
#'   provided.
#'
#' @param x (`numeric`)\cr biomarker value(s) to use (without `NA`).
#' @param control (named `list`)\cr output from `control_step()`.
#'
#' @return
#' * `h_step_window()` returns a list containing the window-selection matrix `sel`
#'   and the interval information matrix `interval`.
#'
#' @export
h_step_window <- function(x,
                          control = control_step()) {
  checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  # Loop-invariant settings are read from `control` once.
  num_points <- control$num_points
  bandwidth <- control$bandwidth
  point_ids <- seq_len(num_points)

  # `sel`: one column per window, flagging the observations of `x` that fall into it.
  sel <- matrix(FALSE, length(x), num_points)
  # `out`: one row per window holding its center and its lower/upper bound.
  out <- matrix(0, num_points, 3)
  colnames(out) <- paste("Interval", c("Center", "Lower", "Upper"))
  if (control$use_percentile) {
    # Create windows according to percentile cutoffs.
    # Columns 1-3 hold the percentile scale, columns 4-6 the corresponding values of `x`.
    out <- cbind(out, out)
    colnames(out)[1:3] <- paste("Percentile", c("Center", "Lower", "Upper"))
    # Equally spaced grid on [0, 1] without the left end point; the right end point
    # is dropped below when the centers are assigned.
    xs <- seq(0, 1, length.out = num_points + 2)[-1]
    for (i in point_ids) {
      # Window bounds are truncated to the valid percentile range.
      out[i, 2:3] <- c(
        max(xs[i] - bandwidth, 0),
        min(xs[i] + bandwidth, 1)
      )
      out[i, 5:6] <- stats::quantile(x, out[i, 2:3])
      sel[, i] <- x >= out[i, 5] & x <= out[i, 6]
    }
    # Center is the middle point of the percentile window.
    out[, 1] <- xs[-(num_points + 1)]
    out[, 4] <- stats::quantile(x, out[, 1])
  } else {
    # Create windows according to cutoffs.
    m <- c(min(x), max(x))
    # Equally spaced grid on the observed range without the minimum; the maximum
    # is dropped below when the centers are assigned.
    xs <- seq(m[1], m[2], length.out = num_points + 2)[-1]
    for (i in point_ids) {
      # Window bounds are truncated to the observed range of `x`.
      out[i, 2:3] <- c(
        max(xs[i] - bandwidth, m[1]),
        min(xs[i] + bandwidth, m[2])
      )
      sel[, i] <- x >= out[i, 2] & x <= out[i, 3]
    }
    # Center is the same as the point for predicting.
    out[, 1] <- xs[-(num_points + 1)]
  }
  list(sel = sel, interval = out)
}

#' @describeIn h_step calculates the treatment effect estimate
#'   on the linear predictor scale and corresponding standard error from a STEP `model` fitted
#'   on `data` given `variables` specification, for a single biomarker value `x`.
#'   This works for both `coxph` and `glm` models, i.e. for calculating log hazard ratio or log odds
#'   ratio estimates.
#'
#' @param model the regression model object.
#'
#' @return
#' * `h_step_trt_effect()` returns a vector with elements `est` and `se`.
#'
#' @export
h_step_trt_effect <- function(data,
                              model,
                              variables,
                              x) {
  checkmate::assert_multi_class(model, c("coxph", "glm"))
  checkmate::assert_number(x)
  assert_df_with_variables(data, variables)
  checkmate::assert_factor(data[[variables$arm]], n.levels = 2)

  arm_var <- variables$arm

  # Two copies of the first data row: same covariates, biomarker fixed at `x`,
  # and one row per arm level.
  pred_data <- data[c(1, 1), ]
  pred_data[, variables$biomarker] <- x
  pred_data[, arm_var] <- levels(data[[arm_var]])

  # Design matrix of the two rows, built from the model terms without the response.
  rhs_terms <- stats::delete.response(stats::terms(model))
  pred_frame <- stats::model.frame(rhs_terms, data = pred_data, xlev = model$xlevels)
  design <- stats::model.matrix(rhs_terms, data = pred_frame, contrasts.arg = model$contrasts)

  beta <- stats::coef(model)
  # Note: It is important to use the coef subset from matrix, otherwise intercept and
  # strata are included for coxph() models.
  design <- design[, names(beta)]

  # Difference between the two design rows, i.e. second arm level minus first arm level.
  contrast_row <- diff(design)
  est <- contrast_row %*% beta
  variance <- contrast_row %*% stats::vcov(model) %*% t(contrast_row)
  se <- sqrt(variance)
  c(
    est = est,
    se = se
  )
}

#' @describeIn h_step builds the model formula used in survival STEP calculations.
#'
#' @return
#' * `h_step_survival_formula()` returns a model formula.
#'
#' @export
h_step_survival_formula <- function(variables,
                                    control = control_step()) {
  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  assert_list_of_variables(variables[c("arm", "biomarker", "event", "time")])

  # Base model: survival response explained by arm.
  base_part <- paste0("Surv(", variables$time, ", ", variables$event, ") ~ ", variables$arm)

  # Arm by biomarker polynomial interaction, only for a positive degree.
  biomarker_part <- if (control$degree > 0) {
    paste0(" * stats::poly(", variables$biomarker, ", degree = ", control$degree, ", raw = TRUE)")
  } else {
    ""
  }

  # Optional additive covariates.
  covariates_part <- if (!is.null(variables$covariates)) {
    paste0(" + ", paste(variables$covariates, collapse = "+"))
  } else {
    ""
  }

  # Optional stratification term.
  strata_part <- if (!is.null(variables$strata)) {
    paste0(" + strata(", paste0(variables$strata, collapse = ", "), ")")
  } else {
    ""
  }

  form <- paste0(base_part, biomarker_part, covariates_part, strata_part)
  stats::as.formula(form)
}

#' @describeIn h_step estimates the model with `formula` built based on
#'   `variables` in `data` for a given `subset` and `control` parameters for the
#'   Cox regression.
#'
#' @param formula (`formula`)\cr the regression model formula.
#' @param subset (`logical`)\cr subset vector.
#'
#' @return
#' * `h_step_survival_est()` returns a matrix of number of observations `n`,
#'   `events`, log hazard ratio estimates `loghr`, standard error `se`,
#'   and Wald confidence interval bounds `ci_lower` and `ci_upper`. One row is
#'   included for each biomarker value in `x`.
#'
#' @export
h_step_survival_est <- function(formula,
                                data,
                                variables,
                                x,
                                subset = rep(TRUE, nrow(data)),
                                control = control_coxph()) {
  checkmate::assert_formula(formula)
  assert_df_with_variables(data, variables)
  checkmate::assert_logical(subset, min.len = 1, any.missing = FALSE)
  checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  # Note: `subset` in `coxph` needs to be an expression referring to `data` variables.
  data$.subset <- subset

  # Warnings raised during the fit are collected and muffled here; a single
  # summarizing warning is emitted afterwards instead.
  coxph_warnings <- NULL
  fit <- withCallingHandlers(
    survival::coxph(
      formula = formula,
      data = data,
      subset = .subset,
      ties = control$ties
    ),
    warning = function(w) {
      coxph_warnings <<- c(coxph_warnings, w)
      invokeRestart("muffleWarning")
    }
  )
  if (!is.null(coxph_warnings)) {
    warning(paste(
      "Fit warnings occurred, please consider using a simpler model, or",
      "larger `bandwidth`, less `num_points` in `control_step()` settings"
    ))
  }

  # Produce a matrix with one row per `x` and columns `est` and `se`.
  estimates <- t(vapply(
    X = x,
    FUN = h_step_trt_effect,
    FUN.VALUE = numeric(2),
    data = data,
    model = fit,
    variables = variables
  ))
  est <- estimates[, "est"]
  se <- estimates[, "se"]

  # Two-sided Wald interval on the log hazard ratio scale.
  q_norm <- stats::qnorm((1 + control$conf_level) / 2)
  cbind(
    n = fit$n,
    events = fit$nevent,
    loghr = est,
    se = se,
    ci_lower = est - q_norm * se,
    ci_upper = est + q_norm * se
  )
}

#' @describeIn h_step builds the model formula used in response STEP calculations.
#'
#' @return
#' * `h_step_rsp_formula()` returns a model formula.
#'
#' @export
h_step_rsp_formula <- function(variables,
                               control = c(control_step(), control_logistic())) {
  checkmate::assert_character(variables$covariates, null.ok = TRUE)
  assert_list_of_variables(variables[c("arm", "biomarker", "response")])

  # Left-hand side: the literal placeholder "response" in the response definition
  # is replaced by the actual response variable name.
  lhs <- sub(
    pattern = "response",
    replacement = variables$response,
    x = control$response_definition,
    fixed = TRUE
  )
  base_part <- paste0(lhs, " ~ ", variables$arm)

  # Arm by biomarker polynomial interaction, only for a positive degree.
  biomarker_part <- if (control$degree > 0) {
    paste0(" * stats::poly(", variables$biomarker, ", degree = ", control$degree, ", raw = TRUE)")
  } else {
    ""
  }

  # Optional additive covariates.
  covariates_part <- if (!is.null(variables$covariates)) {
    paste0(" + ", paste(variables$covariates, collapse = "+"))
  } else {
    ""
  }

  # Optional stratification term; several strata variables are combined into one interaction factor.
  strata_part <- if (!is.null(variables$strata)) {
    strata_arg <- if (length(variables$strata) > 1) {
      paste0("I(interaction(", paste0(variables$strata, collapse = ", "), "))")
    } else {
      variables$strata
    }
    paste0("+ strata(", strata_arg, ")")
  } else {
    ""
  }

  form <- paste0(base_part, biomarker_part, covariates_part, strata_part)
  stats::as.formula(form)
}

#' @describeIn h_step estimates the model with `formula` built based on
#'   `variables` in `data` for a given `subset` and `control` parameters for the
#'   logistic regression.
#'
#' @param formula (`formula`)\cr the regression model formula.
#' @param subset (`logical`)\cr subset vector.
#'
#' @return
#' * `h_step_rsp_est()` returns a matrix of number of observations `n`, log odds
#'   ratio estimates `logor`, standard error `se`, and Wald confidence interval bounds
#'   `ci_lower` and `ci_upper`. One row is included for each biomarker value in `x`.
#'
#' @export
h_step_rsp_est <- function(formula,
                           data,
                           variables,
                           x,
                           subset = rep(TRUE, nrow(data)),
                           control = control_logistic()) {
  checkmate::assert_formula(formula)
  assert_df_with_variables(data, variables)
  checkmate::assert_logical(subset, min.len = 1, any.missing = FALSE)
  checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  # Note: `subset` in `glm` needs to be an expression referring to `data` variables.
  data$.subset <- subset

  # Warnings raised during the fit are collected and muffled here; a single
  # summarizing warning is emitted afterwards instead.
  fit_warnings <- NULL
  fit <- withCallingHandlers(
    if (is.null(variables$strata)) {
      # Without strata an ordinary logistic regression is fitted.
      stats::glm(
        formula = formula,
        data = data,
        subset = .subset,
        family = stats::binomial("logit")
      )
    } else {
      # clogit needs coxph and strata imported
      survival::clogit(
        formula = formula,
        data = data,
        subset = .subset
      )
    },
    warning = function(w) {
      fit_warnings <<- c(fit_warnings, w)
      invokeRestart("muffleWarning")
    }
  )
  if (!is.null(fit_warnings)) {
    warning(paste(
      "Fit warnings occurred, please consider using a simpler model, or",
      "larger `bandwidth`, less `num_points` in `control_step()` settings"
    ))
  }

  # Produce a matrix with one row per `x` and columns `est` and `se`.
  estimates <- t(vapply(
    X = x,
    FUN = h_step_trt_effect,
    FUN.VALUE = numeric(2),
    data = data,
    model = fit,
    variables = variables
  ))
  est <- estimates[, "est"]
  se <- estimates[, "se"]

  # Two-sided Wald interval on the log odds ratio scale.
  q_norm <- stats::qnorm((1 + control$conf_level) / 2)
  cbind(
    n = length(fit$y),
    logor = est,
    se = se,
    ci_lower = est - q_norm * se,
    ci_upper = est + q_norm * se
  )
}

#' Summary for Poisson Negative Binomial.
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Summarize results of a Poisson Negative Binomial Regression.
#' This can be used to analyze count and/or frequency data using a linear model.
#'
#' @inheritParams argument_convention
#'
#' @name summarize_glm_count
NULL

#' Helper Functions for Poisson Models.
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Helper functions that can be used to return the results of various Poisson models.
#'
#' @inheritParams argument_convention
#'
#' @seealso [summarize_glm_count]
#'
#' @name h_glm_count
NULL

#' @describeIn h_glm_count Helper function to return results of a poisson model.
#'
#' @param .df_row (`data.frame`)\cr data set that includes all the variables that are called
#'   in `.var` and `variables`.
#' @param variables (named `list` of `strings`)\cr list of additional analysis variables, with
#'   expected elements:
#'   * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple
#'     groups will be summarized. Specifically, the first level of `arm` variable is taken as the
#'     reference group.
#'   * `covariates` (`character`)\cr a vector that can contain single variable names (such as
#'     `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
#'   * `offset` (`string`)\cr name of the variable in `.df_row` that holds the offset values.
#' @param weights (`character`)\cr a character vector specifying weights used
#'   in averaging predictions. Number of weights must equal the number of levels included in the covariates.
#'   Passed on as the `weights` argument of [emmeans::emmeans()].
#'
#' @return
#' * `h_glm_poisson()` returns the results of a Poisson model.
#'
#' @examples
#' # Internal function - h_glm_poisson
#' \dontrun{
#' h_glm_poisson(
#'   .var = "AVAL",
#'   .df_row = anl,
#'   variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = NULL)
#' )
#' }
#'
#' @keywords internal
h_glm_poisson <- function(.var,
                          .df_row,
                          variables,
                          weights) {
  # Fits a Poisson regression (log link) of `.var` on the covariates and the arm
  # variable, and derives the estimated marginal means per arm with `emmeans`.
  #
  # Returns a list with the fitted model (`glm_fit`) and the `emmeans` object
  # (`emmeans_fit`).
  arm <- variables$arm
  covariates <- variables$covariates

  # `variables$offset` names a column of `.df_row`. Indexing with `NULL` is an
  # error, so the column is only looked up when an offset was requested;
  # `glm()` treats `offset = NULL` as "no offset".
  offset <- if (is.null(variables$offset)) {
    NULL
  } else {
    .df_row[[variables$offset]]
  }

  # With no covariates the right-hand side starts with redundant `+` signs,
  # which R parses as unary plus, so the formula is still valid.
  formula <- stats::as.formula(paste0(
    .var, " ~ ",
    " + ",
    paste(covariates, collapse = " + "),
    " + ",
    arm
  ))

  # `offset` is resolved through the formula environment (this function), so
  # the local variable above must keep this name.
  glm_fit <- stats::glm(
    formula = formula,
    offset = offset,
    data = .df_row,
    family = stats::poisson(link = "log")
  )

  emmeans_fit <- emmeans::emmeans(
    glm_fit,
    specs = arm,
    data = .df_row,
    type = "response",
    offset = 0,
    weights = weights
  )

  list(
    glm_fit = glm_fit,
    emmeans_fit = emmeans_fit
  )
}

#' @describeIn h_glm_count Helper function to return results of a quasipoisson model.
#'
#' @inheritParams summarize_glm_count
#'
#' @return
#' * `h_glm_quasipoisson()` returns the results of a Quasi-Poisson model.
#'
#' @examples
#' # Internal function - h_glm_quasipoisson
#' \dontrun{
#' h_glm_quasipoisson(
#'   .var = "AVAL",
#'   .df_row = adtte,
#'   variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = c("REGION1"))
#' )
#' }
#'
#' @keywords internal
h_glm_quasipoisson <- function(.var,
                               .df_row,
                               variables,
                               weights) {
  # Fits a quasi-Poisson regression (log link) of `.var` on the covariates and
  # the arm variable, and derives the estimated marginal means per arm with
  # `emmeans`.
  #
  # Returns a list with the fitted model (`glm_fit`) and the `emmeans` object
  # (`emmeans_fit`).
  arm <- variables$arm
  covariates <- variables$covariates

  # `variables$offset` names a column of `.df_row`. Indexing with `NULL` is an
  # error, so the column is only looked up when an offset was requested;
  # `glm()` treats `offset = NULL` as "no offset".
  offset <- if (is.null(variables$offset)) {
    NULL
  } else {
    .df_row[[variables$offset]]
  }

  # With no covariates the right-hand side starts with redundant `+` signs,
  # which R parses as unary plus, so the formula is still valid.
  formula <- stats::as.formula(paste0(
    .var, " ~ ",
    " + ",
    paste(covariates, collapse = " + "),
    " + ",
    arm
  ))

  # `offset` is resolved through the formula environment (this function), so
  # the local variable above must keep this name.
  glm_fit <- stats::glm(
    formula = formula,
    offset = offset,
    data = .df_row,
    family = stats::quasipoisson(link = "log")
  )

  emmeans_fit <- emmeans::emmeans(
    glm_fit,
    specs = arm,
    data = .df_row,
    type = "response",
    offset = 0,
    weights = weights
  )

  list(
    glm_fit = glm_fit,
    emmeans_fit = emmeans_fit
  )
}

#' @describeIn h_glm_count Helper function to return the results of the
#'   selected model (poisson, quasipoisson, negative binomial).
#'
#' @param .df_row (`data.frame`)\cr data set that includes all the variables that are called
#'   in `.var` and `variables`.
#' @param variables (named `list` of `strings`)\cr list of additional analysis variables, with
#'   expected elements:
#'   * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple
#'     groups will be summarized. Specifically, the first level of `arm` variable is taken as the
#'     reference group.
#'   * `covariates` (`character`)\cr a vector that can contain single variable names (such as
#'     `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
#'   * `offset` (`string`)\cr name of the variable in `.df_row` that holds the offset values.
#' @param weights (`character`)\cr character vector specifying weights used in averaging predictions.
#' @param distribution (`character`)\cr a character value specifying the distribution
#'   used in the regression (`"poisson"` or `"quasipoisson"`).
#'
#' @return
#' * `h_glm_count()` returns the results of the selected model.
#'
#' @examples
#' # Internal function - h_glm_count
#' \dontrun{
#' h_glm_count(
#'   .var = "AVAL",
#'   .df_row = anl,
#'   variables = list(arm = "ARMCD", offset = "lgTMATRSK", covariates = NULL),
#'   distribution = "poisson"
#' )
#' }
#'
#' @keywords internal
h_glm_count <- function(.var,
                        .df_row,
                        variables,
                        distribution,
                        weights) {
  # Dispatches to the model-specific helper selected by `distribution` and
  # returns its result (a list with `glm_fit` and `emmeans_fit`).
  #
  # The negative binomial model is not implemented yet, and any other value is
  # rejected explicitly rather than silently returning `NULL`.
  switch(distribution,
    poisson = h_glm_poisson(.var, .df_row, variables, weights),
    quasipoisson = h_glm_quasipoisson(.var, .df_row, variables, weights),
    negbin = stop("negative binomial distribution is not currently available."),
    stop(
      "`distribution` must be one of \"poisson\", \"quasipoisson\" or \"negbin\", not \"",
      distribution, "\"."
    )
  )
}

#' @describeIn h_glm_count Helper function to return the estimated means.
#'
#' @param .df_row (`data.frame`)\cr data set that includes all the variables that are called in `.var` and `variables`.
#' @param conf_level (`numeric`)\cr value used to derive the confidence interval for the rate.
#' @param obj (`glm.fit`)\cr fitted model object used to derive the mean rate estimates in each treatment arm.
#' @param arm (`string`)\cr group variable, for which the covariate adjusted means of multiple groups will be
#'   summarized. Specifically, the first level of `arm` variable is taken as the reference group.
#'
#' @return
#' * `h_ppmeans()` returns the estimated means.
#'
#' @examples
#' # Internal function - h_ppmeans
#' \dontrun{
#' fits <- h_glm_count(
#'   .var = "AVAL",
#'   .df_row = anl,
#'   variables = list(arm = "ARMCD", offset = "lgTMATRSK", covariates = c("REGION1")),
#'   distribution = "quasipoisson"
#' )
#'
#' h_ppmeans(
#'   obj = fits$glm_fit,
#'   .df_row = anl,
#'   arm = "ARM",
#'   conf_level = 0.95
#' )
#' }
#'
#' @keywords internal
h_ppmeans <- function(obj, .df_row, arm, conf_level) {
  # Two-sided normal quantile for the requested confidence level.
  alpha <- 1 - conf_level
  z_crit <- stats::qnorm(1 - alpha / 2)

  arm_levels <- levels(.df_row[[arm]])
  coef_cov <- stats::vcov(obj)

  # Mean predicted rate and its interval when every row of `.df_row` is
  # assigned to arm level `lev`.
  rate_for_level <- function(lev) {
    everyone_in_lev <- .df_row
    everyone_in_lev[[arm]] <- factor(lev, levels = arm_levels)

    mf <- stats::model.frame(obj$formula, data = everyone_in_lev)
    design <- stats::model.matrix(obj$formula, data = mf) # nolint

    predicted <- stats::predict(obj, newdata = mf, type = "response")
    avg_rate <- mean(predicted)

    # Column means of the rate-weighted design matrix, combined with the
    # coefficient covariance, give the standard error of the mean rate.
    weighted_means <- colMeans(predicted * design)
    std_err <- sqrt(as.numeric(t(weighted_means) %*% coef_cov %*% weighted_means))

    # The interval is built on the log scale and transformed back.
    log_half_width <- z_crit * std_err / avg_rate
    c(avg_rate, avg_rate * exp(-log_half_width), avg_rate * exp(log_half_width))
  }

  per_level <- lapply(arm_levels, rate_for_level)
  names(per_level) <- arm_levels
  estimates <- do.call(rbind, per_level)

  # The point-estimate column is called `response` for negative binomial fits
  # and `rate` otherwise.
  point_name <- if (inherits(obj, "negbin")) "response" else "rate"
  colnames(estimates) <- c(point_name, "asymp.LCL", "asymp.UCL")

  estimates <- as.data.frame(estimates)
  estimates[[arm]] <- rownames(estimates)
  estimates
}

#' @describeIn summarize_glm_count Statistics function that produces a named list of results
#'   of the investigated Poisson model.
#'
#' @inheritParams h_glm_count
#'
#' @return
#' * `s_glm_count()` returns a named `list` of 6 statistics:
#'   * `n`: Count of complete sample size for the group.
#'   * `rate`: Estimated event rate per follow-up time.
#'   * `rate_ci`: Confidence interval for the estimated rate per follow-up time.
#'   * `rate_ratio`: Ratio of event rates in each treatment arm to the reference arm.
#'   * `rate_ratio_ci`: Confidence interval for the rate ratio.
#'   * `pval`: p-value.
#'
#' @examples
#' # Internal function - s_glm_count
#' \dontrun{
#' s_glm_count(
#'   df = anl %>%
#'     filter(ARMCD == "ARM B"),
#'   .df_row = anl,
#'   .var = "AVAL",
#'   .in_ref_col = TRUE,
#'   variables = list(arm = "ARMCD", offset = "lgTMATRSK", covariates = c("REGION1")),
#'   conf_level = 0.95,
#'   distribution = "quasipoisson",
#'   rate_mean_method = "ppmeans"
#' )
#' }
#'
#' @keywords internal
s_glm_count <- function(df,
                        .var,
                        .df_row,
                        variables,
                        .ref_group,
                        .in_ref_col,
                        distribution,
                        conf_level,
                        rate_mean_method,
                        weights,
                        scale = 1) {
  # Fits the count model on `.df_row` and extracts, for the single arm present
  # in `df`, the adjusted rate with its confidence interval and, outside the
  # reference column, the rate ratio versus the reference arm with its
  # confidence interval and p-value.
  arm <- variables$arm

  y <- df[[.var]]
  smry_level <- as.character(unique(df[[arm]]))

  # ensure there is only 1 value
  checkmate::assert_scalar(smry_level)

  results <- h_glm_count(
    .var = .var,
    .df_row = .df_row,
    variables = variables,
    distribution = distribution,
    weights = weights
  )

  # Per-arm rate estimates, either from `emmeans` or from the mean of the
  # predicted rates; any other method is rejected with an explicit message.
  emmeans_smry <- switch(rate_mean_method,
    emmeans = summary(results$emmeans_fit, level = conf_level),
    ppmeans = h_ppmeans(results$glm_fit, .df_row, arm, conf_level),
    stop("`rate_mean_method` must be either \"emmeans\" or \"ppmeans\".")
  )

  emmeans_smry_level <- emmeans_smry[emmeans_smry[[arm]] == smry_level, ]

  # The point estimate is stored as `response` for negative binomial fits and
  # as `rate` otherwise. `scale` is applied to the estimate and to both
  # interval bounds so that they are reported in the same unit.
  rate_column <- if (distribution == "negbin") "response" else "rate"
  rate_stats <- list(
    n = length(y[!is.na(y)]),
    rate = formatters::with_label(
      emmeans_smry_level[[rate_column]] * scale,
      "Adjusted Rate"
    ),
    rate_ci = formatters::with_label(
      c(emmeans_smry_level$asymp.LCL * scale, emmeans_smry_level$asymp.UCL * scale),
      f_conf_level(conf_level)
    )
  )

  if (.in_ref_col) {
    # No comparison is made within the reference column: empty placeholders.
    ratio_stats <- list(
      rate_ratio = formatters::with_label(character(), "Adjusted Rate Ratio"),
      rate_ratio_ci = formatters::with_label(character(), f_conf_level(conf_level)),
      pval = formatters::with_label(character(), "p-value")
    )
  } else {
    # Arm names are matched literally, not as regular expressions, so that
    # characters such as "(", "+" or "." in level names cannot break or
    # widen the match.
    ref_level <- as.character(unique(.ref_group[[arm]]))
    grid_arms <- as.character(as.data.frame(results$emmeans_fit)[[arm]])

    emmeans_contrasts <- emmeans::contrast(
      results$emmeans_fit,
      method = "trt.vs.ctrl",
      ref = which(grid_arms %in% ref_level)
    )

    # `level` keeps the interval consistent with the `conf_level` label below.
    contrasts_smry <- summary(
      emmeans_contrasts,
      infer = TRUE,
      adjust = "none",
      level = conf_level
    )

    smry_contrasts_level <- contrasts_smry[
      grepl(smry_level, contrasts_smry$contrast, fixed = TRUE),
    ]

    ratio_stats <- list(
      rate_ratio = formatters::with_label(smry_contrasts_level$ratio, "Adjusted Rate Ratio"),
      rate_ratio_ci = formatters::with_label(
        c(smry_contrasts_level$asymp.LCL, smry_contrasts_level$asymp.UCL),
        f_conf_level(conf_level)
      ),
      pval = formatters::with_label(smry_contrasts_level$p.value, "p-value")
    )
  }

  c(rate_stats, ratio_stats)
}

#' @describeIn summarize_glm_count Formatted analysis function which is used as `afun` in `summarize_glm_count()`.
#'
#' @return
#' * `a_glm_count()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_glm_count
#' \dontrun{
#' a_glm_count(
#'   df = anl %>%
#'     filter(ARMCD == "ARM A"),
#'   .var = "AVAL",
#'   .df_row = anl,
#'   variables = list(arm = "ARMCD", offset = "lgTMATRSK", covariates = c("REGION1")),
#'   .ref_group = "ARM B", .in_ref_col = TRUE,
#'   conf_level = 0.95,
#'   distribution = "poisson",
#'   rate_mean_method = "ppmeans"
#' )
#' }
#'
#' @keywords internal
a_glm_count <- make_afun(
  # Statistics function being wrapped; the names used below are the names of
  # the list elements it returns.
  s_glm_count,
  # Indentation modifier per statistic: the interval and p-value rows are
  # indented one level more than the estimate rows.
  .indent_mods = c(
    "n" = 0L,
    "rate" = 0L,
    "rate_ci" = 1L,
    "rate_ratio" = 0L,
    "rate_ratio_ci" = 1L,
    "pval" = 1L
  ),
  # Default cell format per statistic.
  .formats = c(
    "n" = "xx",
    "rate" = "xx.xxxx",
    "rate_ci" = "(xx.xxxx, xx.xxxx)",
    "rate_ratio" = "xx.xxxx",
    "rate_ratio_ci" = "(xx.xxxx, xx.xxxx)",
    "pval" = "x.xxxx | (<0.0001)"
  ),
  # NOTE(review): presumably keeps the reference-column values returned by
  # `s_glm_count()` instead of blanking them - confirm against `make_afun()`.
  .null_ref_cells = FALSE
)

#' @describeIn summarize_glm_count Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `summarize_glm_count()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_glm_count()` to the table layout.
#'
#' @examples
#' library(dplyr)
#' anl <- tern_ex_adtte %>% filter(PARAMCD == "TNE")
#' anl$AVAL_f <- as.factor(anl$AVAL)
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM", ref_group = "B: Placebo") %>%
#'   add_colcounts() %>%
#'   summarize_vars(
#'     "AVAL_f",
#'     var_labels = "Number of exacerbations per patient",
#'     .stats = c("count_fraction"),
#'     .formats = c("count_fraction" = "xx (xx.xx%)"),
#'     .label = c("Number of exacerbations per patient")
#'   ) %>%
#'   summarize_glm_count(
#'     vars = "AVAL",
#'     variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = NULL),
#'     conf_level = 0.95,
#'     distribution = "poisson",
#'     rate_mean_method = "emmeans",
#'     var_labels = "Unadjusted exacerbation rate (per year)",
#'     table_names = "unadj",
#'     .stats = c("rate"),
#'     .labels = c(rate = "Rate")
#'   ) %>%
#'   summarize_glm_count(
#'     vars = "AVAL",
#'     variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = c("REGION1")),
#'     conf_level = 0.95,
#'     distribution = "quasipoisson",
#'     rate_mean_method = "ppmeans",
#'     var_labels = "Adjusted (QP) exacerbation rate (per year)",
#'     table_names = "adj",
#'     .stats = c("rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"),
#'     .labels = c(
#'       rate = "Rate", rate_ci = "Rate CI", rate_ratio = "Rate Ratio",
#'       rate_ratio_ci = "Rate Ratio CI", pval = "p value"
#'     )
#'   )
#' build_table(lyt = lyt, df = anl)
#'
#' @export
summarize_glm_count <- function(lyt,
                                vars,
                                var_labels,
                                ...,
                                show_labels = "visible",
                                table_names = vars,
                                .stats = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # Specialize the formatted analysis function with the statistics, formats,
  # labels and indentation chosen by the caller.
  glm_count_afun <- make_afun(
    a_glm_count,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Arguments given through `...` are handed to the analysis function via
  # `extra_args`.
  analyze(
    lyt = lyt,
    vars = vars,
    afun = glm_count_afun,
    var_labels = var_labels,
    table_names = table_names,
    extra_args = list(...),
    show_labels = show_labels
  )
}

#' Helper Function to create a map dataframe that can be used in `trim_levels_to_map` split function.
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper Function to create a map dataframe from the input dataset, which can be used as an argument in the
#' `trim_levels_to_map` split function. Based on different method, the map is constructed differently.
#'
#' @inheritParams argument_convention
#' @param abnormal (named `list`)\cr identifying the abnormal range level(s) in `df`. Based on the levels of
#'   abnormality of the input dataset, it can be something like `list(Low = "LOW LOW", High = "HIGH HIGH")` or
#'   `abnormal = list(Low = "LOW", High = "HIGH")`
#' @param method (`string`)\cr indicates how the returned map will be constructed. Can be `"default"` or `"range"`.
#'
#' @return A map `data.frame`.
#'
#' @note If method is `"default"`, the returned map will only have the abnormal directions that are observed in the
#'   `df`, and records with all normal values will be excluded to avoid error in creating layout. If method is
#'   `"range"`, the returned map will be based on the rule that at least one observation with low range > 0
#'   for low direction and at least one observation with high range is not missing for high direction.
#'
#' @examples
#' adlb <- df_explicit_na(tern_ex_adlb)
#'
#' h_map_for_count_abnormal(
#'   df = adlb,
#'   variables = list(anl = "ANRIND", split_rows = c("LBCAT", "PARAM")),
#'   abnormal = list(low = c("LOW"), high = c("HIGH")),
#'   method = "default",
#'   na_level = "<Missing>"
#' )
#'
#' df <- data.frame(
#'   USUBJID = c(rep("1", 4), rep("2", 4), rep("3", 4)),
#'   AVISIT = c(
#'     rep("WEEK 1", 2),
#'     rep("WEEK 2", 2),
#'     rep("WEEK 1", 2),
#'     rep("WEEK 2", 2),
#'     rep("WEEK 1", 2),
#'     rep("WEEK 2", 2)
#'   ),
#'   PARAM = rep(c("ALT", "CPR"), 6),
#'   ANRIND = c(
#'     "NORMAL", "NORMAL", "LOW",
#'     "HIGH", "LOW", "LOW", "HIGH", "HIGH", rep("NORMAL", 4)
#'   ),
#'   ANRLO = rep(5, 12),
#'   ANRHI = rep(20, 12)
#' )
#' df$ANRIND <- factor(df$ANRIND, levels = c("LOW", "HIGH", "NORMAL"))
#' h_map_for_count_abnormal(
#'   df = df,
#'   variables = list(
#'     anl = "ANRIND",
#'     split_rows = c("PARAM"),
#'     range_low = "ANRLO",
#'     range_high = "ANRHI"
#'   ),
#'   abnormal = list(low = c("LOW"), high = c("HIGH")),
#'   method = "range",
#'   na_level = "<Missing>"
#' )
#'
#' @export
h_map_for_count_abnormal <- function(df,
                                     variables = list(
                                       anl = "ANRIND",
                                       split_rows = c("PARAM"),
                                       range_low = "ANRLO",
                                       range_high = "ANRHI"
                                     ),
                                     abnormal = list(low = c("LOW", "LOW LOW"), high = c("HIGH", "HIGH HIGH")),
                                     method = c("default", "range"),
                                     na_level = "<Missing>") {
  method <- match.arg(method)
  checkmate::assert_subset(c("anl", "split_rows"), names(variables))
  checkmate::assert_false(anyNA(df[variables$split_rows]))
  assert_df_with_variables(df,
    variables = list(anl = variables$anl, split_rows = variables$split_rows),
    na_level = na_level
  )
  assert_df_with_factors(df, list(val = variables$anl))
  assert_valid_factor(df[[variables$anl]], any.missing = FALSE)
  assert_list_of_variables(variables)
  checkmate::assert_list(abnormal, types = "character", len = 2)

  # Drop unused levels from df as they are not supposed to be in the final map
  df <- droplevels(df)

  normal_value <- setdiff(levels(df[[variables$anl]]), unlist(abnormal))

  # Based on the understanding of clinical data, there should only be one level of normal which is "NORMAL"
  checkmate::assert_vector(normal_value, len = 1)

  # Default method will only have what is observed in the df, and records with all normal values will be excluded to
  # avoid error in layout building.
  if (method == "default") {
    df_abnormal <- subset(df, df[[variables$anl]] %in% unlist(abnormal))
    map <- unique(df_abnormal[c(variables$split_rows, variables$anl)])
    map_normal <- unique(subset(map, select = variables$split_rows))
    map_normal[[variables$anl]] <- normal_value
    map <- rbind(map, map_normal)
  } else if (method == "range") {
    # range method follows the rule that at least one observation with ANRLO > 0 for low
    # direction and at least one observation with ANRHI is not missing for high direction.
    checkmate::assert_subset(c("range_low", "range_high"), names(variables))
    checkmate::assert_subset(c("LOW", "HIGH"), toupper(names(abnormal)))

    assert_df_with_variables(df,
      variables = list(
        range_low = variables$range_low,
        range_high = variables$range_high
      )
    )

    # Pairs every unique `split_rows` combination found in `df_dir` with every abnormal level of
    # `direction` ("LOW" or "HIGH"). Zero qualifying rows yield a zero-row map with the same columns,
    # so the pieces can always be combined with `rbind()`.
    expand_direction <- function(df_dir, direction) {
      dir_levels <- unname(unlist(abnormal[toupper(names(abnormal)) == direction]))
      map_dir <- unique(df_dir[variables$split_rows])
      rownames(map_dir) <- NULL # Just to avoid strange row index in case upstream functions changed
      n_comb <- nrow(map_dir)
      map_dir <- map_dir[rep(seq_len(n_comb), each = length(dir_levels)), , drop = FALSE]
      map_dir[[variables$anl]] <- rep(dir_levels, times = n_comb)
      map_dir
    }

    # Define low direction of map: rows with a low range above zero.
    low_range <- df[[variables$range_low]]
    map_low <- expand_direction(df[which(low_range > 0), , drop = FALSE], "LOW")

    # Define high direction of map: rows whose high range is neither `NA` nor the explicit missing
    # level. Both conditions must hold, hence `&`.
    high_range <- df[[variables$range_high]]
    map_high <- expand_direction(
      df[which(!is.na(high_range) & high_range != na_level), , drop = FALSE],
      "HIGH"
    )

    # Define normal of map
    map_normal <- unique(rbind(map_low, map_high)[variables$split_rows])
    map_normal[[variables$anl]] <- rep(normal_value, nrow(map_normal))

    map <- rbind(map_low, map_high, map_normal)
  }

  # map should be all characters
  map <- data.frame(lapply(map, as.character), stringsAsFactors = FALSE)

  # sort the map final output by split_rows variables; these are the leading columns of `map`, and
  # ordering from the last to the first one makes the first variable the primary sort key.
  for (i in rev(seq_along(variables$split_rows))) {
    map <- map[order(map[[i]]), ]
  }
  map
}

#' Helper Function for Deriving Analysis Datasets for LBT13 and LBT14
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function that merges `ADSL` and `ADLB` datasets so that missing lab test records are inserted in the
#' output dataset.
#'
#' @param adsl (`data.frame`)\cr `ADSL` dataframe.
#' @param adlb (`data.frame`)\cr `ADLB` dataframe.
#' @param worst_flag (named `vector`)\cr Worst post-baseline lab flag variable.
#' @param by_visit (`logical`)\cr defaults to `FALSE` to generate worst grade per patient.
#'   If worst grade per patient per visit is specified for `worst_flag`, then
#'   `by_visit` should be `TRUE` to generate worst grade patient per visit.
#' @param no_fillin_visits (named `character`)\cr Visits that are not considered for post-baseline worst toxicity
#'   grade. Defaults to `c("SCREENING", "BASELINE")`.
#'
#' @return `df` containing variables shared between `adlb` and `adsl` along with variables `PARAM`, `PARAMCD`,
#'   `ATOXGR`, and `BTOXGR` relevant for analysis. Optionally, `AVISIT` and `AVISITN` are included when
#'   `by_visit = TRUE` and `no_fillin_visits = c("SCREENING", "BASELINE")`.
#'
#' @details In the result data missing records will be created for the following situations:
#'   * Patients who are present in `adsl` but have no lab data in `adlb` (both baseline and post-baseline).
#'   * Patients who do not have any post-baseline lab values.
#'   * Patients without any post-baseline values flagged as the worst.
#'
#' @examples
#' # `h_adsl_adlb_merge_using_worst_flag`
#' adlb_out <- h_adsl_adlb_merge_using_worst_flag(
#'   tern_ex_adsl,
#'   tern_ex_adlb,
#'   worst_flag = c("WGRHIFL" = "Y")
#' )
#'
#' # `h_adsl_adlb_merge_using_worst_flag` by visit example
#' adlb_out_by_visit <- h_adsl_adlb_merge_using_worst_flag(
#'   tern_ex_adsl,
#'   tern_ex_adlb,
#'   worst_flag = c("WGRLOVFL" = "Y"),
#'   by_visit = TRUE
#' )
#'
#' @export
h_adsl_adlb_merge_using_worst_flag <- function(adsl, # nolint
                                               adlb,
                                               worst_flag = c("WGRHIFL" = "Y"),
                                               by_visit = FALSE,
                                               no_fillin_visits = c("SCREENING", "BASELINE")) {
  # The names of `worst_flag` are flag columns of `adlb`; the values are the
  # flag values a record must have to be kept.
  col_names <- names(worst_flag)
  filter_values <- worst_flag

  # Row positions of `adlb` matching each flag separately.
  temp <- Map(
    function(x, y) which(adlb[[x]] == y),
    col_names,
    filter_values
  )

  # Keep only the positions that satisfy all flags at once.
  position_satisfy_filters <- Reduce(intersect, temp)

  adsl_adlb_common_columns <- intersect(colnames(adsl), colnames(adlb))
  columns_from_adlb <- c("USUBJID", "PARAM", "PARAMCD", "AVISIT", "AVISITN", "ATOXGR", "BTOXGR")

  # Flagged records outside the excluded visits, reduced to the analysis columns.
  adlb_f <- adlb[position_satisfy_filters, ] %>%
    dplyr::filter(!.data[["AVISIT"]] %in% no_fillin_visits)
  adlb_f <- adlb_f[, columns_from_adlb]

  # Distinct visits in `adlb` that are not excluded; used to build the by-visit grid.
  avisits_grid <- adlb %>%
    dplyr::filter(!.data[["AVISIT"]] %in% no_fillin_visits) %>%
    dplyr::pull(.data[["AVISIT"]]) %>%
    unique()

  if (by_visit) {
    # Full grid: every subject in `adsl` x every retained visit x every parameter.
    adsl_lb <- expand.grid(
      USUBJID = unique(adsl$USUBJID),
      AVISIT = avisits_grid,
      PARAMCD = unique(adlb$PARAMCD)
    )

    # Attach the visit number and the parameter label to the grid.
    adsl_lb <- adsl_lb %>%
      dplyr::left_join(unique(adlb[c("AVISIT", "AVISITN")]), by = "AVISIT") %>%
      dplyr::left_join(unique(adlb[c("PARAM", "PARAMCD")]), by = "PARAMCD")

    # Add the subject-level columns shared by `adsl` and `adlb`.
    adsl1 <- adsl[, adsl_adlb_common_columns]
    adsl_lb <- adsl1 %>% merge(adsl_lb, by = "USUBJID")

    by_variables_from_adlb <- c("USUBJID", "AVISIT", "AVISITN", "PARAMCD", "PARAM")

    # Baseline grade per subject and parameter, taken from all of `adlb`.
    adlb_btoxgr <- adlb %>%
      dplyr::select(c("USUBJID", "PARAMCD", "BTOXGR")) %>%
      unique() %>%
      dplyr::rename("BTOXGR_MAP" = "BTOXGR")

    # Full outer merge: grid rows without a flagged record are kept with missing values.
    adlb_out <- merge(
      adlb_f,
      adsl_lb,
      by = by_variables_from_adlb,
      all = TRUE,
      sort = FALSE
    )
    # Overwrite `BTOXGR` with the mapped baseline grade so that filled-in rows also carry it.
    adlb_out <- adlb_out %>%
      dplyr::left_join(adlb_btoxgr, by = c("USUBJID", "PARAMCD")) %>%
      dplyr::mutate(BTOXGR = .data$BTOXGR_MAP) %>%
      dplyr::select(-"BTOXGR_MAP")

    # Labels are collected as: merge keys, remaining `adlb` columns, remaining `adsl` columns.
    # NOTE(review): this relies on `adlb_out` having its columns in exactly this order - confirm.
    adlb_var_labels <- c(
      formatters::var_labels(adlb[by_variables_from_adlb]),
      formatters::var_labels(adlb[columns_from_adlb[!columns_from_adlb %in% by_variables_from_adlb]]),
      formatters::var_labels(adsl[adsl_adlb_common_columns[adsl_adlb_common_columns != "USUBJID"]])
    )
  } else {
    # Full grid: every subject in `adsl` x every parameter (no visit dimension).
    adsl_lb <- expand.grid(
      USUBJID = unique(adsl$USUBJID),
      PARAMCD = unique(adlb$PARAMCD)
    )

    # Attach the parameter label to the grid.
    adsl_lb <- adsl_lb %>% dplyr::left_join(unique(adlb[c("PARAM", "PARAMCD")]), by = "PARAMCD")

    # Add the subject-level columns shared by `adsl` and `adlb`.
    adsl1 <- adsl[, adsl_adlb_common_columns]
    adsl_lb <- adsl1 %>% merge(adsl_lb, by = "USUBJID")

    by_variables_from_adlb <- c("USUBJID", "PARAMCD", "PARAM")

    # Full outer merge: grid rows without a flagged record are kept with missing values.
    adlb_out <- merge(
      adlb_f,
      adsl_lb,
      by = by_variables_from_adlb,
      all = TRUE,
      sort = FALSE
    )

    # Labels are collected as: merge keys, remaining `adlb` columns, remaining `adsl` columns.
    # NOTE(review): this relies on `adlb_out` having its columns in exactly this order - confirm.
    adlb_var_labels <- c(
      formatters::var_labels(adlb[by_variables_from_adlb]),
      formatters::var_labels(adlb[columns_from_adlb[!columns_from_adlb %in% by_variables_from_adlb]]),
      formatters::var_labels(adsl[adsl_adlb_common_columns[adsl_adlb_common_columns != "USUBJID"]])
    )
  }

  # Both toxicity grades are returned as factors.
  adlb_out$ATOXGR <- as.factor(adlb_out$ATOXGR)
  adlb_out$BTOXGR <- as.factor(adlb_out$BTOXGR)

  # Convert missing values to an explicit level, then re-apply the variable labels.
  adlb_out <- df_explicit_na(adlb_out)
  formatters::var_labels(adlb_out) <- adlb_var_labels

  adlb_out
}

#' Cumulative Counts with Thresholds
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Summarize cumulative counts of a (`numeric`) vector that is less than, less or equal to,
#' greater than, or greater or equal to user-specific thresholds.
#'
#' @inheritParams h_count_cumulative
#' @inheritParams argument_convention
#'
#' @seealso Relevant helper function [h_count_cumulative()], and descriptive function [d_count_cumulative()].
#'
#' @name count_cumulative
NULL

#' Helper Function for [s_count_cumulative()]
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to calculate count and fraction of `x` values in the lower or upper tail given a threshold.
#'
#' @inheritParams argument_convention
#' @param threshold (`number`)\cr a cutoff value as threshold to count values of `x`.
#' @param lower_tail (`logical`)\cr whether to count lower tail, default is `TRUE`.
#' @param include_eq (`logical`)\cr whether to include value equal to the `threshold` in
#'   count, default is `TRUE`.
#' @param .N_col (`count`)\cr denominator for fraction calculation.
#'
#' @return A named vector with items:
#'   * `count`: the count of values less than, less or equal to, greater than, or greater or equal to a threshold
#'     of user specification.
#'   * `fraction`: the fraction of the count.
#'
#' @seealso [count_cumulative]
#'
#' @examples
#' set.seed(1, kind = "Mersenne-Twister")
#' x <- c(sample(1:10, 10), NA)
#' .N_col <- length(x)
#' h_count_cumulative(x, 5, .N_col = .N_col)
#' h_count_cumulative(x, 5, lower_tail = FALSE, include_eq = FALSE, na.rm = FALSE, .N_col = .N_col)
#' h_count_cumulative(x, 0, lower_tail = FALSE, .N_col = .N_col)
#' h_count_cumulative(x, 100, lower_tail = FALSE, .N_col = .N_col)
#'
#' @export
h_count_cumulative <- function(x,
                               threshold,
                               lower_tail = TRUE,
                               include_eq = TRUE,
                               na.rm = TRUE, # nolint
                               .N_col) { # nolint
  checkmate::assert_numeric(x)
  checkmate::assert_numeric(threshold)
  checkmate::assert_numeric(.N_col)
  checkmate::assert_flag(lower_tail)
  checkmate::assert_flag(include_eq)
  checkmate::assert_flag(na.rm)

  # Choose the comparison matching the requested tail and whether values equal
  # to the threshold are included.
  compare <- if (lower_tail) {
    if (include_eq) `<=` else `<`
  } else {
    if (include_eq) `>=` else `>`
  }

  # With `na.rm = FALSE` every element is eligible; missing values then yield
  # an `NA` index and are part of the subset whose length is taken.
  eligible <- if (na.rm) !is.na(x) else rep(TRUE, length(x))
  n_selected <- length(x[eligible & compare(x, threshold)])

  c(count = n_selected, fraction = n_selected / .N_col)
}

#' Description of Cumulative Count
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is a helper function that describes the analysis in [s_count_cumulative()].
#'
#' @inheritParams h_count_cumulative
#'
#' @return Labels for [s_count_cumulative()].
#'
#' @export
d_count_cumulative <- function(threshold, lower_tail, include_eq) {
  checkmate::assert_numeric(threshold)

  # Build the comparison symbol: direction first, then an optional "=".
  comparison <- if (lower_tail) "<" else ">"
  if (include_eq) {
    comparison <- paste0(comparison, "=")
  }

  paste0(comparison, " ", threshold)
}

#' @describeIn count_cumulative Statistics function that produces a named list given a numeric vector of thresholds.
#'
#' @param thresholds (`numeric`)\cr vector of cutoff value for the counts.
#'
#' @return
#' * `s_count_cumulative()` returns a named list of `count_fraction`s: a list with each `thresholds` value as a
#'   component, each component containing a vector for the count and fraction.
#'
#' @examples
#' # Internal function - s_count_cumulative
#' \dontrun{
#' set.seed(1, kind = "Mersenne-Twister")
#' x <- c(sample(1:10, 10), NA)
#' .N_col <- length(x)
#' s_count_cumulative(x, thresholds = c(0, 5, 11), .N_col = .N_col)
#' s_count_cumulative(x, thresholds = c(0, 5, 11), include_eq = FALSE, na.rm = FALSE, .N_col = .N_col)
#' }
#'
#' @keywords internal
s_count_cumulative <- function(x,
                               thresholds,
                               lower_tail = TRUE,
                               include_eq = TRUE,
                               .N_col, # nolint
                               ...) {
  checkmate::assert_numeric(thresholds, min.len = 1, any.missing = FALSE)

  # Count and fraction for a single cutoff, labelled with its description.
  # Extra arguments in `...` are forwarded to `h_count_cumulative()`.
  summarize_cutoff <- function(cutoff) {
    stats_cutoff <- h_count_cumulative(x, cutoff, lower_tail, include_eq, .N_col = .N_col, ...)
    label_cutoff <- d_count_cumulative(cutoff, lower_tail, include_eq)
    formatters::with_label(stats_cutoff, label_cutoff)
  }

  # One list component per threshold, named by the threshold value.
  per_threshold <- lapply(thresholds, summarize_cutoff)
  names(per_threshold) <- thresholds

  list(count_fraction = per_threshold)
}

#' @describeIn count_cumulative Formatted analysis function which is used as `afun`
#'   in `count_cumulative()`.
#'
#' @return
#' * `a_count_cumulative()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_count_cumulative
#' \dontrun{
#' # Use the Formatted Analysis function for `analyze()`. We need to ungroup `count_fraction` first
#' # so that the `rtables` formatting function `format_count_fraction()` can be applied correctly.
#' afun <- make_afun(a_count_cumulative, .ungroup_stats = "count_fraction")
#' afun(x, thresholds = c(0, 5, 11), .N_col = .N_col)
#' }
#'
#' @keywords internal
# Formatted analysis function: wraps `s_count_cumulative()` via `make_afun()` and registers
# `format_count_fraction` as the format for the `count_fraction` statistic.
a_count_cumulative <- make_afun(
  s_count_cumulative,
  .formats = c(count_fraction = format_count_fraction)
)

#' @describeIn count_cumulative Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_cumulative()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_cumulative()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_cumulative(
#'     vars = "AGE",
#'     thresholds = c(40, 60)
#'   ) %>%
#'   build_table(tern_ex_adsl)
#'
#' @export
count_cumulative <- function(lyt,
                             vars,
                             var_labels = vars,
                             show_labels = "visible",
                             ...,
                             table_names = vars,
                             .stats = NULL,
                             .formats = NULL,
                             .labels = NULL,
                             .indent_mods = NULL) {
  # Customize the formatted analysis function with the user-supplied statistics, formats,
  # labels and indentation. `count_fraction` is ungrouped so that each threshold's
  # count/fraction pair is handled as its own statistic.
  afun <- make_afun(
    a_count_cumulative,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .ungroup_stats = "count_fraction"
  )
  # Everything passed through `...` (e.g. `thresholds`, `lower_tail`) is handed to the
  # analysis function via `extra_args`.
  analyze(
    lyt,
    vars,
    afun = afun,
    table_names = table_names,
    var_labels = var_labels,
    show_labels = show_labels,
    extra_args = list(...)
  )
}

#' Pairwise CoxPH model
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Summarize p-value, HR and CIs from stratified or unstratified CoxPH model.
#'
#' @inheritParams argument_convention
#' @inheritParams s_surv_time
#' @param strat (`character` or `NULL`)\cr variable names indicating stratification factors.
#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function
#'   [control_coxph()]. Some possible parameter options are:
#'   * `pval_method` (`string`)\cr p-value method for testing hazard ratio = 1. Default method is "log-rank" which
#'     comes from [survival::survdiff()], can also be set to "wald" or "likelihood" (from [survival::coxph()]).
#'   * `ties` (`string`)\cr specifying the method for tie handling. Default is "efron",
#'     can also be set to "breslow" or "exact". See more in [survival::coxph()]
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for HR.
#'
#' @name survival_coxph_pairwise
NULL

#' @describeIn survival_coxph_pairwise Statistics function which analyzes HR, CIs of HR and p-value of a coxph model.
#'
#' @return
#' * `s_coxph_pairwise()` returns the statistics:
#'   * `pvalue`: p-value to test HR = 1.
#'   * `hr`: Hazard ratio.
#'   * `hr_ci`: Confidence interval for hazard ratio.
#'   * `n_tot`: Total number of observations.
#'   * `n_tot_events`: Total number of events.
#'
#' @examples
#' library(dplyr)
#'
#' adtte_f <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(is_event = CNSR == 0)
#' df <- adtte_f %>%
#'   filter(ARMCD == "ARM A")
#' df_ref_group <- adtte_f %>%
#'   filter(ARMCD == "ARM B")
#'
#' # Internal function - s_coxph_pairwise
#' \dontrun{
#' s_coxph_pairwise(df, df_ref_group, .in_ref_col = FALSE, .var = "AVAL", is_event = "is_event")
#' }
#'
#' @keywords internal
s_coxph_pairwise <- function(df,
                             .ref_group,
                             .in_ref_col,
                             .var,
                             is_event,
                             strat = NULL,
                             control = control_coxph()) {
  checkmate::assert_string(.var)
  checkmate::assert_numeric(df[[.var]])
  checkmate::assert_logical(df[[is_event]])
  assert_df_with_variables(df, list(tte = .var, is_event = is_event))
  pval_method <- control$pval_method
  ties <- control$ties
  conf_level <- control$conf_level

  # The reference column is not compared with itself: return empty, labelled placeholders
  # so that the row labels are still available to the table.
  if (.in_ref_col) {
    return(
      list(
        pvalue = formatters::with_label("", paste0("p-value (", pval_method, ")")),
        hr = formatters::with_label("", "Hazard Ratio"),
        hr_ci = formatters::with_label("", f_conf_level(conf_level)),
        n_tot = formatters::with_label("", "Total n"),
        n_tot_events = formatters::with_label("", "Total events")
      )
    )
  }

  # Stack reference and comparison rows; "ref" is the first factor level, so the fitted
  # coefficient describes the comparison group ("x") relative to the reference.
  data <- rbind(.ref_group, df)
  group <- factor(rep(c("ref", "x"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "x"))

  df_cox <- data.frame(
    tte = data[[.var]],
    is_event = data[[is_event]],
    arm = group
  )
  if (is.null(strat)) {
    formula_cox <- survival::Surv(tte, is_event) ~ arm
  } else {
    # Build the stratified formula from the variable names and add those columns to the data.
    formula_cox <- stats::as.formula(
      paste0(
        "survival::Surv(tte, is_event) ~ arm + strata(",
        paste(strat, collapse = ","),
        ")"
      )
    )
    df_cox <- cbind(df_cox, data[strat])
  }
  cox_fit <- survival::coxph(
    formula = formula_cox,
    data = df_cox,
    ties = ties
  )
  sum_cox <- summary(cox_fit, conf.int = conf_level, extend = TRUE)
  original_survdiff <- survival::survdiff(
    formula_cox,
    data = df_cox
  )
  # Request the upper tail directly: `1 - pchisq()` collapses to exactly 0 for large
  # test statistics, whereas `lower.tail = FALSE` keeps the small p-value.
  log_rank_pvalue <- stats::pchisq(
    original_survdiff$chisq,
    df = length(original_survdiff$n) - 1,
    lower.tail = FALSE
  )

  pval <- switch(pval_method,
    "wald" = sum_cox$waldtest["pvalue"],
    "log-rank" = log_rank_pvalue, # p-value from the original log-rank test survival::survdiff()
    "likelihood" = sum_cox$logtest["pvalue"],
    # Fail loudly instead of silently producing a NULL p-value.
    stop("unsupported `pval_method`: ", pval_method)
  )
  list(
    pvalue = formatters::with_label(unname(pval), paste0("p-value (", pval_method, ")")),
    hr = formatters::with_label(sum_cox$conf.int[1, 1], "Hazard Ratio"),
    hr_ci = formatters::with_label(unname(sum_cox$conf.int[1, 3:4]), f_conf_level(conf_level)),
    n_tot = formatters::with_label(sum_cox$n, "Total n"),
    n_tot_events = formatters::with_label(sum_cox$nevent, "Total events")
  )
}

#' @describeIn survival_coxph_pairwise Formatted analysis function which is used as `afun` in `coxph_pairwise()`.
#'
#' @return
#' * `a_coxph_pairwise()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_coxph_pairwise
#' \dontrun{
#' a_coxph_pairwise(df, df_ref_group, .in_ref_col = FALSE, .var = "AVAL", is_event = "is_event")
#' }
#'
#' @keywords internal
# Formatted analysis function: wraps `s_coxph_pairwise()` via `make_afun()` with default
# indentation and formats per statistic. Only `hr_ci` is indented by one level.
# NOTE(review): `n_tot` and `n_tot_events` are counts but use the two-decimal "xx.xx"
# format - confirm this is intended.
a_coxph_pairwise <- make_afun(
  s_coxph_pairwise,
  .indent_mods = c(pvalue = 0L, hr = 0L, hr_ci = 1L, n_tot = 0L, n_tot_events = 0L),
  .formats = c(
    pvalue = "x.xxxx | (<0.0001)",
    hr = "xx.xx",
    hr_ci = "(xx.xx, xx.xx)",
    n_tot = "xx.xx",
    n_tot_events = "xx.xx"
  )
)

#' @describeIn survival_coxph_pairwise Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `coxph_pairwise()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_coxph_pairwise()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   coxph_pairwise(
#'     vars = "AVAL",
#'     is_event = "is_event",
#'     var_labels = "Unstratified Analysis"
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   coxph_pairwise(
#'     vars = "AVAL",
#'     is_event = "is_event",
#'     var_labels = "Stratified Analysis",
#'     strat = "SEX",
#'     control = control_coxph(pval_method = "wald")
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' @export
coxph_pairwise <- function(lyt,
                           vars,
                           ...,
                           var_labels = "CoxPH",
                           show_labels = "visible",
                           table_names = vars,
                           .stats = c("pvalue", "hr", "hr_ci"),
                           .formats = NULL,
                           .labels = NULL,
                           .indent_mods = NULL) {
  # Customize the formatted analysis function with the requested statistics and any
  # user-supplied formats, labels and indentation. By default only the p-value, the
  # hazard ratio and its confidence interval are shown.
  afun <- make_afun(
    a_coxph_pairwise,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
  # Everything passed through `...` (e.g. `is_event`, `strat`, `control`) is handed to
  # the analysis function via `extra_args`.
  analyze(
    lyt,
    vars,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    afun = afun,
    extra_args = list(...)
  )
}

#' Combine Factor Levels
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Combine specified old factor levels into a single new level.
#'
#' @param x factor
#' @param levels level names to be combined
#' @param new_level name of new level
#'
#' @return A `factor` with the new levels.
#'
#' @examples
#' x <- factor(letters[1:5], levels = letters[5:1])
#' combine_levels(x, levels = c("a", "b"))
#'
#' combine_levels(x, c("e", "b"))
#'
#' @export
combine_levels <- function(x, levels, new_level = paste(levels, collapse = "/")) {
  checkmate::assert_factor(x)
  checkmate::assert_subset(levels, levels(x))

  # Giving several levels the same label makes `levels<-` merge them into one level,
  # which keeps the position of the first of the combined levels.
  current <- levels(x)
  levels(x) <- replace(current, current %in% levels, new_level)
  x
}

#' Conversion of a Vector to a Factor
#'
#' Converts `x` to a factor and keeps its attributes. Warns appropriately such that the user
#' can decide whether they prefer converting to factor manually (e.g. for full control of
#' factor levels).
#'
#' @param x (`atomic`)\cr object to convert.
#' @param x_name (`string`)\cr name of `x`.
#' @param na_level (`string`)\cr the explicit missing level which should be used when converting a character vector.
#' @param verbose defaults to `TRUE`. It prints out warnings and messages.
#'
#' @return A `factor` with same attributes (except class) as `x`. Does not modify `x` if already a `factor`.
#'
#' @examples
#' # Internal function - as_factor_keep_attributes
#' \dontrun{
#' as_factor_keep_attributes(formatters::with_label(c(1, 1, 2, 3), "id"), verbose = FALSE)
#' as_factor_keep_attributes(c("a", "b", ""), "id", verbose = FALSE)
#' }
#'
#' @keywords internal
as_factor_keep_attributes <- function(x,
                                      x_name = deparse(substitute(x)),
                                      na_level = "<Missing>",
                                      verbose = TRUE) {
  checkmate::assert_atomic(x)
  checkmate::assert_string(x_name)
  checkmate::assert_string(na_level)
  checkmate::assert_flag(verbose)
  # Factors are returned untouched.
  if (is.factor(x)) {
    return(x)
  }
  x_class <- class(x)[1]
  if (verbose) {
    warning(paste(
      "automatically converting", x_class, "variable", x_name,
      "to factor, better manually convert to factor to avoid failures"
    ))
  }
  if (identical(length(x), 0L)) {
    warning(paste(
      x_name, "has length 0, this can lead to tabulation failures, better convert to factor"
    ))
  }

  x_fct <- if (is.character(x)) {
    x_no_na <- explicit_na(sas_na(x), label = na_level)
    if (na_level %in% x_no_na) {
      # Missing values are present: use the explicit-missing version and put that level last.
      forcats::fct_relevel(x_no_na, na_level, after = Inf)
    } else {
      as.factor(x)
    }
  } else {
    as.factor(x)
  }

  # Re-attach the original attributes (e.g. `label`), but not the ones that define the
  # factor itself. Re-applying the original `class` (such as "Date") would overwrite
  # "factor" and return a broken object.
  kept_attrs <- attributes(x)
  kept_attrs <- kept_attrs[!names(kept_attrs) %in% c("class", "levels")]
  do.call(structure, c(list(.Data = x_fct), kept_attrs))
}

#' Labels for Bins in Percent
#'
#' This creates labels for quantile based bins in percent. This assumes the right-closed
#' intervals as produced by [cut_quantile_bins()].
#'
#' @param probs (`proportion` vector)\cr the probabilities identifying the quantiles.
#'   This is a sorted vector of unique `proportion` values, i.e. between 0 and 1, where
#'   the boundaries 0 and 1 must not be included.
#' @param digits (`integer`)\cr number of decimal places to round the percent numbers.
#'
#' @return A `character` vector with labels in the format `[0%,20%]`, `(20%,50%]`, etc.
#'
#' @examples
#' # Internal function - bins_percent_labels
#' \dontrun{
#' # Just pass the internal probability bounds, then 0 and 100% will be added automatically.
#' bins_percent_labels(c(0.2, 0.5))
#'
#' # Determine how to round.
#' bins_percent_labels(0.35224, digits = 1)
#'
#' # Passing only the boundaries 0 and 1 (or an empty vector) gives a single bin 0-100%.
#' bins_percent_labels(c(0, 1))
#' }
#'
#' @keywords internal
bins_percent_labels <- function(probs,
                                digits = 0) {
  # Make sure the outer boundaries 0 and 1 are part of the probability grid.
  if (!(0 %in% probs)) probs <- c(0, probs)
  if (!(1 %in% probs)) probs <- c(probs, 1)
  checkmate::assert_numeric(probs, lower = 0, upper = 1, unique = TRUE, sorted = TRUE)

  pct <- paste0(round(probs * 100, digits = digits), "%")
  n_bins <- length(pct) - 1
  # Only the first bin is closed on the left; every bin is closed on the right.
  opening <- c("[", rep("(", n_bins - 1))
  paste0(opening, pct[seq_len(n_bins)], ",", pct[-1], "]")
}

#' Cutting Numeric Vector into Empirical Quantile Bins
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This cuts a numeric vector into sample quantile bins.
#'
#' @inheritParams bins_percent_labels
#' @param x (`numeric`)\cr the continuous variable values which should be cut into
#'   quantile bins. This may contain `NA` values, which are then
#'   not used for the quantile calculations, but included in the return vector.
#' @param labels (`character`)\cr the unique labels for the quantile bins. When there are `n`
#'   probabilities in `probs`, then this must be `n + 1` long.
#' @param type (`integer`)\cr type of quantiles to use, see [stats::quantile()] for details.
#' @param ordered (`flag`)\cr should the result be an ordered factor.
#'
#' @return A `factor` variable with appropriately-labeled bins as levels.
#'
#' @note Intervals are closed on the right side. That is, the first bin is the interval
#'   `[-Inf, q1]` where `q1` is the first quantile, the second bin is then `(q1, q2]`, etc.,
#'   and the last bin is `(qn, +Inf]` where `qn` is the last quantile.
#'
#' @examples
#' # Default is to cut into quartile bins.
#' cut_quantile_bins(cars$speed)
#'
#' # Use custom quantiles.
#' cut_quantile_bins(cars$speed, probs = c(0.1, 0.2, 0.6, 0.88))
#'
#' # Use custom labels.
#' cut_quantile_bins(cars$speed, labels = paste0("Q", 1:4))
#'
#' # NAs are preserved in result factor.
#' ozone_binned <- cut_quantile_bins(airquality$Ozone)
#' which(is.na(ozone_binned))
#' # So you might want to make these explicit.
#' explicit_na(ozone_binned)
#'
#' @export
cut_quantile_bins <- function(x,
                              probs = c(0.25, 0.5, 0.75),
                              labels = NULL,
                              type = 7,
                              ordered = TRUE) {
  checkmate::assert_flag(ordered)
  checkmate::assert_numeric(x)
  # Always include the outer boundaries so that the bins cover the whole sample range.
  if (!(0 %in% probs)) probs <- c(0, probs)
  if (!(1 %in% probs)) probs <- c(probs, 1)
  checkmate::assert_numeric(probs, lower = 0, upper = 1, unique = TRUE, sorted = TRUE)
  if (is.null(labels)) {
    labels <- bins_percent_labels(probs)
  }
  checkmate::assert_character(labels, len = length(probs) - 1, any.missing = FALSE, unique = TRUE)

  if (all(is.na(x))) {
    # Nothing to compute quantiles from: return the all-NA factor with the bin levels.
    factor(x, ordered = ordered, levels = labels)
  } else {
    quantiles <- stats::quantile(x, probs = probs, type = type, na.rm = TRUE)

    # Tied quantiles would give duplicated break points, which cannot define distinct bins.
    checkmate::assert_numeric(quantiles, unique = TRUE)

    # Right-closed bins; `include.lowest` also closes the first bin on the left.
    cut(
      x,
      breaks = quantiles,
      labels = labels,
      include.lowest = TRUE,
      right = TRUE,
      ordered_result = ordered
    )
  }
}

#' Discard Certain Levels from a Factor
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This discards the observations as well as the levels specified from a factor.
#'
#' @param x (`factor`)\cr the original factor.
#' @param discard (`character`)\cr which levels to discard.
#'
#' @return A modified `factor` with observations as well as levels from `discard` dropped.
#'
#' @examples
#' fct_discard(factor(c("a", "b", "c")), "c")
#'
#' @export
fct_discard <- function(x, discard) {
  checkmate::assert_factor(x)
  checkmate::assert_character(discard, any.missing = FALSE)
  # An observation is kept when it matches none of the discarded levels (NAs are kept).
  is_kept <- is.na(match(x, discard))
  factor(x[is_kept], levels = setdiff(levels(x), discard))
}

#' Insertion of Explicit Missings in a Factor
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This inserts explicit missings in a factor based on a condition. Additionally,
#' existing `NA` values will be explicitly converted to given `na_level`.
#'
#' @param x (`factor`)\cr the original factor.
#' @param condition (`logical`)\cr where to insert missings.
#' @param na_level (`string`)\cr which level to use for missings.
#'
#' @return A modified `factor` with inserted and existing `NA` converted to `na_level`.
#'
#' @seealso [forcats::fct_na_value_to_level()] which is used internally.
#'
#' @examples
#' fct_explicit_na_if(factor(c("a", "b", NA)), c(TRUE, FALSE, FALSE))
#'
#' @export
fct_explicit_na_if <- function(x, condition, na_level = "<Missing>") {
  checkmate::assert_factor(x, len = length(condition))
  checkmate::assert_logical(condition)
  # Blank out the flagged observations, then turn every NA (new and pre-existing)
  # into the explicit `na_level`.
  with_missing <- forcats::fct_na_value_to_level(replace(x, condition, NA), level = na_level)
  # Drop `na_level` again if no observation ended up using it.
  forcats::fct_drop(with_missing, only = na_level)
}

#' Collapsing of Factor Levels and Keeping Only Those New Group Levels
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This collapses levels and only keeps those new group levels, in the order provided.
#' The returned factor has levels in the order given, with the possible missing level last (this will
#' only be included if there are missing values).
#'
#' @param .f (`factor` or `character`)\cr original vector.
#' @param ... (named `character` vectors)\cr levels in each vector provided will be collapsed into
#'   the new level given by the respective name.
#' @param .na_level (`string`)\cr which level to use for other levels, which should be missing in the
#'   new factor. Note that this level must not be contained in the new levels specified in `...`.
#'
#' @return A modified `factor` with collapsed levels. Values and levels which are not included
#'   in the given `character` vector input will be set to the missing level `.na_level`.
#'
#' @note Any existing `NA`s in the input vector will not be replaced by the missing level. If needed,
#'   [explicit_na()] can be called separately on the result.
#'
#' @seealso [forcats::fct_collapse()], [forcats::fct_relevel()] which are used internally.
#'
#' @examples
#' fct_collapse_only(factor(c("a", "b", "c", "d")), TRT = "b", CTRL = c("c", "d"))
#'
#' @export
fct_collapse_only <- function(.f, ..., .na_level = "<Missing>") {
  target_levels <- names(list(...))
  # The missing level must stay distinguishable from the requested new levels.
  if (checkmate::test_subset(.na_level, target_levels)) {
    stop(".na_level currently set to '", .na_level, "' must not be contained in the new levels")
  }
  # Everything not named in `...` is collapsed into `.na_level`.
  collapsed <- forcats::fct_collapse(.f, ..., other_level = .na_level)
  # Bring the new levels to the front, in the order in which they were supplied.
  relevel_args <- c(list(.f = collapsed), as.list(target_levels))
  do.call(forcats::fct_relevel, relevel_args)
}

#' Helper Function to create a new `SMQ` variable in `ADAE` by stacking `SMQ` and/or `CQ` records.
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper Function to create a new `SMQ` variable in `ADAE` that consists of all adverse events belonging to
#' selected Standardized/Customized queries. The new dataset will only contain records of the adverse events
#' belonging to any of the selected baskets.
#'
#' @inheritParams argument_convention
#' @param baskets (`character`)\cr variable names of the selected Standardized/Customized queries.
#' @param smq_varlabel (`string`)\cr a label for the new variable created.
#' @param keys (`character`)\cr names of the key variables to be returned along with the new variable created.
#' @param aag_summary (`data.frame`)\cr containing the `SMQ` baskets and the levels of interest for the final `SMQ`
#'   variable. This is useful when there are some levels of interest that are not observed in the `df` dataset.
#'   The two columns of this dataset should be named `basket` and `basket_name`.
#'
#' @return `data.frame` with variables in `keys` taken from `df` and new variable `SMQ` containing
#'   records belonging to the baskets selected via the `baskets` argument.
#'
#' @examples
#' adae <- tern_ex_adae[1:20, ] %>% df_explicit_na()
#' h_stack_by_baskets(df = adae)
#'
#' aag <- data.frame(
#'   NAMVAR = c("CQ01NAM", "CQ02NAM", "SMQ01NAM", "SMQ02NAM"),
#'   REFNAME = c(
#'     "D.2.1.5.3/A.1.1.1.1 AESI", "X.9.9.9.9/Y.8.8.8.8 AESI",
#'     "C.1.1.1.3/B.2.2.3.1 AESI", "C.1.1.1.3/B.3.3.3.3 AESI"
#'   ),
#'   SCOPE = c("", "", "BROAD", "BROAD"),
#'   stringsAsFactors = FALSE
#' )
#'
#' basket_name <- character(nrow(aag))
#' cq_pos <- grep("^(CQ).+NAM$", aag$NAMVAR)
#' smq_pos <- grep("^(SMQ).+NAM$", aag$NAMVAR)
#' basket_name[cq_pos] <- aag$REFNAME[cq_pos]
#' basket_name[smq_pos] <- paste0(
#'   aag$REFNAME[smq_pos], "(", aag$SCOPE[smq_pos], ")"
#' )
#'
#' aag_summary <- data.frame(
#'   basket = aag$NAMVAR,
#'   basket_name = basket_name,
#'   stringsAsFactors = TRUE
#' )
#'
#' result <- h_stack_by_baskets(df = adae, aag_summary = aag_summary)
#' all(levels(aag_summary$basket_name) %in% levels(result$SMQ))
#'
#' h_stack_by_baskets(
#'   df = adae,
#'   aag_summary = NULL,
#'   keys = c("STUDYID", "USUBJID", "AEDECOD", "ARM"),
#'   baskets = "SMQ01NAM"
#' )
#'
#' @export
h_stack_by_baskets <- function(df,
                               baskets = grep("^(SMQ|CQ).+NAM$", names(df), value = TRUE),
                               smq_varlabel = "Standardized MedDRA Query",
                               keys = c("STUDYID", "USUBJID", "ASTDTM", "AEDECOD", "AESEQ"),
                               aag_summary = NULL,
                               na_level = "<Missing>") {
  # Validate the inputs that are used immediately before touching them.
  checkmate::assert_data_frame(df)
  checkmate::assert_string(na_level)

  # Use of df_explicit_na() in case the user has not previously applied it.
  df <- df_explicit_na(df, na_level = na_level)

  checkmate::assert_character(baskets)
  checkmate::assert_string(smq_varlabel)
  checkmate::assert_true(all(startsWith(baskets, "SMQ") | startsWith(baskets, "CQ")))
  checkmate::assert_true(all(endsWith(baskets, "NAM")))
  checkmate::assert_subset(baskets, names(df))
  checkmate::assert_subset(keys, names(df))

  smq_nam <- baskets[startsWith(baskets, "SMQ")]
  # SC corresponding to NAM
  smq_sc <- gsub(pattern = "NAM", replacement = "SC", x = smq_nam, fixed = TRUE)
  smq <- stats::setNames(smq_sc, smq_nam)
  checkmate::assert_subset(smq_sc, names(df))

  if (!is.null(aag_summary)) {
    assert_df_with_variables(
      df = aag_summary,
      variables = list(val = c("basket", "basket_name"))
    )
    # Warning in case there is no match between `aag_summary$basket` and `baskets` argument.
    # Ideally the two should match completely, as they describe the same target baskets.
    if (length(intersect(baskets, unique(aag_summary$basket))) == 0) {
      warning("There are 0 baskets in common between aag_summary$basket and `baskets` argument.")
    }
  }

  # `drop = FALSE` keeps a data frame even when only a single key is requested.
  var_labels <- c(formatters::var_labels(df[, keys, drop = FALSE]), "SMQ" = smq_varlabel)

  # convert `na_level` records from baskets to NA for the later loop and from wide to long steps
  df[, c(baskets, smq_sc)][df[, c(baskets, smq_sc)] == na_level] <- NA

  if (all(is.na(df[, baskets]))) { # in case there is no level for the target baskets
    # we just need an empty data frame keeping all factor levels
    df_long <- df[0L, keys, drop = FALSE]
  } else {
    # Concatenate SMQxxxNAM with corresponding SMQxxxSC
    df_cnct <- df[, c(keys, baskets[startsWith(baskets, "CQ")]), drop = FALSE]

    for (nam in names(smq)) {
      sc <- smq[nam] # SMQxxxSC corresponding to SMQxxxNAM
      nam_notna <- !is.na(df[[nam]])
      new_colname <- paste(nam, sc, sep = "_")
      df_cnct[nam_notna, new_colname] <- paste0(df[[nam]], "(", df[[sc]], ")")[nam_notna]
    }

    df_cnct$unique_id <- seq_len(nrow(df_cnct))
    var_cols <- names(df_cnct)[!(names(df_cnct) %in% c(keys, "unique_id"))]
    # have to convert df_cnct from tibble to dataframe
    # as it throws a warning otherwise about rownames.
    # tibble do not support rownames and reshape creates rownames

    df_long <- stats::reshape(
      data = as.data.frame(df_cnct),
      varying = var_cols,
      v.names = "SMQ",
      idvar = names(df_cnct)[names(df_cnct) %in% c(keys, "unique_id")],
      direction = "long",
      new.row.names = seq_len(length(var_cols) * nrow(df_cnct))
    )

    # Keep only rows that belong to a basket and drop the helper columns.
    df_long <- df_long[!is.na(df_long[, "SMQ"]), !(names(df_long) %in% c("time", "unique_id"))]
    df_long$SMQ <- as.factor(df_long$SMQ)
  }

  smq_levels <- setdiff(levels(df_long[["SMQ"]]), na_level)

  if (!is.null(aag_summary)) {
    # A warning in case there is no match between df and aag_summary records
    if (length(intersect(smq_levels, unique(aag_summary$basket_name))) == 0) {
      warning("There are 0 basket levels in common between aag_summary$basket_name and df.")
    }
    # Add the levels of interest from `aag_summary` that are not observed in `df`.
    df_long[["SMQ"]] <- factor(
      df_long[["SMQ"]],
      levels = sort(
        c(
          smq_levels,
          setdiff(unique(aag_summary$basket_name), smq_levels)
        )
      )
    )
  } else {
    # Baskets without any record are added as levels under their variable name.
    # `drop = FALSE` guarantees iteration over columns even for a single basket.
    all_na_basket_flag <- vapply(df[, baskets, drop = FALSE], function(x) {
      all(is.na(x))
    }, FUN.VALUE = logical(1))
    all_na_basket <- baskets[all_na_basket_flag]

    df_long[["SMQ"]] <- factor(
      df_long[["SMQ"]],
      levels = sort(c(smq_levels, all_na_basket))
    )
  }
  formatters::var_labels(df_long) <- var_labels
  tibble::tibble(df_long)
}

#' Formatting Functions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' See below for the list of formatting functions created in `tern` to work with `rtables`.
#'
#' Other available formats can be listed via [`formatters::list_valid_format_labels()`]. Additional
#' custom formats can be created via the [`formatters::sprintf_format()`] function.
#'
#' @family formatting functions
#' @name formatting_functions
NULL

#' Formatting Fraction and Percentage
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formats a fraction together with ratio in percent.
#'
#' @param x (`integer`)\cr with elements `num` and `denom`.
#' @param ... required for `rtables` interface.
#'
#' @return A string in the format `num / denom (ratio %)`. If `num` is 0, the format is `num / denom`.
#'
#' @examples
#' format_fraction(x = c(num = 2L, denom = 3L))
#' format_fraction(x = c(num = 0L, denom = 3L))
#'
#' @family formatting functions
#' @export
format_fraction <- function(x, ...) {
  attr(x, "label") <- NULL

  checkmate::assert_vector(x)
  checkmate::assert_count(x["num"])
  checkmate::assert_count(x["denom"])

  fraction <- paste0(x["num"], "/", x["denom"])
  # A zero numerator is shown without the percentage.
  if (x["num"] == 0) {
    return(fraction)
  }
  pct <- round(x["num"] / x["denom"] * 100, 1)
  paste0(fraction, " (", pct, "%)")
}

#' Formatting Fraction and Percentage with Fixed Single Decimal Place
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formats a fraction together with ratio in percent with fixed single decimal place.
#' Includes trailing zero in case of whole number percentages to always keep one decimal place.
#'
#' @param x (`integer`)\cr with elements `num` and `denom`.
#' @param ... required for `rtables` interface.
#'
#' @return A string in the format `num / denom (ratio %)`. If `num` is 0, the format is `num / denom`.
#'
#' @examples
#' format_fraction_fixed_dp(x = c(num = 1L, denom = 2L))
#' format_fraction_fixed_dp(x = c(num = 1L, denom = 4L))
#' format_fraction_fixed_dp(x = c(num = 0L, denom = 3L))
#'
#' @family formatting functions
#' @export
format_fraction_fixed_dp <- function(x, ...) {
  attr(x, "label") <- NULL
  checkmate::assert_vector(x)
  checkmate::assert_count(x["num"])
  checkmate::assert_count(x["denom"])

  fraction <- paste0(x["num"], "/", x["denom"])
  # A zero numerator is shown without the percentage.
  if (x["num"] == 0) {
    return(fraction)
  }
  # `%.1f` always prints one decimal place, also for whole-number percentages.
  pct <- sprintf("%.1f", round(x["num"] / x["denom"] * 100, 1))
  paste0(fraction, " (", pct, "%)")
}

#' Formatting Count and Fraction
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formats a count together with fraction with special consideration when count is `0`.
#'
#' @param x (`integer`)\cr vector of length 2, count and fraction.
#' @param ... required for `rtables` interface.
#'
#' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`.
#'
#' @examples
#' format_count_fraction(x = c(2, 0.6667))
#' format_count_fraction(x = c(0, 0))
#'
#' @family formatting functions
#' @export
format_count_fraction <- function(x, ...) {
  attr(x, "label") <- NULL

  # Missing input is rendered as the literal string "NA" without further checks.
  if (any(is.na(x))) {
    return("NA")
  }

  checkmate::assert_vector(x)
  checkmate::assert_integerish(x[1])
  assert_proportion_value(x[2], include_boundaries = TRUE)

  # A zero count is shown on its own, without a percentage.
  if (x[1] == 0) {
    return("0")
  }
  pct <- round(x[2] * 100, 1)
  paste0(x[1], " (", pct, "%)")
}

#' Formatting Count and Percentage with Fixed Single Decimal Place
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Formats a count together with fraction with special consideration when count is `0`.
#'
#' @param x (`integer`)\cr vector of length 2, count and fraction.
#' @param ... required for `rtables` interface.
#'
#' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`.
#'
#' @examples
#' format_count_fraction_fixed_dp(x = c(2, 0.6667))
#' format_count_fraction_fixed_dp(x = c(2, 0.5))
#' format_count_fraction_fixed_dp(x = c(0, 0))
#'
#' @family formatting functions
#' @export
format_count_fraction_fixed_dp <- function(x, ...) {
  attr(x, "label") <- NULL

  # Missing input is rendered as the literal string "NA" without further checks.
  if (any(is.na(x))) {
    return("NA")
  }

  checkmate::assert_vector(x)
  checkmate::assert_integerish(x[1])
  assert_proportion_value(x[2], include_boundaries = TRUE)

  # `assert_integerish()` tolerates tiny deviations from a whole number, but `%d` does not:
  # round first so that such values can still be printed.
  count <- round(x[1])
  fraction <- x[2]
  # Compare with a tolerance: a fraction computed in floating point may be marginally
  # off 1 and should still be shown as "100%".
  is_full <- abs(fraction - 1) < sqrt(.Machine$double.eps)

  if (count == 0) {
    "0"
  } else if (is_full) {
    sprintf("%d (100%%)", count)
  } else {
    sprintf("%d (%.1f%%)", count, fraction * 100)
  }
}

#' Formatting: XX as Formatting Function
#'
#' Translate a string where x and dots are interpreted as number place
#' holders, and others as formatting elements.
#'
#' @param str (`string`)\cr template.
#'
#' @return An `rtables` formatting function.
#'
#' @examples
#' test <- list(c(1.658, 0.5761), c(1e1, 785.6))
#'
#' z <- format_xx("xx (xx.x)")
#' sapply(test, z)
#'
#' z <- format_xx("xx.x - xx.x")
#' sapply(test, z)
#'
#' z <- format_xx("xx.x, incl. xx.x% NE")
#' sapply(test, z)
#'
#' @family formatting functions
#' @export
format_xx <- function(str) {
  # Locate every number placeholder: a run of x's, optionally with a decimal part.
  positions <- gregexpr(pattern = "x+\\.x+|x+", text = str, perl = TRUE)
  x_positions <- regmatches(x = str, m = positions)[[1]]

  # One rounding function per placeholder; the number of x's after the dot gives the
  # number of digits (none when there is no decimal part).
  roundings <- lapply(x_positions, function(placeholder) {
    parts <- strsplit(placeholder, split = "\\.")[[1]]
    n_digits <- if (length(parts) > 1) nchar(parts[2]) else 0
    function(value) round(value, digits = n_digits)
  })

  # The formatting function rounds each value with its placeholder's rule and substitutes
  # the results into a local copy of the template.
  function(x, output) {
    values <- Map(function(value, rounding) rounding(value), x, roundings)
    regmatches(x = str, m = positions)[[1]] <- values
    str
  }
}

#' Formatting Fraction with Lower Threshold
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formats a fraction when the second element of the input `x` is the fraction. It applies
#' a lower threshold, below which it is just stated that the fraction is smaller than that.
#'
#' @param threshold (`proportion`)\cr lower threshold.
#'
#' @return An `rtables` formatting function that takes numeric input `x` where the second
#'   element is the fraction that is formatted. If the fraction is above or equal to the threshold,
#'   then it is displayed in percentage. If it is positive but below the threshold, it returns,
#'   e.g. "<1" if the threshold is `0.01`. If it is zero, then just "0" is returned.
#'
#' @examples
#' format_fun <- format_fraction_threshold(0.05)
#' format_fun(x = c(20, 0.1))
#' format_fun(x = c(2, 0.01))
#' format_fun(x = c(0, 0))
#'
#' @family formatting functions
#' @export
format_fraction_threshold <- function(threshold) {
  assert_proportion_value(threshold)

  # Text shown for positive fractions that fall below the threshold,
  # e.g. "<5" for a threshold of 0.05.
  string_below_threshold <- paste0("<", round(threshold * 100))

  # Formatter: `x[2]` is the fraction; any other elements of `x` are ignored.
  function(x, ...) {
    assert_proportion_value(x[2], include_boundaries = TRUE)
    ifelse(
      # Compare against the user-supplied threshold (previously a fixed 0.01
      # was used, so fractions between 1% and the threshold were displayed as
      # a percentage). Fractions equal to the threshold are displayed as a
      # percentage, as documented.
      x[2] >= threshold,
      round(x[2] * 100),
      ifelse(
        x[2] == 0,
        "0",
        string_below_threshold
      )
    )
  }
}

#' Formatting Extreme Values
#'
#' @description `r lifecycle::badge("stable")`
#'
#' `rtables` formatting functions that handle extreme values.
#'
#' @param digits (`integer`)\cr number of decimal places to display.
#'
#' @details For each input, apply a format to the specified number of `digits`. If the value is
#'    below a threshold, it returns "<0.01" e.g. if the number of `digits` is 2. If the value is
#'    above a threshold, it returns ">999.99" e.g. if the number of `digits` is 2.
#'    If it is zero, then returns "0.00".
#'
#' @family formatting functions
#' @name extreme_format
NULL

#' @describeIn extreme_format Internal helper function to calculate the threshold and create formatted strings
#'  used in Formatting Functions. Returns a list with elements `threshold` and `format_string`.
#'
#' @return
#' * `h_get_format_threshold()` returns a `list` of 2 elements: `threshold`, with `low` and `high` thresholds,
#'   and `format_string`, with thresholds formatted as strings.
#'
#' @examples
#' h_get_format_threshold(2L)
#'
#' @export
h_get_format_threshold <- function(digits = 2L) {
  checkmate::assert_integerish(digits)

  # Smallest and largest values that can be displayed with `digits` decimals.
  low_threshold <- 1 / (10 ^ digits) # styler: off
  high_threshold <- 1000 - (1 / (10 ^ digits)) # styler: off

  # Render the thresholds in fixed-point notation with `digits` decimals.
  # Plain paste0() converts via as.character(), which switches to scientific
  # notation for small values (e.g. "<1e-04" for digits = 4) and so would not
  # match the "%.<digits>f" formatting used for in-range values.
  fixed_fmt <- paste0("%.", round(digits), "f")
  string_below_threshold <- paste0("<", sprintf(fixed_fmt, low_threshold))
  string_above_threshold <- paste0(">", sprintf(fixed_fmt, high_threshold))

  list(
    "threshold" = c(low = low_threshold, high = high_threshold),
    "format_string" = c(low = string_below_threshold, high = string_above_threshold)
  )
}

#' @describeIn extreme_format Internal helper function to apply a threshold format to a value.
#'   Creates a formatted string to be used in Formatting Functions.
#'
#' @param x (`number`)\cr value to format.
#'
#' @return
#' * `h_format_threshold()` returns the given value, or if the value is not within the digit threshold the relation
#'   of the given value to the digit threshold, as a formatted string.
#'
#' @examples
#' h_format_threshold(0.001)
#' h_format_threshold(1000)
#'
#' @export
h_format_threshold <- function(x, digits = 2L) {
  # Missing values are handed back unchanged (and unvalidated).
  if (is.na(x)) {
    return(x)
  }

  checkmate::assert_numeric(x, lower = 0)

  limits <- h_get_format_threshold(digits)

  # Positive but too small to display: return the "<low" string.
  if (0 < x && x < limits$threshold["low"]) {
    return(unname(limits$format_string["low"]))
  }

  # Too large to display: return the ">high" string.
  if (x > limits$threshold["high"]) {
    return(unname(limits$format_string["high"]))
  }

  # In range (including exactly zero): fixed number of decimals.
  unname(sprintf(fmt = paste0("%.", digits, "f"), x))
}

#' Formatting a Single Extreme Value
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Create Formatting Function for a single extreme value.
#'
#' @inheritParams extreme_format
#'
#' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme value.
#'
#' @examples
#' format_fun <- format_extreme_values(2L)
#' format_fun(x = 0.127)
#' format_fun(x = Inf)
#' format_fun(x = 0)
#' format_fun(x = 0.009)
#'
#' @family formatting functions
#' @export
format_extreme_values <- function(digits = 2L) {
  # The returned formatter accepts exactly one value (which may be NA) and
  # delegates the threshold handling to h_format_threshold().
  format_single <- function(x, ...) {
    checkmate::assert_scalar(x, na.ok = TRUE)
    h_format_threshold(x = x, digits = digits)
  }

  format_single
}

#' Formatting Extreme Values Part of a Confidence Interval
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formatting Function for extreme values part of a confidence interval. Values
#' are formatted as e.g. "(xx.xx, xx.xx)" if the number of `digits` is 2.
#'
#' @inheritParams extreme_format
#'
#' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme
#'   values confidence interval.
#'
#' @examples
#' format_fun <- format_extreme_values_ci(2L)
#' format_fun(x = c(0.127, Inf))
#' format_fun(x = c(0, 0.009))
#'
#' @family formatting functions
#' @export
format_extreme_values_ci <- function(digits = 2L) {
  # The returned formatter expects the two interval bounds in `x` and
  # formats each one independently before joining them.
  format_interval <- function(x, ...) {
    checkmate::assert_vector(x, len = 2)

    lower <- h_format_threshold(x = x[1], digits = digits)
    upper <- h_format_threshold(x = x[2], digits = digits)

    paste0("(", paste(lower, upper, sep = ", "), ")")
  }

  format_interval
}

#' Helper Functions for Tabulating Binary Response by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions that tabulate in a data frame statistics such as response rate
#' and odds ratio for population subgroups.
#'
#' @inheritParams argument_convention
#' @inheritParams response_subgroups
#' @param arm (`factor`)\cr the treatment group variable.
#'
#' @details Main functionality is to prepare data for use in a layout-creating function.
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
#'   droplevels() %>%
#'   mutate(
#'     # Reorder levels of factor to make the placebo group the reference arm.
#'     ARM = fct_relevel(ARM, "B: Placebo"),
#'     rsp = AVALC == "CR"
#'   )
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' @name h_response_subgroups
NULL

#' @describeIn h_response_subgroups helper to prepare a data frame of binary responses by arm.
#'
#' @return
#' * `h_proportion_df()` returns a `data.frame` with columns `arm`, `n`, `n_rsp`, and `prop`.
#'
#' @examples
#' h_proportion_df(
#'   c(TRUE, FALSE, FALSE),
#'   arm = factor(c("A", "A", "B"), levels = c("A", "B"))
#' )
#'
#' @export
h_proportion_df <- function(rsp, arm) {
  checkmate::assert_logical(rsp)
  assert_valid_factor(arm, len = length(rsp))

  # Observations with a missing response are excluded from all counts.
  has_rsp <- !is.na(rsp)
  rsp <- rsp[has_rsp]
  arm <- arm[has_rsp]

  # One-row summary for a single arm: `x` holds that arm's responses and
  # `arm` is the arm's level name.
  summarize_arm <- function(x, arm) {
    # An arm without observations still gets a row, with NA statistics.
    if (length(x) == 0) {
      return(
        data.frame(
          arm = arm,
          n = 0L,
          n_rsp = NA,
          prop = NA,
          stringsAsFactors = FALSE
        )
      )
    }

    # `n_prop` is read as (number of responders, proportion).
    s_prop <- s_proportion(df = x)
    data.frame(
      arm = arm,
      n = length(x),
      n_rsp = unname(s_prop$n_prop[1]),
      prop = unname(s_prop$n_prop[2]),
      stringsAsFactors = FALSE
    )
  }

  # split() yields one element per factor level, so empty levels are kept.
  rsp_by_arm <- split(rsp, arm)
  rows <- Map(summarize_arm, rsp_by_arm, names(rsp_by_arm))

  df <- do.call(rbind, args = c(rows, make.row.names = FALSE))
  # Restore the original level order of the arm factor.
  df$arm <- factor(df$arm, levels = levels(arm))
  df
}

#' @describeIn h_response_subgroups summarizes proportion of binary responses by arm and across subgroups
#'    in a data frame. `variables` corresponds to the names of variables found in `data`, passed as a named list and
#'    requires elements `rsp`, `arm` and optionally `subgroups`. `groups_lists` optionally specifies
#'    groupings for `subgroups` variables.
#'
#' @return
#' * `h_proportion_subgroups_df()` returns a `data.frame` with columns `arm`, `n`, `n_rsp`, `prop`, `subgroup`,
#'   `var`, `var_label`, and `row_type`.
#'
#' @examples
#' h_proportion_subgroups_df(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f
#' )
#'
#' # Define groupings for BMRKR2 levels.
#' h_proportion_subgroups_df(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#'
#' @export
h_proportion_subgroups_df <- function(variables,
                                      data,
                                      groups_lists = list(),
                                      label_all = "All Patients") {
  checkmate::assert_character(variables$rsp)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  assert_df_with_variables(data, variables)
  checkmate::assert_string(label_all)

  # Overall ("all patients") rows come first and are flagged as content rows.
  result_all <- h_proportion_df(data[[variables$rsp]], data[[variables$arm]])
  result_all$subgroup <- label_all
  result_all$var <- "ALL"
  result_all$var_label <- label_all
  result_all$row_type <- "content"

  # Without subgroup variables the overall rows are the whole result.
  if (is.null(variables$subgroups)) {
    return(result_all)
  }

  # Each element of `l_data` provides the subgroup's data (`df`) and its
  # one-row label frame (`df_labels`).
  l_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)

  per_subgroup <- function(grp) {
    result <- h_proportion_df(grp$df[[variables$rsp]], grp$df[[variables$arm]])
    # Repeat the label row so that every arm row carries the subgroup labels.
    result_labels <- grp$df_labels[rep(1, times = nrow(result)), ]
    cbind(result, result_labels)
  }
  l_result <- lapply(l_data, per_subgroup)

  result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE))
  result_subgroups$row_type <- "analysis"

  rbind(
    result_all,
    result_subgroups
  )
}

#' @describeIn h_response_subgroups helper to prepare a data frame with estimates of
#'   the odds ratio between a treatment and a control arm.
#'
#' @inheritParams response_subgroups
#' @param strata_data (`factor`, `data.frame` or `NULL`)\cr required if stratified analysis is performed.
#'
#' @return
#' * `h_odds_ratio_df()` returns a `data.frame` with columns `arm`, `n_tot`, `or`, `lcl`, `ucl`, `conf_level`, and
#'   optionally `pval` and `pval_label`.
#'
#' @examples
#' # Unstratified analysis.
#' h_odds_ratio_df(
#'   c(TRUE, FALSE, FALSE, TRUE),
#'   arm = factor(c("A", "A", "B", "B"), levels = c("A", "B"))
#' )
#'
#' # Include p-value.
#' h_odds_ratio_df(adrs_f$rsp, adrs_f$ARM, method = "chisq")
#'
#' # Stratified analysis.
#' h_odds_ratio_df(
#'   rsp = adrs_f$rsp,
#'   arm = adrs_f$ARM,
#'   strata_data = adrs_f[, c("STRATA1", "STRATA2")],
#'   method = "cmh"
#' )
#'
#' @export
h_odds_ratio_df <- function(rsp, arm, strata_data = NULL, conf_level = 0.95, method = NULL) {
  assert_valid_factor(arm, n.levels = 2, len = length(rsp))

  df_rsp <- data.frame(
    rsp = rsp,
    arm = arm
  )

  # For a stratified analysis all stratification variables are collapsed
  # into a single factor stored in the column "strata".
  strata_name <- NULL
  if (!is.null(strata_data)) {
    strata_var <- interaction(strata_data, drop = TRUE)
    strata_name <- "strata"

    assert_valid_factor(strata_var, len = nrow(df_rsp))

    df_rsp[[strata_name]] <- strata_var
  }

  # First level of `arm` is the reference arm, second the comparison arm.
  l_df <- split(df_rsp, arm)
  n_ref <- nrow(l_df[[1]])
  n_trt <- nrow(l_df[[2]])
  with_test <- !is.null(method)

  # At least one arm is empty: no model can be fitted, so every estimate
  # is NA.
  if (n_ref == 0 || n_trt == 0) {
    # With exactly one empty arm n_tot is obtained from the data; with both
    # arms empty it is zero.
    n_tot <- if (n_ref == 0 && n_trt == 0) {
      0L
    } else {
      sum(stats::complete.cases(df_rsp))
    }

    df <- data.frame(
      # Dummy column needed downstream to create a nested header.
      arm = " ",
      n_tot = n_tot,
      or = NA,
      lcl = NA,
      ucl = NA,
      conf_level = conf_level,
      stringsAsFactors = FALSE
    )

    if (with_test) {
      df$pval <- NA
      df$pval_label <- NA
    }

    return(df)
  }

  # Both arms have data: odds ratio and CI.
  result_odds_ratio <- s_odds_ratio(
    df = l_df[[2]],
    .var = "rsp",
    .ref_group = l_df[[1]],
    .in_ref_col = FALSE,
    .df_row = df_rsp,
    variables = list(arm = "arm", strata = strata_name),
    conf_level = conf_level
  )

  df <- data.frame(
    # Dummy column needed downstream to create a nested header.
    arm = " ",
    n_tot = unname(result_odds_ratio$n_tot["n_tot"]),
    or = unname(result_odds_ratio$or_ci["est"]),
    lcl = unname(result_odds_ratio$or_ci["lcl"]),
    ucl = unname(result_odds_ratio$or_ci["ucl"]),
    conf_level = conf_level,
    stringsAsFactors = FALSE
  )

  # Optional test for a difference between the arms.
  if (with_test) {
    result_test <- s_test_proportion_diff(
      df = l_df[[2]],
      .var = "rsp",
      .ref_group = l_df[[1]],
      .in_ref_col = FALSE,
      variables = list(strata = strata_name),
      method = method
    )

    df$pval <- as.numeric(result_test$pval)
    df$pval_label <- obj_label(result_test$pval)
  }

  df
}

#' @describeIn h_response_subgroups summarizes estimates of the odds ratio between a treatment and a control
#'   arm across subgroups in a data frame. `variables` corresponds to the names of variables found in
#'   `data`, passed as a named list and requires elements `rsp`, `arm` and optionally `subgroups`
#'   and `strat`. `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @return
#' * `h_odds_ratio_subgroups_df()` returns a `data.frame` with columns `arm`, `n_tot`, `or`, `lcl`, `ucl`,
#'   `conf_level`, `subgroup`, `var`, `var_label`, and `row_type`.
#'
#' @examples
#' # Unstratified analysis.
#' h_odds_ratio_subgroups_df(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f
#' )
#'
#' # Stratified analysis.
#' h_odds_ratio_subgroups_df(
#'   variables = list(
#'     rsp = "rsp",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2"),
#'     strat = c("STRATA1", "STRATA2")
#'   ),
#'   data = adrs_f
#' )
#'
#' # Define groupings of BMRKR2 levels.
#' h_odds_ratio_subgroups_df(
#'   variables = list(
#'     rsp = "rsp",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adrs_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#'
#' @export
h_odds_ratio_subgroups_df <- function(variables,
                                      data,
                                      groups_lists = list(),
                                      conf_level = 0.95,
                                      method = NULL,
                                      label_all = "All Patients") {
  checkmate::assert_character(variables$rsp)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_character(variables$strat, null.ok = TRUE)
  assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  assert_df_with_variables(data, variables)
  checkmate::assert_string(label_all)

  # Stratification columns of a data frame, or NULL for an unstratified
  # analysis.
  strata_of <- function(d) {
    if (is.null(variables$strat)) {
      return(NULL)
    }
    d[, variables$strat, drop = FALSE]
  }

  # Odds ratio row for one data set (the full data or a single subgroup).
  odds_ratio_of <- function(d) {
    h_odds_ratio_df(
      rsp = d[[variables$rsp]],
      arm = d[[variables$arm]],
      strata_data = strata_of(d),
      conf_level = conf_level,
      method = method
    )
  }

  # Overall ("all patients") row comes first and is flagged as a content row.
  result_all <- odds_ratio_of(data)
  result_all$subgroup <- label_all
  result_all$var <- "ALL"
  result_all$var_label <- label_all
  result_all$row_type <- "content"

  # Without subgroup variables the overall row is the whole result.
  if (is.null(variables$subgroups)) {
    return(result_all)
  }

  # Each element of `l_data` provides the subgroup's data (`df`) and its
  # one-row label frame (`df_labels`).
  l_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)

  l_result <- lapply(l_data, function(grp) {
    result <- odds_ratio_of(grp$df)
    result_labels <- grp$df_labels[rep(1, times = nrow(result)), ]
    cbind(result, result_labels)
  })

  result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE))
  result_subgroups$row_type <- "analysis"

  rbind(
    result_all,
    result_subgroups
  )
}

#' Incidence Rate
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Estimate the event rate adjusted for person-years at risk, otherwise known
#' as incidence rate. Primary analysis variable is the person-years at risk.
#'
#' @inheritParams argument_convention
#' @param control (`list`)\cr parameters for estimation details, specified by using
#'   the helper function [control_incidence_rate()]. Possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level for the estimated incidence rate.
#'   * `conf_type` (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar`
#'     for confidence interval type.
#'   * `time_unit_input` (`string`)\cr `day`, `week`, `month`, or `year` (default)
#'     indicating time unit for data input.
#'   * `time_unit_output` (`numeric`)\cr time unit for desired output (in person-years).
#' @param person_years (`numeric`)\cr total person-years at risk.
#' @param alpha (`numeric`)\cr two-sided alpha-level for confidence interval.
#' @param n_events (`integer`)\cr number of events observed.
#'
#' @seealso [control_incidence_rate()] and helper functions [h_incidence_rate].
#'
#' @name incidence_rate
NULL

#' @describeIn incidence_rate Statistics function which estimates the incidence rate and the
#'   associated confidence interval.
#'
#' @return
#' * `s_incidence_rate()` returns the following statistics:
#'   - `person_years`: Total person-years at risk.
#'   - `n_events`: Total number of events observed.
#'   - `rate`: Estimated incidence rate.
#'   - `rate_ci`: Confidence interval for the incidence rate.
#'
#' @examples
#' library(dplyr)
#'
#' df <- data.frame(
#'   USUBJID = as.character(seq(6)),
#'   CNSR = c(0, 1, 1, 0, 0, 0),
#'   AVAL = c(10.1, 20.4, 15.3, 20.8, 18.7, 23.4),
#'   ARM = factor(c("A", "A", "A", "B", "B", "B"))
#' ) %>%
#'   mutate(is_event = CNSR == 0) %>%
#'   mutate(n_events = as.integer(is_event))
#'
#' # Internal function - s_incidence_rate
#' \dontrun{
#' s_incidence_rate(
#'   df,
#'   .var = "AVAL",
#'   n_events = "n_events",
#'   control = control_incidence_rate(
#'     time_unit_input = "month",
#'     time_unit_output = 100
#'   )
#' )
#' }
#'
#' @keywords internal
s_incidence_rate <- function(df,
                             .var,
                             n_events,
                             is_event,
                             control = control_incidence_rate()) {
  if (missing(is_event)) {
    # Regular path: `n_events` names an integer column of event counts.
    assert_df_with_variables(df, list(tte = .var, n_events = n_events))
    checkmate::assert_string(.var)
    checkmate::assert_numeric(df[[.var]], any.missing = FALSE)
    checkmate::assert_integer(df[[n_events]], any.missing = FALSE)
  } else {
    warning("argument is_event will be deprecated. Please use n_events.")

    # Legacy path: the logical `is_event` column is only used when no
    # `n_events` column was supplied. If both are supplied, `n_events` wins
    # and no input validation is performed here.
    if (missing(n_events)) {
      assert_df_with_variables(df, list(tte = .var, is_event = is_event))
      checkmate::assert_string(.var)
      checkmate::assert_logical(df[[is_event]], any.missing = FALSE)
      checkmate::assert_numeric(df[[.var]], any.missing = FALSE)
      n_events <- is_event
    }
  }

  unit_in <- control$time_unit_input
  unit_out <- control$time_unit_output
  level <- control$conf_level

  # Years per input time unit. Exactly one comparison is TRUE for a
  # supported unit; an unsupported unit yields a factor of zero.
  years_per_unit <- 1 * (unit_in == "year") +
    1 / 12 * (unit_in == "month") +
    1 / 52.14 * (unit_in == "week") +
    1 / 365.24 * (unit_in == "day")

  total_person_years <- sum(df[[.var]], na.rm = TRUE) * years_per_unit
  total_events <- sum(df[[n_events]], na.rm = TRUE)

  estimate <- h_incidence_rate(
    total_person_years,
    total_events,
    control
  )

  list(
    person_years = formatters::with_label(total_person_years, "Total patient-years at risk"),
    n_events = formatters::with_label(total_events, "Number of adverse events observed"),
    rate = formatters::with_label(estimate$rate, paste("AE rate per", unit_out, "patient-years")),
    rate_ci = formatters::with_label(estimate$rate_ci, f_conf_level(level))
  )
}

#' @describeIn incidence_rate Formatted analysis function which is used as `afun`
#'   in `estimate_incidence_rate()`.
#'
#' @return
#' * `a_incidence_rate()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_incidence_rate
#' \dontrun{
#' a_incidence_rate(
#'   df,
#'   .var = "AVAL",
#'   n_events = "n_events",
#'   control = control_incidence_rate(time_unit_input = "month", time_unit_output = 100)
#' )
#' }
#'
#' @keywords internal
# Formatted analysis function built from `s_incidence_rate()`. The names in
# `.formats` match the elements of the list returned by `s_incidence_rate()`
# (`person_years`, `n_events`, `rate`, `rate_ci`).
a_incidence_rate <- make_afun(
  s_incidence_rate,
  .formats = c(
    "person_years" = "xx.x",
    "n_events" = "xx",
    "rate" = "xx.xx",
    # The confidence interval holds two values, hence two placeholders.
    "rate_ci" = "(xx.xx, xx.xx)"
  )
)

#' @describeIn incidence_rate Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `estimate_incidence_rate()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_incidence_rate()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   estimate_incidence_rate(
#'     vars = "AVAL",
#'     n_events = "n_events",
#'     control = control_incidence_rate(
#'       time_unit_input = "month",
#'       time_unit_output = 100
#'     )
#'   ) %>%
#'   build_table(df)
#'
#' @export
estimate_incidence_rate <- function(lyt,
                                    vars,
                                    ...,
                                    show_labels = "hidden",
                                    table_names = vars,
                                    .stats = NULL,
                                    .formats = NULL,
                                    .labels = NULL,
                                    .indent_mods = NULL) {
  # Customise the default analysis function with the user's selection of
  # statistics, formats, labels and indentation.
  customized_afun <- make_afun(
    a_incidence_rate,
    .indent_mods = .indent_mods,
    .labels = .labels,
    .formats = .formats,
    .stats = .stats
  )

  # Everything passed via `...` is forwarded to the analysis function.
  analyze(
    lyt,
    vars,
    afun = customized_afun,
    extra_args = list(...),
    show_labels = show_labels,
    table_names = table_names
  )
}

#' Helper Functions for Incidence Rate
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param control (`list`)\cr parameters for estimation details, specified by using
#'   the helper function [control_incidence_rate()]. Possible parameter options are:
#'   * `conf_level`: (`proportion`)\cr confidence level for the estimated incidence rate.
#'   * `conf_type`: (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar`
#'     for confidence interval type.
#'   * `time_unit_input`: (`string`)\cr `day`, `week`, `month`, or `year` (default)
#'     indicating time unit for data input.
#'   * `time_unit_output`: (`numeric`)\cr time unit for desired output (in person-years).
#' @param person_years (`numeric`)\cr total person-years at risk.
#' @param alpha (`numeric`)\cr two-sided alpha-level for confidence interval.
#' @param n_events (`integer`)\cr number of events observed.
#'
#' @return Estimated incidence rate `rate` and associated confidence interval `rate_ci`.
#'
#' @seealso [incidence_rate]
#'
#' @name h_incidence_rate
NULL

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated confidence interval based on the normal approximation for the
#'   incidence rate. Unit is one person-year.
#'
#' @examples
#' h_incidence_rate_normal(200, 2)
#'
#' @export
h_incidence_rate_normal <- function(person_years,
                                    n_events,
                                    alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  # Point estimate: events per person-year.
  rate <- n_events / person_years
  # Standard error of the rate, sqrt(rate / person_years).
  std_err <- sqrt(rate / person_years)
  # Two-sided normal quantile for the requested alpha.
  z <- stats::qnorm(1 - alpha / 2)

  list(
    rate = rate,
    rate_ci = rate + c(-1, 1) * z * std_err
  )
}

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated confidence interval based on the normal approximation for the
#'   logarithm of the incidence rate. Unit is one person-year.
#'
#' @examples
#' h_incidence_rate_normal_log(200, 2)
#'
#' @export
h_incidence_rate_normal_log <- function(person_years,
                                        n_events,
                                        alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  # Rate and its standard error on the original scale.
  rate <- n_events / person_years
  std_err <- sqrt(rate / person_years)

  # The interval is built on the log scale and then transformed back.
  log_rate <- log(rate)
  log_std_err <- std_err / rate
  z <- stats::qnorm(1 - alpha / 2)

  list(
    rate = rate,
    rate_ci = exp(log_rate + c(-1, 1) * z * log_std_err)
  )
}

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated exact confidence interval. Unit is one person-year.
#'
#' @examples
#' h_incidence_rate_exact(200, 2)
#'
#' @export
h_incidence_rate_exact <- function(person_years,
                                   n_events,
                                   alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  rate <- n_events / person_years

  # Both limits are chi-square quantiles divided by twice the person-years;
  # the upper limit uses two additional degrees of freedom.
  denominator <- 2 * person_years
  lower <- stats::qchisq(p = alpha / 2, df = 2 * n_events) / denominator
  upper <- stats::qchisq(p = 1 - alpha / 2, df = 2 * n_events + 2) / denominator

  list(rate = rate, rate_ci = c(lower, upper))
}

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated Byar's confidence interval. Unit is one person-year.
#'
#' @examples
#' h_incidence_rate_byar(200, 2)
#'
#' @export
h_incidence_rate_byar <- function(person_years,
                                  n_events,
                                  alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  rate <- n_events / person_years

  # Event count shifted by 0.5; it appears in all three terms below.
  shifted_events <- n_events + 0.5
  center <- 1 - 1 / (9 * shifted_events)
  half_width <- stats::qnorm(1 - alpha / 2) * sqrt(1 / shifted_events) / 3

  # Limits: shifted count times the cubed (center -/+ half width), per
  # person-year.
  lower <- shifted_events * ((center - half_width)^3) / person_years
  upper <- shifted_events * ((center + half_width)^3) / person_years

  list(rate = rate, rate_ci = c(lower, upper))
}

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated confidence interval.
#'
#' @examples
#' # Internal function - h_incidence_rate
#' \dontrun{
#' h_incidence_rate(200, 2)
#'
#' h_incidence_rate(
#'   200,
#'   2,
#'   control_incidence_rate(
#'     conf_level = 0.9,
#'     conf_type = "normal_log",
#'     time_unit_output = 100
#'   )
#' )
#' }
#'
#' @keywords internal
h_incidence_rate <- function(person_years,
                             n_events,
                             control = control_incidence_rate()) {
  # The helpers take a two-sided alpha rather than a confidence level.
  alpha <- 1 - control$conf_level

  # Dispatch on the requested confidence interval type; every helper
  # returns the rate and interval per single person-year.
  per_year <- switch(control$conf_type,
    normal = h_incidence_rate_normal(person_years, n_events, alpha),
    normal_log = h_incidence_rate_normal_log(person_years, n_events, alpha),
    exact = h_incidence_rate_exact(person_years, n_events, alpha),
    byar = h_incidence_rate_byar(person_years, n_events, alpha)
  )

  # Rescale from "per person-year" to "per `time_unit_output` person-years".
  scale <- control$time_unit_output
  list(
    rate = per_year$rate * scale,
    rate_ci = per_year$rate_ci * scale
  )
}

#' Summarize the Change from Baseline or Absolute Baseline Values
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The primary analysis variable `.var` indicates the numerical change from baseline results,
#' and additional required secondary analysis variables are `value` and `baseline_flag`.
#' Depending on the baseline flag, either the absolute baseline values (at baseline)
#' or the change from baseline values (post-baseline) are then summarized.
#'
#' @inheritParams argument_convention
#'
#' @name summarize_change
NULL

#' @describeIn summarize_change Statistics function that summarizes baseline or post-baseline visits.
#'
#' @return
#' * `s_change_from_baseline()` returns the same values returned by [s_summary.numeric()].
#'
#' @note The data in `df` must either all be from baseline visits or all be from post-baseline visits. Otherwise
#'   an error will be thrown.
#'
#' @examples
#' df <- data.frame(
#'   chg = c(1, 2, 3),
#'   is_bl = c(TRUE, TRUE, TRUE),
#'   val = c(4, 5, 6)
#' )
#'
#' # Internal function - s_change_from_baseline
#' \dontrun{
#' s_change_from_baseline(
#'   df,
#'   .var = "chg",
#'   variables = list(value = "val", baseline_flag = "is_bl")
#' )
#' }
#'
#' @keywords internal
s_change_from_baseline <- function(df,
                                   .var,
                                   variables,
                                   na.rm = TRUE, # nolint
                                   ...) {
  checkmate::assert_numeric(df[[variables$value]])
  checkmate::assert_numeric(df[[.var]])
  checkmate::assert_logical(df[[variables$baseline_flag]])
  # The flag may take at most one distinct value, i.e. the data must not mix
  # baseline and post-baseline records.
  checkmate::assert_vector(unique(df[[variables$baseline_flag]]), max.len = 1)
  assert_df_with_variables(df, c(variables, list(chg = .var)))

  # Baseline records contribute the absolute value, all other records the
  # change from baseline.
  is_baseline <- df[[variables$baseline_flag]]
  combined <- ifelse(is_baseline, df[[variables$value]], df[[.var]])

  # ifelse() on empty input returns logical(0); the summary expects numeric.
  if (is.logical(combined) && length(combined) == 0L) {
    combined <- numeric(0)
  }

  s_summary(combined, na.rm = na.rm, ...)
}

#' @describeIn summarize_change Formatted analysis function which is used as `afun` in `summarize_change()`.
#'
#' @return
#' * `a_change_from_baseline()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_change_from_baseline
#' \dontrun{
#' a_change_from_baseline(
#'   df,
#'   .var = "chg",
#'   variables = list(value = "val", baseline_flag = "is_bl")
#' )
#' }
#'
#' @keywords internal
# Formatted analysis function built from `s_change_from_baseline()`, which
# forwards to `s_summary()`; the names below are presumably the statistic
# names produced by `s_summary()` for numeric input (TODO confirm).
a_change_from_baseline <- make_afun(
  s_change_from_baseline,
  # Default display format per statistic; statistics made of two values
  # use two placeholders.
  .formats = c(
    n = "xx",
    mean_sd = "xx.xx (xx.xx)",
    mean_se = "xx.xx (xx.xx)",
    median = "xx.xx",
    range = "xx.xx - xx.xx",
    mean_ci = "(xx.xx, xx.xx)",
    median_ci = "(xx.xx, xx.xx)",
    mean_pval = "xx.xx"
  ),
  # Row labels are given only for these four statistics.
  .labels = c(
    mean_sd = "Mean (SD)",
    mean_se = "Mean (SE)",
    median = "Median",
    range = "Min - Max"
  )
)

#' @describeIn summarize_change Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `summarize_change()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_change_from_baseline()` to the table layout.
#'
#' @note To be used after a split on visits in the layout, such that each data subset only contains
#'   either baseline or post-baseline data.
#'
#' @examples
#' # `summarize_change()`
#'
#' ## Fabricated dataset.
#' library(dplyr)
#'
#' dta_test <- data.frame(
#'   USUBJID = rep(1:6, each = 3),
#'   AVISIT = rep(paste0("V", 1:3), 6),
#'   ARM = rep(LETTERS[1:3], rep(6, 3)),
#'   AVAL = c(9:1, rep(NA, 9))
#' ) %>%
#'   mutate(ABLFLL = AVISIT == "V1") %>%
#'   group_by(USUBJID) %>%
#'   mutate(
#'     BLVAL = AVAL[ABLFLL],
#'     CHG = AVAL - BLVAL
#'   ) %>%
#'   ungroup()
#'
#' results <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("AVISIT") %>%
#'   summarize_change("CHG", variables = list(value = "AVAL", baseline_flag = "ABLFLL")) %>%
#'   build_table(dta_test)
#' \dontrun{
#' Viewer(results)
#' }
#'
#' @export
summarize_change <- function(lyt,
                             vars,
                             ...,
                             table_names = vars,
                             .stats = c("n", "mean_sd", "median", "range"),
                             .formats = NULL,
                             .labels = NULL,
                             .indent_mods = NULL) {
  # Specialise the formatted analysis function with the caller's statistic selection,
  # formats, labels and indent modifiers.
  # NOTE(review): keep the local name `afun`; `analyze()` may derive defaults from the
  # expression passed as `afun` - confirm before renaming.
  afun <- make_afun(
    a_change_from_baseline,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Arguments given through `...` are handed to the analysis function as extra arguments.
  analyze(
    lyt = lyt,
    vars = vars,
    afun = afun,
    table_names = table_names,
    extra_args = list(...)
  )
}

#' Control Function for CoxPH Model
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for CoxPH model, typically used internally to specify
#' details of CoxPH model for [s_coxph_pairwise()]. `conf_level` refers to Hazard Ratio estimation.
#'
#' @inheritParams argument_convention
#' @param pval_method (`string`)\cr p-value method for testing hazard ratio = 1.
#'   Default method is `"log-rank"`, can also be set to `"wald"` or `"likelihood"`.
#' @param ties (`string`)\cr specifying the method for tie handling. Default is `"efron"`,
#'   can also be set to `"breslow"` or `"exact"`. See more in [survival::coxph()].
#'
#' @return A list of components with the same names as the arguments
#'
#' @export
control_coxph <- function(pval_method = c("log-rank", "wald", "likelihood"),
                          ties = c("efron", "breslow", "exact"),
                          conf_level = 0.95) {
  # Resolve each enumerated argument to a single valid choice (first entry by default).
  chosen_pval_method <- match.arg(pval_method)
  chosen_ties <- match.arg(ties)
  assert_proportion_value(conf_level)

  # Components carry the same names as the arguments.
  settings <- list(
    pval_method = chosen_pval_method,
    ties = chosen_ties,
    conf_level = conf_level
  )
  settings
}

#' Control Function for `survfit` Model for Survival Time
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for `survfit` model, typically used internally to specify
#' details of `survfit` model for [s_surv_time()]. `conf_level` refers to survival time estimation.
#'
#' @inheritParams argument_convention
#' @param conf_type (`string`)\cr confidence interval type. Options are "plain" (default), "log", "log-log",
#'   see more in [survival::survfit()]. Note option "none" is no longer supported.
#' @param quantiles (`numeric`)\cr of length two to specify the quantiles of survival time.
#'
#' @return A list of components with the same names as the arguments
#'
#' @export
control_surv_time <- function(conf_level = 0.95,
                              conf_type = c("plain", "log", "log-log"),
                              quantiles = c(0.25, 0.75)) {
  chosen_conf_type <- match.arg(conf_type)

  # Exactly two distinct, increasing quantiles within [0, 1] ...
  checkmate::assert_numeric(quantiles, lower = 0, upper = 1, len = 2, unique = TRUE, sorted = TRUE)
  # ... each of which must also pass the proportion check; only the side effect matters.
  lapply(quantiles, assert_proportion_value)
  assert_proportion_value(conf_level)

  settings <- list(
    conf_level = conf_level,
    conf_type = chosen_conf_type,
    quantiles = quantiles
  )
  settings
}

#' Control Function for `survfit` Model for Patient's Survival Rate at time point
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for `survfit` model, typically used internally to specify
#' details of `survfit` model for [s_surv_timepoint()]. `conf_level` refers to patient risk estimation at a time point.
#'
#' @inheritParams argument_convention
#' @inheritParams control_surv_time
#'
#' @return A list of components with the same names as the arguments
#'
#' @export
control_surv_timepoint <- function(conf_level = 0.95,
                                   conf_type = c("plain", "log", "log-log")) {
  # Resolve the confidence interval type to a single valid choice, then validate the level.
  chosen_conf_type <- match.arg(conf_type)
  assert_proportion_value(conf_level)

  settings <- list(conf_level = conf_level, conf_type = chosen_conf_type)
  settings
}

#' Add Titles, Footnotes, Page Number, and a Bounding Box to a Grid Grob
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This function is useful to label grid grobs (also `ggplot2`, and `lattice` plots)
#' with title, footnote, and page numbers.
#'
#' @inheritParams grid::grob
#' @param grob a grid grob object, optionally `NULL` if only a `grob` with the decoration should be shown.
#' @param titles vector of character strings. Vector elements are separated by a newline and strings are wrapped
#'   according to the page width.
#' @param footnotes vector of character strings. Same rules as for `titles`.
#' @param page string with page numeration, if `NULL` then no page number is displayed.
#' @param width_titles unit object
#' @param width_footnotes unit object
#' @param border boolean, whether a border should be drawn around the plot or not.
#' @param margins unit object of length 4
#' @param padding  unit object of length 4
#' @param outer_margins  unit object of length 4
#' @param gp_titles a `gpar` object
#' @param gp_footnotes a `gpar` object
#'
#' @return A grid grob (`gTree`).
#'
#' @details The titles and footnotes will be ragged, i.e. each title will be wrapped individually.
#'
#' @examples
#' library(grid)
#'
#' titles <- c(
#'   "Edgar Anderson's Iris Data",
#'   paste(
#'     "This famous (Fisher's or Anderson's) iris data set gives the measurements",
#'     "in centimeters of the variables sepal length and width and petal length",
#'     "and width, respectively, for 50 flowers from each of 3 species of iris."
#'   )
#' )
#'
#' footnotes <- c(
#'   "The species are Iris setosa, versicolor, and virginica.",
#'   paste(
#'     "iris is a data frame with 150 cases (rows) and 5 variables (columns) named",
#'     "Sepal.Length, Sepal.Width, Petal.Length, Petal.Width, and Species."
#'   )
#' )
#'
#' ## empty plot
#' grid.newpage()
#'
#' grid.draw(
#'   decorate_grob(
#'     NULL,
#'     titles = titles,
#'     footnotes = footnotes,
#'     page = "Page 4 of 10"
#'   )
#' )
#'
#' # grid
#' p <- gTree(
#'   children = gList(
#'     rectGrob(),
#'     xaxisGrob(),
#'     yaxisGrob(),
#'     textGrob("Sepal.Length", y = unit(-4, "lines")),
#'     textGrob("Petal.Length", x = unit(-3.5, "lines"), rot = 90),
#'     pointsGrob(iris$Sepal.Length, iris$Petal.Length, gp = gpar(col = iris$Species), pch = 16)
#'   ),
#'   vp = vpStack(plotViewport(), dataViewport(xData = iris$Sepal.Length, yData = iris$Petal.Length))
#' )
#' grid.newpage()
#' grid.draw(p)
#'
#' grid.newpage()
#' grid.draw(
#'   decorate_grob(
#'     grob = p,
#'     titles = titles,
#'     footnotes = footnotes,
#'     page = "Page 6 of 129"
#'   )
#' )
#'
#' ## with ggplot2
#' library(ggplot2)
#'
#' p_gg <- ggplot2::ggplot(iris, aes(Sepal.Length, Sepal.Width, col = Species)) +
#'   ggplot2::geom_point()
#' p_gg
#' p <- ggplotGrob(p_gg)
#' grid.newpage()
#' grid.draw(
#'   decorate_grob(
#'     grob = p,
#'     titles = titles,
#'     footnotes = footnotes,
#'     page = "Page 6 of 129"
#'   )
#' )
#'
#' ## with lattice
#' library(lattice)
#'
#' xyplot(Sepal.Length ~ Petal.Length, data = iris, col = iris$Species)
#' p <- grid.grab()
#' grid.newpage()
#' grid.draw(
#'   decorate_grob(
#'     grob = p,
#'     titles = titles,
#'     footnotes = footnotes,
#'     page = "Page 6 of 129"
#'   )
#' )
#'
#' # with gridExtra - no borders
#' library(gridExtra)
#' grid.newpage()
#' grid.draw(
#'   decorate_grob(
#'     tableGrob(
#'       head(mtcars)
#'     ),
#'     titles = "title",
#'     footnotes = "footnote",
#'     border = FALSE
#'   )
#' )
#'
#' @export
decorate_grob <- function(grob,
                          titles,
                          footnotes,
                          page = "",
                          width_titles = grid::unit(1, "npc") - grid::stringWidth(page),
                          width_footnotes = grid::unit(1, "npc") - grid::stringWidth(page),
                          border = TRUE,
                          margins = grid::unit(c(1, 0, 1, 0), "lines"),
                          padding = grid::unit(rep(1, 4), "lines"),
                          outer_margins = grid::unit(c(2, 1.5, 3, 1.5), "cm"),
                          gp_titles = grid::gpar(),
                          gp_footnotes = grid::gpar(fontsize = 8),
                          name = NULL,
                          gp = grid::gpar(),
                          vp = NULL) {
  # Wrapped title text, anchored top-left in row 1 of the three-row layout defined below.
  st_titles <- split_text_grob(
    titles,
    x = 0, y = 1,
    just = c("left", "top"),
    width = width_titles,
    vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 1),
    gp = gp_titles
  )

  # Wrapped footnote text, anchored top-left in row 3 of the same layout.
  st_footnotes <- split_text_grob(
    footnotes,
    x = 0, y = 1,
    just = c("left", "top"),
    width = width_footnotes,
    vp = grid::viewport(layout.pos.row = 3, layout.pos.col = 1),
    gp = gp_footnotes
  )

  grid::gTree(
    # The inputs are stored as fields on the returned object; `validDetails.decoratedGrob()`
    # reads several of them (`titles`, `footnotes`, `grob`, `page`, margins and padding).
    grob = grob,
    titles = titles,
    footnotes = footnotes,
    page = page,
    width_titles = width_titles,
    width_footnotes = width_footnotes,
    border = border,
    margins = margins,
    padding = padding,
    outer_margins = outer_margins,
    gp_titles = gp_titles,
    gp_footnotes = gp_footnotes,
    children = grid::gList(
      grid::gTree(
        children = grid::gList(
          st_titles,
          # Middle row: optional border rectangle plus the user grob inset by `padding`.
          grid::gTree(
            children = grid::gList(
              # Evaluates to NULL when `border` is FALSE.
              # NOTE(review): relies on gList() accepting NULL entries - confirm.
              if (border) grid::rectGrob(),
              grid::gTree(
                children = grid::gList(
                  grob
                ),
                vp = grid::plotViewport(margins = padding)
              )
            ),
            vp = grid::vpStack(
              grid::viewport(layout.pos.row = 2, layout.pos.col = 1),
              grid::plotViewport(margins = margins)
            )
          ),
          st_footnotes,
          # Page label, bottom-right of the footnote row, drawn with the footnote graphics parameters.
          grid::textGrob(
            page,
            x = 1, y = 0,
            just = c("right", "bottom"),
            vp = grid::viewport(layout.pos.row = 3, layout.pos.col = 1),
            gp = gp_footnotes
          )
        ),
        childrenvp = NULL,
        name = "titles_grob_footnotes",
        vp = grid::vpStack(
          grid::plotViewport(margins = outer_margins),
          grid::viewport(
            # Rows 1 and 3 are sized to the title and footnote grobs; row 2 is given one "null" unit.
            layout = grid::grid.layout(
              nrow = 3, ncol = 1,
              heights = grid::unit.c(
                grid::grobHeight(st_titles),
                grid::unit(1, "null"),
                grid::grobHeight(st_footnotes)
              )
            )
          )
        )
      )
    ),
    name = name,
    gp = gp,
    vp = vp,
    # Class used for dispatch of the `*.decoratedGrob` methods.
    cl = "decoratedGrob"
  )
}

#' @importFrom grid validDetails
#' @noRd
validDetails.decoratedGrob <- function(x) {
  # Validity check for `decoratedGrob` objects; returns `x` unchanged when all checks pass.
  checkmate::assert_character(x$titles)
  checkmate::assert_character(x$footnotes)

  # The wrapped grob is optional.
  if (!is.null(x$grob)) {
    checkmate::assert_true(grid::is.grob(x$grob))
  }
  # The page label is only type-checked when it has exactly one element.
  if (length(x$page) == 1) {
    checkmate::assert_character(x$page)
  }

  # Margin-like fields that are not unit objects must be vectors of length 4.
  # `.var.name` reproduces the label checkmate would derive from `x$<field>`.
  for (field in c("outer_margins", "margins", "padding")) {
    field_value <- x[[field, exact = FALSE]]
    if (!grid::is.unit(field_value)) {
      checkmate::assert_vector(field_value, len = 4, .var.name = paste0("x$", field))
    }
  }

  x
}

#' @importFrom grid widthDetails
#' @noRd
widthDetails.decoratedGrob <- function(x) {
  # Reports one "null" unit regardless of the contents of `x`.
  reported_width <- grid::unit(1, units = "null")
  reported_width
}

#' @importFrom grid heightDetails
#' @noRd
heightDetails.decoratedGrob <- function(x) {
  # Reports one "null" unit regardless of the contents of `x`.
  reported_height <- grid::unit(1, units = "null")
  reported_height
}

# Adapted from Paul Murrell, R Graphics, 2nd Edition
# https://www.stat.auckland.ac.nz/~paul/RG2e/interactgrid-splittext.R
split_string <- function(text, width) {
  # Wraps each element of `text` at single-space word boundaries so that no line
  # exceeds `width`, then joins all elements with newlines.
  #
  # text:  character vector; every element starts on a new output line.
  # width: unit object giving the maximum line width.
  # Returns a single string with "\n" as line separator.
  strings <- strsplit(text, " ")

  # One slot per input element. Sizing by the input length (instead of starting
  # from a lone `NA`) makes zero-length `text` collapse to "" rather than "NA".
  out_string <- rep(NA_character_, length(strings))

  # Loop-invariant: width of a single space and the numeric line-width budget.
  gapwidth <- grid::stringWidth(" ")
  availwidth <- as.numeric(width)

  for (string_i in seq_along(strings)) {
    newline_str <- strings[[string_i]]
    # An empty input string splits into zero words; keep it as an empty line.
    if (length(newline_str) == 0) newline_str <- ""

    # Every element opens a fresh line with its first word.
    out_string[string_i] <- newline_str[1]
    linewidth <- grid::stringWidth(out_string[string_i])

    if (length(newline_str) > 1) {
      for (i in seq(2, length(newline_str))) {
        width_i <- grid::stringWidth(newline_str[i])
        # Compare in the unit type of `width`: does the word still fit on the current line?
        if (grid::convertWidth(linewidth + gapwidth + width_i, grid::unitType(width), valueOnly = TRUE) < availwidth) {
          sep <- " "
          linewidth <- linewidth + gapwidth + width_i
        } else {
          # Break the line; the word becomes the start of the next one.
          sep <- "\n"
          linewidth <- width_i
        }
        out_string[string_i] <- paste(out_string[string_i], newline_str[i], sep = sep)
      }
    }
  }
  paste(out_string, collapse = "\n")
}

#' Split Text According To Available Text Width
#'
#' Dynamically wrap text.
#'
#' @inheritParams grid::grid.text
#' @param text character string
#' @param width a unit object specifying max width of text
#'
#' @return A text grob.
#'
#' @details This code is taken from R Graphics by Paul Murrell, 2nd edition
#'
#' @examples
#' # Internal function - split_text_grob
#' \dontrun{
#' sg <- split_text_grob(text = paste(
#'   "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum vitae",
#'   "dapibus dolor, ac mattis erat. Nunc metus lectus, imperdiet ut enim eu,",
#'   "commodo scelerisque urna. Vestibulum facilisis metus vel nibh tempor, sed",
#'   "elementum sem tempus. Morbi quis arcu condimentum, maximus lorem id,",
#'   "tristique ante. Nullam a nunc dui. Fusce quis lacus nec ante dignissim",
#'   "faucibus nec vitae tellus. Suspendisse mollis et sapien eu ornare. Vestibulum",
#'   "placerat neque nec justo efficitur, ornare varius nulla imperdiet. Nunc justo",
#'   "sapien, vestibulum eget efficitur eget, porttitor id ante. Nulla tempor",
#'   "luctus massa id elementum. Praesent dictum, neque vitae vestibulum malesuada,",
#'   "nunc nisi blandit lacus, sit amet tristique odio dui sit amet velit."
#' ))
#'
#' library(grid)
#' grobHeight(sg)
#'
#' grid.newpage()
#' pushViewport(plotViewport())
#' grid.rect()
#' grid.draw(sg)
#'
#' grid.rect(
#'   height = grobHeight(sg), width = unit(1, "cm"), gp = gpar(fill = "red")
#' )
#'
#' # stack split_text_grob
#' grid.newpage()
#' pushViewport(plotViewport())
#' grid.rect()
#' grid.draw(split_text_grob(
#'   c("Hello, this is a test", "and yet another test"),
#'   just = c("left", "top"), x = 0, y = 1
#' ))
#' }
#'
#' @keywords internal
split_text_grob <- function(text,
                            x = grid::unit(0.5, "npc"),
                            y = grid::unit(0.5, "npc"),
                            width = grid::unit(1, "npc"),
                            just = "centre",
                            hjust = NULL,
                            vjust = NULL,
                            default.units = "npc", # nolint
                            name = NULL,
                            gp = grid::gpar(),
                            vp = NULL) {
  # Bare numbers are interpreted in `default.units`.
  if (!grid::is.unit(x)) {
    x <- grid::unit(x, default.units)
  }
  if (!grid::is.unit(y)) {
    y <- grid::unit(y, default.units)
  }
  if (!grid::is.unit(width)) {
    width <- grid::unit(width, default.units)
  }

  # Compound units (sum/min/max) are converted to `default.units`.
  compound_types <- c("sum", "min", "max")
  if (grid::unitType(x) %in% compound_types) {
    x <- grid::convertUnit(x, default.units)
  }
  if (grid::unitType(y) %in% compound_types) {
    y <- grid::convertUnit(y, default.units)
  }
  if (grid::unitType(width) %in% compound_types) {
    width <- grid::convertUnit(width, default.units)
  }

  # For a fixed (absolute) width the wrapped text does not need to be recalculated
  # when the viewport is resized, so it is cached as an attribute on `text`.
  absolute_units <- c("cm", "inches", "mm", "points", "picas", "bigpts", "dida", "cicero", "scaledpts")
  width_unit <- attr(width, "unit")
  is_fixed_width <- !inherits(width, "unit.arithmetic") &&
    !is.null(width_unit) &&
    width_unit %in% absolute_units
  if (is_fixed_width) {
    attr(text, "fixed_text") <- paste(vapply(text, split_string, character(1), width = width), collapse = "\n")
  }

  # Build (but do not draw) a text grob whose label is the wrapped text.
  grid::grid.text(
    label = split_string(text, width),
    x = x, y = y,
    just = just,
    hjust = hjust,
    vjust = vjust,
    rot = 0,
    check.overlap = FALSE,
    name = name,
    gp = gp,
    vp = vp,
    draw = FALSE
  )
}

#' @importFrom grid validDetails
#' @noRd
validDetails.dynamicSplitText <- function(x) {
  # Validity check for `dynamicSplitText` objects: `text` must be character and
  # `width` a single unit. Returns `x` unchanged when all checks pass.
  checkmate::assert_character(x$text)
  checkmate::assert_true(grid::is.unit(x$width))
  checkmate::assert_vector(x$width, len = 1)
  x
}

#' @importFrom grid heightDetails
#' @noRd
heightDetails.dynamicSplitText <- function(x) {
  # Use the cached wrapped text when present; otherwise wrap each element of
  # `x$text` to `x$width` and join the pieces with newlines.
  wrapped <- attr(x$text, "fixed_text")
  if (is.null(wrapped)) {
    wrapped <- paste(vapply(x$text, split_string, character(1), width = x$width), collapse = "\n")
  }

  # Height of the wrapped text as a string-height unit.
  grid::stringHeight(wrapped)
}

#' @importFrom grid widthDetails
#' @noRd
widthDetails.dynamicSplitText <- function(x) {
  # The reported width is the `width` stored on the object.
  stored_width <- x$width
  stored_width
}

#' @importFrom grid drawDetails
#' @noRd
drawDetails.dynamicSplitText <- function(x, recording) {
  # Use the cached wrapped text when present; otherwise wrap each element of
  # `x$text` to `x$width` and join the pieces with newlines.
  wrapped <- attr(x$text, "fixed_text")
  if (is.null(wrapped)) {
    wrapped <- paste(vapply(x$text, split_string, character(1), width = x$width), collapse = "\n")
  }

  # Turn the object into a plain text grob: drop the wrapping-specific fields,
  # set the label, and replace the leading class with "text".
  x$width <- NULL
  x$label <- wrapped
  x$text <- NULL
  remaining_classes <- class(x)[-1]
  class(x) <- c("text", remaining_classes)

  grid::grid.draw(x)
}

#' Update Page Number
#'
#' Automatically updates page number.
#'
#' @param npages number of pages in total
#' @param ... passed on to [decorate_grob()]
#'
#' @return Closure that increments the page number.
#'
#' @examples
#' # Internal function - decorate_grob_factory
#' \dontrun{
#' pf <- decorate_grob_factory(
#'   titles = "This is a test\nHello World",
#'   footnotes = "Here belong the footnotess",
#'   npages = 3
#' )
#'
#' library(grid)
#' draw_grob(pf(NULL))
#' draw_grob(pf(NULL))
#' draw_grob(pf(NULL))
#' }
#'
#' @keywords internal
decorate_grob_factory <- function(npages, ...) {
  # Counter shared by all calls of the returned closure.
  pages_used <- 0

  decorate_next_page <- function(grob) {
    # The counter advances on every call, including the one that fails.
    pages_used <<- pages_used + 1
    if (pages_used > npages) {
      stop(paste("current page is", pages_used, "but max.", npages, "specified."))
    }
    page_label <- paste("Page", pages_used, "of", npages)
    decorate_grob(grob = grob, page = page_label, ...)
  }

  decorate_next_page
}

#' Decorate Set of `grobs` and Add Page Numbering
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Note that this uses the [decorate_grob_factory()] function.
#'
#' @param grobs a list of grid grobs
#' @param ... arguments passed on to [decorate_grob()].
#'
#' @return A decorated grob.
#'
#' @examples
#' library(ggplot2)
#' library(grid)
#' g <- with(data = iris, {
#'   list(
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Length, Sepal.Width, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Length, Petal.Length, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Length, Petal.Width, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Width, Petal.Length, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Width, Petal.Width, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Petal.Length, Petal.Width, col = Species)) +
#'         ggplot2::geom_point()
#'     )
#'   )
#' })
#' lg <- decorate_grob_set(grobs = g, titles = "Hello\nOne\nTwo\nThree", footnotes = "")
#'
#' draw_grob(lg[[1]])
#' draw_grob(lg[[2]])
#' draw_grob(lg[[6]])
#'
#' @export
decorate_grob_set <- function(grobs, ...) {
  # One page per grob: the factory numbers pages in the order the grobs are decorated.
  decorate_page <- decorate_grob_factory(npages = length(grobs), ...)
  lapply(grobs, decorate_page)
}

#' Count the Number of Patients with Particular Flags
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The primary analysis variable `.var` denotes the unique patient identifier.
#'
#' @inheritParams argument_convention
#'
#' @seealso [count_patients_with_event]
#'
#' @name count_patients_with_flags
NULL

#' @describeIn count_patients_with_flags Statistics function which counts the number of patients for which
#'   a particular flag variable is `TRUE`.
#'
#' @inheritParams summarize_variables
#' @param .var (`character`)\cr name of the column that contains the unique identifier.
#' @param flag_variables (`character`)\cr a character vector specifying the names of `logical`
#'   variables from analysis dataset used for counting the number of unique identifiers.
#'
#' @return
#' * `s_count_patients_with_flags()` returns the count and the fraction of unique identifiers with each particular
#'   flag as a list of statistics `n`, `count`, `count_fraction`, and `n_blq`, with one element per flag.
#'
#' @examples
#' library(dplyr)
#'
#' # `s_count_patients_with_flags()`
#'
#' # Add labelled flag variables to analysis dataset.
#' adae <- tern_ex_adae %>%
#'   mutate(
#'     fl1 = TRUE,
#'     fl2 = TRTEMFL == "Y",
#'     fl3 = TRTEMFL == "Y" & AEOUT == "FATAL",
#'     fl4 = TRTEMFL == "Y" & AEOUT == "FATAL" & AEREL == "Y"
#'   )
#' labels <- c(
#'   "fl1" = "Total AEs",
#'   "fl2" = "Total number of patients with at least one adverse event",
#'   "fl3" = "Total number of patients with fatal AEs",
#'   "fl4" = "Total number of patients with related fatal AEs"
#' )
#' formatters::var_labels(adae)[names(labels)] <- labels
#'
#' s_count_patients_with_flags(
#'   adae,
#'   "SUBJID",
#'   flag_variables = c("fl1", "fl2", "fl3", "fl4"),
#'   denom = "N_col",
#'   .N_col = 1000
#' )
#'
#' @export
s_count_patients_with_flags <- function(df,
                                        .var,
                                        flag_variables,
                                        .N_col, # nolint
                                        .N_row, # nolint
                                        denom = c("n", "N_row", "N_col")) {
  # Unnamed input: each variable name doubles as its own label.
  if (is.null(names(flag_variables))) flag_variables <- stats::setNames(flag_variables, flag_variables)
  # Values are the labels used in the output; names are the columns looked up in `df`.
  flag_names <- unname(flag_variables)
  flag_variables <- names(flag_variables)

  checkmate::assert_subset(flag_variables, colnames(df))
  # One column of statistics per flag variable.
  temp <- sapply(flag_variables, function(x) {
    # Row positions where the flag is TRUE.
    tmp <- Map(function(y) which(df[[y]]), x)
    position_satisfy_flags <- Reduce(intersect, tmp)
    # Unique identifiers among the flagged rows, counted against all unique identifiers in `df`.
    id_satisfy_flags <- as.character(unique(df[position_satisfy_flags, ][[.var]]))
    s_count_values(
      as.character(unique(df[[.var]])),
      id_satisfy_flags,
      denom = denom,
      .N_col = .N_col,
      .N_row = .N_row
    )
  })
  colnames(temp) <- flag_names
  # Reshape to a list with one element per statistic, each holding one entry per flag.
  temp <- data.frame(t(temp))
  result <- temp %>% as.list()
  if (length(flag_variables) == 1) {
    # With a single flag, label the entry of every statistic explicitly. Iterating over
    # all elements (rather than a fixed 1:3) covers however many statistics are returned.
    for (i in seq_along(result)) names(result[[i]]) <- flag_names[1]
  }
  result
}

#' @describeIn count_patients_with_flags Formatted analysis function which is used as `afun`
#'   in `count_patients_with_flags()`.
#'
#' @return
#' * `a_count_patients_with_flags()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' #  We need to ungroup `count_fraction` first so that the `rtables` formatting
#' # function `format_count_fraction()` can be applied correctly.
#'
#' # `a_count_patients_with_flags()`
#'
#' afun <- make_afun(a_count_patients_with_flags,
#'   .stats = "count_fraction",
#'   .ungroup_stats = "count_fraction"
#' )
#' afun(
#'   adae,
#'   .N_col = 10L,
#'   .N_row = 10L,
#'   .var = "USUBJID",
#'   flag_variables = c("fl1", "fl2", "fl3", "fl4")
#' )
#'
#' @export
a_count_patients_with_flags <- make_afun(
  # Statistics function being wrapped into a formatted analysis function.
  s_count_patients_with_flags,
  # Default format applied to the `count_fraction` statistic.
  .formats = c("count_fraction" = format_count_fraction_fixed_dp)
)

#' @describeIn count_patients_with_flags Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_patients_with_flags()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_patients_with_flags()` to the table layout.
#'
#' @examples
#' # `count_patients_with_flags()`
#'
#' lyt2 <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_patients_with_flags(
#'     "SUBJID",
#'     flag_variables = formatters::var_labels(adae[, c("fl1", "fl2", "fl3", "fl4")]),
#'     denom = "N_col"
#'   )
#' build_table(lyt2, adae, alt_counts_df = tern_ex_adsl)
#'
#' @export
count_patients_with_flags <- function(lyt,
                                      var,
                                      var_labels = var,
                                      show_labels = "hidden",
                                      ...,
                                      table_names = paste0("tbl_flags_", var),
                                      .stats = "count_fraction",
                                      .formats = NULL,
                                      .indent_mods = NULL) {
  # Specialise the formatted analysis function; every requested statistic is also ungrouped.
  # NOTE(review): keep the local name `afun`; `analyze()` may derive defaults from the
  # expression passed as `afun` - confirm before renaming.
  afun <- make_afun(
    a_count_patients_with_flags,
    .stats = .stats,
    .formats = .formats,
    .indent_mods = .indent_mods,
    .ungroup_stats = .stats
  )

  # Arguments given through `...` are handed to the analysis function as extra arguments.
  analyze(
    lyt = lyt,
    vars = var,
    afun = afun,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    extra_args = list(...)
  )
}

#' Count Patients with Marked Laboratory Abnormalities
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Primary analysis variable `.var` indicates whether single, replicated or last marked laboratory
#' abnormality was observed (`factor`). Additional analysis variables are `id` (`character` or `factor`)
#' and `direction` (`factor`) indicating the direction of the abnormality. Denominator is number of
#' patients with at least one valid measurement during the analysis.
#'   * For `Single, not last` and `Last or replicated`: Numerator is number of patients
#'     with `Single, not last` and `Last or replicated` levels, respectively.
#'   * For `Any`: Numerator is the number of patients with either single or
#'     replicated marked abnormalities.
#'
#' @inheritParams argument_convention
#' @param category (`list`)\cr with different marked category names for single
#'   and last or replicated.
#'
#' @note `Single, not last` and `Last or replicated` levels are mutually exclusive. If a patient has
#'   abnormalities that meet both the `Single, not last` and `Last or replicated` criteria, then the
#'   patient will be counted only under the `Last or replicated` category.
#'
#' @name abnormal_by_marked
NULL

#' @describeIn abnormal_by_marked Statistics function for patients with marked lab abnormalities.
#'
#' @return
#' * `s_count_abnormal_by_marked()` returns statistic `count_fraction` with `Single, not last`,
#'   `Last or replicated`, and `Any Abnormality` results.
#'
#' @examples
#' library(dplyr)
#'
#' df <- data.frame(
#'   USUBJID = as.character(c(rep(1, 5), rep(2, 5), rep(1, 5), rep(2, 5))),
#'   ARMCD = factor(c(rep("ARM A", 5), rep("ARM B", 5), rep("ARM A", 5), rep("ARM B", 5))),
#'   ANRIND = factor(c(
#'     "NORMAL", "HIGH", "HIGH", "HIGH HIGH", "HIGH",
#'     "HIGH", "HIGH", "HIGH HIGH", "NORMAL", "HIGH HIGH", "NORMAL", "LOW", "LOW", "LOW LOW", "LOW",
#'     "LOW", "LOW", "LOW LOW", "NORMAL", "LOW LOW"
#'   )),
#'   ONTRTFL = rep(c("", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"), 2),
#'   PARAMCD = factor(c(rep("CRP", 10), rep("ALT", 10))),
#'   AVALCAT1 = factor(rep(c("", "", "", "SINGLE", "REPLICATED", "", "", "LAST", "", "SINGLE"), 2)),
#'   stringsAsFactors = FALSE
#' )
#'
#' df <- df %>%
#'   mutate(abn_dir = factor(
#'     case_when(
#'       ANRIND == "LOW LOW" ~ "Low",
#'       ANRIND == "HIGH HIGH" ~ "High",
#'       TRUE ~ ""
#'     ),
#'     levels = c("Low", "High")
#'   ))
#'
#' # Select only post-baseline records.
#' df <- df %>% filter(ONTRTFL == "Y")
#' df_crp <- df %>%
#'   filter(PARAMCD == "CRP") %>%
#'   droplevels()
#' full_parent_df <- list(df_crp, "not_needed")
#' cur_col_subset <- list(rep(TRUE, nrow(df_crp)), "not_needed")
#' spl_context <- data.frame(
#'   split = c("PARAMCD", "GRADE_DIR"),
#'   full_parent_df = I(full_parent_df),
#'   cur_col_subset = I(cur_col_subset)
#' )
#' # Internal function - s_count_abnormal_by_marked
#' \dontrun{
#' s_count_abnormal_by_marked(
#'   df = df_crp %>% filter(abn_dir == "High"),
#'   .spl_context = spl_context,
#'   .var = "AVALCAT1",
#'   variables = list(id = "USUBJID", param = "PARAMCD", direction = "abn_dir")
#' )
#' }
#'
#' @keywords internal
s_count_abnormal_by_marked <- function(df,
                                       .var = "AVALCAT1",
                                       .spl_context,
                                       category = list(single = "SINGLE", last_replicated = c("LAST", "REPLICATED")),
                                       variables = list(id = "USUBJID", param = "PARAM", direction = "abn_dir")) {
  # Input validation. The assertion expressions are kept literal because checkmate
  # builds its error messages from the argument expression.
  checkmate::assert_string(.var)
  checkmate::assert_list(variables)
  checkmate::assert_list(category)
  checkmate::assert_subset(names(category), c("single", "last_replicated"))
  checkmate::assert_subset(names(variables), c("id", "param", "direction"))
  # `df` may contain at most one distinct direction.
  checkmate::assert_vector(unique(df[[variables$direction]]), max.len = 1)

  assert_df_with_variables(df, c(aval = .var, variables))
  checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))

  # Denominator: unique subjects of the current column within the parent data of the
  # parameter split. Subjects with records in both directions are counted only once.
  param_context <- .spl_context[.spl_context$split == variables[["param"]], ]
  parent_ids <- param_context$full_parent_df[[1]][[variables[["id"]]]]
  column_ids <- parent_ids[param_context$cur_col_subset[[1]]]
  denom <- length(unique(column_ids))

  # Nothing to count against: report zero count and zero fraction everywhere.
  if (denom == 0) {
    return(list(count_fraction = list(
      "Single, not last" = c(0, 0),
      "Last or replicated" = c(0, 0),
      "Any Abnormality" = c(0, 0)
    )))
  }

  # Unique subject ids in `df` whose analysis value falls in the given category levels.
  ids_in_category <- function(levels) {
    unique(df[df[[.var]] %in% levels, variables$id, drop = TRUE])
  }
  # Count paired with its fraction of the denominator.
  with_fraction <- function(n) c(n, n / denom)

  ids_last_replicated <- ids_in_category(category[["last_replicated"]])
  # Subjects with both single and last/replicated abnormalities count only as last/replicated.
  ids_single <- setdiff(ids_in_category(category[["single"]]), ids_last_replicated)

  n_single <- length(ids_single)
  n_last_replicated <- length(ids_last_replicated)

  list(count_fraction = list(
    "Single, not last" = with_fraction(n_single),
    "Last or replicated" = with_fraction(n_last_replicated),
    "Any Abnormality" = with_fraction(n_single + n_last_replicated)
  ))
}

#' @describeIn abnormal_by_marked Formatted analysis function which is used as `afun`
#'   in `count_abnormal_by_marked()`.
#'
#' @return
#' * `a_count_abnormal_by_marked()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_count_abnormal_by_marked
#' \dontrun{
#' # Use the Formatted Analysis function for `analyze()`. We need to ungroup `count_fraction` first
#' # so that the `rtables` formatting function `format_count_fraction()` can be applied correctly.
#' afun <- make_afun(a_count_abnormal_by_marked, .ungroup_stats = "count_fraction")
#' afun(
#'   df = df_crp %>% filter(abn_dir == "High"),
#'   .spl_context = spl_context,
#'   variables = list(id = "USUBJID", param = "PARAMCD", direction = "abn_dir")
#' )
#' }
#'
#' @keywords internal
a_count_abnormal_by_marked <- make_afun(
  # Statistics function that is wrapped into a formatted analysis function.
  s_count_abnormal_by_marked,
  # The `count_fraction` statistic is rendered with `format_count_fraction`.
  .formats = c("count_fraction" = format_count_fraction)
)

#' @describeIn abnormal_by_marked Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal_by_marked()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_abnormal_by_marked()` to the table layout.
#'
#' @examples
#' map <- unique(
#'   df[df$abn_dir %in% c("Low", "High") & df$AVALCAT1 != "", c("PARAMCD", "abn_dir")]
#' ) %>%
#'   lapply(as.character) %>%
#'   as.data.frame() %>%
#'   arrange(PARAMCD, abn_dir)
#'
#' basic_table() %>%
#'   split_cols_by("ARMCD") %>%
#'   split_rows_by("PARAMCD") %>%
#'   summarize_num_patients(
#'     var = "USUBJID",
#'     .stats = "unique_count"
#'   ) %>%
#'   split_rows_by(
#'     "abn_dir",
#'     split_fun = trim_levels_to_map(map)
#'   ) %>%
#'   count_abnormal_by_marked(
#'     var = "AVALCAT1",
#'     variables = list(
#'       id = "USUBJID",
#'       param = "PARAMCD",
#'       direction = "abn_dir"
#'     )
#'   ) %>%
#'   build_table(df = df)
#'
#' basic_table() %>%
#'   split_cols_by("ARMCD") %>%
#'   split_rows_by("PARAMCD") %>%
#'   summarize_num_patients(
#'     var = "USUBJID",
#'     .stats = "unique_count"
#'   ) %>%
#'   split_rows_by(
#'     "abn_dir",
#'     split_fun = trim_levels_in_group("abn_dir")
#'   ) %>%
#'   count_abnormal_by_marked(
#'     var = "AVALCAT1",
#'     variables = list(
#'       id = "USUBJID",
#'       param = "PARAMCD",
#'       direction = "abn_dir"
#'     )
#'   ) %>%
#'   build_table(df = df)
#'
#' @export
count_abnormal_by_marked <- function(lyt,
                                     var,
                                     ...,
                                     .stats = NULL,
                                     .formats = NULL,
                                     .labels = NULL,
                                     .indent_mods = NULL) {
  # Exactly one analysis variable is supported.
  checkmate::assert_string(var)

  # Specialize the formatted analysis function with the user's statistic selection,
  # formats, labels and indentation; `count_fraction` is ungrouped so that each of
  # its elements is handled as a separate statistic.
  marked_afun <- make_afun(
    a_count_abnormal_by_marked,
    .ungroup_stats = "count_fraction",
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Arguments given through `...` are forwarded to the analysis function as `extra_args`;
  # the variable label row is hidden.
  analyze(
    lyt = lyt,
    vars = var,
    afun = marked_afun,
    show_labels = "hidden",
    extra_args = list(...)
  )
}

#' Counting Patients and Events in Columns
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Counting the number of unique patients and the total number of all and specific events
#' when a column table layout is required.
#'
#' @inheritParams argument_convention
#'
#' @name count_patients_events_in_cols
NULL

#' @describeIn count_patients_events_in_cols Statistics function which counts numbers of patients and multiple
#'   events defined by filters. Used as analysis function `afun` in `summarize_patients_events_in_cols()`.
#'
#' @param filters_list (named `list` of `character`)\cr each element in this list describes one
#'   type of event described by filters, in the same format as [s_count_patients_with_event()].
#'   If it has a label, then this will be used for the column title.
#' @param empty_stats (`character`)\cr optional names of the statistics that should be returned empty such
#'   that corresponding table cells will stay blank.
#' @param custom_label (`string` or `NULL`)\cr if provided and `labelstr` is empty then this will
#'   be used as label.
#'
#' @return
#' * `s_count_patients_and_multiple_events()` returns a list with the statistics:
#'   - `unique`: number of unique patients in `df`.
#'   - `all`: number of rows in `df`.
#'   - one element with the same name as in `filters_list`: number of rows in `df`,
#'     i.e. events, fulfilling the filter condition.
#'
#' @examples
#' # `s_count_patients_and_multiple_events()`
#' df <- data.frame(
#'   USUBJID = rep(c("id1", "id2", "id3", "id4"), c(2, 3, 1, 1)),
#'   ARM = c("A", "A", "B", "B", "B", "B", "A"),
#'   AESER = rep("Y", 7),
#'   AESDTH = c("Y", "Y", "N", "Y", "Y", "N", "N"),
#'   AEREL = c("Y", "Y", "N", "Y", "Y", "N", "Y"),
#'   AEDECOD = c("A", "A", "A", "B", "B", "C", "D"),
#'   AEBODSYS = rep(c("SOC1", "SOC2", "SOC3"), c(3, 3, 1))
#' )
#'
#' # Internal function - s_count_patients_and_multiple_events
#' \dontrun{
#' s_count_patients_and_multiple_events(
#'   df = df,
#'   id = "USUBJID",
#'   filters_list = list(
#'     serious = c(AESER = "Y"),
#'     fatal = c(AESDTH = "Y")
#'   )
#' )
#' }
#'
#' @keywords internal
s_count_patients_and_multiple_events <- function(df, # nolint
                                                 id,
                                                 filters_list,
                                                 empty_stats = character(),
                                                 labelstr = "",
                                                 custom_label = NULL) {
  checkmate::assert_list(filters_list, names = "named")
  checkmate::assert_data_frame(df)
  checkmate::assert_string(id)
  # "unique" and "all" are reserved for the built-in statistics below.
  checkmate::assert_disjunct(c("unique", "all"), names(filters_list))
  checkmate::assert_character(empty_stats)
  checkmate::assert_string(labelstr)
  checkmate::assert_string(custom_label, null.ok = TRUE)

  # Every row of `df` is one event. The helper index column gives each row its own
  # identifier so that the event counts below count rows rather than patients.
  df$.row_index <- as.character(seq_len(nrow(df)))

  # Label precedence: non-empty `labelstr`, then `custom_label`, then the fallback "counts".
  row_label <- "counts"
  if (labelstr != "") {
    row_label <- labelstr
  } else if (!is.null(custom_label)) {
    row_label <- custom_label
  }

  # Built-in statistics: number of unique patients and total number of rows.
  stats <- list(
    unique = formatters::with_label(
      s_num_patients_content(df = df, .N_col = 1, .var = id, required = NULL)$unique[1L],
      row_label
    ),
    all = formatters::with_label(
      nrow(df),
      row_label
    )
  )

  # One additional statistic per filter: number of rows fulfilling the filter condition.
  event_stats <- lapply(filters_list, function(filters) {
    n_events <- s_count_patients_with_event(
      df = df,
      .var = ".row_index",
      filters = filters,
      .N_col = 1,
      .N_row = 1
    )$count
    formatters::with_label(n_events, row_label)
  })
  stats <- c(stats, event_stats)

  # Statistics requested as empty keep their label but carry no value,
  # so that the corresponding table cells stay blank.
  for (stat in intersect(names(stats), empty_stats)) {
    stats[[stat]] <- formatters::with_label(character(), obj_label(stats[[stat]]))
  }

  stats
}

#' @describeIn count_patients_events_in_cols Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @param col_split (`flag`)\cr whether the columns should be split.
#'   Set to `FALSE` when the required column split has been done already earlier in the layout pipe.
#'
#' @return
#' * `summarize_patients_events_in_cols()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted content rows
#'   containing the statistics from `s_count_patients_and_multiple_events()` to the table layout.
#' @examples
#' # `summarize_patients_events_in_cols()`
#' basic_table() %>%
#'   summarize_patients_events_in_cols(
#'     filters_list = list(
#'       related = formatters::with_label(c(AEREL = "Y"), "Events (Related)"),
#'       fatal = c(AESDTH = "Y"),
#'       fatal_related = c(AEREL = "Y", AESDTH = "Y")
#'     ),
#'     custom_label = "%s Total number of patients and events"
#'   ) %>%
#'   build_table(df)
#'
#' @export
summarize_patients_events_in_cols <- function(lyt, # nolint
                                              id = "USUBJID",
                                              filters_list = list(),
                                              ...,
                                              .stats = c(
                                                "unique",
                                                "all",
                                                names(filters_list)
                                              ),
                                              .labels = c(
                                                unique = "Patients (All)",
                                                all = "Events (All)",
                                                labels_or_names(filters_list)
                                              ),
                                              col_split = TRUE) {
  # Builds the content function for a single statistic; each column shows exactly one
  # statistic, formatted as an integer count ("xx.").
  build_stat_cfun <- function(stat) {
    make_afun(
      s_count_patients_and_multiple_events,
      id = id,
      filters_list = filters_list,
      .stats = stat,
      .formats = "xx."
    )
  }
  # `Map()` names the resulting list after the statistics.
  cfun_list <- Map(build_stat_cfun, .stats)

  # Unless the columns were already split earlier in the layout, create one column per
  # statistic (all based on the `id` variable), labelled via `.labels`.
  if (col_split) {
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = rep(id, length(.stats)),
      varlabels = .labels[.stats]
    )
  }

  # Arguments given through `...` reach the statistics function as `extra_args`.
  summarize_row_groups(
    lyt = lyt,
    cfun = cfun_list,
    extra_args = list(...)
  )
}

#' Stack Multiple Grobs
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Stack grobs as a new grob with 1 column and multiple rows layout.
#'
#' @param ... grobs.
#' @param grobs list of grobs.
#' @param padding unit of length 1, space between each grob.
#' @param vp a [viewport()] object (or `NULL`).
#' @param name a character identifier for the grob.
#' @param gp A [gpar()] object.
#'
#' @return A `grob`.
#'
#' @examples
#' library(grid)
#'
#' g1 <- circleGrob(gp = gpar(col = "blue"))
#' g2 <- circleGrob(gp = gpar(col = "red"))
#' g3 <- textGrob("TEST TEXT")
#' grid.newpage()
#' grid.draw(stack_grobs(g1, g2, g3))
#'
#' showViewport()
#'
#' grid.newpage()
#' pushViewport(viewport(layout = grid.layout(1, 2)))
#' vp1 <- viewport(layout.pos.row = 1, layout.pos.col = 2)
#' grid.draw(stack_grobs(g1, g2, g3, vp = vp1, name = "test"))
#'
#' showViewport()
#' grid.ls(grobs = TRUE, viewports = TRUE, print = FALSE)
#'
#' @export
stack_grobs <- function(...,
                        grobs = list(...),
                        padding = grid::unit(2, "line"),
                        vp = NULL,
                        gp = NULL,
                        name = NULL) {
  checkmate::assert_true(
    all(vapply(grobs, grid::is.grob, logical(1)))
  )

  # A single grob needs no layout and is handed back as is
  # (`vp`, `gp` and `name` are not applied in this case).
  if (length(grobs) == 1) {
    return(grobs[[1]])
  }

  # Layout rows alternate between grob rows (odd positions, sharing the free space
  # equally via "null" units) and padding rows (even positions).
  n_layout <- 2 * length(grobs) - 1
  row_heights <- lapply(
    seq(1, n_layout),
    function(pos) {
      if (pos %% 2 == 0) {
        padding
      } else {
        grid::unit(1, "null")
      }
    }
  )
  row_heights <- do.call(grid::unit.c, row_heights)

  layout_vp <- grid::viewport(
    layout = grid::grid.layout(nrow = n_layout, ncol = 1, heights = row_heights)
  )

  # Wrap each grob in a gTree positioned in its (odd) layout row.
  grob_rows <- seq_along(grobs) * 2 - 1
  place_in_row <- function(g, row) {
    grid::gTree(
      children = grid::gList(g),
      vp = grid::viewport(layout.pos.row = row, layout.pos.col = 1)
    )
  }
  placed_grobs <- Map(place_in_row, grobs, grob_rows)

  # Inner tree carries the layout viewport; the outer tree carries the caller's
  # `vp`, `gp` and `name`.
  stacked <- grid::gTree(
    children = do.call(grid::gList, placed_grobs),
    vp = layout_vp
  )

  grid::gTree(
    children = grid::gList(stacked),
    vp = vp,
    gp = gp,
    name = name
  )
}

#' Arrange Multiple Grobs
#'
#' Arrange grobs as a new grob with \verb{n*m (rows*cols)} layout.
#'
#' @inheritParams stack_grobs
#' @param ncol number of columns in layout.
#' @param nrow number of rows in layout.
#' @param padding_ht unit of length 1, vertical space between each grob.
#' @param padding_wt unit of length 1, horizontal space between each grob.
#'
#' @return A `grob`.
#'
#' @examples
#' library(grid)
#'
#' # Internal function - arrange_grobs
#' \dontrun{
#' num <- lapply(1:9, textGrob)
#' grid::grid.newpage()
#' grid.draw(arrange_grobs(grobs = num, ncol = 2))
#'
#' showViewport()
#'
#' g1 <- circleGrob(gp = gpar(col = "blue"))
#' g2 <- circleGrob(gp = gpar(col = "red"))
#' g3 <- textGrob("TEST TEXT")
#' grid::grid.newpage()
#' grid.draw(arrange_grobs(g1, g2, g3, nrow = 2))
#'
#' showViewport()
#'
#' grid::grid.newpage()
#' grid.draw(arrange_grobs(g1, g2, g3, ncol = 3))
#'
#' grid::grid.newpage()
#' grid::pushViewport(grid::viewport(layout = grid::grid.layout(1, 2)))
#' vp1 <- grid::viewport(layout.pos.row = 1, layout.pos.col = 2)
#' grid.draw(arrange_grobs(g1, g2, g3, ncol = 2, vp = vp1))
#'
#' showViewport()
#' }
#'
#' @keywords internal
arrange_grobs <- function(...,
                          grobs = list(...),
                          ncol = NULL, nrow = NULL,
                          padding_ht = grid::unit(2, "line"),
                          padding_wt = grid::unit(2, "line"),
                          vp = NULL,
                          gp = NULL,
                          name = NULL) {
  checkmate::assert_true(
    all(vapply(grobs, grid::is.grob, logical(1)))
  )

  # A single grob needs no layout and is handed back as is.
  if (length(grobs) == 1) {
    return(grobs[[1]])
  }

  # Derive whichever grid dimension was not supplied; with neither given,
  # a single column is used.
  if (is.null(ncol)) {
    ncol <- if (is.null(nrow)) 1 else ceiling(length(grobs) / nrow)
  }
  if (is.null(nrow)) {
    nrow <- ceiling(length(grobs) / ncol)
  }

  if (ncol * nrow < length(grobs)) {
    stop("specififed ncol and nrow are not enough for arranging the grobs ")
  }

  # One column is exactly the stacking case.
  if (ncol == 1) {
    return(stack_grobs(grobs = grobs, padding = padding_ht, vp = vp, gp = gp, name = name))
  }

  # Layout tracks alternate between content tracks (odd positions, "null" units sharing
  # the free space) and padding tracks (even positions).
  alternate_units <- function(n_tracks, gap) {
    tracks <- lapply(
      seq(1, n_tracks),
      function(pos) {
        if (pos %% 2 == 0) {
          gap
        } else {
          grid::unit(1, "null")
        }
      }
    )
    do.call(grid::unit.c, tracks)
  }
  n_col <- 2 * ncol - 1
  n_row <- 2 * nrow - 1
  hts <- alternate_units(n_row, padding_ht)
  wts <- alternate_units(n_col, padding_wt)

  main_vp <- grid::viewport(
    layout = grid::grid.layout(nrow = n_row, ncol = n_col, widths = wts, heights = hts)
  )

  # Layout positions of the content cells in row-major order: the column index
  # varies fastest, so grobs fill the first row left to right, then the next row.
  layout_rows <- seq(nrow) * 2 - 1
  layout_cols <- seq(ncol) * 2 - 1
  cell_row <- rep(layout_rows, each = length(layout_cols))
  cell_col <- rep(layout_cols, times = length(layout_rows))

  # Place the k-th grob in the k-th cell; surplus cells stay empty.
  n_placed <- min(length(grobs), length(cell_row))
  nested_grobs <- lapply(seq_len(n_placed), function(k) {
    grid::gTree(
      children = grid::gList(grobs[[k]]),
      vp = grid::viewport(layout.pos.row = cell_row[k], layout.pos.col = cell_col[k])
    )
  })

  # Inner tree carries the layout viewport; the outer tree carries the caller's
  # `vp`, `gp` and `name`.
  grobs_mainvp <- grid::gTree(
    children = do.call(grid::gList, nested_grobs),
    vp = main_vp
  )

  grid::gTree(
    children = grid::gList(grobs_mainvp),
    vp = vp,
    gp = gp,
    name = name
  )
}

#' Draw `grob`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Draw grob on device page.
#'
#' @param grob grid object
#' @param newpage draw on a new page
#' @param vp a [viewport()] object (or `NULL`).
#'
#' @return A `grob`.
#'
#' @examples
#' library(dplyr)
#' library(grid)
#'
#' # Internal function - arrange_grobs
#' \dontrun{
#' rect <- rectGrob(width = grid::unit(0.5, "npc"), height = grid::unit(0.5, "npc"))
#' rect %>% draw_grob(vp = grid::viewport(angle = 45))
#'
#' num <- lapply(1:10, textGrob)
#' num %>%
#'   arrange_grobs(grobs = .) %>%
#'   draw_grob()
#' showViewport()
#' }
#'
#' @export
draw_grob <- function(grob, newpage = TRUE, vp = NULL) {
  # Optionally start from a fresh page.
  if (newpage) {
    grid::grid.newpage()
  }

  # Optionally descend into the supplied viewport before drawing.
  # The viewport is not popped afterwards, so it remains the current viewport.
  has_viewport <- !is.null(vp)
  if (has_viewport) {
    grid::pushViewport(vp)
  }

  grid::grid.draw(grob)
}

tern_grob <- function(x) {
  # Put the "ternGrob" marker class in front of the existing classes; dropping
  # duplicates makes repeated calls leave the class vector unchanged.
  cls <- c("ternGrob", class(x))
  class(x) <- cls[!duplicated(cls)]
  x
}

print.ternGrob <- function(x, ...) {
  # Printing a `ternGrob` means drawing it on a fresh page of the current device.
  grid::grid.newpage()
  grid::grid.draw(x)
  # Follow the convention for print methods: return the argument invisibly.
  invisible(x)
}

#' Occurrence Counts by Grade
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Functions for analyzing frequencies and fractions of occurrences by grade for patients
#' with occurrence data. Multiple occurrences within one individual are counted once at the
#' greatest intensity/highest grade level.
#'
#' @inheritParams argument_convention
#' @param grade_groups (named `list` of `character`)\cr containing groupings of grades.
#' @param remove_single (`logical`)\cr `TRUE` to not include the elements of one-element grade groups
#'   in the output list; in this case only the grade groups names will be included in the output.
#'
#' @seealso Relevant helper function [h_append_grade_groups()].
#'
#' @name count_occurrences_by_grade
NULL

#' Helper function for [s_count_occurrences_by_grade()]
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function for [s_count_occurrences_by_grade()] to insert grade groupings into list with
#' individual grade frequencies. The order of the final result follows the order of `grade_groups`.
#' The elements under any-grade group (if any), i.e. the grade group equal to `refs` will be moved to
#' the end. Grade groups names must be unique.
#'
#' @inheritParams count_occurrences_by_grade
#' @param refs (named `list` of `numeric`)\cr where each name corresponds to a reference grade level
#'   and each entry represents a count.
#'
#' @return Formatted list of grade groupings.
#'
#' @examples
#' h_append_grade_groups(
#'   list(
#'     "Any Grade" = as.character(1:5),
#'     "Grade 1-2" = c("1", "2"),
#'     "Grade 3-4" = c("3", "4")
#'   ),
#'   list("1" = 10, "2" = 20, "3" = 30, "4" = 40, "5" = 50)
#' )
#'
#' h_append_grade_groups(
#'   list(
#'     "Any Grade" = as.character(5:1),
#'     "Grade A" = "5",
#'     "Grade B" = c("4", "3")
#'   ),
#'   list("1" = 10, "2" = 20, "3" = 30, "4" = 40, "5" = 50)
#' )
#'
#' h_append_grade_groups(
#'   list(
#'     "Any Grade" = as.character(1:5),
#'     "Grade 1-2" = c("1", "2"),
#'     "Grade 3-4" = c("3", "4")
#'   ),
#'   list("1" = 10, "2" = 5, "3" = 0)
#' )
#'
#' @export
h_append_grade_groups <- function(grade_groups, refs, remove_single = TRUE) {
  checkmate::assert_list(grade_groups)
  checkmate::assert_list(refs)
  refs_orig <- refs
  elements <- unique(unlist(grade_groups))

  ### compute sums in groups
  # Sums are computed from `refs` as supplied, i.e. before missing grades are padded below.
  grp_sum <- lapply(grade_groups, function(i) do.call(sum, refs[i]))
  # Grades that appear in a group but not in `refs` are added with a count of 0.
  if (!checkmate::test_subset(elements, names(refs))) {
    padding_el <- setdiff(elements, names(refs))
    refs[padding_el] <- 0
  }
  result <- c(grp_sum, refs)

  ### order result while keeping grade_groups's ordering
  ordr <- grade_groups

  # elements of any-grade group (if any) will be moved to the end
  # `vapply()` always yields a logical vector, also for an empty `grade_groups`
  # (where `sapply()` would return `list()`, which is not a valid subscript below).
  is_any <- vapply(grade_groups, setequal, logical(1), y = names(refs))
  ordr[is_any] <- list(character(0)) # hide elements under any-grade group

  # groups-elements combined sequence
  ordr <- c(lapply(names(ordr), function(g) c(g, ordr[[g]])), recursive = TRUE, use.names = FALSE)
  ordr <- ordr[!duplicated(ordr)]

  # append remaining elements (if any)
  ordr <- union(ordr, unlist(grade_groups[is_any])) # from any-grade group
  ordr <- union(ordr, names(refs)) # from refs

  # remove elements of single-element groups, if any
  if (remove_single) {
    is_single <- lengths(grade_groups) == 1L
    ordr <- setdiff(ordr, unlist(grade_groups[is_single]))
  }

  # apply the order
  result <- result[ordr]

  # remove groups without any elements in the original refs
  # note: it's OK if groups have 0 value
  keep_grp <- vapply(grade_groups, function(x, rf) {
    any(x %in% rf)
  }, rf = names(refs_orig), logical(1))

  # keep the original grades plus every group that has at least one original grade
  keep_el <- names(result) %in% names(refs_orig) | names(result) %in% names(keep_grp)[keep_grp]
  result <- result[keep_el]

  result
}

#' @describeIn count_occurrences_by_grade Statistics function which counts the
#'  number of patients by highest grade.
#'
#' @return
#' * `s_count_occurrences_by_grade()` returns a list of counts and fractions with one element per grade level or
#'   grade level grouping.
#'
#' @examples
#' library(dplyr)
#' df <- data.frame(
#'   USUBJID = as.character(c(1:6, 1)),
#'   ARM = factor(c("A", "A", "A", "B", "B", "B", "A"), levels = c("A", "B")),
#'   AETOXGR = factor(c(1, 2, 3, 4, 1, 2, 3), levels = c(1:5)),
#'   AESEV = factor(
#'     x = c("MILD", "MODERATE", "SEVERE", "MILD", "MILD", "MODERATE", "SEVERE"),
#'     levels = c("MILD", "MODERATE", "SEVERE")
#'   ),
#'   stringsAsFactors = FALSE
#' )
#' df_adsl <- df %>%
#'   select(USUBJID, ARM) %>%
#'   unique()
#'
#' s_count_occurrences_by_grade(
#'   df,
#'   .N_col = 10L,
#'   .var = "AETOXGR",
#'   id = "USUBJID",
#'   grade_groups = list("ANY" = levels(df$AETOXGR))
#' )
#'
#' @export
s_count_occurrences_by_grade <- function(df,
                                         .var,
                                         .N_col, # nolint
                                         id = "USUBJID",
                                         grade_groups = list(),
                                         remove_single = TRUE,
                                         labelstr = "") {
  assert_valid_factor(df[[.var]])
  assert_df_with_variables(df, list(grade = .var, id = id))

  if (nrow(df) >= 1) {
    if (isTRUE(is.factor(df[[id]]))) {
      assert_valid_factor(df[[id]], any.missing = FALSE)
    } else {
      checkmate::assert_character(df[[id]], min.chars = 1, any.missing = FALSE)
    }
    checkmate::assert_count(.N_col)

    # From here on `id` and `grade` are the data vectors; the formula in
    # `stats::aggregate()` below refers to these local names.
    id <- df[[id]]
    grade <- df[[.var]]

    if (!is.ordered(grade)) {
      grade_lbl <- obj_label(grade)
      lvls <- levels(grade)
      is_digit_lvl <- grepl("^\\d+$", lvls)
      n_digit_lvl <- sum(is_digit_lvl)
      if (n_digit_lvl == 0 || n_digit_lvl == length(lvls)) {
        # No digit-only levels, or only digit-only levels: keep the existing level order.
        lvl_ord <- lvls
      } else {
        # Mixed levels: sort numerically, placing every non-numeric level
        # below the smallest numeric one.
        sort_key <- lvls
        sort_key[!is_digit_lvl] <- min(as.numeric(lvls[is_digit_lvl])) - 1
        lvl_ord <- lvls[order(as.numeric(sort_key))]
      }
      grade <- formatters::with_label(factor(grade, levels = lvl_ord, ordered = TRUE), grade_lbl)
    }

    # Highest grade per patient, then number of patients per grade level.
    df_max <- stats::aggregate(grade ~ id, FUN = max, drop = FALSE)
    l_count <- as.list(table(df_max$grade))
  } else {
    # No rows: every grade level gets a count of zero.
    grade_levels <- levels(df[[.var]])
    l_count <- stats::setNames(as.list(rep(0, length(grade_levels))), grade_levels)
  }

  if (length(grade_groups) > 0) {
    l_count <- h_append_grade_groups(grade_groups, l_count, remove_single)
  }

  # Each element becomes c(count, fraction of `.N_col`).
  list(
    count_fraction = lapply(l_count, function(n) c(n, n / .N_col))
  )
}

#' @describeIn count_occurrences_by_grade Formatted analysis function which is used as `afun`
#'   in `count_occurrences_by_grade()`.
#'
#' @return
#' * `a_count_occurrences_by_grade()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' #  We need to ungroup `count_fraction` first so that the `rtables` formatting
#' # function `format_count_fraction()` can be applied correctly.
#' afun <- make_afun(a_count_occurrences_by_grade, .ungroup_stats = "count_fraction")
#' afun(
#'   df,
#'   .N_col = 10L,
#'   .var = "AETOXGR",
#'   id = "USUBJID",
#'   grade_groups = list("ANY" = levels(df$AETOXGR))
#' )
#'
#' @export
a_count_occurrences_by_grade <- make_afun(
  # Statistics function that is wrapped into a formatted analysis function.
  s_count_occurrences_by_grade,
  # The `count_fraction` statistic is rendered with `format_count_fraction_fixed_dp`.
  .formats = c(count_fraction = format_count_fraction_fixed_dp)
)

#' @describeIn count_occurrences_by_grade Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param var_labels (`character`)\cr labels to show in the result table.
#'
#' @return
#' * `count_occurrences_by_grade()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_occurrences_by_grade()` to the table layout.
#'
#' @examples
#' # Layout creating function with custom format.
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_occurrences_by_grade(
#'     var = "AESEV",
#'     .formats = c("count_fraction" = "xx.xx (xx.xx%)")
#'   ) %>%
#'   build_table(df, alt_counts_df = df_adsl)
#'
#' # Define additional grade groupings.
#' grade_groups <- list(
#'   "-Any-" = c("1", "2", "3", "4", "5"),
#'   "Grade 1-2" = c("1", "2"),
#'   "Grade 3-5" = c("3", "4", "5")
#' )
#'
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_occurrences_by_grade(
#'     var = "AETOXGR",
#'     grade_groups = grade_groups
#'   ) %>%
#'   build_table(df, alt_counts_df = df_adsl)
#'
#' @export
count_occurrences_by_grade <- function(lyt,
                                       var,
                                       var_labels = var,
                                       show_labels = "default",
                                       ...,
                                       table_names = var,
                                       .stats = NULL,
                                       .formats = NULL,
                                       .indent_mods = NULL,
                                       .labels = NULL) {
  # Specialize the formatted analysis function with the user's statistic selection,
  # formats, labels and indentation; `count_fraction` is ungrouped so that each grade
  # (or grade group) is handled as a separate statistic.
  # `.labels` is forwarded here as well, matching `summarize_occurrences_by_grade()`;
  # with the default `NULL` the behavior is unchanged.
  afun <- make_afun(
    a_count_occurrences_by_grade,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .ungroup_stats = "count_fraction"
  )

  # Arguments given through `...` reach the statistics function as `extra_args`.
  analyze(
    lyt = lyt,
    vars = var,
    var_labels = var_labels,
    show_labels = show_labels,
    afun = afun,
    table_names = table_names,
    extra_args = list(...)
  )
}

#' @describeIn count_occurrences_by_grade Layout-creating function which can take content function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @return
#' * `summarize_occurrences_by_grade()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted content rows
#'   containing the statistics from `s_count_occurrences_by_grade()` to the table layout.
#'
#' @examples
#' # Layout creating function with custom format.
#' basic_table() %>%
#'   add_colcounts() %>%
#'   split_rows_by("ARM", child_labels = "visible", nested = TRUE) %>%
#'   summarize_occurrences_by_grade(
#'     var = "AESEV",
#'     .formats = c("count_fraction" = "xx.xx (xx.xx%)")
#'   ) %>%
#'   build_table(df, alt_counts_df = df_adsl)
#'
#' basic_table() %>%
#'   add_colcounts() %>%
#'   split_rows_by("ARM", child_labels = "visible", nested = TRUE) %>%
#'   summarize_occurrences_by_grade(
#'     var = "AETOXGR",
#'     grade_groups = grade_groups
#'   ) %>%
#'   build_table(df, alt_counts_df = df_adsl)
#'
#' @export
summarize_occurrences_by_grade <- function(lyt,
                                           var,
                                           ...,
                                           .stats = NULL,
                                           .formats = NULL,
                                           .indent_mods = NULL,
                                           .labels = NULL) {
  # Specialize the formatted analysis function for use as a content function;
  # `count_fraction` is ungrouped so that each grade (or grade group) is handled
  # as a separate statistic.
  content_fun <- make_afun(
    a_count_occurrences_by_grade,
    .ungroup_stats = "count_fraction",
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Arguments given through `...` reach the statistics function as `extra_args`.
  summarize_row_groups(
    lyt = lyt,
    var = var,
    cfun = content_fun,
    extra_args = list(...)
  )
}

#' Helper Functions for Tabulating Biomarker Effects on Binary Response by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions which are documented here separately to not confuse the user
#' when reading about the user-facing functions.
#'
#' @inheritParams response_biomarkers_subgroups
#' @inheritParams extract_rsp_biomarkers
#' @inheritParams argument_convention
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   mutate(rsp = AVALC == "CR")
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' @name h_response_biomarkers_subgroups
NULL

#' @describeIn h_response_biomarkers_subgroups helps with converting the "response" function variable list
#'   to the "logistic regression" variable list. The reason is that currently there is an
#'   inconsistency between the variable names accepted by `extract_rsp_subgroups()` and `fit_logistic()`.
#'
#' @param biomarker (`string`)\cr the name of the biomarker variable.
#'
#' @return
#' * `h_rsp_to_logistic_variables()` returns a named `list` of elements `response`, `arm`, `covariates`, and `strata`.
#'
#' @examples
#' # This is how the variable list is converted internally.
#' h_rsp_to_logistic_variables(
#'   variables = list(
#'     rsp = "RSP",
#'     covariates = c("A", "B"),
#'     strat = "D"
#'   ),
#'   biomarker = "AGE"
#' )
#'
#' @export
h_rsp_to_logistic_variables <- function(variables, biomarker) {
  checkmate::assert_list(variables)
  checkmate::assert_string(variables$rsp)
  checkmate::assert_string(biomarker)

  # Rename the "response" style elements to the "logistic regression" style ones.
  # The biomarker takes the place of the arm variable; absent optional elements
  # (`covariates`, `strat`) are carried over as `NULL`.
  logistic_variables <- list(
    response = variables$rsp,
    arm = biomarker,
    covariates = variables$covariates,
    strata = variables$strat
  )
  logistic_variables
}

#' @describeIn h_response_biomarkers_subgroups prepares estimates for number of responses, patients and
#'   overall response rate, as well as odds ratio estimates, confidence intervals and p-values, for multiple
#'   biomarkers in a given single data set.
#'   `variables` corresponds to names of variables found in `data`, passed as a named list and requires elements
#'   `rsp` and `biomarkers` (vector of continuous biomarker variables) and optionally `covariates`
#'   and `strat`.
#'
#' @return
#' * `h_logistic_mult_cont_df()` returns a `data.frame` containing estimates and statistics for the selected biomarkers.
#'
#' @examples
#' # For a single population, estimate separately the effects
#' # of two biomarkers.
#' df <- h_logistic_mult_cont_df(
#'   variables = list(
#'     rsp = "rsp",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX"
#'   ),
#'   data = adrs_f
#' )
#' df
#'
#' # If the data set is empty, still the corresponding rows with missings are returned.
#' h_logistic_mult_cont_df(
#'   variables = list(
#'     rsp = "rsp",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     strat = "STRATA1"
#'   ),
#'   data = adrs_f[NULL, ]
#' )
#'
#' @export
h_logistic_mult_cont_df <- function(variables,
                                    data,
                                    control = control_logistic()) {
  assert_df_with_variables(data, variables)

  checkmate::assert_character(variables$biomarkers, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  conf_level <- control[["conf_level"]]
  pval_label <- "p-value (Wald)"

  # If there is any data, run model, otherwise return empty results.
  if (nrow(data) > 0) {
    # One model per biomarker, each contributing one result row.
    l_result <- lapply(variables$biomarkers, function(bm) {
      model_fit <- fit_logistic(
        variables = h_rsp_to_logistic_variables(variables, bm),
        data = data,
        response_definition = control$response_definition
      )
      result <- h_logistic_simple_terms(
        x = bm,
        fit_glm = model_fit,
        conf_level = control$conf_level
      )
      # The response vector actually used by the model is stored differently depending
      # on the class of the fit: in the model frame for a `glm`, otherwise in the
      # "status" column of `y`.
      resp_vector <- if (inherits(model_fit, "glm")) {
        model_fit$model[[variables$rsp]]
      } else {
        as.logical(as.matrix(model_fit$y)[, "status"])
      }
      data.frame(
        # Dummy column needed downstream to create a nested header.
        biomarker = bm,
        biomarker_label = formatters::var_labels(data[bm], fill = TRUE),
        n_tot = length(resp_vector),
        n_rsp = sum(resp_vector),
        prop = mean(resp_vector),
        or = as.numeric(result[1L, "odds_ratio"]),
        lcl = as.numeric(result[1L, "lcl"]),
        ucl = as.numeric(result[1L, "ucl"]),
        conf_level = conf_level,
        pval = as.numeric(result[1L, "pvalue"]),
        pval_label = pval_label,
        stringsAsFactors = FALSE
      )
    })
    do.call(rbind, args = c(l_result, make.row.names = FALSE))
  } else {
    # No data: one row per biomarker with zero counts and missing estimates.
    data.frame(
      biomarker = variables$biomarkers,
      biomarker_label = formatters::var_labels(data[variables$biomarkers], fill = TRUE),
      n_tot = 0L,
      n_rsp = 0L,
      prop = NA,
      or = NA,
      lcl = NA,
      ucl = NA,
      conf_level = conf_level,
      pval = NA,
      pval_label = pval_label,
      row.names = seq_along(variables$biomarkers),
      stringsAsFactors = FALSE
    )
  }
}

#' @describeIn h_response_biomarkers_subgroups prepares a single sub-table given a `df` containing
#'   the results for a single biomarker.
#'
#' @param df (`data.frame`)\cr results for a single biomarker, as part of what is
#'   returned by [extract_rsp_biomarkers()] (it needs a couple of columns which are
#'   added by that high-level function relative to what is returned by [h_logistic_mult_cont_df()],
#'   see the example).
#'
#' @return
#' * `h_tab_rsp_one_biomarker()` returns an `rtables` table object with the given statistics arranged in columns.
#'
#' @examples
#' # Starting from above `df`, zoom in on one biomarker and add required columns.
#' df1 <- df[1, ]
#' df1$subgroup <- "All patients"
#' df1$row_type <- "content"
#' df1$var <- "ALL"
#' df1$var_label <- "All patients"
#'
#' # Internal function - h_tab_rsp_one_biomarker
#' \dontrun{
#' h_tab_rsp_one_biomarker(
#'   df1,
#'   vars = c("n_tot", "n_rsp", "prop", "or", "ci", "pval")
#' )
#' }
#'
#' @export
h_tab_rsp_one_biomarker <- function(df,
                                    vars,
                                    .indent_mods = 0L) {
  # Pick the analysis functions for the requested statistics by name.
  selected_afuns <- a_response_subgroups()[vars]

  # Column settings for the requested statistics; the confidence level and the
  # p-value label are read from the first row of `df`.
  column_settings <- d_rsp_subgroups_colvars(
    vars,
    conf_level = df$conf_level[1],
    method = df$pval_label[1]
  )

  # Delegate the actual table construction to the shared single-biomarker helper.
  h_tab_one_biomarker(
    df = df,
    afuns = selected_afuns,
    colvars = column_settings,
    .indent_mods = .indent_mods
  )
}

#' Convert List of Groups to Data Frame
#'
#' This converts a list of group levels into a data frame format which is expected by [rtables::add_combo_levels()].
#'
#' @param groups_list (named `list` of `character`)\cr specifies the new group levels via the names and the
#'   levels that belong to it in the character vectors that are elements of the list.
#'
#' @return [tibble::tibble()] in the required format.
#'
#' @examples
#' grade_groups <- list(
#'   "Any Grade (%)" = c("1", "2", "3", "4", "5"),
#'   "Grade 3-4 (%)" = c("3", "4"),
#'   "Grade 5 (%)" = "5"
#' )
#' # Internal function - groups_list_to_df
#' \dontrun{
#' groups_list_to_df(grade_groups)
#' }
#'
#' @keywords internal
groups_list_to_df <- function(groups_list) {
  # The input must be a named list whose elements are all character vectors.
  checkmate::assert_list(groups_list, names = "named")
  lapply(groups_list, checkmate::assert_character)

  group_labels <- names(groups_list)
  n_groups <- length(groups_list)

  # One row per group: a syntactic value name, the original label, the levels
  # forming the group, and an empty list of extra arguments.
  tibble::tibble(
    valname = make_names(group_labels),
    label = group_labels,
    levelcombo = unname(groups_list),
    exargs = rep(list(list()), times = n_groups)
  )
}

#' Reference and Treatment Group Combination
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Facilitate the re-combination of groups divided as reference and treatment groups; it helps in arranging groups of
#' columns in the `rtables` framework and teal modules.
#'
#' @param fct (`factor`)\cr the variable with levels which needs to be grouped.
#' @param ref (`character`)\cr the reference level(s). If `NULL`, the first level of `fct` is used.
#' @param collapse (`string`)\cr a character string used to join the levels of a group into the group name.
#'
#' @return A `list` with first item `ref` (reference) and second item `trt` (treatment).
#'
#' @examples
#' groups <- combine_groups(
#'   fct = DM$ARM,
#'   ref = c("B: Placebo")
#' )
#'
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups) %>%
#'   add_colcounts() %>%
#'   summarize_vars("AGE") %>%
#'   build_table(DM)
#'
#' @export
combine_groups <- function(fct,
                           ref = NULL,
                           collapse = "/") {
  checkmate::assert_string(collapse)
  checkmate::assert_character(ref, min.chars = 1, any.missing = FALSE, null.ok = TRUE)
  checkmate::assert_multi_class(fct, classes = c("factor", "character"))

  fct <- as_factor_keep_attributes(fct)
  all_levels <- levels(fct)

  # Without an explicit reference the first level is used; otherwise every
  # requested reference level must exist in `fct`.
  if (!is.null(ref)) {
    checkmate::assert_subset(ref, all_levels)
  } else {
    ref <- all_levels[1]
  }

  # Partition the levels, keeping their original order within each part.
  is_ref <- all_levels %in% ref
  groups <- list(
    ref = all_levels[is_ref],
    trt = all_levels[!is_ref]
  )

  # Each group is named after its levels joined by `collapse`.
  names(groups) <- vapply(groups, paste, character(1), collapse = collapse)
  groups
}

#' Split Columns by Groups of Levels
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @inheritParams groups_list_to_df
#' @param ... additional arguments to [rtables::split_cols_by()] in order. For instance, to
#'   control formats (`format`), add a joint column for all groups (`incl_all`).
#'
#' @return A layout object suitable for passing to further layouting functions. Adding
#'   this function to an `rtable` layout will add a column split including the given
#'   groups to the table layout.
#'
#' @seealso [rtables::split_cols_by()]
#'
#' @examples
#' # 1 - Basic use
#'
#' # Without group combination `split_cols_by_groups` is
#' # equivalent to [rtables::split_cols_by()].
#' basic_table() %>%
#'   split_cols_by_groups("ARM") %>%
#'   add_colcounts() %>%
#'   analyze("AGE") %>%
#'   build_table(DM)
#'
#' # Add a reference column.
#' basic_table() %>%
#'   split_cols_by_groups("ARM", ref_group = "B: Placebo") %>%
#'   add_colcounts() %>%
#'   analyze(
#'     "AGE",
#'     afun = function(x, .ref_group, .in_ref_col) {
#'       if (.in_ref_col) {
#'         in_rows("Diff Mean" = rcell(NULL))
#'       } else {
#'         in_rows("Diff Mean" = rcell(mean(x) - mean(.ref_group), format = "xx.xx"))
#'       }
#'     }
#'   ) %>%
#'   build_table(DM)
#'
#' # 2 - Adding group specification
#'
#' # Manual preparation of the groups.
#' groups <- list(
#'   "Arms A+B" = c("A: Drug X", "B: Placebo"),
#'   "Arms A+C" = c("A: Drug X", "C: Combination")
#' )
#'
#' # Use of split_cols_by_groups without reference column.
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups) %>%
#'   add_colcounts() %>%
#'   analyze("AGE") %>%
#'   build_table(DM)
#'
#' # Including differentiated output in the reference column.
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups_list = groups, ref_group = "Arms A+B") %>%
#'   analyze(
#'     "AGE",
#'     afun = function(x, .ref_group, .in_ref_col) {
#'       if (.in_ref_col) {
#'         in_rows("Diff. of Averages" = rcell(NULL))
#'       } else {
#'         in_rows("Diff. of Averages" = rcell(mean(x) - mean(.ref_group), format = "xx.xx"))
#'       }
#'     }
#'   ) %>%
#'   build_table(DM)
#'
#' # 3 - Binary list dividing factor levels into reference and treatment
#'
#' # `combine_groups` defines reference and treatment.
#' groups <- combine_groups(
#'   fct = DM$ARM,
#'   ref = c("A: Drug X", "B: Placebo")
#' )
#' groups
#'
#' # Use group definition without reference column.
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups_list = groups) %>%
#'   add_colcounts() %>%
#'   analyze("AGE") %>%
#'   build_table(DM)
#'
#' # Use group definition with reference column (first item of groups).
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups, ref_group = names(groups)[1]) %>%
#'   add_colcounts() %>%
#'   analyze(
#'     "AGE",
#'     afun = function(x, .ref_group, .in_ref_col) {
#'       if (.in_ref_col) {
#'         in_rows("Diff Mean" = rcell(NULL))
#'       } else {
#'         in_rows("Diff Mean" = rcell(mean(x) - mean(.ref_group), format = "xx.xx"))
#'       }
#'     }
#'   ) %>%
#'   build_table(DM)
#'
#' @export
split_cols_by_groups <- function(lyt,
                                 var,
                                 groups_list = NULL,
                                 ref_group = NULL,
                                 ...) {
  # No grouping requested: this is a plain column split.
  if (is.null(groups_list)) {
    return(
      split_cols_by(
        lyt = lyt,
        var = var,
        ref_group = ref_group,
        ...
      )
    )
  }

  combo_df <- groups_list_to_df(groups_list)

  # The reference group is given by its label; translate it to the internal
  # value name used for the combined levels.
  if (!is.null(ref_group)) {
    is_ref <- combo_df$label == ref_group
    ref_group <- combo_df$valname[is_ref]
  }

  # Split by the combined levels only (the original levels are not kept).
  split_cols_by(
    lyt = lyt,
    var = var,
    split_fun = add_combo_levels(combo_df, keep_levels = combo_df$valname),
    ref_group = ref_group,
    ...
  )
}

#' Combine Counts
#'
#' Simplifies the estimation of column counts, especially when group combination is required.
#'
#' @inheritParams combine_groups
#' @inheritParams groups_list_to_df
#'
#' @return A `vector` of column counts.
#'
#' @seealso [combine_groups()]
#'
#' @examples
#' ref <- c("A: Drug X", "B: Placebo")
#' groups <- combine_groups(fct = DM$ARM, ref = ref)
#'
#' # Internal function - combine_counts
#' \dontrun{
#' col_counts <- combine_counts(
#'   fct = DM$ARM,
#'   groups_list = groups
#' )
#'
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups) %>%
#'   add_colcounts() %>%
#'   summarize_vars("AGE") %>%
#'   build_table(DM, col_counts = col_counts)
#'
#' ref <- "A: Drug X"
#' groups <- combine_groups(fct = DM$ARM, ref = ref)
#' col_counts <- combine_counts(
#'   fct = DM$ARM,
#'   groups_list = groups
#' )
#'
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups) %>%
#'   add_colcounts() %>%
#'   summarize_vars("AGE") %>%
#'   build_table(DM, col_counts = col_counts)
#' }
#'
#' @keywords internal
combine_counts <- function(fct, groups_list = NULL) {
  checkmate::assert_multi_class(fct, classes = c("factor", "character"))

  fct <- as_factor_keep_attributes(fct)

  # Tabulate the levels once; it does not depend on the group being summed, so
  # it is computed outside of the per-group loop.
  level_counts <- table(fct)

  if (is.null(groups_list)) {
    # No grouping: one count per level, named by level.
    stats::setNames(as.numeric(level_counts), nm = dimnames(level_counts)[[1]])
  } else {
    # One count per group: the sum of the counts of the levels in that group.
    vapply(
      X = groups_list,
      FUN = function(x) sum(level_counts[x]),
      FUN.VALUE = 1
    )
  }
}

#' Counting Missed Doses
#'
#' @description `r lifecycle::badge("stable")`
#'
#' These are specific functions to count patients with missed doses. The difference to [count_cumulative()] is
#' mainly the special labels.
#'
#' @inheritParams argument_convention
#'
#' @seealso Relevant description function [d_count_missed_doses()].
#'
#' @name count_missed_doses
NULL

#' @describeIn count_missed_doses Statistics function to count non-missing values.
#'
#' @return
#' * `s_count_nonmissing()` returns the statistic `n` which is the count of non-missing values in `x`.
#'
#' @examples
#' set.seed(1)
#' x <- c(sample(1:10, 10), NA)
#'
#' # Internal function - s_count_nonmissing
#' \dontrun{
#' s_count_nonmissing(x)
#' }
#'
#' @keywords internal
s_count_nonmissing <- function(x) {
  # The count is delegated to `n_available()`; it is returned as the single
  # statistic `n`.
  n_obs <- n_available(x)
  list(n = n_obs)
}

#' Description Function that Calculates Labels for [s_count_missed_doses()].
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams s_count_missed_doses
#'
#' @return [d_count_missed_doses()] returns a `character` vector with the labels.
#'
#' @seealso [s_count_missed_doses()]
#'
#' @export
d_count_missed_doses <- function(thresholds) {
  # `paste0()` treats zero-length arguments as "", which would produce a single
  # malformed label for empty input; return no labels instead.
  if (length(thresholds) == 0L) {
    return(character(0))
  }

  # "dose" is pluralized only for thresholds above one.
  plural_suffix <- ifelse(thresholds > 1, "s", "")
  paste0("At least ", thresholds, " missed dose", plural_suffix)
}

#' @describeIn count_missed_doses Statistics function to count patients with missed doses.
#'
#' @param thresholds (vector of `count`)\cr number of missed doses the patients at least had.
#'
#' @return
#' * `s_count_missed_doses()` returns the statistics `n` and `count_fraction` with one element for each threshold.
#'
#' @examples
#' # Internal function - s_count_missed_doses
#' \dontrun{
#' s_count_missed_doses(x = c(0, 1, 0, 2, 3, 4, 0, 2), thresholds = c(2, 5), .N_col = 10)
#' }
#'
#' @keywords internal
s_count_missed_doses <- function(x,
                                 thresholds,
                                 .N_col) { # nolint
  # Cumulative counts of patients at or above each threshold
  # (`lower_tail = FALSE`, `include_eq = TRUE`).
  cumulative <- s_count_cumulative(
    x = x,
    thresholds = thresholds,
    lower_tail = FALSE,
    include_eq = TRUE,
    .N_col = .N_col
  )
  threshold_labels <- d_count_missed_doses(thresholds)

  # Attach the "At least ... missed dose(s)" label to each count/fraction,
  # pairing statistics and labels by position.
  fractions <- cumulative$count_fraction
  for (idx in seq_along(fractions)) {
    fractions[[idx]] <- formatters::with_label(fractions[[idx]], label = threshold_labels[idx])
  }
  cumulative$count_fraction <- fractions

  # Put the non-missing count `n` in front of the cumulative statistics.
  c(s_count_nonmissing(x), cumulative)
}

#' @describeIn count_missed_doses Formatted analysis function which is used as `afun`
#'   in `count_missed_doses()`.
#'
#' @return
#' * `a_count_missed_doses()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_count_missed_doses
#' \dontrun{
#' #  We need to ungroup `count_fraction` first so that the `rtables` formatting
#' # function `format_count_fraction()` can be applied correctly.
#' afun <- make_afun(a_count_missed_doses, .ungroup_stats = "count_fraction")
#' afun(x = c(0, 1, 0, 2, 3, 4, 0, 2), thresholds = c(2, 5), .N_col = 10)
#' }
#'
#' @keywords internal
a_count_missed_doses <- make_afun(
  # Statistics function being wrapped into a formatted analysis function.
  s_count_missed_doses,
  # Default format per statistic: `n` uses the format string "xx", while
  # `count_fraction` uses the function `format_count_fraction`. Combining a
  # string and a function with `c()` yields a named list.
  .formats = c(n = "xx", count_fraction = format_count_fraction)
)

#' @describeIn count_missed_doses Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @inheritParams s_count_cumulative
#'
#' @return
#' * `count_missed_doses()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_missed_doses()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' anl <- tern_ex_adsl %>%
#'   distinct(STUDYID, USUBJID, ARM) %>%
#'   mutate(
#'     PARAMCD = "TNDOSMIS",
#'     PARAM = "Total number of missed doses during study",
#'     AVAL = sample(0:20, size = nrow(tern_ex_adsl), replace = TRUE),
#'     AVALC = ""
#'   )
#'
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_missed_doses("AVAL", thresholds = c(1, 5, 10, 15), var_labels = "Missed Doses") %>%
#'   build_table(anl, alt_counts_df = tern_ex_adsl)
#'
#' @export
count_missed_doses <- function(lyt,
                               vars,
                               var_labels = vars,
                               show_labels = "visible",
                               ...,
                               table_names = vars,
                               .stats = NULL,
                               .formats = NULL,
                               .labels = NULL,
                               .indent_mods = NULL) {
  # Customize the formatted analysis function with the user-supplied settings.
  # `count_fraction` is ungrouped so that each threshold becomes its own row.
  missed_doses_afun <- make_afun(
    a_count_missed_doses,
    .ungroup_stats = "count_fraction",
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Arguments passed through `...` are handed to the analysis function via
  # `extra_args`.
  analyze(
    lyt = lyt,
    vars = vars,
    afun = missed_doses_afun,
    extra_args = list(...),
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names
  )
}

#' Counting Specific Values
#'
#' @description `r lifecycle::badge("stable")`
#'
#' We can count the occurrence of specific values in a variable of interest.
#'
#' @inheritParams argument_convention
#'
#' @note
#' * For `factor` variables, `s_count_values` converts `x` and `values` to `character` and forwards to the
#'   `character` method; `values` that do not occur in `x` are simply counted zero times.
#' * For `count_values()`, variable labels are shown when there is more than one element in `vars`,
#'   otherwise they are hidden.
#'
#' @name count_values_funs
NULL

#' @describeIn count_values_funs S3 generic function to count values.
#'
#' @inheritParams s_summary.logical
#' @param values (`character`)\cr specific values that should be counted.
#'
#' @return
#' * `s_count_values()` returns output of [s_summary()] for specified values of a non-numeric variable.
#'
#' @export
s_count_values <- function(x,
                           values,
                           na.rm = TRUE, # nolint
                           .N_col, # nolint
                           .N_row, # nolint
                           denom = c("n", "N_row", "N_col")) {
  # S3 generic: dispatch on the class of `x`. The remaining arguments are only
  # declared here and are interpreted by the selected method.
  UseMethod("s_count_values", x)
}

#' @describeIn count_values_funs Method for `character` class.
#'
#' @method s_count_values character
#'
#' @examples
#' # `s_count_values.character`
#' s_count_values(x = c("a", "b", "a"), values = "a")
#' s_count_values(x = c("a", "b", "a", NA, NA), values = "b", na.rm = FALSE)
#'
#' @export
s_count_values.character <- function(x,
                                     values = "Y",
                                     na.rm = TRUE, # nolint
                                     ...) {
  checkmate::assert_character(values)

  # Optionally drop missing observations before matching.
  observed <- if (na.rm) x[!is.na(x)] else x

  # Flag the observations that equal any of the requested values; the flags are
  # then summarized as a logical variable.
  matches <- observed %in% values
  s_summary(matches, ...)
}

#' @describeIn count_values_funs Method for `factor` class. This makes an automatic
#'   conversion to `character` and then forwards to the method for characters.
#'
#' @method s_count_values factor
#'
#' @examples
#' # `s_count_values.factor`
#' s_count_values(x = factor(c("a", "b", "a")), values = "a")
#'
#' @export
s_count_values.factor <- function(x,
                                  values = "Y",
                                  ...) {
  # Convert both the data and the requested values to character, then
  # re-dispatch so that the character method does the counting.
  x_chr <- as.character(x)
  values_chr <- as.character(values)
  s_count_values(x_chr, values = values_chr, ...)
}

#' @describeIn count_values_funs Method for `logical` class.
#'
#' @method s_count_values logical
#'
#' @examples
#' # `s_count_values.logical`
#' s_count_values(x = c(TRUE, FALSE, TRUE))
#'
#' @export
s_count_values.logical <- function(x, values = TRUE, ...) {
  checkmate::assert_logical(values)

  # Logical data and values are compared through their character
  # representation ("TRUE"/"FALSE") by re-dispatching to the character method.
  x_chr <- as.character(x)
  values_chr <- as.character(values)
  s_count_values(x_chr, values = values_chr, ...)
}

#' @describeIn count_values_funs Formatted analysis function which is used as `afun`
#'   in `count_values()`.
#'
#' @return
#' * `a_count_values()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # `a_count_values`
#' a_count_values(x = factor(c("a", "b", "a")), values = "a", .N_col = 10, .N_row = 10)
#'
#' @export
a_count_values <- make_afun(
  # Statistics generic being wrapped into a formatted analysis function.
  s_count_values,
  # Default format strings for the statistics `count_fraction` and `count`.
  .formats = c(count_fraction = "xx (xx.xx%)", count = "xx")
)

#' @describeIn count_values_funs Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_values()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_values()` to the table layout.
#'
#' @examples
#' # `count_values`
#' basic_table() %>%
#'   count_values("Species", values = "setosa") %>%
#'   build_table(iris)
#'
#' @export
count_values <- function(lyt,
                         vars,
                         values,
                         ...,
                         table_names = vars,
                         .stats = "count_fraction",
                         .formats = NULL,
                         .labels = c(count_fraction = paste(values, collapse = ", ")),
                         .indent_mods = NULL) {
  # Customize the formatted analysis function with the user-supplied settings.
  afun <- make_afun(
    a_count_values,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Variable labels are only shown when more than one variable is analyzed.
  # The condition is a scalar, so plain `if`/`else` is used instead of the
  # vectorized `ifelse()`.
  show_labels <- if (length(vars) > 1) "visible" else "hidden"

  # `values` and anything passed through `...` are handed to the analysis
  # function via `extra_args`.
  analyze(
    lyt,
    vars,
    afun = afun,
    extra_args = c(list(values = values), list(...)),
    show_labels = show_labels,
    table_names = table_names
  )
}

#' Patient Counts with Abnormal Range Values
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Primary analysis variable `.var` indicates the abnormal range result (`character` or `factor`)
#' and additional analysis variables are `id` (`character` or `factor`) and `baseline` (`character` or
#' `factor`). For each direction specified in `abnormal` (e.g. high or low) count patients in the
#' numerator and denominator as follows:
#'   * `num` : The number of patients with this abnormality recorded while on treatment.
#'   * `denom`: The number of patients with at least one post-baseline assessment.
#'
#' @inheritParams argument_convention
#' @param abnormal (named `list`)\cr list identifying the abnormal range level(s) in `var`. Defaults to
#'   `list(Low = "LOW", High = "HIGH")` but you can also group different levels into the named list,
#'   for example, `abnormal = list(Low = c("LOW", "LOW LOW"), High = c("HIGH", "HIGH HIGH"))`.
#'
#' @note
#' * `count_abnormal()` only works with a single variable containing multiple abnormal levels.
#' * `df` should be filtered to include only post-baseline records.
#' * the denominator includes patients that might have other abnormal levels at baseline,
#'   and patients with missing baseline. Patients with these abnormalities at
#'   baseline can be optionally excluded from numerator and denominator.
#'
#' @name abnormal
#' @include formatting_functions.R
NULL

#' @describeIn abnormal Statistics function which counts patients with abnormal range values
#'   for a single `abnormal` level.
#'
#' @param exclude_base_abn (`flag`)\cr whether to exclude subjects with baseline abnormality
#'   from numerator and denominator.
#'
#' @return
#' * `s_count_abnormal()` returns the statistic `fraction` which is a list with, for each abnormal level,
#'   a vector with `num` and `denom` counts of patients.
#'
#' @examples
#' library(dplyr)
#'
#' df <- data.frame(
#'   USUBJID = as.character(c(1, 1, 2, 2)),
#'   ANRIND = factor(c("NORMAL", "LOW", "HIGH", "HIGH")),
#'   BNRIND = factor(c("NORMAL", "NORMAL", "HIGH", "HIGH")),
#'   ONTRTFL = c("", "Y", "", "Y"),
#'   stringsAsFactors = FALSE
#' )
#'
#' # Select only post-baseline records.
#' df <- df %>%
#'   filter(ONTRTFL == "Y")
#'
#' # Internal function - s_count_abnormal
#' \dontrun{
#' # For abnormal level "HIGH" we get the following counts.
#' s_count_abnormal(df, .var = "ANRIND", abnormal = list(high = "HIGH", low = "LOW"))
#'
#' # Optionally exclude patients with abnormality at baseline.
#' s_count_abnormal(
#'   df,
#'   .var = "ANRIND",
#'   abnormal = list(high = "HIGH", low = "LOW"),
#'   exclude_base_abn = TRUE
#' )
#' }
#'
#' @keywords internal
s_count_abnormal <- function(df,
                             .var,
                             abnormal = list(Low = "LOW", High = "HIGH"),
                             variables = list(id = "USUBJID", baseline = "BNRIND"),
                             exclude_base_abn = FALSE) {
  # Validate the arguments first, then the structure of `df`, and only then the
  # overlap between `abnormal` and the factor levels. This way a missing or
  # non-factor `.var` column is reported as such rather than as an opaque
  # failure of the overlap check.
  checkmate::assert_list(abnormal, types = "character", names = "named", len = 2, any.missing = FALSE)
  checkmate::assert_flag(exclude_base_abn)
  assert_df_with_variables(df, c(range = .var, variables))
  checkmate::assert_factor(df[[.var]])
  checkmate::assert_multi_class(df[[variables$baseline]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))
  # At least one of the requested abnormal levels must be a level of `.var`.
  checkmate::assert_true(any(unlist(abnormal) %in% levels(df[[.var]])))

  # Counts numerator and denominator patients for one abnormal direction.
  # `abn_name` is used as the label, `abn` holds the levels of that direction.
  count_abnormal_single <- function(abn_name, abn) {
    # Patients in the denominator fulfill:
    # - have at least one post-baseline visit
    # - their baseline must not be abnormal if `exclude_base_abn`.
    if (exclude_base_abn) {
      denom_select <- !(df[[variables$baseline]] %in% abn)
    } else {
      denom_select <- TRUE
    }
    denom <- length(unique(df[denom_select, variables$id, drop = TRUE]))

    # Patients in the numerator fulfill:
    # - have at least one post-baseline visit with the required abnormality level
    # - are part of the denominator patients.
    num_select <- (df[[.var]] %in% abn) & denom_select
    num <- length(unique(df[num_select, variables$id, drop = TRUE]))

    formatters::with_label(c(num = num, denom = denom), abn_name)
  }

  # This will define the abnormal levels theoretically possible for a specific lab parameter
  # within a split level of a layout.
  abnormal_lev <- lapply(abnormal, intersect, levels(df[[.var]]))
  abnormal_lev <- abnormal_lev[vapply(abnormal_lev, function(x) length(x) > 0, logical(1))]

  # One labelled `c(num, denom)` vector per remaining direction, named by
  # direction.
  result <- Map(count_abnormal_single, names(abnormal_lev), abnormal_lev)
  list(fraction = result)
}

#' @describeIn abnormal Formatted analysis function which is used as `afun` in `count_abnormal()`.
#'
#' @return
#' * `a_count_abnormal()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_count_abnormal
#' \dontrun{
#' # Use the Formatted Analysis function for `analyze()`.
#' a_fun <- make_afun(a_count_abnormal, .ungroup_stats = "fraction")
#' a_fun(df, .var = "ANRIND", abnormal = list(low = "LOW", high = "HIGH"))
#' }
#'
#' @keywords internal
a_count_abnormal <- make_afun(
  # Statistics function being wrapped into a formatted analysis function.
  s_count_abnormal,
  # The `fraction` statistic is formatted with the function `format_fraction`.
  .formats = c(fraction = format_fraction)
)

#' @describeIn abnormal Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_abnormal()` to the table layout.
#'
#' @examples
#' # Layout creating function.
#' basic_table() %>%
#'   count_abnormal(var = "ANRIND", abnormal = list(high = "HIGH", low = "LOW")) %>%
#'   build_table(df)
#'
#' # Passing of statistics function and formatting arguments.
#' df2 <- data.frame(
#'   ID = as.character(c(1, 1, 2, 2)),
#'   RANGE = factor(c("NORMAL", "LOW", "HIGH", "HIGH")),
#'   BL_RANGE = factor(c("NORMAL", "NORMAL", "HIGH", "HIGH")),
#'   ONTRTFL = c("", "Y", "", "Y"),
#'   stringsAsFactors = FALSE
#' )
#'
#' # Select only post-baseline records.
#' df2 <- df2 %>%
#'   filter(ONTRTFL == "Y")
#'
#' basic_table() %>%
#'   count_abnormal(
#'     var = "RANGE",
#'     abnormal = list(low = "LOW", high = "HIGH"),
#'     variables = list(id = "ID", baseline = "BL_RANGE")
#'   ) %>%
#'   build_table(df2)
#'
#' @export
count_abnormal <- function(lyt,
                           var,
                           ...,
                           table_names = var,
                           .stats = NULL,
                           .formats = NULL,
                           .labels = NULL,
                           .indent_mods = NULL) {
  # Customize the formatted analysis function with the user-supplied settings.
  # `fraction` is ungrouped so that each abnormal direction becomes its own row.
  abnormal_afun <- make_afun(
    a_count_abnormal,
    .ungroup_stats = "fraction",
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Only a single analysis variable is supported.
  checkmate::assert_string(var)

  # Arguments passed through `...` are handed to the analysis function via
  # `extra_args`; the variable label is always hidden.
  analyze(
    lyt = lyt,
    vars = var,
    afun = abnormal_afun,
    show_labels = "hidden",
    table_names = table_names,
    extra_args = list(...)
  )
}

#' Tabulate Biomarker Effects on Survival by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Tabulate the estimated effects of multiple continuous biomarker variables
#' across population subgroups.
#'
#' @inheritParams argument_convention
#' @inheritParams fit_coxreg_multivar
#' @inheritParams survival_duration_subgroups
#'
#' @details These functions create a layout starting from a data frame which contains
#'   the required statistics. The tables are then typically used as input for forest plots.
#'
#' @examples
#' library(dplyr)
#'
#' adtte <- tern_ex_adtte
#'
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte)
#'
#' adtte_f <- adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("AVALU" = adtte_labels[["AVALU"]], "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' df <- extract_survival_biomarkers(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     strata = "STRATA1",
#'     covariates = "SEX",
#'     subgroups = "BMRKR2"
#'   ),
#'   data = adtte_f
#' )
#' df
#'
#' @name survival_biomarkers_subgroups
NULL

#' Prepares Survival Data Estimates for Multiple Biomarkers in a Single Data Frame
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares estimates for number of events, patients and median survival times, as well as hazard ratio estimates,
#' confidence intervals and p-values, for multiple biomarkers across population subgroups in a single data frame.
#' `variables` corresponds to the names of variables found in `data`, passed as a named `list` and requires elements
#' `tte`, `is_event`, `biomarkers` (vector of continuous biomarker variables), and optionally `subgroups` and `strat`.
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @inheritParams argument_convention
#' @inheritParams fit_coxreg_multivar
#' @inheritParams survival_duration_subgroups
#'
#' @return A `data.frame` with columns `biomarker`, `biomarker_label`, `n_tot`, `n_tot_events`,
#'   `median`, `hr`, `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`,
#'   `var_label`, and `row_type`.
#'
#' @seealso [h_coxreg_mult_cont_df()] which is used internally, [tabulate_survival_biomarkers()].
#'
#' @examples
#' # Typical analysis of two continuous biomarkers `BMRKR1` and `AGE`,
#' # in multiple regression models containing one covariate `SEX`,
#' # as well as one stratification variable `STRATA1`. The subgroups
#' # are defined by the levels of `BMRKR2`.
#'
#' library(dplyr)
#'
#' adtte <- tern_ex_adtte
#' adtte_labels <- formatters::var_labels(adtte)
#'
#' adtte_f <- adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("AVALU" = adtte_labels[["AVALU"]], "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' df <- extract_survival_biomarkers(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     strata = "STRATA1",
#'     covariates = "SEX",
#'     subgroups = "BMRKR2"
#'   ),
#'   data = adtte_f
#' )
#' df
#'
#' # Here we group the levels of `BMRKR2` manually.
#' df_grouped <- extract_survival_biomarkers(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     strata = "STRATA1",
#'     covariates = "SEX",
#'     subgroups = "BMRKR2"
#'   ),
#'   data = adtte_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#' df_grouped
#'
#' @export
extract_survival_biomarkers <- function(variables,
                                        data,
                                        groups_lists = list(),
                                        control = control_coxreg(),
                                        label_all = "All Patients") {
  checkmate::assert_list(variables)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_string(label_all)

  # Estimates for the overall population, tagged as the "content" rows.
  overall <- h_coxreg_mult_cont_df(
    variables = variables,
    data = data,
    control = control
  )
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  # Without subgroup variables the overall result is all there is to return.
  if (is.null(variables$subgroups)) {
    return(overall)
  }

  # Split the data by subgroup and repeat the estimation within each part.
  subgroup_data <- h_split_by_subgroups(
    data,
    variables$subgroups,
    groups_lists = groups_lists
  )
  subgroup_results <- lapply(subgroup_data, function(grp) {
    result <- h_coxreg_mult_cont_df(
      variables = variables,
      data = grp$df,
      control = control
    )
    # Repeat the subgroup's label row once per result row and attach it.
    result_labels <- grp$df_labels[rep(1, times = nrow(result)), ]
    cbind(result, result_labels)
  })

  # Stack all subgroup results, tag them as "analysis" rows and append them
  # below the overall result.
  by_subgroup <- do.call(rbind, args = c(subgroup_results, make.row.names = FALSE))
  by_subgroup$row_type <- "analysis"
  rbind(
    overall,
    by_subgroup
  )
}

#' @describeIn survival_biomarkers_subgroups Table-creating function which creates a table
#'   summarizing biomarker effects on survival by subgroup.
#'
#' @param df (`data.frame`)\cr containing all analysis variables, as returned by
#'   [extract_survival_biomarkers()].
#' @param vars (`character`)\cr the names of statistics to be reported among:
#'   * `n_tot_events`: Total number of events per group.
#'   * `n_tot`: Total number of observations per group.
#'   * `median`: Median survival time.
#'   * `hr`: Hazard ratio.
#'   * `ci`: Confidence interval of hazard ratio.
#'   * `pval`: p-value of the effect.
#'   Note, one of the statistics `n_tot` and `n_tot_events`, as well as both `hr` and `ci` are required.
#'
#' @return An `rtables` table summarizing biomarker effects on survival by subgroup.
#'
#' @note In contrast to [tabulate_survival_subgroups()] this tabulation function does
#'   not start from an input layout `lyt`. This is because internally the table is
#'   created by combining multiple subtables.
#'
#' @seealso [h_tab_surv_one_biomarker()] which is used internally, [extract_survival_biomarkers()].
#'
#' @examples
#' ## Table with default columns.
#' tabulate_survival_biomarkers(df)
#'
#' ## Table with a manually chosen set of columns: leave out "pval", reorder.
#' tab <- tabulate_survival_biomarkers(
#'   df = df,
#'   vars = c("n_tot_events", "ci", "n_tot", "median", "hr"),
#'   time_unit = as.character(adtte_f$AVALU[1])
#' )
#'
#' ## Finally produce the forest plot.
#' \dontrun{
#' g_forest(tab, xlim = c(0.8, 1.2))
#' }
#'
#' @export
tabulate_survival_biomarkers <- function(df,
                                         vars = c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"),
                                         time_unit = NULL,
                                         .indent_mods = 0L) {
  # Creates one sub-table per biomarker, stacks them and attaches the
  # attributes describing which columns hold the estimate, CI and symbol size.
  checkmate::assert_data_frame(df)
  checkmate::assert_character(df$biomarker)
  checkmate::assert_character(df$biomarker_label)
  checkmate::assert_subset(vars, c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"))

  # Tabulates the rows of a single biomarker and labels the first row path
  # of the resulting sub-table with the biomarker label.
  build_biomarker_tab <- function(df_bm) {
    tab <- h_tab_surv_one_biomarker(
      df = df_bm,
      vars = vars,
      time_unit = time_unit,
      .indent_mods = .indent_mods
    )
    label_at_path(tab, path = row_paths(tab)[[1]][1]) <- df_bm$biomarker_label[1]
    tab
  }

  tab_list <- lapply(split(df, f = df$biomarker), FUN = build_biomarker_tab)
  combined <- do.call(rbind, tab_list)

  # Column positions are given relative to the order of `vars`.
  structure(
    combined,
    forest_header = paste0(c("Higher", "Lower"), "\nBetter"),
    col_x = match("hr", vars),
    col_ci = match("ci", vars),
    col_symbol_size = grep("^n_tot", vars)[1]
  )
}

#' Patient Counts with the Most Extreme Post-baseline Toxicity Grade per Direction of Abnormality
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Primary analysis variable `.var` indicates the toxicity grade (`factor`), and additional
#' analysis variables are `id` (`character` or `factor`), `param` (`factor`) and `grade_dir` (`factor`).
#' The pre-processing steps are crucial when using this function.
#' For a certain direction (e.g. high or low) this function counts
#' patients in the denominator as number of patients with at least one valid measurement during treatment,
#' and patients in the numerator as follows:
#'   * `1` to `4`: Numerator is number of patients with worst grades 1-4 respectively;
#'   * `Any`: Numerator is number of patients with at least one abnormality, which means grade is different from 0.
#'
#' @inheritParams argument_convention
#'
#' @details The pre-processing steps are crucial when using this function. From the standard lab grade variable
#'   `ATOXGR`, derive the following two variables:
#'   * A grade direction variable (e.g. `GRADE_DIR`) is required in order to obtain
#'     the correct denominators when building the layout as it is used to define row splitting.
#'   * A toxicity grade variable (e.g. `GRADE_ANL`) where all negative values from
#'     `ATOXGR` are replaced by their absolute values.
#'
#' @note Prior to tabulation, `df` must be filtered to include only post-baseline records with worst grade flags.
#'
#' @name abnormal_by_worst_grade
NULL

#' @describeIn abnormal_by_worst_grade Statistics function which counts patients by worst grade.
#'
#' @return
#' * `s_count_abnormal_by_worst_grade()` returns the single statistic `count_fraction` with grades 1 to 4 and
#'   "Any" results.
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#' adlb <- tern_ex_adlb
#'
#' # Data is modified in order to have some parameters with grades only in one direction
#' # and simulate the real data.
#' adlb$ATOXGR[adlb$PARAMCD == "ALT" & adlb$ATOXGR %in% c("1", "2", "3", "4")] <- "-1"
#' adlb$ANRIND[adlb$PARAMCD == "ALT" & adlb$ANRIND == "HIGH"] <- "LOW"
#' adlb$WGRHIFL[adlb$PARAMCD == "ALT"] <- ""
#'
#' adlb$ATOXGR[adlb$PARAMCD == "IGA" & adlb$ATOXGR %in% c("-1", "-2", "-3", "-4")] <- "1"
#' adlb$ANRIND[adlb$PARAMCD == "IGA" & adlb$ANRIND == "LOW"] <- "HIGH"
#' adlb$WGRLOFL[adlb$PARAMCD == "IGA"] <- ""
#'
#' # Here starts the real pre-processing.
#' adlb_f <- adlb %>%
#'   filter(!AVISIT %in% c("SCREENING", "BASELINE")) %>%
#'   mutate(
#'     GRADE_DIR = factor(
#'       case_when(
#'         ATOXGR %in% c("-1", "-2", "-3", "-4") ~ "LOW",
#'         ATOXGR == "0" ~ "ZERO",
#'         ATOXGR %in% c("1", "2", "3", "4") ~ "HIGH"
#'       ),
#'       levels = c("LOW", "ZERO", "HIGH")
#'     ),
#'     GRADE_ANL = fct_relevel(
#'       fct_recode(ATOXGR, `1` = "-1", `2` = "-2", `3` = "-3", `4` = "-4"),
#'       c("0", "1", "2", "3", "4")
#'     )
#'   ) %>%
#'   filter(WGRLOFL == "Y" | WGRHIFL == "Y") %>%
#'   droplevels()
#'
#' adlb_f_alt <- adlb_f %>%
#'   filter(PARAMCD == "ALT") %>%
#'   droplevels()
#' full_parent_df <- list(adlb_f_alt, "not_needed")
#' cur_col_subset <- list(rep(TRUE, nrow(adlb_f_alt)), "not_needed")
#'
#' # This mimics a split structure on PARAM and GRADE_DIR for a total column
#' spl_context <- data.frame(
#'   split = c("PARAM", "GRADE_DIR"),
#'   full_parent_df = I(full_parent_df),
#'   cur_col_subset = I(cur_col_subset)
#' )
#'
#' # Internal function - s_count_abnormal_by_worst_grade
#' \dontrun{
#' s_count_abnormal_by_worst_grade(
#'   df = adlb_f_alt,
#'   .spl_context = spl_context,
#'   .var = "GRADE_ANL"
#' )
#' }
#'
#' @keywords internal
s_count_abnormal_by_worst_grade <- function(df, # nolint
                                            .var = "GRADE_ANL",
                                            .spl_context,
                                            variables = list(
                                              id = "USUBJID",
                                              param = "PARAM",
                                              grade_dir = "GRADE_DIR"
                                            )) {
  # Counts, for each non-zero grade level of `.var` and for "Any", the number of
  # unique subjects in `df` and the fraction relative to the unique subjects of the
  # parent data set at the `param` split (restricted to the current column).
  # Returns `list(count_fraction = <named list of c(count, fraction)>)`.
  checkmate::assert_string(.var)
  assert_valid_factor(df[[.var]])
  assert_valid_factor(df[[variables$param]])
  assert_valid_factor(df[[variables$grade_dir]])
  assert_df_with_variables(df, c(a = .var, variables))
  checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))

  # To verify that the `split_rows_by` are performed with correct variables.
  checkmate::assert_subset(c(variables[["param"]], variables[["grade_dir"]]), .spl_context$split)
  first_row <- .spl_context[.spl_context$split == variables[["param"]], ]
  x_lvls <- c(setdiff(levels(df[[.var]]), "0"), "Any")
  result <- split(numeric(0), factor(x_lvls))

  id_var <- variables[["id"]]
  subj <- first_row$full_parent_df[[1]][[id_var]]
  subj_cur_col <- subj[first_row$cur_col_subset[[1]]]
  # Some subjects may have a record for high and low directions but
  # should be counted only once.
  denom <- length(unique(subj_cur_col))

  for (lvl in x_lvls) {
    if (lvl != "Any") {
      df_lvl <- df[df[[.var]] == lvl, ]
    } else {
      # "Any" collects every record whose grade differs from level "0".
      df_lvl <- df[df[[.var]] != "0", ]
    }
    # Use the user-specified id variable (not a hard-coded column name) so that
    # the numerator is based on the same identifier as the denominator.
    num <- length(unique(df_lvl[[id_var]]))
    fraction <- if (denom == 0) 0 else num / denom
    result[[lvl]] <- formatters::with_label(c(count = num, fraction = fraction), lvl)
  }

  list(count_fraction = result)
}

#' @describeIn abnormal_by_worst_grade Formatted analysis function which is used as `afun`
#'   in `count_abnormal_by_worst_grade()`.
#'
#' @return
#' * `a_count_abnormal_by_worst_grade()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_count_abnormal_by_worst_grade
#' \dontrun{
#' # Use the Formatted Analysis function for `analyze()`. We need to ungroup `count_fraction` first
#' # so that the `rtables` formatting function `format_count_fraction()` can be applied correctly.
#' afun <- make_afun(a_count_abnormal_by_worst_grade, .ungroup_stats = "count_fraction")
#' afun(df = adlb_f_alt, .spl_context = spl_context)
#' }
#'
#' @keywords internal
a_count_abnormal_by_worst_grade <- make_afun( # nolint
  # Statistics function whose results get formatted.
  fun = s_count_abnormal_by_worst_grade,
  # Default format applied to the `count_fraction` statistic.
  .formats = c(count_fraction = format_count_fraction)
)

#' @describeIn abnormal_by_worst_grade Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal_by_worst_grade()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_abnormal_by_worst_grade()` to the table layout.
#'
#' @examples
#' # Map excludes records without abnormal grade since they should not be displayed
#' # in the table.
#' map <- unique(adlb_f[adlb_f$GRADE_DIR != "ZERO", c("PARAM", "GRADE_DIR", "GRADE_ANL")]) %>%
#'   lapply(as.character) %>%
#'   as.data.frame() %>%
#'   arrange(PARAM, desc(GRADE_DIR), GRADE_ANL)
#'
#' basic_table() %>%
#'   split_cols_by("ARMCD") %>%
#'   split_rows_by("PARAM") %>%
#'   split_rows_by("GRADE_DIR", split_fun = trim_levels_to_map(map)) %>%
#'   count_abnormal_by_worst_grade(
#'     var = "GRADE_ANL",
#'     variables = list(id = "USUBJID", param = "PARAM", grade_dir = "GRADE_DIR")
#'   ) %>%
#'   build_table(df = adlb_f)
#'
#' @export
count_abnormal_by_worst_grade <- function(lyt,
                                          var,
                                          ...,
                                          .stats = NULL,
                                          .formats = NULL,
                                          .labels = NULL,
                                          .indent_mods = NULL) {
  # Customize the formatted analysis function with the user settings; the
  # `count_fraction` statistic is ungrouped.
  worst_grade_afun <- make_afun(
    a_count_abnormal_by_worst_grade,
    .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods,
    .ungroup_stats = "count_fraction"
  )

  # Additional arguments in `...` are forwarded to the analysis function.
  analyze(lyt, vars = var, afun = worst_grade_afun, extra_args = list(...), show_labels = "hidden")
}

#' Helper Functions for Tabulating Biomarker Effects on Survival by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions which are documented here separately to not confuse the user
#' when reading about the user-facing functions.
#'
#' @inheritParams survival_biomarkers_subgroups
#' @inheritParams argument_convention
#' @inheritParams fit_coxreg_multivar
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adtte <- tern_ex_adtte
#'
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte, fill = FALSE)
#'
#' adtte_f <- adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("AVALU" = adtte_labels[["AVALU"]], "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' @name h_survival_biomarkers_subgroups
NULL

#' @describeIn h_survival_biomarkers_subgroups helps with converting the "survival" function variable list
#'   to the "Cox regression" variable list. The reason is that currently there is an inconsistency between the variable
#'   names accepted by `extract_survival_subgroups()` and `fit_coxreg_multivar()`.
#'
#' @param biomarker (`string`)\cr the name of the biomarker variable.
#'
#' @return
#' * `h_surv_to_coxreg_variables()` returns a named `list` of elements `time`, `event`, `arm`,
#'   `covariates`, and `strata`.
#'
#' @examples
#' # This is how the variable list is converted internally.
#' h_surv_to_coxreg_variables(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "EVNT",
#'     covariates = c("A", "B"),
#'     strata = "D"
#'   ),
#'   biomarker = "AGE"
#' )
#'
#' @export
h_surv_to_coxreg_variables <- function(variables, biomarker) {
  # Maps the "survival" style variable names (`tte`, `is_event`) to the
  # "Cox regression" style names (`time`, `event`), using the biomarker as `arm`.
  checkmate::assert_list(variables)
  checkmate::assert_string(variables$tte)
  checkmate::assert_string(variables$is_event)
  checkmate::assert_string(biomarker)

  time_var <- variables$tte
  event_var <- variables$is_event

  # `covariates` and `strata` are passed through unchanged (possibly `NULL`).
  list(
    time = time_var,
    event = event_var,
    arm = biomarker,
    covariates = variables$covariates,
    strata = variables$strata
  )
}

#' @describeIn h_survival_biomarkers_subgroups prepares estimates for number of events, patients and median survival
#'   times, as well as hazard ratio estimates, confidence intervals and p-values, for multiple biomarkers
#'   in a given single data set.
#'   `variables` corresponds to names of variables found in `data`, passed as a named list and requires elements
#'   `tte`, `is_event`, `biomarkers` (vector of continuous biomarker variables) and optionally
#'   `covariates`, `subgroups` and `strata`.
#'
#' @return
#' * `h_coxreg_mult_cont_df()` returns a `data.frame` containing estimates and statistics for the selected biomarkers.
#'
#' @examples
#' # For a single population, estimate separately the effects
#' # of two biomarkers.
#' df <- h_coxreg_mult_cont_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     strata = c("STRATA1", "STRATA2")
#'   ),
#'   data = adtte_f
#' )
#' df
#'
#' # If the data set is empty, still the corresponding rows with missings are returned.
#' h_coxreg_mult_cont_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "REGION1",
#'     strata = c("STRATA1", "STRATA2")
#'   ),
#'   data = adtte_f[NULL, ]
#' )
#'
#' @export
h_coxreg_mult_cont_df <- function(variables,
                                  data,
                                  control = control_coxreg()) {
  # Fits one Cox regression per biomarker in `variables$biomarkers` and returns
  # a data frame with one row per biomarker (counts, median, hazard ratio, CI
  # limits and p-value). For an empty `data` the rows are returned with
  # zero counts and missing estimates instead.
  assert_df_with_variables(data, variables)
  checkmate::assert_list(control, names = "named")
  checkmate::assert_character(variables$biomarkers, min.len = 1, any.missing = FALSE)
  conf_level <- control[["conf_level"]]
  pval_label <- paste0(
    # the regex capitalizes the first letter of the string / sentence.
    "p-value (", gsub("(^[a-z])", "\\U\\1", trimws(control[["pval_method"]]), perl = TRUE), ")"
  )
  # If there is any data, run model, otherwise return empty results.
  if (nrow(data) > 0) {
    l_result <- lapply(variables$biomarkers, function(bm) {
      # The biomarker takes the role of the `arm` variable in the Cox model.
      coxreg_list <- fit_coxreg_multivar(
        variables = h_surv_to_coxreg_variables(variables, bm),
        data = data,
        control = control
      )
      result <- do.call(
        h_coxreg_multivar_extract,
        c(list(var = bm), coxreg_list[c("mod", "data", "control")])
      )
      # Median survival time is computed from the response stored in the fitted model.
      data_fit <- as.data.frame(as.matrix(coxreg_list$mod$y))
      data_fit$status <- as.logical(data_fit$status)
      median <- s_surv_time(
        df = data_fit,
        .var = "time",
        is_event = "status"
      )$median
      data.frame(
        # Dummy column needed downstream to create a nested header.
        biomarker = bm,
        biomarker_label = formatters::var_labels(data[bm], fill = TRUE),
        n_tot = coxreg_list$mod$n,
        n_tot_events = coxreg_list$mod$nevent,
        median = as.numeric(median),
        # Only the first row of the extracted results is used.
        result[1L, c("hr", "lcl", "ucl")],
        conf_level = conf_level,
        pval = result[1L, "pval"],
        pval_label = pval_label,
        stringsAsFactors = FALSE
      )
    })
    do.call(rbind, args = c(l_result, make.row.names = FALSE))
  } else {
    data.frame(
      biomarker = variables$biomarkers,
      biomarker_label = formatters::var_labels(data[variables$biomarkers], fill = TRUE),
      n_tot = 0L,
      n_tot_events = 0L,
      median = NA,
      hr = NA,
      lcl = NA,
      ucl = NA,
      conf_level = conf_level,
      pval = NA,
      pval_label = pval_label,
      row.names = seq_along(variables$biomarkers),
      stringsAsFactors = FALSE
    )
  }
}

#' @describeIn h_survival_biomarkers_subgroups prepares a single sub-table given a `df_sub` containing
#'   the results for a single biomarker.
#'
#' @param df (`data.frame`)\cr results for a single biomarker, as part of what is
#'   returned by [extract_survival_biomarkers()] (it needs a couple of columns which are
#'   added by that high-level function relative to what is returned by [h_coxreg_mult_cont_df()],
#'   see the example).
#'
#' @return
#' * `h_tab_surv_one_biomarker()` returns an `rtables` table object with the given statistics arranged in columns.
#'
#' @examples
#' # Starting from above `df`, zoom in on one biomarker and add required columns.
#' df1 <- df[1, ]
#' df1$subgroup <- "All patients"
#' df1$row_type <- "content"
#' df1$var <- "ALL"
#' df1$var_label <- "All patients"
#' h_tab_surv_one_biomarker(
#'   df1,
#'   vars = c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"),
#'   time_unit = "days"
#' )
#'
#' @export
h_tab_surv_one_biomarker <- function(df,
                                     vars,
                                     time_unit,
                                     .indent_mods = 0L) {
  # Select the analysis functions for the requested statistics.
  selected_afuns <- a_survival_subgroups()[vars]

  # Column variables and labels; confidence level and p-value label are taken
  # from the first row of `df`.
  column_spec <- d_survival_subgroups_colvars(
    vars,
    conf_level = df$conf_level[1], method = df$pval_label[1], time_unit = time_unit
  )

  h_tab_one_biomarker(df = df, afuns = selected_afuns, colvars = column_spec, .indent_mods = .indent_mods)
}

#' Helper Function for Tabulation of a Single Biomarker Result
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Please see [h_tab_surv_one_biomarker()] and [h_tab_rsp_one_biomarker()], which use this function for examples.
#' This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @inheritParams argument_convention
#' @param df (`data.frame`)\cr results for a single biomarker.
#' @param afuns (named `list` of `function`)\cr analysis functions.
#' @param colvars (`list` with `vars` and `labels`)\cr variables to tabulate and their labels.
#'
#' @return An `rtables` table object with statistics in columns.
#'
#' @export
h_tab_one_biomarker <- function(df,
                                afuns,
                                colvars,
                                .indent_mods = 0L) {
  # Top-level row split on the row type, keeping only the "content" rows.
  layout <- split_rows_by(
    basic_table(),
    var = "row_type",
    split_fun = keep_split_levels("content"),
    nested = FALSE
  )

  # Content rows for all patients.
  layout <- summarize_row_groups(layout, var = "var_label", cfun = afuns, indent_mod = .indent_mods)

  # One column per variable in `colvars`.
  layout <- split_cols_by_multivar(layout, vars = colvars$vars, varlabels = colvars$labels)

  # Without subgroup ("analysis") rows the layout is already complete.
  if (!("analysis" %in% df$row_type)) {
    return(build_table(layout, df = df))
  }

  # Second top-level split which keeps only the subgroup rows.
  layout <- split_rows_by(
    layout,
    var = "row_type",
    split_fun = keep_split_levels("analysis"),
    nested = FALSE,
    child_labels = "hidden"
  )

  # Nested split by subgroup variable, labelled by `var_label`.
  layout <- split_rows_by(
    layout,
    var = "var",
    labels_var = "var_label",
    nested = TRUE,
    child_labels = "visible",
    indent_mod = .indent_mods * 2
  )

  # Content rows for each subgroup level.
  layout <- summarize_row_groups(layout, cfun = afuns, var = "subgroup")

  build_table(layout, df = df)
}

#' Occurrence Table Sorting
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Functions to score occurrence table subtables and rows which can be used in the
#' sorting of occurrence tables.
#'
#' @name score_occurrences
NULL

#' @describeIn score_occurrences Scoring function which sums the counts across all
#'   columns. It will fail if anything else but counts are used.
#'
#' @inheritParams rtables_access
#'
#' @return
#' * `score_occurrences()` returns the sum of counts across all columns of a table row.
#'
#' @seealso [h_row_first_values()]
#'
#' @examples
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   analyze_num_patients(
#'     vars = "USUBJID",
#'     .stats = c("unique"),
#'     .labels = c("Total number of patients with at least one event")
#'   ) %>%
#'   split_rows_by("AEBODSYS", child_labels = "visible", nested = FALSE) %>%
#'   summarize_num_patients(
#'     var = "USUBJID",
#'     .stats = c("unique", "nonunique"),
#'     .labels = c(
#'       "Total number of patients with at least one event",
#'       "Total number of events"
#'     )
#'   ) %>%
#'   count_occurrences(vars = "AEDECOD")
#'
#' tbl <- build_table(lyt, tern_ex_adae, alt_counts_df = tern_ex_adsl) %>%
#'   prune_table()
#'
#' tbl_sorted <- tbl %>%
#'   sort_at_path(path = c("AEBODSYS", "*", "AEDECOD"), scorefun = score_occurrences)
#'
#' tbl_sorted
#'
#' @export
score_occurrences <- function(table_row) {
  # Score of a row: total of its counts as extracted by `h_row_counts()`.
  sum(h_row_counts(table_row))
}

#' @describeIn score_occurrences Scoring functions can be produced by this constructor to only include
#'   specific columns in the scoring. See [h_row_counts()] for further information.
#'
#' @inheritParams has_count_in_cols
#'
#' @return
#' * `score_occurrences_cols()` returns a function that sums counts across all specified columns
#'   of a table row.
#'
#' @seealso [h_row_counts()]
#'
#' @examples
#' score_cols_a_and_b <- score_occurrences_cols(col_names = c("A: Drug X", "B: Placebo"))
#'
#' # Note that this here just sorts the AEDECOD inside the AEBODSYS. The AEBODSYS are not sorted.
#' # That would require a second pass of `sort_at_path`.
#' tbl_sorted <- tbl %>%
#'   sort_at_path(path = c("AEBODSYS", "*", "AEDECOD"), scorefun = score_cols_a_and_b)
#'
#' tbl_sorted
#'
#' @export
score_occurrences_cols <- function(...) {
  # The returned scoring function forwards `...` to `h_row_counts()` on every call.
  function(table_row) {
    sum(h_row_counts(table_row, ...))
  }
}

#' @describeIn score_occurrences Scoring functions produced by this constructor can be used on
#'   subtables: They sum up all specified column counts in the subtable. This is useful when
#'   there is no available content row summing up these counts.
#'
#' @return
#' * `score_occurrences_subtable()` returns a function that sums counts in each subtable
#'   across all specified columns.
#'
#' @examples
#' score_subtable_all <- score_occurrences_subtable(col_names = names(tbl))
#'
#' # Note that this code just sorts the AEBODSYS, not the AEDECOD within AEBODSYS. That
#' # would require a second pass of `sort_at_path`.
#' tbl_sorted <- tbl %>%
#'   sort_at_path(path = c("AEBODSYS"), scorefun = score_subtable_all, decreasing = FALSE)
#'
#' tbl_sorted
#'
#' @export
score_occurrences_subtable <- function(...) {
  # Row-level scorer restricted via `...`.
  row_scorer <- score_occurrences_cols(...)
  function(table_tree) {
    # Score every leaf row of the subtable and add the scores up.
    leaf_scores <- vapply(collect_leaves(table_tree), FUN = row_scorer, FUN.VALUE = numeric(1))
    sum(leaf_scores)
  }
}

#' @describeIn score_occurrences Produce score function for sorting table by summing the first content row in
#'   specified columns. Note that this is extending [rtables::cont_n_onecol()] and [rtables::cont_n_allcols()].
#'
#' @return
#' * `score_occurrences_cont_cols()` returns a function that sums counts in the first content row in
#'   specified columns.
#'
#' @export
score_occurrences_cont_cols <- function(...) {
  # Row-level scorer restricted via `...`.
  row_scorer <- score_occurrences_cols(...)
  function(table_tree) {
    if (inherits(table_tree, "ContentRow")) {
      # Content rows themselves get no score.
      NA
    } else {
      # Otherwise score the first content row of the (sub)table.
      row_scorer(h_content_first_row(table_tree))
    }
  }
}

#' Create a STEP Graph
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Based on the STEP results, creates a `ggplot` graph showing the estimated HR or OR
#' along the continuous biomarker value subgroups.
#'
#' @param df (`tibble`)\cr result of [tidy.step()].
#' @param use_percentile (`flag`)\cr whether to use percentiles for the x axis or actual
#'   biomarker values.
#' @param est (named `list`)\cr `col` and `lty` settings for estimate line.
#' @param ci_ribbon (named `list` or `NULL`)\cr `fill` and `alpha` settings for the confidence interval
#'   ribbon area, or `NULL` to not plot a CI ribbon.
#' @param col (`character`)\cr colors.
#'
#' @return A `ggplot` STEP graph.
#'
#' @seealso Custom tidy method [tidy.step()].
#'
#' @examples
#' library(nestcolor)
#' library(survival)
#' lung$sex <- factor(lung$sex)
#'
#' # Survival example.
#' vars <- list(
#'   time = "time",
#'   event = "status",
#'   arm = "sex",
#'   biomarker = "age"
#' )
#'
#' step_matrix <- fit_survival_step(
#'   variables = vars,
#'   data = lung,
#'   control = c(control_coxph(), control_step(num_points = 10, degree = 2))
#' )
#' step_data <- broom::tidy(step_matrix)
#'
#' # Default plot.
#' g_step(step_data)
#'
#' # Add the reference 1 horizontal line.
#' library(ggplot2)
#' g_step(step_data) +
#'   ggplot2::geom_hline(ggplot2::aes(yintercept = 1), linetype = 2)
#'
#' # Use actual values instead of percentiles, different color for estimate and no CI,
#' # use log scale for y axis.
#' g_step(
#'   step_data,
#'   use_percentile = FALSE,
#'   est = list(col = "blue", lty = 1),
#'   ci_ribbon = NULL
#' ) + scale_y_log10()
#'
#' # Adding another curve based on additional column.
#' step_data$extra <- exp(step_data$`Percentile Center`)
#' g_step(step_data) +
#'   ggplot2::geom_line(ggplot2::aes(y = extra), linetype = 2, color = "green")
#'
#' # Response example.
#' vars <- list(
#'   response = "status",
#'   arm = "sex",
#'   biomarker = "age"
#' )
#'
#' step_matrix <- fit_rsp_step(
#'   variables = vars,
#'   data = lung,
#'   control = c(
#'     control_logistic(response_definition = "I(response == 2)"),
#'     control_step()
#'   )
#' )
#' step_data <- broom::tidy(step_matrix)
#' g_step(step_data)
#'
#' @export
g_step <- function(df,
                   use_percentile = "Percentile Center" %in% names(df),
                   est = list(col = "blue", lty = 1),
                   ci_ribbon = list(fill = getOption("ggplot2.discrete.colour")[1], alpha = 0.5),
                   col = getOption("ggplot2.discrete.colour")) {
  # Plots the estimate column named by the `estimate` attribute of `df` against
  # the percentile or interval centers, optionally with a confidence ribbon
  # based on the `ci_lower` and `ci_upper` columns. Returns a `ggplot` object.
  checkmate::assert_tibble(df)
  checkmate::assert_flag(use_percentile)
  checkmate::assert_character(col, null.ok = TRUE)
  checkmate::assert_list(est, names = "named")
  checkmate::assert_list(ci_ribbon, names = "named", null.ok = TRUE)

  # `use_percentile` is a validated flag, so a plain `if` is sufficient.
  x_var <- if (use_percentile) "Percentile Center" else "Interval Center"
  df$x <- df[[x_var]]
  attrs <- attributes(df)
  df$y <- df[[attrs$estimate]]

  # Set legend names. To be modified also at call level
  legend_names <- c("Estimate", "CI 95%")

  # Color of the estimate line: honor `est$col`, falling back to the previous
  # fixed color when it is not supplied. Only the first value is used.
  est_col <- if (is.null(est$col)) "blue" else est$col[[1]]

  p <- ggplot2::ggplot(df, ggplot2::aes(x = .data[["x"]], y = .data[["y"]]))

  if (!is.null(col)) {
    p <- p +
      ggplot2::scale_color_manual(values = col)
  }

  if (!is.null(ci_ribbon)) {
    if (is.null(ci_ribbon$fill)) {
      ci_ribbon$fill <- "lightblue"
    }
    p <- p + ggplot2::geom_ribbon(
      ggplot2::aes(
        ymin = .data[["ci_lower"]], ymax = .data[["ci_upper"]],
        fill = legend_names[2]
      ),
      alpha = ci_ribbon$alpha
    ) +
      ggplot2::scale_fill_manual(
        name = "", values = c("CI 95%" = ci_ribbon$fill)
      )
  }
  # A color scale may already have been added above; adding this one replaces
  # it, and the message ggplot2 emits about that is suppressed.
  suppressMessages(p <- p +
    ggplot2::geom_line(
      ggplot2::aes(y = .data[["y"]], color = legend_names[1]),
      linetype = est$lty
    ) +
    ggplot2::scale_colour_manual(
      name = "", values = c("Estimate" = est_col)
    ))

  p <- p + ggplot2::labs(x = attrs$biomarker, y = attrs$estimate)
  if (use_percentile) {
    p <- p + ggplot2::scale_x_continuous(labels = scales::percent)
  }
  p
}

#' Custom Tidy Method for STEP Results
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Tidy the STEP results into a `tibble` format ready for plotting.
#'
#' @param x (`step` matrix)\cr results from [fit_survival_step()] or [fit_rsp_step()].
#' @param ... not used here.
#'
#' @return A `tibble` with one row per STEP subgroup. The estimates and CIs are on the HR or OR scale,
#'   respectively. Additional attributes carry metadata also used for plotting.
#'
#' @seealso [g_step()] which consumes the result from this function.
#'
#' @method tidy step
#'
#' @examples
#' library(survival)
#' lung$sex <- factor(lung$sex)
#' vars <- list(
#'   time = "time",
#'   event = "status",
#'   arm = "sex",
#'   biomarker = "age"
#' )
#' step_matrix <- fit_survival_step(
#'   variables = vars,
#'   data = lung,
#'   control = c(control_coxph(), control_step(num_points = 10, degree = 2))
#' )
#' broom::tidy(step_matrix)
#'
#' @export
tidy.step <- function(x, ...) { # nolint
  checkmate::assert_class(x, "step")
  dat <- as.data.frame(x)

  # A `loghr` column identifies survival results; otherwise `logor` is expected.
  if ("loghr" %in% names(dat)) {
    est_var <- "loghr"
    new_est_var <- "Hazard Ratio"
  } else {
    est_var <- "logor"
    new_est_var <- "Odds Ratio"
  }

  # Rename the estimate column and exponentiate estimate and CI limits.
  names(dat)[match(est_var, names(dat))] <- new_est_var
  y_cols <- c(new_est_var, "ci_lower", "ci_upper")
  dat[, y_cols] <- exp(dat[, y_cols])

  # Warn about values which will cause trouble when plotting.
  y_vals <- dat[, y_cols]
  has_na <- any(is.na(y_vals))
  has_very_large <- any(abs(y_vals) > 1e10, na.rm = TRUE)
  if (has_na) {
    warning(paste(
      "Missing values in the point estimate or CI columns,",
      "this will lead to holes in the `g_step()` plot"
    ))
  }
  if (has_very_large) {
    warning(paste(
      "Very large absolute values in the point estimate or CI columns,",
      "consider adding `scale_y_log10()` to the `g_step()` result for plotting"
    ))
  }
  if (has_na || has_very_large) {
    warning("Consider using larger `bandwidth`, less `num_points` in `control_step()` settings for fitting")
  }

  # Metadata is attached as attributes of the returned tibble.
  structure(
    tibble::as_tibble(dat),
    estimate = new_est_var,
    biomarker = attr(x, "variables")$biomarker,
    ci = f_conf_level(attr(x, "control")$conf_level)
  )
}

#' Additional Assertions for `checkmate`
#'
#' Additional assertion functions which can be used together with the `checkmate` package.
#'
#' @inheritParams checkmate::assert_factor
#' @param x (`any`)\cr object to test.
#' @param df (`data.frame`)\cr data set to test.
#' @param variables (named `list` of `character`)\cr list of variables to test.
#' @param include_boundaries (`logical`)\cr whether to include boundaries when testing
#'   for proportions.
#' @param na_level (`character`)\cr the string you have been using to represent NA or
#'   missing data. For `NA` values please consider using directly [is.na()] or
#'   similar approaches.
#' @param min.levels (`integer`)\cr minimum number of factor levels. Default is `1`.
#' @param ... a collection of objects to test.
#'
#' @return Nothing if assertion passes, otherwise prints the error message.
#'
#' @name assertions
NULL

check_list_of_variables <- function(x) {
  # `NULL` elements are ignored for the purpose of this check.
  non_null <- Filter(Negate(is.null), x)

  # First requirement: a non-empty, named list of character vectors.
  list_check <- checkmate::check_list(
    non_null,
    names = "named", min.len = 1, any.missing = FALSE, types = "character"
  )
  if (!isTRUE(list_check)) {
    return(list_check)
  }

  # Second requirement: no empty strings among the variable names.
  checkmate::check_character(unlist(non_null), min.chars = 1)
}
#' @describeIn assertions Checks whether `x` is a valid list of variable names, i.e. a named,
#'   non-empty `list` of `character` vectors without empty strings. `NULL` elements of the
#'   list `x` are dropped before the check is performed.
#'
#' @examples
#' # Check whether `x` is a valid list of variable names.
#'
#' # Internal function - assert_list_of_variables
#' \dontrun{
#' assert_list_of_variables(list(val = "a"))
#' assert_list_of_variables(list(val = c("a", "b")))
#' assert_list_of_variables(list(val = c("a", "b"), val2 = NULL))
#'
#' # The following calls fail
#' assert_list_of_variables(list(1, 2))
#' assert_list_of_variables(list("bla" = 2))
#' }
#'
#' @keywords internal
assert_list_of_variables <- checkmate::makeAssertionFunction(check_list_of_variables)

# Check function behind `assert_df_with_variables()`.
#
# Arguments:
#   df         data frame to test.
#   variables  named list of character vectors holding the column names to look for.
#   na_level   optional string; when given, none of the selected columns may contain it.
#
# Returns `TRUE` when all `variables` are columns of `df` (and `na_level`, if supplied, does
# not occur in them); otherwise returns a string describing the problem. Invalid `df`,
# `variables` or `na_level` inputs raise an assertion error instead.
check_df_with_variables <- function(df, variables, na_level = NULL) {
  checkmate::assert_data_frame(df)
  assert_list_of_variables(variables)

  var_names <- unlist(variables)

  # every requested variable has to be a column of `df`
  missing_vars <- setdiff(var_names, colnames(df))
  if (length(missing_vars) > 0) {
    return(paste(
      deparse(substitute(df)),
      "does not contain all specified variables as column names. Missing from dataframe:",
      paste(missing_vars, collapse = ", ")
    ))
  }

  # checking if na_level is present and in which column
  if (!is.null(na_level)) {
    checkmate::assert_string(na_level)
    # `na.rm = TRUE` keeps the flag a plain TRUE/FALSE for columns with missing values;
    # without it `any()` can return NA and the `if ()` below would fail.
    has_na_level <- vapply(
      as.list(df)[var_names],
      function(col) any(col == na_level, na.rm = TRUE),
      logical(1)
    )
    if (any(has_na_level)) {
      return(paste0(
        deparse(substitute(df)), " contains explicit na_level (", na_level,
        ") in the following columns: ", paste0(var_names[has_na_level],
          collapse = ", "
        )
      ))
    }
  }
  TRUE
}
#' @describeIn assertions Checks whether `df` is a data frame containing all analysis
#'   `variables` as columns. The assertion fails when any of the `variables` is missing from
#'   `df`, while columns of `df` that are not listed in `variables` are allowed. If `na_level`
#'   is supplied, the assertion also fails when one of the selected columns contains it.
#'
#' @examples
#' # Check whether `df` contains the analysis `variables`.
#'
#' # Internal function - assert_df_with_variables
#' \dontrun{
#' assert_df_with_variables(
#'   df = data.frame(a = 5, b = 3),
#'   variables = list(val = "a")
#' )
#' assert_df_with_variables(
#'   df = data.frame(a = 5, b = 3),
#'   variables = list(val = c("a", "b"))
#' )
#' assert_df_with_variables(
#'   df = data.frame(a = 5, b = 3, e = "<Missing>"),
#'   variables = list(val = c("a", "b")), na_level = "<Missing>"
#' )
#'
#' # The following calls fail
#' assert_df_with_variables(
#'   df = matrix(1:5, ncol = 2, nrow = 3),
#'   variables = list(val = "a")
#' )
#' assert_df_with_variables(
#'   df = data.frame(a = 5, b = 3),
#'   variables = list(val = c("a", "b", "c"))
#' )
#' assert_df_with_variables(
#'   df = data.frame(a = 5, b = 3, e = "<Missing>"),
#'   variables = list(val = c("a", "b", "e")), na_level = "<Missing>"
#' )
#' }
#'
#' @keywords internal
assert_df_with_variables <- checkmate::makeAssertionFunction(check_df_with_variables)

# Check function behind `assert_valid_factor()`.
#
# Arguments (apart from `x`, all are forwarded to `checkmate::check_factor()`):
#   x            object to test.
#   min.levels   minimum number of factor levels, an integer of at least 1.
#   max.levels   maximum number of factor levels, or `NULL`.
#   null.ok      whether `x = NULL` passes the factor check.
#   any.missing  whether `NA` elements are allowed.
#   n.levels     exact number of factor levels, or `NULL`.
#   len          exact length of `x`, or `NULL`.
#
# Returns `TRUE` when `x` passes the factor check and none of its levels is an empty string;
# otherwise returns the `checkmate` message of the first failed check.
check_valid_factor <- function(x,
                               min.levels = 1, # nolint
                               max.levels = NULL, # nolint
                               null.ok = TRUE, # nolint
                               any.missing = TRUE, # nolint
                               n.levels = NULL, # nolint
                               len = NULL) {
  # checks on levels insertion
  checkmate::assert_int(min.levels, lower = 1)

  # main factor check; `len` is forwarded so that a requested length is actually enforced
  res <- checkmate::check_factor(x,
    min.levels = min.levels,
    null.ok = null.ok,
    max.levels = max.levels,
    any.missing = any.missing,
    n.levels = n.levels,
    len = len
  )

  # no empty strings allowed
  # NOTE(review): for `x = NULL` with `null.ok = TRUE` the factor check passes, but
  # `levels(x)` is `NULL` and is still handed to `check_character()` - confirm this is intended.
  if (isTRUE(res)) {
    res <- checkmate::check_character(levels(x), min.chars = 1)
  }

  res
}
#' @describeIn assertions Checks whether `x` is a valid factor, i.e. a factor with at least
#'   `min.levels` levels, none of which is an empty string. Note that `NA` elements are
#'   allowed by default (`any.missing = TRUE`) and that `NULL` elements are dropped by
#'   `factor()` itself.
#'
#' @examples
#' # Check whether `x` is a valid factor.
#'
#' # Internal function - assert_valid_factor
#' \dontrun{
#' assert_valid_factor(factor(c("a", NULL)))
#' assert_valid_factor(factor(c("a", "b")))
#' assert_valid_factor(factor(c("a", "b")), len = 2)
#' assert_valid_factor(factor(c("a", NA)), any.missing = TRUE)
#' assert_valid_factor(factor("A", levels = c("A", "B")))
#'
#' # The following calls fail
#' assert_valid_factor(-1)
#' assert_valid_factor(factor(c("a", "")))
#' assert_valid_factor(factor(c("a", NA)), any.missing = FALSE)
#' assert_valid_factor(factor(NULL))
#' assert_valid_factor(factor(c(NULL, "")))
#' assert_valid_factor(factor())
#' }
#'
#' @keywords internal
assert_valid_factor <- checkmate::makeAssertionFunction(check_valid_factor)


# Check function behind `assert_df_with_factors()`.
#
# First runs `check_df_with_variables()` on `df`, `variables` and `na_level`; if that passes,
# every selected column is tested with `check_valid_factor()` using `min.levels`,
# `max.levels` and `any.missing`. Returns `TRUE` when all checks pass, otherwise a string
# describing the failure (listing each offending column with its individual message).
check_df_with_factors <- function(df,
                                  variables,
                                  min.levels = 1, # nolint
                                  max.levels = NULL, # nolint
                                  any.missing = TRUE, # nolint
                                  na_level = NULL) {
  res <- check_df_with_variables(df, variables, na_level)
  if (!isTRUE(res)) {
    # the variables themselves are not usable, report that message as is
    return(res)
  }

  # run the factor check on each selected column of the data.frame
  var_names <- unlist(variables)
  col_checks <- lapply(
    as.list(df)[var_names],
    function(col) {
      check_valid_factor(
        col,
        min.levels = min.levels,
        max.levels = max.levels,
        any.missing = any.missing
      )
    }
  )

  failed <- !vapply(col_checks, isTRUE, logical(1))
  if (!any(failed)) {
    return(TRUE)
  }

  paste0(
    deparse(substitute(df)), " does not contain only factor variables among:",
    "\n* Column `", paste0(var_names[failed],
      "` of the data.frame -> ", col_checks[failed],
      collapse = "\n* "
    )
  )
}
#' @describeIn assertions Checks whether `df` is a data frame where the analysis `variables`
#'   are all valid factors. Note that `factor()` does not create a level for `NA` values,
#'   so `NA` elements do not count towards the number of levels.
#'
#' @examples
#' # Check whether `df` contains all factor analysis `variables`.
#' adf <- data.frame(a = factor(c("A", "B")), b = 3)
#' bdf <- data.frame(a = factor(letters[1:3]), b = factor(c(1, 2, 3)), d = 3)
#'
#' # Internal function - assert_df_with_factors
#' \dontrun{
#' assert_df_with_factors(df = adf, variables = list(val = "a"))
#' assert_df_with_factors(df = adf, variables = list(val = "a"), min.levels = 1)
#' assert_df_with_factors(df = adf, variables = list(val = "a"), min.levels = 2, max.levels = 2)
#' assert_df_with_factors(
#'   df = data.frame(a = factor(c("A", NA, "B")), b = 3),
#'   variables = list(val = "a"),
#'   min.levels = 2,
#'   max.levels = 2
#' )
#'
#' # The following calls fail
#' assert_df_with_factors(df = adf, variables = list(val = "a"), min.levels = 1, max.levels = 1)
#' assert_df_with_factors(df = adf, variables = list(val = "a", val = "b", val = ""))
#' assert_df_with_factors(df = adf, variables = list(val = "a", val = "b", val = "d"))
#' assert_df_with_factors(
#'   df = bdf,
#'   variables = list(val = "a", val = "b"),
#'   min.levels = 1,
#'   max.levels = 1
#' )
#' }
#'
#' @keywords internal
assert_df_with_factors <- checkmate::makeAssertionFunction(check_df_with_factors)

#' @describeIn assertions Check whether `x` is a proportion: number between 0 and 1.
#'
#' @examples
#' # Check whether `x` is between 0 and 1.
#' # Internal function - assert_proportion_value
#' \dontrun{
#' assert_proportion_value(x = 0, include_boundaries = TRUE)
#' assert_proportion_value(x = 0.3)
#'
#' # These fail
#' assert_proportion_value(x = 1.3)
#' assert_proportion_value(x = 1)
#' }
#'
#' @keywords internal
assert_proportion_value <- function(x, include_boundaries = FALSE) {
  # `x` has to be a single number inside the closed interval [0, 1]
  checkmate::assert_number(x, lower = 0, upper = 1)
  checkmate::assert_flag(include_boundaries)

  # unless the boundaries are explicitly included, 0 and 1 themselves are rejected
  if (!include_boundaries) {
    checkmate::assert_true(x > 0)
    checkmate::assert_true(x < 1)
  }
}

#' Counting Patients Summing Exposure Across All Patients in Columns
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Counting the number of patients and summing analysis value (i.e. exposure values) across all patients
#' when a column table layout is required.
#'
#' @inheritParams argument_convention
#'
#' @name summarize_patients_exposure_in_cols
NULL

#' @describeIn summarize_patients_exposure_in_cols Statistics function which counts numbers
#'   of patients and the sum of exposure across all patients.
#'
#' @param ex_var (`character`)\cr name of the variable within `df` containing exposure values.
#' @param custom_label (`string` or `NULL`)\cr if provided and `labelstr` is empty then this will be used as label.
#'
#' @return
#' * `s_count_patients_sum_exposure()` returns a named `list` with the statistics:
#'   * `n_patients`: Number of unique patients in `df`.
#'   * `sum_exposure`: Sum of `ex_var` across all patients in `df`.
#'
#' @examples
#' set.seed(1)
#' df <- data.frame(
#'   USUBJID = c(paste("id", seq(1, 12), sep = "")),
#'   ARMCD = c(rep("ARM A", 6), rep("ARM B", 6)),
#'   SEX = c(rep("Female", 6), rep("Male", 6)),
#'   AVAL = as.numeric(sample(seq(1, 20), 12)),
#'   stringsAsFactors = TRUE
#' )
#' adsl <- data.frame(
#'   USUBJID = c(paste("id", seq(1, 12), sep = "")),
#'   ARMCD = c(rep("ARM A", 2), rep("ARM B", 2)),
#'   SEX = c(rep("Female", 2), rep("Male", 2)),
#'   stringsAsFactors = TRUE
#' )
#'
#' # Internal function - s_count_patients_sum_exposure
#' \dontrun{
#' s_count_patients_sum_exposure(df = df, .N_col = nrow(adsl))
#' s_count_patients_sum_exposure(df = df, .N_col = nrow(adsl), .stats = "n_patients")
#' s_count_patients_sum_exposure(
#'   df = df,
#'   .N_col = nrow(adsl),
#'   custom_label = "some user's custom label"
#' )
#' }
#'
#' @keywords internal
s_count_patients_sum_exposure <- function(df,
                                          ex_var = "AVAL",
                                          id = "USUBJID",
                                          labelstr = "",
                                          .stats = c("n_patients", "sum_exposure"),
                                          .N_col, # nolint
                                          custom_label = NULL) {
  # input validation
  assert_df_with_variables(df, list(ex_var = ex_var, id = id))
  checkmate::assert_string(id)
  checkmate::assert_string(labelstr)
  checkmate::assert_string(custom_label, null.ok = TRUE)
  checkmate::assert_numeric(df[[ex_var]])
  checkmate::assert_true(all(.stats %in% c("n_patients", "sum_exposure")))

  # Row label priority: a non-empty `labelstr` wins over `custom_label`, which in turn
  # wins over the generic default label.
  row_label <- "Total patients numbers/person time"
  if (!is.null(custom_label)) {
    row_label <- custom_label
  }
  if (labelstr != "") {
    row_label <- labelstr
  }

  stats_out <- list()

  # number of unique patients, taken from the `unique` statistic of `s_num_patients_content()`
  if ("n_patients" %in% .stats) {
    patient_counts <- s_num_patients_content(
      df = df,
      .N_col = .N_col, # nolint
      .var = id,
      labelstr = ""
    )
    stats_out$n_patients <- formatters::with_label(patient_counts$unique, row_label)
  }

  # total exposure over all rows of `df`
  if ("sum_exposure" %in% .stats) {
    total_exposure <- sum(df[[ex_var]])
    stats_out$sum_exposure <- formatters::with_label(total_exposure, row_label)
  }

  stats_out
}

#' @describeIn summarize_patients_exposure_in_cols Analysis function which is used as `afun` in
#'   [rtables::analyze_colvars()] within `analyze_patients_exposure_in_cols()` and as `cfun` in
#'   [rtables::summarize_row_groups()] within `summarize_patients_exposure_in_cols()`.
#'
#' @return
#' * `a_count_patients_sum_exposure()` returns formatted [rtables::CellValue()].
#'
#' @examples
#' tern:::a_count_patients_sum_exposure(
#'   df = df,
#'   var = "SEX",
#'   .N_col = nrow(df),
#'   .stats = "n_patients"
#' )
#'
#' @keywords internal
a_count_patients_sum_exposure <- function(df,
                                          var = NULL,
                                          ex_var = "AVAL",
                                          id = "USUBJID",
                                          labelstr = "",
                                          add_total_level = FALSE,
                                          .N_col, # nolint
                                          .stats,
                                          .formats = list(n_patients = "xx (xx.x%)", sum_exposure = "xx"),
                                          custom_label = NULL) {
  checkmate::assert_flag(add_total_level)

  # When a grouping variable is given it must exist in `df`; it is converted to a factor so
  # that one row per level can be produced below.
  if (!is.null(var)) {
    assert_df_with_variables(df, list(var = var))
    df[[var]] <- as.factor(df[[var]])
  }

  # Computes the single statistic named by `.stats` on `data`; `label` is handed over as
  # `custom_label` to `s_count_patients_sum_exposure()`.
  compute_stat <- function(data, label) {
    s_count_patients_sum_exposure(
      df = data,
      ex_var = ex_var,
      id = id,
      labelstr = labelstr,
      .N_col = .N_col,
      .stats = .stats,
      custom_label = label
    )[[.stats]]
  }

  y <- list()
  if (is.null(var)) {
    # no grouping variable: a single "Total" row
    y[[.stats]] <- list(Total = compute_stat(df, custom_label))
  } else {
    for (lvl in levels(df[[var]])) {
      # Explicit indexing instead of `subset(df, get(var) == lvl)`: inside `subset()` the
      # names `var` and `lvl` are looked up in `df` first, so a data column called `var` or
      # `lvl` would shadow these locals. Rows with a missing grouping value are dropped,
      # exactly as `subset()` does.
      in_level <- !is.na(df[[var]]) & df[[var]] == lvl
      y[[.stats]][[lvl]] <- compute_stat(df[in_level, , drop = FALSE], lvl)
    }
    if (add_total_level) {
      y[[.stats]][["Total"]] <- compute_stat(df, custom_label)
    }
  }

  in_rows(.list = y[[.stats]], .formats = .formats[[.stats]])
}

#' @describeIn summarize_patients_exposure_in_cols Layout-creating function which can take statistics
#'   function arguments and additional format arguments. This function is a wrapper for
#'   [rtables::split_cols_by_multivar()] and [rtables::summarize_row_groups()].
#'
#' @return
#' * `summarize_patients_exposure_in_cols()` returns a layout object suitable for passing to further
#'   layouting functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will
#'   add formatted content rows, with the statistics from `s_count_patients_sum_exposure()` arranged in
#'   columns, to the table layout.
#'
#' @examples
#' lyt <- basic_table() %>%
#'   summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE)
#' result <- build_table(lyt, df = df, alt_counts_df = adsl)
#' result
#'
#' lyt2 <- basic_table() %>%
#'   summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE, .stats = "sum_exposure")
#' result2 <- build_table(lyt2, df = df, alt_counts_df = adsl)
#' result2
#'
#' @export
summarize_patients_exposure_in_cols <- function(lyt, # nolint
                                                var,
                                                ...,
                                                .stats = c("n_patients", "sum_exposure"),
                                                .labels = c(n_patients = "Patients", sum_exposure = "Person time"),
                                                .indent_mods = NULL,
                                                col_split = TRUE) {
  # Optionally split the columns: one column per requested statistic, each of them based on
  # the same variable `var` and labelled with the matching entry of `.labels`. The
  # statistic names are handed over as extra argument `.stats` of the column split.
  if (col_split) {
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = rep(var, length(.stats)),
      varlabels = .labels[.stats],
      extra_args = list(.stats = .stats)
    )
  }
  # Add the content rows; everything passed via `...` is forwarded to
  # `a_count_patients_sum_exposure()` as extra arguments.
  # NOTE(review): `.indent_mods` is accepted by this function but not used in its body -
  # confirm whether it should be forwarded to `summarize_row_groups()`.
  summarize_row_groups(
    lyt = lyt,
    var = var,
    cfun = a_count_patients_sum_exposure,
    extra_args = list(...)
  )
}

#' @describeIn summarize_patients_exposure_in_cols Layout-creating function which can take statistics
#'   function arguments and additional format arguments. This function is a wrapper for
#'   [rtables::split_cols_by_multivar()] and [rtables::analyze_colvars()].
#'
#' @param col_split (`flag`)\cr whether the columns should be split. Set to `FALSE` when the required
#'   column split has been done already earlier in the layout pipe.
#'
#' @return
#' * `analyze_patients_exposure_in_cols()` returns a layout object suitable for passing to further
#'   layouting functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will
#'   add formatted data rows, with the statistics from `s_count_patients_sum_exposure()` arranged in
#'   columns, to the table layout.
#'
#' @note As opposed to [summarize_patients_exposure_in_cols()] which generates content rows,
#'   `analyze_patients_exposure_in_cols()` generates data rows which will _not_ be repeated on multiple
#'   pages when pagination is used.
#'
#' @examples
#' lyt3 <- basic_table() %>%
#'   split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>%
#'   summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE) %>%
#'   analyze_patients_exposure_in_cols(var = "SEX", col_split = FALSE)
#' result3 <- build_table(lyt3, df = df, alt_counts_df = adsl)
#' result3
#'
#' lyt4 <- basic_table() %>%
#'   split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>%
#'   summarize_patients_exposure_in_cols(
#'     var = "AVAL", col_split = TRUE,
#'     .stats = "n_patients", custom_label = "some custom label"
#'   ) %>%
#'   analyze_patients_exposure_in_cols(var = "SEX", col_split = FALSE, ex_var = "AVAL")
#' result4 <- build_table(lyt4, df = df, alt_counts_df = adsl)
#' result4
#'
#' lyt5 <- basic_table() %>%
#'   analyze_patients_exposure_in_cols(var = "SEX", col_split = TRUE, ex_var = "AVAL")
#' result5 <- build_table(lyt5, df = df, alt_counts_df = adsl)
#' result5
#'
#' # Adding total levels and custom label
#' lyt <- basic_table(
#'   show_colcounts = TRUE
#' ) %>%
#'   analyze_patients_exposure_in_cols(
#'     var = "ARMCD",
#'     col_split = TRUE,
#'     add_total_level = TRUE,
#'     custom_label = "TOTAL"
#'   ) %>%
#'   append_topleft(c("", "Sex"))
#'
#' tbl <- build_table(lyt, df = df, alt_counts_df = adsl)
#' tbl
#'
#' @export
analyze_patients_exposure_in_cols <- function(lyt, # nolint
                                              var = NULL,
                                              ex_var = "AVAL",
                                              col_split = TRUE,
                                              add_total_level = FALSE,
                                              .stats = c("n_patients", "sum_exposure"),
                                              .labels = c(n_patients = "Patients", sum_exposure = "Person time"),
                                              .indent_mods = 0L,
                                              ...) {
  # Optionally split the columns: one column per requested statistic, each of them based on
  # the exposure variable `ex_var` and labelled with the matching entry of `.labels`.
  if (col_split) {
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = rep(ex_var, length(.stats)),
      varlabels = .labels[.stats],
      extra_args = list(.stats = .stats)
    )
  }

  # Build the extra arguments with `list()` so that every argument passed via `...` stays a
  # single, correctly named element. Combining with `c(list(...), ...)` would splice
  # vector- or list-valued arguments into several elements with altered names.
  extra_args <- list(
    var = var,
    ex_var = ex_var,
    add_total_level = add_total_level,
    ...
  )

  lyt %>% analyze_colvars(
    afun = a_count_patients_sum_exposure,
    indent_mod = .indent_mods,
    extra_args = extra_args
  )
}

#' Occurrence Counts
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Functions for analyzing frequencies and fractions of occurrences for patients with occurrence
#' data. Primary analysis variables are the dictionary terms. All occurrences are counted for total
#' counts. Multiple occurrences within patient at the lowest term level displayed in the table are
#' counted only once.
#'
#' @inheritParams argument_convention
#'
#' @note By default, occurrences which don't appear in a given row split are dropped from the table and
#'   the occurrences in the table are sorted alphabetically per row split. Therefore, the corresponding layout
#'   needs to use `split_fun = drop_split_levels` in the `split_rows_by` calls. Use `drop = FALSE` if you would
#'   like to show all occurrences.
#'
#' @name count_occurrences
NULL

#' @describeIn count_occurrences Statistics function which counts number of patients that report an
#' occurrence.
#'
#' @param denom (`string`)\cr choice of denominator for patient proportions. Can be:
#'   - `N_col`: total number of patients in this column across rows
#'   - `n`: number of patients with any occurrences
#'
#' @return
#' * `s_count_occurrences()` returns a list with:
#'   * `count`: list of counts with one element per occurrence.
#'   * `count_fraction`: list of counts and fractions with one element per occurrence.
#'   * `fraction`: list of numerators and denominators with one element per occurrence.
#'
#' @examples
#' df <- data.frame(
#'   USUBJID = as.character(c(1, 1, 2, 4, 4, 4)),
#'   MHDECOD = c("MH1", "MH2", "MH1", "MH1", "MH1", "MH3")
#' )
#'
#' N_per_col <- 4L
#'
#' # Count unique occurrences per subject.
#' s_count_occurrences(
#'   df,
#'   .N_col = N_per_col,
#'   .df_row = df,
#'   .var = "MHDECOD",
#'   id = "USUBJID"
#' )
#'
#' @export
s_count_occurrences <- function(df,
                                denom = c("N_col", "n"),
                                .N_col, # nolint
                                .df_row,
                                drop = TRUE,
                                .var = "MHDECOD",
                                id = "USUBJID") {
  # input validation
  checkmate::assert_flag(drop)
  assert_df_with_variables(df, list(range = .var, id = id))
  checkmate::assert_count(.N_col)
  checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[id]], classes = c("factor", "character"))
  denom <- match.arg(denom)

  occurrences <- df[[.var]]
  if (drop) {
    # Restrict the levels to the occurrences observed in the current row split, sorted.
    # Note that we don't try to preserve original level order here since a) that would require
    # more time to look up in large original levels and b) that would fail for character input variable.
    row_levels <- sort(unique(.df_row[[.var]]))
    if (length(row_levels) == 0) {
      stop(
        "no empty `.df_row` input allowed when `drop = TRUE`,",
        " please use `split_fun = drop_split_levels` in the `rtables` `split_rows_by` calls"
      )
    }
    occurrences <- factor(occurrences, levels = row_levels)
  }

  ids <- factor(df[[id]])

  # denominator: either the number of distinct ids in `df` or the column count
  dn <- if (denom == "n") nlevels(ids) else .N_col

  # For each occurrence, count the ids having it at least once (multiple records of the
  # same id are counted only once).
  n_ids_per_occurrence <- as.list(rowSums(table(occurrences, ids) > 0))

  count_fraction <- lapply(n_ids_per_occurrence, function(n_ids) {
    # avoid 0 / 0 when both the count and the denominator are zero
    if (n_ids == 0 && dn == 0) {
      c(0, 0)
    } else {
      c(n_ids, n_ids / dn)
    }
  })
  fraction <- lapply(n_ids_per_occurrence, function(n_ids) c(num = n_ids, denom = dn))

  list(
    count = n_ids_per_occurrence,
    count_fraction = count_fraction,
    fraction = fraction
  )
}

#' @describeIn count_occurrences Formatted analysis function which is used as `afun`
#'   in `count_occurrences()`. It is created from `s_count_occurrences()` with `make_afun()`.
#'
#' @return
#' * `a_count_occurrences()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # We need to ungroup `count_fraction` first so that the `rtables` formatting
#' # function `format_count_fraction()` can be applied correctly.
#' afun <- make_afun(a_count_occurrences, .ungroup_stats = c("count", "count_fraction", "fraction"))
#' afun(
#'   df,
#'   .N_col = N_per_col,
#'   .df_row = df,
#'   .var = "MHDECOD",
#'   id = "USUBJID"
#' )
#'
#' @export
a_count_occurrences <- make_afun(
  s_count_occurrences,
  .formats = c(count = "xx", count_fraction = format_count_fraction_fixed_dp, fraction = format_fraction_fixed_dp)
)

#' @describeIn count_occurrences Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_occurrences()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_occurrences()` to the table layout.
#'
#' @examples
#' library(dplyr)
#' df <- data.frame(
#'   USUBJID = as.character(c(
#'     1, 1, 2, 4, 4, 4,
#'     6, 6, 6, 7, 7, 8
#'   )),
#'   MHDECOD = c(
#'     "MH1", "MH2", "MH1", "MH1", "MH1", "MH3",
#'     "MH2", "MH2", "MH3", "MH1", "MH2", "MH4"
#'   ),
#'   ARM = rep(c("A", "B"), each = 6)
#' )
#' df_adsl <- df %>%
#'   select(USUBJID, ARM) %>%
#'   unique()
#'
#' # Create table layout
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_occurrences(vars = "MHDECOD", .stats = c("count_fraction"))
#'
#' # Apply table layout to data and produce `rtable` object
#' lyt %>%
#'   build_table(df, alt_counts_df = df_adsl) %>%
#'   prune_table()
#'
#' @export
count_occurrences <- function(lyt,
                              vars,
                              var_labels = vars,
                              show_labels = "hidden",
                              ...,
                              table_names = vars,
                              .stats = "count_fraction",
                              .formats = NULL,
                              .labels = NULL,
                              .indent_mods = NULL) {
  # Customize the formatted analysis function with the requested statistics, formats,
  # labels and indent modifiers; all requested statistics are ungrouped
  # (`.ungroup_stats = .stats`).
  # NOTE(review): `afun` is passed to `analyze()` by name below; `rtables` may inspect the
  # expression given as `afun`, so confirm before renaming this local variable.
  afun <- make_afun(
    a_count_occurrences,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .ungroup_stats = .stats
  )

  # Add the analysis to the layout; arguments passed via `...` are forwarded to the
  # analysis function as extra arguments.
  analyze(
    lyt = lyt,
    vars = vars,
    afun = afun,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    extra_args = list(...)
  )
}

#' Estimation of Proportions per Level of Factor
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Estimate the proportion along with confidence interval of a proportion
#' regarding the level of a factor.
#'
#' @inheritParams argument_convention
#'
#' @seealso Relevant description function [d_onco_rsp_label()].
#'
#' @name estimate_multinomial_rsp
NULL

#' Description of Standard Oncology Response
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Describe the oncology response in a standard way.
#'
#' @param x (`character`)\cr the standard oncology code to be described.
#'
#' @return Response labels.
#'
#' @seealso [estimate_multinomial_rsp()]
#'
#' @examples
#' d_onco_rsp_label(
#'   c("CR", "PR", "SD", "NON CR/PD", "PD", "NE", "Missing", "<Missing>", "NE/Missing")
#' )
#'
#' # Adding some values not considered in d_onco_rsp_label
#'
#' d_onco_rsp_label(
#'   c("CR", "PR", "hello", "hi")
#' )
#'
#' @export
d_onco_rsp_label <- function(x) {
  codes <- as.character(x)

  # standard oncology response codes and their descriptions
  lookup <- c(
    CR           = "Complete Response (CR)",
    PR           = "Partial Response (PR)",
    MR           = "Minimal/Minor Response (MR)",
    MRD          = "Minimal Residual Disease (MRD)",
    SD           = "Stable Disease (SD)",
    PD           = "Progressive Disease (PD)",
    `NON CR/PD`  = "Non-CR or Non-PD (NON CR/PD)",
    NE           = "Not Evaluable (NE)",
    `NE/Missing` = "Missing or unevaluable",
    Missing      = "Missing",
    `NA`         = "Not Applicable (NA)",
    ND           = "Not Done (ND)"
  )

  # Known codes are replaced by their description, anything else is kept unchanged.
  # The result is named by the input codes.
  labelled <- codes
  known <- codes %in% names(lookup)
  labelled[known] <- lookup[codes[known]]
  names(labelled) <- codes

  # Levels: known descriptions first (in the order of `lookup`), then the remaining values
  # in order of appearance.
  known_levels <- intersect(lookup, labelled)
  other_levels <- setdiff(labelled, lookup)
  factor(labelled, levels = c(known_levels, other_levels))
}

#' @describeIn estimate_multinomial_rsp Statistics function which feeds the length of `x` as number
#'   of successes, and `.N_col` as total number of successes and failures into [s_proportion()].
#'
#' @return
#' * `s_length_proportion()` returns statistics from [s_proportion()].
#'
#' @examples
#' s_length_proportion(rep("CR", 10), .N_col = 100)
#' s_length_proportion(factor(character(0)), .N_col = 100)
#'
#' @export
s_length_proportion <- function(x,
                                .N_col, # nolint
                                ...) {
  # `x` holds at most `.N_col` entries, all of them with the same value
  checkmate::assert_multi_class(x, classes = c("factor", "character"))
  checkmate::assert_vector(x, min.len = 0, max.len = .N_col)
  checkmate::assert_vector(unique(x), min.len = 0, max.len = 1)

  # The length of `x` is the number of successes; the remainder up to `.N_col` are failures.
  n_success <- length(x)
  responses <- c(rep(TRUE, n_success), rep(FALSE, .N_col - n_success))

  s_proportion(df = responses, ...)
}

#' @describeIn estimate_multinomial_rsp Formatted analysis function which is used as `afun`
#'   in `estimate_multinomial_response()`. It is created from `s_length_proportion()` with
#'   `make_afun()`, which sets default formats for the `n_prop` and `prop_ci` statistics.
#'
#' @return
#' * `a_length_proportion()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_length_proportion(rep("CR", 10), .N_col = 100)
#' a_length_proportion(factor(character(0)), .N_col = 100)
#'
#' @export
a_length_proportion <- make_afun(
  s_length_proportion,
  .formats = c(
    n_prop = "xx (xx.x%)",
    prop_ci = "(xx.xx, xx.xx)"
  )
)

#' @describeIn estimate_multinomial_rsp Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()] and
#'   [rtables::summarize_row_groups()].
#'
#' @return
#' * `estimate_multinomial_response()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_length_proportion()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' # Use of the layout creating function.
#' dta_test <- data.frame(
#'   USUBJID = paste0("S", 1:12),
#'   ARM     = factor(rep(LETTERS[1:3], each = 4)),
#'   AVAL    = c(A = c(1, 1, 1, 1), B = c(0, 0, 1, 1), C = c(0, 0, 0, 0))
#' ) %>% mutate(
#'   AVALC = factor(AVAL,
#'     levels = c(0, 1),
#'     labels = c("Complete Response (CR)", "Partial Response (PR)")
#'   )
#' )
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   estimate_multinomial_response(var = "AVALC")
#'
#' tbl <- build_table(lyt, dta_test)
#'
#' html <- as_html(tbl)
#' html
#' \dontrun{
#' Viewer(html)
#' }
#'
#' @export
estimate_multinomial_response <- function(lyt,
                                          var,
                                          ...,
                                          show_labels = "hidden",
                                          table_names = var,
                                          .stats = "prop_ci",
                                          .formats = NULL,
                                          .labels = NULL,
                                          .indent_mods = NULL) {
  # Customize the formatted analysis function with the requested statistics, formats,
  # labels and indent modifiers.
  # NOTE(review): `afun` is passed to `analyze()` by name below; `rtables` may inspect the
  # expression given as `afun`, so confirm before renaming this local variable.
  afun <- make_afun(
    a_length_proportion,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
  # Split the rows by the levels of `var` and add a content row for each of them.
  lyt <- split_rows_by(lyt, var = var)
  lyt <- summarize_row_groups(lyt)

  # Within each row split, analyze `var` itself; arguments passed via `...` are forwarded
  # to the analysis function as extra arguments.
  analyze(
    lyt,
    vars = var,
    afun = afun,
    show_labels = show_labels,
    table_names = table_names,
    extra_args = list(...)
  )
}

#' Subgroup Treatment Effect Pattern (STEP) Fit for Binary (Response) Outcome
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This fits the Subgroup Treatment Effect Pattern logistic regression models for a binary
#' (response) outcome. The treatment arm variable must have exactly 2 levels,
#' where the first one is taken as reference and the estimated odds ratios are
#' for the comparison of the second level vs. the first one.
#'
#' The (conditional) logistic regression model which is fit is:
#'
#' `response ~ arm * poly(biomarker, degree) + covariates + strata(strata)`
#'
#' where `degree` is specified by `control_step()`.
#'
#' @inheritParams argument_convention
#' @param variables (named `list` of `character`)\cr list of analysis variables:
#'   needs `response`, `arm`, `biomarker`, and optional `covariates` and `strata`.
#' @param control (named `list`)\cr combined control list from [control_step()]
#'   and [control_logistic()].
#'
#' @return A matrix of class `step`. The first part of the columns describe the
#'   subgroup intervals used for the biomarker variable, including where the
#'   center of the intervals are and their bounds. The second part of the
#'   columns contain the estimates for the treatment arm comparison.
#'
#' @note For the default degree 0 the `biomarker` variable is not included in the model.
#'
#' @seealso [control_step()] and [control_logistic()] for the available
#'   customization options.
#'
#' @examples
#' # Testing dataset with just two treatment arms.
#' library(survival)
#' library(dplyr)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(
#'     PARAMCD == "BESRSPI",
#'     ARM %in% c("B: Placebo", "A: Drug X")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to have Placebo as reference arm for Odds Ratio calculations.
#'     ARM = droplevels(forcats::fct_relevel(ARM, "B: Placebo")),
#'     RSP = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     SEX = factor(SEX)
#'   )
#'
#' variables <- list(
#'   arm = "ARM",
#'   biomarker = "BMRKR1",
#'   covariates = "AGE",
#'   response = "RSP"
#' )
#'
#' # Fit default STEP models: Here a constant treatment effect is estimated in each subgroup.
#' # We use a large enough bandwidth to avoid too small subgroups and linear separation in those.
#' step_matrix <- fit_rsp_step(
#'   variables = variables,
#'   data = adrs_f,
#'   control = c(control_logistic(), control_step(bandwidth = 0.5))
#' )
#' dim(step_matrix)
#' head(step_matrix)
#'
#' # Specify different polynomial degree for the biomarker interaction to use more flexible local
#' # models. Or specify different logistic regression options, including confidence level.
#' step_matrix2 <- fit_rsp_step(
#'   variables = variables,
#'   data = adrs_f,
#'   control = c(control_logistic(conf_level = 0.9), control_step(bandwidth = 0.6, degree = 1))
#' )
#'
#' # Use a global constant model. This is helpful as a reference for the subgroup models.
#' step_matrix3 <- fit_rsp_step(
#'   variables = variables,
#'   data = adrs_f,
#'   control = c(control_logistic(), control_step(bandwidth = NULL, num_points = 2L))
#' )
#'
#' # It is also possible to use strata, i.e. use conditional logistic regression models.
#' variables2 <- list(
#'   arm = "ARM",
#'   biomarker = "BMRKR1",
#'   covariates = "AGE",
#'   response = "RSP",
#'   strata = c("STRATA1", "STRATA2")
#' )
#'
#' step_matrix4 <- fit_rsp_step(
#'   variables = variables2,
#'   data = adrs_f,
#'   control = c(control_logistic(), control_step(bandwidth = 0.6))
#' )
#'
#' @export
fit_rsp_step <- function(variables,
                         data,
                         control = c(control_step(), control_logistic())) {
  assert_df_with_variables(data, variables)
  checkmate::assert_list(control, names = "named")

  # Rows with a missing biomarker value are dropped before the windows are built.
  biomarker_known <- !is.na(data[[variables$biomarker]])
  data <- data[biomarker_known, ]

  window_sel <- h_step_window(x = data[[variables$biomarker]], control = control)
  centers <- window_sel$interval[, "Interval Center"]
  model_formula <- h_step_rsp_formula(variables = variables, control = control)

  if (is.null(control$bandwidth)) {
    # Without a bandwidth a single call receives the full data and all interval centers.
    estimates <- h_step_rsp_est(
      formula = model_formula,
      data = data,
      variables = variables,
      x = centers,
      control = control
    )
  } else {
    # Otherwise one call per interval center, each paired with the matching
    # column of `window_sel$sel` as its `subset`.
    per_window <- mapply(
      FUN = h_step_rsp_est,
      x = centers,
      subset = as.list(as.data.frame(window_sel$sel)),
      MoreArgs = list(
        formula = model_formula,
        data = data,
        variables = variables,
        control = control
      )
    )
    # `mapply()` returns one column per window; label the rows and transpose
    # so that each window becomes a row.
    rownames(per_window) <- c("n", "logor", "se", "ci_lower", "ci_upper")
    estimates <- t(per_window)
  }

  structure(
    cbind(window_sel$interval, estimates),
    class = c("step", "matrix"),
    variables = variables,
    control = control
  )
}

#' Individual Patient Plots
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Renders line plot(s) displaying the trend in patients' parameter values over time.
#' Patients' individual baseline values can be added to the plot(s) as reference.
#'
#' @inheritParams argument_convention
#' @param xvar (`string`)\cr time point variable to be plotted on x-axis.
#' @param yvar (`string`)\cr continuous analysis variable to be plotted on y-axis.
#' @param xlab (`string`)\cr plot label for x-axis.
#' @param ylab (`string`)\cr plot label for y-axis.
#' @param id_var (`string`)\cr variable used as patient identifier.
#' @param title (`string`)\cr title for plot.
#' @param subtitle (`string`)\cr subtitle for plot.
#' @param add_baseline_hline (`flag`)\cr adds horizontal line at baseline y-value on
#'   plot when TRUE.
#' @param yvar_baseline (`string`)\cr variable with baseline values only.
#'   Ignored when `add_baseline_hline` is FALSE.
#' @param ggtheme (`theme`)\cr optional graphical theme function as provided
#'   by `ggplot2` to control outlook of plot. Use `ggplot2::theme()` to tweak the display.
#' @param plotting_choices (`character`)\cr specifies options for displaying
#'   plots. Must be one of "all_in_one", "split_by_max_obs", "separate_by_obs".
#' @param max_obs_per_plot (`count`)\cr maximum number of patients (unique values of `id_var`)
#'   to be plotted on one plot. Ignored when `plotting_choices` is not "split_by_max_obs".
#' @param caption (`character` scalar)\cr optional caption below the plot.
#' @param col (`character`)\cr lines colors.
#'
#' @seealso Relevant helper function [h_g_ipp()].
#'
#' @name individual_patient_plot
NULL

#' Helper Function To Create Simple Line Plot over Time
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function that generates a simple line plot displaying parameter trends over time.
#'
#' @inheritParams argument_convention
#' @inheritParams g_ipp
#'
#' @return A `ggplot` line plot.
#'
#' @seealso [g_ipp()] which uses this function.
#'
#' @examples
#' library(dplyr)
#' library(nestcolor)
#'
#' # Select a small sample of data to plot.
#' adlb <- tern_ex_adlb %>%
#'   filter(PARAMCD == "ALT", !(AVISIT %in% c("SCREENING", "BASELINE"))) %>%
#'   slice(1:36)
#'
#' p <- h_g_ipp(
#'   df = adlb,
#'   xvar = "AVISIT",
#'   yvar = "AVAL",
#'   xlab = "Visit",
#'   id_var = "USUBJID",
#'   ylab = "SGOT/ALT (U/L)",
#'   add_baseline_hline = TRUE
#' )
#' p
#'
#' @export
h_g_ipp <- function(df,
                    xvar,
                    yvar,
                    xlab,
                    ylab,
                    id_var,
                    title = "Individual Patient Plots",
                    subtitle = "",
                    caption = NULL,
                    add_baseline_hline = FALSE,
                    yvar_baseline = "BASE",
                    ggtheme = nestcolor::theme_nest(),
                    col = NULL) {
  # Check `df` first, because its column names are used by the variable check below.
  checkmate::assert_data_frame(df)
  checkmate::assert_string(xvar)
  checkmate::assert_string(yvar)
  checkmate::assert_string(yvar_baseline)
  checkmate::assert_string(id_var)
  checkmate::assert_string(xlab)
  checkmate::assert_string(ylab)
  checkmate::assert_string(title)
  checkmate::assert_string(subtitle)
  checkmate::assert_flag(add_baseline_hline)
  checkmate::assert_character(col, null.ok = TRUE)

  # The baseline column is only read when the baseline line is drawn, so it is
  # only required in that case.
  required_vars <- c(xvar, yvar, id_var)
  if (add_baseline_hline) {
    required_vars <- c(required_vars, yvar_baseline)
  }
  checkmate::assert_subset(required_vars, colnames(df))

  # One line (with points) per patient, coloured by patient.
  p <- ggplot2::ggplot(
    data = df,
    mapping = ggplot2::aes(
      x = .data[[xvar]],
      y = .data[[yvar]],
      group = .data[[id_var]],
      colour = .data[[id_var]]
    )
  ) +
    ggplot2::geom_line(linewidth = 0.4) +
    ggplot2::geom_point(size = 2) +
    ggplot2::labs(
      x = xlab,
      y = ylab,
      title = title,
      subtitle = subtitle,
      caption = caption
    ) +
    ggtheme

  if (add_baseline_hline) {
    # One row per distinct (patient, baseline value) combination.
    baseline_df <- unique(df[, c(id_var, yvar_baseline), drop = FALSE])

    # `[[` extracts the column as a vector for both data frames and tibbles.
    y_values <- df[[yvar]]
    y_span <- max(y_values, na.rm = TRUE) - min(y_values, na.rm = TRUE)

    p <- p +
      ggplot2::geom_hline(
        data = baseline_df,
        mapping = ggplot2::aes(
          yintercept = .data[[yvar_baseline]],
          colour = .data[[id_var]]
        ),
        linetype = "dotdash",
        linewidth = 0.4
      ) +
      ggplot2::geom_text(
        data = baseline_df,
        mapping = ggplot2::aes(
          x = 1,
          y = .data[[yvar_baseline]],
          label = .data[[id_var]],
          colour = .data[[id_var]]
        ),
        # Shift the label by 2.5% of the observed y-range so it does not sit on the line.
        nudge_y = 0.025 * y_span,
        vjust = "right",
        size = 2
      )
  }

  # User-supplied colours apply to the patient lines whether or not the
  # baseline reference line is drawn.
  if (!is.null(col)) {
    p <- p +
      ggplot2::scale_color_manual(values = col)
  }

  p
}

#' @describeIn individual_patient_plot Plotting function for individual patient plots which, depending on user
#'   preference, renders a single graphic or compiles a list of graphics that show trends in individual's parameter
#'   values over time.
#'
#' @return A `ggplot` object or a list of `ggplot` objects.
#'
#' @examples
#' library(dplyr)
#' library(nestcolor)
#'
#' # Select a small sample of data to plot.
#' adlb <- tern_ex_adlb %>%
#'   filter(PARAMCD == "ALT", !(AVISIT %in% c("SCREENING", "BASELINE"))) %>%
#'   slice(1:36)
#'
#' plot_list <- g_ipp(
#'   df = adlb,
#'   xvar = "AVISIT",
#'   yvar = "AVAL",
#'   xlab = "Visit",
#'   ylab = "SGOT/ALT (U/L)",
#'   title = "Individual Patient Plots",
#'   add_baseline_hline = TRUE,
#'   plotting_choices = "split_by_max_obs",
#'   max_obs_per_plot = 5
#' )
#' plot_list
#'
#' @export
g_ipp <- function(df,
                  xvar,
                  yvar,
                  xlab,
                  ylab,
                  id_var = "USUBJID",
                  title = "Individual Patient Plots",
                  subtitle = "",
                  caption = NULL,
                  add_baseline_hline = FALSE,
                  yvar_baseline = "BASE",
                  ggtheme = nestcolor::theme_nest(),
                  plotting_choices = c("all_in_one", "split_by_max_obs", "separate_by_obs"),
                  max_obs_per_plot = 4,
                  col = NULL) {
  checkmate::assert_count(max_obs_per_plot)
  checkmate::assert_subset(plotting_choices, c("all_in_one", "split_by_max_obs", "separate_by_obs"))
  checkmate::assert_character(col, null.ok = TRUE)

  plotting_choices <- match.arg(plotting_choices)

  # Single place where the plotting arguments are forwarded to `h_g_ipp()`;
  # only the data subset differs between the plotting choices.
  plot_subset <- function(df_subset) {
    h_g_ipp(
      df = df_subset,
      xvar = xvar,
      yvar = yvar,
      xlab = xlab,
      ylab = ylab,
      id_var = id_var,
      title = title,
      subtitle = subtitle,
      caption = caption,
      add_baseline_hline = add_baseline_hline,
      yvar_baseline = yvar_baseline,
      ggtheme = ggtheme,
      col = col
    )
  }

  if (plotting_choices == "all_in_one") {
    return(plot_subset(df))
  }

  if (plotting_choices == "split_by_max_obs") {
    # A chunk size of zero would mean dividing by zero below, so fail early
    # with a clear message in the only branch that uses it.
    checkmate::assert_count(max_obs_per_plot, positive = TRUE)

    id_vec <- unique(df[[id_var]])
    # Consecutive ids share a chunk number: 1, ..., 1, 2, ..., 2, ...
    chunk_index <- ceiling(seq_along(id_vec) / max_obs_per_plot)
    id_list <- split(id_vec, chunk_index)

    # `unname()` keeps the result an unnamed list, one plot per chunk of ids.
    plot_list <- lapply(
      unname(id_list),
      function(ids) plot_subset(df[df[[id_var]] %in% ids, ])
    )
    return(plot_list)
  }

  # "separate_by_obs": one plot per value of `id_var`, named by that value.
  lapply(split(df, df[[id_var]]), plot_subset)
}

#' Combination Functions Class
#'
#' @description `r lifecycle::badge("stable")`
#'
#' `CombinationFunction` is an S4 class which extends standard functions. These are special functions that
#' can be combined and negated with the logical operators.
#'
#' @param e1 (`CombinationFunction`)\cr left hand side of logical operator.
#' @param e2 (`CombinationFunction`)\cr right hand side of logical operator.
#' @param x (`CombinationFunction`)\cr the function which should be negated.
#'
#' @return The logical operators return a new `CombinationFunction`. Calling it returns the logical
#'   result of combining (or negating) the results of the underlying function(s).
#'
#' @exportClass CombinationFunction
#' @export CombinationFunction
#'
#' @examples
#' higher <- function(a) {
#'   force(a)
#'   CombinationFunction(
#'     function(x) {
#'       x > a
#'     }
#'   )
#' }
#'
#' lower <- function(b) {
#'   force(b)
#'   CombinationFunction(
#'     function(x) {
#'       x < b
#'     }
#'   )
#' }
#'
#' c1 <- higher(5)
#' c2 <- lower(10)
#' c3 <- higher(5) & lower(10)
#' c3(7)
#'
#' @aliases CombinationFunction-class
#' @name combination_function
CombinationFunction <- methods::setClass( # nolint
  "CombinationFunction",
  contains = "function"
)

#' @describeIn combination_function Logical "AND" combination of `CombinationFunction` functions.
#'   The resulting object is of the same class, and evaluates the two argument functions. The result
#'   is then the "AND" of the two individual results.
#'
#' @export
methods::setMethod(
  "&",
  signature = c(e1 = "CombinationFunction", e2 = "CombinationFunction"),
  definition = function(e1, e2) {
    # `&&` short-circuits: `e2` is not called when `e1` returns `FALSE`.
    both_hold <- function(...) {
      e1(...) && e2(...)
    }
    CombinationFunction(both_hold)
  }
)

#' @describeIn combination_function Logical "OR" combination of `CombinationFunction` functions.
#'   The resulting object is of the same class, and evaluates the two argument functions. The result
#'   is then the "OR" of the two individual results.
#'
#' @export
methods::setMethod(
  "|",
  signature = c(e1 = "CombinationFunction", e2 = "CombinationFunction"),
  definition = function(e1, e2) {
    # `||` short-circuits: `e2` is not called when `e1` returns `TRUE`.
    either_holds <- function(...) {
      e1(...) || e2(...)
    }
    CombinationFunction(either_holds)
  }
)

#' @describeIn combination_function Logical negation of `CombinationFunction` functions.
#'   The resulting object is of the same class, and evaluates the original function. The result
#'   is then the opposite of this result.
#'
#' @export
methods::setMethod(
  "!",
  signature = c(x = "CombinationFunction"),
  definition = function(x) {
    # The inner `!` negates the value returned by `x`, not the function itself.
    negated <- function(...) {
      !x(...)
    }
    CombinationFunction(negated)
  }
)

#' Count the Number of Patients with a Particular Event
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The primary analysis variable `.var` denotes the unique patient identifier.
#'
#' @inheritParams argument_convention
#'
#' @seealso [count_patients_with_flags]
#'
#' @name count_patients_with_event
NULL

#' @describeIn count_patients_with_event Statistics function which counts the number of patients for which
#'   the defined event has occurred.
#'
#' @inheritParams summarize_variables
#' @param .var (`character`)\cr name of the column that contains the unique identifier.
#' @param filters (`character`)\cr a character vector specifying the column names and flag variables
#'   to be used for counting the number of unique identifiers satisfying such conditions.
#'   Multiple column names and flags are accepted in this format
#'   `c("column_name1" = "flag1", "column_name2" = "flag2")`.
#'   Note that only equality is being accepted as condition.
#'
#' @return
#' * `s_count_patients_with_event()` returns the count and fraction of unique identifiers with the defined event.
#'
#' @examples
#' library(dplyr)
#'
#' # `s_count_patients_with_event()`
#'
#' s_count_patients_with_event(
#'   tern_ex_adae,
#'   .var = "SUBJID",
#'   filters = c("TRTEMFL" = "Y")
#' )
#' s_count_patients_with_event(
#'   tern_ex_adae,
#'   .var = "SUBJID",
#'   filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL")
#' )
#' s_count_patients_with_event(
#'   tern_ex_adae,
#'   .var = "SUBJID",
#'   filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL"),
#'   denom = "N_col",
#'   .N_col = 456
#' )
#'
#' @export
s_count_patients_with_event <- function(df,
                                        .var,
                                        filters,
                                        .N_col, # nolint
                                        .N_row, # nolint
                                        denom = c("n", "N_row", "N_col")) {
  # The names of `filters` are the columns to test, its values the flags to match.
  filter_cols <- names(filters)
  checkmate::assert_subset(filter_cols, colnames(df))

  # Row positions satisfying each single `column == flag` condition;
  # `which()` drops rows where the comparison is `NA`.
  rows_per_filter <- mapply(
    FUN = function(x, y) which(df[[x]] == y),
    filter_cols,
    filters,
    SIMPLIFY = FALSE
  )

  # Keep only the rows that satisfy every condition.
  rows_all_filters <- Reduce(intersect, rows_per_filter)
  ids_with_event <- as.character(unique(df[rows_all_filters, ][[.var]]))
  all_ids <- as.character(unique(df[[.var]]))

  s_count_values(
    all_ids,
    ids_with_event,
    denom = denom,
    .N_col = .N_col,
    .N_row = .N_row
  )
}

#' @describeIn count_patients_with_event Formatted analysis function which is used as `afun`
#'   in `count_patients_with_event()`.
#'
#' @return
#' * `a_count_patients_with_event()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # `a_count_patients_with_event()`
#'
#' a_count_patients_with_event(
#'   tern_ex_adae,
#'   .var = "SUBJID",
#'   filters = c("TRTEMFL" = "Y"),
#'   .N_col = 100,
#'   .N_row = 100
#' )
#'
#' @export
# The `count_fraction` statistic is formatted by a function rather than a
# format string, hence a list of formats.
a_count_patients_with_event <- make_afun(
  s_count_patients_with_event,
  .formats = list(count_fraction = format_count_fraction_fixed_dp)
)

#' @describeIn count_patients_with_event Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_patients_with_event()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_patients_with_event()` to the table layout.
#'
#' @examples
#' # `count_patients_with_event()`
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_values(
#'     "STUDYID",
#'     values = "AB12345",
#'     .stats = "count",
#'     .labels = c(count = "Total AEs")
#'   ) %>%
#'   count_patients_with_event(
#'     "SUBJID",
#'     filters = c("TRTEMFL" = "Y"),
#'     .labels = c(count_fraction = "Total number of patients with at least one adverse event"),
#'     table_names = "tbl_all"
#'   ) %>%
#'   count_patients_with_event(
#'     "SUBJID",
#'     filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL"),
#'     .labels = c(count_fraction = "Total number of patients with fatal AEs"),
#'     table_names = "tbl_fatal"
#'   ) %>%
#'   count_patients_with_event(
#'     "SUBJID",
#'     filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL", "AEREL" = "Y"),
#'     .labels = c(count_fraction = "Total number of patients with related fatal AEs"),
#'     .indent_mods = c(count_fraction = 2L),
#'     table_names = "tbl_rel_fatal"
#'   )
#' build_table(lyt, tern_ex_adae, alt_counts_df = tern_ex_adsl)
#'
#' @export
count_patients_with_event <- function(lyt,
                                      vars,
                                      ...,
                                      table_names = vars,
                                      .stats = "count_fraction",
                                      .formats = NULL,
                                      .labels = NULL,
                                      .indent_mods = NULL) {
  # Variable labels are shown only when more than one variable is analyzed.
  label_visibility <- if (length(vars) > 1) "visible" else "hidden"

  afun <- make_afun(
    a_count_patients_with_event,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Arguments given via `...` are handed to the analysis function as `extra_args`.
  analyze(
    lyt,
    vars,
    afun = afun,
    table_names = table_names,
    show_labels = label_visibility,
    extra_args = list(...)
  )
}

#' Survival Time Analysis
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Summarize median survival time and CIs, percentiles of survival times, survival
#' time range of censored/event patients.
#'
#' @inheritParams argument_convention
#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function
#'   [control_surv_time()]. Some possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for survival time.
#'   * `conf_type` (`string`)\cr confidence interval type. Options are "plain" (default), "log", or "log-log",
#'     see more in [survival::survfit()]. Note option "none" is not supported.
#'   * `quantiles` (`numeric`)\cr vector of length two to specify the quantiles of survival time.
#'
#' @name survival_time
NULL

#' @describeIn survival_time Statistics function which analyzes survival times.
#'
#' @return
#' * `s_surv_time()` returns the statistics:
#'   * `median`: Median survival time.
#'   * `median_ci`: Confidence interval for median time.
#'   * `quantiles`: Survival time for two specified quantiles.
#'   * `range_censor`: Survival time range for censored observations.
#'   * `range_event`: Survival time range for observations with events.
#'   * `range`: Survival time range for all observations.
#'
#' @examples
#' library(dplyr)
#'
#' adtte_f <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVAL = day2month(AVAL),
#'     is_event = CNSR == 0
#'   )
#' df <- adtte_f %>% filter(ARMCD == "ARM A")
#'
#' # Internal function - s_surv_time
#' \dontrun{
#' s_surv_time(df, .var = "AVAL", is_event = "is_event")
#' }
#'
#' @keywords internal
s_surv_time <- function(df,
                        .var,
                        is_event,
                        control = control_surv_time()) {
  checkmate::assert_string(.var)
  assert_df_with_variables(df, list(tte = .var, is_event = is_event))
  checkmate::assert_numeric(df[[.var]], min.len = 1, any.missing = FALSE)
  checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE)

  conf_level <- control$conf_level
  quantiles <- control$quantiles
  times <- df[[.var]]
  had_event <- df[[is_event]]

  # Intercept-only survival fit on the time and event columns of `df`.
  km_formula <- stats::as.formula(paste0("survival::Surv(", .var, ", ", is_event, ") ~ 1"))
  km_fit <- survival::survfit(
    formula = km_formula,
    data = df,
    conf.int = conf_level,
    conf.type = control$conf_type
  )
  km_table <- summary(km_fit, extend = TRUE)$table
  km_quantiles <- stats::quantile(km_fit, probs = quantiles)$quantile

  # The confidence limit entries of the summary table are looked up by the
  # fit's confidence level followed by "LCL" / "UCL".
  ci_names <- paste0(km_fit$conf.int, c("LCL", "UCL"))
  quantiles_label <- paste0(quantiles[1] * 100, "% and ", quantiles[2] * 100, "%-ile")

  list(
    median = formatters::with_label(unname(km_table["median"]), "Median"),
    median_ci = formatters::with_label(unname(km_table[ci_names]), f_conf_level(conf_level)),
    quantiles = formatters::with_label(unname(km_quantiles), quantiles_label),
    range_censor = formatters::with_label(
      range_noinf(times[!had_event], na.rm = TRUE), "Range (censored)"
    ),
    range_event = formatters::with_label(
      range_noinf(times[had_event], na.rm = TRUE), "Range (event)"
    ),
    range = formatters::with_label(range_noinf(times, na.rm = TRUE), "Range")
  )
}

#' @describeIn survival_time Formatted analysis function which is used as `afun` in `surv_time()`.
#'
#' @return
#' * `a_surv_time()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_surv_time
#' \dontrun{
#' a_surv_time(df, .var = "AVAL", is_event = "is_event")
#' }
#'
#' @keywords internal
# Default format string for each statistic returned by `s_surv_time()`.
a_surv_time <- make_afun(
  s_surv_time,
  .formats = c(
    median = "xx.x",
    median_ci = "(xx.x, xx.x)",
    quantiles = "xx.x, xx.x",
    range_censor = "xx.x to xx.x",
    range_event = "xx.x to xx.x",
    range = "xx.x to xx.x"
  )
)

#' @describeIn survival_time Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#'
#' @return
#' * `surv_time()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_surv_time()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD") %>%
#'   add_colcounts() %>%
#'   surv_time(
#'     vars = "AVAL",
#'     var_labels = "Survival Time (Months)",
#'     is_event = "is_event",
#'     control = control_surv_time(conf_level = 0.9, conf_type = "log-log")
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' @export
surv_time <- function(lyt,
                      vars,
                      ...,
                      var_labels = "Time to Event",
                      table_names = vars,
                      .stats = c("median", "median_ci", "quantiles", "range_censor", "range_event"),
                      .formats = NULL,
                      .labels = NULL,
                      .indent_mods = c(
                        "median" = 0L, "median_ci" = 1L, "quantiles" = 0L,
                        "range_censor" = 0L, "range_event" = 0L, "range" = 0L
                      )) {
  # NOTE(review): `extract_by_name()` presumably keeps only the indent
  # modifiers for the statistics named in `.stats` - confirm against its definition.
  selected_indent_mods <- extract_by_name(.indent_mods, .stats)

  afun <- make_afun(
    a_surv_time,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = selected_indent_mods
  )

  # Arguments given via `...` are handed to the analysis function as `extra_args`.
  analyze(
    lyt,
    vars,
    afun = afun,
    var_labels = var_labels,
    table_names = table_names,
    show_labels = "visible",
    extra_args = list(...)
  )
}

#' Generate PK reference dataset
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @return `data.frame` of PK parameters
#'
#' @examples
#' pk_reference_dataset <- d_pkparam()
#'
#' @export
d_pkparam <- function() {
  pk_dataset <- as.data.frame(matrix(
    c(
      "TMAX", "Time of CMAX", "Tmax", "Plasma/Blood/Serum", "1",
      "CMAX", "Max Conc", "Cmax", "Plasma/Blood/Serum", "2",
      "CMAXD", "Max Conc Norm by Dose", "Cmax/D", "Plasma/Blood/Serum", "3",
      "AUCIFO", "AUC Infinity Obs", "AUCinf obs", "Plasma/Blood/Serum", "4",
      "AUCIFP", "AUC Infinity Pred", "AUCinf pred", "Plasma/Blood/Serum", "5",
      "AUCIFOD", "AUC Infinity Obs Norm by Dose", "AUCinf/D obs", "Plasma/Blood/Serum", "6",
      "AUCIFD", "AUC Infinity Pred Norm by Dose", "AUCinf/D pred", "Plasma/Blood/Serum", "7",
      "AUCPEO", "AUC %Extrapolation Obs", "AUCinf extrap obs", "Plasma/Blood/Serum", "8",
      "AUCPEP", "AUC %Extrapolation Pred", "AUCinf extrap pred", "Plasma/Blood/Serum", "9",
      "AUCINT", "AUC from T1 to T2", "AUCupper-lower ", "Plasma/Blood/Serum", "10",
      "AUCTAU", "AUC Over Dosing Interval", "AUCtau", "Plasma/Blood/Serum", "11",
      "AUCLST", "AUC to Last Nonzero Conc", "AUClast", "Plasma/Blood/Serum", "12",
      "AUCALL", "AUC All", "AUCall", "Plasma/Blood/Serum", "13",
      "AUMCIFO", "AUMC Infinity Obs", "AUMCinf obs", "Plasma/Blood/Serum", "14",
      "AUMCIFP", "AUMC Infinity Pred", "AUMCinf pred", "Plasma/Blood/Serum", "15",
      "AUMCPEO", "AUMC % Extrapolation Obs", "AUMC extrap obs", "Plasma/Blood/Serum", "16",
      "AUMCPEP", "AUMC % Extrapolation Pred", "AUMC extrap pred", "Plasma/Blood/Serum", "17",
      "AUMCTAU", "AUMC Over Dosing Interval", "AUMCtau", "Plasma/Blood/Serum", "18",
      "AUMCLST", "AUMC to Last Nonzero Conc", "AUMClast", "Plasma/Blood/Serum", "19",
      "AURCIFO", "AURC Infinity Obs", "AURCinf obs", "Plasma/Blood/Serum", "20",
      "AURCIFP", "AURC Infinity Pred", "AURCinf pred", "Plasma/Blood/Serum", "21",
      "AURCPEO", "AURC % Extrapolation Obs", "AURC extrap obs", "Plasma/Blood/Serum", "22",
      "AURCPEP", "AURC % Extrapolation Pred", "AURC extrap pred", "Plasma/Blood/Serum", "23",
      "AURCLST", "AURC Dosing to Last Conc", "AURClast", "Plasma/Blood/Serum", "24",
      "AURCALL", "AURC All", "AURCall", "Plasma/Blood/Serum", "25",
      "TLST", "Time of Last Nonzero Conc", "Tlast", "Plasma/Blood/Serum", "26",
      "CO", "Initial Conc", "CO", "Plasma/Blood/Serum", "27",
      "C0", "Initial Conc", "C0", "Plasma/Blood/Serum", "28",
      "CAVG", "Average Conc", "Cavg", "Plasma/Blood/Serum", "29",
      "CLST", "Last Nonzero Conc", "Clast", "Plasma/Blood/Serum", "30",
      "CMIN", "Min Conc", "Cmin", "Plasma/Blood/Serum", "31",
      "LAMZHL", "Half-Life Lambda z", "t1/2", "Plasma/Blood/Serum", "32",
      "CLFO", "Total CL Obs by F", "CL/F obs", "Plasma/Blood/Serum", "33",
      "CLFP", "Total CL Pred by F", "CL/F pred", "Plasma/Blood/Serum", "34",
      "CLO", "Total CL Obs", "CL obs", "Plasma/Blood/Serum", "35",
      "CLP", "Total CL Pred", "CL pred", "Plasma/Blood/Serum", "36",
      "CLSS", "Total CL Steady State Pred", "CLss", "Plasma/Blood/Serum", "37",
      "CLSSF", "Total CL Steady State Pred by F", "CLss/F", "Plasma/Blood/Serum", "38",
      "VZFO", "Vz Obs by F", "Vz/F obs", "Plasma/Blood/Serum", "39",
      "VZFP", "Vz Pred by F", "Vz/F pred", "Plasma/Blood/Serum", "40",
      "VZO", "Vz Obs", "Vz obs", "Plasma/Blood/Serum", "41",
      "VZP", "Vz Pred", "Vz pred", "Plasma/Blood/Serum", "42",
      "VSSO", "Vol Dist Steady State Obs", "Vss obs", "Plasma/Blood/Serum", "43",
      "VSSP", "Vol Dist Steady State Pred", "Vss pred", "Plasma/Blood/Serum", "44",
      "LAMZ", "Lambda z", "Lambda z", "Plasma/Blood/Serum", "45",
      "LAMZLL", "Lambda z Lower Limit", "Lambda z lower", "Plasma/Blood/Serum", "46",
      "LAMZUL", "Lambda z Upper Limit", "Lambda z upper", "Plasma/Blood/Serum", "47",
      "LAMZNPT", "Number of Points for Lambda z", "No points Lambda z", "Plasma/Blood/Serum", "48",
      "MRTIFO", "MRT Infinity Obs", "MRTinf obs", "Plasma/Blood/Serum", "49",
      "MRTIFP", "MRT Infinity Pred", "MRTinf pred", "Plasma/Blood/Serum", "50",
      "MRTLST", "MRT to Last Nonzero Conc", "MRTlast", "Plasma/Blood/Serum", "51",
      "R2", "R Squared", "Rsq", "Plasma/Blood/Serum", "52",
      "R2ADJ", "R Squared Adjusted", "Rsq adjusted", "Plasma/Blood/Serum", "53",
      "TLAG", "Time Until First Nonzero Conc", "TIag", "Plasma/Blood/Serum", "54",
      "TMIN", "Time of CMIN Observation", "Tmin", "Plasma/Blood/Serum", "55",
      "ACCI", "Accumulation Index", "Accumulation Index", "Plasma/Blood/Serum/Urine", "56",
      "FLUCP", "Fluctuation%", "Fluctuation", "Plasma/Blood/Serum", "57",
      "CORRXY", "Correlation Between TimeX and Log ConcY", "Corr xy", "Plasma/Blood/Serum", "58",
      "RCAMINT", "Amt Rec from T1 to T2", "Ae", "Urine", "59",
      "RCPCINT", "Pct Rec from T1 to T2", "Fe", "Urine", "60",
      "VOLPK", "Sum of Urine Vol", "Urine volume", "Urine", "61",
      "RENALCL", "Renal CL", "CLR", "Plasma/Blood/Serum/Urine", "62",
      "ERTMAX", "Time of Max Excretion Rate", "Tmax Rate", "Urine", "63",
      "RMAX", "Time of Maximum Response", "Rmax", "Matrix of PD", "64",
      "RMIN", "Time of Minimum Response", "Rmin", "Matrix of PD", "65",
      "ERMAX", "Max Excretion Rate", "Max excretion rate", "Urine", "66",
      "MIDPTLST", "Midpoint of Collection Interval", "Midpoint last", "Urine", "67",
      "ERLST", "Last Meas Excretion Rate", "Rate last", "Urine", "68",
      "TON", "Time to Onset", "Tonset", "Matrix of PD", "69",
      "TOFF", "Time to Offset", "Toffset", "Matrix of PD", "70",
      "TBBLP", "Time Below Baseline %", "Time %Below Baseline", "Matrix of PD", "71",
      "TBTP", "Time Below Threshold %", "Time %Below Threshold", "Matrix of PD", "72",
      "TABL", "Time Above Baseline", "Time Above Baseline", "Matrix of PD", "73",
      "TAT", "Time Above Threshold", "Time Above Threshold", "Matrix of PD", "74",
      "TBT", "Time Below Threshold", "Time Below Threshold", "Matrix of PD", "75",
      "TBLT", "Time Between Baseline and Threshold", "Time Between Baseline Threshold", "Matrix of PD", "76",
      "BLRSP", "Baseline Response", "Baseline", "Matrix of PD", "77",
      "TSHDRSP", "Response Threshold", "Threshold", "Matrix of PD", "78",
      "AUCABL", "AUC Above Baseline", "AUC above baseline", "Matrix of PD", "79",
      "AUCAT", "AUC Above Threshold", "AUC above threshold", "Matrix of PD", "80",
      "AUCBBL", "AUC Below Baseline", "AUC below baseline", "Matrix of PD", "81",
      "AUCBT", "AUC Below Threshold", "AUC below threshold", "Matrix of PD", "82",
      "AUCBLDIF", "Diff AUC Above Base and AUC Below Base", "AUC diff baseline", "Matrix of PD", "83",
      "AUCTDIF", "Diff AUC Above Thr and AUC Below Thr", "AUCnet threshold", "Matrix of PD", "84",
      "TDIFF", "Diff Time to Offset and Time to Onset", "Diff toffset-tonset", "Matrix of PD", "85",
      "AUCPBEO", "AUC %Back Extrapolation Obs", "AUC%Back extrap obs", "Plasma/Blood/Serum", "86",
      "AUCPBEP", "AUC %Back Extrapolation Pred", "AUC%Back extrap pred", "Plasma/Blood/Serum", "87",
      "TSLP1L", "Lower Time Limit Slope 1st", "Slope1 lower", "Matrix of PD", "88",
      "TSLP1U", "Upper Time Limit Slope 1st Segment", "Slope1 upper", "Matrix of PD", "89",
      "TSLP2L", "Lower Time Limit Slope 2nd Segment", "Slope2 lower", "Matrix of PD", "90",
      "TSLP2U", "Upper Time Limit Slope 2nd Segment", "Slope2 upper", "Matrix of PD", "91",
      "SLP1", "Slope, 1st Segment", "Slope1", "Matrix of PD", "92",
      "SLP2", "Slope, 2nd Segment", "Slope2", "Matrix of PD", "93",
      "SLP1PT", "Number of Points for Slope 1st Segment", "No points slope1", "Matrix of PD", "94",
      "SLP2PT", "Number of Points for Slope 2nd Segment", "No points slope2", "Matrix of PD", "95",
      "R2ADJS1", "R-Squared Adjusted Slope, 1st Segment", "Rsq adjusted slope1", "Matrix of PD", "96",
      "R2ADJS2", "R-Squared Adjusted Slope, 2nd Segment", "Rsq adjusted slope2", "Matrix of PD", "97",
      "R2SLP1", "R Squared, Slope, 1st Segment", "Rsq slope1", "Matrix of PD", "98",
      "R2SLP2", "R Squared, Slope, 2nd Segment", "Rsq slope2", "Matrix of PD", "99",
      "CORRXYS1", "Corr Btw TimeX and Log ConcY, Slope 1st", "Corr xy slope1", "Plasma/Blood/Serum", "100",
      "CORRXYS2", "Corr Btw TimeX and Log ConcY, Slope 1st Slope 2nd", "Corr xy slope2", "Plasma/Blood/Serum", "101",
      "AILAMZ", "Accumulation Index using Lambda z", "AILAMZ", "Plasma/Blood/Serum", "102",
      "ARAUC", "Accumulation Ratio AUCTAU", "ARAUC", "Plasma/Blood/Serum", "103",
      "ARAUCD", "Accum Ratio AUCTAU norm by dose", "ARAUCD", "Plasma/Blood/Serum", "104",
      "ARAUCIFO", "Accum Ratio AUC Infinity Obs", "ARAUCIFO", "Plasma/Blood/Serum", "105",
      "ARAUCIFP", "Accum Ratio AUC Infinity Pred", "ARAUCIFP", "Plasma/Blood/Serum", "106",
      "ARAUCIND", "Accum Ratio AUC T1 to T2 norm by dose", "ARAUCIND_T1_T2_UNIT", "Plasma/Blood/Serum", "107",
      "ARAUCINT", "Accumulation Ratio AUC from T1 to T2", "ARAUCINT_T1_T2_UNIT", "Plasma/Blood/Serum", "108",
      "ARAUCIOD", "Accum Ratio AUCIFO Norm by Dose", "ARAUCIOD", "Plasma/Blood/Serum", "109",
      "ARAUCIPD", "Accum Ratio AUCIFP Norm by Dose", "ARAUCIPD", "Plasma/Blood/Serum", "110",
      "ARAUCLST", "Accum Ratio AUC to Last Nonzero Conc", "ARAUCLST", "Plasma/Blood/Serum", "111",
      "ARCMAX", "Accumulation Ratio Cmax", "ARCMAX", "Plasma/Blood/Serum", "112",
      "ARCMAXD", "Accum Ratio Cmax norm by dose", "ARCMAXD", "Plasma/Blood/Serum", "113",
      "ARCMIN", "Accumulation Ratio Cmin", "ARCMIN", "Plasma/Blood/Serum", "114",
      "ARCMIND", "Accum Ratio Cmin norm by dose", "ARCMIND", "Plasma/Blood/Serum", "115",
      "ARCTROUD", "Accum Ratio Ctrough norm by dose", "ARCTROUD", "Plasma/Blood/Serum", "116",
      "ARCTROUG", "Accumulation Ratio Ctrough", "ARCTROUG", "Plasma/Blood/Serum", "117",
      "AUCALLB", "AUC All Norm by BMI", "AUCall_B", "Plasma/Blood/Serum", "118",
      "AUCALLD", "AUC All Norm by Dose", "AUCall_D", "Plasma/Blood/Serum", "119",
      "AUCALLS", "AUC All Norm by SA", "AUCall_S", "Plasma/Blood/Serum", "120",
      "AUCALLW", "AUC All Norm by WT", "AUCall_W", "Plasma/Blood/Serum", "121",
      "AUCIFOB", "AUC Infinity Obs Norm by BMI", "AUCINF_obs_B", "Plasma/Blood/Serum", "122",
      "AUCIFOLN", "AUC Infinity Obs LN Transformed", "AUCIFOLN", "Plasma/Blood/Serum", "123",
      "AUCIFOS", "AUC Infinity Obs Norm by SA", "AUCINF_obs_S", "Plasma/Blood/Serum", "124",
      "AUCIFOUB", "AUC Infinity Obs, Unbound Drug", "AUCIFOUB", "Plasma/Blood/Serum", "125",
      "AUCIFOW", "AUC Infinity Obs Norm by WT", "AUCINF_obs_W", "Plasma/Blood/Serum", "126",
      "AUCIFPB", "AUC Infinity Pred Norm by BMI", "AUCINF_pred_B", "Plasma/Blood/Serum", "127",
      "AUCIFPD", "AUC Infinity Pred Norm by Dose", "AUCINF_pred_D", "Plasma/Blood/Serum", "128",
      "AUCIFPS", "AUC Infinity Pred Norm by SA", "AUCINF_pred_S", "Plasma/Blood/Serum", "129",
      "AUCIFPUB", "AUC Infinity Pred, Unbound Drug", "AUCIFPUB", "Plasma/Blood/Serum", "130",
      "AUCIFPW", "AUC Infinity Pred Norm by WT", "AUCINF_pred_W", "Plasma/Blood/Serum", "131",
      "AUCINTB", "AUC from T1 to T2 Norm by BMI", "AUC_B_T1_T2_UNIT", "Plasma/Blood/Serum", "132",
      "AUCINTD", "AUC from T1 to T2 Norm by Dose", "AUC_D_T1_T2_UNIT", "Plasma/Blood/Serum", "133",
      "AUCINTS", "AUC from T1 to T2 Norm by SA", "AUC_S_T1_T2_UNIT", "Plasma/Blood/Serum", "134",
      "AUCINTW", "AUC from T1 to T2 Norm by WT", "AUC_W_T1_T2_UNIT", "Plasma/Blood/Serum", "135",
      "AUCLSTB", "AUC to Last Nonzero Conc Norm by BMI", "AUClast_B", "Plasma/Blood/Serum", "136",
      "AUCLSTD", "AUC to Last Nonzero Conc Norm by Dose", "AUClast_D", "Plasma/Blood/Serum", "137",
      "AUCLSTLN", "AUC to Last Nonzero Conc LN Transformed", "AUCLSTLN", "Plasma/Blood/Serum", "138",
      "AUCLSTS", "AUC to Last Nonzero Conc Norm by SA", "AUClast_S", "Plasma/Blood/Serum", "139",
      "AUCLSTUB", "AUC to Last Nonzero Conc, Unbound Drug", "AUCLSTUB", "Plasma/Blood/Serum", "140",
      "AUCLSTW", "AUC to Last Nonzero Conc Norm by WT", "AUClast_W", "Plasma/Blood/Serum", "141",
      "AUCTAUB", "AUC Over Dosing Interval Norm by BMI", "AUC_TAU_B", "Plasma/Blood/Serum", "142",
      "AUCTAUD", "AUC Over Dosing Interval Norm by Dose", "AUC_TAU_D", "Plasma/Blood/Serum", "143",
      "AUCTAUS", "AUC Over Dosing Interval Norm by SA", "AUC_TAU_S", "Plasma/Blood/Serum", "144",
      "AUCTAUW", "AUC Over Dosing Interval Norm by WT", "AUC_TAU_W", "Plasma/Blood/Serum", "145",
      "AUMCIFOB", "AUMC Infinity Obs Norm by BMI", "AUMCINF_obs_B", "Plasma/Blood/Serum", "146",
      "AUMCIFOD", "AUMC Infinity Obs Norm by Dose", "AUMCINF_obs_D", "Plasma/Blood/Serum", "147",
      "AUMCIFOS", "AUMC Infinity Obs Norm by SA", "AUMCINF_obs_S", "Plasma/Blood/Serum", "148",
      "AUMCIFOW", "AUMC Infinity Obs Norm by WT", "AUMCINF_obs_W", "Plasma/Blood/Serum", "149",
      "AUMCIFPB", "AUMC Infinity Pred Norm by BMI", "AUMCINF_pred_B", "Plasma/Blood/Serum", "150",
      "AUMCIFPD", "AUMC Infinity Pred Norm by Dose", "AUMCINF_pred_D", "Plasma/Blood/Serum", "151",
      "AUMCIFPS", "AUMC Infinity Pred Norm by SA", "AUMCINF_pred_S", "Plasma/Blood/Serum", "152",
      "AUMCIFPW", "AUMC Infinity Pred Norm by WT", "AUMCINF_pred_W", "Plasma/Blood/Serum", "153",
      "AUMCLSTB", "AUMC to Last Nonzero Conc Norm by BMI", "AUMClast_B", "Plasma/Blood/Serum", "154",
      "AUMCLSTD", "AUMC to Last Nonzero Conc Norm by Dose", "AUMClast_D", "Plasma/Blood/Serum", "155",
      "AUMCLSTS", "AUMC to Last Nonzero Conc Norm by SA", "AUMClast_S", "Plasma/Blood/Serum", "156",
      "AUMCLSTW", "AUMC to Last Nonzero Conc Norm by WT", "AUMClast_W", "Plasma/Blood/Serum", "157",
      "AUMCTAUB", "AUMC Over Dosing Interval Norm by BMI", "AUMCTAUB", "Plasma/Blood/Serum", "158",
      "AUMCTAUD", "AUMC Over Dosing Interval Norm by Dose", "AUMCTAUD", "Plasma/Blood/Serum", "159",
      "AUMCTAUS", "AUMC Over Dosing Interval Norm by SA", "AUMCTAUS", "Plasma/Blood/Serum", "160",
      "AUMCTAUW", "AUMC Over Dosing Interval Norm by WT", "AUMCTAUW", "Plasma/Blood/Serum", "161",
      "AURCALLB", "AURC All Norm by BMI", "AURCALLB", "Plasma/Blood/Serum", "162",
      "AURCALLD", "AURC All Norm by Dose", "AURCALLD", "Plasma/Blood/Serum", "163",
      "AURCALLS", "AURC All Norm by SA", "AURCALLS", "Plasma/Blood/Serum", "164",
      "AURCALLW", "AURC All Norm by WT", "AURCALLW", "Plasma/Blood/Serum", "165",
      "AURCIFOB", "AURC Infinity Obs Norm by BMI", "AURCIFOB", "Plasma/Blood/Serum", "166",
      "AURCIFOD", "AURC Infinity Obs Norm by Dose", "AURCIFOD", "Plasma/Blood/Serum", "167",
      "AURCIFOS", "AURC Infinity Obs Norm by SA", "AURCIFOS", "Plasma/Blood/Serum", "168",
      "AURCIFOW", "AURC Infinity Obs Norm by WT", "AURCIFOW", "Plasma/Blood/Serum", "169",
      "AURCIFPB", "AURC Infinity Pred Norm by BMI", "AURCIFPB", "Plasma/Blood/Serum", "170",
      "AURCIFPD", "AURC Infinity Pred Norm by Dose", "AURCIFPD", "Plasma/Blood/Serum", "171",
      "AURCIFPS", "AURC Infinity Pred Norm by SA", "AURCIFPS", "Plasma/Blood/Serum", "172",
      "AURCIFPW", "AURC Infinity Pred Norm by WT", "AURCIFPW", "Plasma/Blood/Serum", "173",
      "AURCINT", "AURC from T1 to T2", "AURCINT_T1_T2_UNIT", "Plasma/Blood/Serum", "174",
      "AURCINTB", "AURC from T1 to T2 Norm by BMI", "AURCINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "175",
      "AURCINTD", "AURC from T1 to T2 Norm by Dose", "AURCINTD_T1_T2_UNIT", "Plasma/Blood/Serum", "176",
      "AURCINTS", "AURC from T1 to T2 Norm by SA", "AURCINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "177",
      "AURCINTW", "AURC from T1 to T2 Norm by WT", "AURCINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "178",
      "AURCLSTB", "AURC to Last Nonzero Rate Norm by BMI", "AURCLSTB", "Plasma/Blood/Serum", "179",
      "AURCLSTD", "AURC to Last Nonzero Rate Norm by Dose", "AURCLSTD", "Plasma/Blood/Serum", "180",
      "AURCLSTS", "AURC to Last Nonzero Rate Norm by SA", "AURCLSTS", "Plasma/Blood/Serum", "181",
      "AURCLSTW", "AURC to Last Nonzero Rate Norm by WT", "AURCLSTW", "Plasma/Blood/Serum", "182",
      "C0B", "Initial Conc Norm by BMI", "C0B", "Plasma/Blood/Serum", "183",
      "C0D", "Initial Conc Norm by Dose", "C0D", "Plasma/Blood/Serum", "184",
      "C0S", "Initial Conc Norm by SA", "C0S", "Plasma/Blood/Serum", "185",
      "C0W", "Initial Conc Norm by WT", "C0W", "Plasma/Blood/Serum", "186",
      "CAVGB", "Average Conc Norm by BMI", "CAVGB", "Plasma/Blood/Serum", "187",
      "CAVGD", "Average Conc Norm by Dose", "CAVGD", "Plasma/Blood/Serum", "188",
      "CAVGINT", "Average Conc from T1 to T2", "CAVGINT_T1_T2_UNIT", "Plasma/Blood/Serum", "189",
      "CAVGINTB", "Average Conc from T1 to T2 Norm by BMI", "CAVGINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "190",
      "CAVGINTD", "Average Conc from T1 to T2 Norm by Dose", "CAVGINTD_T1_T2_UNIT", "Plasma/Blood/Serum", "191",
      "CAVGINTS", "Average Conc from T1 to T2 Norm by SA", "CAVGINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "192",
      "CAVGINTW", "Average Conc from T1 to T2 Norm by WT", "CAVGINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "193",
      "CAVGS", "Average Conc Norm by SA", "CAVGS", "Plasma/Blood/Serum", "194",
      "CAVGW", "Average Conc Norm by WT", "CAVGW", "Plasma/Blood/Serum", "195",
      "CHTMAX", "Concentration at Half Tmax", "CHTMAX", "Plasma/Blood/Serum", "196",
      "CLFOB", "Total CL Obs by F Norm by BMI", "CLFOB", "Plasma/Blood/Serum", "197",
      "CLFOD", "Total CL Obs by F Norm by Dose", "CLFOD", "Plasma/Blood/Serum", "198",
      "CLFOS", "Total CL Obs by F Norm by SA", "CLFOS", "Plasma/Blood/Serum", "199",
      "CLFOW", "Total CL Obs by F Norm by WT", "CLFOW", "Plasma/Blood/Serum", "200",
      "CLFPB", "Total CL Pred by F Norm by BMI", "CLFPB", "Plasma/Blood/Serum", "201",
      "CLFPD", "Total CL Pred by F Norm by Dose", "CLFPD", "Plasma/Blood/Serum", "202",
      "CLFPS", "Total CL Pred by F Norm by SA", "CLFPS", "Plasma/Blood/Serum", "203",
      "CLFPW", "Total CL Pred by F Norm by WT", "CLFPW", "Plasma/Blood/Serum", "204",
      "CLFTAU", "Total CL by F for Dose Int", "CLFTAU", "Plasma/Blood/Serum", "205",
      "CLFTAUB", "Total CL by F for Dose Int Norm by BMI", "CLFTAUB", "Plasma/Blood/Serum", "206",
      "CLFTAUD", "Total CL by F for Dose Int Norm by Dose", "CLFTAUD", "Plasma/Blood/Serum", "207",
      "CLFTAUS", "Total CL by F for Dose Int Norm by SA", "CLFTAUS", "Plasma/Blood/Serum", "208",
      "CLFTAUW", "Total CL by F for Dose Int Norm by WT", "CLFTAUW", "Plasma/Blood/Serum", "209",
      "CLFUB", "Apparent CL for Unbound Drug", "CLFUB", "Plasma/Blood/Serum", "210",
      "CLOB", "Total CL Obs Norm by BMI", "CLOB", "Plasma/Blood/Serum", "211",
      "CLOD", "Total CL Obs Norm by Dose", "CLOD", "Plasma/Blood/Serum", "212",
      "CLOS", "Total CL Obs Norm by SA", "CLOS", "Plasma/Blood/Serum", "213",
      "CLOUB", "Total CL Obs for Unbound Drug", "CLOUB", "Plasma/Blood/Serum", "214",
      "CLOW", "Total CL Obs Norm by WT", "CLOW", "Plasma/Blood/Serum", "215",
      "CLPB", "Total CL Pred Norm by BMI", "CLPB", "Plasma/Blood/Serum", "216",
      "CLPD", "Total CL Pred Norm by Dose", "CLPD", "Plasma/Blood/Serum", "217",
      "CLPS", "Total CL Pred Norm by SA", "CLPS", "Plasma/Blood/Serum", "218",
      "CLPUB", "Total CL Pred for Unbound Drug", "CLPUB", "Plasma/Blood/Serum", "219",
      "CLPW", "Total CL Pred Norm by WT", "CLPW", "Plasma/Blood/Serum", "220",
      "CLRPCLEV", "Renal CL as Pct CL EV", "CLRPCLEV", "Urine", "221",
      "CLRPCLIV", "Renal CL as Pct CL IV", "CLRPCLIV", "Urine", "222",
      "CLSTB", "Last Nonzero Conc Norm by BMI", "CLSTB", "Plasma/Blood/Serum", "223",
      "CLSTD", "Last Nonzero Conc Norm by Dose", "CLSTD", "Plasma/Blood/Serum", "224",
      "CLSTS", "Last Nonzero Conc Norm by SA", "CLSTS", "Plasma/Blood/Serum", "225",
      "CLSTW", "Last Nonzero Conc Norm by WT", "CLSTW", "Plasma/Blood/Serum", "226",
      "CLTAU", "Total CL for Dose Int", "CLTAU", "Plasma/Blood/Serum", "227",
      "CLTAUB", "Total CL for Dose Int Norm by BMI", "CLTAUB", "Plasma/Blood/Serum", "228",
      "CLTAUD", "Total CL for Dose Int Norm by Dose", "CLTAUD", "Plasma/Blood/Serum", "229",
      "CLTAUS", "Total CL for Dose Int Norm by SA", "CLTAUS", "Plasma/Blood/Serum", "230",
      "CLTAUW", "Total CL for Dose Int Norm by WT", "CLTAUW", "Plasma/Blood/Serum", "231",
      "CMAXB", "Max Conc Norm by BMI", "CMAX_B", "Plasma/Blood/Serum", "232",
      "CMAXLN", "Max Conc LN Transformed", "CMAXLN", "Plasma/Blood/Serum", "233",
      "CMAXS", "Max Conc Norm by SA", "CMAXS", "Plasma/Blood/Serum", "234",
      "CMAXUB", "Max Conc, Unbound Drug", "CMAXUB", "Plasma/Blood/Serum", "235",
      "CMAXW", "Max Conc Norm by WT", "CMAXW", "Plasma/Blood/Serum", "236",
      "CMINB", "Min Conc Norm by BMI", "CMINB", "Plasma/Blood/Serum", "237",
      "CMIND", "Min Conc Norm by Dose", "CMIND", "Plasma/Blood/Serum", "238",
      "CMINS", "Min Conc Norm by SA", "CMINS", "Plasma/Blood/Serum", "239",
      "CMINW", "Min Conc Norm by WT", "CMINW", "Plasma/Blood/Serum", "240",
      "CONC", "Concentration", "CONC", "Plasma/Blood/Serum", "241",
      "CONCB", "Conc by BMI", "CONCB", "Plasma/Blood/Serum", "242",
      "CONCD", "Conc by Dose", "CONCD", "Plasma/Blood/Serum", "243",
      "CONCS", "Conc by SA", "CONCS", "Plasma/Blood/Serum", "244",
      "CONCW", "Conc by WT", "CONCW", "Plasma/Blood/Serum", "245",
      "CTROUGH", "Conc Trough", "CTROUGH", "Plasma/Blood/Serum", "246",
      "CTROUGHB", "Conc Trough by BMI", "CTROUGHB", "Plasma/Blood/Serum", "247",
      "CTROUGHD", "Conc Trough by Dose", "CTROUGHD", "Plasma/Blood/Serum", "248",
      "CTROUGHS", "Conc Trough by SA", "CTROUGHS", "Plasma/Blood/Serum", "249",
      "CTROUGHW", "Conc Trough by WT", "CTROUGHW", "Plasma/Blood/Serum", "250",
      "EFFHL", "Effective Half-Life", "EFFHL", "Plasma/Blood/Serum", "251",
      "ERINT", "Excret Rate from T1 to T2", "ERINT_T1_T2_UNIT", "Plasma/Blood/Serum", "252",
      "ERINTB", "Excret Rate from T1 to T2 Norm by BMI", "ERINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "253",
      "ERINTD", "Excret Rate from T1 to T2 Norm by Dose", "ERINTD_T1_T2_UNIT", "Plasma/Blood/Serum", "254",
      "ERINTS", "Excret Rate from T1 to T2 Norm by SA", "ERINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "255",
      "ERINTW", "Excret Rate from T1 to T2 Norm by WT", "ERINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "256",
      "ERLSTB", "Last Meas Excretion Rate Norm by BMI", "ERLSTB", "Plasma/Blood/Serum", "257",
      "ERLSTD", "Last Meas Excretion Rate Norm by Dose", "ERLSTD", "Plasma/Blood/Serum", "258",
      "ERLSTS", "Last Meas Excretion Rate Norm by SA", "ERLSTS", "Plasma/Blood/Serum", "259",
      "ERLSTW", "Last Meas Excretion Rate Norm by WT", "ERLSTW", "Plasma/Blood/Serum", "260",
      "ERMAXB", "Max Excretion Rate Norm by BMI", "ERMAXB", "Plasma/Blood/Serum", "261",
      "ERMAXD", "Max Excretion Rate Norm by Dose", "ERMAXD", "Plasma/Blood/Serum", "262",
      "ERMAXS", "Max Excretion Rate Norm by SA", "ERMAXS", "Plasma/Blood/Serum", "263",
      "ERMAXW", "Max Excretion Rate Norm by WT", "ERMAXW", "Plasma/Blood/Serum", "264",
      "ERTLST", "Midpoint of Interval of Last Nonzero ER", "ERTLST", "Plasma/Blood/Serum", "265",
      "FABS", "Absolute Bioavailability", "FABS", "Plasma/Blood/Serum", "266",
      "FB", "Fraction Bound", "FB", "Plasma/Blood/Serum", "267",
      "FREL", "Relative Bioavailability", "FREL", "Plasma/Blood/Serum", "268",
      "FREXINT", "Fract Excr from T1 to T2", "FREXINT_T1_T2_UNIT", "Plasma/Blood/Serum", "269",
      "FU", "Fraction Unbound", "FU", "Plasma/Blood/Serum", "270",
      "HDCL", "Hemodialysis Clearance", "HDCL", "Plasma/Blood/Serum", "271",
      "HDER", "Hemodialysis Extraction Ratio", "HDER", "Plasma/Blood/Serum", "272",
      "HTMAX", "Half Tmax", "HTMAX", "Plasma/Blood/Serum", "273",
      "LAMZLTAU", "Lambda z Lower Limit TAU", "LAMZLTAU", "Plasma/Blood/Serum", "274",
      "LAMZNTAU", "Number of Points for Lambda z TAU", "LAMZNTAU", "Plasma/Blood/Serum", "275",
      "LAMZSPN", "Lambda z Span", "LAMZSPN", "Plasma/Blood/Serum", "276",
      "LAMZTAU", "Lambda z TAU", "LAMZTAU", "Plasma/Blood/Serum", "277",
      "LAMZUTAU", "Lambda z Upper Limit TAU", "LAMZUTAU", "Plasma/Blood/Serum", "278",
      "MAT", "Mean Absorption Time", "MAT", "Plasma/Blood/Serum", "279",
      "MRAUCIFO", "Metabolite Ratio for AUC Infinity Obs", "MRAUCIFO", "Plasma/Blood/Serum", "280",
      "MRAUCIFP", "Metabolite Ratio for AUC Infinity Pred", "MRAUCIFP", "Plasma/Blood/Serum", "281",
      "MRAUCINT", "Metabolite Ratio AUC from T1 to T2", "MRAUCINT_T1_T2_UNIT", "Plasma/Blood/Serum", "282",
      "MRAUCLST", "Metabolite Ratio AUC Last Nonzero Conc", "MRAUCLST", "Plasma/Blood/Serum", "283",
      "MRAUCTAU", "Metabolite Ratio for AUC Dosing Interval", "MRAUCTAU", "Plasma/Blood/Serum", "284",
      "MRCMAX", "Metabolite Ratio for Max Conc", "MRCMAX", "Plasma/Blood/Serum", "285",
      "MRTEVIFO", "MRT Extravasc Infinity Obs", "MRTEVIFO", "Plasma/Blood/Serum", "286",
      "MRTEVIFP", "MRT Extravasc Infinity Pred", "MRTEVIFP", "Plasma/Blood/Serum", "287",
      "MRTEVLST", "MRT Extravasc to Last Nonzero Conc", "MRTEVLST", "Plasma/Blood/Serum", "288",
      "MRTIVIFO", "MRT Intravasc Infinity Obs", "MRTIVIFO", "Plasma/Blood/Serum", "289",
      "MRTIVIFP", "MRT Intravasc Infinity Pred", "MRTIVIFP", "Plasma/Blood/Serum", "290",
      "MRTIVLST", "MRT Intravasc to Last Nonzero Conc", "MRTIVLST", "Plasma/Blood/Serum", "291",
      "NRENALCL", "Nonrenal CL", "NRENALCL", "Urine", "292",
      "NRENLCLB", "Nonrenal CL Norm by BMI", "NRENLCLB", "Urine", "293",
      "NRENLCLD", "Nonrenal CL Norm by Dose", "NRENLCLD", "Urine", "294",
      "NRENLCLS", "Nonrenal CL Norm by SA", "NRENLCLS", "Urine", "295",
      "NRENLCLW", "Nonrenal CL Norm by WT", "NRENLCLW", "Urine", "296",
      "PTROUGHR", "Peak Trough Ratio", "PTROUGHR", "Plasma/Blood/Serum", "297",
      "RAAUC", "Ratio AUC", "RAAUC", "Plasma/Blood/Serum", "298",
      "RAAUCIFO", "Ratio AUC Infinity Obs", "RAAUCIFO", "Plasma/Blood/Serum", "299",
      "RAAUCIFP", "Ratio AUC Infinity Pred", "RAAUCIFP", "Plasma/Blood/Serum", "300",
      "RACMAX", "Ratio CMAX", "RACMAX", "Plasma/Blood/Serum", "301",
      "RAMAXMIN", "Ratio of CMAX to CMIN", "RAMAXMIN", "Plasma/Blood/Serum", "302",
      "RCAMIFO", "Amt Rec Infinity Obs", "RCAMIFO", "Plasma/Blood/Serum", "303",
      "RCAMIFOB", "Amt Rec Infinity Obs Norm by BMI", "RCAMIFOB", "Plasma/Blood/Serum", "304",
      "RCAMIFOS", "Amt Rec Infinity Obs Norm by SA", "RCAMIFOS", "Plasma/Blood/Serum", "305",
      "RCAMIFOW", "Amt Rec Infinity Obs Norm by WT", "RCAMIFOW", "Plasma/Blood/Serum", "306",
      "RCAMIFP", "Amt Rec Infinity Pred", "RCAMIFP", "Plasma/Blood/Serum", "307",
      "RCAMIFPB", "Amt Rec Infinity Pred Norm by BMI", "RCAMIFPB", "Plasma/Blood/Serum", "308",
      "RCAMIFPS", "Amt Rec Infinity Pred Norm by SA", "RCAMIFPS", "Plasma/Blood/Serum", "309",
      "RCAMIFPW", "Amt Rec Infinity Pred Norm by WT", "RCAMIFPW", "Plasma/Blood/Serum", "310",
      "RCAMINTB", "Amt Rec from T1 to T2 Norm by BMI", "RCAMINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "311",
      "RCAMINTS", "Amt Rec from T1 to T2 Norm by SA", "RCAMINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "312",
      "RCAMINTW", "Amt Rec from T1 to T2 Norm by WT", "RCAMINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "313",
      "RCAMTAU", "Amt Rec Over Dosing Interval", "RCAMTAU", "Plasma/Blood/Serum", "314",
      "RCAMTAUB", "Amt Rec Over Dosing Interval Norm by BMI", "RCAMTAUB", "Plasma/Blood/Serum", "315",
      "RCAMTAUS", "Amt Rec Over Dosing Interval Norm by SA", "RCAMTAUS", "Plasma/Blood/Serum", "316",
      "RCAMTAUW", "Amt Rec Over Dosing Interval Norm by WT", "RCAMTAUW", "Plasma/Blood/Serum", "317",
      "RCPCIFO", "Pct Rec Infinity Obs", "RCPCIFO", "Plasma/Blood/Serum", "318",
      "RCPCIFOB", "Pct Rec Infinity Obs Norm by BMI", "RCPCIFOB", "Plasma/Blood/Serum", "319",
      "RCPCIFOS", "Pct Rec Infinity Obs Norm by SA", "RCPCIFOS", "Plasma/Blood/Serum", "320",
      "RCPCIFOW", "Pct Rec Infinity Obs Norm by WT", "RCPCIFOW", "Plasma/Blood/Serum", "321",
      "RCPCIFP", "Pct Rec Infinity Pred", "RCPCIFP", "Plasma/Blood/Serum", "322",
      "RCPCIFPB", "Pct Rec Infinity Pred Norm by BMI", "RCPCIFPB", "Plasma/Blood/Serum", "323",
      "RCPCIFPS", "Pct Rec Infinity Pred Norm by SA", "RCPCIFPS", "Plasma/Blood/Serum", "324",
      "RCPCIFPW", "Pct Rec Infinity Pred Norm by WT", "RCPCIFPW", "Plasma/Blood/Serum", "325",
      "RCPCINTB", "Pct Rec from T1 to T2 Norm by BMI", "RCPCINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "326",
      "RCPCINTS", "Pct Rec from T1 to T2 Norm by SA", "RCPCINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "327",
      "RCPCINTW", "Pct Rec from T1 to T2 Norm by WT", "RCPCINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "328",
      "RCPCLST", "Pct Rec to Last Nonzero Conc", "RCPCLST", "Plasma/Blood/Serum", "329",
      "RCPCTAU", "Pct Rec Over Dosing Interval", "RCPCTAU", "Plasma/Blood/Serum", "330",
      "RCPCTAUB", "Pct Rec Over Dosing Interval Norm by BMI", "RCPCTAUB", "Plasma/Blood/Serum", "331",
      "RCPCTAUS", "Pct Rec Over Dosing Interval Norm by SA", "RCPCTAUS", "Plasma/Blood/Serum", "332",
      "RCPCTAUW", "Pct Rec Over Dosing Interval Norm by WT", "RCPCTAUW", "Plasma/Blood/Serum", "333",
      "RENALCLB", "Renal CL Norm by BMI", "RENALCLB", "Urine", "334",
      "RENALCLD", "Renal CL Norm by Dose", "RENALCLD", "Urine", "335",
      "RENALCLS", "Renal CL Norm by SA", "RENALCLS", "Urine", "336",
      "RENALCLW", "Renal CL Norm by WT", "RENALCLW", "Urine", "337",
      "RENCLTAU", "Renal CL for Dose Int", "RENCLTAU", "Urine", "338",
      "RNCLINT", "Renal CL from T1 to T2", "RNCLINT_T1_T2_UNIT", "Urine", "339",
      "RNCLINTB", "Renal CL from T1 to T2 Norm by BMI", "RNCLINTB_T1_T2_UNIT", "Urine", "340",
      "RNCLINTD", "Renal CL from T1 to T2 Norm by Dose", "RNCLINTD_T1_T2_UNIT", "Urine", "341",
      "RNCLINTS", "Renal CL from T1 to T2 Norm by SA", "RNCLINTS_T1_T2_UNIT", "Urine", "342",
      "RNCLINTW", "Renal CL from T1 to T2 Norm by WT", "RNCLINTW_T1_T2_UNIT", "Urine", "343",
      "RNCLTAUB", "Renal CL for Dose Int Norm by BMI", "RNCLTAUB", "Urine", "344",
      "RNCLTAUD", "Renal CL for Dose Int Norm by Dose", "RNCLTAUD", "Urine", "345",
      "RNCLTAUS", "Renal CL for Dose Int Norm by SA", "RNCLTAUS", "Urine", "346",
      "RNCLTAUW", "Renal CL for Dose Int Norm by WT", "RNCLTAUW", "Urine", "347",
      "RNCLUB", "Renal CL for Unbound Drug", "RNCLUB", "Urine", "348",
      "SRAUC", "Stationarity Ratio AUC", "SRAUC", "Plasma/Blood/Serum", "349",
      "SWING", "Swing", "SWING", "Plasma/Blood/Serum", "350",
      "TAUHL", "Half-Life TAU", "TAUHL", "Plasma/Blood/Serum", "351",
      "TBBL", "Time Below Baseline", "Time_Below_B", "Plasma/Blood/Serum", "352",
      "TROUGHPR", "Trough Peak Ratio", "TROUGHPR", "Plasma/Blood/Serum", "353",
      "V0", "Vol Dist Initial", "V0", "Plasma/Blood/Serum", "354",
      "V0B", "Vol Dist Initial Norm by BMI", "V0B", "Plasma/Blood/Serum", "355",
      "V0D", "Vol Dist Initial Norm by Dose", "V0D", "Plasma/Blood/Serum", "356",
      "V0S", "Vol Dist Initial Norm by SA", "V0S", "Plasma/Blood/Serum", "357",
      "V0W", "Vol Dist Initial Norm by WT", "V0W", "Plasma/Blood/Serum", "358",
      "VSSOB", "Vol Dist Steady State Obs Norm by BMI", "VSSOB", "Plasma/Blood/Serum", "359",
      "VSSOBD", "Vol Dist Steady State Obs by B", "VSSOBD", "Plasma/Blood/Serum", "360",
      "VSSOD", "Vol Dist Steady State Obs Norm by Dose", "VSSOD", "Plasma/Blood/Serum", "361",
      "VSSOF", "Vol Dist Steady State Obs by F", "VSSOF", "Plasma/Blood/Serum", "362",
      "VSSOS", "Vol Dist Steady State Obs Norm by SA", "VSSOS", "Plasma/Blood/Serum", "363",
      "VSSOUB", "Vol Dist Steady State Obs by UB", "VSSOUB", "Plasma/Blood/Serum", "364",
      "VSSOW", "Vol Dist Steady State Obs Norm by WT", "VSSOW", "Plasma/Blood/Serum", "365",
      "VSSPB", "Vol Dist Steady State Pred Norm by BMI", "VSSPB", "Plasma/Blood/Serum", "366",
      "VSSPBD", "Vol Dist Steady State Pred by B", "VSSPBD", "Plasma/Blood/Serum", "367",
      "VSSPD", "Vol Dist Steady State Pred Norm by Dose", "VSSPD", "Plasma/Blood/Serum", "368",
      "VSSPF", "Vol Dist Steady State Pred by F", "VSSPF", "Plasma/Blood/Serum", "369",
      "VSSPS", "Vol Dist Steady State Pred Norm by SA", "VSSPS", "Plasma/Blood/Serum", "370",
      "VSSPUB", "Vol Dist Steady State Pred by UB", "VSSPUB", "Plasma/Blood/Serum", "371",
      "VSSPW", "Vol Dist Steady State Pred Norm by WT", "VSSPW", "Plasma/Blood/Serum", "372",
      "VZ", "Vol Z", "Vz", "Plasma/Blood/Serum", "373",
      "VZF", "Vol Z by F", "Vz_F", "Plasma/Blood/Serum", "374",
      "VZFOB", "Vz Obs by F Norm by BMI", "VZFOB", "Plasma/Blood/Serum", "375",
      "VZFOD", "Vz Obs by F Norm by Dose", "VZFOD", "Plasma/Blood/Serum", "376",
      "VZFOS", "Vz Obs by F Norm by SA", "VZFOS", "Plasma/Blood/Serum", "377",
      "VZFOUB", "Vz Obs by F for UB", "VZFOUB", "Plasma/Blood/Serum", "378",
      "VZFOW", "Vz Obs by F Norm by WT", "VZFOW", "Plasma/Blood/Serum", "379",
      "VZFPB", "Vz Pred by F Norm by BMI", "VZFPB", "Plasma/Blood/Serum", "380",
      "VZFPD", "Vz Pred by F Norm by Dose", "VZFPD", "Plasma/Blood/Serum", "381",
      "VZFPS", "Vz Pred by F Norm by SA", "VZFPS", "Plasma/Blood/Serum", "382",
      "VZFPUB", "Vz Pred by F for UB", "VZFPUB", "Plasma/Blood/Serum", "383",
      "VZFPW", "Vz Pred by F Norm by WT", "VZFPW", "Plasma/Blood/Serum", "384",
      "VZFTAU", "Vz for Dose Int by F", "VZFTAU", "Plasma/Blood/Serum", "385",
      "VZFTAUB", "Vz for Dose Int by F Norm by BMI", "VZFTAUB", "Plasma/Blood/Serum", "386",
      "VZFTAUD", "Vz for Dose Int by F Norm by Dose", "VZFTAUD", "Plasma/Blood/Serum", "387",
      "VZFTAUS", "Vz for Dose Int by F Norm by SA", "VZFTAUS", "Plasma/Blood/Serum", "388",
      "VZFTAUW", "Vz for Dose Int by F Norm by WT", "VZFTAUW", "Plasma/Blood/Serum", "389",
      "VZOB", "Vz Obs Norm by BMI", "VZOB", "Plasma/Blood/Serum", "390",
      "VZOD", "Vz Obs Norm by Dose", "VZOD", "Plasma/Blood/Serum", "391",
      "VZOS", "Vz Obs Norm by SA", "VZOS", "Plasma/Blood/Serum", "392",
      "VZOUB", "Vz Obs for UB", "VZOUB", "Plasma/Blood/Serum", "393",
      "VZOW", "Vz Obs Norm by WT", "VZOW", "Plasma/Blood/Serum", "394",
      "VZPB", "Vz Pred Norm by BMI", "VZPB", "Plasma/Blood/Serum", "395",
      "VZPD", "Vz Pred Norm by Dose", "VZPD", "Plasma/Blood/Serum", "396",
      "VZPS", "Vz Pred Norm by SA", "VZPS", "Plasma/Blood/Serum", "397",
      "VZPUB", "Vz Pred for UB", "VZPUB", "Plasma/Blood/Serum", "398"
    ),
    ncol = 5,
    byrow = TRUE
  ))
  colnames(pk_dataset) <- c("PARAMCD", "PARAM", "TLG_DISPLAY", "MATRIX", "TLG_ORDER")
  pk_dataset
}

#' Summarize Variables in Columns
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This analyze function uses the S3 generic function [s_summary()] to summarize different variables
#' that are arranged in columns. Additional standard formatting arguments are available. It is a
#' minimal wrapper for [rtables::analyze_colvars()]. The latter function is meant to add different
#' analysis methods for each column variable as different rows. To have the analysis methods as
#' column labels, please refer to [analyze_vars_in_cols()].
#'
#' @inheritParams argument_convention
#' @param ... arguments passed to `s_summary()`.
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#'
#' @return
#' A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
#' Adding this function to an `rtable` layout will summarize the given variables, arrange the output
#' in columns, and add it to the table layout.
#'
#' @seealso [rtables::split_cols_by_multivar()] and [analyze_colvars_functions].
#'
#' @examples
#' dta_test <- data.frame(
#'   USUBJID = rep(1:6, each = 3),
#'   PARAMCD = rep("lab", 6 * 3),
#'   AVISIT = rep(paste0("V", 1:3), 6),
#'   ARM = rep(LETTERS[1:3], rep(6, 3)),
#'   AVAL = c(9:1, rep(NA, 9)),
#'   CHG = c(1:9, rep(NA, 9))
#' )
#'
#' ## Default output within a `rtables` pipeline.
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("AVISIT") %>%
#'   split_cols_by_multivar(vars = c("AVAL", "CHG")) %>%
#'   summarize_colvars() %>%
#'   build_table(dta_test)
#'
#' ## Selection of statistics, formats and labels also works.
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("AVISIT") %>%
#'   split_cols_by_multivar(vars = c("AVAL", "CHG")) %>%
#'   summarize_colvars(
#'     .stats = c("n", "mean_sd"),
#'     .formats = c("mean_sd" = "xx.x, xx.x"),
#'     .labels = c(n = "n", mean_sd = "Mean, SD")
#'   ) %>%
#'   build_table(dta_test)
#'
#' ## Use arguments interpreted by `s_summary`.
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("AVISIT") %>%
#'   split_cols_by_multivar(vars = c("AVAL", "CHG")) %>%
#'   summarize_colvars(na.rm = FALSE) %>%
#'   build_table(dta_test)
#'
#' @export
summarize_colvars <- function(lyt,
                              ...,
                              .stats = c("n", "mean_sd", "median", "range", "count_fraction"),
                              .formats = NULL,
                              .labels = NULL,
                              .indent_mods = NULL) {
  # Build the analysis function from the requested statistics, formats, labels and indent modifiers.
  summary_afun <- create_afun_summary(.stats, .formats, .labels, .indent_mods)

  # Everything captured in `...` is handed over unchanged as `extra_args`.
  analyze_colvars(lyt, afun = summary_afun, extra_args = list(...))
}

#' Control Function for Logistic Regression Model Fitting
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for logistic regression models.
#' `conf_level` refers to the confidence level used for the Odds Ratio CIs.
#'
#' @inheritParams argument_convention
#' @param response_definition (`string`)\cr the definition of what an event is in terms of `response`.
#'   This will be used when fitting the logistic regression model on the left hand side of the formula.
#'   Note that the evaluated expression should result in either a logical vector or a factor with 2
#'   levels. By default this is just `"response"` such that the original response variable is used
#'   and not modified further.
#'
#' @return A list of components with the same names as the arguments.
#'
#' @examples
#' # Standard options.
#' control_logistic()
#'
#' # Modify confidence level.
#' control_logistic(conf_level = 0.9)
#'
#' # Use a different response definition.
#' control_logistic(response_definition = "I(response %in% c('CR', 'PR'))")
#'
#' @export
control_logistic <- function(response_definition = "response",
                             conf_level = 0.95) {
  # Validate the type first: calling `grepl()` on a non-string (e.g. `NULL` or a vector with
  # several elements) does not yield a single logical, so the follow-up assertion would fail
  # with a message that hides the actual problem.
  checkmate::assert_string(response_definition)
  # The definition has to mention the `response` variable; `fixed = TRUE` makes explicit that
  # this is a literal substring check rather than a regular expression.
  checkmate::assert_true(grepl("response", response_definition, fixed = TRUE))
  assert_proportion_value(conf_level)
  list(
    response_definition = response_definition,
    conf_level = conf_level
  )
}

1		#' Missing Data
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Substitute missing data with a string or factor level.
6		#'
7		#' @param x (`factor` or `character` vector)\cr values for which any missing values should be substituted.
8		#' @param label (`character`)\cr string that missing data should be replaced with.
9		#'
10		#' @return `x` with any `NA` values substituted by `label`.
11		#'
12		#' @examples
13		#' explicit_na(c(NA, "a", "b"))
14		#' is.na(explicit_na(c(NA, "a", "b")))
15		#'
16		#' explicit_na(factor(c(NA, "a", "b")))
17		#' is.na(explicit_na(factor(c(NA, "a", "b"))))
18		#'
19		#' explicit_na(sas_na(c("a", "")))
20		#'
21		#' @export
explicit_na <- function(x, label = "<Missing>") {
  checkmate::assert_string(label)

  # Factors: turn NA into an explicit level named `label`, then drop that
  # level again if it ended up unused.
  if (is.factor(x)) {
    with_na_level <- forcats::fct_na_value_to_level(x, label)
    return(forcats::fct_drop(with_na_level, only = label))
  }

  # Anything that is neither a factor nor a character vector is rejected.
  if (!is.character(x)) {
    stop("only factors and character vectors allowed")
  }

  # Character vectors: overwrite the missing entries in place.
  x[is.na(x)] <- label
  x
}
35
36		#' Convert Strings to `NA`
37		#'
38		#' @description `r lifecycle::badge("stable")`
39		#'
40		#' SAS imports missing data as empty strings or strings with whitespaces only. This helper function can be used to
41		#' convert these values to `NA`s.
42		#'
43		#' @inheritParams explicit_na
44		#' @param empty (`logical`)\cr if `TRUE` empty strings get replaced by `NA`.
45		#' @param whitespaces (`logical`)\cr if `TRUE` then strings made from whitespaces only get replaced with `NA`.
46		#'
47		#' @return `x` with `""` and/or whitespace-only values substituted by `NA`, depending on the values of
48		#' `empty` and `whitespaces`.
49		#'
50		#' @examples
51		#' sas_na(c("1", "", " ", " ", "b"))
52		#' sas_na(factor(c("", " ", "b")))
53		#'
54		#' is.na(sas_na(c("1", "", " ", " ", "b")))
55		#'
56		#' @export
sas_na <- function(x, empty = TRUE, whitespaces = TRUE) {
  checkmate::assert_flag(empty)
  checkmate::assert_flag(whitespaces)

  # Matches strings consisting of one or more whitespace characters only.
  ws_only <- "^\\s+$"

  if (is.factor(x)) {
    # For factors the replacement happens on the levels, not the values.
    lvl_is_empty <- levels(x) == ""
    if (empty && any(lvl_is_empty)) {
      levels(x)[lvl_is_empty] <- NA
    }

    lvl_is_ws <- grepl(ws_only, levels(x))
    if (whitespaces && any(lvl_is_ws)) {
      levels(x)[lvl_is_ws] <- NA
    }

    return(x)
  }

  if (!is.character(x)) {
    stop("only factors and character vectors allowed")
  }

  # Character vectors: replace the matching elements directly.
  if (empty) {
    x[x == ""] <- NA_character_
  }
  if (whitespaces) {
    x[grepl(ws_only, x)] <- NA_character_
  }
  x
}

1		#' Compare Variables Between Groups
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Comparison with a reference group for different `x` objects.
6		#'
7		#' @inheritParams argument_convention
8		#'
9		#' @note
10		#' * For factor variables, `denom` for factor proportions can only be `n` since the purpose is to compare proportions
11		#' between columns, therefore a row-based proportion would not make sense. Proportion based on `N_col` would
12		#' be difficult since we use counts for the chi-squared test statistic, therefore missing values should be accounted
13		#' for as explicit factor levels.
14		#' * If factor variables contain `NA`, these `NA` values are excluded by default. To include `NA` values
15		#' set `na.rm = FALSE` and missing values will be displayed as an `NA` level. Alternatively, an explicit
16		#' factor level can be defined for `NA` values during pre-processing via [df_explicit_na()] - the
17		#' default `na_level` (`"<Missing>"`) will also be excluded when `na.rm` is set to `TRUE`.
18		#' * For character variables, automatic conversion to factor does not guarantee that the table
19		#' will be generated correctly. In particular for sparse tables this very likely can fail.
20		#' Therefore it is always better to manually convert character variables to factors during pre-processing.
21		#' * For `compare_vars()`, the column split must define a reference group via `ref_group` so that the comparison
22		#' is well defined.
23		#'
24		#' @seealso Relevant constructor function [create_afun_compare()], and [s_summary()] which is used internally
25		#' to compute a summary within `s_compare()`.
26		#'
27		#' @name compare_variables
28		#' @include summarize_variables.R
29		NULL
30
31		#' @describeIn compare_variables S3 generic function to produce a comparison summary.
32		#'
33		#' @return
34		#' * `s_compare()` returns output of [s_summary()] and comparisons versus the reference group in the form of p-values.
35		#'
36		#' @export
s_compare <- function(x, .ref_group, .in_ref_col, ...) {
  # S3 generic: the method is selected from the class of the first
  # argument, `x`.
  UseMethod("s_compare")
}
43
44		#' @describeIn compare_variables Method for `numeric` class. This uses the standard t-test
45		#' to calculate the p-value.
46		#'
47		#' @method s_compare numeric
48		#'
49		#' @examples
50		#' # `s_compare.numeric`
51		#'
52		#' ## Usual case where both this and the reference group vector have more than 1 value.
53		#' s_compare(rnorm(10, 5, 1), .ref_group = rnorm(5, -5, 1), .in_ref_col = FALSE)
54		#'
55		#' ## If one group has not more than 1 value, then p-value is not calculated.
56		#' s_compare(rnorm(10, 5, 1), .ref_group = 1, .in_ref_col = FALSE)
57		#'
58		#' ## Empty numeric does not fail, it returns NA-filled items and no p-value.
59		#' s_compare(numeric(), .ref_group = numeric(), .in_ref_col = FALSE)
60		#'
61		#' @export
s_compare.numeric <- function(x, .ref_group, .in_ref_col, ...) {
  checkmate::assert_numeric(x)
  checkmate::assert_numeric(.ref_group)
  checkmate::assert_flag(.in_ref_col)

  # Start from the plain numeric summary and add the p-value to it.
  result <- s_summary.numeric(x = x, ...)

  # The t-test is only run outside the reference column and when both
  # groups have more than one available value.
  can_test <- !.in_ref_col && n_available(x) > 1 && n_available(.ref_group) > 1

  if (can_test) {
    result$pval <- stats::t.test(x, .ref_group)$p.value
  } else {
    # Zero-length character vector stands in for "no p-value".
    result$pval <- character()
  }

  result
}
80
81		#' @describeIn compare_variables Method for `factor` class. This uses the chi-squared test
82		#' to calculate the p-value.
83		#'
84		#' @param denom (`string`)\cr choice of denominator for factor proportions,
85		#' can only be `n` (number of values in this row and column intersection).
86		#'
87		#' @method s_compare factor
88		#'
89		#' @examples
90		#' # `s_compare.factor`
91		#'
92		#' ## Basic usage:
93		#' x <- factor(c("a", "a", "b", "c", "a"))
94		#' y <- factor(c("a", "b", "c"))
95		#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE)
96		#'
97		#' ## Management of NA values.
98		#' x <- explicit_na(factor(c("a", "a", "b", "c", "a", NA, NA)))
99		#' y <- explicit_na(factor(c("a", "b", "c", NA)))
100		#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE, na.rm = TRUE)
101		#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE, na.rm = FALSE)
102		#'
103		#' @export
s_compare.factor <- function(x,
                             .ref_group,
                             .in_ref_col,
                             denom = "n",
                             na.rm = TRUE, # nolint
                             ...) {
  checkmate::assert_flag(.in_ref_col)
  assert_valid_factor(x)
  assert_valid_factor(.ref_group)
  denom <- match.arg(denom)

  # Start from the factor summary and add the p-value to it.
  result <- s_summary.factor(x = x, denom = denom, na.rm = na.rm, ...)

  if (na.rm) {
    # Drop true NAs as well as the explicit "<Missing>" level.
    x <- fct_discard(x[!is.na(x)], "<Missing>")
    .ref_group <- fct_discard(.ref_group[!is.na(.ref_group)], "<Missing>")
  } else {
    # Keep missing values as an explicit "NA" level.
    x <- explicit_na(x, label = "NA")
    .ref_group <- explicit_na(.ref_group, label = "NA")
  }

  # Both groups must share the same levels (at least two).
  checkmate::assert_factor(x, levels = levels(.ref_group), min.levels = 2)

  skip_test <- .in_ref_col || length(x) == 0 || length(.ref_group) == 0
  if (skip_test) {
    # Zero-length character vector stands in for "no p-value".
    result$pval <- character()
  } else {
    # One row of counts per group; chi-squared test on the 2 x k table.
    counts <- rbind(table(x), table(.ref_group))
    chisq_res <- suppressWarnings(stats::chisq.test(counts))
    result$pval <- chisq_res$p.value
  }

  result
}
142
143		#' @describeIn compare_variables Method for `character` class. This makes an automatic
144		#' conversion to `factor` (with a warning) and then forwards to the method for factors.
145		#'
146		#' @param verbose (`logical`)\cr Whether warnings and messages should be printed. Mainly used
147		#' to print out information about factor casting. Defaults to `TRUE`.
148		#'
149		#' @method s_compare character
150		#'
151		#' @examples
152		#' # `s_compare.character`
153		#'
154		#' ## Basic usage:
155		#' x <- c("a", "a", "b", "c", "a")
156		#' y <- c("a", "b", "c")
157		#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, .var = "x", verbose = FALSE)
158		#'
159		#' ## Note that missing values handling can make a large difference:
160		#' x <- c("a", "a", "b", "c", "a", NA)
161		#' y <- c("a", "b", "c", rep(NA, 20))
162		#' s_compare(x,
163		#' .ref_group = y, .in_ref_col = FALSE,
164		#' .var = "x", verbose = FALSE
165		#' )
166		#' s_compare(x,
167		#' .ref_group = y, .in_ref_col = FALSE, .var = "x",
168		#' na.rm = FALSE, verbose = FALSE
169		#' )
170		#'
171		#' @export
s_compare.character <- function(x,
                                .ref_group,
                                .in_ref_col,
                                denom = "n",
                                na.rm = TRUE, # nolint
                                .var,
                                verbose = TRUE,
                                ...) {
  # Convert both groups to factors up front (in this order), then re-dispatch
  # on the converted `x`.
  x_fct <- as_factor_keep_attributes(x, x_name = .var, verbose = verbose)
  ref_fct <- as_factor_keep_attributes(.ref_group, x_name = .var, verbose = verbose)

  s_compare(
    x = x_fct,
    .ref_group = ref_fct,
    .in_ref_col = .in_ref_col,
    denom = denom,
    na.rm = na.rm,
    ...
  )
}
191
192		#' @describeIn compare_variables Method for `logical` class. A chi-squared test
193		#' is used. If missing values are not removed, then they are counted as `FALSE`.
194		#'
195		#' @method s_compare logical
196		#'
197		#' @examples
198		#' # `s_compare.logical`
199		#'
200		#' ## Basic usage:
201		#' x <- c(TRUE, FALSE, TRUE, TRUE)
202		#' y <- c(FALSE, FALSE, TRUE)
203		#' s_compare(x, .ref_group = y, .in_ref_col = FALSE)
204		#'
205		#' ## Management of NA values.
206		#' x <- c(NA, TRUE, FALSE)
207		#' y <- c(NA, NA, NA, NA, FALSE)
208		#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = TRUE)
209		#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = FALSE)
210		#'
211		#' @export
s_compare.logical <- function(x,
                              .ref_group,
                              .in_ref_col,
                              na.rm = TRUE, # nolint
                              denom = "n",
                              ...) {
  denom <- match.arg(denom)

  # Start from the logical summary and add the p-value to it.
  result <- s_summary.logical(x = x, na.rm = na.rm, denom = denom, ...)

  if (na.rm) {
    x <- stats::na.omit(x)
    .ref_group <- stats::na.omit(.ref_group)
  } else {
    # Missing values that are kept are counted as FALSE.
    x[is.na(x)] <- FALSE
    .ref_group[is.na(.ref_group)] <- FALSE
  }

  skip_test <- .in_ref_col || length(x) == 0 || length(.ref_group) == 0
  if (skip_test) {
    # Zero-length character vector stands in for "no p-value".
    result$pval <- character()
  } else {
    # Fix the level order so both count rows have TRUE and FALSE columns.
    lvls <- c(TRUE, FALSE)
    counts <- rbind(
      table(factor(x, levels = lvls)),
      table(factor(.ref_group, levels = lvls))
    )
    result$pval <- suppressWarnings(prop_chisq(counts))
  }

  result
}
246
247		#' @describeIn compare_variables Formatted analysis function which is used as `afun`
248		#' in `compare_vars()`.
249		#'
250		#' @return
251		#' * `a_compare()` returns the corresponding list with formatted [rtables::CellValue()].
252		#'
253		#' @export
a_compare <- function(x, .ref_group, .in_ref_col, ..., .var) {
  # S3 generic: the method is selected from the class of the first
  # argument, `x`.
  UseMethod("a_compare")
}
261
262		#' @describeIn compare_variables Formatted analysis function method for `numeric` class.
263		#'
264		#' @examples
265		#' # `a_compare.numeric`
266		#' a_compare(
267		#' rnorm(10, 5, 1),
268		#' .ref_group = rnorm(20, -5, 1),
269		#' .in_ref_col = FALSE,
270		#' .var = "bla"
271		#' )
272		#'
273		#' @export
a_compare.numeric <- make_afun(
  s_compare.numeric,
  # Numeric summary formats plus the p-value format. The separator is a
  # plain "|": writing it as "\|" is an invalid escape in an R string.
  .formats = c(
    .a_summary_numeric_formats,
    pval = "x.xxxx | (<0.0001)"
  ),
  # Numeric summary labels plus the label naming the test behind the p-value.
  .labels = c(
    .a_summary_numeric_labels,
    pval = "p-value (t-test)"
  ),
  .null_ref_cells = FALSE
)
286
# Formats shared by the factor, character and logical `a_compare` methods:
# the count summary formats plus the p-value format. The separator is a
# plain "|": writing it as "\|" is an invalid escape in an R string.
.a_compare_counts_formats <- c(
  .a_summary_counts_formats,
  pval = "x.xxxx | (<0.0001)"
)

# Labels shared by the factor, character and logical `a_compare` methods.
.a_compare_counts_labels <- c(
  pval = "p-value (chi-squared test)"
)
295
296		#' @describeIn compare_variables Formatted analysis function method for `factor` class.
297		#'
298		#' @examples
299		#' # `a_compare.factor`
300		#' # We need to ungroup `count` and `count_fraction` first so that the `rtables` formatting
301		#' # functions can be applied correctly.
302		#' afun <- make_afun(
303		#' getS3method("a_compare", "factor"),
304		#' .ungroup_stats = c("count", "count_fraction")
305		#' )
306		#' x <- factor(c("a", "a", "b", "c", "a"))
307		#' y <- factor(c("a", "a", "b", "c"))
308		#' afun(x, .ref_group = y, .in_ref_col = FALSE)
309		#'
310		#' @export
a_compare.factor <- make_afun(
  # Statistics function wrapped into a formatted analysis function.
  s_compare.factor,
  # Count-based formats and labels shared with the other non-numeric methods.
  .formats = .a_compare_counts_formats,
  .labels = .a_compare_counts_labels,
  .null_ref_cells = FALSE
)
317
318		#' @describeIn compare_variables Formatted analysis function method for `character` class.
319		#'
320		#' @examples
321		#' # `a_compare.character`
322		#' afun <- make_afun(
323		#' getS3method("a_compare", "character"),
324		#' .ungroup_stats = c("count", "count_fraction")
325		#' )
326		#' x <- c("A", "B", "A", "C")
327		#' y <- c("B", "A", "C")
328		#' afun(x, .ref_group = y, .in_ref_col = FALSE, .var = "x", verbose = FALSE)
329		#'
330		#' @export
a_compare.character <- make_afun(
  # Statistics function wrapped into a formatted analysis function.
  s_compare.character,
  # Count-based formats and labels shared with the other non-numeric methods.
  .formats = .a_compare_counts_formats,
  .labels = .a_compare_counts_labels,
  .null_ref_cells = FALSE
)
337
338		#' @describeIn compare_variables Formatted analysis function method for `logical` class.
339		#'
340		#' @examples
341		#' # `a_compare.logical`
342		#' afun <- make_afun(
343		#' getS3method("a_compare", "logical")
344		#' )
345		#' x <- c(TRUE, FALSE, FALSE, TRUE, TRUE)
346		#' y <- c(TRUE, FALSE)
347		#' afun(x, .ref_group = y, .in_ref_col = FALSE)
348		#'
349		#' @export
a_compare.logical <- make_afun(
  # Statistics function wrapped into a formatted analysis function.
  s_compare.logical,
  # Count-based formats and labels shared with the other non-numeric methods.
  .formats = .a_compare_counts_formats,
  .labels = .a_compare_counts_labels,
  .null_ref_cells = FALSE
)
356
357		#' Constructor Function for [compare_vars()]
358		#'
359		#' @description `r lifecycle::badge("stable")`
360		#'
361		#' Constructor function which creates a combined formatted analysis function.
362		#'
363		#' @inheritParams argument_convention
364		#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
365		#' should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
366		#' for that statistic's row label.
367		#'
368		#' @return Combined formatted analysis function for use in [compare_vars()].
369		#'
370		#' @note Since [a_compare()] is generic and we want customization of the formatting arguments
371		#' via [rtables::make_afun()], we need to create another temporary generic function, with
372		#' corresponding customized methods. Then in order for the methods to be found,
373		#' we need to wrap them in a combined `afun`. Since this is required by two layout creating
374		#' functions (and possibly others in the future), we provide a constructor that does this:
375		#' [create_afun_compare()].
376		#'
377		#' @seealso [compare_vars()]
378		#'
379		#' @examples
380		#' # `create_afun_compare()` to create combined `afun`
381		#'
382		#' afun <- create_afun_compare(
383		#' .stats = c("n", "count_fraction", "mean_sd", "pval"),
384		#' .indent_mods = c(pval = 1L)
385		#' )
386		#'
387		#' lyt <- basic_table() %>%
388		#' split_cols_by("ARMCD", ref_group = "ARM A") %>%
389		#' analyze(
390		#' "AGE",
391		#' afun = afun,
392		#' show_labels = "visible"
393		#' )
394		#' build_table(lyt, df = tern_ex_adsl)
395		#'
396		#' lyt <- basic_table() %>%
397		#' split_cols_by("ARMCD", ref_group = "ARM A") %>%
398		#' analyze(
399		#' "SEX",
400		#' afun = afun,
401		#' show_labels = "visible"
402		#' )
403		#' build_table(lyt, df = tern_ex_adsl)
404		#'
405		#' @export
create_afun_compare <- function(.stats = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  function(x,
           .ref_group,
           .in_ref_col,
           ...,
           .var) {
    # Temporary generic; its methods are the customised `afun.<class>`
    # functions defined below in this same environment.
    afun <- function(x, ...) {
      UseMethod("afun", x)
    }

    # --- numeric variables ---
    stats_num <- afun_selected_stats(
      .stats,
      all_stats = c(names(.a_summary_numeric_formats), "pval")
    )
    afun.numeric <- make_afun( # nolint
      a_compare.numeric,
      .stats = stats_num,
      .formats = extract_by_name(.formats, stats_num),
      .labels = extract_by_name(.labels, stats_num),
      .indent_mods = extract_by_name(.indent_mods, stats_num),
      .null_ref_cells = FALSE
    )

    # --- count-based variables (factor, character, logical) ---
    # These three share the same statistic selection and formatting.
    stats_cnt <- afun_selected_stats(
      .stats,
      all_stats = names(.a_compare_counts_formats)
    )
    stats_ungroup <- afun_selected_stats(.stats, c("count", "count_fraction"))
    formats_cnt <- extract_by_name(.formats, stats_cnt)
    labels_cnt <- extract_by_name(.labels, stats_cnt)
    indents_cnt <- extract_by_name(.indent_mods, stats_cnt)

    afun.factor <- make_afun( # nolint
      a_compare.factor,
      .stats = stats_cnt,
      .formats = formats_cnt,
      .labels = labels_cnt,
      .indent_mods = indents_cnt,
      .ungroup_stats = stats_ungroup,
      .null_ref_cells = FALSE
    )

    afun.character <- make_afun( # nolint
      a_compare.character,
      .stats = stats_cnt,
      .formats = formats_cnt,
      .labels = labels_cnt,
      .indent_mods = indents_cnt,
      .ungroup_stats = stats_ungroup,
      .null_ref_cells = FALSE
    )

    # The logical method is built without `.ungroup_stats`.
    afun.logical <- make_afun( # nolint
      a_compare.logical,
      .stats = stats_cnt,
      .formats = formats_cnt,
      .labels = labels_cnt,
      .indent_mods = indents_cnt,
      .null_ref_cells = FALSE
    )

    # Dispatch to the method matching the class of `x`.
    afun(
      x = x,
      .ref_group = .ref_group,
      .in_ref_col = .in_ref_col,
      ...,
      .var = .var
    )
  }
}
475
476		#' @describeIn compare_variables Layout-creating function which can take statistics function arguments
477		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
478		#'
479		#' @param ... arguments passed to `s_compare()`.
480		#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
481		#' should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
482		#' for that statistic's row label.
483		#'
484		#' @return
485		#' * `compare_vars()` returns a layout object suitable for passing to further layouting functions,
486		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
487		#' the statistics from `s_compare()` to the table layout.
488		#'
489		#' @examples
490		#' # `compare_vars()` in `rtables` pipelines
491		#'
492		#' ## Default output within a `rtables` pipeline.
493		#' lyt <- basic_table() %>%
494		#' split_cols_by("ARMCD", ref_group = "ARM B") %>%
495		#' compare_vars(c("AGE", "SEX"))
496		#' build_table(lyt, tern_ex_adsl)
497		#'
498		#' ## Select and format statistics output.
499		#' lyt <- basic_table() %>%
500		#' split_cols_by("ARMCD", ref_group = "ARM C") %>%
501		#' compare_vars(
502		#' vars = "AGE",
503		#' .stats = c("mean_sd", "pval"),
504		#' .formats = c(mean_sd = "xx.x, xx.x"),
505		#' .labels = c(mean_sd = "Mean, SD")
506		#' )
507		#' build_table(lyt, df = tern_ex_adsl)
508		#'
509		#' @export
compare_vars <- function(lyt,
                         vars,
                         var_labels = vars,
                         nested = TRUE,
                         ...,
                         na_level = NA_character_,
                         show_labels = "default",
                         table_names = vars,
                         .stats = c("n", "mean_sd", "count_fraction", "pval"),
                         .formats = NULL,
                         .labels = NULL,
                         .indent_mods = NULL) {
  # Combined analysis function that dispatches on the class of each variable.
  compare_afun <- create_afun_compare(.stats, .formats, .labels, .indent_mods)

  # Everything supplied via `...` is handed over through `extra_args`;
  # `na_level` is passed on as the `na_str` argument.
  analyze(
    lyt = lyt,
    vars = vars,
    var_labels = var_labels,
    afun = compare_afun,
    nested = nested,
    extra_args = list(...),
    na_str = na_level,
    inclNAs = TRUE,
    show_labels = show_labels,
    table_names = table_names
  )
}

1		#' Summary numeric variables in columns
2		#'
3		#' @description `r lifecycle::badge("experimental")`
4		#'
5		#' Layout-creating function which can be used for creating column-wise summary tables.
6		#' This function sets the analysis methods as column labels and is a wrapper for
7		#' [rtables::analyze_colvars()]. It was designed principally for PK tables.
8		#'
9		#' @inheritParams argument_convention
10		#' @inheritParams rtables::analyze_colvars
11		#' @param row_labels (`character`)\cr as this function works in columns space, usual `.labels`
12		#' character vector applies on the column space. You can change the row labels by defining this
13		#' parameter to a named character vector with names corresponding to the split values. It defaults
14		#' to `NULL` and if it contains only one `string`, it will duplicate that as a row label.
15		#' @param do_summarize_row_groups (`flag`)\cr defaults to `FALSE` and applies the analysis to the current
16		#' label rows. This is a wrapper of [rtables::summarize_row_groups()] and it can accept `labelstr`
17		#' to define row labels. This behavior is not supported as we never need to overload row labels.
18		#' @param split_col_vars (`flag`)\cr defaults to `TRUE` and puts the analysis results onto the columns.
19		#' This option allows you to add multiple instances of this functions, also in a nested fashion,
20		#' without adding more splits. This split must happen only one time on a single layout.
21		#'
22		#' @return
23		#' A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
24		#' Adding this function to an `rtable` layout will summarize the given variables, arrange the output
25		#' in columns, and add it to the table layout.
26		#'
27		#' @note This is an experimental implementation of [rtables::summarize_row_groups()] and
28		#' [rtables::analyze_colvars()] that may be subjected to changes as `rtables` extends its
29		#' support to more complex analysis pipelines on the column space. For the same reasons,
30		#' we encourage to read the examples carefully and file issues for cases that differ from
31		#' them.
32		#'
33		#' Here `labelstr` behaves differently than usual. If it is not defined (default as `NULL`),
34		#' row labels are assigned automatically to the split values in case of `rtables::analyze_colvars`
35		#' (`do_summarize_row_groups = FALSE`, the default), and to the group label for
36		#' `do_summarize_row_groups = TRUE`.
37		#'
38		#' @seealso [summarize_vars()], [rtables::analyze_colvars()].
39		#'
40		#' @examples
41		#' library(dplyr)
42		#'
43		#' # Data preparation
44		#' adpp <- tern_ex_adpp %>% h_pkparam_sort()
45		#'
46		#' lyt <- basic_table() %>%
47		#' split_rows_by(var = "STRATA1", label_pos = "topleft") %>%
48		#' split_rows_by(
49		#' var = "SEX",
50		#' label_pos = "topleft",
51		#' child_label = "hidden"
52		#' ) %>% # Removes duplicated labels
53		#' analyze_vars_in_cols(vars = "AGE")
54		#' result <- build_table(lyt = lyt, df = adpp)
55		#' result
56		#'
57		#' # By selecting just some statistics and ad-hoc labels
58		#' lyt <- basic_table() %>%
59		#' split_rows_by(var = "ARM", label_pos = "topleft") %>%
60		#' split_rows_by(
61		#' var = "SEX",
62		#' label_pos = "topleft",
63		#' child_labels = "hidden",
64		#' split_fun = drop_split_levels
65		#' ) %>%
66		#' analyze_vars_in_cols(
67		#' vars = "AGE",
68		#' .stats = c("n", "cv", "geom_mean"),
69		#' .labels = c(
70		#' n = "aN",
71		#' cv = "aCV",
72		#' geom_mean = "aGeomMean"
73		#' )
74		#' )
75		#' result <- build_table(lyt = lyt, df = adpp)
76		#' result
77		#'
78		#' # Changing row labels
79		#' lyt <- basic_table() %>%
80		#' analyze_vars_in_cols(
81		#' vars = "AGE",
82		#' row_labels = "some custom label"
83		#' )
84		#' result <- build_table(lyt, df = adpp)
85		#' result
86		#'
87		#' # Pharmacokinetic parameters
88		#' lyt <- basic_table() %>%
89		#' split_rows_by(
90		#' var = "TLG_DISPLAY",
91		#' split_label = "PK Parameter",
92		#' label_pos = "topleft",
93		#' child_label = "hidden"
94		#' ) %>%
95		#' analyze_vars_in_cols(
96		#' vars = "AVAL"
97		#' )
98		#' result <- build_table(lyt, df = adpp)
99		#' result
100		#'
101		#' # Multiple calls (summarize label and analyze underneath)
102		#' lyt <- basic_table() %>%
103		#' split_rows_by(
104		#' var = "TLG_DISPLAY",
105		#' split_label = "PK Parameter",
106		#' label_pos = "topleft"
107		#' ) %>%
108		#' analyze_vars_in_cols(
109		#' vars = "AVAL",
110		#' do_summarize_row_groups = TRUE # does a summarize level
111		#' ) %>%
112		#' split_rows_by("SEX",
113		#' child_label = "hidden",
114		#' label_pos = "topleft"
115		#' ) %>%
116		#' analyze_vars_in_cols(
117		#' vars = "AVAL",
118		#' split_col_vars = FALSE # avoids re-splitting the columns
119		#' )
120		#' result <- build_table(lyt, df = adpp)
121		#' result
122		#'
123		#' @export
analyze_vars_in_cols <- function(lyt,
                                 vars,
                                 ...,
                                 .stats = c(
                                   "n",
                                   "mean",
                                   "sd",
                                   "se",
                                   "cv",
                                   "geom_cv"
                                 ),
                                 .labels = c(
                                   n = "n",
                                   mean = "Mean",
                                   sd = "SD",
                                   se = "SE",
                                   cv = "CV (%)",
                                   geom_cv = "CV % Geometric Mean"
                                 ),
                                 row_labels = NULL,
                                 do_summarize_row_groups = FALSE,
                                 split_col_vars = TRUE,
                                 .indent_mods = NULL,
                                 nested = TRUE,
                                 na_level = NULL,
                                 .formats = NULL) {
  checkmate::assert_string(na_level, null.ok = TRUE)
  checkmate::assert_character(row_labels, null.ok = TRUE)
  checkmate::assert_int(.indent_mods, null.ok = TRUE)
  checkmate::assert_flag(nested)
  checkmate::assert_flag(split_col_vars)
  checkmate::assert_flag(do_summarize_row_groups)

  # Automatic assignment of formats
  if (is.null(.formats)) {
    # General values
    sf_numeric <- summary_formats("numeric")
    sf_counts <- summary_formats("counts")[-1]
    formats_v <- c(sf_numeric, sf_counts)
  } else {
    formats_v <- .formats
  }

  # Indentation applied to every cell. A plain `if`/`else` replaces the scalar
  # `ifelse()`; `unname()` keeps the result name-free, as `ifelse()` did.
  indent_mod <- if (is.null(.indent_mods)) 0L else unname(.indent_mods)

  # Shared error for a split value that has no entry in `row_labels`.
  # `value_desc` describes which value was looked up.
  stop_unmatched_label <- function(.spl_context, value_desc, value) {
    stop(
      "Replacing the labels in do_summarize_row_groups needs a named vector ",
      "that contains the split values. In the current split variable ",
      .spl_context$split[nrow(.spl_context)],
      " the ", value_desc, " ", value, " is not in",
      " row_labels names: ", paste(names(row_labels), collapse = ", "),
      call. = FALSE
    )
  }

  # Check for vars in the case that one or more are used
  if (length(vars) == 1) {
    # A single variable is analyzed once per requested statistic.
    vars <- rep(vars, length(.stats))
  } else if (length(vars) != length(.stats)) {
    stop(
      "Analyzed variables (vars) does not have the same ",
      "number of elements of specified statistics (.stats)."
    )
  }

  if (split_col_vars) {
    # Checking there is not a previous identical column split
    clyt <- tail(clayout(lyt), 1)[[1]]

    dummy_lyt <- split_cols_by_multivar(
      lyt = basic_table(),
      vars = vars,
      varlabels = .labels[.stats]
    )

    # `vapply()` always yields a logical vector, also for an empty `clyt`.
    is_repeated_split <- vapply(
      clyt,
      identical,
      logical(1),
      y = get_last_col_split(dummy_lyt)
    )
    if (any(is_repeated_split)) {
      stop(
        "Column split called again with the same values. ",
        "This can create many unwanted columns. Please consider adding ",
        "split_col_vars = FALSE to the last call of ",
        deparse(sys.calls()[[sys.nframe() - 1]]), "."
      )
    }

    # Main col split
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = vars,
      varlabels = .labels[.stats]
    )
  }

  if (do_summarize_row_groups) {
    if (length(unique(vars)) > 1) {
      stop("When using do_summarize_row_groups only one label level var should be inserted.")
    }

    # Function list for do_summarize_row_groups. Slightly different handling of labels
    cfun_list <- Map(
      function(stat) {
        # Fix the statistic name for the closure created below.
        force(stat)
        function(u, .spl_context, labelstr, ...) {
          # Statistic
          res <- s_summary(u, ...)[[stat]]

          # Label check and replacement: only a `row_labels` vector with more
          # than one element is used as a lookup; otherwise `labelstr` is kept.
          if (length(row_labels) > 1) {
            if (!(labelstr %in% names(row_labels))) {
              stop_unmatched_label(
                .spl_context,
                "labelstr value (split value by default)",
                labelstr
              )
            }
            lbl <- unlist(row_labels[labelstr])
          } else {
            lbl <- labelstr
          }

          # Cell creation
          rcell(res,
            label = lbl,
            format = formats_v[names(formats_v) == stat][[1]],
            format_na_str = na_level,
            indent_mod = indent_mod
          )
        }
      },
      stat = .stats
    )

    # Main call to rtables
    summarize_row_groups(
      lyt = lyt,
      var = unique(vars),
      cfun = cfun_list,
      extra_args = list(...)
    )
  } else {
    # Function list for analyze_colvars
    afun_list <- Map(
      function(stat) {
        # Fix the statistic name for the closure created below.
        force(stat)
        function(u, .spl_context, ...) {
          # Main statistics
          res <- s_summary(u, ...)[[stat]]

          # Label from context: value of the innermost split.
          label_from_context <- .spl_context$value[nrow(.spl_context)]

          # Label switcher
          if (is.null(row_labels)) {
            lbl <- label_from_context
          } else if (length(row_labels) > 1) {
            if (!(label_from_context %in% names(row_labels))) {
              stop_unmatched_label(.spl_context, "split value", label_from_context)
            }
            lbl <- unlist(row_labels[label_from_context])
          } else {
            # A single label is used for every row.
            lbl <- row_labels
          }

          # Cell creation
          rcell(res,
            label = lbl,
            format = formats_v[names(formats_v) == stat][[1]],
            format_na_str = na_level,
            indent_mod = indent_mod
          )
        }
      },
      stat = .stats
    )

    # Main call to rtables
    analyze_colvars(lyt,
      afun = afun_list,
      nested = nested,
      extra_args = list(...)
    )
  }
}
302
# Help function: returns the last element of the last entry of a layout's
# column layout (`clayout(lyt)`).
get_last_col_split <- function(lyt) {
  last_col_entry <- tail(clayout(lyt), 1)[[1]]
  tail(last_col_entry, 1)[[1]]
}

1		#' Controls for Cox Regression
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Sets a list of parameters for Cox regression fit. Used internally.
6		#'
7		#' @inheritParams argument_convention
8		#' @param pval_method (`string`)\cr the method used for estimation of p.values; `wald` (default) or `likelihood`.
9		#' @param interaction (`flag`)\cr if `TRUE`, the model includes the interaction between the studied
10		#' treatment and candidate covariate. Note that for univariate models without treatment arm, and
11		#' multivariate models, no interaction can be used so that this needs to be `FALSE`.
12		#' @param ties (`string`)\cr among `exact` (equivalent to `DISCRETE` in SAS), `efron` and `breslow`,
13		#' see [survival::coxph()]. Note: there is no equivalent of SAS `EXACT` method in R.
14		#'
15		#' @return A `list` of items with names corresponding to the arguments.
16		#'
17		#' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()].
18		#'
19		#' @examples
20		#' control_coxreg()
21		#'
22		#' @export
control_coxreg <- function(pval_method = c("wald", "likelihood"),
                           ties = c("exact", "efron", "breslow"),
                           conf_level = 0.95,
                           interaction = FALSE) {
  # Resolve the enumerated choices; the first entry of each is the default.
  chosen_pval_method <- match.arg(pval_method)
  chosen_ties <- match.arg(ties)

  # Validate the remaining settings.
  checkmate::assert_flag(interaction)
  assert_proportion_value(conf_level)

  # Return the settings under the same names as the arguments.
  list(
    pval_method = chosen_pval_method,
    ties = chosen_ties,
    conf_level = conf_level,
    interaction = interaction
  )
}
38
39		#' Custom Tidy Methods for Cox Regression
40		#'
41		#' @description `r lifecycle::badge("stable")`
42		#'
43		#' @inheritParams argument_convention
44		#' @param x (`list`)\cr Result of the Cox regression model fitted by [fit_coxreg_univar()] (for univariate models)
45		#' or [fit_coxreg_multivar()] (for multivariate models).
46		#'
47		#' @return [tidy()] returns:
48		#' * For `summary.coxph` objects, a `data.frame` with columns: `Pr(>\|z\|)`, `exp(coef)`, `exp(-coef)`, `lower .95`,
49		#' `upper .95`, `level`, and `n`.
50		#' * For `coxreg.univar` objects, a `data.frame` with columns: `effect`, `term`, `term_label`, `level`, `n`, `hr`,
51		#' `lcl`, `ucl`, `pval`, and `ci`.
52		#' * For `coxreg.multivar` objects, a `data.frame` with columns: `term`, `pval`, `term_label`, `hr`, `lcl`, `ucl`,
53		#' `level`, and `ci`.
54		#'
55		#' @seealso [cox_regression]
56		#'
57		#' @name tidy_coxreg
58		NULL
59
60		#' @describeIn tidy_coxreg Custom tidy method for [survival::coxph()] summary results.
61		#'
62		#' Tidy the [survival::coxph()] results into a `data.frame` to extract model results.
63		#'
64		#' @method tidy summary.coxph
65		#'
66		#' @examples
67		#' library(survival)
68		#' library(broom)
69		#'
70		#' set.seed(1, kind = "Mersenne-Twister")
71		#'
72		#' dta_bladder <- with(
73		#' data = bladder[bladder$enum < 5, ],
74		#' data.frame(
75		#' time = stop,
76		#' status = event,
77		#' armcd = as.factor(rx),
78		#' covar1 = as.factor(enum),
79		#' covar2 = factor(
80		#' sample(as.factor(enum)),
81		#' levels = 1:4, labels = c("F", "F", "M", "M")
82		#' )
83		#' )
84		#' )
85		#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
86		#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
87		#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
88		#'
89		#' formula <- "survival::Surv(time, status) ~ armcd + covar1"
90		#' msum <- summary(coxph(stats::as.formula(formula), data = dta_bladder))
91		#' tidy(msum)
92		#'
93		#' @export
tidy.summary.coxph <- function(x, # nolint
                               ...) {
  checkmate::assert_class(x, "summary.coxph")

  # Row names of the coefficient table identify the model terms; keep them
  # before the conversion to tibble drops them.
  coef_mat <- x$coefficients
  term_levels <- rownames(coef_mat)

  coef_tbl <- tibble::as_tibble(coef_mat)
  ci_tbl <- tibble::as_tibble(x$conf.int)

  # Keep only the p-value column(s) of the coefficient table next to the
  # confidence interval columns.
  is_pval_col <- grepl("Pr", names(coef_tbl))
  out <- cbind(coef_tbl[, is_pval_col], ci_tbl)
  out$level <- term_levels
  out$n <- x[["n"]]
  out
}
109
110		#' @describeIn tidy_coxreg Custom tidy method for a univariate Cox regression.
111		#'
112		#' Tidy up the result of a Cox regression model fitted by [fit_coxreg_univar()].
113		#'
114		#' @method tidy coxreg.univar
115		#'
116		#' @examples
117		#' ## Cox regression: arm + 1 covariate.
118		#' mod1 <- fit_coxreg_univar(
119		#' variables = list(
120		#' time = "time", event = "status", arm = "armcd",
121		#' covariates = "covar1"
122		#' ),
123		#' data = dta_bladder,
124		#' control = control_coxreg(conf_level = 0.91)
125		#' )
126		#'
127		#' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates.
128		#' mod2 <- fit_coxreg_univar(
129		#' variables = list(
130		#' time = "time", event = "status", arm = "armcd",
131		#' covariates = c("covar1", "covar2")
132		#' ),
133		#' data = dta_bladder,
134		#' control = control_coxreg(conf_level = 0.91, interaction = TRUE)
135		#' )
136		#'
137		#' tidy(mod1)
138		#' tidy(mod2)
139		#'
140		#' @export
tidy.coxreg.univar <- function(x, # nolint
                               ...) {
  checkmate::assert_class(x, "coxreg.univar")
  mod <- x$mod
  vars <- c(x$vars$arm, x$vars$covariates)
  has_arm <- "arm" %in% names(x$vars)

  # Pick the extraction routine matching the kind of models that were fitted:
  # covariate-only models, arm + covariate with interaction, or arm + covariate.
  extract_one <- if (!has_arm) {
    function(mod, vars) {
      h_coxreg_multivar_extract(
        var = vars,
        data = x$data,
        mod = mod,
        control = x$control
      )
    }
  } else if (x$control$interaction) {
    function(mod, vars) {
      h_coxreg_extract_interaction(
        effect = x$vars$arm, covar = vars, mod = mod, data = x$data,
        at = x$at, control = x$control
      )
    }
  } else {
    function(mod, vars) {
      h_coxreg_univar_extract(
        effect = x$vars$arm, covar = vars, data = x$data, mod = mod,
        control = x$control
      )
    }
  }

  # One data frame per model/variable pair, stacked into a single table.
  per_model <- Map(f = extract_one, mod = mod, vars = vars)
  result <- do.call(rbind, per_model)

  # Turn columns into list columns in which NA entries are replaced via
  # `empty_vector_if_na()`.
  blank_na <- function(column) lapply(column, empty_vector_if_na)

  result$ci <- Map(lcl = result$lcl, ucl = result$ucl, f = function(lcl, ucl) c(lcl, ucl))
  result$n <- blank_na(result$n)
  result$ci <- blank_na(result$ci)
  result$hr <- blank_na(result$hr)
  if (x$control$interaction) {
    result$pval_inter <- blank_na(result$pval_inter)
    # Remove interaction p-values due to change in specifications.
    result$pval[result$effect != "Treatment:"] <- NA
  }
  result$pval <- blank_na(result$pval)
  attr(result, "conf_level") <- x$control$conf_level
  result
}
196
197		#' @describeIn tidy_coxreg Custom tidy method for a multivariate Cox regression.
198		#'
199		#' Tidy up the result of a Cox regression model fitted by [fit_coxreg_multivar()].
200		#'
201		#' @method tidy coxreg.multivar
202		#'
203		#' @examples
204		#' multivar_model <- fit_coxreg_multivar(
205		#' variables = list(
206		#' time = "time", event = "status", arm = "armcd",
207		#' covariates = c("covar1", "covar2")
208		#' ),
209		#' data = dta_bladder
210		#' )
211		#' broom::tidy(multivar_model)
212		#'
213		#' @export
tidy.coxreg.multivar <- function(x, # nolint
                                 ...) {
  checkmate::assert_class(x, "coxreg.multivar")
  vars <- c(x$vars$arm, x$vars$covariates)

  # Extract the results of the single multivariate model once per variable
  # and stack them.
  extract_one <- function(vars) {
    h_coxreg_multivar_extract(
      var = vars, data = x$data,
      mod = x$mod, control = x$control
    )
  }
  per_variable <- Map(vars = vars, f = extract_one)
  result <- do.call(rbind, per_variable)

  # Build list columns in which NA entries are replaced via `empty_vector_if_na()`.
  blank_na <- function(column) lapply(column, empty_vector_if_na)
  result$ci <- Map(lcl = result$lcl, ucl = result$ucl, f = function(lcl, ucl) c(lcl, ucl))
  result$ci <- blank_na(result$ci)
  result$hr <- blank_na(result$hr)
  result$pval <- blank_na(result$pval)

  # The `n` column is not reported for multivariate models.
  keep_cols <- names(result) != "n"
  result <- result[, keep_cols]
  attr(result, "conf_level") <- x$control$conf_level

  result
}
240
241		#' Fits for Cox Proportional Hazards Regression
242		#'
243		#' @description `r lifecycle::badge("stable")`
244		#'
245		#' Fitting functions for univariate and multivariate Cox regression models.
246		#'
247		#' @param variables (`list`)\cr a named list of variable names found in `data`, with elements corresponding to
248		#' the `time`, `event`, `arm`, `strata`, and `covariates` terms. If `arm` is missing from
249		#' `variables`, then only Cox model(s) including the `covariates` will be fitted and the corresponding effect
250		#' estimates will be tabulated later.
251		#' @param data (`data.frame`)\cr the dataset containing the variables to fit the models.
252		#' @param at (`list` of `numeric`)\cr when the candidate covariate is a `numeric`, use `at` to specify
253		#' the value of the covariate at which the effect should be estimated.
254		#' @param control (`list`)\cr a list of parameters as returned by the helper function [control_coxreg()].
255		#'
256		#' @seealso [h_cox_regression] for relevant helper functions, [cox_regression].
257		#'
258		#' @examples
259		#' library(survival)
260		#'
261		#' set.seed(1, kind = "Mersenne-Twister")
262		#'
263		#' # Testing dataset [survival::bladder].
264		#' dta_bladder <- with(
265		#' data = bladder[bladder$enum < 5, ],
266		#' data.frame(
267		#' time = stop,
268		#' status = event,
269		#' armcd = as.factor(rx),
270		#' covar1 = as.factor(enum),
271		#' covar2 = factor(
272		#' sample(as.factor(enum)),
273		#' levels = 1:4, labels = c("F", "F", "M", "M")
274		#' )
275		#' )
276		#' )
277		#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
278		#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
279		#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
280		#'
281		#' plot(
282		#' survfit(Surv(time, status) ~ armcd + covar1, data = dta_bladder),
283		#' lty = 2:4,
284		#' xlab = "Months",
285		#' col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
286		#' )
287		#'
288		#' @name fit_coxreg
289		NULL
290
291		#' @describeIn fit_coxreg Fit a series of univariate Cox regression models given the inputs.
292		#'
293		#' @return
294		#' * `fit_coxreg_univar()` returns a `coxreg.univar` class object which is a named `list`
295		#' with 5 elements:
296		#' * `mod`: Cox regression models fitted by [survival::coxph()].
297		#' * `data`: The original data frame input.
298		#' * `control`: The original control input.
299		#' * `vars`: The variables used in the model.
300		#' * `at`: Value of the covariate at which the effect should be estimated.
301		#'
302		#' @note When using `fit_coxreg_univar` there should be two study arms.
303		#'
304		#' @examples
305		#' # fit_coxreg_univar
306		#'
307		#' ## Cox regression: arm + 1 covariate.
308		#' mod1 <- fit_coxreg_univar(
309		#' variables = list(
310		#' time = "time", event = "status", arm = "armcd",
311		#' covariates = "covar1"
312		#' ),
313		#' data = dta_bladder,
314		#' control = control_coxreg(conf_level = 0.91)
315		#' )
316		#'
317		#' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates.
318		#' mod2 <- fit_coxreg_univar(
319		#' variables = list(
320		#' time = "time", event = "status", arm = "armcd",
321		#' covariates = c("covar1", "covar2")
322		#' ),
323		#' data = dta_bladder,
324		#' control = control_coxreg(conf_level = 0.91, interaction = TRUE)
325		#' )
326		#'
327		#' ## Cox regression: arm + 1 covariate, stratified analysis.
328		#' mod3 <- fit_coxreg_univar(
329		#' variables = list(
330		#' time = "time", event = "status", arm = "armcd", strata = "covar2",
331		#' covariates = c("covar1")
332		#' ),
333		#' data = dta_bladder,
334		#' control = control_coxreg(conf_level = 0.91)
335		#' )
336		#'
337		#' ## Cox regression: no arm, only covariates.
338		#' mod4 <- fit_coxreg_univar(
339		#' variables = list(
340		#' time = "time", event = "status",
341		#' covariates = c("covar1", "covar2")
342		#' ),
343		#' data = dta_bladder
344		#' )
345		#'
346		#' @export
fit_coxreg_univar <- function(variables,
                              data,
                              at = list(),
                              control = control_coxreg()) {
  checkmate::assert_list(variables, names = "named")
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  assert_df_with_variables(data, variables)
  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # The likelihood p-value method is rejected when strata are specified.
  if (!is.null(variables$strata)) {
    checkmate::assert_disjunct(control$pval_method, "likelihood")
  }
  # The treatment arm must be a factor with exactly two levels.
  if (has_arm) {
    assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  }

  # Drop unused factor levels of all model variables. The levels must be
  # dropped with `droplevels()` so that the integer codes are remapped as well;
  # overwriting only the "levels" attribute would mislabel (or invalidate)
  # observations whenever an unused level is not the last one. Attributes that
  # `droplevels()` does not carry over (e.g. variable labels) are restored.
  vars <- unlist(variables[c(arm_name, "covariates", "strata")], use.names = FALSE)
  for (i in vars) {
    if (is.factor(data[[i]])) {
      original <- data[[i]]
      dropped <- droplevels(original)
      lost_attrs <- setdiff(
        names(attributes(original)),
        c(names(attributes(dropped)), "contrasts")
      )
      for (attr_name in lost_attrs) {
        attr(dropped, attr_name) <- attr(original, attr_name, exact = TRUE)
      }
      data[[i]] <- dropped
    }
  }

  # One formula (and hence one fitted model) per candidate covariate.
  forms <- h_coxreg_univar_formulas(variables, interaction = control$interaction)
  mod <- lapply(
    forms, function(x) {
      survival::coxph(formula = stats::as.formula(x), data = data, ties = control$ties)
    }
  )
  structure(
    list(
      mod = mod,
      data = data,
      control = control,
      vars = variables,
      at = at
    ),
    class = "coxreg.univar"
  )
}
389
390		#' @describeIn fit_coxreg Fit a multivariate Cox regression model.
391		#'
392		#' @return
393		#' * `fit_coxreg_multivar()` returns a `coxreg.multivar` class object which is a named list
394		#' with 4 elements:
395		#' * `mod`: Cox regression model fitted by [survival::coxph()].
396		#' * `data`: The original data frame input.
397		#' * `control`: The original control input.
398		#' * `vars`: The variables used in the model.
399		#'
400		#' @examples
401		#' # fit_coxreg_multivar
402		#'
403		#' ## Cox regression: multivariate Cox regression.
404		#' multivar_model <- fit_coxreg_multivar(
405		#' variables = list(
406		#' time = "time", event = "status", arm = "armcd",
407		#' covariates = c("covar1", "covar2")
408		#' ),
409		#' data = dta_bladder
410		#' )
411		#'
412		#' # Example without treatment arm.
413		#' multivar_covs_model <- fit_coxreg_multivar(
414		#' variables = list(
415		#' time = "time", event = "status",
416		#' covariates = c("covar1", "covar2")
417		#' ),
418		#' data = dta_bladder
419		#' )
420		#'
421		#' @export
fit_coxreg_multivar <- function(variables,
                                data,
                                control = control_coxreg()) {
  checkmate::assert_list(variables, names = "named")
  arm_name <- if ("arm" %in% names(variables)) "arm" else NULL

  # Covariates are optional, but must be given as a character vector if present.
  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  # Interactions are not supported in the multivariate model.
  checkmate::assert_false(control$interaction)
  assert_df_with_variables(data, variables)
  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # The likelihood p-value method is rejected when strata are specified.
  is_stratified <- !is.null(variables$strata)
  if (is_stratified) {
    checkmate::assert_disjunct(control$pval_method, "likelihood")
  }

  # Fit a single model including all terms. The call is written out in full
  # because `coxph()` records it in the fitted object.
  form <- h_coxreg_multivar_formula(variables)
  mod <- survival::coxph(
    formula = stats::as.formula(form),
    data = data,
    ties = control$ties
  )

  fit <- list(mod = mod, data = data, control = control, vars = variables)
  class(fit) <- "coxreg.multivar"
  fit
}
457
458		#' Muffled `car::Anova`
459		#'
460		#' Applied on survival models, [car::Anova()] signals that the `strata` term is dropped from the model formula
461		#' when present; this function deliberately muffles that message.
462		#'
463		#' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()].
464		#' @param test_statistic (`string`)\cr the method used for estimation of p.values; `wald` (default) or `likelihood`.
465		#'
466		#' @return Returns the output of [car::Anova()], with convergence message muffled.
467		#'
468		#' @keywords internal
muffled_car_anova <- function(mod, test_statistic) {
  tryCatch(
    # Messages signalled by `car::Anova()` are muffled without interrupting
    # the computation.
    withCallingHandlers(
      expr = {
        car::Anova(
          mod,
          test.statistic = test_statistic,
          type = "III"
        )
      },
      message = function(m) invokeRestart("muffleMessage")
    ),
    # Errors are re-raised with a hint on the likely cause. Only the message of
    # the original condition is appended: pasting the condition object itself
    # would also paste its call and duplicate the whole text.
    error = function(e) {
      stop(paste(
        "the model seems to have convergence problems, please try to change",
        "the configuration of covariates or strata variables, e.g.",
        "- original error:", conditionMessage(e)
      ))
    }
  )
}

1		#' Cox Regression Helper: Interactions
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Test and estimate the effect of a treatment in interaction with a covariate.
6		#' The effect is estimated as the HR of the tested treatment for a given level
7		#' of the covariate, in comparison to the treatment control.
8		#'
9		#' @inheritParams argument_convention
10		#' @param x (`numeric` or `factor`)\cr the values of the effect to be tested.
11		#' @param effect (`string`)\cr the name of the effect to be tested and estimated.
12		#' @param covar (`string`)\cr the name of the covariate in the model.
13		#' @param mod (`coxph`)\cr the Cox regression model.
14		#' @param label (`string`)\cr the label to be returned as `term_label`.
15		#' @param control (`list`)\cr a list of controls as returned by [control_coxreg()].
16		#' @param ... see methods.
17		#'
18		#' @examples
19		#' library(survival)
20		#'
21		#' set.seed(1, kind = "Mersenne-Twister")
22		#'
23		#' # Testing dataset [survival::bladder].
24		#' dta_bladder <- with(
25		#' data = bladder[bladder$enum < 5, ],
26		#' data.frame(
27		#' time = stop,
28		#' status = event,
29		#' armcd = as.factor(rx),
30		#' covar1 = as.factor(enum),
31		#' covar2 = factor(
32		#' sample(as.factor(enum)),
33		#' levels = 1:4,
34		#' labels = c("F", "F", "M", "M")
35		#' )
36		#' )
37		#' )
38		#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
39		#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
40		#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
41		#'
42		#' plot(
43		#' survfit(Surv(time, status) ~ armcd + covar1, data = dta_bladder),
44		#' lty = 2:4,
45		#' xlab = "Months",
46		#' col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
47		#' )
48		#'
49		#' @name cox_regression_inter
50		NULL
51
52		#' @describeIn cox_regression_inter S3 generic helper function to determine interaction effect.
53		#'
54		#' @return
55		#' * `h_coxreg_inter_effect()` returns a `data.frame` of covariate interaction effects consisting of the following
56		#' variables: `effect`, `term`, `term_label`, `level`, `n`, `hr`, `lcl`, `ucl`, `pval`, and `pval_inter`.
57		#'
58		#' @export
h_coxreg_inter_effect <- function(x,
                                  effect,
                                  covar,
                                  mod,
                                  label,
                                  control,
                                  ...) {
  # S3 generic: dispatch on the class of the first argument, `x`.
  UseMethod("h_coxreg_inter_effect")
}
68
69		#' @describeIn cox_regression_inter Estimate the interaction with a `numeric` covariate.
70		#'
71		#' @param at (`list`)\cr a list with items named after the covariate, every
72		#' item is a vector of levels at which the interaction should be estimated.
73		#'
74		#' @export
h_coxreg_inter_effect.numeric <- function(x,
                                          effect,
                                          covar,
                                          mod,
                                          label,
                                          control,
                                          at,
                                          ...) {
  all_coefs <- stats::coef(mod)

  # Locate the terms involving `effect` among the non-strata model terms;
  # exactly two are expected.
  term_labels <- attr(stats::terms(mod), "term.labels")
  non_strata_terms <- term_labels[!grepl("strata\\(", term_labels)]
  term_indices <- grep(pattern = effect, x = non_strata_terms)
  checkmate::assert_vector(term_indices, len = 2)

  vcov_mat <- stats::vcov(mod)
  betas <- all_coefs[term_indices]
  betas_var <- diag(vcov_mat)[term_indices]
  betas_cov <- vcov_mat[term_indices[1], term_indices[2]]

  # Covariate value(s) at which the effect is estimated: user supplied via
  # `at`, otherwise the median of the covariate.
  xval <- at[[covar]]
  if (is.null(xval)) {
    xval <- stats::median(x)
  }

  # Of the two selected coefficients, the one whose name does not contain
  # `covar` is the main effect, the other is the interaction.
  effect_index <- !grepl(covar, names(betas))
  coef_hat <- betas[effect_index] + xval * betas[!effect_index]
  coef_se <- sqrt(
    betas_var[effect_index] +
      xval^2 * betas_var[!effect_index] +
      2 * xval * betas_cov
  )
  q_norm <- stats::qnorm((1 + control$conf_level) / 2)

  data.frame(
    effect = "Covariate:",
    term = rep(covar, length(xval)),
    term_label = paste0(" ", xval),
    level = as.character(xval),
    n = NA,
    hr = exp(coef_hat),
    lcl = exp(coef_hat - q_norm * coef_se),
    ucl = exp(coef_hat + q_norm * coef_se),
    pval = NA,
    pval_inter = NA,
    stringsAsFactors = FALSE
  )
}
120
121		#' @describeIn cox_regression_inter Estimate the interaction with a `factor` covariate.
122		#'
123		#' @param data (`data.frame`)\cr the data frame on which the model was fit.
124		#'
125		#' @export
h_coxreg_inter_effect.factor <- function(x,
                                         effect,
                                         covar,
                                         mod,
                                         label,
                                         control,
                                         data,
                                         ...) {
  # Use the confidence level requested in `control` rather than a fixed 95%,
  # so that the factor method agrees with the numeric method. Fall back to
  # 0.95 when no control (or no level) is supplied.
  conf_level <- if (missing(control) || is.null(control$conf_level)) {
    0.95
  } else {
    control$conf_level
  }

  covar_levels <- levels(data[[covar]])

  # Only the first element of the estimations is used.
  y <- h_coxreg_inter_estimations(
    variable = effect, given = covar,
    lvl_var = levels(data[[effect]]),
    lvl_given = covar_levels,
    mod = mod,
    conf_level = conf_level
  )[[1]]

  data.frame(
    effect = "Covariate:",
    term = rep(covar, nrow(y)),
    term_label = as.character(paste0(" ", covar_levels)),
    level = as.character(covar_levels),
    n = NA,
    hr = y[, "hr"],
    lcl = y[, "lcl"],
    ucl = y[, "ucl"],
    pval = NA,
    pval_inter = NA,
    stringsAsFactors = FALSE
  )
}
156
157		#' @describeIn cox_regression_inter A higher level function to get
158		#' the results of the interaction test and the estimated values.
159		#'
160		#' @return
161		#' * `h_coxreg_extract_interaction()` returns the result of an interaction test and the estimated values. If
162		#' no interaction, [h_coxreg_univar_extract()] is applied instead.
163		#'
164		#' @examples
165		#' mod <- coxph(Surv(time, status) ~ armcd * covar1, data = dta_bladder)
166		#' h_coxreg_extract_interaction(
167		#' mod = mod, effect = "armcd", covar = "covar1", data = dta_bladder,
168		#' control = control_coxreg()
169		#' )
170		#'
171		#' @export
h_coxreg_extract_interaction <- function(effect,
                                         covar,
                                         mod,
                                         data,
                                         at,
                                         control) {
  # Without any second-order term in the model there is no interaction to
  # report: fall back to the univariate extraction.
  has_interaction <- any(attr(stats::terms(mod), "order") == 2)
  if (!has_interaction) {
    res <- h_coxreg_univar_extract(
      effect = effect, covar = covar, mod = mod, data = data, control = control
    )
    res$pval_inter <- NA
    return(res)
  }

  test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method]

  # Type III tests: p-value of the main treatment effect and of the
  # interaction term.
  anova_tbl <- broom::tidy(muffled_car_anova(mod, test_statistic))
  pval <- anova_tbl[anova_tbl$term == effect, ][["p.value"]]
  pval_inter <- anova_tbl[grep(":", anova_tbl$term), ][["p.value"]]

  covar_label <- unname(labels_or_names(data[covar]))

  # Header row carrying the test results for the covariate.
  covar_test <- data.frame(
    effect = "Covariate:",
    term = covar,
    term_label = covar_label,
    level = "",
    n = mod$n, hr = NA, lcl = NA, ucl = NA, pval = pval,
    pval_inter = pval_inter,
    stringsAsFactors = FALSE
  )

  # Estimates of the treatment effect given the covariate; the method is
  # chosen by the class of the covariate column.
  estimates <- h_coxreg_inter_effect(
    data[[covar]],
    covar = covar,
    effect = effect,
    mod = mod,
    label = covar_label,
    at = at,
    control = control,
    data = data
  )
  rbind(covar_test, estimates)
}
217
218		#' @describeIn cox_regression_inter Hazard ratio estimation in interactions.
219		#'
220		#' @param variable,given (`string`)\cr the name of variables in interaction. We seek the estimation
221		#' of the levels of `variable` given the levels of `given`.
222		#' @param lvl_var,lvl_given (`character`)\cr corresponding levels as given by [levels()].
223		#' @param mod (`coxph`)\cr a fitted Cox regression model (see [survival::coxph()]).
224		#'
225		#' @details Given the cox regression investigating the effect of Arm (A, B, C; reference A)
226		#' and Sex (F, M; reference Female) and the model being abbreviated: y ~ Arm + Sex + Arm:Sex.
227		#' The cox regression estimates the coefficients along with a variance-covariance matrix for:
228		#'
229		#' - b1 (arm b), b2 (arm c)
230		#' - b3 (sex m)
231		#' - b4 (arm b: sex m), b5 (arm c: sex m)
232		#'
233		#' The estimation of the Hazard Ratio for arm C/sex M is given in reference
234		#' to arm A/Sex M by exp(b2 + b3 + b5)/ exp(b3) = exp(b2 + b5).
235		#' The interaction coefficient is deduced by b2 + b5 while the standard error
236		#' is obtained as $sqrt(Var b2 + Var b5 + 2 * covariance (b2,b5))$.
237		#'
238		#' @return
239		#' * `h_coxreg_inter_estimations()` returns a list of matrices (one per level of variable) with rows corresponding
240		#' to the combinations of `variable` and `given`, with columns:
241		#' * `coef_hat`: Estimation of the coefficient.
242		#' * `coef_se`: Standard error of the estimation.
243		#' * `hr`: Hazard ratio.
244		#' * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio.
245		#'
246		#' @examples
247		#' mod <- coxph(Surv(time, status) ~ armcd * covar1, data = dta_bladder)
248		#' result <- h_coxreg_inter_estimations(
249		#' variable = "armcd", given = "covar1",
250		#' lvl_var = levels(dta_bladder$armcd),
251		#' lvl_given = levels(dta_bladder$covar1),
252		#' mod = mod, conf_level = .95
253		#' )
254		#' result
255		#'
256		#' @export
h_coxreg_inter_estimations <- function(variable,
                                       given,
                                       lvl_var,
                                       lvl_given,
                                       mod,
                                       conf_level = 0.95) {
  # Coefficient names follow the "<variable><level>" pattern.
  var_lvl <- paste0(variable, lvl_var[-1]) # [-1]: reference level
  giv_lvl <- paste0(given, lvl_given)

  # All combinations of non-reference `variable` levels and `given` levels,
  # ordered by `variable` first.
  combos <- expand.grid(variable = var_lvl, given = giv_lvl)
  combos <- combos[order(combos$variable, combos$given), ]
  # Interaction coefficients may be named in either order.
  combos$inter <- paste0(combos$variable, ":", combos$given)
  combos$rev_inter <- paste0(combos$given, ":", combos$variable)

  split_by_variable <- combos$variable
  interaction_names <- paste(combos$variable, combos$given, sep = "/")

  # Template: a named all-zero vector with one entry per model matrix column.
  zero_row <- stats::model.matrix(mod)[1, ]
  zero_row[!zero_row == 0] <- 0

  # For every combination, switch on the main effect of `variable` and the
  # matching interaction term (the main effect of `given` stays off).
  design_mat <- apply(
    X = combos, MARGIN = 1, FUN = function(combo) {
      active_terms <- combo[-which(names(combo) == "given")]
      selector <- zero_row
      selector[names(selector) %in% active_terms] <- 1
      selector
    }
  )
  colnames(design_mat) <- interaction_names

  model_coefs <- stats::coef(mod)
  model_vcov <- stats::vcov(mod)

  # Linear combination of the coefficients for each variable/given pair.
  coef_hat <- t(design_mat) %*% as.matrix(model_coefs)
  colnames(coef_hat) <- "coef"

  # Standard error: square root of the sum of all (co)variances of the
  # coefficients entering the combination.
  coef_se <- apply(
    design_mat, 2,
    function(selector) {
      picked <- as.logical(selector)
      sqrt(sum(model_vcov[picked, picked]))
    }
  )

  q_norm <- stats::qnorm((1 + conf_level) / 2)
  est <- cbind(coef_hat, `se(coef)` = coef_se)
  est <- cbind(
    est,
    hr = exp(est[, "coef"]),
    lcl = exp(est[, "coef"] - q_norm * est[, "se(coef)"]),
    ucl = exp(est[, "coef"] + q_norm * est[, "se(coef)"])
  )

  # One matrix per level of `variable`.
  y <- by(est, split_by_variable, identity)
  y <- lapply(y, as.matrix)
  attr(y, "details") <- paste0(
    "Estimations of ", variable,
    " hazard ratio given the level of ", given, " compared to ",
    variable, " level ", lvl_var[1], "."
  )
  y
}

1		#' Encode Categorical Missing Values in a Data Frame
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' This is a helper function to encode missing entries across groups of categorical
6		#' variables in a data frame.
7		#'
8		#' @details Missing entries are those with `NA` or empty strings and will
9		#' be replaced with a specified value. If factor variables include missing
10		#' values, the missing value will be inserted as the last level.
11		#' Similarly, in case character or logical variables should be converted to factors
12		#' with the `char_as_factor` or `logical_as_factor` options, the missing values will
13		#' be set as the last level.
14		#'
15		#' @param data (`data.frame`)\cr data set.
16		#' @param omit_columns (`character`)\cr names of variables from `data` that should
17		#' not be modified by this function.
18		#' @param char_as_factor (`flag`)\cr whether to convert character variables
19		#' in `data` to factors.
20		#' @param logical_as_factor (`flag`)\cr whether to convert logical variables
21		#' in `data` to factors.
22		#' @param na_level (`string`)\cr used to replace all `NA` or empty
23		#' values inside non-`omit_columns` columns.
24		#'
25		#' @return A `data.frame` with the chosen modifications applied.
26		#'
27		#' @seealso [sas_na()] and [explicit_na()] for other missing data helper functions.
28		#'
29		#' @examples
30		#' my_data <- data.frame(
31		#' u = c(TRUE, FALSE, NA, TRUE),
32		#' v = factor(c("A", NA, NA, NA), levels = c("Z", "A")),
33		#' w = c("A", "B", NA, "C"),
34		#' x = c("D", "E", "F", NA),
35		#' y = c("G", "H", "I", ""),
36		#' z = c(1, 2, 3, 4),
37		#' stringsAsFactors = FALSE
38		#' )
39		#'
40		#' # Example 1
41		#' # Encode missing values in all character or factor columns.
42		#' df_explicit_na(my_data)
43		#' # Also convert logical columns to factor columns.
44		#' df_explicit_na(my_data, logical_as_factor = TRUE)
45		#' # Encode missing values in a subset of columns.
46		#' df_explicit_na(my_data, omit_columns = c("x", "y"))
47		#'
48		#' # Example 2
49		#' # Here we purposefully convert all `M` values to `NA` in the `SEX` variable.
50		#' # After running `df_explicit_na` the `NA` values are encoded as `<Missing>` but they are not
51		#' # included when generating `rtables`.
52		#' adsl <- tern_ex_adsl
53		#' adsl$SEX[adsl$SEX == "M"] <- NA
54		#' adsl <- df_explicit_na(adsl)
55		#'
56		#' # If you want the `NA` values to be displayed in the table use the `na_level` argument.
57		#' adsl <- tern_ex_adsl
58		#' adsl$SEX[adsl$SEX == "M"] <- NA
59		#' adsl <- df_explicit_na(adsl, na_level = "Missing Values")
60		#'
61		#' # Example 3
62		#' # Numeric variables that have missing values are not altered. This means that any `NA` value in
63		#' # a numeric variable will not be included in the summary statistics, nor will they be included
64		#' # in the denominator value for calculating the percent values.
65		#' adsl <- tern_ex_adsl
66		#' adsl$AGE[adsl$AGE < 30] <- NA
67		#' adsl <- df_explicit_na(adsl)
68		#'
69		#' @export
df_explicit_na <- function(data,
                           omit_columns = NULL,
                           char_as_factor = TRUE,
                           logical_as_factor = FALSE,
                           na_level = "<Missing>") {
  checkmate::assert_character(omit_columns, null.ok = TRUE, min.len = 1, any.missing = FALSE)
  checkmate::assert_data_frame(data)
  checkmate::assert_flag(char_as_factor)
  checkmate::assert_flag(logical_as_factor)
  checkmate::assert_string(na_level)

  # Columns to process: everything unless some columns are explicitly omitted.
  target_vars <- if (is.null(omit_columns)) {
    names(data)
  } else {
    setdiff(names(data), omit_columns) # May have duplicates.
  }
  if (length(target_vars) == 0) {
    return(data)
  }

  # Makes sure target_vars exist in data and names are not duplicated.
  assert_df_with_variables(data, split(target_vars, target_vars))

  for (x in target_vars) {
    column <- data[[x]]
    column_label <- obj_label(column)

    # Decide, based on the original type, whether the column becomes a factor.
    to_factor <- (is.character(column) && char_as_factor) ||
      (is.logical(column) && logical_as_factor)

    # Logical columns to be converted go through character first so that
    # their NA values are replaced below.
    if (is.logical(column) && logical_as_factor) {
      column <- as.character(column)
    }

    # Only factor and character columns are modified.
    if (!(is.factor(column) || is.character(column))) {
      next
    }

    # Handle empty strings and NA values.
    column <- explicit_na(sas_na(column), label = na_level)

    if (to_factor) {
      # Sorted observed values become the levels, with na_level placed last.
      observed <- unique(column)
      new_levels <- setdiff(sort(observed), na_level)
      if (na_level %in% observed) {
        new_levels <- c(new_levels, na_level)
      }
      column <- factor(column, levels = new_levels)
    }

    # Write back, re-attaching the label read before the modifications.
    data[, x] <- formatters::with_label(column, label = column_label)
  }
  return(data)
}

1		#' `rtables` Access Helper Functions
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' These are a couple of functions that help with accessing the data in `rtables` objects.
6		#' Currently these work for occurrence tables, which are defined as having a count as the first
7		#' element and a fraction as the second element in each cell.
8		#'
9		#' @seealso [prune_occurrences] for usage of these functions.
10		#'
11		#' @name rtables_access
12		NULL
13
14		#' @describeIn rtables_access Helper function to extract the first values from each content
15		#' cell and from specified columns in a `TableRow`. Defaults to all columns.
16		#'
17		#' @param table_row (`TableRow`)\cr an analysis row in an occurrence table.
18		#' @param col_names (`character`)\cr the names of the columns to extract from.
19		#' @param col_indices (`integer`)\cr the indices of the columns to extract from. If `col_names` are provided,
20		#' then these are inferred from the names of `table_row`. Note that this currently only works well with a single
21		#' column split.
22		#'
23		#' @return
24		#' * `h_row_first_values()` returns a `vector` of numeric values.
25		#'
26		#' @examples
27		#' tbl <- basic_table() %>%
28		#' split_cols_by("ARM") %>%
29		#' split_rows_by("RACE") %>%
30		#' analyze("AGE", function(x) {
31		#' list(
32		#' "mean (sd)" = rcell(c(mean(x), sd(x)), format = "xx.x (xx.x)"),
33		#' "n" = length(x),
34		#' "frac" = rcell(c(0.1, 0.1), format = "xx (xx)")
35		#' )
36		#' }) %>%
37		#' build_table(tern_ex_adsl) %>%
38		#' prune_table()
39		#' tree_row_elem <- collect_leaves(tbl[2, ])[[1]]
40		#' result <- max(h_row_first_values(tree_row_elem))
41		#' result
42		#'
43		#' @export
h_row_first_values <- function(table_row,
                               col_names = NULL,
                               col_indices = NULL) {
  # Resolve the requested columns to indices and validate them against the
  # number of columns of the row.
  col_indices <- check_names_indices(table_row, col_names, col_indices)
  checkmate::assert_integerish(col_indices)
  checkmate::assert_subset(col_indices, seq_len(ncol(table_row)))

  selected_cells <- row_values(table_row)[col_indices]

  # `NULL` cells are reported as `NA_real_`; otherwise the first value of the
  # cell is kept.
  first_or_na <- function(cell) {
    if (!is.null(cell)) {
      return(cell[1L])
    }
    NA_real_
  }
  vapply(selected_cells, first_or_na, FUN.VALUE = numeric(1))
}
63
64		#' @describeIn rtables_access Helper function that extracts row values and checks if they are
65		#' convertible to integers (`integerish` values).
66		#'
67		#' @return
68		#' * `h_row_counts()` returns a `vector` of numeric values.
69		#'
70		#' @examples
71		#' # Row counts (integer values)
72		#' \dontrun{
73		#' h_row_counts(tree_row_elem) # Fails because there are no integers
74		#' }
75		#' # Using values with integers
76		#' tree_row_elem <- collect_leaves(tbl[3, ])[[1]]
77		#' result <- h_row_counts(tree_row_elem)
78		#' # result
79		#'
80		#' @export
h_row_counts <- function(table_row,
                         col_names = NULL,
                         col_indices = NULL) {
  # Counts are the first value of each selected cell; they must be whole numbers.
  first_values <- h_row_first_values(
    table_row = table_row,
    col_names = col_names,
    col_indices = col_indices
  )
  # Keep the variable name reported by the assertion message as "counts".
  checkmate::assert_integerish(first_values, .var.name = "counts")
  first_values
}
88
89		#' @describeIn rtables_access helper function to extract fractions from specified columns in a `TableRow`.
90		#' More specifically it extracts the second values from each content cell and checks it is a fraction.
91		#'
92		#' @return
93		#' * `h_row_fractions()` returns a `vector` of proportions.
94		#'
95		#' @examples
96		#' # Row fractions
97		#' tree_row_elem <- collect_leaves(tbl[4, ])[[1]]
98		#' h_row_fractions(tree_row_elem)
99		#'
100		#' @export
h_row_fractions <- function(table_row,
                            col_names = NULL,
                            col_indices = NULL) {
  selected <- check_names_indices(table_row, col_names, col_indices)
  cells <- row_values(table_row)[selected]

  # The fraction is the second value of each cell and must lie within [0, 1].
  fractions <- sapply(cells, function(cell) cell[2L])
  checkmate::assert_numeric(fractions, lower = 0, upper = 1)
  fractions
}
110
111		#' @describeIn rtables_access Helper function to extract column counts from specified columns in a table.
112		#'
113		#' @param table (`VTableNodeInfo`)\cr an occurrence table or row.
114		#'
115		#' @return
116		#' * `h_col_counts()` returns a `vector` of column counts.
117		#'
118		#' @export
h_col_counts <- function(table,
                         col_names = NULL,
                         col_indices = NULL) {
  selected <- check_names_indices(table, col_names, col_indices)
  # Name the selected counts after the requested columns; with
  # `col_names = NULL` the result carries no names.
  stats::setNames(col_counts(table)[selected], col_names)
}
126
127		#' @describeIn rtables_access Helper function to get first row of content table of current table.
128		#'
129		#' @return
130		#' * `h_content_first_row()` returns a row from an `rtables` table.
131		#'
132		#' @export
h_content_first_row <- function(table) {
  # Take the first child of the content table attached to `table`.
  content_children <- tree_children(content_table(table))
  content_children[[1]]
}
137
138		#' @describeIn rtables_access Helper function which says whether current table is a leaf in the tree.
139		#'
140		#' @return
141		#' * `is_leaf_table()` returns a `logical` value indicating whether current table is a leaf.
142		#'
143		#' @keywords internal
is_leaf_table <- function(table) {
  # A leaf table is one whose children all share the single class
  # "ElementaryTable".
  kid_classes <- sapply(tree_children(table), class)
  identical(unique(kid_classes), "ElementaryTable")
}
149
150		#' @describeIn rtables_access Internal helper function that tests standard inputs for column indices.
151		#'
152		#' @return
153		#' * `check_names_indices` returns column indices.
154		#'
155		#' @keywords internal
check_names_indices <- function(table_row,
                                col_names = NULL,
                                col_indices = NULL) {
  # Column names, when given, are translated to indices; supplying both names
  # and indices is ambiguous and therefore rejected.
  if (!is.null(col_names)) {
    if (!is.null(col_indices)) {
      stop(
        "Inserted both col_names and col_indices when selecting row values. ",
        "Please choose one."
      )
    }
    col_indices <- h_col_indices(table_row, col_names)
  }

  # Default: all columns. Objects without a column dimension fall back to
  # their length. A plain `if` is used instead of `ifelse()` because the
  # condition is a scalar and `ncol()` only needs to be evaluated once.
  if (is.null(col_indices)) {
    n_cols <- ncol(table_row)
    if (is.null(n_cols)) {
      n_cols <- length(table_row)
    }
    col_indices <- seq_len(n_cols)
  }

  col_indices
}

1		#' Cox Proportional Hazards Regression
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Fits a Cox regression model and estimates hazard ratio to describe the effect size in a survival analysis.
6		#'
7		#' @inheritParams argument_convention
8		#'
9		#' @details Cox models are the most commonly used methods to estimate the magnitude of
10		#' the effect in survival analysis. It assumes proportional hazards: the ratio
11		#' of the hazards between groups (e.g., two arms) is constant over time.
12		#' This ratio is referred to as the "hazard ratio" (HR) and is one of the
13		#' most commonly reported metrics to describe the effect size in survival
14		#' analysis (NEST Team, 2020).
15		#'
16		#' @seealso [fit_coxreg] for relevant fitting functions, [h_cox_regression] for relevant
17		#' helper functions, and [tidy_coxreg] for custom tidy methods.
18		#'
19		#' @examples
20		#' library(survival)
21		#'
22		#' # Testing dataset [survival::bladder].
23		#' set.seed(1, kind = "Mersenne-Twister")
24		#' dta_bladder <- with(
25		#' data = bladder[bladder$enum < 5, ],
26		#' tibble::tibble(
27		#' TIME = stop,
28		#' STATUS = event,
29		#' ARM = as.factor(rx),
30		#' COVAR1 = as.factor(enum) %>% formatters::with_label("A Covariate Label"),
31		#' COVAR2 = factor(
32		#' sample(as.factor(enum)),
33		#' levels = 1:4, labels = c("F", "F", "M", "M")
34		#' ) %>% formatters::with_label("Sex (F/M)")
35		#' )
36		#' )
37		#' dta_bladder$AGE <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
38		#' dta_bladder$STUDYID <- factor("X")
39		#'
40		#' plot(
41		#' survfit(Surv(TIME, STATUS) ~ ARM + COVAR1, data = dta_bladder),
42		#' lty = 2:4,
43		#' xlab = "Months",
44		#' col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
45		#' )
46		#'
47		#' @name cox_regression
48		NULL
49
50		#' @describeIn cox_regression Statistics function that transforms results tabulated
51		#' from [fit_coxreg_univar()] or [fit_coxreg_multivar()] into a list.
52		#'
53		#' @param model_df (`data.frame`)\cr contains the resulting model fit from a [fit_coxreg]
54		#' function with tidying applied via [broom::tidy()].
55		#' @param .stats (`character`)\cr the name of statistics to be reported among:
56		#' * `n`: number of observations (univariate only)
57		#' * `hr`: hazard ratio
58		#' * `ci`: confidence interval
59		#' * `pval`: p-value of the treatment effect
60		#' * `pval_inter`: p-value of the interaction effect between the treatment and the covariate (univariate only)
61		#' @param .which_vars (`character`)\cr which rows should statistics be returned for from the given model.
62		#' Defaults to "all". Other options include "var_main" for main effects, "inter" for interaction effects,
63		#' and "multi_lvl" for multivariate model covariate level rows. When `.which_vars` is "all" specific
64		#' variables can be selected by specifying `.var_nms`.
65		#' @param .var_nms (`character`)\cr the `term` value of rows in `model_df` for which `.stats` should be returned.
66		#' Typically this is the name of a variable. If using variable labels, `.var_nms` should be a vector of both the
67		#' desired variable name and the variable label in that order to see all `.stats` related to that variable.
68		#' When `.which_vars` is "var_main", `.var_nms` should be only the variable name.
69		#'
70		#' @return
71		#' * `s_coxreg()` returns the selected statistic from the Cox regression model for the selected variable(s).
72		#'
73		#' @examples
74		#' # s_coxreg
75		#'
76		#' # Univariate
77		#' u1_variables <- list(
78		#' time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2")
79		#' )
80		#' univar_model <- fit_coxreg_univar(variables = u1_variables, data = dta_bladder)
81		#' df1 <- broom::tidy(univar_model)
82		#' s_coxreg(model_df = df1, .stats = "hr")
83		#'
84		#' # Univariate with interactions
85		#' univar_model_inter <- fit_coxreg_univar(
86		#' variables = u1_variables, control = control_coxreg(interaction = TRUE), data = dta_bladder
87		#' )
88		#' df1_inter <- broom::tidy(univar_model_inter)
89		#' s_coxreg(model_df = df1_inter, .stats = "hr", .which_vars = "inter", .var_nms = "COVAR1")
90		#'
91		#' # Univariate without treatment arm - only "COVAR2" covariate effects
92		#' u2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2"))
93		#' univar_covs_model <- fit_coxreg_univar(variables = u2_variables, data = dta_bladder)
94		#' df1_covs <- broom::tidy(univar_covs_model)
95		#' s_coxreg(model_df = df1_covs, .stats = "hr", .var_nms = c("COVAR2", "Sex (F/M)"))
96		#'
97		#' # Multivariate.
98		#' m1_variables <- list(
99		#' time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2")
100		#' )
101		#' multivar_model <- fit_coxreg_multivar(variables = m1_variables, data = dta_bladder)
102		#' df2 <- broom::tidy(multivar_model)
103		#' s_coxreg(model_df = df2, .stats = "pval", .which_vars = "var_main", .var_nms = "COVAR1")
104		#' s_coxreg(
105		#' model_df = df2, .stats = "pval", .which_vars = "multi_lvl",
106		#' .var_nms = c("COVAR1", "A Covariate Label")
107		#' )
108		#'
109		#' # Multivariate without treatment arm - only "COVAR1" main effect
110		#' m2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2"))
111		#' multivar_covs_model <- fit_coxreg_multivar(variables = m2_variables, data = dta_bladder)
112		#' df2_covs <- broom::tidy(multivar_covs_model)
113		#' s_coxreg(model_df = df2_covs, .stats = "hr")
114		#'
115		#' @export
s_coxreg <- function(model_df, .stats, .which_vars = "all", .var_nms = NULL) {
  # Input checks: `model_df` needs a `term` column (factor or character) and
  # the requested statistic column.
  assert_df_with_variables(model_df, list(term = "term", stat = .stats))
  checkmate::assert_multi_class(model_df$term, classes = c("factor", "character"))
  model_df$term <- as.character(model_df$term)
  .var_nms <- .var_nms[!is.na(.var_nms)]

  # Restrict to the requested terms. For "multi_lvl" all remaining rows are
  # relabelled with the last entry of `.var_nms` so that they form one group.
  if (length(.var_nms) > 0) model_df <- model_df[model_df$term %in% .var_nms, ]
  if (.which_vars == "multi_lvl") model_df$term <- tail(.var_nms, 1)

  # We need a list with names corresponding to the stats to display of equal length to the list of stats.
  y <- split(model_df, f = model_df$term, drop = FALSE)
  y <- stats::setNames(y, nm = rep(.stats, length(y)))

  if (.which_vars == "var_main") {
    # Only the main effect, i.e. the first row of each term group.
    y <- lapply(y, function(x) x[1, , drop = FALSE])
  } else if (.which_vars %in% c("inter", "multi_lvl")) {
    # Exclude the main effect row. The decision is made per group (`x`), not
    # based on the first group only, so groups of different sizes are each
    # handled correctly.
    y <- lapply(y, function(x) if (nrow(x) > 1) x[-1, , drop = FALSE] else x)
  }

  # One named list of statistic values per group, named by `term_label`.
  lapply(
    X = y,
    FUN = function(x) {
      z <- as.list(x[[.stats]])
      stats::setNames(z, nm = x$term_label)
    }
  )
}
143
144		#' @describeIn cox_regression Analysis function which is used as `afun` in [rtables::analyze()]
145		#' and `cfun` in [rtables::summarize_row_groups()] within `summarize_coxreg()`.
146		#'
147		#' @param eff (`flag`)\cr whether treatment effect should be calculated. Defaults to `FALSE`.
148		#' @param var_main (`flag`)\cr whether main effects should be calculated. Defaults to `FALSE`.
149		#' @param na_level (`string`)\cr custom string to replace all `NA` values with. Defaults to `""`.
150		#' @param cache_env (`environment`)\cr an environment object used to cache the regression model in order to
151		#' avoid repeatedly fitting the same model for every row in the table. Defaults to `NULL` (no caching).
152		#'
153		#' @return
154		#' * `a_coxreg()` returns formatted [rtables::CellValue()].
155		#'
156		#' @examples
157		#' tern:::a_coxreg(
158		#' df = dta_bladder,
159		#' labelstr = "Label 1",
160		#' variables = u1_variables,
161		#' .spl_context = list(value = "COVAR1"),
162		#' .stats = "n",
163		#' .formats = "xx"
164		#' )
165		#'
166		#' tern:::a_coxreg(
167		#' df = dta_bladder,
168		#' labelstr = "",
169		#' variables = u1_variables,
170		#' .spl_context = list(value = "COVAR2"),
171		#' .stats = "pval",
172		#' .formats = "xx.xxxx"
173		#' )
174		#'
175		#' @keywords internal
176		a_coxreg <- function(df,
177		labelstr,
178		eff = FALSE,
179		var_main = FALSE,
180		multivar = FALSE,
181		variables,
182		at = list(),
183		control = control_coxreg(),
184		.spl_context,
185		.stats,
186		.formats,
187		.indent_mods = NULL,
188		na_level = "",
189		cache_env = NULL) {
		# Fits (or reuses from `cache_env`) a tidied Cox regression model for the current split value and
		# returns the statistic named in `.stats` as rows built with `in_rows()`.
		# NOTE: `multivar`, `var_main` and `control$interaction` are modified locally below, so the order
		# of the following statements matters.
190	176x	cov_no_arm <- !multivar && !"arm" %in% names(variables) && control$interaction # special case: univar no arm
191	176x	cov <- tail(.spl_context$value, 1) # current variable/covariate
192	176x	var_lbl <- formatters::var_labels(df)[cov] # check for df labels
193	176x	if (length(labelstr) > 1) {
194	!	labelstr <- if (cov %in% names(labelstr)) labelstr[[cov]] else var_lbl # use df labels if none
195	176x	} else if (!is.na(var_lbl) && labelstr == cov && cov %in% variables$covariates) {
196	62x	labelstr <- var_lbl
197		}
		# Treatment effect, multivariate and no-arm models are fitted without interaction; otherwise the
		# model is restricted to the current covariate.
198	176x	if (eff \|\| multivar \|\| cov_no_arm) {
199	77x	control$interaction <- FALSE
200		} else {
201	99x	variables$covariates <- cov
202	35x	if (var_main) control$interaction <- TRUE
203		}
204
		# Use the model stored under the name of the current covariate if there is one; otherwise fit,
		# tidy and store it.
205	176x	if (is.null(cache_env[[cov]])) {
206	28x	if (!multivar) {
207	21x	model <- fit_coxreg_univar(variables = variables, data = df, at = at, control = control) %>% broom::tidy()
208		} else {
209	7x	model <- fit_coxreg_multivar(variables = variables, data = df, control = control) %>% broom::tidy()
210		}
211	28x	cache_env[[cov]] <- model
212		} else {
213	148x	model <- cache_env[[cov]]
214		}
		# The `pval_inter` column is set to `NA` when neither `multivar` nor `var_main` is set.
215	99x	if (!multivar && !var_main) model[, "pval_inter"] <- NA_real_
216
		# From here on, the no-arm special case and numeric covariates in models without an arm are
		# treated as multivariate when selecting rows and labels.
217	176x	if (cov_no_arm \|\| (!cov_no_arm && !"arm" %in% names(variables) && is.numeric(df[[cov]]))) {
218	15x	multivar <- TRUE
219	3x	if (!cov_no_arm) var_main <- TRUE
220		}
221
		# Decide which rows of the tidied model `s_coxreg()` should extract (`which_vars`) and for which
		# terms (`var_nms`).
222	176x	vars_coxreg <- list(which_vars = "all", var_nms = NULL)
223	176x	if (eff) {
224	35x	if (multivar && !var_main) { # multivar treatment level
225	6x	var_lbl_arm <- formatters::var_labels(df)[[variables$arm]]
226	6x	vars_coxreg[c("var_nms", "which_vars")] <- list(c(variables$arm, var_lbl_arm), "multi_lvl")
227		} else { # treatment effect
228	29x	vars_coxreg["var_nms"] <- variables$arm
229	6x	if (var_main) vars_coxreg["which_vars"] <- "var_main"
230		}
231		} else {
232	141x	if (!multivar \|\| (multivar && var_main && !is.numeric(df[[cov]]))) { # covariate effect/level
233	108x	vars_coxreg[c("var_nms", "which_vars")] <- list(cov, "var_main")
234	33x	} else if (multivar) { # multivar covariate level
235	33x	vars_coxreg[c("var_nms", "which_vars")] <- list(c(cov, var_lbl), "multi_lvl")
236	6x	if (var_main) model[cov, .stats] <- NA_real_
237		}
238	35x	if (!multivar && !var_main && control$interaction) vars_coxreg["which_vars"] <- "inter" # interaction effect
239		}
		# Only the first element of the `s_coxreg()` result is used.
240	176x	var_vals <- s_coxreg(model, .stats, .which_vars = vars_coxreg$which_vars, .var_nms = vars_coxreg$var_nms)[[1]]
		# Derive the row name(s)/label(s) for the extracted values.
241	176x	var_names <- if (all(grepl("\\(reference = ", names(var_vals))) && labelstr != tail(.spl_context$value, 1)) {
242	21x	paste(c(labelstr, tail(strsplit(names(var_vals), " ")[[1]], 3)), collapse = " ") # "reference" main effect labels
243	176x	} else if ((!multivar && !eff && !(!var_main && control$interaction) && nchar(labelstr) > 0) \|\|
244	176x	(multivar && var_main && is.numeric(df[[cov]]))) {
245	42x	labelstr # other main effect labels
246	176x	} else if (multivar && !eff && !var_main && is.numeric(df[[cov]])) {
247	6x	"All" # multivar numeric covariate
248		} else {
249	107x	names(var_vals)
250		}
		# The same format and NA string are repeated for every returned row.
251	176x	in_rows(
252	176x	.list = var_vals, .names = var_names, .labels = var_names, .indent_mods = .indent_mods,
253	176x	.formats = stats::setNames(rep(.formats, length(var_names)), var_names),
254	176x	.format_na_strs = stats::setNames(rep(na_level, length(var_names)), var_names)
255		)
256		}
257
258		#' @describeIn cox_regression Layout-creating function which creates a Cox regression summary table
259		#' layout. This function is a wrapper for several `rtables` layouting functions, in particular
260		#' [rtables::analyze_colvars()] and [rtables::summarize_row_groups()].
261		#'
262		#' @inheritParams fit_coxreg_univar
263		#' @param multivar (`flag`)\cr Defaults to `FALSE`. If `TRUE` multivariate Cox regression will run, otherwise
264		#' univariate Cox regression will run.
265		#' @param common_var (`character`)\cr the name of a factor variable in the dataset which takes the same value
266		#' for all rows. This should be created during pre-processing if no such variable currently exists.
267		#' @param .section_div (`character`)\cr string which should be repeated as a section divider between sections.
268		#' Defaults to `NA` for no section divider. If a vector of two strings are given, the first will be used between
269		#' treatment and covariate sections and the second between different covariates.
270		#'
271		#' @return
272		#' * `summarize_coxreg()` returns a layout object suitable for passing to further layouting functions,
273		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add a Cox regression table
274		#' containing the chosen statistics to the table layout.
275		#'
276		#' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()] which also take the `variables`, `data`,
277		#' `at` (univariate only), and `control` arguments but return unformatted univariate and multivariate
278		#' Cox regression models, respectively.
279		#'
280		#' @examples
281		#' # summarize_coxreg
282		#'
283		#' result_univar <- basic_table() %>%
284		#' summarize_coxreg(variables = u1_variables) %>%
285		#' build_table(dta_bladder)
286		#' result_univar
287		#'
288		#' result_multivar <- basic_table() %>%
289		#' summarize_coxreg(
290		#' variables = m1_variables,
291		#' multivar = TRUE,
292		#' ) %>%
293		#' build_table(dta_bladder)
294		#' result_multivar
295		#'
296		#' result_univar_covs <- basic_table() %>%
297		#' summarize_coxreg(
298		#' variables = u2_variables,
299		#' ) %>%
300		#' build_table(dta_bladder)
301		#' result_univar_covs
302		#'
303		#' result_multivar_covs <- basic_table() %>%
304		#' summarize_coxreg(
305		#' variables = m2_variables,
306		#' multivar = TRUE,
307		#' varlabels = c("Covariate 1", "Covariate 2") # custom labels
308		#' ) %>%
309		#' build_table(dta_bladder)
310		#' result_multivar_covs
311		#'
312		#' @export
summarize_coxreg <- function(lyt,
                             variables,
                             control = control_coxreg(),
                             at = list(),
                             multivar = FALSE,
                             common_var = "STUDYID",
                             .stats = c("n", "hr", "ci", "pval", "pval_inter"),
                             .formats = c(
                               n = "xx", hr = "xx.xx", ci = "(xx.xx, xx.xx)",
                               pval = "x.xxxx | (<0.0001)", pval_inter = "x.xxxx | (<0.0001)"
                             ),
                             varlabels = NULL,
                             .indent_mods = NULL,
                             na_level = "",
                             .section_div = NA_character_) {
  # Adds a Cox regression summary (one column per statistic, row sections for
  # the treatment arm and for each covariate) to the layout `lyt`.
  if (multivar && control$interaction) {
    warning(paste(
      "Interactions are not available for multivariate cox regression using summarize_coxreg.",
      "The model will be calculated without interaction effects."
    ))
  }
  if (control$interaction && !"arm" %in% names(variables)) {
    stop("To include interactions please specify 'arm' in variables.")
  }

  # Keep only the statistics valid for the requested model, in canonical order.
  .stats <- if (!"arm" %in% names(variables) || multivar) { # only valid statistics
    intersect(c("hr", "ci", "pval"), .stats)
  } else if (control$interaction) {
    intersect(c("n", "hr", "ci", "pval", "pval_inter"), .stats)
  } else {
    intersect(c("n", "hr", "ci", "pval"), .stats)
  }
  stat_labels <- c(
    n = "n", hr = "Hazard Ratio", ci = paste0(control$conf_level * 100, "% CI"),
    pval = "p-value", pval_inter = "Interaction p-value"
  )
  stat_labels <- stat_labels[names(stat_labels) %in% .stats]
  # Select the formats in the order of `.stats`, not in the order supplied by
  # the caller: `.stats` and `.formats` are passed below as parallel
  # per-column vectors, so a differently ordered `.formats` would otherwise
  # attach formats to the wrong statistic columns.
  .formats <- .formats[intersect(.stats, names(.formats))]
  env <- new.env() # create caching environment

  # One column per statistic, all based on `common_var`.
  # NOTE(review): relies on rtables handing the i-th element of each
  # `extra_args` entry to the i-th column of the split - confirm against
  # the rtables documentation.
  lyt <- lyt %>%
    split_cols_by_multivar(
      vars = rep(common_var, length(.stats)),
      varlabels = stat_labels,
      extra_args = list(
        .stats = .stats, .formats = .formats, .indent_mods = .indent_mods, na_level = rep(na_level, length(.stats)),
        cache_env = replicate(length(.stats), list(env))
      )
    )

  if ("arm" %in% names(variables)) { # treatment effect
    lyt <- lyt %>%
      split_rows_by(
        common_var,
        split_label = "Treatment:",
        label_pos = "visible",
        section_div = head(.section_div, 1)
      ) %>%
      summarize_row_groups(
        cfun = a_coxreg,
        extra_args = list(
          variables = variables, control = control, multivar = multivar, eff = TRUE, var_main = multivar
        )
      )
    if (multivar) { # treatment level effects
      lyt <- lyt %>%
        analyze_colvars(
          afun = a_coxreg,
          extra_args = list(eff = TRUE, control = control, variables = variables, multivar = multivar, labelstr = "")
        )
    }
  }

  if ("covariates" %in% names(variables)) { # covariate main effects
    lyt <- lyt %>%
      split_rows_by_multivar(
        vars = variables$covariates,
        varlabels = varlabels,
        split_label = "Covariate:",
        nested = FALSE,
        child_labels = if (multivar || control$interaction || !"arm" %in% names(variables)) "default" else "hidden",
        section_div = tail(.section_div, 1)
      )
    if (multivar || control$interaction || !"arm" %in% names(variables)) {
      lyt <- lyt %>%
        summarize_row_groups(
          cfun = a_coxreg,
          extra_args = list(
            variables = variables, at = at, control = control, multivar = multivar,
            var_main = if (multivar) multivar else control$interaction
          )
        )
    } else {
      if (!is.null(varlabels)) names(varlabels) <- variables$covariates
      lyt <- lyt %>%
        analyze_colvars(
          afun = a_coxreg,
          extra_args = list(
            variables = variables, at = at, control = control, multivar = multivar,
            var_main = if (multivar) multivar else control$interaction,
            labelstr = if (is.null(varlabels)) "" else varlabels
          )
        )
    }

    # This must stay after the main-effect rows above: it changes `control`
    # only for the covariate level rows added next.
    if (!"arm" %in% names(variables)) control$interaction <- TRUE # special case: univar no arm
    if (multivar || control$interaction) { # covariate level effects
      lyt <- lyt %>%
        analyze_colvars(
          afun = a_coxreg,
          extra_args = list(variables = variables, at = at, control = control, multivar = multivar, labelstr = "")
        )
    }
  }

  lyt
}

1		#' Convert Table into Matrix of Strings
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Helper function to use mostly within tests. The `with_spaces` parameter allows
6		#' testing not only the content but also the indentation and table structure.
7		#' `print_txt_to_copy` instead facilitates test development by printing well-formatted
8		#' text that only needs to be copied and pasted into the expected output.
9		#'
10		#' @param x `rtables` table.
11		#' @param with_spaces Should the tested table keep the indentation and other relevant spaces?
12		#' @param print_txt_to_copy Utility to have a way to copy the input table directly
13		#' into the expected variable instead of copying it too manually.
14		#'
15		#' @return A `matrix` of `string`s.
16		#'
17		#' @export
to_string_matrix <- function(x, with_spaces = FALSE, print_txt_to_copy = FALSE) {
  checkmate::assert_flag(with_spaces)
  checkmate::assert_flag(print_txt_to_copy)

  # Either the rendered table split into lines, or the matrix of cell strings.
  out <- if (with_spaces) {
    strsplit(toString(matrix_form(x, TRUE)), "\\n")[[1]]
  } else {
    matrix_form(x)$string
  }

  # Optionally print a `c(...)` expression that can be pasted as expected output.
  if (print_txt_to_copy) {
    lines_to_copy <- if (with_spaces) {
      out
    } else {
      apply(out, 1, paste0, collapse = '", "')
    }
    cat(paste0('c(\n "', paste0(lines_to_copy, collapse = '",\n "'), '"\n)'))
  }

  out
}
41
42		#' Blank for Missing Input
43		#'
44		#' Helper function to use in tabulating model results.
45		#'
46		#' @param x (`vector`)\cr input for a cell.
47		#'
48		#' @return An empty `character` vector if all entries in `x` are missing (`NA`), otherwise
49		#' the unlisted version of `x`.
50		#'
51		#' @keywords internal
unlist_and_blank_na <- function(x) {
  flat <- unlist(x)
  # Anything other than an all-missing (or empty) input is returned unlisted.
  if (!all(is.na(flat))) {
    return(flat)
  }
  character()
}
60
61		#' Constructor for Content Functions given Data Frame with Flag Input
62		#'
63		#' This can be useful for tabulating model results.
64		#'
65		#' @param analysis_var (`string`)\cr variable name for the column containing values to be returned by the
66		#' content function.
67		#' @param flag_var (`string`)\cr variable name for the logical column identifying which row should be returned.
68		#' @param format (`string`)\cr `rtables` format to use.
69		#'
70		#' @return A content function which gives `df$analysis_var` at the row identified by
71		#' `.df_row$flag` in the given format.
72		#'
73		#' @keywords internal
cfun_by_flag <- function(analysis_var,
                         flag_var,
                         format = "xx",
                         .indent_mods = NULL) {
  checkmate::assert_string(analysis_var)
  checkmate::assert_string(flag_var)

  # The returned content function picks the rows flagged by `flag_var` and
  # reports their `analysis_var` values in a single labelled cell.
  function(df, labelstr) {
    flagged_rows <- which(df[[flag_var]])
    cell_value <- unlist_and_blank_na(df[[analysis_var]][flagged_rows])
    cell <- rcell(cell_value, format = format, indent_mod = .indent_mods)
    formatters::with_label(cell, labelstr)
  }
}
89
90		#' Content Row Function to Add Row Total to Labels
91		#'
92		#' This takes the label of the latest row split level and adds the row total in parentheses.
93		#'
94		#' @inheritParams argument_convention
95		#'
96		#' @return A `list` containing "row_count" with the row count value and the correct label.
97		#'
98		#' @note It is important here to not use `df` but rather `.N_row` in the implementation, because
99		#' the former is already split by columns and will refer to the first column of the data only.
100		#'
101		#' @keywords internal
c_label_n <- function(df,
                      labelstr,
                      .N_row) { # nolint
  # Row label of the form "<label> (N=<row total>)".
  row_label <- paste0(labelstr, " (N=", .N_row, ")")
  row_count <- formatters::with_label(c(.N_row, .N_row), row_label)
  list(row_count = row_count)
}
108
109		#' Layout Creating Function to Add Row Total Counts
110		#'
111		#' @description `r lifecycle::badge("stable")`
112		#'
113		#' This works analogously to [rtables::add_colcounts()] but on the rows. This function
114		#' is a wrapper for [rtables::summarize_row_groups()].
115		#'
116		#' @inheritParams argument_convention
117		#'
118		#' @return A modified layout where the latest row split labels now have the row-wise
119		#' total counts (i.e. without column-based subsetting) attached in parentheses.
120		#'
121		#' @note Row count values are contained in these row count rows but are not displayed
122		#' so that they are not considered zero rows by default when pruning.
123		#'
124		#' @examples
125		#' basic_table() %>%
126		#' split_cols_by("ARM") %>%
127		#' add_colcounts() %>%
128		#' split_rows_by("RACE", split_fun = drop_split_levels) %>%
129		#' add_rowcounts() %>%
130		#' analyze("AGE", afun = list_wrap_x(summary), format = "xx.xx") %>%
131		#' build_table(DM)
132		#'
133		#' @export
add_rowcounts <- function(lyt) {
  # The row count is formatted as an empty string, so the value is kept in
  # the row but not displayed.
  blank_format <- function(x, ...) ""
  summarize_row_groups(
    lyt,
    cfun = make_afun(
      c_label_n,
      .stats = "row_count",
      .formats = c(row_count = blank_format)
    )
  )
}
145
146		#' Obtain Column Indices
147		#'
148		#' @description `r lifecycle::badge("stable")`
149		#'
150		#' Helper function to extract column indices from a `VTableTree` for a given
151		#' vector of column names.
152		#'
153		#' @param table_tree (`VTableTree`)\cr table to extract the indices from.
154		#' @param col_names (`character`)\cr vector of column names.
155		#'
156		#' @return A vector of column indices.
157		#'
158		#' @export
h_col_indices <- function(table_tree, col_names) {
  checkmate::assert_class(table_tree, "VTableNodeInfo")

  # Column names are taken from the names of the "cextra_args" attribute of
  # the column info.
  available_names <- names(attr(col_info(table_tree), "cextra_args"))
  checkmate::assert_subset(col_names, available_names, empty.ok = FALSE)
  match(col_names, available_names)
}
164
165		#' Labels or Names of List Elements
166		#'
167		#' Internal helper function for working with nested statistic function results which typically
168		#' don't have labels but names that we can use.
169		#'
170		#' @param x a list
171		#'
172		#' @return A `character` vector with the labels or names for the list elements.
173		#'
174		#' @keywords internal
labels_or_names <- function(x) {
  checkmate::assert_multi_class(x, c("data.frame", "list"))
  element_labels <- sapply(x, obj_label)
  element_names <- rlang::names2(x)

  # Fall back to the element name wherever no label is set.
  missing_label <- sapply(element_labels, is.null)
  unlist(ifelse(missing_label, element_names, element_labels))
}
183
184		#' Convert to `rtable`
185		#'
186		#' @description `r lifecycle::badge("stable")`
187		#'
188		#' This is a new generic function to convert objects to `rtable` tables.
189		#'
190		#' @param x the object which should be converted to an `rtable`.
191		#' @param ... additional arguments for methods.
192		#'
193		#' @return An `rtables` table object. Note that the concrete class will depend on the method used.
194		#'
195		#' @export
as.rtable <- function(x, ...) { # nolint
  # S3 generic: dispatches on the class of the first argument `x`.
  UseMethod("as.rtable")
}
199
200		#' @describeIn as.rtable method for converting `data.frame` that contain numeric columns to `rtable`.
201		#'
202		#' @param format the format which should be used for the columns.
203		#'
204		#' @method as.rtable data.frame
205		#'
206		#' @examples
207		#' x <- data.frame(
208		#' a = 1:10,
209		#' b = rnorm(10)
210		#' )
211		#' as.rtable(x)
212		#'
213		#' @export
as.rtable.data.frame <- function(x, format = "xx.xx", ...) {
  checkmate::assert_numeric(unlist(x))

  # Header and format arguments for `rtable()`.
  table_args <- list(
    header = labels_or_names(x),
    format = format
  )

  # One `rrow()` per row of `x`, labelled with the corresponding row name.
  build_row <- function(row, row_name) {
    do.call(rrow, c(as.list(unname(row)), row.name = row_name))
  }
  row_args <- Map(
    build_row,
    row = as.data.frame(t(x)),
    row_name = rownames(x)
  )

  do.call(rtable, c(table_args, row_args))
}
238
239		#' Split parameters
240		#'
241		#' @description `r lifecycle::badge("stable")`
242		#'
243		#' It divides the data in the vector `param` into the groups defined by `f` based on the specified `value`. It is relevant
244		#' in `rtables` layers so as to distribute parameters `.stats` or `.formats` into lists with items corresponding to
245		#' specific analysis functions.
246		#'
247		#' @param param (`vector`)\cr the parameter to be split.
248		#' @param value (`vector`)\cr the value used to split.
249		#' @param f (`list` of `vectors`)\cr the reference to make the split
250		#'
251		#' @return A named `list` with the same element names as `f`, each containing the elements of `param` whose `value` is listed in the matching element of `f`, or `NULL` if there are none.
252		#'
253		#' @examples
254		#' f <- list(
255		#' surv = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci"),
256		#' surv_diff = c("rate_diff", "rate_diff_ci", "ztest_pval")
257		#' )
258		#'
259		#' .stats <- c("pt_at_risk", "rate_diff")
260		#' h_split_param(.stats, .stats, f = f)
261		#'
262		#' # $surv
263		#' # [1] "pt_at_risk"
264		#' #
265		#' # $surv_diff
266		#' # [1] "rate_diff"
267		#'
268		#' .formats <- c("pt_at_risk" = "xx", "event_free_rate" = "xxx")
269		#' h_split_param(.formats, names(.formats), f = f)
270		#'
271		#' # $surv
272		#' # pt_at_risk event_free_rate
273		#' # "xx" "xxx"
274		#' #
275		#' # $surv_diff
276		#' # NULL
277		#'
278		#' @export
h_split_param <- function(param,
                          value,
                          f) {
  lapply(f, function(group_values) {
    selected <- param[value %in% group_values]
    # Groups without any matching element are represented by `NULL`.
    if (length(selected) == 0) NULL else selected
  })
}
285
286		#' Get Selected Statistics Names
287		#'
288		#' Helper function to be used for creating `afun`.
289		#'
290		#' @param .stats (`vector` or `NULL`)\cr input to the layout creating function. Note that `NULL` means
291		#' in this context that all default statistics should be used.
292		#' @param all_stats (`character`)\cr all statistics which can be selected here potentially.
293		#'
294		#' @return A `character` vector with the selected statistics.
295		#'
296		#' @keywords internal
afun_selected_stats <- function(.stats, all_stats) {
  checkmate::assert_character(.stats, null.ok = TRUE)
  checkmate::assert_character(all_stats)

  # `NULL` requests every available statistic.
  if (is.null(.stats)) {
    return(all_stats)
  }
  # Otherwise keep only the requested statistics that are actually available.
  intersect(.stats, all_stats)
}
306
307		#' Add Variable Labels to Top Left Corner in Table
308		#'
309		#' @description `r lifecycle::badge("stable")`
310		#'
311		#' Helper layout creating function to just append the variable labels of a given variables vector
312		#' from a given dataset in the top left corner. If a variable label is not found then the
313		#' variable name itself is used instead. Multiple variable labels are concatenated with slashes.
314		#'
315		#' @inheritParams argument_convention
316		#' @param vars (`character`)\cr variable names of which the labels are to be looked up in `df`.
317		#' @param indent (`integer`)\cr non-negative number of nested indent space, default to 0L which means no indent.
318		#' 1L means two spaces indent, 2L means four spaces indent and so on.
319		#'
320		#' @return A modified layout with the new variable label(s) added to the top-left material.
321		#'
322		#' @note This is not an optimal implementation of course, since we are using here the data set
323		#' itself during the layout creation. When we have a more mature `rtables` implementation then
324		#' this will also be improved or not necessary anymore.
325		#'
326		#' @examples
327		#' lyt <- basic_table() %>%
328		#' split_cols_by("ARM") %>%
329		#' add_colcounts() %>%
330		#' split_rows_by("SEX") %>%
331		#' append_varlabels(DM, "SEX") %>%
332		#' analyze("AGE", afun = mean) %>%
333		#' append_varlabels(DM, "AGE", indent = 1)
334		#' build_table(lyt, DM)
335		#'
336		#' lyt <- basic_table() %>%
337		#' split_cols_by("ARM") %>%
338		#' split_rows_by("SEX") %>%
339		#' analyze("AGE", afun = mean) %>%
340		#' append_varlabels(DM, c("SEX", "AGE"))
341		#' build_table(lyt, DM)
342		#'
343		#' @export
append_varlabels <- function(lyt, df, vars, indent = 0L) {
  # Logical `indent` is still accepted, with a warning, and converted to 0L / 1L.
  if (checkmate::test_flag(indent)) {
    warning("indent argument is now accepting integers. Boolean indent will be converted to integers.")
    indent <- as.integer(indent)
  }

  checkmate::assert_data_frame(df)
  checkmate::assert_character(vars)
  checkmate::assert_count(indent)

  var_labs <- formatters::var_labels(df[vars], fill = TRUE)
  # Two spaces of padding per indent level, followed by the slash-separated labels.
  padding <- strrep(" ", indent * 2)
  topleft_label <- paste0(padding, paste(var_labs, collapse = " / "))

  append_topleft(lyt, topleft_label)
}

1		#' Proportion Difference
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' @inheritParams argument_convention
6		#'
7		#' @seealso [d_proportion_diff()]
8		#'
9		#' @name prop_diff
10		NULL
11
12		#' @describeIn prop_diff Statistics function estimating the difference
13		#' in terms of responder proportion.
14		#'
15		#' @inheritParams prop_diff_strat_nc
16		#' @param method (`string`)\cr the method used for the confidence interval estimation.
17		#'
18		#' @return
19		#' * `s_proportion_diff()` returns a named list of elements `diff` and `diff_ci`.
20		#'
21		#' @note When performing an unstratified analysis, methods `"cmh"`, `"strat_newcombe"`, and `"strat_newcombecc"` are
22		#' not permitted.
23		#'
24		#' @examples
25		#' # Summary
26		#'
27		#' ## Simulated data: 100 rows with a random response, two groups and two stratification factors.
28		#' nex <- 100 # Number of example rows
29		#' dta <- data.frame(
30		#' "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
31		#' "grp" = sample(c("A", "B"), nex, TRUE),
32		#' "f1" = sample(c("a1", "a2"), nex, TRUE),
33		#' "f2" = sample(c("x", "y", "z"), nex, TRUE),
34		#' stringsAsFactors = TRUE
35		#' )
36		#'
37		#' s_proportion_diff(
38		#' df = subset(dta, grp == "A"),
39		#' .var = "rsp",
40		#' .ref_group = subset(dta, grp == "B"),
41		#' .in_ref_col = FALSE,
42		#' conf_level = 0.90,
43		#' method = "ha"
44		#' )
45		#'
46		#' # CMH example with strata
47		#' s_proportion_diff(
48		#' df = subset(dta, grp == "A"),
49		#' .var = "rsp",
50		#' .ref_group = subset(dta, grp == "B"),
51		#' .in_ref_col = FALSE,
52		#' variables = list(strata = c("f1", "f2")),
53		#' conf_level = 0.90,
54		#' method = "cmh"
55		#' )
56		#'
57		#' @export
s_proportion_diff <- function(df,
                              .var,
                              .ref_group,
                              .in_ref_col,
                              variables = list(strata = NULL),
                              conf_level = 0.95,
                              method = c(
                                "waldcc", "wald", "cmh",
                                "ha", "newcombe", "newcombecc",
                                "strat_newcombe", "strat_newcombecc"
                              ),
                              weights_method = "cmh") {
  method <- match.arg(method)
  # Stratified methods cannot be used without strata variables.
  if (is.null(variables$strata) && checkmate::test_subset(method, c("cmh", "strat_newcombe", "strat_newcombecc"))) {
    stop(paste(
      "When performing an unstratified analysis, methods 'cmh', 'strat_newcombe', and 'strat_newcombecc' are not",
      "permitted. Please choose a different method."
    ))
  }
  # Placeholder result, kept as is when the current column is the reference column.
  y <- list(diff = "", diff_ci = "")

  if (!.in_ref_col) {
    # Stack reference and non-reference responses; `grp` flags the origin of each observation
    # with the reference group as first factor level.
    rsp <- c(.ref_group[[.var]], df[[.var]])
    grp <- factor(
      rep(
        c("ref", "Not-ref"),
        c(nrow(.ref_group), nrow(df))
      ),
      levels = c("ref", "Not-ref")
    )

    if (!is.null(variables$strata)) {
      strata_colnames <- variables$strata
      checkmate::assert_character(strata_colnames, null.ok = FALSE)
      strata_vars <- stats::setNames(as.list(strata_colnames), strata_colnames)

      assert_df_with_variables(df, strata_vars)
      assert_df_with_variables(.ref_group, strata_vars)

      # Merging interaction strata for reference group rows data and remaining
      strata <- c(
        interaction(.ref_group[strata_colnames]),
        interaction(df[strata_colnames])
      )
      strata <- as.factor(strata)
    }

    # Weights for the stratified Newcombe methods: `variables$weights_method` keeps its precedence,
    # otherwise the `weights_method` argument (default "cmh") is used instead of being discarded.
    if (!is.null(variables$weights_method)) {
      weights_method <- variables$weights_method
    }

    y <- switch(method,
      "wald" = prop_diff_wald(rsp, grp, conf_level, correct = FALSE),
      "waldcc" = prop_diff_wald(rsp, grp, conf_level, correct = TRUE),
      "ha" = prop_diff_ha(rsp, grp, conf_level),
      "newcombe" = prop_diff_nc(rsp, grp, conf_level, correct = FALSE),
      "newcombecc" = prop_diff_nc(rsp, grp, conf_level, correct = TRUE),
      "strat_newcombe" = prop_diff_strat_nc(rsp,
        grp,
        strata,
        weights_method,
        conf_level,
        correct = FALSE
      ),
      "strat_newcombecc" = prop_diff_strat_nc(rsp,
        grp,
        strata,
        weights_method,
        conf_level,
        correct = TRUE
      ),
      "cmh" = prop_diff_cmh(rsp, grp, strata, conf_level)[c("diff", "diff_ci")]
    )

    # Report the results as percentages.
    y$diff <- y$diff * 100
    y$diff_ci <- y$diff_ci * 100
  }

  attr(y$diff, "label") <- "Difference in Response rate (%)"
  attr(y$diff_ci, "label") <- d_proportion_diff(
    conf_level, method,
    long = FALSE
  )

  y
}
147
148		#' @describeIn prop_diff Formatted analysis function which is used as `afun` in `estimate_proportion_diff()`.
149		#'
150		#' @return
151		#' * `a_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()].
152		#'
153		#' @examples
154		#' a_proportion_diff(
155		#' df = subset(dta, grp == "A"),
156		#' .var = "rsp",
157		#' .ref_group = subset(dta, grp == "B"),
158		#' .in_ref_col = FALSE,
159		#' conf_level = 0.90,
160		#' method = "ha"
161		#' )
162		#'
163		#' @export
a_proportion_diff <- make_afun(
  s_proportion_diff,
  # The interval row is indented one level below the difference row.
  .indent_mods = c(diff = 0L, diff_ci = 1L),
  .formats = c(diff = "xx.x", diff_ci = "(xx.x, xx.x)")
)
169
170		#' @describeIn prop_diff Layout-creating function which can take statistics function arguments
171		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
172		#'
173		#' @param ... arguments passed to `s_proportion_diff()`.
174		#'
175		#' @return
176		#' * `estimate_proportion_diff()` returns a layout object suitable for passing to further layouting functions,
177		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
178		#' the statistics from `s_proportion_diff()` to the table layout.
179		#'
180		#' @examples
181		#' l <- basic_table() %>%
182		#' split_cols_by(var = "grp", ref_group = "B") %>%
183		#' estimate_proportion_diff(
184		#' vars = "rsp",
185		#' conf_level = 0.90,
186		#' method = "ha"
187		#' )
188		#'
189		#' build_table(l, df = dta)
190		#'
191		#' @export
estimate_proportion_diff <- function(lyt,
                                     vars,
                                     ...,
                                     var_labels = vars,
                                     show_labels = "hidden",
                                     table_names = vars,
                                     .stats = NULL,
                                     .formats = NULL,
                                     .labels = NULL,
                                     .indent_mods = NULL) {
  # Customize the default formatted analysis function with the user's display settings.
  custom_afun <- make_afun(
    a_proportion_diff,
    .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods
  )

  # Everything passed through `...` is forwarded to the statistics function as extra arguments.
  analyze(
    lyt,
    vars,
    afun = custom_afun,
    var_labels = var_labels,
    extra_args = list(...),
    show_labels = show_labels,
    table_names = table_names
  )
}
220
221		#' Check: Proportion Difference Arguments
222		#'
223		#' Verifies that the arguments are valid inputs for the estimation of the
224		#' difference in responder proportions.
225		#'
226		#' @inheritParams prop_diff
227		#' @inheritParams prop_diff_wald
228		#'
229		#' @keywords internal
check_diff_prop_ci <- function(rsp,
                               grp,
                               strata = NULL,
                               conf_level,
                               correct = NULL) {
  n_obs <- length(rsp)
  checkmate::assert_logical(rsp, any.missing = FALSE)
  checkmate::assert_factor(grp, len = n_obs, any.missing = FALSE, n.levels = 2)
  checkmate::assert_number(conf_level, lower = 0, upper = 1)
  checkmate::assert_flag(correct, null.ok = TRUE)

  # `strata` is optional and only checked when supplied.
  if (is.null(strata)) {
    return(invisible())
  }
  checkmate::assert_factor(strata, len = n_obs)

  invisible()
}
246
247		#' Description of Method Used for Proportion Comparison
248		#'
249		#' @description `r lifecycle::badge("stable")`
250		#'
251		#' This is an auxiliary function that describes the analysis in
252		#' `s_proportion_diff`.
253		#'
254		#' @inheritParams s_proportion_diff
255		#' @param long (`logical`)\cr Whether a long or a short (default) description is required.
256		#'
257		#' @return A `string` describing the analysis.
258		#'
259		#' @seealso [prop_diff]
260		#'
261		#' @export
d_proportion_diff <- function(conf_level,
                              method,
                              long = FALSE) {
  ci_label <- paste0(conf_level * 100, "% CI")
  if (long) {
    # Only the CMH estimate is described as an adjusted difference.
    target <- if (isTRUE(method == "cmh")) {
      "for adjusted difference"
    } else {
      "for difference"
    }
    ci_label <- paste(ci_label, target)
  }

  # Unknown methods have no description and raise an error.
  method_label <- switch(method,
    "cmh" = "CMH, without correction",
    "waldcc" = "Wald, with correction",
    "wald" = "Wald, without correction",
    "ha" = "Anderson-Hauck",
    "newcombe" = "Newcombe, without correction",
    "newcombecc" = "Newcombe, with correction",
    "strat_newcombe" = "Stratified Newcombe, without correction",
    "strat_newcombecc" = "Stratified Newcombe, with correction",
    stop(paste(method, "does not have a description"))
  )
  paste0(ci_label, " (", method_label, ")")
}
290
291		#' Helper Functions to Calculate Proportion Difference
292		#'
293		#' @description `r lifecycle::badge("stable")`
294		#'
295		#' @inheritParams argument_convention
296		#' @inheritParams prop_diff
297		#' @param grp (`factor`)\cr vector assigning observations to one out of two groups
298		#' (e.g. reference and treatment group).
299		#'
300		#' @return A named `list` of elements `diff` (proportion difference) and `diff_ci`
301		#' (proportion difference confidence interval).
302		#'
303		#' @seealso [prop_diff()] for implementation of these helper functions.
304		#'
305		#' @name h_prop_diff
306		NULL
307
308		#' @describeIn h_prop_diff The Wald interval follows the usual textbook
309		#' definition for a single proportion confidence interval using the normal
310		#' approximation. It is possible to include a continuity correction for Wald's
311		#' interval.
312		#'
313		#' @param correct (`logical`)\cr whether to include the continuity correction. For further
314		#' information, see [stats::prop.test()].
315		#'
316		#' @examples
317		#' # Wald confidence interval
318		#' set.seed(2)
319		#' rsp <- sample(c(TRUE, FALSE), replace = TRUE, size = 20)
320		#' grp <- factor(c(rep("A", 10), rep("B", 10)))
321		#' prop_diff_wald(rsp = rsp, grp = grp, conf_level = 0.95, correct = FALSE)
322		#'
323		#' @export
prop_diff_wald <- function(rsp,
                           grp,
                           conf_level = 0.95,
                           correct = FALSE) {
  # The continuity correction selects the corrected Wald variant.
  mthd <- if (isTRUE(correct)) "waldcc" else "wald"
  grp <- as_factor_keep_attributes(grp)
  # Validates `rsp` (logical, no missing values), `grp` (two-level factor of the same length),
  # `conf_level` and `correct`, so these checks are not repeated here.
  check_diff_prop_ci(
    rsp = rsp, grp = grp, conf_level = conf_level, correct = correct
  )

  # Rows follow the `grp` levels; columns are responders (TRUE) then non-responders (FALSE).
  tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE)))
  # x1 and n1 are non-reference groups.
  diff_ci <- desctools_binom(
    x1 = tbl[2], n1 = sum(tbl[2], tbl[4]),
    x2 = tbl[1], n2 = sum(tbl[1], tbl[3]),
    conf.level = conf_level,
    method = mthd
  )

  list(
    "diff" = unname(diff_ci[, "est"]),
    "diff_ci" = unname(diff_ci[, c("lwr.ci", "upr.ci")])
  )
}
356
357		#' @describeIn h_prop_diff Anderson-Hauck confidence interval.
358		#'
359		#' @examples
360		#' # Anderson-Hauck confidence interval
361		#' ## "Mid" case: 3/4 respond in group A, 1/2 respond in group B.
362		#' rsp <- c(TRUE, FALSE, FALSE, TRUE, TRUE, TRUE)
363		#' grp <- factor(c("A", "B", "A", "B", "A", "A"), levels = c("B", "A"))
364		#' prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.90)
365		#'
366		#' ## Edge case: Same proportion of response in A and B.
367		#' rsp <- c(TRUE, FALSE, TRUE, FALSE)
368		#' grp <- factor(c("A", "A", "B", "B"), levels = c("A", "B"))
369		#' prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.6)
370		#'
371		#' @export
prop_diff_ha <- function(rsp,
                         grp,
                         conf_level) {
  grp <- as_factor_keep_attributes(grp)
  check_diff_prop_ci(rsp = rsp, grp = grp, conf_level = conf_level)

  # Rows follow the `grp` levels; columns are responders (TRUE) then non-responders (FALSE).
  counts <- table(grp, factor(rsp, levels = c(TRUE, FALSE)))
  rsp_first <- counts[1]
  rsp_second <- counts[2]
  n_first <- sum(counts[1], counts[3])
  n_second <- sum(counts[2], counts[4])

  # x1 and n1 are non-reference groups.
  ha_ci <- desctools_binom(
    x1 = rsp_second, n1 = n_second,
    x2 = rsp_first, n2 = n_first,
    conf.level = conf_level,
    method = "ha"
  )
  list(
    "diff" = unname(ha_ci[, "est"]),
    "diff_ci" = unname(ha_ci[, c("lwr.ci", "upr.ci")])
  )
}
391
392		#' @describeIn h_prop_diff Newcombe confidence interval. It is based on
393		#' the Wilson score confidence interval for a single binomial proportion.
394		#'
395		#' @examples
396		#' # Newcombe confidence interval
397		#'
398		#' set.seed(1)
399		#' rsp <- c(
400		#' sample(c(TRUE, FALSE), size = 40, prob = c(3 / 4, 1 / 4), replace = TRUE),
401		#' sample(c(TRUE, FALSE), size = 40, prob = c(1 / 2, 1 / 2), replace = TRUE)
402		#' )
403		#' grp <- factor(rep(c("A", "B"), each = 40), levels = c("B", "A"))
404		#' table(rsp, grp)
405		#' prop_diff_nc(rsp = rsp, grp = grp, conf_level = 0.9)
406		#'
407		#' @export
prop_diff_nc <- function(rsp,
                         grp,
                         conf_level,
                         correct = FALSE) {
  # The continuity correction selects the corrected score variant.
  if (isTRUE(correct)) {
    mthd <- "scorecc"
  } else {
    mthd <- "score"
  }
  grp <- as_factor_keep_attributes(grp)
  check_diff_prop_ci(rsp = rsp, grp = grp, conf_level = conf_level)

  # Rows follow the `grp` levels; columns are responders (TRUE) then non-responders (FALSE).
  # The estimate itself is taken from `desctools_binom()`, so no separate difference is computed here.
  tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE)))
  ci <- desctools_binom(
    # x1 and n1 are non-reference groups.
    x1 = tbl[2], n1 = sum(tbl[2], tbl[4]),
    x2 = tbl[1], n2 = sum(tbl[1], tbl[3]),
    conf.level = conf_level,
    method = mthd
  )
  list(
    "diff" = unname(ci[, "est"]),
    "diff_ci" = unname(ci[, c("lwr.ci", "upr.ci")])
  )
}
435
436		#' @describeIn h_prop_diff Calculates the weighted difference. This is defined as the difference in
437		#' response rates between the experimental treatment group and the control treatment group, adjusted
438		#' for stratification factors by applying Cochran-Mantel-Haenszel (CMH) weights. For the CMH chi-squared
439		#' test, use [stats::mantelhaen.test()].
440		#'
441		#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
442		#'
443		#' @examples
444		#' # Cochran-Mantel-Haenszel confidence interval
445		#'
446		#' set.seed(2)
447		#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
448		#' grp <- sample(c("Placebo", "Treatment"), 100, TRUE)
449		#' grp <- factor(grp, levels = c("Placebo", "Treatment"))
450		#' strata_data <- data.frame(
451		#' "f1" = sample(c("a", "b"), 100, TRUE),
452		#' "f2" = sample(c("x", "y", "z"), 100, TRUE),
453		#' stringsAsFactors = TRUE
454		#' )
455		#'
456		#' prop_diff_cmh(
457		#' rsp = rsp, grp = grp, strata = interaction(strata_data),
458		#' conf_level = 0.90
459		#' )
460		#'
461		#' @export
prop_diff_cmh <- function(rsp,
                          grp,
                          strata,
                          conf_level = 0.95) {
  grp <- as_factor_keep_attributes(grp)
  strata <- as_factor_keep_attributes(strata)
  check_diff_prop_ci(
    rsp = rsp, grp = grp, conf_level = conf_level, strata = strata
  )

  # `tapply()` yields `NA` for strata levels without observations; `na.rm = TRUE` keeps the
  # condition a proper `TRUE`/`FALSE` instead of failing in `if ()`.
  if (any(tapply(rsp, strata, length) < 5, na.rm = TRUE)) {
    warning("Less than 5 observations in some strata.")
  }

  # first dimension: FALSE, TRUE
  # 2nd dimension: CONTROL, TX
  # 3rd dimension: levels of strat
  # rsp as factor rsp to handle edge case of no FALSE (or TRUE) rsp records
  t_tbl <- table(
    factor(rsp, levels = c("FALSE", "TRUE")),
    grp,
    strata
  )
  # `drop = FALSE` together with `dims = 2` returns one named entry per stratum,
  # also when there is only a single stratum (where a dropped slice is no longer an array).
  n1 <- colSums(t_tbl[1:2, 1, , drop = FALSE], dims = 2)
  n2 <- colSums(t_tbl[1:2, 2, , drop = FALSE], dims = 2)
  p1 <- colSums(t_tbl[2, 1, , drop = FALSE], dims = 2) / n1
  p2 <- colSums(t_tbl[2, 2, , drop = FALSE], dims = 2) / n2
  # CMH weights
  # Only strata with observations in both groups contribute.
  use_stratum <- (n1 > 0) & (n2 > 0)
  n1 <- n1[use_stratum]
  n2 <- n2[use_stratum]
  p1 <- p1[use_stratum]
  p2 <- p2[use_stratum]
  wt <- (n1 * n2 / (n1 + n2))
  wt_normalized <- wt / sum(wt)
  est1 <- sum(wt_normalized * p1)
  est2 <- sum(wt_normalized * p2)
  estimate <- c(est1, est2)
  names(estimate) <- levels(grp)
  se1 <- sqrt(sum(wt_normalized^2 * p1 * (1 - p1) / n1))
  se2 <- sqrt(sum(wt_normalized^2 * p2 * (1 - p2) / n2))
  z <- stats::qnorm((1 + conf_level) / 2)
  err1 <- z * se1
  err2 <- z * se2
  ci1 <- c((est1 - err1), (est1 + err1))
  ci2 <- c((est2 - err2), (est2 + err2))
  estimate_ci <- list(ci1, ci2)
  names(estimate_ci) <- levels(grp)
  # Difference is second `grp` level minus first `grp` level.
  diff_est <- est2 - est1
  se_diff <- sqrt(sum(((p1 * (1 - p1) / n1) + (p2 * (1 - p2) / n2)) * wt_normalized^2))
  diff_ci <- c(diff_est - z * se_diff, diff_est + z * se_diff)

  list(
    prop = estimate,
    prop_ci = estimate_ci,
    diff = diff_est,
    diff_ci = diff_ci,
    weights = wt_normalized,
    n1 = n1,
    n2 = n2
  )
}
524
525		#' @describeIn h_prop_diff Calculates the stratified Newcombe confidence interval and difference in response
526		#' rates between the experimental treatment group and the control treatment group, adjusted for stratification
527		#' factors. This implementation follows closely the one proposed by \insertCite{Yan2010-jt;textual}{tern}.
528		#' Weights can be estimated from the heuristic proposed in [prop_strat_wilson()] or from CMH-derived weights
529		#' (see [prop_diff_cmh()]).
530		#'
531		#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
532		#' @param weights_method (`string`)\cr weights method. Can be either `"cmh"` or `"wilson_h"`
533		#' and directs the way weights are estimated.
534		#'
535		#' @references
536		#' \insertRef{Yan2010-jt}{tern}
537		#'
538		#' @examples
539		#' # Stratified Newcombe confidence interval
540		#'
541		#' set.seed(2)
542		#' data_set <- data.frame(
543		#' "rsp" = sample(c(TRUE, FALSE), 100, TRUE),
544		#' "f1" = sample(c("a", "b"), 100, TRUE),
545		#' "f2" = sample(c("x", "y", "z"), 100, TRUE),
546		#' "grp" = sample(c("Placebo", "Treatment"), 100, TRUE),
547		#' stringsAsFactors = TRUE
548		#' )
549		#'
550		#' prop_diff_strat_nc(
551		#' rsp = data_set$rsp, grp = data_set$grp, strata = interaction(data_set[2:3]),
552		#' weights_method = "cmh",
553		#' conf_level = 0.90
554		#' )
555		#'
556		#' prop_diff_strat_nc(
557		#' rsp = data_set$rsp, grp = data_set$grp, strata = interaction(data_set[2:3]),
558		#' weights_method = "wilson_h",
559		#' conf_level = 0.90
560		#' )
561		#'
562		#' @export
prop_diff_strat_nc <- function(rsp,
                               grp,
                               strata,
                               weights_method = c("cmh", "wilson_h"),
                               conf_level = 0.95,
                               correct = FALSE) {
  weights_method <- match.arg(weights_method)
  grp <- as_factor_keep_attributes(grp)
  strata <- as_factor_keep_attributes(strata)
  check_diff_prop_ci(
    rsp = rsp, grp = grp, conf_level = conf_level, strata = strata
  )
  checkmate::assert_number(conf_level, lower = 0, upper = 1)
  checkmate::assert_flag(correct)
  # `tapply()` yields `NA` for strata levels without observations; `na.rm = TRUE` keeps the
  # condition a proper `TRUE`/`FALSE` instead of failing in `if ()`.
  if (any(tapply(rsp, strata, length) < 5, na.rm = TRUE)) {
    warning("Less than 5 observations in some strata.")
  }

  rsp_by_grp <- split(rsp, f = grp)
  strata_by_grp <- split(strata, f = grp)

  # Finding the weights
  weights <- if (identical(weights_method, "cmh")) {
    prop_diff_cmh(rsp = rsp, grp = grp, strata = strata)$weights
  } else if (identical(weights_method, "wilson_h")) {
    prop_strat_wilson(rsp, strata, conf_level = conf_level, correct = correct)$weights
  }
  # Strata levels that did not receive a weight get a zero weight.
  weights[levels(strata)[!levels(strata) %in% names(weights)]] <- 0

  # Calculating lower (`l`) and upper (`u`) confidence bounds per group.
  strat_wilson_by_grp <- Map(
    prop_strat_wilson,
    rsp = rsp_by_grp,
    strata = strata_by_grp,
    weights = list(weights, weights),
    conf_level = conf_level,
    correct = correct
  )

  ci_ref <- strat_wilson_by_grp[[1]]
  ci_trt <- strat_wilson_by_grp[[2]]
  l_ref <- as.numeric(ci_ref$conf_int[1])
  u_ref <- as.numeric(ci_ref$conf_int[2])
  l_trt <- as.numeric(ci_trt$conf_int[1])
  u_trt <- as.numeric(ci_trt$conf_int[2])

  # Estimating the diff and n_ref, n_trt (it allows different weights to be used)
  t_tbl <- table(
    factor(rsp, levels = c("FALSE", "TRUE")),
    grp,
    strata
  )
  # `drop = FALSE` together with `dims = 2` returns one named entry per stratum,
  # also when there is only a single stratum (where a dropped slice is no longer an array).
  n_ref <- colSums(t_tbl[1:2, 1, , drop = FALSE], dims = 2)
  n_trt <- colSums(t_tbl[1:2, 2, , drop = FALSE], dims = 2)
  x_ref <- colSums(t_tbl[2, 1, , drop = FALSE], dims = 2)
  x_trt <- colSums(t_tbl[2, 2, , drop = FALSE], dims = 2)
  # Only strata with observations in both groups contribute to the estimate.
  use_stratum <- (n_ref > 0) & (n_trt > 0)
  n_ref <- n_ref[use_stratum]
  n_trt <- n_trt[use_stratum]
  p_ref <- x_ref[use_stratum] / n_ref
  p_trt <- x_trt[use_stratum] / n_trt

  # Align the weights with the strata actually used, by stratum name, so that dropped strata
  # cannot shift or recycle the weights. Falls back to positional use if the names do not match.
  strata_used <- names(n_ref)
  weights_used <- if (all(strata_used %in% names(weights))) {
    weights[strata_used]
  } else {
    weights
  }
  est1 <- sum(weights_used * p_ref)
  est2 <- sum(weights_used * p_trt)
  diff_est <- est2 - est1

  lambda1 <- sum(weights_used^2 / n_ref)
  lambda2 <- sum(weights_used^2 / n_trt)
  z <- stats::qnorm((1 + conf_level) / 2)

  lower <- diff_est - z * sqrt(lambda2 * l_trt * (1 - l_trt) + lambda1 * u_ref * (1 - u_ref))
  upper <- diff_est + z * sqrt(lambda1 * l_ref * (1 - l_ref) + lambda2 * u_trt * (1 - u_trt))

  list(
    "diff" = diff_est,
    "diff_ci" = c("lower" = lower, "upper" = upper)
  )
}

1		#' Tabulate Biomarker Effects on Binary Response by Subgroup
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Tabulate the estimated effects of multiple continuous biomarker variables
6		#' on a binary response endpoint across population subgroups.
7		#'
8		#' @inheritParams argument_convention
9		#' @param df (`data.frame`)\cr containing all analysis variables, as returned by
10		#' [extract_rsp_biomarkers()].
11		#' @param vars (`character`)\cr the names of statistics to be reported among:
12		#' * `n_tot`: Total number of patients per group.
13		#' * `n_rsp`: Total number of responses per group.
14		#' * `prop`: Total response proportion per group.
15		#' * `or`: Odds ratio.
16		#' * `ci`: Confidence interval of odds ratio.
17		#' * `pval`: p-value of the effect.
18		#' Note, the statistics `n_tot`, `or` and `ci` are required.
19		#'
20		#' @return An `rtables` table summarizing biomarker effects on binary response by subgroup.
21		#'
22		#' @details These functions create a layout starting from a data frame which contains
23		#' the required statistics. The tables are then typically used as input for forest plots.
24		#'
25		#' @note In contrast to [tabulate_rsp_subgroups()] this tabulation function does
26		#' not start from an input layout `lyt`. This is because internally the table is
27		#' created by combining multiple subtables.
28		#'
29		#' @seealso [h_tab_rsp_one_biomarker()] which is used internally, [extract_rsp_biomarkers()].
30		#'
31		#' @examples
32		#' library(dplyr)
33		#' library(forcats)
34		#'
35		#' adrs <- tern_ex_adrs
36		#' adrs_labels <- formatters::var_labels(adrs)
37		#'
38		#' adrs_f <- adrs %>%
39		#' filter(PARAMCD == "BESRSPI") %>%
40		#' mutate(rsp = AVALC == "CR")
41		#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
42		#'
43		#' df <- extract_rsp_biomarkers(
44		#' variables = list(
45		#' rsp = "rsp",
46		#' biomarkers = c("BMRKR1", "AGE"),
47		#' covariates = "SEX",
48		#' subgroups = "BMRKR2"
49		#' ),
50		#' data = adrs_f
51		#' )
52		#'
53		#' \dontrun{
54		#' ## Table with default columns.
55		#' tabulate_rsp_biomarkers(df)
56		#'
57		#' ## Table with a manually chosen set of columns: leave out "pval", reorder.
58		#' tab <- tabulate_rsp_biomarkers(
59		#' df = df,
60		#' vars = c("n_rsp", "ci", "n_tot", "prop", "or")
61		#' )
62		#'
63		#' ## Finally produce the forest plot.
64		#' g_forest(tab, xlim = c(0.7, 1.4))
65		#' }
66		#'
67		#' @export
68		#' @name response_biomarkers_subgroups
tabulate_rsp_biomarkers <- function(df,
                                    vars = c("n_tot", "n_rsp", "prop", "or", "ci", "pval"),
                                    .indent_mods = 0L) {
  checkmate::assert_data_frame(df)
  checkmate::assert_character(df$biomarker)
  checkmate::assert_character(df$biomarker_label)
  checkmate::assert_subset(vars, c("n_tot", "n_rsp", "prop", "or", "ci", "pval"))

  # Builds the sub-table for the rows of a single biomarker.
  tabulate_one <- function(df_sub) {
    tab_sub <- h_tab_rsp_one_biomarker(
      df = df_sub,
      vars = vars,
      .indent_mods = .indent_mods
    )
    # Insert label row as first row in table.
    first_path <- row_paths(tab_sub)[[1]][1]
    label_at_path(tab_sub, path = first_path) <- df_sub$biomarker_label[1]
    tab_sub
  }
  tabs <- lapply(split(df, f = df$biomarker), FUN = tabulate_one)
  result <- do.call(rbind, tabs)

  # Column positions within `vars` are stored as attributes on the combined table.
  structure(
    result,
    forest_header = paste0(c("Lower", "Higher"), "\nBetter"),
    col_x = match("or", vars),
    col_ci = match("ci", vars),
    col_symbol_size = grep("n_tot", vars)
  )
}
101
102		#' Prepares Response Data Estimates for Multiple Biomarkers in a Single Data Frame
103		#'
104		#' @description `r lifecycle::badge("stable")`
105		#'
106		#' Prepares estimates for number of responses, patients and overall response rate,
107		#' as well as odds ratio estimates, confidence intervals and p-values,
108		#' for multiple biomarkers across population subgroups in a single data frame.
109		#' `variables` corresponds to the names of variables found in `data`, passed as a
110		#' named list and requires elements `rsp` and `biomarkers` (vector of continuous
111		#' biomarker variables) and optionally `covariates`, `subgroups` and `strat`.
112		#' `groups_lists` optionally specifies groupings for `subgroups` variables.
113		#'
114		#' @inheritParams argument_convention
115		#' @inheritParams response_subgroups
116		#' @param control (named `list`)\cr controls for the response definition and the
117		#' confidence level produced by [control_logistic()].
118		#'
119		#' @return A `data.frame` with columns `biomarker`, `biomarker_label`, `n_tot`, `n_rsp`,
120		#' `prop`, `or`, `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`,
121		#' `var_label`, and `row_type`.
122		#'
123		#' @note You can also specify a continuous variable in `rsp` and then use the
124		#' `response_definition` control to convert that internally to a logical
125		#' variable reflecting binary response.
126		#'
127		#' @seealso [h_logistic_mult_cont_df()] which is used internally.
128		#'
129		#' @examples
130		#' library(dplyr)
131		#' library(forcats)
132		#'
133		#' adrs <- tern_ex_adrs
134		#' adrs_labels <- formatters::var_labels(adrs)
135		#'
136		#' adrs_f <- adrs %>%
137		#' filter(PARAMCD == "BESRSPI") %>%
138		#' mutate(rsp = AVALC == "CR")
139		#'
140		#' # Typical analysis of two continuous biomarkers `BMRKR1` and `AGE`,
141		#' # in logistic regression models with one covariate `SEX`. The subgroups
142		#' # are defined by the levels of `BMRKR2`.
143		#' df <- extract_rsp_biomarkers(
144		#' variables = list(
145		#' rsp = "rsp",
146		#' biomarkers = c("BMRKR1", "AGE"),
147		#' covariates = "SEX",
148		#' subgroups = "BMRKR2"
149		#' ),
150		#' data = adrs_f
151		#' )
152		#' df
153		#'
154		#' # Here we group the levels of `BMRKR2` manually, and we add a stratification
155		#' # variable `STRATA1`. We also here use a continuous variable `EOSDY`
156		#' # which is then binarized internally (response is defined as this variable
157		#' # being larger than 500).
158		#' df_grouped <- extract_rsp_biomarkers(
159		#' variables = list(
160		#' rsp = "EOSDY",
161		#' biomarkers = c("BMRKR1", "AGE"),
162		#' covariates = "SEX",
163		#' subgroups = "BMRKR2",
164		#' strat = "STRATA1"
165		#' ),
166		#' data = adrs_f,
167		#' groups_lists = list(
168		#' BMRKR2 = list(
169		#' "low" = "LOW",
170		#' "low/medium" = c("LOW", "MEDIUM"),
171		#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
172		#' )
173		#' ),
174		#' control = control_logistic(
175		#' response_definition = "I(response > 500)"
176		#' )
177		#' )
178		#' df_grouped
179		#'
180		#' @export
extract_rsp_biomarkers <- function(variables,
                                   data,
                                   groups_lists = list(),
                                   control = control_logistic(),
                                   label_all = "All Patients") {
  # Validate the inputs before any model is fitted.
  assert_list_of_variables(variables)
  checkmate::assert_string(variables$rsp)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_string(label_all)

  # Single place where the per-biomarker models are fitted for a given data set.
  fit_on <- function(dat) {
    h_logistic_mult_cont_df(
      variables = variables,
      data = dat,
      control = control
    )
  }

  # Overall ("all patients") rows always come first in the result.
  result_all <- fit_on(data)
  result_all$subgroup <- label_all
  result_all$var <- "ALL"
  result_all$var_label <- label_all
  result_all$row_type <- "content"

  # Without subgroup variables there is nothing more to add.
  if (is.null(variables$subgroups)) {
    return(result_all)
  }

  # One data subset per subgroup level (or per custom grouping in `groups_lists`).
  l_data <- h_split_by_subgroups(
    data,
    variables$subgroups,
    groups_lists = groups_lists
  )

  # Fit the models in each subset and attach the subgroup labels, repeating the
  # first row of the label data frame once per result row.
  l_result <- lapply(l_data, function(grp) {
    result <- fit_on(grp$df)
    result_labels <- grp$df_labels[rep(1, times = nrow(result)), ]
    cbind(result, result_labels)
  })

  result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE))
  result_subgroups$row_type <- "analysis"

  rbind(result_all, result_subgroups)
}

1		#' Patient Counts for Laboratory Events (Worsen From Baseline) by Highest Grade Post-Baseline
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Patient count and fraction for laboratory events (worsen from baseline) shift table.
6		#'
7		#' @inheritParams argument_convention
8		#'
9		#' @seealso Relevant helper functions [h_adlb_worsen()] and [h_worsen_counter()]
10		#'
11		#' @name abnormal_by_worst_grade_worsen
12		NULL
13
14		#' Helper Function to Prepare ADLB with Worst Labs
15		#'
16		#' @description `r lifecycle::badge("stable")`
17		#'
18		#' Helper function to prepare a `df` for generating the patient count shift table.
19		#'
20		#' @param adlb (`data.frame`)\cr `ADLB` dataframe
21		#' @param worst_flag_low (named `vector`)\cr Worst low post-baseline lab grade flag variable
22		#' @param worst_flag_high (named `vector`)\cr Worst high post-baseline lab grade flag variable
23		#' @param direction_var (`string`)\cr Direction variable specifying the direction of the shift table of interest.
24		#' Only lab records flagged by `L`, `H` or `B` are included in the shift table.
25		#' * `L`: low direction only
26		#' * `H`: high direction only
27		#' * `B`: both low and high directions
28		#'
29		#' @return `h_adlb_worsen()` returns the `adlb` `data.frame` containing only the
30		#' worst labs specified according to `worst_flag_low` or `worst_flag_high` for the
31		#' direction specified according to `direction_var`. For instance, for a lab that is
32		#' needed for the low direction only, only records flagged by `worst_flag_low` are
33		#' selected. For a lab that is needed for both low and high directions, the worst
34		#' low records are selected for the low direction, and the worst high records are selected
35		#' for the high direction.
36		#'
37		#' @seealso [abnormal_by_worst_grade_worsen]
38		#'
39		#' @examples
40		#' library(dplyr)
41		#'
42		#' # The direction variable, GRADDR, is based on metadata
43		#' adlb <- tern_ex_adlb %>%
44		#' mutate(
45		#' GRADDR = case_when(
46		#' PARAMCD == "ALT" ~ "B",
47		#' PARAMCD == "CRP" ~ "L",
48		#' PARAMCD == "IGA" ~ "H"
49		#' )
50		#' ) %>%
51		#' filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
52		#'
53		#' df <- h_adlb_worsen(
54		#' adlb,
55		#' worst_flag_low = c("WGRLOFL" = "Y"),
56		#' worst_flag_high = c("WGRHIFL" = "Y"),
57		#' direction_var = "GRADDR"
58		#' )
59		#'
60		#' @export
h_adlb_worsen <- function(adlb,
                          worst_flag_low = NULL,
                          worst_flag_high = NULL,
                          direction_var) {
  checkmate::assert_string(direction_var)
  checkmate::assert_subset(as.character(unique(adlb[[direction_var]])), c("B", "L", "H"))
  assert_df_with_variables(adlb, list("Col" = direction_var))

  # The flag columns that must exist depend on which directions occur in the data.
  if (any(unique(adlb[[direction_var]]) == "H")) {
    assert_df_with_variables(adlb, list("High" = names(worst_flag_high)))
  }

  if (any(unique(adlb[[direction_var]]) == "L")) {
    assert_df_with_variables(adlb, list("Low" = names(worst_flag_low)))
  }

  if (any(unique(adlb[[direction_var]]) == "B")) {
    assert_df_with_variables(
      adlb,
      list(
        "Low" = names(worst_flag_low),
        "High" = names(worst_flag_high)
      )
    )
  }

  # Keep the records flagged as worst post-baseline lab by at least one of the
  # supplied flags (low, high, or both).
  worst_flag <- c(worst_flag_low, worst_flag_high)
  flagged_rows <- Map(
    function(col, value) which(adlb[[col]] == value),
    names(worst_flag),
    worst_flag
  )
  adlb_f <- adlb[Reduce(union, flagged_rows), ]

  # For one direction, returns two data frames: the records of labs needed in
  # that direction only (`code`), then the records of labs needed in both
  # directions ("B"). In each, the direction is relabelled to `label` and only
  # rows matching the worst flag of that direction are kept.
  select_direction <- function(flag, code, label) {
    flag_col <- names(flag)
    lapply(c(code, "B"), function(dir_code) {
      part <- adlb_f[which(adlb_f[[direction_var]] == dir_code), ]
      part[[direction_var]] <- rep(label, nrow(part))
      part[which(part[[flag_col]] == flag), ]
    })
  }

  # Stacking order: high-only, both-as-high, low-only, both-as-low. Records of
  # labs needed in both directions can therefore appear twice, once per direction.
  pieces <- c(
    if (!is.null(worst_flag_high)) select_direction(worst_flag_high, "H", "High"),
    if (!is.null(worst_flag_low)) select_direction(worst_flag_low, "L", "Low")
  )

  # Without any worst flag there is nothing to select; fail with a clear message
  # instead of an "object not found" error further down.
  if (length(pieces) == 0) {
    stop("At least one of `worst_flag_low` and `worst_flag_high` must be specified.")
  }

  out <- do.call(rbind, pieces)

  # Carry the variable labels over to the stacked result.
  formatters::var_labels(out) <- formatters::var_labels(adlb_f, fill = FALSE)
  out
}
149
150		#' Helper Function to Analyze Patients for [s_count_abnormal_lab_worsen_by_baseline()]
151		#'
152		#' @description `r lifecycle::badge("stable")`
153		#'
154		#' Helper function to count the number of patients and the fraction of patients according to
155		#' highest post-baseline lab grade variable `.var`, baseline lab grade variable `baseline_var`,
156		#' and the direction of interest specified in `direction_var`.
157		#'
158		#' @inheritParams argument_convention
159		#' @inheritParams h_adlb_worsen
160		#' @param baseline_var (`string`)\cr baseline lab grade variable
161		#'
162		#' @return `h_worsen_counter()` returns the counts and fraction of patients
163		#' whose worst post-baseline lab grades are worse than their baseline grades, for
164		#' post-baseline worst grades "1", "2", "3", "4" and "Any".
165		#'
166		#' @seealso [abnormal_by_worst_grade_worsen]
167		#'
168		#' @examples
169		#' library(dplyr)
170		#'
171		#' # The direction variable, GRADDR, is based on metadata
172		#' adlb <- tern_ex_adlb %>%
173		#' mutate(
174		#' GRADDR = case_when(
175		#' PARAMCD == "ALT" ~ "B",
176		#' PARAMCD == "CRP" ~ "L",
177		#' PARAMCD == "IGA" ~ "H"
178		#' )
179		#' ) %>%
180		#' filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
181		#'
182		#' df <- h_adlb_worsen(
183		#' adlb,
184		#' worst_flag_low = c("WGRLOFL" = "Y"),
185		#' worst_flag_high = c("WGRHIFL" = "Y"),
186		#' direction_var = "GRADDR"
187		#' )
188		#'
189		#' # `h_worsen_counter`
190		#' h_worsen_counter(
191		#' df %>% filter(PARAMCD == "CRP" & GRADDR == "Low"),
192		#' id = "USUBJID",
193		#' .var = "ATOXGR",
194		#' baseline_var = "BTOXGR",
195		#' direction_var = "GRADDR"
196		#' )
197		#'
198		#' @export
h_worsen_counter <- function(df, id, .var, baseline_var, direction_var) {
  checkmate::assert_string(id)
  checkmate::assert_string(.var)
  checkmate::assert_string(baseline_var)
  checkmate::assert_scalar(unique(df[[direction_var]]))
  checkmate::assert_subset(unique(df[[direction_var]]), c("High", "Low"))
  assert_df_with_variables(df, list(val = c(id, .var, baseline_var, direction_var)))

  # Read the direction before any rows are dropped. If every post-baseline value
  # is "<Missing>", the filtered data is empty and would no longer carry a
  # direction, which would make the comparisons below fail on a zero-length value.
  direction <- as.character(unique(df[[direction_var]]))

  # Remove records with a missing post-baseline grade.
  df <- df[df[[.var]] != "<Missing>", ]

  # Grades are negative in the low direction and positive in the high direction.
  if (direction == "Low") {
    grade <- -1:-4
    worst_grade <- -4
  } else {
    grade <- 1:4
    worst_grade <- 4
  }

  if (nrow(df) > 0) {
    by_grade <- lapply(grade, function(i) {
      # Keep rows whose baseline grade lies in the sequence from `i - sign(i)`
      # to `-worst_grade`, or is "<Missing>".
      df_temp <- df[df[[baseline_var]] %in% c((i + sign(i) * -1):(-1 * worst_grade), "<Missing>"), ]
      # num: patients among those rows whose post-baseline grade equals `i`.
      num <- length(unique(df_temp[df_temp[[.var]] %in% i, id, drop = TRUE]))
      # denom: all patients among those rows.
      denom <- length(unique(df_temp[[id]]))
      c(num = num, denom = denom)
    })
  } else {
    # No evaluable records: a single zero count entry.
    by_grade <- list(c(num = 0, denom = 0))
  }

  names(by_grade) <- as.character(seq_along(by_grade))

  # "Any" row: exclude patients whose baseline is already the worst grade.
  df_temp <- df[!df[[baseline_var]] %in% worst_grade, ]
  denom <- length(unique(df_temp[, id, drop = TRUE]))

  # Condition 1: missing baseline and a post-baseline grade in the direction of interest.
  con1 <- which(df_temp[[baseline_var]] == "<Missing>" & df_temp[[.var]] %in% grade)
  df_temp_nm <- df_temp[which(df_temp[[baseline_var]] != "<Missing>" & df_temp[[.var]] %in% grade), ]

  # Condition 2: non-missing baseline and a post-baseline grade worse than
  # baseline (smaller for "Low", larger for "High").
  post_grade <- as.numeric(as.character(df_temp_nm[[.var]]))
  base_grade <- as.numeric(as.character(df_temp_nm[[baseline_var]]))
  if (direction == "Low") {
    con2 <- which(post_grade < base_grade)
  } else {
    con2 <- which(post_grade > base_grade)
  }

  # Number of patients among the rows selected by the two index sets.
  num <- length(unique(df_temp[union(con1, con2), id, drop = TRUE]))

  list(fraction = c(by_grade, list("Any" = c(num = num, denom = denom))))
}
262
263		#' @describeIn abnormal_by_worst_grade_worsen Statistics function for patients whose worst post-baseline
264		#' lab grades are worse than their baseline grades.
265		#'
266		#' @param variables (named `list` of `string`)\cr list of additional analysis variables including:
267		#' * `id` (`string`)\cr subject variable name.
268		#' * `baseline_var` (`string`)\cr name of the data column containing baseline toxicity variable.
269		#' * `direction_var` (`string`)\cr direction variable; see the `direction_var` argument of [h_adlb_worsen()] for details.
270		#'
271		#' @return
272		#' * `s_count_abnormal_lab_worsen_by_baseline()` returns the counts and fraction of patients whose worst
273		#' post-baseline lab grades are worse than their baseline grades, for post-baseline worst grades
274		#' "1", "2", "3", "4" and "Any".
275		#'
276		#' @examples
277		#' library(dplyr)
278		#'
279		#' # The direction variable, GRADDR, is based on metadata
280		#' adlb <- tern_ex_adlb %>%
281		#' mutate(
282		#' GRADDR = case_when(
283		#' PARAMCD == "ALT" ~ "B",
284		#' PARAMCD == "CRP" ~ "L",
285		#' PARAMCD == "IGA" ~ "H"
286		#' )
287		#' ) %>%
288		#' filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
289		#'
290		#' df <- h_adlb_worsen(
291		#' adlb,
292		#' worst_flag_low = c("WGRLOFL" = "Y"),
293		#' worst_flag_high = c("WGRHIFL" = "Y"),
294		#' direction_var = "GRADDR"
295		#' )
296		#' # Internal function - s_count_abnormal_lab_worsen_by_baseline
297		#' \dontrun{
298		#' # Patients with worsening lab grade for CRP in the direction of low
299		#' s_count_abnormal_lab_worsen_by_baseline(
300		#' df = df %>% filter(ARMCD == "ARM A" & PARAMCD == "CRP"),
301		#' .var = "ATOXGR",
302		#' variables = list(
303		#' id = "USUBJID",
304		#' baseline_var = "BTOXGR",
305		#' direction_var = "GRADDR"
306		#' )
307		#' )
308		#' }
309		#'
310		#' @keywords internal
s_count_abnormal_lab_worsen_by_baseline <- function(df, # nolint
                                                    .var = "ATOXGR",
                                                    variables = list(
                                                      id = "USUBJID",
                                                      baseline_var = "BTOXGR",
                                                      direction_var = "GRADDR"
                                                    )) {
  # `variables` must contain exactly the three expected entries, each a single string.
  checkmate::assert_string(.var)
  checkmate::assert_set_equal(names(variables), c("id", "baseline_var", "direction_var"))
  checkmate::assert_string(variables$id)
  checkmate::assert_string(variables$baseline_var)
  checkmate::assert_string(variables$direction_var)
  assert_df_with_variables(df, c(aval = .var, variables[1:3]))
  assert_list_of_variables(variables)

  # The counting itself is delegated to the helper.
  h_worsen_counter(
    df = df,
    id = variables$id,
    .var = .var,
    baseline_var = variables$baseline_var,
    direction_var = variables$direction_var
  )
}
328
329
330		#' @describeIn abnormal_by_worst_grade_worsen Formatted analysis function which is used as `afun`
331		#' in `count_abnormal_lab_worsen_by_baseline()`.
332		#'
333		#' @return
334		#' * `a_count_abnormal_lab_worsen_by_baseline()` returns the corresponding list with
335		#' formatted [rtables::CellValue()].
336		#'
337		#' @examples
338		#' # Internal function - a_count_abnormal_lab_worsen_by_baseline
339		#' \dontrun{
340		#' a_count_abnormal_lab_worsen_by_baseline(
341		#' df = df %>% filter(ARMCD == "ARM A" & PARAMCD == "CRP"),
342		#' .var = "ATOXGR",
343		#' variables = list(id = "USUBJID", baseline_var = "BTOXGR", direction_var = "GRADDR")
344		#' )
345		#' }
346		#'
347		#' @keywords internal
# Formatted analysis function: wraps the statistics function, formats each
# `fraction` entry with `format_fraction` and ungroups the `fraction` list so
# that each of its elements becomes its own statistic.
a_count_abnormal_lab_worsen_by_baseline <- make_afun( # nolint
  fun = s_count_abnormal_lab_worsen_by_baseline,
  .formats = list(fraction = format_fraction),
  .ungroup_stats = "fraction"
)
353
354		#' @describeIn abnormal_by_worst_grade_worsen Layout-creating function which can take statistics function
355		#' arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
356		#'
357		#' @return
358		#' * `count_abnormal_lab_worsen_by_baseline()` returns a layout object suitable for passing to further layouting
359		#' functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted
360		#' rows containing the statistics from `s_count_abnormal_lab_worsen_by_baseline()` to the table layout.
361		#'
362		#' @examples
363		#' basic_table() %>%
364		#' split_cols_by("ARMCD") %>%
365		#' add_colcounts() %>%
366		#' split_rows_by("PARAMCD") %>%
367		#' split_rows_by("GRADDR") %>%
368		#' count_abnormal_lab_worsen_by_baseline(
369		#' var = "ATOXGR",
370		#' variables = list(
371		#' id = "USUBJID",
372		#' baseline_var = "BTOXGR",
373		#' direction_var = "GRADDR"
374		#' )
375		#' ) %>%
376		#' append_topleft("Direction of Abnormality") %>%
377		#' build_table(df = df, alt_counts_df = tern_ex_adsl)
378		#'
379		#' @export
count_abnormal_lab_worsen_by_baseline <- function(lyt, # nolint
                                                  var,
                                                  ...,
                                                  table_names = NULL,
                                                  .stats = NULL,
                                                  .formats = NULL,
                                                  .labels = NULL,
                                                  .indent_mods = NULL) {
  checkmate::assert_string(var)

  # Customize the formatted analysis function with the user-supplied settings.
  afun <- make_afun(
    a_count_abnormal_lab_worsen_by_baseline,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Forward `table_names` to the layout; previously it was accepted but
  # silently ignored. With the default `NULL` the table is named after `var`,
  # which is what `analyze()` does when no name is given.
  analyze(
    lyt = lyt,
    vars = var,
    afun = afun,
    extra_args = list(...),
    show_labels = "hidden",
    table_names = if (is.null(table_names)) var else table_names
  )
}

1		#' Estimation of Proportions
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Estimate the proportion of responders within a studied population.
6		#'
7		#' @inheritParams argument_convention
8		#'
9		#' @seealso [h_proportions]
10		#'
11		#' @name estimate_proportions
12		NULL
13
14		#' @describeIn estimate_proportions Statistics function estimating a
15		#' proportion along with its confidence interval.
16		#'
17		#' @inheritParams prop_strat_wilson
18		#' @param df (`logical` or `data.frame`)\cr if only a logical vector is used,
19		#' it indicates whether each subject is a responder or not. `TRUE` represents
20		#' a successful outcome. If a `data.frame` is provided, also the `strata` variable
21		#' names must be provided in `variables` as a list element with the strata strings.
22		#' In the case of `data.frame`, the logical vector of responses must be indicated as a
23		#' variable name in `.var`.
24		#' @param method (`string`)\cr the method used to construct the confidence interval
25		#' for proportion of successful outcomes; one of `waldcc`, `wald`, `clopper-pearson`,
26		#' `wilson`, `wilsonc`, `strat_wilson`, `strat_wilsonc`, `agresti-coull` or `jeffreys`.
27		#' @param long (`flag`)\cr a long description is required.
28		#'
29		#' @return
30		#' * `s_proportion()` returns statistics `n_prop` (`n` and proportion) and `prop_ci` (proportion CI) for a
31		#' given variable.
32		#'
33		#' @examples
34		#' # Case with only logical vector.
35		#' rsp_v <- c(1, 0, 1, 0, 1, 1, 0, 0)
36		#' s_proportion(rsp_v)
37		#'
38		#' # Example for Stratified Wilson CI
39		#' nex <- 100 # Number of example rows
40		#' dta <- data.frame(
41		#' "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
42		#' "grp" = sample(c("A", "B"), nex, TRUE),
43		#' "f1" = sample(c("a1", "a2"), nex, TRUE),
44		#' "f2" = sample(c("x", "y", "z"), nex, TRUE),
45		#' stringsAsFactors = TRUE
46		#' )
47		#'
48		#' s_proportion(
49		#' df = dta,
50		#' .var = "rsp",
51		#' variables = list(strata = c("f1", "f2")),
52		#' conf_level = 0.90,
53		#' method = "strat_wilson"
54		#' )
55		#'
56		#' @export
s_proportion <- function(df,
                         .var,
                         conf_level = 0.95,
                         method = c(
                           "waldcc", "wald", "clopper-pearson",
                           "wilson", "wilsonc", "strat_wilson", "strat_wilsonc",
                           "agresti-coull", "jeffreys"
                         ),
                         weights = NULL,
                         max_iterations = 50,
                         variables = list(strata = NULL),
                         long = FALSE) {
  method <- match.arg(method)
  checkmate::assert_flag(long)
  assert_proportion_value(conf_level)

  is_stratified_method <- method %in% c("strat_wilson", "strat_wilsonc")

  if (!is.null(variables$strata)) {
    # Stratified analysis needs a data frame holding all strata columns.
    if (missing(df)) stop("When doing stratified analysis a data.frame with specific columns is needed.")
    strata_colnames <- variables$strata
    checkmate::assert_character(strata_colnames, null.ok = FALSE)
    assert_df_with_variables(df, stats::setNames(as.list(strata_colnames), strata_colnames))

    # One stratum per combination of the strata variables; remaining checks are
    # left to prop_strat_wilson().
    strata <- as.factor(interaction(df[strata_colnames]))
  } else if (is_stratified_method) {
    stop("To use stratified methods you need to specify the strata variables.")
  }

  # Responses come either directly as a vector or as column `.var` of `df`.
  rsp <- if (checkmate::test_atomic_vector(df)) {
    as.logical(df)
  } else {
    as.logical(df[[.var]])
  }
  n <- sum(rsp)
  p_hat <- mean(rsp)

  prop_ci <- if (is_stratified_method) {
    prop_strat_wilson(
      rsp,
      strata,
      weights,
      conf_level,
      max_iterations,
      correct = (method == "strat_wilsonc")
    )$conf_int
  } else {
    switch(method,
      "clopper-pearson" = prop_clopper_pearson(rsp, conf_level),
      "wilson" = prop_wilson(rsp, conf_level),
      "wilsonc" = prop_wilson(rsp, conf_level, correct = TRUE),
      "wald" = prop_wald(rsp, conf_level),
      "waldcc" = prop_wald(rsp, conf_level, correct = TRUE),
      "agresti-coull" = prop_agresti_coull(rsp, conf_level),
      "jeffreys" = prop_jeffreys(rsp, conf_level)
    )
  }

  # The interval is reported in percent; the count and proportion are not rescaled.
  ci_label <- d_proportion(conf_level, method, long = long)
  list(
    "n_prop" = formatters::with_label(c(n, p_hat), "Responders"),
    "prop_ci" = formatters::with_label(x = 100 * prop_ci, label = ci_label)
  )
}
127
128		#' @describeIn estimate_proportions Formatted analysis function which is used as `afun`
129		#' in `estimate_proportion()`.
130		#'
131		#' @return
132		#' * `a_proportion()` returns the corresponding list with formatted [rtables::CellValue()].
133		#'
134		#' @export
# Formatted analysis function built on s_proportion(): count with percentage
# for `n_prop`, and a bracketed pair for the confidence interval.
a_proportion <- make_afun(
  fun = s_proportion,
  .formats = c(
    n_prop = "xx (xx.x%)",
    prop_ci = "(xx.x, xx.x)"
  )
)
139
140		#' @describeIn estimate_proportions Layout-creating function which can take statistics function arguments
141		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
142		#'
143		#' @param ... other arguments are ultimately conveyed to [s_proportion()].
144		#'
145		#' @return
146		#' * `estimate_proportion()` returns a layout object suitable for passing to further layouting functions,
147		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
148		#' the statistics from `s_proportion()` to the table layout.
149		#'
150		#' @examples
151		#' dta_test <- data.frame(
152		#' USUBJID = paste0("S", 1:12),
153		#' ARM = rep(LETTERS[1:3], each = 4),
154		#' AVAL = c(A = c(1, 1, 1, 1), B = c(0, 0, 1, 1), C = c(0, 0, 0, 0))
155		#' )
156		#'
157		#' basic_table() %>%
158		#' split_cols_by("ARM") %>%
159		#' estimate_proportion(vars = "AVAL") %>%
160		#' build_table(df = dta_test)
161		#'
162		#' @export
estimate_proportion <- function(lyt,
                                vars,
                                ...,
                                show_labels = "hidden",
                                table_names = vars,
                                .stats = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # Customize the formatted analysis function with the user-supplied settings.
  custom_afun <- make_afun(
    a_proportion,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Arguments in `...` are passed on to the analysis function as extra arguments.
  analyze(
    lyt = lyt,
    vars = vars,
    afun = custom_afun,
    extra_args = list(...),
    show_labels = show_labels,
    table_names = table_names
  )
}
188
189		#' Helper Functions for Calculating Proportion Confidence Intervals
190		#'
191		#' @description `r lifecycle::badge("stable")`
192		#'
193		#' Functions to calculate different proportion confidence intervals for use in [estimate_proportion()].
194		#'
195		#' @inheritParams argument_convention
196		#' @inheritParams estimate_proportions
197		#'
198		#' @return Confidence interval of a proportion.
199		#'
200		#' @seealso [estimate_proportions], descriptive function [d_proportion()],
201		#' and helper functions [strata_normal_quantile()] and [update_weights_strat_wilson()].
202		#'
203		#' @name h_proportions
204		NULL
205
206		#' @describeIn h_proportions Calculates the Wilson interval by calling [stats::prop.test()].
207		#' Also referred to as Wilson score interval.
208		#'
209		#' @examples
210		#' rsp <- c(
211		#' TRUE, TRUE, TRUE, TRUE, TRUE,
212		#' FALSE, FALSE, FALSE, FALSE, FALSE
213		#' )
214		#' prop_wilson(rsp, conf_level = 0.9)
215		#'
216		#' @export
prop_wilson <- function(rsp, conf_level, correct = FALSE) {
  # The interval is taken from stats::prop.test(), which is given the number of
  # responders and the number of observations.
  n_responders <- sum(rsp)
  n_total <- length(rsp)

  test_result <- stats::prop.test(
    x = n_responders,
    n = n_total,
    conf.level = conf_level,
    correct = correct
  )

  # Strip the attributes so that a plain numeric vector (lower, upper) is returned.
  as.numeric(test_result$conf.int)
}
227
228		#' @describeIn h_proportions Calculates the stratified Wilson confidence
229		#' interval for unequal proportions as described in \insertCite{Yan2010-jt;textual}{tern}
230		#'
231		#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
232		#' @param weights (`numeric` or `NULL`)\cr weights for each level of the strata. If `NULL`, they are
233		#' estimated using the iterative algorithm proposed in \insertCite{Yan2010-jt;textual}{tern} that
234		#' minimizes the weighted squared length of the confidence interval.
235		#' @param max_iterations (`count`)\cr maximum number of iterations for the iterative procedure used
236		#' to find estimates of optimal weights.
237		#' @param correct (`flag`)\cr include the continuity correction. For further information, see for example
238		#' [stats::prop.test()].
239		#'
240		#' @references
241		#' \insertRef{Yan2010-jt}{tern}
242		#'
243		#' @examples
244		#' # Stratified Wilson confidence interval with unequal probabilities
245		#'
246		#' set.seed(1)
247		#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
248		#' strata_data <- data.frame(
249		#' "f1" = sample(c("a", "b"), 100, TRUE),
250		#' "f2" = sample(c("x", "y", "z"), 100, TRUE),
251		#' stringsAsFactors = TRUE
252		#' )
253		#' strata <- interaction(strata_data)
254		#' n_strata <- ncol(table(rsp, strata)) # Number of strata
255		#'
256		#' prop_strat_wilson(
257		#' rsp = rsp, strata = strata,
258		#' conf_level = 0.90
259		#' )
260		#'
261		#' # Not automatic setting of weights
262		#' prop_strat_wilson(
263		#' rsp = rsp, strata = strata,
264		#' weights = rep(1 / n_strata, n_strata),
265		#' conf_level = 0.90
266		#' )
267		#'
268		#' @export
prop_strat_wilson <- function(rsp,
                              strata,
                              weights = NULL,
                              conf_level = 0.95,
                              max_iterations = NULL,
                              correct = FALSE) {
  checkmate::assert_logical(rsp, any.missing = FALSE)
  checkmate::assert_factor(strata, len = length(rsp))
  assert_proportion_value(conf_level)

  # Response (rows) by stratum (columns) contingency table.
  tbl <- table(rsp, strata)
  n_strata <- ncol(tbl)

  # Weights are estimated iteratively only when none were supplied.
  estimate_weights <- is.null(weights)
  if (estimate_weights) {
    # Equal weights are the starting point of the iterative procedure.
    weights <- rep(1 / n_strata, n_strata)

    if (is.null(max_iterations)) max_iterations <- 10
    checkmate::assert_int(max_iterations, na.ok = FALSE, null.ok = FALSE, lower = 1)
  }
  checkmate::assert_numeric(weights, lower = 0, upper = 1, any.missing = FALSE, len = ncol(tbl))
  checkmate::assert_int(sum(weights), lower = 1, upper = 1)

  # Per-stratum responder counts, sizes, estimates and variances; strata
  # without observations are dropped.
  stratum_sizes <- colSums(tbl)
  has_obs <- stratum_sizes > 0
  xs <- tbl["TRUE", ][has_obs]
  ns <- stratum_sizes[has_obs]
  ests <- xs / ns
  vars <- ests * (1 - ests) / ns

  strata_qnorm <- strata_normal_quantile(vars, weights, conf_level)

  # Final weights: updated iteratively, or exactly as supplied by the caller.
  weights_new <- if (estimate_weights) {
    update_weights_strat_wilson(vars, strata_qnorm, weights, ns, max_iterations, conf_level)$weights
  } else {
    weights
  }

  # Confidence level used within each stratum, derived from the stratified quantile.
  strata_conf_level <- 2 * stats::pnorm(strata_qnorm) - 1

  # Interval from stats::prop.test() in every stratum (warnings suppressed).
  ci_by_strata <- Map(
    function(x, n) {
      suppressWarnings(stats::prop.test(x, n, correct = correct, conf.level = strata_conf_level)$conf.int)
    },
    x = xs,
    n = ns
  )
  lower_by_strata <- vapply(ci_by_strata, function(ci) ci[1L], numeric(1))
  upper_by_strata <- vapply(ci_by_strata, function(ci) ci[2L], numeric(1))

  # Overall limits are the weighted sums of the per-stratum limits.
  result <- list(
    conf_int = c(
      lower = sum(weights_new * lower_by_strata),
      upper = sum(weights_new * upper_by_strata)
    )
  )

  # The weights are returned only when they were estimated here.
  if (estimate_weights) {
    result <- c(result, list(weights = weights_new))
  }
  result
}
347
348		#' @describeIn h_proportions Calculates the Clopper-Pearson interval by calling [stats::binom.test()].
349		#' Also referred to as the `exact` method.
350		#'
351		#' @examples
352		#' prop_clopper_pearson(rsp, conf_level = .95)
353		#'
354		#' @export
prop_clopper_pearson <- function(rsp,
                                 conf_level) {
  # The interval is taken from stats::binom.test(), which is given the number of
  # responders and the number of observations.
  n_responders <- sum(rsp)
  n_total <- length(rsp)

  exact_test <- stats::binom.test(x = n_responders, n = n_total, conf.level = conf_level)

  # Strip the attributes so that a plain numeric vector (lower, upper) is returned.
  as.numeric(exact_test$conf.int)
}
364
365		#' @describeIn h_proportions Calculates the Wald interval by following the usual textbook definition
366		#' for a single proportion confidence interval using the normal approximation.
367		#'
368		#' @param correct (`flag`)\cr apply continuity correction.
369		#'
370		#' @examples
371		#' prop_wald(rsp, conf_level = 0.95)
372		#' prop_wald(rsp, conf_level = 0.95, correct = TRUE)
373		#'
374		#' @export
prop_wald <- function(rsp, conf_level, correct = FALSE) {
  n <- length(rsp)
  p_hat <- mean(rsp)
  z <- stats::qnorm((1 + conf_level) / 2)

  # The continuity correction widens the interval by 1 / (2n) on each side.
  continuity <- if (correct) 1 / (2 * n) else 0
  half_width <- z * sqrt(p_hat * (1 - p_hat)) / sqrt(n) + continuity

  # Limits are truncated to the unit interval.
  c(
    max(0, p_hat - half_width),
    min(1, p_hat + half_width)
  )
}
388
389		#' @describeIn h_proportions Calculates the Agresti-Coull interval (created by Alan Agresti and Brent Coull) by
390		#' (for 95% CI) adding two successes and two failures to the data and then using the Wald formula to construct a CI.
391		#'
392		#' @examples
393		#' prop_agresti_coull(rsp, conf_level = 0.95)
394		#'
395		#' @export
prop_agresti_coull <- function(rsp, conf_level) {
  z <- stats::qnorm((1 + conf_level) / 2)

  # Adjusted counts: z^2 / 2 successes and z^2 / 2 failures are added to the data.
  successes_adj <- sum(rsp) + z^2 / 2
  size_adj <- length(rsp) + z^2

  # Wald-type interval computed from the adjusted proportion.
  p_adj <- successes_adj / size_adj
  half_width <- z * sqrt(p_adj * (1 - p_adj)) / sqrt(size_adj)

  # Limits are truncated to the unit interval.
  c(
    max(0, p_adj - half_width),
    min(1, p_adj + half_width)
  )
}
414
415		#' @describeIn h_proportions Calculates the Jeffreys interval, an equal-tailed interval based on the
416		#' non-informative Jeffreys prior for a binomial proportion.
417		#'
418		#' @examples
419		#' prop_jeffreys(rsp, conf_level = 0.95)
420		#'
421		#' @export
prop_jeffreys <- function(rsp,
                          conf_level) {
  n_total <- length(rsp)
  n_responders <- sum(rsp)

  # Shape parameters of the Beta distribution whose quantiles give the limits.
  shape_success <- n_responders + 0.5
  shape_failure <- n_total - n_responders + 0.5

  alpha <- 1 - conf_level

  # The lower limit is fixed at 0 when there are no responders, and the upper
  # limit at 1 when everyone responded.
  lower <- ifelse(
    n_responders == 0,
    0,
    stats::qbeta(alpha / 2, shape_success, shape_failure)
  )
  upper <- ifelse(
    n_responders == n_total,
    1,
    stats::qbeta(1 - alpha / 2, shape_success, shape_failure)
  )

  c(lower, upper)
}
442
#' Description of the Proportion Summary
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is a helper function that describes the analysis in [s_proportion()].
#'
#' @inheritParams s_proportion
#' @param long (`flag`)\cr whether a long or a short (default) description is required.
#'
#' @return String describing the analysis: the confidence level followed by the name of the
#'   interval method in parentheses. An error is raised for a `method` without a description.
#'
#' @export
d_proportion <- function(conf_level,
                         method,
                         long = FALSE) {
  pct <- conf_level * 100

  # The long form only adds what the interval refers to.
  scope <- if (long) " for Response Rates" else ""

  method_desc <- switch(method,
    "clopper-pearson" = "Clopper-Pearson",
    "waldcc" = "Wald, with correction",
    "wald" = "Wald, without correction",
    "wilson" = "Wilson, without correction",
    "strat_wilson" = "Stratified Wilson, without correction",
    "wilsonc" = "Wilson, with correction",
    "strat_wilsonc" = "Stratified Wilson, with correction",
    "agresti-coull" = "Agresti-Coull",
    "jeffreys" = "Jeffreys",
    stop(paste(method, "does not have a description"))
  )

  paste0(pct, "% CI", scope, " (", method_desc, ")")
}
477
#' Helper Function for the Estimation of Stratified Quantiles
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This function wraps the estimation of stratified percentiles when we assume
#' the approximation for large numbers. This is necessary only in the case
#' proportions for each strata are unequal.
#'
#' The standard normal quantile for `conf_level` is rescaled by
#' `sqrt(sum(weights^2 * vars)) / sum(sqrt(weights^2 * vars))`.
#'
#' @inheritParams argument_convention
#' @inheritParams prop_strat_wilson
#'
#' @return Stratified quantile.
#'
#' @seealso [prop_strat_wilson()]
#'
#' @examples
#' strata_data <- table(data.frame(
#'   "f1" = sample(c(TRUE, FALSE), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' ))
#' ns <- colSums(strata_data)
#' ests <- strata_data["TRUE", ] / ns
#' vars <- ests * (1 - ests) / ns
#' weights <- rep(1 / length(ns), length(ns))
#' strata_normal_quantile(vars, weights, 0.95)
#'
#' @export
strata_normal_quantile <- function(vars, weights, conf_level) {
  weighted_vars <- weights^2 * vars

  # Scaling factor applied to the ordinary normal quantile.
  scale <- sqrt(sum(weighted_vars)) / sum(sqrt(weighted_vars))

  scale * stats::qnorm((1 + conf_level) / 2)
}
511
#' Helper Function for the Estimation of Weights for `prop_strat_wilson`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This function wraps the iteration procedure that allows you to estimate
#' the weights for each proportional strata. The procedure aims to minimize the
#' weighted squared length of the confidence interval.
#'
#' In every iteration the weights are recomputed from the current stratified quantile and
#' normalized to sum to one, and the stratified quantile is then re-estimated with
#' [strata_normal_quantile()]. The iteration stops as soon as the summed absolute change of the
#' weights falls below `tol`.
#'
#' @inheritParams prop_strat_wilson
#' @param vars (`numeric`)\cr normalized proportions for each strata.
#' @param strata_qnorm (`numeric`)\cr initial estimation with identical weights of the quantiles.
#' @param initial_weights (`numeric`)\cr initial weights used to calculate `strata_qnorm`. This can
#'   be optimized in the future if we need to estimate better initial weights.
#' @param n_per_strata (`numeric`)\cr number of elements in each strata.
#' @param max_iterations (`count`)\cr maximum number of iterations to be tried. Convergence is always checked.
#' @param tol (`number`)\cr tolerance threshold for convergence.
#'
#' @return A `list` of 3 elements:
#'   * `n_it`: number of iterations performed.
#'   * `weights`: weights obtained in the last iteration (`initial_weights` if no iteration was run).
#'   * `diff_v`: summed absolute change of the weights in each iteration.
#'
#'   A warning is issued if the tolerance was not reached within `max_iterations` iterations.
#'
#' @seealso For references and details see [prop_strat_wilson()].
#'
#' @examples
#' vs <- c(0.011, 0.013, 0.012, 0.014, 0.017, 0.018)
#' sq <- 0.674
#' ws <- rep(1 / length(vs), length(vs))
#' ns <- c(22, 18, 17, 17, 14, 12)
#'
#' update_weights_strat_wilson(vs, sq, ws, ns, 100, 0.95, 0.001)
#'
#' @export
update_weights_strat_wilson <- function(vars,
                                        strata_qnorm,
                                        initial_weights,
                                        n_per_strata,
                                        max_iterations = 50,
                                        conf_level = 0.95,
                                        tol = 0.001) {
  it <- 0
  diff_v <- NULL
  # Convergence is tracked explicitly: comparing `it` with `max_iterations` cannot tell
  # "converged on the last permitted iteration" apart from "ran out of iterations".
  converged <- FALSE
  # Defined up front so that the result is well-formed even if the loop body never runs.
  weights_new <- initial_weights

  while (it < max_iterations) {
    it <- it + 1
    weights_new_t <- (1 + strata_qnorm^2 / n_per_strata)^2
    weights_new_b <- (vars + strata_qnorm^2 / (4 * n_per_strata^2))
    weights_new <- weights_new_t / weights_new_b
    weights_new <- weights_new / sum(weights_new)
    strata_qnorm <- strata_normal_quantile(vars, weights_new, conf_level)
    # The number of iterations is small in practice, so growing this vector is harmless.
    diff_v <- c(diff_v, sum(abs(weights_new - initial_weights)))
    if (diff_v[length(diff_v)] < tol) {
      converged <- TRUE
      break
    }
    initial_weights <- weights_new
  }

  if (!converged) {
    warning("The heuristic to find weights did not converge with max_iterations = ", max_iterations)
  }

  list(
    "n_it" = it,
    "weights" = weights_new,
    "diff_v" = diff_v
  )
}

#' Multivariate Logistic Regression Table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Layout-creating function which summarizes a logistic variable regression for binary outcome with
#' categorical/continuous covariates in model statement. For each covariate category (if categorical)
#' or specified values (if continuous), present degrees of freedom, regression parameter estimate and
#' standard error (SE) relative to reference group or category. Report odds ratios for each covariate
#' category or specified values and corresponding Wald confidence intervals as default but allow user
#' to specify other confidence levels. Report p-value for Wald chi-square test of the null hypothesis
#' that covariate has no effect on response in model containing all specified covariates.
#' Allow option to include one two-way interaction and present similar output for
#' each interaction degree of freedom.
#'
#' @inheritParams argument_convention
#' @param drop_and_remove_str (`character`)\cr string to be dropped and removed.
#'
#' @return A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
#'   Adding this function to an `rtable` layout will add a logistic regression variable summary to the table layout.
#'
#' @note For the formula, the variable names need to be standard `data.frame` column names without
#'   special characters.
#'
#' @examples
#' library(dplyr)
#' library(broom)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
#'   mutate(
#'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     RACE = factor(RACE),
#'     SEX = factor(SEX)
#'   )
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
#' mod1 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE")
#'   )
#' )
#' mod2 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE"),
#'     interaction = "AGE"
#'   )
#' )
#'
#' df <- tidy(mod1, conf_level = 0.99)
#' df2 <- tidy(mod2, conf_level = 0.99)
#'
#' # flagging empty strings with "_"
#' df <- df_explicit_na(df, na_level = "_")
#' df2 <- df_explicit_na(df2, na_level = "_")
#'
#' result1 <- basic_table() %>%
#'   summarize_logistic(
#'     conf_level = 0.95,
#'     drop_and_remove_str = "_"
#'   ) %>%
#'   build_table(df = df)
#' result1
#'
#' result2 <- basic_table() %>%
#'   summarize_logistic(
#'     conf_level = 0.95,
#'     drop_and_remove_str = "_"
#'   ) %>%
#'   build_table(df = df2)
#' result2
#'
#' @export
summarize_logistic <- function(lyt,
                               conf_level,
                               drop_and_remove_str = "",
                               .indent_mods = NULL) {
  # checks
  checkmate::assert_string(drop_and_remove_str)

  # Content-row constructors, one per kind of summary row.
  add_variable_test <- logistic_summary_by_flag("is_variable_summary")
  add_term_estimates <- logistic_summary_by_flag("is_term_summary", .indent_mods = .indent_mods)
  add_odds_ratios <- logistic_summary_by_flag("is_reference_summary", .indent_mods = .indent_mods)
  drop_levels <- drop_and_remove_levels(drop_and_remove_str)

  # Row split on `var`, labelled by its companion `<var>_label` column.
  split_rows <- function(lyt, var) {
    split_rows_by(lyt, var = var, labels_var = paste0(var, "_label"), split_fun = drop_levels)
  }

  lyt <- logistic_regression_cols(lyt, conf_level = conf_level)
  lyt <- add_variable_test(split_rows(lyt, "variable"))
  lyt <- add_term_estimates(split_rows(lyt, "term"))
  lyt <- split_rows(split_rows(lyt, "interaction"), "reference")
  add_odds_ratios(lyt)
}
101
#' Fit for Logistic Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Fit a (conditional) logistic regression model.
#'
#' @inheritParams argument_convention
#' @param data (`data.frame`)\cr the data frame on which the model was fit.
#' @param response_definition (`string`)\cr the definition of what an event is in terms of `response`.
#'   This will be used when fitting the (conditional) logistic regression model on the left hand
#'   side of the formula. The first occurrence of `response` in this string is replaced by the
#'   response variable name.
#'
#' @return A fitted logistic regression model.
#'
#' @section Model Specification:
#'
#' The `variables` list needs to include the following elements:
#'   * `arm`: Treatment arm variable name.
#'   * `response`: The response variable name. Usually this is a 0/1 variable.
#'   * `covariates`: This is either `NULL` (no covariates) or a character vector of covariate variable names.
#'   * `interaction`: This is either `NULL` (no interaction) or a string of a single covariate variable name already
#'     included in `covariates`. Then the interaction with the treatment arm is included in the model.
#'   * `strata`: This is either `NULL` (no stratification) or a character vector of variable names. If given, a
#'     `strata()` term is added to the formula (several variables are combined through `interaction()`) and the
#'     model is fitted with `clogit_with_tryCatch()` instead of [stats::glm()].
#'
#' @examples
#' library(dplyr)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
#'   mutate(
#'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     RACE = factor(RACE),
#'     SEX = factor(SEX)
#'   )
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
#' mod1 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE")
#'   )
#' )
#' mod2 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE"),
#'     interaction = "AGE"
#'   )
#' )
#'
#' @export
fit_logistic <- function(data,
                         variables = list(
                           response = "Response",
                           arm = "ARMCD",
                           covariates = NULL,
                           interaction = NULL,
                           strata = NULL
                         ),
                         response_definition = "response") {
  assert_df_with_variables(data, variables)
  checkmate::assert_subset(names(variables), c("response", "arm", "covariates", "interaction", "strata"))
  checkmate::assert_string(response_definition)
  checkmate::assert_true(grepl("response", response_definition))

  has_strata <- !is.null(variables$strata)

  # Left-hand side: the event definition with the placeholder swapped for the actual column name.
  lhs <- sub("response", variables$response, response_definition, fixed = TRUE)

  # The right-hand side always starts with the treatment arm.
  model_str <- paste0(lhs, " ~ ", variables$arm)

  if (!is.null(variables$covariates)) {
    covariate_terms <- paste(variables$covariates, collapse = " + ")
    model_str <- paste0(model_str, " + ", covariate_terms)
  }

  if (!is.null(variables$interaction)) {
    checkmate::assert_string(variables$interaction)
    checkmate::assert_subset(variables$interaction, variables$covariates)
    model_str <- paste0(model_str, " + ", variables$arm, ":", variables$interaction)
  }

  if (has_strata) {
    strata_term <- variables$strata
    if (length(strata_term) > 1) {
      # Several stratification variables are collapsed into a single factor.
      strata_term <- paste0("I(interaction(", paste0(strata_term, collapse = ", "), "))")
    }
    model_str <- paste0(model_str, "+ strata(", strata_term, ")")
  }

  # The local name `formula` is kept because the fitting functions record the call.
  formula <- stats::as.formula(model_str)

  if (has_strata) {
    clogit_with_tryCatch(
      formula = formula,
      data = data,
      x = TRUE
    )
  } else {
    stats::glm(
      formula = formula,
      data = data,
      family = stats::binomial("logit")
    )
  }
}
208
#' Custom Tidy Method for Binomial GLM Results
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper method (for [broom::tidy()]) to prepare a data frame from a `glm` object
#' with `binomial` family.
#'
#' @inheritParams argument_convention
#' @param at (`NULL` or `numeric`)\cr optional values for the interaction variable. Otherwise the median is used.
#' @param fit_glm logistic regression model fitted by [stats::glm()] with "binomial" family.
#'
#' @return A `data.frame` containing the tidied model. The columns `variable`, `term`, `interaction`
#'   and `reference` are factors whose levels follow the order of first appearance.
#'
#' @method tidy glm
#'
#' @seealso [h_logistic_regression] for relevant helper functions.
#'
#' @examples
#' library(dplyr)
#' library(broom)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
#'   mutate(
#'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     RACE = factor(RACE),
#'     SEX = factor(SEX)
#'   )
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
#' mod1 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE")
#'   )
#' )
#' mod2 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE"),
#'     interaction = "AGE"
#'   )
#' )
#'
#' df <- tidy(mod1, conf_level = 0.99)
#' df2 <- tidy(mod2, conf_level = 0.99)
#'
#' @export
tidy.glm <- function(fit_glm, # nolint
                     conf_level = 0.95,
                     at = NULL) {
  checkmate::assert_class(fit_glm, "glm")
  checkmate::assert_set_equal(fit_glm$family$family, "binomial")

  term_labels <- attr(stats::terms(fit_glm), "term.labels")
  model_vars <- names(attr(fit_glm$terms, "dataClasses"))

  # A term label that is not itself a model variable is treated as an interaction term.
  has_interaction <- !all(term_labels %in% model_vars)

  tidied <- if (has_interaction) {
    h_logistic_inter_terms(
      x = term_labels,
      fit_glm = fit_glm,
      conf_level = conf_level,
      at = at
    )
  } else {
    h_logistic_simple_terms(
      x = term_labels,
      fit_glm = fit_glm,
      conf_level = conf_level
    )
  }

  # Freeze the row order: factor levels in order of first appearance.
  for (key_col in c("variable", "term", "interaction", "reference")) {
    values <- tidied[[key_col]]
    tidied[[key_col]] <- factor(values, levels = unique(values))
  }

  tidied
}
289
#' Logistic Regression Multivariate Column Layout Function
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Layout-creating function which creates a multivariate column layout summarizing logistic
#' regression results. This function is a wrapper for [rtables::split_cols_by_multivar()].
#'
#' @inheritParams argument_convention
#'
#' @return A layout object suitable for passing to further layouting functions. Adding this
#'   function to an `rtable` layout will split the table into columns corresponding to
#'   statistics `df`, `estimate`, `std_error`, `odds_ratio`, `ci`, and `pvalue`.
#'
#' @export
logistic_regression_cols <- function(lyt,
                                     conf_level = 0.95) {
  # Only the confidence interval header depends on the arguments.
  ci_label <- paste("Wald", f_conf_level(conf_level))

  stat_cols <- c("df", "estimate", "std_error", "odds_ratio", "ci", "pvalue")
  stat_labels <- c(
    df = "Degrees of Freedom",
    estimate = "Parameter Estimate",
    std_error = "Standard Error",
    odds_ratio = "Odds Ratio",
    ci = ci_label,
    pvalue = "p-value"
  )

  split_cols_by_multivar(lyt = lyt, vars = stat_cols, varlabels = stat_labels)
}
321
#' Logistic Regression Summary Table Constructor Function
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Constructor for content functions to be used in [`summarize_logistic()`] to summarize
#' logistic regression results. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @inheritParams argument_convention
#' @param flag_var (`string`)\cr variable name identifying which row should be used in this
#'   content function.
#'
#' @return A content function. It takes a layout `lyt` and adds content rows with one cell per
#'   statistic (`df`, `estimate`, `std_error`, `odds_ratio`, `ci`, `pvalue`), each built by
#'   `cfun_by_flag()` for the rows flagged by `flag_var`.
#'
#' @export
logistic_summary_by_flag <- function(flag_var, .indent_mods = NULL) {
  checkmate::assert_string(flag_var)
  function(lyt) {
    # Display format per statistic column. Note the plain `|` in the p-value format:
    # `\|` is not a valid escape sequence in an R string literal.
    stat_formats <- list(
      df = "xx.",
      estimate = "xx.xxx",
      std_error = "xx.xxx",
      odds_ratio = ">999.99",
      ci = format_extreme_values_ci(2L),
      pvalue = "x.xxxx | (<0.0001)"
    )

    # One content function per statistic; `Map()` names the result after the statistics.
    cfun_list <- Map(
      function(stat, fmt) {
        force(fmt) # resolve now so each content function keeps its own format
        cfun_by_flag(stat, flag_var, format = fmt, .indent_mods = .indent_mods)
      },
      names(stat_formats),
      stat_formats
    )

    summarize_row_groups(
      lyt = lyt,
      cfun = cfun_list
    )
  }
}

1		#' Line plot with the optional table
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Line plot with the optional table.
6		#'
7		#' @param df (`data.frame`)\cr data set containing all analysis variables.
8		#' @param alt_counts_df (`data.frame` or `NULL`)\cr data set that will be used (only) to counts objects in strata.
9		#' @param variables (named `character` vector) of variable names in `df` data set. Details are:
10		#' * `x` (`character`)\cr name of x-axis variable.
11		#' * `y` (`character`)\cr name of y-axis variable.
12		#' * `strata` (`character`)\cr name of grouping variable, i.e. treatment arm. Can be `NA` to indicate lack of groups.
13		#' * `paramcd` (`character`)\cr name of the variable for parameter's code. Used for y-axis label and plot's subtitle.
14		#' Can be `NA` if paramcd is not to be added to the y-axis label or subtitle.
15		#' * `y_unit` (`character`)\cr name of variable with units of `y`. Used for y-axis label and plot's subtitle.
16		#' Can be `NA` if y unit is not to be added to the y-axis label or subtitle.
17		#' @param mid (`character` or `NULL`)\cr names of the statistics that will be plotted as midpoints.
18		#' All the statistics indicated in `mid` variable must be present in the object returned by `sfun`,
19		#' and be of a `double` or `numeric` type vector of length one.
20		#' @param interval (`character` or `NULL`)\cr names of the statistics that will be plotted as intervals.
21		#' All the statistics indicated in `interval` variable must be present in the object returned by `sfun`,
22		#' and be of a `double` or `numeric` type vector of length two.
23		#' @param whiskers (`character`)\cr names of the interval whiskers that will be plotted. Must match the `names`
24		#' attribute of the `interval` element in the list returned by `sfun`. It is possible to specify one whisker only,
25		#' lower or upper.
26		#' @param table (`character` or `NULL`)\cr names of the statistics that will be displayed in the table below the plot.
27		#' All the statistics indicated in `table` variable must be present in the object returned by `sfun`.
28		#' @param sfun (`closure`)\cr the function to compute the values of required statistics. It must return a named `list`
29		#' with atomic vectors. The names of the `list` elements refer to the names of the statistics and are used by `mid`,
30		#' `interval`, `table`. It must be able to accept as input a vector with data for which statistics are computed.
31		#' @param ... optional arguments to `sfun`.
32		#' @param mid_type (`character`)\cr controls the type of the `mid` plot, it can be point (`p`), line (`l`),
33		#' or point and line (`pl`).
34		#' @param mid_point_size (`integer` or `double`)\cr controls the font size of the point for `mid` plot.
35		#' @param position (`character` or `call`)\cr geom element position adjustment, either as a string, or the result of
36		#' a call to a position adjustment function.
37		#' @param legend_title (`character` string)\cr legend title.
38		#' @param legend_position (`character`)\cr the position of the plot legend (`none`, `left`, `right`, `bottom`, `top`,
39		#' or two-element numeric vector).
40		#' @param ggtheme (`theme`)\cr a graphical theme as provided by `ggplot2` to control styling of the plot.
41		#' @param y_lab (`character`)\cr y-axis label. If equal to `NULL`, then no label will be added.
42		#' @param y_lab_add_paramcd (`logical`)\cr should paramcd, i.e. `unique(df[[variables["paramcd"]]])` be added to the
43		#' y-axis label `y_lab`?
44		#' @param y_lab_add_unit (`logical`)\cr should y unit, i.e. `unique(df[[variables["y_unit"]]])` be added to the y-axis
45		#' label `y_lab`?
46		#' @param title (`character`)\cr plot title.
47		#' @param subtitle (`character`)\cr plot subtitle.
48		#' @param subtitle_add_paramcd (`logical`)\cr should paramcd, i.e. `unique(df[[variables["paramcd"]]])` be added to
49		#' the plot's subtitle `subtitle`?
50		#' @param subtitle_add_unit (`logical`)\cr should y unit, i.e. `unique(df[[variables["y_unit"]]])` be added to the
51		#' plot's subtitle `subtitle`?
52		#' @param caption (`character`)\cr optional caption below the plot.
53		#' @param table_format (named `character` or `NULL`)\cr format patterns for descriptive statistics used in the
54		#' (optional) table appended to the plot. It is passed directly to the `h_format_row` function through the `format`
55		#' parameter. Names of `table_format` must match the names of statistics returned by `sfun` function.
56		#' @param table_labels (named `character` or `NULL`)\cr labels for descriptive statistics used in the (optional) table
57		#' appended to the plot. Names of `table_labels` must match the names of statistics returned by `sfun` function.
58		#' @param table_font_size (`integer` or `double`)\cr controls the font size of values in the table.
59		#' @param newpage (`logical`)\cr should plot be drawn on new page?
60		#' @param col (`character`)\cr colors.
61		#'
62		#' @return A `ggplot` line plot (and statistics table if applicable).
63		#'
64		#' @examples
65		#' library(nestcolor)
66		#'
67		#' adsl <- tern_ex_adsl
68		#' adlb <- tern_ex_adlb %>% dplyr::filter(ANL01FL == "Y", PARAMCD == "ALT", AVISIT != "SCREENING")
69		#' adlb$AVISIT <- droplevels(adlb$AVISIT)
70		#' adlb <- dplyr::mutate(adlb, AVISIT = forcats::fct_reorder(AVISIT, AVISITN, min))
71		#'
72		#' # Mean with CI
73		#' g_lineplot(adlb, adsl, subtitle = "Laboratory Test:")
74		#'
75		#' # Mean with CI, no stratification
76		#' g_lineplot(adlb, variables = control_lineplot_vars(strata = NA))
77		#'
78		#' # Mean, upper whisker of CI, no strata counts N
79		#' g_lineplot(
80		#' adlb,
81		#' whiskers = "mean_ci_upr",
82		#' title = "Plot of Mean and Upper 95% Confidence Limit by Visit"
83		#' )
84		#'
85		#' # Median with CI
86		#' g_lineplot(
87		#' adlb,
88		#' adsl,
89		#' mid = "median",
90		#' interval = "median_ci",
91		#' whiskers = c("median_ci_lwr", "median_ci_upr"),
92		#' title = "Plot of Median and 95% Confidence Limits by Visit"
93		#' )
94		#'
95		#' # Mean, +/- SD
96		#' g_lineplot(adlb, adsl,
97		#' interval = "mean_sdi",
98		#' whiskers = c("mean_sdi_lwr", "mean_sdi_upr"),
99		#' title = "Plot of Median +/- SD by Visit"
100		#' )
101		#'
102		#' # Mean with CI plot with stats table
103		#' g_lineplot(adlb, adsl, table = c("n", "mean", "mean_ci"))
104		#'
105		#' # Mean with CI, table and customized confidence level
106		#' g_lineplot(
107		#' adlb,
108		#' adsl,
109		#' table = c("n", "mean", "mean_ci"),
110		#' control = control_summarize_vars(conf_level = 0.80),
111		#' title = "Plot of Mean and 80% Confidence Limits by Visit"
112		#' )
113		#'
114		#' # Mean with CI, table, filtered data
115		#' adlb_f <- dplyr::filter(adlb, ARMCD != "ARM A" \| AVISIT == "BASELINE")
116		#' g_lineplot(adlb_f, table = c("n", "mean"))
117		#'
118		#' @export
119		g_lineplot <- function(df,
120		alt_counts_df = NULL,
121		variables = control_lineplot_vars(),
122		mid = "mean",
123		interval = "mean_ci",
124		whiskers = c("mean_ci_lwr", "mean_ci_upr"),
125		table = NULL,
126		sfun = tern::s_summary,
127		...,
128		mid_type = "pl",
129		mid_point_size = 2,
130		position = ggplot2::position_dodge(width = 0.4),
131		legend_title = NULL,
132		legend_position = "bottom",
133		ggtheme = nestcolor::theme_nest(),
134		y_lab = NULL,
135		y_lab_add_paramcd = TRUE,
136		y_lab_add_unit = TRUE,
137		title = "Plot of Mean and 95% Confidence Limits by Visit",
138		subtitle = "",
139		subtitle_add_paramcd = TRUE,
140		subtitle_add_unit = TRUE,
141		caption = NULL,
142		table_format = summary_formats(),
143		table_labels = summary_labels(),
144		table_font_size = 3,
145		newpage = TRUE,
146		col = NULL) {
147	2x	checkmate::assert_character(variables, any.missing = TRUE)
148	2x	checkmate::assert_character(mid, null.ok = TRUE)
149	2x	checkmate::assert_character(interval, null.ok = TRUE)
150	2x	checkmate::assert_character(col, null.ok = TRUE)
151
152	2x	checkmate::assert_string(title, null.ok = TRUE)
153	2x	checkmate::assert_string(subtitle, null.ok = TRUE)
154
155	2x	if (is.character(interval)) {
156	2x	checkmate::assert_vector(whiskers, min.len = 0, max.len = 2)
157		}
158
159	2x	if (length(whiskers) == 1) {
160	!	checkmate::assert_character(mid)
161		}
162
163	2x	if (is.character(mid)) {
164	2x	checkmate::assert_scalar(mid_type)
165	2x	checkmate::assert_subset(mid_type, c("pl", "p", "l"))
166		}
167
168	2x	x <- variables[["x"]]
169	2x	y <- variables[["y"]]
170	2x	paramcd <- variables["paramcd"] # NA if paramcd == NA or it is not in variables
171	2x	y_unit <- variables["y_unit"] # NA if y_unit == NA or it is not in variables
172	2x	if (is.na(variables["strata"])) {
173	!	strata <- NULL # NULL if strata == NA or it is not in variables
174		} else {
175	2x	strata <- variables[["strata"]]
176		}
177	2x	checkmate::assert_flag(y_lab_add_paramcd, null.ok = TRUE)
178	2x	checkmate::assert_flag(subtitle_add_paramcd, null.ok = TRUE)
179	2x	if ((!is.null(y_lab) && y_lab_add_paramcd) \|\| (!is.null(subtitle) && subtitle_add_paramcd)) {
180	2x	checkmate::assert_false(is.na(paramcd))
181	2x	checkmate::assert_scalar(unique(df[[paramcd]]))
182		}
183
184	2x	checkmate::assert_flag(y_lab_add_unit, null.ok = TRUE)
185	2x	checkmate::assert_flag(subtitle_add_unit, null.ok = TRUE)
186	2x	if ((!is.null(y_lab) && y_lab_add_unit) \|\| (!is.null(subtitle) && subtitle_add_unit)) {
187	2x	checkmate::assert_false(is.na(y_unit))
188	2x	checkmate::assert_scalar(unique(df[[y_unit]]))
189		}
190
191	2x	if (!is.null(strata) && !is.null(alt_counts_df)) {
192	2x	checkmate::assert_set_equal(unique(alt_counts_df[[strata]]), unique(df[[strata]]))
193		}
194
195		####################################### \|
196		# ---- Compute required statistics ----
197		####################################### \|
198	2x	if (!is.null(strata)) {
199	2x	df_grp <- tidyr::expand(df, .data[[strata]], .data[[x]]) # expand based on levels of factors
200		} else {
201	!	df_grp <- tidyr::expand(df, NULL, .data[[x]])
202		}
203	2x	df_grp <- df_grp %>%
204	2x	dplyr::full_join(y = df[, c(strata, x, y)], by = c(strata, x), multiple = "all") %>%
205	2x	dplyr::group_by_at(c(strata, x))
206
207	2x	df_stats <- df_grp %>%
208	2x	dplyr::summarise(
209	2x	data.frame(t(do.call(c, unname(sfun(.data[[y]], ...)[c(mid, interval)])))),
210	2x	.groups = "drop"
211		)
212
213	2x	df_stats <- df_stats[!is.na(df_stats[[mid]]), ]
214
215		# add number of objects N in strata
216	2x	if (!is.null(strata) && !is.null(alt_counts_df)) {
217	2x	strata_N <- paste0(strata, "_N") # nolint
218
219	2x	df_N <- as.data.frame(table(alt_counts_df[[strata]], exclude = c(NA, NaN, Inf))) # nolint
220	2x	colnames(df_N) <- c(strata, "N") # nolint
221	2x	df_N[[strata_N]] <- paste0(df_N[[strata]], " (N = ", df_N$N, ")") # nolint
222
223		# strata_N should not be in clonames(df_stats)
224	2x	checkmate::assert_disjunct(strata_N, colnames(df_stats))
225
226	2x	df_stats <- merge(x = df_stats, y = df_N[, c(strata, strata_N)], by = strata)
227	!	} else if (!is.null(strata)) {
228	!	strata_N <- strata # nolint
229		} else {
230	!	strata_N <- NULL # nolint
231		}
232
233		############################################### \|
234		# ---- Prepare certain plot's properties. ----
235		############################################### \|
236		# legend title
237	2x	if (is.null(legend_title) && !is.null(strata) && legend_position != "none") {
238	2x	legend_title <- attr(df[[strata]], "label")
239		}
240
241		# y label
242	2x	if (!is.null(y_lab)) {
243	1x	if (y_lab_add_paramcd) {
244	1x	y_lab <- paste(y_lab, unique(df[[paramcd]]))
245		}
246
247	1x	if (y_lab_add_unit) {
248	1x	y_lab <- paste0(y_lab, " (", unique(df[[y_unit]]), ")")
249		}
250
251	1x	y_lab <- trimws(y_lab)
252		}
253
254		# subtitle
255	2x	if (!is.null(subtitle)) {
256	2x	if (subtitle_add_paramcd) {
257	2x	subtitle <- paste(subtitle, unique(df[[paramcd]]))
258		}
259
260	2x	if (subtitle_add_unit) {
261	2x	subtitle <- paste0(subtitle, " (", unique(df[[y_unit]]), ")")
262		}
263
264	2x	subtitle <- trimws(subtitle)
265		}
266
267		############################### \|
268		# ---- Build plot object. ----
269		############################### \|
270	2x	p <- ggplot2::ggplot(
271	2x	data = df_stats,
272	2x	mapping = ggplot2::aes(
273	2x	x = .data[[x]], y = .data[[mid]],
274	2x	color = if (is.null(strata_N)) NULL else .data[[strata_N]],
275	2x	shape = if (is.null(strata_N)) NULL else .data[[strata_N]],
276	2x	lty = if (is.null(strata_N)) NULL else .data[[strata_N]],
277	2x	group = if (is.null(strata_N)) NULL else .data[[strata_N]]
278		)
279		)
280
281	2x	if (!is.null(mid)) {
282		# points
283	2x	if (grepl("p", mid_type, fixed = TRUE)) {
284	2x	p <- p + ggplot2::geom_point(position = position, size = mid_point_size, na.rm = TRUE)
285		}
286
287		# lines
288		# further conditions in if are to ensure that not all of the groups consist of only one observation
289	2x	if (grepl("l", mid_type, fixed = TRUE) &&
290	2x	!is.null(strata) &&
291	2x	!all(dplyr::summarise(df_grp, count_n = dplyr::n())[["count_n"]] == 1L)) {
292	2x	p <- p + ggplot2::geom_line(position = position, na.rm = TRUE)
293		}
294		}
295
296		# interval
297	2x	if (!is.null(interval)) {
298	2x	p <- p +
299	2x	ggplot2::geom_errorbar(
300	2x	ggplot2::aes(ymin = .data[[whiskers[1]]], ymax = .data[[whiskers[max(1, length(whiskers))]]]),
301	2x	width = 0.45,
302	2x	position = position
303		)
304
305	2x	if (length(whiskers) == 1) { # lwr or upr only; mid is then required
306		# workaround as geom_errorbar does not provide single-direction whiskers
307	!	p <- p +
308	!	ggplot2::geom_linerange(
309	!	data = df_stats[!is.na(df_stats[[whiskers]]), ], # as na.rm =TRUE does not suppress warnings
310	!	ggplot2::aes(ymin = .data[[mid]], ymax = .data[[whiskers]]),
311	!	position = position,
312	!	na.rm = TRUE,
313	!	show.legend = FALSE
314		)
315		}
316		}
317
318	2x	p <- p +
319	2x	ggplot2::scale_y_continuous(labels = scales::comma, expand = ggplot2::expansion(c(0.25, .25))) +
320	2x	ggplot2::labs(
321	2x	title = title,
322	2x	subtitle = subtitle,
323	2x	caption = caption,
324	2x	color = legend_title,
325	2x	lty = legend_title,
326	2x	shape = legend_title,
327	2x	x = attr(df[[x]], "label"),
328	2x	y = y_lab
329		)
330
331	2x	if (!is.null(col)) {
332	!	p <- p +
333	!	ggplot2::scale_color_manual(values = col)
334		}
335
336	2x	if (!is.null(ggtheme)) {
337	2x	p <- p + ggtheme
338		} else {
339	!	p <- p +
340	!	ggplot2::theme_bw() +
341	!	ggplot2::theme(
342	!	legend.key.width = grid::unit(1, "cm"),
343	!	legend.position = legend_position,
344	!	legend.direction = ifelse(
345	!	legend_position %in% c("top", "bottom"),
346	!	"horizontal",
347	!	"vertical"
348		)
349		)
350		}
351
352		############################################################# \|
353		# ---- Optionally, add table to the bottom of the plot. ----
354		############################################################# \|
355	2x	if (!is.null(table)) {
356	1x	df_stats_table <- df_grp %>%
357	1x	dplyr::summarise(
358	1x	h_format_row(
359	1x	x = sfun(.data[[y]], ...)[table],
360	1x	format = table_format,
361	1x	labels = table_labels
362		),
363	1x	.groups = "drop"
364		)
365
366	1x	stats_lev <- rev(setdiff(colnames(df_stats_table), c(strata, x)))
367
368	1x	df_stats_table <- df_stats_table %>%
369	1x	tidyr::pivot_longer(
370	1x	cols = -dplyr::all_of(c(strata, x)),
371	1x	names_to = "stat",
372	1x	values_to = "value",
373	1x	names_ptypes = list(stat = factor(levels = stats_lev))
374		)
375
376	1x	tbl <- ggplot2::ggplot(
377	1x	df_stats_table,
378	1x	ggplot2::aes(x = .data[[x]], y = .data[["stat"]], label = .data[["value"]])
379		) +
380	1x	ggplot2::geom_text(size = table_font_size) +
381	1x	ggplot2::theme_bw() +
382	1x	ggplot2::theme(
383	1x	panel.border = ggplot2::element_blank(),
384	1x	panel.grid.major = ggplot2::element_blank(),
385	1x	panel.grid.minor = ggplot2::element_blank(),
386	1x	axis.ticks = ggplot2::element_blank(),
387	1x	axis.title = ggplot2::element_blank(),
388	1x	axis.text.x = ggplot2::element_blank(),
389	1x	axis.text.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 0, b = 0, l = 5)),
390	1x	strip.text = ggplot2::element_text(hjust = 0),
391	1x	strip.text.x = ggplot2::element_text(margin = ggplot2::margin(1.5, 0, 1.5, 0, "pt")),
392	1x	strip.background = ggplot2::element_rect(fill = "grey95", color = NA),
393	1x	legend.position = "none"
394		)
395
396	1x	if (!is.null(strata)) {
397	1x	tbl <- tbl + ggplot2::facet_wrap(facets = strata, ncol = 1)
398		}
399
400		# align plot and table
401	1x	cowplot::plot_grid(p, tbl, ncol = 1)
402		} else {
403	1x	p
404		}
405		}
406
407		#' Helper function to get the right formatting in the optional table in g_lineplot.
408		#'
409		#' @description `r lifecycle::badge("stable")`
410		#'
411		#' @param x (named `list`)\cr list of numerical values to be formatted and optionally labeled.
412		#' Elements of `x` must be `numeric` vectors.
413		#' @param format (named `character` or `NULL`)\cr format patterns for `x`. Names of the `format` must
414		#' match the names of `x`. This parameter is passed directly to the `rtables::format_rcell`
415		#' function through the `format` parameter.
416		#' @param labels (named `character` or `NULL`)\cr optional labels for `x`. Names of the `labels` must
417		#' match the names of `x`. When a label is not specified for an element of `x`,
418		#' then this function tries to use `label` or `names` (in this order) attribute of that element
419		#' (depending on which one exists and it is not `NULL` or `NA` or `NaN`). If none of these attributes
420		#' are attached to a given element of `x`, then the label is automatically generated.
421		#'
422		#' @return A single row `data.frame` object.
423		#'
424		#' @examples
425		#' mean_ci <- c(48, 51)
426		#' x <- list(mean = 50, mean_ci = mean_ci)
427		#' format <- c(mean = "xx.x", mean_ci = "(xx.xx, xx.xx)")
428		#' labels <- c(mean = "My Mean")
429		#' h_format_row(x, format, labels)
430		#'
431		#' attr(mean_ci, "label") <- "Mean 95% CI"
432		#' x <- list(mean = 50, mean_ci = mean_ci)
433		#' h_format_row(x, format, labels)
434		#'
435		#' @export
h_format_row <- function(x, format, labels = NULL) {
  # Build a one-row, one-column data.frame for a single element of `x`.
  # The column name is taken from (in order of priority) the explicit label,
  # the "label" attribute of the value, or the names of the value itself.
  make_cell <- function(value, fmt, lbl = NULL) {
    cell <- format_rcell(x = value, format = fmt)
    if (is.na(cell)) {
      # Show missing results as the literal text "NA".
      cell <- "NA"
    }

    value_label <- attr(value, "label")
    if (!is.null(lbl) && !is.na(lbl)) {
      names(cell) <- lbl
    } else if (!is.null(value_label) && !is.na(value_label)) {
      names(cell) <- value_label
    } else if (length(value) == length(cell)) {
      names(cell) <- names(value)
    }

    as.data.frame(t(cell))
  }

  # One cell per named element of `x`, bound column-wise into a single row.
  cells <- lapply(
    names(x),
    function(element) make_cell(x[[element]], fmt = format[element], lbl = labels[element])
  )

  do.call(cbind, cells)
}
463
464		#' Control Function for g_lineplot Function
465		#'
466		#' @description `r lifecycle::badge("stable")`
467		#'
468		#' Default values for `variables` parameter in `g_lineplot` function.
469		#' A variable's default value can be overwritten for any variable.
470		#'
471		#' @param x (`character`)\cr x variable name.
472		#' @param y (`character`)\cr y variable name.
473		#' @param strata (`character` or `NA`)\cr strata variable name.
474		#' @param paramcd (`character` or `NA`)\cr paramcd variable name.
475		#' @param y_unit (`character` or `NA`)\cr y_unit variable name.
476		#'
477		#' @return A named character vector of variable names.
478		#'
479		#' @examples
480		#' control_lineplot_vars()
481		#' control_lineplot_vars(strata = NA)
482		#'
483		#' @export
control_lineplot_vars <- function(x = "AVISIT", y = "AVAL", strata = "ARM", paramcd = "PARAMCD", y_unit = "AVALU") {
  # `x` and `y` must be single strings.
  checkmate::assert_string(x)
  checkmate::assert_string(y)
  # The remaining variables may be switched off by passing `NA`.
  checkmate::assert_string(strata, na.ok = TRUE)
  checkmate::assert_string(paramcd, na.ok = TRUE)
  checkmate::assert_string(y_unit, na.ok = TRUE)

  # Named character vector of variable names.
  c(
    x = x,
    y = y,
    strata = strata,
    paramcd = paramcd,
    y_unit = y_unit
  )
}

1		#' Control Function for Subgroup Treatment Effect Pattern (STEP) Calculations
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' This is an auxiliary function for controlling arguments for STEP calculations.
6		#'
7		#' @param biomarker (`numeric` or `NULL`)\cr optional provision of the numeric biomarker variable, which
8		#' could be used to infer `bandwidth`, see below.
9		#' @param use_percentile (`flag`)\cr if `TRUE`, the running windows are created according to
10		#' quantiles rather than actual values, i.e. the bandwidth refers to the percentage of data
11		#' covered in each window. Suggest `TRUE` if the biomarker variable is not uniformly
12		#' distributed.
13		#' @param bandwidth (`number` or `NULL`)\cr indicating the bandwidth of each window.
14		#' Depending on the argument `use_percentile`, it can be either the length of actual-value
15		#' windows on the real biomarker scale, or percentage windows.
16		#' If `use_percentile = TRUE`, it should be a number between 0 and 1.
17		#' If `NULL`, treat the bandwidth to be infinity, which means only one global model will be fitted.
18		#' By default, `0.25` is used for percentage windows and one quarter of the range of the `biomarker`
19		#' variable for actual-value windows.
20		#' @param degree (`count`)\cr the degree of polynomial function of the biomarker as an interaction term
21		#' with the treatment arm fitted at each window. If 0 (default), then the biomarker variable
22		#' is not included in the model fitted in each biomarker window.
23		#' @param num_points (`count`)\cr the number of points at which the hazard ratios are estimated. The
24		#' smallest number is 2.
25		#'
26		#' @return A list of components with the same names as the arguments, except `biomarker` which is
27		#' just used to calculate the `bandwidth` in case that actual biomarker windows are requested.
28		#'
29		#' @examples
30		#' # Provide biomarker values and request actual values to be used,
31		#' # so that bandwidth is chosen from range.
32		#' control_step(biomarker = 1:10, use_percentile = FALSE)
33		#'
34		#' # Use a global model with quadratic biomarker interaction term.
35		#' control_step(bandwidth = NULL, degree = 2)
36		#'
37		#' # Reduce number of points to be used.
38		#' control_step(num_points = 10)
39		#'
40		#' @export
control_step <- function(biomarker = NULL,
                         use_percentile = TRUE,
                         bandwidth,
                         degree = 0L,
                         num_points = 39L) {
  checkmate::assert_numeric(biomarker, null.ok = TRUE)
  checkmate::assert_flag(use_percentile)
  checkmate::assert_int(num_points, lower = 2)
  checkmate::assert_count(degree)

  if (missing(bandwidth)) {
    # Infer bandwidth: 0.25 for percentage windows, a quarter of the biomarker
    # range for actual-value windows, and `NULL` (one global model) when no
    # biomarker values are available to derive a range from.
    bandwidth <- if (use_percentile) {
      0.25
    } else if (!is.null(biomarker)) {
      diff(range(biomarker, na.rm = TRUE)) / 4
    } else {
      NULL
    }
  } else if (!is.null(bandwidth)) {
    # Check a user-supplied bandwidth; an explicit `NULL` is passed through.
    if (use_percentile) {
      assert_proportion_value(bandwidth)
    } else {
      # Require a single numeric value. A generic scalar check would let a
      # character value through, because `"abc" > 0` is evaluated as a string
      # comparison and can be TRUE.
      checkmate::assert_number(bandwidth)
      checkmate::assert_true(bandwidth > 0)
    }
  }

  # `biomarker` is only used to derive `bandwidth` and is not returned.
  list(
    use_percentile = use_percentile,
    bandwidth = bandwidth,
    degree = as.integer(degree),
    num_points = as.integer(num_points)
  )
}

1		#' Helper Functions for Tabulating Survival Duration by Subgroup
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Helper functions that tabulate in a data frame statistics such as median survival
6		#' time and hazard ratio for population subgroups.
7		#'
8		#' @inheritParams argument_convention
9		#' @inheritParams survival_coxph_pairwise
10		#' @inheritParams survival_duration_subgroups
11		#' @param arm (`factor`)\cr the treatment group variable.
12		#'
13		#' @details Main functionality is to prepare data for use in a layout-creating function.
14		#'
15		#' @examples
16		#' library(dplyr)
17		#' library(forcats)
18		#'
19		#' adtte <- tern_ex_adtte
20		#'
21		#' # Save variable labels before data processing steps.
22		#' adtte_labels <- formatters::var_labels(adtte)
23		#'
24		#' adtte_f <- adtte %>%
25		#' filter(
26		#' PARAMCD == "OS",
27		#' ARM %in% c("B: Placebo", "A: Drug X"),
28		#' SEX %in% c("M", "F")
29		#' ) %>%
30		#' mutate(
31		#' # Reorder levels of ARM to display reference arm before treatment arm.
32		#' ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
33		#' SEX = droplevels(SEX),
34		#' is_event = CNSR == 0
35		#' )
36		#' labels <- c("ARM" = adtte_labels[["ARM"]], "SEX" = adtte_labels[["SEX"]], "is_event" = "Event Flag")
37		#' formatters::var_labels(adtte_f)[names(labels)] <- labels
38		#'
39		#' @name h_survival_duration_subgroups
40		NULL
41
42		#' @describeIn h_survival_duration_subgroups helper to prepare a data frame of median survival times by arm.
43		#'
44		#' @return
45		#' * `h_survtime_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, and `median`.
46		#'
47		#' @examples
48		#' # Extract median survival time for one group.
49		#' h_survtime_df(
50		#' tte = adtte_f$AVAL,
51		#' is_event = adtte_f$is_event,
52		#' arm = adtte_f$ARM
53		#' )
54		#'
55		#' @export
h_survtime_df <- function(tte, is_event, arm) {
  checkmate::assert_numeric(tte)
  checkmate::assert_logical(is_event, len = length(tte))
  assert_valid_factor(arm, len = length(tte))

  surv_data <- data.frame(tte = tte, is_event = is_event, stringsAsFactors = FALSE)

  # Keep only observations where both the time and the event flag are present.
  keep <- stats::complete.cases(surv_data)
  surv_data <- surv_data[keep, ]
  arm <- arm[keep]

  # One-row summary for a single arm; arms without observations get NA statistics.
  summarise_arm <- function(arm_data, arm_name) {
    median_est <- NA
    n_events <- NA
    if (nrow(arm_data) > 0) {
      surv_stats <- s_surv_time(arm_data, .var = "tte", is_event = "is_event")
      median_est <- unname(as.numeric(surv_stats$median))
      n_events <- sum(arm_data$is_event)
    }

    data.frame(
      arm = arm_name,
      n = nrow(arm_data),
      n_events = n_events,
      median = median_est,
      stringsAsFactors = FALSE
    )
  }

  by_arm <- split(surv_data, arm)
  arm_rows <- Map(summarise_arm, by_arm, names(by_arm))

  result <- do.call(rbind, args = c(arm_rows, make.row.names = FALSE))
  # Restore the original level order of `arm`.
  result$arm <- factor(result$arm, levels = levels(arm))
  result
}
96
97		#' @describeIn h_survival_duration_subgroups summarizes median survival times by arm and across subgroups
98		#' in a data frame. `variables` corresponds to the names of variables found in `data`, passed as a named list and
99		#' requires elements `tte`, `is_event`, `arm` and optionally `subgroups`. `groups_lists` optionally specifies
100		#' groupings for `subgroups` variables.
101		#'
102		#' @return
103		#' * `h_survtime_subgroups_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, `median`, `subgroup`,
104		#' `var`, `var_label`, and `row_type`.
105		#'
106		#' @examples
107		#' # Extract median survival time for multiple groups.
108		#' h_survtime_subgroups_df(
109		#' variables = list(
110		#' tte = "AVAL",
111		#' is_event = "is_event",
112		#' arm = "ARM",
113		#' subgroups = c("SEX", "BMRKR2")
114		#' ),
115		#' data = adtte_f
116		#' )
117		#'
118		#' # Define groupings for BMRKR2 levels.
119		#' h_survtime_subgroups_df(
120		#' variables = list(
121		#' tte = "AVAL",
122		#' is_event = "is_event",
123		#' arm = "ARM",
124		#' subgroups = c("SEX", "BMRKR2")
125		#' ),
126		#' data = adtte_f,
127		#' groups_lists = list(
128		#' BMRKR2 = list(
129		#' "low" = "LOW",
130		#' "low/medium" = c("LOW", "MEDIUM"),
131		#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
132		#' )
133		#' )
134		#' )
135		#'
136		#' @export
h_survtime_subgroups_df <- function(variables,
                                    data,
                                    groups_lists = list(),
                                    label_all = "All Patients") {
  checkmate::assert_character(variables$tte)
  checkmate::assert_character(variables$is_event)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)

  assert_df_with_variables(data, variables)

  checkmate::assert_string(label_all)

  # Median survival times for one data subset.
  survtime_of <- function(df) {
    h_survtime_df(df[[variables$tte]], df[[variables$is_event]], df[[variables$arm]])
  }

  # Overall ("All Patients") rows.
  overall <- survtime_of(data)
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  # Without subgroup variables only the overall rows are returned.
  if (is.null(variables$subgroups)) {
    return(overall)
  }

  # Per-subgroup rows, each combined with the labels describing its subgroup.
  subsets <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  per_subset <- lapply(subsets, function(subset_i) {
    estimates <- survtime_of(subset_i$df)
    # Repeat the single label row once per result row.
    label_rows <- subset_i$df_labels[rep(1, times = nrow(estimates)), ]
    cbind(estimates, label_rows)
  })
  by_subgroup <- do.call(rbind, args = c(per_subset, make.row.names = FALSE))
  by_subgroup$row_type <- "analysis"

  rbind(overall, by_subgroup)
}
175
176		#' @describeIn h_survival_duration_subgroups helper to prepare a data frame with estimates of
177		#' treatment hazard ratio.
178		#'
179		#' @param strata_data (`factor`, `data.frame` or `NULL`)\cr required if stratified analysis is performed.
180		#'
181		#' @return
182		#' * `h_coxph_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`, `lcl`, `ucl`,
183		#' `conf_level`, `pval` and `pval_label`.
184		#'
185		#' @examples
186		#' # Extract hazard ratio for one group.
187		#' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM)
188		#'
189		#' # Extract hazard ratio for one group with stratification factor.
190		#' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM, strata_data = adtte_f$STRATA1)
191		#'
192		#' @export
h_coxph_df <- function(tte, is_event, arm, strata_data = NULL, control = control_coxph()) {
  checkmate::assert_numeric(tte)
  checkmate::assert_logical(is_event, len = length(tte))
  assert_valid_factor(arm, n.levels = 2, len = length(tte))

  df_tte <- data.frame(tte = tte, is_event = is_event)
  strata_vars <- NULL

  # Attach stratification variable(s): either all columns of a data frame,
  # or a single factor stored under the column name "strata_data".
  if (!is.null(strata_data)) {
    if (is.data.frame(strata_data)) {
      strata_vars <- names(strata_data)
      checkmate::assert_data_frame(strata_data, nrows = nrow(df_tte))
      assert_df_with_factors(strata_data, as.list(stats::setNames(strata_vars, strata_vars)))
    } else {
      assert_valid_factor(strata_data, len = nrow(df_tte))
      strata_vars <- "strata_data"
    }
    df_tte[strata_vars] <- strata_data
  }

  # Result row used whenever no hazard ratio can be estimated.
  na_result <- function(n_tot, n_tot_events) {
    data.frame(
      # Dummy column needed downstream to create a nested header.
      arm = " ",
      n_tot = n_tot,
      n_tot_events = n_tot_events,
      hr = NA,
      lcl = NA,
      ucl = NA,
      conf_level = control[["conf_level"]],
      pval = NA,
      pval_label = NA,
      stringsAsFactors = FALSE
    )
  }

  by_arm <- split(df_tte, arm)
  n_ref <- nrow(by_arm[[1]])
  n_trt <- nrow(by_arm[[2]])

  if (n_ref > 0 && n_trt > 0) {
    # Both arms have data: estimate hazard ratio, CI and p-value.
    fit <- s_coxph_pairwise(
      df = by_arm[[2]],
      .ref_group = by_arm[[1]],
      .in_ref_col = FALSE,
      .var = "tte",
      is_event = "is_event",
      strat = strata_vars,
      control = control
    )

    data.frame(
      # Dummy column needed downstream to create a nested header.
      arm = " ",
      n_tot = unname(as.numeric(fit$n_tot)),
      n_tot_events = unname(as.numeric(fit$n_tot_events)),
      hr = unname(as.numeric(fit$hr)),
      lcl = unname(fit$hr_ci[1]),
      ucl = unname(fit$hr_ci[2]),
      conf_level = control[["conf_level"]],
      pval = as.numeric(fit$pvalue),
      pval_label = obj_label(fit$pvalue),
      stringsAsFactors = FALSE
    )
  } else if (n_ref == 0 && n_trt == 0) {
    # Neither arm has data.
    na_result(n_tot = 0L, n_tot_events = 0L)
  } else {
    # Exactly one arm is empty: report counts over complete cases only.
    complete_rows <- df_tte[stats::complete.cases(df_tte), ]
    na_result(
      n_tot = nrow(complete_rows),
      n_tot_events = sum(complete_rows$is_event)
    )
  }
}
276
277		#' @describeIn h_survival_duration_subgroups summarizes estimates of the treatment hazard ratio
278		#' across subgroups in a data frame. `variables` corresponds to the names of variables found in
279		#' `data`, passed as a named list and requires elements `tte`, `is_event`, `arm` and
280		#' optionally `subgroups` and `strat`. `groups_lists` optionally specifies
281		#' groupings for `subgroups` variables.
282		#'
283		#' @return
284		#' * `h_coxph_subgroups_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`,
285		#' `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`, `var_label`, and `row_type`.
286		#'
287		#' @examples
288		#' # Extract hazard ratio for multiple groups.
289		#' h_coxph_subgroups_df(
290		#' variables = list(
291		#' tte = "AVAL",
292		#' is_event = "is_event",
293		#' arm = "ARM",
294		#' subgroups = c("SEX", "BMRKR2")
295		#' ),
296		#' data = adtte_f
297		#' )
298		#'
299		#' # Define groupings of BMRKR2 levels.
300		#' h_coxph_subgroups_df(
301		#' variables = list(
302		#' tte = "AVAL",
303		#' is_event = "is_event",
304		#' arm = "ARM",
305		#' subgroups = c("SEX", "BMRKR2")
306		#' ),
307		#' data = adtte_f,
308		#' groups_lists = list(
309		#' BMRKR2 = list(
310		#' "low" = "LOW",
311		#' "low/medium" = c("LOW", "MEDIUM"),
312		#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
313		#' )
314		#' )
315		#' )
316		#'
317		#' # Extract hazard ratio for multiple groups with stratification factors.
318		#' h_coxph_subgroups_df(
319		#' variables = list(
320		#' tte = "AVAL",
321		#' is_event = "is_event",
322		#' arm = "ARM",
323		#' subgroups = c("SEX", "BMRKR2"),
324		#' strat = c("STRATA1", "STRATA2")
325		#' ),
326		#' data = adtte_f
327		#' )
328		#'
329		#' @export
h_coxph_subgroups_df <- function(variables,
                                 data,
                                 groups_lists = list(),
                                 control = control_coxph(),
                                 label_all = "All Patients") {
  checkmate::assert_character(variables$tte)
  checkmate::assert_character(variables$is_event)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_character(variables$strat, null.ok = TRUE)
  assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  assert_df_with_variables(data, variables)
  checkmate::assert_string(label_all)

  # Hazard ratio estimates for one data subset; stratification columns are
  # passed on only when `variables$strat` is given.
  coxph_of <- function(df) {
    h_coxph_df(
      tte = df[[variables$tte]],
      is_event = df[[variables$is_event]],
      arm = df[[variables$arm]],
      strata_data = if (is.null(variables$strat)) NULL else df[variables$strat],
      control = control
    )
  }

  # Overall ("All Patients") row.
  overall <- coxph_of(data)
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  # Without subgroup variables only the overall row is returned.
  if (is.null(variables$subgroups)) {
    return(overall)
  }

  # Per-subgroup rows, each combined with the labels describing its subgroup.
  subsets <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  per_subset <- lapply(subsets, function(subset_i) {
    estimates <- coxph_of(subset_i$df)
    # Repeat the single label row once per result row.
    label_rows <- subset_i$df_labels[rep(1, times = nrow(estimates)), ]
    cbind(estimates, label_rows)
  })

  by_subgroup <- do.call(rbind, args = c(per_subset, make.row.names = FALSE))
  by_subgroup$row_type <- "analysis"

  rbind(overall, by_subgroup)
}
384
385		#' Split Dataframe by Subgroups
386		#'
387		#' @description `r lifecycle::badge("stable")`
388		#'
389		#' Split a dataframe into a non-nested list of subsets.
390		#'
391		#' @inheritParams survival_duration_subgroups
392		#' @param data (`data.frame`)\cr dataset to split.
393		#' @param subgroups (`character`)\cr names of factor variables from `data` used to create subsets.
394		#' Unused levels not present in `data` are dropped. Note that the order in this vector
395		#' determines the order in the downstream table.
396		#'
397		#' @return A list with subset data (`df`) and metadata about the subset (`df_labels`).
398		#'
399		#' @details Main functionality is to prepare data for use in forest plot layouts.
400		#'
401		#' @examples
402		#' df <- data.frame(
403		#' x = c(1:5),
404		#' y = factor(c("A", "B", "A", "B", "A"), levels = c("A", "B", "C")),
405		#' z = factor(c("C", "C", "D", "D", "D"), levels = c("D", "C"))
406		#' )
407		#' formatters::var_labels(df) <- paste("label for", names(df))
408		#'
409		#' h_split_by_subgroups(
410		#' data = df,
411		#' subgroups = c("y", "z")
412		#' )
413		#'
414		#' h_split_by_subgroups(
415		#' data = df,
416		#' subgroups = c("y", "z"),
417		#' groups_lists = list(
418		#' y = list("AB" = c("A", "B"), "C" = "C")
419		#' )
420		#' )
421		#'
422		#' @export
h_split_by_subgroups <- function(data,
                                 subgroups,
                                 groups_lists = list()) {
  checkmate::assert_character(subgroups, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(groups_lists, names = "named")
  checkmate::assert_subset(names(groups_lists), subgroups)
  assert_df_with_factors(data, as.list(stats::setNames(subgroups, subgroups)))

  # Remember the variable labels so they can be re-attached to every subset.
  data_labels <- unname(formatters::var_labels(data))
  df_subgroups <- data[, subgroups, drop = FALSE]
  subgroup_labels <- formatters::var_labels(df_subgroups, fill = TRUE)

  # For each subgroup variable, list the subgroups that actually occur in the data.
  l_labels <- Map(function(grp_i, name_i) {
    existing_levels <- levels(droplevels(grp_i))
    grp_levels <- if (name_i %in% names(groups_lists)) {
      # For this variable groupings are defined. We check which groups are contained in the data.
      group_list_i <- groups_lists[[name_i]]
      group_has_levels <- vapply(group_list_i, function(lvls) any(lvls %in% existing_levels), TRUE)
      names(which(group_has_levels))
    } else {
      existing_levels
    }
    data.frame(
      subgroup = grp_levels,
      var = name_i,
      var_label = unname(subgroup_labels[name_i]),
      stringsAsFactors = FALSE # Rationale is that subgroups may not be unique.
    )
  }, df_subgroups, names(df_subgroups))

  # Create a dataframe with one row per subgroup.
  df_labels <- do.call(rbind, args = c(l_labels, make.row.names = FALSE))
  row_label <- paste0(df_labels$var, ".", df_labels$subgroup)
  row_split_var <- factor(row_label, levels = row_label)

  # Create a list of data subsets.
  lapply(split(df_labels, row_split_var), function(row_i) {
    # Levels that make up this subgroup: the user-defined grouping if there is
    # one for the variable, otherwise the single level itself.
    subgroup_levels <- if (row_i$var %in% names(groups_lists)) {
      groups_lists[[row_i$var]][[row_i$subgroup]]
    } else {
      row_i$subgroup
    }
    # `%in%` never returns NA. Comparing with `==` would give NA for missing
    # values of the subgroup variable, and indexing rows with NA would add
    # all-NA rows to the subset.
    which_row <- data[[row_i$var]] %in% subgroup_levels

    df <- data[which_row, ]
    rownames(df) <- NULL
    formatters::var_labels(df) <- data_labels

    list(
      df = df,
      df_labels = data.frame(row_i, row.names = NULL)
    )
  })
}

1		#' Confidence Interval for Mean
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Convenient function for calculating the mean confidence interval. It calculates the arithmetic as well as the
6		#' geometric mean. It can be used as a `ggplot` helper function for plotting.
7		#'
8		#' @inheritParams argument_convention
9		#' @param n_min (`number`)\cr a minimum number of non-missing `x` to estimate the confidence interval for mean.
10		#' @param gg_helper (`logical`)\cr `TRUE` when output should be aligned for the use with `ggplot`.
11		#' @param geom_mean (`logical`)\cr `TRUE` when the geometric mean should be calculated.
12		#'
13		#' @return A named `vector` of values `mean_ci_lwr` and `mean_ci_upr`.
14		#'
15		#' @examples
16		#' stat_mean_ci(sample(10), gg_helper = FALSE)
17		#'
18		#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) +
19		#' ggplot2::geom_point()
20		#'
21		#' p + ggplot2::stat_summary(
22		#' fun.data = stat_mean_ci,
23		#' geom = "errorbar"
24		#' )
25		#'
26		#' p + ggplot2::stat_summary(
27		#' fun.data = stat_mean_ci,
28		#' fun.args = list(conf_level = 0.5),
29		#' geom = "errorbar"
30		#' )
31		#'
32		#' p + ggplot2::stat_summary(
33		#' fun.data = stat_mean_ci,
34		#' fun.args = list(conf_level = 0.5, geom_mean = TRUE),
35		#' geom = "errorbar"
36		#' )
37		#'
38		#' @export
stat_mean_ci <- function(x,
                         conf_level = 0.95,
                         na.rm = TRUE, # nolint
                         n_min = 2,
                         gg_helper = TRUE,
                         geom_mean = FALSE) {
  if (na.rm) {
    x <- stats::na.omit(x)
  }
  n <- length(x)

  if (!geom_mean) {
    m <- mean(x)
  } else {
    # The geometric mean is computed on the log scale, so any non-missing
    # value that is zero or negative makes it undefined.
    non_positive_values_exist <- any(x[!is.na(x)] <= 0)
    if (non_positive_values_exist) {
      m <- NA_real_
    } else {
      # From here on `x` and `m` are on the log scale.
      x <- log(x)
      m <- mean(x)
    }
  }

  if (n < n_min || is.na(m)) {
    ci <- c(mean_ci_lwr = NA_real_, mean_ci_upr = NA_real_)
  } else {
    # Half-width of the t-based confidence interval.
    hci <- stats::qt((1 + conf_level) / 2, df = n - 1) * stats::sd(x) / sqrt(n)
    ci <- c(mean_ci_lwr = m - hci, mean_ci_upr = m + hci)
    if (geom_mean) {
      # Back-transform the interval to the original scale.
      ci <- exp(ci)
    }
  }

  if (gg_helper) {
    # Layout with columns `y`, `ymin`, `ymax`, as used with
    # `ggplot2::stat_summary(fun.data = ...)`. NaN is reported as NA.
    y <- if (is.na(m)) {
      NA_real_
    } else if (geom_mean) {
      exp(m)
    } else {
      m
    }
    ci <- data.frame(y = y, ymin = ci[[1]], ymax = ci[[2]])
  }

  ci
}
79
80		#' Confidence Interval for Median
81		#'
82		#' @description `r lifecycle::badge("stable")`
83		#'
84		#' Convenient function for calculating the median confidence interval. It can be used as a `ggplot` helper
85		#' function for plotting.
86		#'
87		#' @inheritParams argument_convention
88		#' @param gg_helper (`logical`)\cr `TRUE` when output should be aligned for the use with `ggplot`.
89		#'
90		#' @details The function was adapted from `DescTools/versions/0.99.35/source`
91		#'
92		#' @return A named `vector` of values `median_ci_lwr` and `median_ci_upr`.
93		#'
94		#' @examples
95		#' stat_median_ci(sample(10), gg_helper = FALSE)
96		#'
97		#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) +
98		#' ggplot2::geom_point()
99		#' p + ggplot2::stat_summary(
100		#' fun.data = stat_median_ci,
101		#' geom = "errorbar"
102		#' )
103		#'
104		#' @export
stat_median_ci <- function(x,
                           conf_level = 0.95,
                           na.rm = TRUE, # nolint
                           gg_helper = TRUE) {
  values <- unname(x)
  if (na.rm) {
    values <- values[!is.na(values)]
  }
  n_obs <- length(values)
  center <- stats::median(values)

  # Rank of the order statistic used as the lower limit.
  rank_lwr <- stats::qbinom(p = (1 - conf_level) / 2, size = n_obs, prob = 0.5, lower.tail = TRUE)

  # Default: no interval available.
  ci <- c(median_ci_lwr = NA_real_, median_ci_upr = NA_real_)
  achieved_level <- NA_real_

  # rank 0 - for small samples (e.g. n <= 5) ci can be outside the observed range
  if (!(rank_lwr == 0 || is.na(center))) {
    ordered <- sort(values)
    ci <- c(
      median_ci_lwr = ordered[rank_lwr],
      median_ci_upr = ordered[n_obs - rank_lwr + 1]
    )
    # Coverage actually achieved by the chosen pair of order statistics.
    achieved_level <- 1 - 2 * stats::pbinom(rank_lwr - 1, size = n_obs, prob = 0.5)
  }

  if (gg_helper) {
    # Layout with columns `y`, `ymin`, `ymax`, as used with
    # `ggplot2::stat_summary(fun.data = ...)`.
    ci <- data.frame(y = center, ymin = ci[[1]], ymax = ci[[2]])
  }

  attr(ci, "conf_level") <- achieved_level
  ci
}
136
137		#' p-Value of the Mean
138		#'
139		#' @description `r lifecycle::badge("stable")`
140		#'
141		#' Convenient function for calculating the two-sided p-value of the mean.
142		#'
143		#' @inheritParams argument_convention
144		#' @param n_min (`numeric`)\cr a minimum number of non-missing `x` to estimate the p-value of the mean.
145		#' @param test_mean (`numeric`)\cr mean value to test under the null hypothesis.
146		#'
147		#' @return A p-value.
148		#'
149		#' @examples
150		#' stat_mean_pval(sample(10))
151		#'
152		#' stat_mean_pval(rnorm(10), test_mean = 0.5)
153		#'
154		#' @export
stat_mean_pval <- function(x,
                           na.rm = TRUE, # nolint
                           n_min = 2,
                           test_mean = 0) {
  if (na.rm) {
    x <- stats::na.omit(x)
  }
  n <- length(x)

  x_mean <- mean(x)
  x_sd <- stats::sd(x)

  if (n < n_min) {
    # Too few observations to test.
    pv <- c(p_value = NA_real_)
  } else {
    # Two-sided one-sample t-test of the mean against `test_mean`.
    x_se <- x_sd / sqrt(n)
    ttest <- (x_mean - test_mean) / x_se
    pv <- c(p_value = 2 * stats::pt(-abs(ttest), df = n - 1))
  }

  pv
}

1		#' Re-implemented [range()] Default S3 method for numerical objects
2		#'
3		#' This function returns `c(NA, NA)` instead of `c(-Inf, Inf)` for zero-length data
4		#' without any warnings.
5		#'
6		#' @param x (`numeric`)\cr a sequence of numbers for which the range is computed.
7		#' @param na.rm (`logical`)\cr indicating if `NA` should be omitted.
8		#' @param finite (`logical`)\cr indicating if non-finite elements should be removed.
9		#'
10		#' @return A 2-element vector of class `numeric`.
11		#'
12		#' @examples
13		#' # Internal function - range_noinf
14		#' \dontrun{
15		#' range_noinf(1:5)
16		#' range_noinf(c(1:5, NA, NA), na.rm = TRUE)
17		#' range_noinf(numeric(), na.rm = TRUE)
18		#' range_noinf(c(1:5, NA, NA, Inf), na.rm = TRUE, finite = TRUE)
19		#' range_noinf(Inf)
20		#' range_noinf(Inf, na.rm = TRUE, finite = TRUE)
21		#' range_noinf(c(Inf, NA), na.rm = FALSE, finite = TRUE)
22		#' range_noinf(c(1, Inf, NA), na.rm = FALSE, finite = TRUE)
23		#' }
24		#'
25		#' @keywords internal
range_noinf <- function(x, na.rm = FALSE, finite = FALSE) { # nolint
  checkmate::assert_numeric(x)

  # `finite` takes precedence over `na.rm`: dropping non-finite values also drops `NA`s.
  keep <- if (finite) {
    is.finite(x)
  } else if (na.rm) {
    !is.na(x)
  }
  if (!is.null(keep)) {
    x <- x[keep]
  }

  # Non-empty input: plain minimum and maximum (any remaining `NA` propagates).
  if (length(x) > 0) {
    return(c(min(x, na.rm = FALSE), max(x, na.rm = FALSE)))
  }

  # Nothing left: return a pair of `NA`s coerced to the storage type of `x`.
  empty_range <- c(NA, NA)
  mode(empty_range) <- typeof(x)
  empty_range
}
45
46		#' Utility function to create label for confidence interval
47		#'
48		#' @description `r lifecycle::badge("stable")`
49		#'
50		#' @inheritParams argument_convention
51		#'
52		#' @return A `string`.
53		#'
54		#' @export
f_conf_level <- function(conf_level) {
  assert_proportion_value(conf_level)
  # Express the proportion as a percentage for the label.
  percent <- conf_level * 100
  paste0(percent, "% CI")
}
59
60		#' Utility function to create label for p-value
61		#'
62		#' @description `r lifecycle::badge("stable")`
63		#'
64		#' @param test_mean (`number`)\cr mean value to test under the null hypothesis.
65		#'
66		#' @return A `string`.
67		#'
68		#' @export
f_pval <- function(test_mean) {
  checkmate::assert_numeric(test_mean, len = 1)
  # Label states the null-hypothesis mean that is being tested.
  prefix <- "p-value (H0: mean = "
  paste0(prefix, test_mean, ")")
}
73
74		#' Utility function to return a named list of covariate names.
75		#'
76		#' @param covariates (`character`)\cr a vector that can contain single variable names (such as
77		#' `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
78		#'
79		#' @return A named `list` of `character` vector.
80		#'
81		#' @keywords internal
get_covariates <- function(covariates) {
  checkmate::assert_character(covariates)
  # Break interaction terms such as "X1 * X2" into their component variables.
  term_parts <- strsplit(covariates, "\\*")
  cov_vars <- unique(trimws(unlist(term_parts)))
  # Each variable becomes a list element named after itself.
  cov_list <- as.list(cov_vars)
  names(cov_list) <- cov_vars
  cov_list
}
87
88		#' Replicate Entries of a Vector if Required
89		#'
90		#' @description `r lifecycle::badge("stable")`
91		#'
92		#' Replicate entries of a vector if required.
93		#'
94		#' @inheritParams argument_convention
95		#' @param n (`count`)\cr how many entries we need.
96		#'
97		#' @return `x` if it has the required length already or is `NULL`,
98		#' otherwise if it is scalar the replicated version of it with `n` entries.
99		#'
100		#' @note This function will fail if `x` is not of length `n` and/or is not a scalar.
101		#'
102		#' @export
to_n <- function(x, n) {
  # `NULL` passes through untouched.
  if (is.null(x)) {
    return(NULL)
  }
  len <- length(x)
  # A scalar is replicated to the requested length.
  if (len == 1) {
    return(rep(x, n))
  }
  # Anything else must already have exactly `n` entries.
  if (len != n) {
    stop("dimension mismatch")
  }
  x
}
114
115		#' Check Element Dimension
116		#'
117		#' Checks if the elements in `...` have the same dimension.
118		#'
119		#' @param ... (`data.frame`s or `vector`s)\cr any data frames/vectors.
120		#' @param omit_null (`logical`)\cr whether `NULL` elements in `...` should be omitted from the check.
121		#'
122		#' @return A `logical` value.
123		#'
124		#' @keywords internal
check_same_n <- function(..., omit_null = TRUE) {
  dots <- list(...)

  # Give every argument a label. `Map()` cannot pair the inputs with `NULL`
  # names, so a fully unnamed call used to fail before any check was made;
  # unnamed arguments are reported by position (`..1`, `..2`, ...).
  arg_names <- names(dots)
  if (is.null(arg_names)) {
    arg_names <- rep("", length(dots))
  }
  unnamed <- !nzchar(arg_names)
  arg_names[unnamed] <- paste0("..", which(unnamed))
  names(dots) <- arg_names

  # Size of each element: rows for data frames, length for atomic vectors,
  # `NA` for `NULL` elements that are allowed to be skipped.
  n_list <- Map(
    function(x, name) {
      if (is.null(x)) {
        if (omit_null) {
          NA_integer_
        } else {
          stop("arg ", name, " is not supposed to be NULL")
        }
      } else if (is.data.frame(x)) {
        nrow(x)
      } else if (is.atomic(x)) {
        length(x)
      } else {
        stop("data structure for ", name, " is currently not supported")
      }
    },
    dots, arg_names
  )

  # Skipped `NULL` elements (recorded as `NA`) do not take part in the comparison.
  n <- stats::na.omit(unlist(n_list))

  if (length(unique(n)) > 1) {
    # Report every element whose size differs from the first one.
    sel <- which(n != n[1])
    stop("dimension mismatch: ", paste(names(n)[sel], collapse = ", "), " do not have N=", n[1])
  }

  TRUE
}
156
157		#' Make Names Without Dots
158		#'
159		#' @param nams (`character`)\cr vector of original names.
160		#'
161		#' @return A `character` `vector` of proper names, which does not use dots in contrast to [make.names()].
162		#'
163		#' @examples
164		#' # Internal function - make_names
165		#' \dontrun{
166		#' make_names(c("foo Bar", "1 2 3 bla"))
167		#' }
168		#'
169		#' @keywords internal
make_names <- function(nams) {
  # `make.names()` may introduce dots; remove every literal dot from its result.
  gsub(pattern = ".", replacement = "", x = make.names(nams), fixed = TRUE)
}
174
175		#' Conversion of Months to Days
176		#'
177		#' @description `r lifecycle::badge("stable")`
178		#'
179		#' Conversion of Months to Days. This is an approximative calculation because it
180		#' considers each month as having an average of 30.4375 days.
181		#'
182		#' @param x (`numeric`)\cr time in months.
183		#'
184		#' @return A `numeric` vector with the time in days.
185		#'
186		#' @examples
187		#' x <- c(13.25, 8.15, 1, 2.834)
188		#' month2day(x)
189		#'
190		#' @export
month2day <- function(x) {
  checkmate::assert_numeric(x)
  # Average month length used for the conversion.
  days_per_month <- 30.4375
  x * days_per_month
}
195
196		#' Conversion of Days to Months
197		#'
198		#' @param x (`numeric`)\cr time in days.
199		#'
200		#' @return A `numeric` vector with the time in months.
201		#'
202		#' @examples
203		#' x <- c(403, 248, 30, 86)
204		#' day2month(x)
205		#'
206		#' @export
day2month <- function(x) {
  checkmate::assert_numeric(x)
  # Average month length used for the conversion.
  days_per_month <- 30.4375
  x / days_per_month
}
211
212		#' Return an empty numeric if all elements are `NA`.
213		#'
214		#' @param x (`numeric`)\cr vector.
215		#'
216		#' @return An empty `numeric` if all elements of `x` are `NA`, otherwise `x`.
217		#'
218		#' @examples
219		#' x <- c(NA, NA, NA)
220		#' # Internal function - empty_vector_if_na
221		#' \dontrun{
222		#' empty_vector_if_na(x)
223		#' }
224		#'
225		#' @keywords internal
empty_vector_if_na <- function(x) {
  # At least one observed value: hand the input back unchanged.
  has_observed <- !all(is.na(x))
  if (has_observed) {
    return(x)
  }
  # Only `NA`s (or no elements at all): collapse to an empty numeric.
  numeric()
}
233
234		#' Combine Two Vectors Element Wise
235		#'
236		#' @param x (`vector`)\cr first vector to combine.
237		#' @param y (`vector`)\cr second vector to combine.
238		#'
239		#' @return A `list` where each element combines corresponding elements of `x` and `y`.
240		#'
241		#' @examples
242		#' combine_vectors(1:3, 4:6)
243		#'
244		#' @export
combine_vectors <- function(x, y) {
  checkmate::assert_vector(x)
  checkmate::assert_vector(y, len = length(x))

  # Stack the inputs as two rows; each column then holds one (x, y) pair.
  paired <- as.data.frame(rbind(x, y))
  # Turn every column into a list element and drop the generated column names.
  unname(lapply(paired, `c`))
}
253
254		#' Extract Elements by Name
255		#'
256		#' This utility function extracts elements from a vector `x` by `names`.
257		#' Differences to the standard `[` function are:
258		#'
259		#' - If `x` is `NULL`, then still always `NULL` is returned (same as in base function).
260		#' - If `x` is not `NULL`, then the intersection of its names is made with `names` and those
261		#' elements are returned. That is, `names` which don't appear in `x` are not returned as `NA`s.
262		#'
263		#' @param x (named `vector`)\cr where to extract named elements from.
264		#' @param names (`character`)\cr vector of names to extract.
265		#'
266		#' @return `NULL` if `x` is `NULL`, otherwise the extracted elements from `x`.
267		#'
268		#' @keywords internal
extract_by_name <- function(x, names) {
  if (is.null(x)) {
    return(NULL)
  }
  checkmate::assert_named(x)
  checkmate::assert_character(names)

  # Keep only the requested names that actually exist in `x`.
  matched <- intersect(names(x), names)
  if (length(matched) == 0) {
    return(NULL)
  }
  x[matched]
}
282
283		#' Labels for Adverse Event Baskets
284		#'
285		#' @description `r lifecycle::badge("stable")`
286		#'
287		#' @param aesi (`character`)\cr with standardized MedDRA query name (e.g. `SMQzzNAM`) or customized query
288		#' name (e.g. `CQzzNAM`).
289		#' @param scope (`character`)\cr with scope of query (e.g. `SMQzzSC`).
290		#'
291		#' @return A `string` with the standard label for the AE basket.
292		#'
293		#' @examples
294		#' adae <- tern_ex_adae
295		#'
296		#' # Standardized query label includes scope.
297		#' aesi_label(adae$SMQ01NAM, scope = adae$SMQ01SC)
298		#'
299		#' # Customized query label.
300		#' aesi_label(adae$CQ01NAM)
301		#'
302		#' @export
aesi_label <- function(aesi, scope = NULL) {
  checkmate::assert_character(aesi)
  checkmate::assert_character(scope, null.ok = TRUE)

  # Read the variable label before the values are processed.
  var_label <- obj_label(aesi)

  # Distinct non-missing query names after `sas_na()` has been applied.
  aesi <- unique(sas_na(aesi))
  aesi <- aesi[!is.na(aesi)]

  # No single query name: fall back to the variable label.
  if (length(aesi) != 1) {
    return(var_label)
  }
  # A single query name without scope is used as is.
  if (is.null(scope)) {
    return(aesi)
  }

  # A single query name with scope: exactly one distinct non-missing scope is required.
  scope <- unique(sas_na(scope))
  scope <- scope[!is.na(scope)]
  checkmate::assert_string(scope)
  paste0(aesi, " (", scope, ")")
}
323
324		#' Indicate Study Arm Variable in Formula
325		#'
326		#' We use `study_arm` to indicate the study arm variable in `tern` formulas.
327		#'
328		#' @param x arm information
329		#'
330		#' @return `x`
331		#'
332		#' @keywords internal
study_arm <- function(x) {
  # Record the expression supplied by the caller as the `varname` attribute.
  arm_expr <- deparse(substitute(x))
  structure(x, varname = arm_expr)
}
336
337		#' Smooth Function with Optional Grouping
338		#'
339		#' @description `r lifecycle::badge("stable")`
340		#'
341		#' This produces `loess` smoothed estimates of `y` with Student confidence intervals.
342		#'
343		#' @param df (`data.frame`)\cr data set containing all analysis variables.
344		#' @param x (`character`)\cr value with x column name.
345		#' @param y (`character`)\cr value with y column name.
346		#' @param groups (`character`)\cr vector with optional grouping variables names.
347		#' @param level (`numeric`)\cr level of confidence interval to use (0.95 by default).
348		#'
349		#' @return A `data.frame` with original `x`, smoothed `y`, `ylow`, and `yhigh`, and
350		#' optional `groups` variables formatted as `factor` type.
351		#'
352		#' @export
get_smooths <- function(df, x, y, groups = NULL, level = 0.95) {
  checkmate::assert_data_frame(df)
  df_cols <- colnames(df)
  checkmate::assert_string(x)
  checkmate::assert_subset(x, df_cols)
  checkmate::assert_numeric(df[[x]])
  checkmate::assert_string(y)
  checkmate::assert_subset(y, df_cols)
  checkmate::assert_numeric(df[[y]])

  if (!is.null(groups)) {
    checkmate::assert_character(groups)
    checkmate::assert_subset(groups, df_cols)
  }

  # Fits a loess curve of `y` on `x` and returns the fitted values with their bounds.
  # The standard errors are read through their full name `se.fit` rather than through
  # `$` partial matching.
  # NOTE(review): `qt(level, df)` is a one-sided quantile; a two-sided interval with
  # coverage `level` would use `qt((1 + level) / 2, df)` - kept unchanged, confirm intent.
  smooth_with_ci <- function(x, y) {
    plx <- stats::predict(stats::loess(y ~ x), se = TRUE)
    half_width <- stats::qt(level, plx$df) * plx$se.fit
    data.frame(
      x = x,
      y = plx$fit,
      ylow = plx$fit - half_width,
      yhigh = plx$fit + half_width
    )
  }

  if (!is.null(groups)) {
    cc <- stats::complete.cases(df[c(x, y, groups)])
    df_c <- df[cc, c(x, y, groups)]

    # `by()` returns its cells with the FIRST grouping factor varying fastest, so the rows
    # must be sorted with the LAST grouping variable as primary key. Otherwise the group
    # labels attached below do not line up with the stacked smooths when more than one
    # grouping variable is used. The keys are unnamed so that a column name can never be
    # mistaken for an argument of `order()`.
    sort_keys <- unname(rev(as.list(df_c[, groups, drop = FALSE])))
    df_c_ordered <- df_c[do.call("order", sort_keys), , drop = FALSE]
    df_c_g <- data.frame(Map(as.factor, df_c_ordered[groups]))

    df_smooth_raw <- by(df_c_ordered, df_c_g, function(d) {
      smooth_with_ci(d[[x]], d[[y]])
    })

    df_smooth <- do.call(rbind, df_smooth_raw)
    df_smooth[groups] <- df_c_g

    df_smooth
  } else {
    cc <- stats::complete.cases(df[c(x, y)])
    df_c <- df[cc, ]

    smooth_with_ci(df_c[[x]], df_c[[y]])
  }
}
408
409		#' Number of Available (Non-Missing Entries) in a Vector
410		#'
411		#' Small utility function for better readability.
412		#'
413		#' @param x (`any`)\cr vector in which to count non-missing values.
414		#'
415		#' @return Number of non-missing values.
416		#'
417		#' @examples
418		#' # Internal function - n_available
419		#' \dontrun{
420		#' n_available(c(1, NA, 2))
421		#' }
422		#'
423		#' @keywords internal
n_available <- function(x) {
  # Flag the non-missing entries, then count them.
  is_present <- !is.na(x)
  sum(is_present)
}
427
428		#' Reapply Variable Labels
429		#'
430		#' This is a helper function that is used in tests.
431		#'
432		#' @param x (`vector`)\cr vector of elements that needs new labels.
433		#' @param varlabels (`character`)\cr vector of labels for `x`.
434		#' @param ... further parameters to be added to the list.
435		#'
436		#' @return `x` with variable labels reapplied.
437		#'
438		#' @export
reapply_varlabels <- function(x, varlabels, ...) {
  # Labels given in `varlabels` are combined with those passed through `...`.
  named_labels <- c(as.list(varlabels), list(...))

  # Read the current labels, overwrite the named ones, and write them back.
  current_labels <- formatters::var_labels(x)
  current_labels[names(named_labels)] <- as.character(named_labels)
  formatters::var_labels(x) <- current_labels
  x
}
444
# Wrapper function of survival::clogit so that a uniform message is shown when model fitting
# fails. The message of the underlying error is appended so the cause is not lost.
clogit_with_tryCatch <- function(formula, data, ...) { # nolint
  tryCatch(
    survival::clogit(formula = formula, data = data, ...),
    error = function(e) {
      stop("model not built successfully with survival::clogit: ", conditionMessage(e))
    }
  )
}

1		#' Survival Time Point Analysis
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Summarize patients' survival rate and difference of survival rates between groups at a time point.
6		#'
7		#' @inheritParams argument_convention
8		#' @inheritParams s_surv_time
9		#' @param time_point (`number`)\cr survival time point of interest.
10		#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function
11		#' [control_surv_timepoint()]. Some possible parameter options are:
12		#' * `conf_level` (`proportion`)\cr confidence level of the interval for survival rate.
13		#' * `conf_type` (`string`)\cr confidence interval type. Options are "plain" (default), "log", "log-log",
14		#' see more in [survival::survfit()]. Note option "none" is no longer supported.
15		#' * `time_point` (`number`)\cr survival time point of interest.
16		#'
17		#' @name survival_timepoint
18		NULL
19
20		#' @describeIn survival_timepoint Statistics function which analyzes survival rate.
21		#'
22		#' @return
23		#' * `s_surv_timepoint()` returns the statistics:
24		#' * `pt_at_risk`: Patients remaining at risk.
25		#' * `event_free_rate`: Event-free rate (%).
26		#' * `rate_se`: Standard error of event free rate.
27		#' * `rate_ci`: Confidence interval for event free rate.
28		#'
29		#' @examples
30		#' library(dplyr)
31		#'
32		#' adtte_f <- tern_ex_adtte %>%
33		#' filter(PARAMCD == "OS") %>%
34		#' mutate(
35		#' AVAL = day2month(AVAL),
36		#' is_event = CNSR == 0
37		#' )
38		#' df <- adtte_f %>%
39		#' filter(ARMCD == "ARM A")
40		#'
41		#' # Internal function - s_surv_timepoint
42		#' \dontrun{
43		#' s_surv_timepoint(df, .var = "AVAL", time_point = 7, is_event = "is_event")
44		#' }
45		#'
46		#' @keywords internal
s_surv_timepoint <- function(df,
                             .var,
                             time_point,
                             is_event,
                             control = control_surv_timepoint()) {
  checkmate::assert_string(.var)
  assert_df_with_variables(df, list(tte = .var, is_event = is_event))
  checkmate::assert_numeric(df[[.var]], min.len = 1, any.missing = FALSE)
  checkmate::assert_number(time_point)
  checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE)

  conf_level <- control$conf_level

  # Intercept-only survival fit on the supplied data.
  surv_formula <- stats::as.formula(paste0("survival::Surv(", .var, ", ", is_event, ") ~ 1"))
  srv_fit <- survival::survfit(
    formula = surv_formula,
    data = df,
    conf.int = conf_level,
    conf.type = control$conf_type
  )

  # Summary of the fit at the requested time point (`extend = TRUE` is passed to `summary()`).
  at_time_point <- summary(srv_fit, times = time_point, extend = TRUE)
  est <- as.data.frame(at_time_point[c("time", "n.risk", "surv", "lower", "upper", "std.err")])

  # With nobody left at risk every statistic is reported as missing.
  none_at_risk <- est[["n.risk"]] == 0
  pt_at_risk <- if (none_at_risk) NA_real_ else est$n.risk
  event_free_rate <- if (none_at_risk) NA_real_ else est$surv
  rate_se <- if (none_at_risk) NA_real_ else est$std.err
  rate_ci <- if (none_at_risk) c(NA_real_, NA_real_) else c(est$lower, est$upper)

  # Rates, standard error and interval bounds are multiplied by 100.
  list(
    pt_at_risk = formatters::with_label(pt_at_risk, "Patients remaining at risk"),
    event_free_rate = formatters::with_label(event_free_rate * 100, "Event Free Rate (%)"),
    rate_se = formatters::with_label(rate_se * 100, "Standard Error of Event Free Rate"),
    rate_ci = formatters::with_label(rate_ci * 100, f_conf_level(conf_level))
  )
}
86
87		#' @describeIn survival_timepoint Formatted analysis function which is used as `afun` in `surv_timepoint()`
88		#' when `method = "surv"`.
89		#'
90		#' @return
91		#' * `a_surv_timepoint()` returns the corresponding list with formatted [rtables::CellValue()].
92		#'
93		#' @examples
94		#' # Internal function - a_surv_timepoint
95		#' \dontrun{
96		#' a_surv_timepoint(df, .var = "AVAL", time_point = 7, is_event = "is_event")
97		#' }
98		#'
99		#' @keywords internal
# Formatted version of `s_surv_timepoint()`: one display format and one indent modifier
# per statistic.
a_surv_timepoint <- make_afun(
  s_surv_timepoint,
  .formats = c(
    pt_at_risk = "xx",
    event_free_rate = "xx.xx",
    rate_se = "xx.xx",
    rate_ci = "(xx.xx, xx.xx)"
  ),
  .indent_mods = c(
    pt_at_risk = 0L,
    event_free_rate = 0L,
    rate_se = 1L,
    rate_ci = 1L
  )
)
115
116		#' @describeIn survival_timepoint Statistics function which analyzes difference between two survival rates.
117		#'
118		#' @return
119		#' * `s_surv_timepoint_diff()` returns the statistics:
120		#' * `rate_diff`: Event-free rate difference between two groups.
121		#' * `rate_diff_ci`: Confidence interval for the difference.
122		#' * `ztest_pval`: p-value to test the difference is 0.
123		#'
124		#' @examples
125		#' df_ref_group <- adtte_f %>%
126		#' filter(ARMCD == "ARM B")
127		#'
128		#' # Internal function - s_surv_timepoint_diff
129		#' \dontrun{
130		#' s_surv_timepoint_diff(df, df_ref_group, .in_ref_col = TRUE, .var = "AVAL", is_event = "is_event")
131		#' s_surv_timepoint_diff(
132		#' df,
133		#' df_ref_group,
134		#' .in_ref_col = FALSE,
135		#' .var = "AVAL",
136		#' time_point = 7,
137		#' is_event = "is_event"
138		#' )
139		#' }
140		#'
141		#' @keywords internal
s_surv_timepoint_diff <- function(df,
                                  .var,
                                  .ref_group,
                                  .in_ref_col,
                                  time_point,
                                  control = control_surv_timepoint(),
                                  ...) {
  # In the reference column there is nothing to compare: return labelled empty strings.
  if (.in_ref_col) {
    blank_stats <- list(
      rate_diff = formatters::with_label("", "Difference in Event Free Rate"),
      rate_diff_ci = formatters::with_label("", f_conf_level(control$conf_level)),
      ztest_pval = formatters::with_label("", "p-value (Z-test)")
    )
    return(blank_stats)
  }

  # Stack reference and comparison rows, then analyze each arm separately.
  pooled <- rbind(.ref_group, df)
  arm <- factor(
    rep(c("ref", "x"), times = c(nrow(.ref_group), nrow(df))),
    levels = c("ref", "x")
  )
  res_per_arm <- lapply(split(pooled, arm), function(arm_df) {
    s_surv_timepoint(df = arm_df, .var = .var, time_point = time_point, control = control, ...)
  })
  res_x <- res_per_arm[["x"]]
  res_ref <- res_per_arm[["ref"]]

  # Difference of the two rates; its standard error combines both arms' standard errors.
  rate_diff <- res_x$event_free_rate - res_ref$event_free_rate
  se_diff <- sqrt(res_x$rate_se^2 + res_ref$rate_se^2)

  # Normal-approximation interval around the difference.
  z_crit <- stats::qnorm(1 - (1 - control$conf_level) / 2)
  rate_diff_ci <- rate_diff + c(-1, 1) * z_crit * se_diff

  # Two-sided Z-test p-value; missing when the difference itself is missing.
  if (is.na(rate_diff)) {
    ztest_pval <- NA
  } else {
    ztest_pval <- 2 * (1 - stats::pnorm(abs(rate_diff) / se_diff))
  }

  list(
    rate_diff = formatters::with_label(rate_diff, "Difference in Event Free Rate"),
    rate_diff_ci = formatters::with_label(rate_diff_ci, f_conf_level(control$conf_level)),
    ztest_pval = formatters::with_label(ztest_pval, "p-value (Z-test)")
  )
}
182
183		#' @describeIn survival_timepoint Formatted analysis function which is used as `afun` in `surv_timepoint()`
184		#' when `method = "surv_diff"`.
185		#'
186		#' @return
187		#' * `a_surv_timepoint_diff()` returns the corresponding list with formatted [rtables::CellValue()].
188		#'
189		#' @examples
190		#' # Internal function - a_surv_timepoint_diff
191		#' \dontrun{
192		#' a_surv_timepoint_diff(
193		#' df,
194		#' df_ref_group,
195		#' .in_ref_col = FALSE,
196		#' .var = "AVAL",
197		#' time_point = 7,
198		#' is_event = "is_event"
199		#' )
200		#' }
201		#'
202		#' @keywords internal
# Formatted version of `s_surv_timepoint_diff()`: one display format per statistic.
# The p-value format contains a literal `|`, which needs no escaping inside an R string
# (`\|` is not a valid escape sequence in an R string literal).
a_surv_timepoint_diff <- make_afun(
  s_surv_timepoint_diff,
  .formats = c(
    rate_diff = "xx.xx",
    rate_diff_ci = "(xx.xx, xx.xx)",
    ztest_pval = "x.xxxx | (<0.0001)"
  )
)
211
212		#' @describeIn survival_timepoint Layout-creating function which can take statistics function arguments
213		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
214		#'
215		#' @param method (`string`)\cr either `surv` (survival estimations),
216		#' `surv_diff` (difference in survival with the control) or `both`.
217		#' @param table_names_suffix (`string`)\cr optional suffix for the `table_names` used for the `rtables` to
218		#' avoid warnings from duplicate table names.
219		#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
220		#' should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
221		#' for that statistic's row label.
222		#'
223		#' @return
224		#' * `surv_timepoint()` returns a layout object suitable for passing to further layouting functions,
225		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
226		#' the statistics from `s_surv_timepoint()` and/or `s_surv_timepoint_diff()` to the table layout depending on
227		#' the value of `method`.
228		#'
229		#' @examples
230		#' # Survival at given time points.
231		#' basic_table() %>%
232		#' split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
233		#' add_colcounts() %>%
234		#' surv_timepoint(
235		#' vars = "AVAL",
236		#' var_labels = "Months",
237		#' is_event = "is_event",
238		#' time_point = 7
239		#' ) %>%
240		#' build_table(df = adtte_f)
241		#'
242		#' # Difference in survival at given time points.
243		#' basic_table() %>%
244		#' split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
245		#' add_colcounts() %>%
246		#' surv_timepoint(
247		#' vars = "AVAL",
248		#' var_labels = "Months",
249		#' is_event = "is_event",
250		#' time_point = 9,
251		#' method = "surv_diff",
252		#' .indent_mods = c("rate_diff" = 0L, "rate_diff_ci" = 2L, "ztest_pval" = 2L)
253		#' ) %>%
254		#' build_table(df = adtte_f)
255		#'
256		#' # Survival and difference in survival at given time points.
257		#' basic_table() %>%
258		#' split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
259		#' add_colcounts() %>%
260		#' surv_timepoint(
261		#' vars = "AVAL",
262		#' var_labels = "Months",
263		#' is_event = "is_event",
264		#' time_point = 9,
265		#' method = "both"
266		#' ) %>%
267		#' build_table(df = adtte_f)
268		#'
269		#' @export
surv_timepoint <- function(lyt,
                           vars,
                           ...,
                           table_names_suffix = "",
                           var_labels = "Time",
                           show_labels = "visible",
                           method = c("surv", "surv_diff", "both"),
                           .stats = c(
                             "pt_at_risk", "event_free_rate", "rate_ci",
                             "rate_diff", "rate_diff_ci", "ztest_pval"
                           ),
                           .formats = NULL,
                           .labels = NULL,
                           .indent_mods = if (method == "both") {
                             c(rate_diff = 1L, rate_diff_ci = 2L, ztest_pval = 2L)
                           } else {
                             c(rate_diff_ci = 1L, ztest_pval = 1L)
                           }) {
  # `method` has to be resolved before `.indent_mods` is first used: its lazily
  # evaluated default compares `method` against "both".
  method <- match.arg(method)
  checkmate::assert_string(table_names_suffix)

  # Statistics belonging to each of the two analysis functions.
  f <- list(
    surv = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci"),
    surv_diff = c("rate_diff", "rate_diff_ci", "ztest_pval")
  )
  .stats <- h_split_param(.stats, .stats, f = f)
  .formats <- h_split_param(.formats, names(.formats), f = f)
  .labels <- h_split_param(.labels, names(.labels), f = f)
  .indent_mods <- h_split_param(.indent_mods, names(.indent_mods), f = f)

  afun_surv <- make_afun(
    a_surv_timepoint,
    .stats = .stats$surv,
    .formats = .formats$surv,
    .labels = .labels$surv,
    .indent_mods = .indent_mods$surv
  )

  afun_surv_diff <- make_afun(
    a_surv_timepoint_diff,
    .stats = .stats$surv_diff,
    .formats = .formats$surv_diff,
    .labels = .labels$surv_diff,
    .indent_mods = .indent_mods$surv_diff
  )

  # Capture the extra arguments once instead of rebuilding `list(...)` for every use.
  dots <- list(...)
  time_point <- dots$time_point

  # With `method = "both"` the label of the difference table is hidden.
  # A plain `if` is used because the condition is a scalar.
  show_labels_diff <- if (method == "both") "hidden" else show_labels

  # One set of analyses is appended to the layout per requested time point.
  for (i in seq_along(time_point)) {
    tpt <- time_point[i]

    if (method %in% c("surv", "both")) {
      lyt <- analyze(
        lyt,
        vars,
        var_labels = paste(tpt, var_labels),
        table_names = paste0("surv_", tpt, table_names_suffix),
        show_labels = show_labels,
        afun = afun_surv,
        extra_args = list(
          is_event = dots$is_event,
          control = dots$control,
          time_point = tpt
        )
      )
    }

    if (method %in% c("surv_diff", "both")) {
      lyt <- analyze(
        lyt,
        vars,
        var_labels = paste(tpt, var_labels),
        table_names = paste0("surv_diff_", tpt, table_names_suffix),
        show_labels = show_labels_diff,
        afun = afun_surv_diff,
        extra_args = list(
          is_event = dots$is_event,
          control = dots$control,
          time_point = tpt
        )
      )
    }
  }
  lyt
}

1		#' Subgroup Treatment Effect Pattern (STEP) Fit for Survival Outcome
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' This fits the Subgroup Treatment Effect Pattern models for a survival outcome. The treatment arm
6		#' variable must have exactly 2 levels, where the first one is taken as reference and the estimated
7		#' hazard ratios are for the comparison of the second level vs. the first one.
8		#'
9		#' The model which is fit is:
10		#'
11		#' `Surv(time, event) ~ arm * poly(biomarker, degree) + covariates + strata(strata)`
12		#'
13		#' where `degree` is specified by `control_step()`.
14		#'
15		#' @inheritParams argument_convention
16		#' @param variables (named `list` of `character`)\cr list of analysis variables: needs `time`, `event`,
17		#' `arm`, `biomarker`, and optional `covariates` and `strata`.
18		#' @param control (named `list`)\cr combined control list from [control_step()] and [control_coxph()].
19		#'
20		#' @return A matrix of class `step`. The first part of the columns describe the subgroup intervals used
21		#' for the biomarker variable, including where the center of the intervals are and their bounds. The
22		#' second part of the columns contain the estimates for the treatment arm comparison.
23		#'
24		#' @note For the default degree 0 the `biomarker` variable is not included in the model.
25		#'
26		#' @seealso [control_step()] and [control_coxph()] for the available customization options.
27		#'
28		#' @examples
29		#' # Testing dataset with just two treatment arms.
30		#' library(dplyr)
31		#'
32		#' adtte_f <- tern_ex_adtte %>%
33		#' filter(
34		#' PARAMCD == "OS",
35		#' ARM %in% c("B: Placebo", "A: Drug X")
36		#' ) %>%
37		#' mutate(
38		#' # Reorder levels of ARM to display reference arm before treatment arm.
39		#' ARM = droplevels(forcats::fct_relevel(ARM, "B: Placebo")),
40		#' is_event = CNSR == 0
41		#' )
42		#' labels <- c("ARM" = "Treatment Arm", "is_event" = "Event Flag")
43		#' formatters::var_labels(adtte_f)[names(labels)] <- labels
44		#'
45		#' variables <- list(
46		#' arm = "ARM",
47		#' biomarker = "BMRKR1",
48		#' covariates = c("AGE", "BMRKR2"),
49		#' event = "is_event",
50		#' time = "AVAL"
51		#' )
52		#'
53		#' # Fit default STEP models: Here a constant treatment effect is estimated in each subgroup.
54		#' step_matrix <- fit_survival_step(
55		#' variables = variables,
56		#' data = adtte_f
57		#' )
58		#' dim(step_matrix)
59		#' head(step_matrix)
60		#'
61		#' # Specify different polynomial degree for the biomarker interaction to use more flexible local
62		#' # models. Or specify different Cox regression options.
63		#' step_matrix2 <- fit_survival_step(
64		#' variables = variables,
65		#' data = adtte_f,
66		#' control = c(control_coxph(conf_level = 0.9), control_step(degree = 2))
67		#' )
68		#'
69		#' # Use a global model with cubic interaction and only 5 points.
70		#' step_matrix3 <- fit_survival_step(
71		#' variables = variables,
72		#' data = adtte_f,
73		#' control = c(control_coxph(), control_step(bandwidth = NULL, degree = 3, num_points = 5L))
74		#' )
75		#'
76		#' @export
fit_survival_step <- function(variables,
                              data,
                              control = c(control_step(), control_coxph())) {
  checkmate::assert_list(control)
  assert_df_with_variables(data, variables)

  # Rows without a biomarker value are dropped before the windows are determined.
  data <- data[!is.na(data[[variables$biomarker]]), ]
  window_sel <- h_step_window(x = data[[variables$biomarker]], control = control)
  interval_center <- window_sel$interval[, "Interval Center"]
  form <- h_step_survival_formula(variables = variables, control = control)

  if (is.null(control$bandwidth)) {
    # No bandwidth: a single call on the full data with all interval centers.
    estimates <- h_step_survival_est(
      formula = form,
      data = data,
      variables = variables,
      x = interval_center,
      control = control
    )
  } else {
    # Otherwise one call per interval center, each with its own column of `window_sel$sel`
    # as the subset.
    shared_args <- list(
      formula = form,
      data = data,
      variables = variables,
      control = control
    )
    per_window <- mapply(
      FUN = h_step_survival_est,
      x = interval_center,
      subset = as.list(as.data.frame(window_sel$sel)),
      MoreArgs = shared_args
    )
    # Maybe we find a more elegant solution than this.
    rownames(per_window) <- c("n", "events", "loghr", "se", "ci_lower", "ci_upper")
    estimates <- t(per_window)
  }

  # Interval description columns first, estimates second.
  structure(
    cbind(window_sel$interval, estimates),
    class = c("step", "matrix"),
    variables = variables,
    control = control
  )
}

1		#' Tabulate Binary Response by Subgroup
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Tabulate statistics such as response rate and odds ratio for population subgroups.
6		#'
7		#' @inheritParams argument_convention
8		#' @param data (`data.frame`)\cr the dataset containing the variables to summarize.
9		#' @param groups_lists (named `list` of `list`)\cr optionally contains for each `subgroups` variable a
10		#' list, which specifies the new group levels via the names and the
11		#' levels that belong to it in the character vectors that are elements of the list.
12		#' @param label_all (`string`)\cr label for the total population analysis.
13		#' @param method (`string`)\cr specifies the test used to calculate the p-value for the difference between
14		#' two proportions. For options, see [s_test_proportion_diff()]. Default is `NULL` so no test is performed.
15		#'
16		#' @details These functions create a layout starting from a data frame which contains
17		#' the required statistics. Tables typically used as part of forest plot.
18		#'
19		#' @seealso [extract_rsp_subgroups()]
20		#'
21		#' @examples
22		#' library(dplyr)
23		#' library(forcats)
24		#'
25		#' adrs <- tern_ex_adrs
26		#' adrs_labels <- formatters::var_labels(adrs)
27		#'
28		#' adrs_f <- adrs %>%
29		#' filter(PARAMCD == "BESRSPI") %>%
30		#' filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
31		#' droplevels() %>%
32		#' mutate(
33		#' # Reorder levels of factor to make the placebo group the reference arm.
34		#' ARM = fct_relevel(ARM, "B: Placebo"),
35		#' rsp = AVALC == "CR"
36		#' )
37		#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
38		#'
39		#' # Unstratified analysis.
40		#' df <- extract_rsp_subgroups(
41		#' variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
42		#' data = adrs_f
43		#' )
44		#' df
45		#'
46		#' @name response_subgroups
47		NULL
48
49		#' Prepares Response Data for Population Subgroups in Data Frames
50		#'
51		#' @description `r lifecycle::badge("stable")`
52		#'
53		#' Prepares response rates and odds ratios for population subgroups in data frames. Simple wrapper
54		#' for [h_odds_ratio_subgroups_df()] and [h_proportion_subgroups_df()]. Result is a list of two
55		#' `data.frames`: `prop` and `or`. `variables` corresponds to the names of variables found in `data`,
56		#' passed as a named `list` and requires elements `rsp`, `arm` and optionally `subgroups` and `strat`.
57		#' `groups_lists` optionally specifies groupings for `subgroups` variables.
58		#'
59		#' @inheritParams argument_convention
60		#' @inheritParams response_subgroups
61		#' @param label_all (`string`)\cr label for the total population analysis.
62		#'
63		#' @return A named list of two elements:
64		#' * `prop`: A `data.frame` containing columns `arm`, `n`, `n_rsp`, `prop`, `subgroup`, `var`,
65		#' `var_label`, and `row_type`.
66		#' * `or`: A `data.frame` containing columns `arm`, `n_tot`, `or`, `lcl`, `ucl`, `conf_level`,
67		#' `subgroup`, `var`, `var_label`, and `row_type`.
68		#'
69		#' @seealso [response_subgroups]
70		#'
71		#' @examples
72		#' library(dplyr)
73		#' library(forcats)
74		#'
75		#' adrs <- tern_ex_adrs
76		#' adrs_labels <- formatters::var_labels(adrs)
77		#'
78		#' adrs_f <- adrs %>%
79		#' filter(PARAMCD == "BESRSPI") %>%
80		#' filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
81		#' droplevels() %>%
82		#' mutate(
83		#' # Reorder levels of factor to make the placebo group the reference arm.
84		#' ARM = fct_relevel(ARM, "B: Placebo"),
85		#' rsp = AVALC == "CR"
86		#' )
87		#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
88		#'
89		#' # Unstratified analysis.
90		#' df <- extract_rsp_subgroups(
91		#' variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
92		#' data = adrs_f
93		#' )
94		#' df
95		#'
96		#' # Stratified analysis.
97		#' df_strat <- extract_rsp_subgroups(
98		#' variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2"), strat = "STRATA1"),
99		#' data = adrs_f
100		#' )
101		#' df_strat
102		#'
103		#' # Grouping of the BMRKR2 levels.
104		#' df_grouped <- extract_rsp_subgroups(
105		#' variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
106		#' data = adrs_f,
107		#' groups_lists = list(
108		#' BMRKR2 = list(
109		#' "low" = "LOW",
110		#' "low/medium" = c("LOW", "MEDIUM"),
111		#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
112		#' )
113		#' )
114		#' )
115		#' df_grouped
116		#'
117		#' @export
extract_rsp_subgroups <- function(variables,
                                  data,
                                  groups_lists = list(),
                                  conf_level = 0.95,
                                  method = NULL,
                                  label_all = "All Patients") {
  # Response proportions by subgroup; this helper takes no confidence level or test method.
  prop_tbl <- h_proportion_subgroups_df(
    variables, data,
    groups_lists = groups_lists,
    label_all = label_all
  )

  # Odds ratios by subgroup; `conf_level` and `method` are forwarded unchanged.
  or_tbl <- h_odds_ratio_subgroups_df(
    variables, data,
    groups_lists = groups_lists,
    conf_level = conf_level,
    method = method,
    label_all = label_all
  )

  # Both data frames are returned together under fixed names.
  list(prop = prop_tbl, or = or_tbl)
}
141
142		#' @describeIn response_subgroups Formatted analysis function which is used as `afun` in `tabulate_rsp_subgroups()`.
143		#'
144		#' @return
145		#' * `a_response_subgroups()` returns the corresponding list with formatted [rtables::CellValue()].
146		#'
147		#' @examples
148		#' # Internal function - a_response_subgroups
149		#' \dontrun{
150		#' a_response_subgroups(.formats = list("n" = "xx", "prop" = "xx.xx%"))
151		#' }
152		#'
153		#' @keywords internal
a_response_subgroups <- function(.formats = list(
                                   n = "xx",
                                   n_rsp = "xx",
                                   prop = "xx.x%",
                                   n_tot = "xx",
                                   or = list(format_extreme_values(2L)),
                                   ci = list(format_extreme_values_ci(2L)),
                                   # Plain `|` here: `\|` is not a valid escape sequence in an R string.
                                   pval = "x.xxxx | (<0.0001)"
                                 )) {
  # `.formats` must be a list whose names are all known statistics.
  checkmate::assert_list(.formats)
  checkmate::assert_subset(
    names(.formats),
    c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval")
  )

  # Build one analysis function per statistic; the result is named after the statistics.
  afun_lst <- Map(
    function(stat, fmt) {
      if (stat == "ci") {
        # The confidence interval is assembled from the two columns `lcl` and `ucl`.
        function(df, labelstr = "", ...) {
          in_rows(.list = combine_vectors(df$lcl, df$ucl), .labels = as.character(df$subgroup), .formats = fmt)
        }
      } else {
        # Every other statistic is read from the column of the same name.
        function(df, labelstr = "", ...) {
          in_rows(.list = as.list(df[[stat]]), .labels = as.character(df$subgroup), .formats = fmt)
        }
      }
    },
    stat = names(.formats),
    fmt = .formats
  )

  afun_lst
}
187
188		#' @describeIn response_subgroups Table-creating function which creates a table
189		#' summarizing binary response by subgroup. This function is a wrapper for [rtables::analyze_colvars()]
190		#' and [rtables::summarize_row_groups()].
191		#'
192		#' @param df (`list`)\cr of data frames containing all analysis variables. List should be
193		#' created using [extract_rsp_subgroups()].
194		#' @param vars (`character`)\cr the names of statistics to be reported among:
195		#' * `n`: Total number of observations per group.
196		#' * `n_rsp`: Number of responders per group.
197		#' * `prop`: Proportion of responders.
198		#' * `n_tot`: Total number of observations.
199		#' * `or`: Odds ratio.
200		#' * `ci` : Confidence interval of odds ratio.
201		#' * `pval`: p-value of the effect.
202		#' Note, the statistics `n_tot`, `or` and `ci` are required.
203		#'
204		#' @return An `rtables` table summarizing binary response by subgroup.
205		#'
206		#' @examples
207		#' ## Table with default columns.
208		#' basic_table() %>%
209		#' tabulate_rsp_subgroups(df)
210		#'
211		#' ## Table with selected columns.
212		#' basic_table() %>%
213		#' tabulate_rsp_subgroups(
214		#' df = df,
215		#' vars = c("n_tot", "n", "n_rsp", "prop", "or", "ci")
216		#' )
217		#'
218		#' @export
tabulate_rsp_subgroups <- function(lyt,
                                   df,
                                   vars = c("n_tot", "n", "prop", "or", "ci")) {
  # The p-value column needs both its values (`pval`) and its column label (`pval_label`) in `df$or`.
  # When they are missing, tell the user and drop the statistic explicitly instead of letting the
  # column disappear from the table without notice.
  if ("pval" %in% vars && !all(c("pval", "pval_label") %in% names(df$or))) {
    warning(
      "The \"pval\" statistic was requested but `df$or` does not contain the `pval` and `pval_label` columns, ",
      "so it is not included in the table. Specify `method` when creating `df` with ",
      "`extract_rsp_subgroups()` to obtain p-values.",
      call. = FALSE
    )
    vars <- vars[vars != "pval"]
  }

  # Confidence level and test description are taken from the first row of the odds ratio data.
  conf_level <- df$or$conf_level[1]
  method <- if ("pval_label" %in% names(df$or)) {
    df$or$pval_label[1]
  } else {
    NULL
  }

  afun_lst <- a_response_subgroups()
  colvars <- d_rsp_subgroups_colvars(vars, conf_level = conf_level, method = method)

  # Split the requested columns into those tabulated from `df$prop` and those from `df$or`.
  colvars_prop <- list(
    vars = colvars$vars[names(colvars$labels) %in% c("n", "prop", "n_rsp")],
    labels = colvars$labels[names(colvars$labels) %in% c("n", "prop", "n_rsp")]
  )
  colvars_or <- list(
    vars = colvars$vars[names(colvars$labels) %in% c("n_tot", "or", "ci", "pval")],
    labels = colvars$labels[names(colvars$labels) %in% c("n_tot", "or", "ci", "pval")]
  )

  # Columns from table_prop are optional.
  if (length(colvars_prop$vars) > 0) {
    lyt_prop <- split_cols_by(lyt = lyt, var = "arm")
    lyt_prop <- split_rows_by(
      lyt = lyt_prop,
      var = "row_type",
      split_fun = keep_split_levels("content"),
      nested = FALSE
    )
    lyt_prop <- summarize_row_groups(
      lyt = lyt_prop,
      var = "var_label",
      cfun = afun_lst[names(colvars_prop$labels)]
    )
    lyt_prop <- split_cols_by_multivar(
      lyt = lyt_prop,
      vars = colvars_prop$vars,
      varlabels = colvars_prop$labels
    )

    # Subgroup rows are only added when the data contains "analysis" rows.
    if ("analysis" %in% df$prop$row_type) {
      lyt_prop <- split_rows_by(
        lyt = lyt_prop,
        var = "row_type",
        split_fun = keep_split_levels("analysis"),
        nested = FALSE,
        child_labels = "hidden"
      )
      lyt_prop <- split_rows_by(lyt = lyt_prop, var = "var_label", nested = TRUE)
      lyt_prop <- analyze_colvars(
        lyt = lyt_prop,
        afun = afun_lst[names(colvars_prop$labels)],
        inclNAs = TRUE
      )
    }

    table_prop <- build_table(lyt_prop, df = df$prop)
  } else {
    table_prop <- NULL
  }

  # Columns "n_tot", "or", "ci" in table_or are required.
  lyt_or <- split_cols_by(lyt = lyt, var = "arm")
  lyt_or <- split_rows_by(
    lyt = lyt_or,
    var = "row_type",
    split_fun = keep_split_levels("content"),
    nested = FALSE
  )
  lyt_or <- split_cols_by_multivar(
    lyt = lyt_or,
    vars = colvars_or$vars,
    varlabels = colvars_or$labels
  )
  lyt_or <- summarize_row_groups(
    lyt = lyt_or,
    var = "var_label",
    cfun = afun_lst[names(colvars_or$labels)]
  ) %>%
    append_topleft("Baseline Risk Factors")

  if ("analysis" %in% df$or$row_type) {
    lyt_or <- split_rows_by(
      lyt = lyt_or,
      var = "row_type",
      split_fun = keep_split_levels("analysis"),
      nested = FALSE,
      child_labels = "hidden"
    )
    lyt_or <- split_rows_by(lyt = lyt_or, var = "var_label", nested = TRUE)
    lyt_or <- analyze_colvars(
      lyt = lyt_or,
      afun = afun_lst[names(colvars_or$labels)],
      inclNAs = TRUE
    )
  }
  table_or <- build_table(lyt_or, df = df$or)

  # Work out where the odds ratio, its confidence interval and the total n end up in the final table.
  n_tot_id <- match("n_tot", colvars_or$vars)
  if (is.null(table_prop)) {
    result <- table_or
    or_id <- match("or", colvars_or$vars)
    ci_id <- match("lcl", colvars_or$vars)
  } else {
    # Final column order: total n first, then the proportion columns, then the remaining odds ratio columns.
    result <- cbind_rtables(table_or[, n_tot_id], table_prop, table_or[, -n_tot_id])
    or_id <- 1L + ncol(table_prop) + match("or", colvars_or$vars[-n_tot_id])
    ci_id <- 1L + ncol(table_prop) + match("lcl", colvars_or$vars[-n_tot_id])
    n_tot_id <- 1L
  }

  # The column positions and the header text are attached to the table as attributes.
  structure(
    result,
    forest_header = paste0(levels(df$prop$arm), "\nBetter"),
    col_x = or_id,
    col_ci = ci_id,
    col_symbol_size = n_tot_id
  )
}
338
339		#' Labels for Column Variables in Binary Response by Subgroup Table
340		#'
341		#' @description `r lifecycle::badge("stable")`
342		#'
343		#' Internal function to check variables included in [tabulate_rsp_subgroups()] and create column labels.
344		#'
345		#' @inheritParams argument_convention
346		#' @inheritParams tabulate_rsp_subgroups
347		#'
348		#' @return A `list` of variables to tabulate and their labels.
349		#'
350		#' @export
d_rsp_subgroups_colvars <- function(vars,
                                    conf_level = NULL,
                                    method = NULL) {
  # `vars` must contain the three required statistics and nothing outside the known set.
  checkmate::assert_character(vars)
  checkmate::assert_subset(c("n_tot", "or", "ci"), vars)
  checkmate::assert_subset(
    vars,
    c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval")
  )

  # Fixed labels; the labels for `ci` and `pval` depend on the arguments and are appended below.
  label_lookup <- c(
    n = "n",
    n_rsp = "Responders",
    prop = "Response (%)",
    n_tot = "Total n",
    or = "Odds Ratio"
  )
  tab_vars <- vars

  if ("ci" %in% vars) {
    checkmate::assert_false(is.null(conf_level))
    label_lookup <- c(label_lookup, ci = paste0(100 * conf_level, "% CI"))

    # The `lcl` variable is just a placeholder available in the analysis data,
    # it is not actually used in the tabulation.
    # Variables used in the tabulation are lcl and ucl, see `a_response_subgroups` for details.
    tab_vars[tab_vars == "ci"] <- "lcl"
  }

  if ("pval" %in% vars) {
    # The p-value column is labelled with the description passed in `method`.
    label_lookup <- c(label_lookup, pval = method)
  }

  # Labels are returned in the order of the requested statistics.
  list(
    vars = tab_vars,
    labels = label_lookup[vars]
  )
}

1		#' Control function for incidence rate
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' This is an auxiliary function for controlling arguments for the incidence rate, used
6		#' internally to specify details in `s_incidence_rate()`.
7		#'
8		#' @inheritParams argument_convention
9		#' @param time_unit_input (`string`)\cr `day`, `week`, `month`, or `year` (default)
10		#' indicating time unit for data input.
11		#' @param time_unit_output (`numeric`)\cr time unit for desired output (in person-years).
12		#' @param conf_type (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar`
13		#' for confidence interval type.
14		#'
15		#' @return A list of components with the same names as the arguments.
16		#'
17		#' @seealso [incidence_rate]
18		#'
19		#' @examples
20		#' control_incidence_rate(0.9, "exact", "month", 100)
21		#'
22		#' @export
control_incidence_rate <- function(conf_level = 0.95,
                                   conf_type = c("normal", "normal_log", "exact", "byar"),
                                   time_unit_input = c("year", "day", "week", "month"),
                                   time_unit_output = 1) {
  # Resolve the enumerated choices first; a value outside the listed options errors here.
  conf_type <- match.arg(conf_type)
  time_unit_input <- match.arg(time_unit_input)

  # Then validate the numeric settings.
  checkmate::assert_number(time_unit_output)
  assert_proportion_value(conf_level)

  # The result mirrors the arguments one-to-one.
  control <- list(
    conf_level = conf_level,
    conf_type = conf_type,
    time_unit_input = time_unit_input,
    time_unit_output = time_unit_output
  )
  control
}

1		#' Difference Test for Two Proportions
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Various tests were implemented to test the difference between two proportions.
6		#'
7		#' @inheritParams argument_convention
8		#' @param tbl (`matrix`)\cr matrix with two groups in rows and the binary response (`TRUE`/`FALSE`) in columns.
9		#'
10		#' @seealso [h_prop_diff_test]
11		#'
12		#' @name prop_diff_test
13		NULL
14
15		#' @describeIn prop_diff_test Statistics function which tests the difference between two proportions.
16		#'
17		#' @param method (`string`)\cr one of `chisq`, `cmh`, `fisher`, or `schouten`; specifies the test used
18		#' to calculate the p-value.
19		#'
20		#' @return
21		#' * `s_test_proportion_diff()` returns a named `list` with a single item `pval` with an attribute `label`
22		#' describing the method used. The p-value tests the null hypothesis that proportions in two groups are the same.
23		#'
24		#' @examples
25		#' # Statistics function
26		#' dta <- data.frame(
27		#' rsp = sample(c(TRUE, FALSE), 100, TRUE),
28		#' grp = factor(rep(c("A", "B"), each = 50)),
29		#' strat = factor(rep(c("V", "W", "X", "Y", "Z"), each = 20))
30		#' )
31		#'
32		#' # Internal function - s_test_proportion_diff
33		#' \dontrun{
34		#' s_test_proportion_diff(
35		#' df = subset(dta, grp == "A"),
36		#' .var = "rsp",
37		#' .ref_group = subset(dta, grp == "B"),
38		#' .in_ref_col = FALSE,
39		#' variables = list(strata = "strat"),
40		#' method = "cmh"
41		#' )
42		#' }
43		#'
44		#' @keywords internal
s_test_proportion_diff <- function(df,
                                   .var,
                                   .ref_group,
                                   .in_ref_col,
                                   variables = list(strata = NULL),
                                   method = c("chisq", "schouten", "fisher", "cmh")) {
  method <- match.arg(method)
  # In the reference column no test is run and the p-value stays an empty string.
  y <- list(pval = "")

  if (!.in_ref_col) {
    assert_df_with_variables(df, list(rsp = .var))
    assert_df_with_variables(.ref_group, list(rsp = .var))

    # Stack reference group and current group; the reference observations come first.
    rsp <- factor(
      c(.ref_group[[.var]], df[[.var]]),
      levels = c("TRUE", "FALSE")
    )
    grp <- factor(
      rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))),
      levels = c("ref", "Not-ref")
    )

    # Strata are validated whenever they are supplied, and are mandatory for the CMH test.
    if (!is.null(variables$strata) || method == "cmh") {
      strata <- variables$strata
      checkmate::assert_false(is.null(strata))
      strata_vars <- stats::setNames(as.list(strata), strata)
      assert_df_with_variables(df, strata_vars)
      assert_df_with_variables(.ref_group, strata_vars)

      # Combine the strata of both groups via their labels and the union of their levels.
      # `c()` on two factors returns bare integer codes in R versions before 4.1, which would mix up
      # strata whenever the two groups do not share identical level sets.
      ref_strata <- interaction(.ref_group[strata])
      trt_strata <- interaction(df[strata])
      strata <- factor(
        c(as.character(ref_strata), as.character(trt_strata)),
        levels = union(levels(ref_strata), levels(trt_strata))
      )
    }

    # Only the CMH test uses the three-way table including strata.
    tbl <- switch(method,
      cmh = table(grp, rsp, strata),
      table(grp, rsp)
    )

    y$pval <- switch(method,
      chisq = prop_chisq(tbl),
      cmh = prop_cmh(tbl),
      fisher = prop_fisher(tbl),
      schouten = prop_schouten(tbl)
    )
  }

  # The label describing the test is attached in both cases.
  y$pval <- formatters::with_label(y$pval, d_test_proportion_diff(method))
  y
}
91
92		#' Description of the Difference Test Between Two Proportions
93		#'
94		#' @description `r lifecycle::badge("stable")`
95		#'
96		#' This is an auxiliary function that describes the analysis in `s_test_proportion_diff`.
97		#'
98		#' @inheritParams s_test_proportion_diff
99		#'
100		#' @return `string` describing the test from which the p-value is derived.
101		#'
102		#' @export
d_test_proportion_diff <- function(method) {
  checkmate::assert_string(method)

  # Human-readable name of each supported test.
  test_names <- c(
    schouten = "Chi-Squared Test with Schouten Correction",
    chisq = "Chi-Squared Test",
    cmh = "Cochran-Mantel-Haenszel Test",
    fisher = "Fisher's Exact Test"
  )

  # Any other method has no description and is rejected.
  if (!method %in% names(test_names)) {
    stop(paste(method, "does not have a description"))
  }

  paste0("p-value (", test_names[[method]], ")")
}
114
115		#' @describeIn prop_diff_test Formatted analysis function which is used as `afun` in `test_proportion_diff()`.
116		#'
117		#' @return
118		#' * `a_test_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()].
119		#'
120		#' @examples
121		#' # Internal function - a_test_proportion_diff
122		#' \dontrun{
123		#' a_test_proportion_diff(
124		#' df = subset(dta, grp == "A"),
125		#' .var = "rsp",
126		#' .ref_group = subset(dta, grp == "B"),
127		#' .in_ref_col = FALSE,
128		#' variables = list(strata = "strat"),
129		#' method = "cmh"
130		#' )
131		#' }
132		#'
133		#' @keywords internal
a_test_proportion_diff <- make_afun(
  # Statistics function being wrapped.
  s_test_proportion_diff,
  # Plain `|` in the format: `\|` is not a valid escape sequence in an R string.
  .formats = c(pval = "x.xxxx | (<0.0001)"),
  .indent_mods = c(pval = 1L)
)
139
140		#' @describeIn prop_diff_test Layout-creating function which can take statistics function arguments
141		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
142		#'
143		#' @param ... other arguments are passed to [s_test_proportion_diff()].
144		#'
145		#' @return
146		#' * `test_proportion_diff()` returns a layout object suitable for passing to further layouting functions,
147		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
148		#' the statistics from `s_test_proportion_diff()` to the table layout.
149		#'
150		#' @examples
151		#' # With `rtables` pipelines.
152		#' l <- basic_table() %>%
153		#' split_cols_by(var = "grp", ref_group = "B") %>%
154		#' test_proportion_diff(
155		#' vars = "rsp",
156		#' method = "cmh", variables = list(strata = "strat")
157		#' )
158		#'
159		#' build_table(l, df = dta)
160		#'
161		#' @export
test_proportion_diff <- function(lyt,
                                 vars,
                                 ...,
                                 var_labels = vars,
                                 show_labels = "hidden",
                                 table_names = vars,
                                 .stats = NULL,
                                 .formats = NULL,
                                 .labels = NULL,
                                 .indent_mods = NULL) {
  # Customise the formatted analysis function with the user-supplied overrides.
  custom_afun <- make_afun(
    a_test_proportion_diff,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Everything passed through `...` is handed on as extra arguments of the analysis function.
  analyze(
    lyt = lyt,
    vars = vars,
    afun = custom_afun,
    var_labels = var_labels,
    extra_args = list(...),
    show_labels = show_labels,
    table_names = table_names
  )
}
189
190		#' Helper Functions to Test Proportion Differences
191		#'
192		#' Helper functions to implement various tests on the difference between two proportions.
193		#'
194		#' @param tbl (`matrix`)\cr matrix with two groups in rows and the binary response (`TRUE`/`FALSE`) in columns.
195		#'
196		#' @return A p-value.
197		#'
198		#' @seealso [prop_diff_test()] for implementation of these helper functions.
199		#'
200		#' @name h_prop_diff_test
201		NULL
202
203		#' @describeIn h_prop_diff_test performs Chi-Squared test. Internally calls [stats::prop.test()].
204		#'
205		#' @examples
206		#' # Non-stratified proportion difference test
207		#'
208		#' ## Data
209		#' A <- 20
210		#' B <- 20
211		#' set.seed(1)
212		#' rsp <- c(
213		#' sample(c(TRUE, FALSE), size = A, prob = c(3 / 4, 1 / 4), replace = TRUE),
214		#' sample(c(TRUE, FALSE), size = B, prob = c(1 / 2, 1 / 2), replace = TRUE)
215		#' )
216		#' grp <- c(rep("A", A), rep("B", B))
217		#' tbl <- table(grp, rsp)
218		#'
219		#' ## Chi-Squared test
220		#' # Internal function - prop_chisq
221		#' \dontrun{
222		#' prop_chisq(tbl)
223		#' }
224		#'
225		#' @keywords internal
prop_chisq <- function(tbl) {
  # The table must have exactly two rows and two columns.
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)

  # Put the "TRUE" column first.
  tbl <- tbl[, c("TRUE", "FALSE")]

  # If one of the two response columns is empty, the p-value is set to 1 without running the test.
  has_empty_column <- any(colSums(tbl) == 0)
  if (has_empty_column) {
    1
  } else {
    stats::prop.test(tbl, correct = FALSE)$p.value
  }
}
234
235		#' @describeIn h_prop_diff_test performs stratified Cochran-Mantel-Haenszel test. Internally calls
236		#' [stats::mantelhaen.test()]. Note that strata with less than two observations are automatically discarded.
237		#'
238		#' @param ary (`array`, 3 dimensions)\cr array with two groups in rows, the binary response
239		#' (`TRUE`/`FALSE`) in columns, and the strata in the third dimension.
240		#'
241		#' @examples
242		#' # Stratified proportion difference test
243		#'
244		#' ## Data
245		#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
246		#' grp <- factor(rep(c("A", "B"), each = 50))
247		#' strata <- factor(rep(c("V", "W", "X", "Y", "Z"), each = 20))
248		#' tbl <- table(grp, rsp, strata)
249		#'
250		#' ## Cochran-Mantel-Haenszel test
251		#' # Internal function - prop_cmh
252		#' \dontrun{
253		#' prop_cmh(tbl)
254		#' }
255		#'
256		#' @keywords internal
prop_cmh <- function(ary) {
  # Expect a three-dimensional array with two rows and two columns.
  checkmate::assert_array(ary)
  checkmate::assert_integer(c(ncol(ary), nrow(ary)), lower = 2, upper = 2)
  checkmate::assert_integer(length(dim(ary)), lower = 3, upper = 3)

  # Number of observations in each stratum (third dimension).
  n_per_stratum <- apply(ary, MARGIN = 3, sum)

  if (any(n_per_stratum < 5)) {
    warning("<5 data points in some strata. CMH test may be incorrect.")
    # Only strata with more than one observation are kept for the test.
    ary <- ary[, , n_per_stratum > 1]
  }

  stats::mantelhaen.test(ary, correct = FALSE)$p.value
}
269
270		#' @describeIn h_prop_diff_test performs the Chi-Squared test with Schouten correction.
271		#'
272		#' @seealso For information on the Schouten correction (Schouten, 1980),
273		#' visit https://onlinelibrary.wiley.com/doi/abs/10.1002/bimj.4710220305.
274		#'
275		#' @examples
276		#' ## Chi-Squared test + Schouten correction.
277		#' # Internal function - prop_schouten
278		#' \dontrun{
279		#' prop_schouten(tbl)
280		#' }
281		#'
282		#' @keywords internal
prop_schouten <- function(tbl) {
  # The table must have exactly two rows and two columns.
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)
  tbl <- tbl[, c("TRUE", "FALSE")]

  # If one of the two response columns is empty, the p-value is set to 1 without running the test.
  if (any(colSums(tbl) == 0)) {
    return(1)
  }

  # Work in double precision. Counts from `table()` are integers, and the four-way product in the
  # denominator exceeds the integer range already at about 250 observations per group, which would
  # turn the p-value into NA.
  counts <- matrix(as.numeric(tbl), nrow = 2)

  n1 <- sum(counts[1, ])
  n2 <- sum(counts[2, ])
  n <- n1 + n2

  # Cross products of the main diagonal and the anti-diagonal.
  prod_main <- counts[1, 1] * counts[2, 2]
  prod_anti <- counts[2, 1] * counts[1, 2]

  # Column totals.
  n_true <- sum(counts[, 1])
  n_false <- sum(counts[, 2])

  t_schouten <- (n - 1) *
    (abs(prod_main - prod_anti) - 0.5 * min(n1, n2))^2 /
    (n1 * n2 * n_true * n_false)

  1 - stats::pchisq(t_schouten, df = 1)
}
305
306		#' @describeIn h_prop_diff_test performs the Fisher's exact test. Internally calls [stats::fisher.test()].
307		#'
308		#' @examples
309		#' ## Fisher's exact test
310		#' # Internal function - prop_fisher
311		#' \dontrun{
312		#' prop_fisher(tbl)
313		#' }
314		#'
315		#' @keywords internal
prop_fisher <- function(tbl) {
  # The table must have exactly two rows and two columns.
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)

  # Put the "TRUE" column first before running the test.
  ordered_tbl <- tbl[, c("TRUE", "FALSE")]
  fisher_result <- stats::fisher.test(ordered_tbl)
  fisher_result$p.value
}

1		#' Patient Counts with Abnormal Range Values by Baseline Status
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Primary analysis variable `.var` indicates the abnormal range result (`character` or `factor`), and additional
6		#' analysis variables are `id` (`character` or `factor`) and `baseline` (`character` or `factor`). For each
7		#' direction specified in `abnormal` (e.g. high or low) we condition on baseline range result and count
8		#' patients in the numerator and denominator as follows:
9		#' * `Not <Abnormal>`
10		#' * `denom`: the number of patients without abnormality at baseline (excluding those with missing baseline)
11		#' * `num`: the number of patients in `denom` who also have at least one abnormality post-baseline
12		#' * `<Abnormal>`
13		#' * `denom`: the number of patients with abnormality at baseline
14		#' * `num`: the number of patients in `denom` who also have at least one abnormality post-baseline
15		#' * `Total`
16		#' * `denom`: the number of patients with at least one valid measurement post-baseline
17		#' * `num`: the number of patients in `denom` who also have at least one abnormality post-baseline
18		#'
19		#' @inheritParams argument_convention
20		#' @param abnormal (`character`)\cr identifying the abnormal range level(s) in `.var`.
21		#'
22		#' @note
23		#' * `df` should be filtered to include only post-baseline records.
24		#' * If the baseline variable or analysis variable contains `NA`, it is expected that `NA` has been
25		#' conveyed to `na_level` appropriately beforehand with [df_explicit_na()] or [explicit_na()].
26		#'
27		#' @seealso Relevant description function [d_count_abnormal_by_baseline()].
28		#'
29		#' @name abnormal_by_baseline
30		NULL
31
32		#' Description Function for [s_count_abnormal_by_baseline()]
33		#'
34		#' @description `r lifecycle::badge("stable")`
35		#'
36		#' Description function that produces the labels for [s_count_abnormal_by_baseline()].
37		#'
38		#' @inheritParams abnormal_by_baseline
39		#'
40		#' @return Abnormal category labels for [s_count_abnormal_by_baseline()].
41		#'
42		#' @examples
43		#' d_count_abnormal_by_baseline("LOW")
44		#'
45		#' @export
d_count_abnormal_by_baseline <- function(abnormal) {
  # Capitalised form of the level: first character upper case, the rest lower case.
  first_char <- toupper(substr(abnormal, 1, 1))
  remainder <- tolower(substring(abnormal, 2))

  list(
    not_abnormal = paste("Not", tolower(abnormal)),
    abnormal = paste0(first_char, remainder),
    total = "Total"
  )
}
57
58		#' @describeIn abnormal_by_baseline Statistics function for a single `abnormal` level.
59		#'
60		#' @param na_level (`string`)\cr the explicit `na_level` argument you used in the pre-processing steps (maybe with
61		#' [df_explicit_na()]). The default is `"<Missing>"`.
62		#'
63		#' @return
64		#' * `s_count_abnormal_by_baseline()` returns statistic `fraction` which is a named list with 3 labeled elements:
65		#' `not_abnormal`, `abnormal`, and `total`. Each element contains a vector with `num` and `denom` patient counts.
66		#'
67		#' @examples
68		#' df <- data.frame(
69		#' USUBJID = as.character(c(1:6)),
70		#' ANRIND = factor(c(rep("LOW", 4), "NORMAL", "HIGH")),
71		#' BNRIND = factor(c("LOW", "NORMAL", "HIGH", NA, "LOW", "NORMAL"))
72		#' )
73		#' df <- df_explicit_na(df)
74		#'
75		#' # Internal function - s_count_abnormal_by_baseline
76		#' \dontrun{
77		#' # Just for one abnormal level.
78		#' s_count_abnormal_by_baseline(df, .var = "ANRIND", abnormal = "HIGH")
79		#' }
80		#'
81		#' @keywords internal
s_count_abnormal_by_baseline <- function(df,
                                         .var,
                                         abnormal,
                                         na_level = "<Missing>",
                                         variables = list(id = "USUBJID", baseline = "BNRIND")) {
  checkmate::assert_string(.var)
  checkmate::assert_string(abnormal)
  checkmate::assert_string(na_level)
  assert_df_with_variables(df, c(range = .var, variables))
  checkmate::assert_subset(names(variables), c("id", "baseline"))
  checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[variables$baseline]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character"))

  # Character input is converted to factor before the factor checks.
  df[[.var]] <- as_factor_keep_attributes(df[[.var]], na_level = na_level)
  df[[variables$baseline]] <- as_factor_keep_attributes(df[[variables$baseline]], na_level = na_level)

  assert_valid_factor(df[[.var]], any.missing = FALSE)
  assert_valid_factor(df[[variables$baseline]], any.missing = FALSE)

  # Only records with a valid analysis value are considered.
  valid <- df[[.var]] != na_level
  ids <- df[[variables$id]][valid]
  post_value <- df[[.var]][valid]
  bl_value <- df[[variables$baseline]][valid]

  # Number of distinct patients among the given ids.
  n_patients <- function(x) length(unique(x))

  post_abnormal <- post_value == abnormal

  # Total:
  # - Denominator: patients with at least one valid post-baseline measurement.
  # - Numerator: patients with at least one abnormality.
  total <- c(
    num = n_patients(ids[post_abnormal]),
    denom = n_patients(ids)
  )

  # Records with missing baseline are counted only in the total row.
  bl_known <- bl_value != na_level
  bl_abnormal <- bl_known & bl_value == abnormal
  bl_not_abnormal <- bl_known & bl_value != abnormal

  # Abnormal:
  # - Denominator: patients with abnormality at baseline.
  # - Numerator: those who also have at least one abnormality post-baseline.
  abn <- c(
    num = n_patients(ids[bl_abnormal & post_abnormal]),
    denom = n_patients(ids[bl_abnormal])
  )

  # Not abnormal:
  # - Denominator: patients without abnormality at baseline.
  # - Numerator: those who have at least one abnormality post-baseline.
  not_abn <- c(
    num = n_patients(ids[bl_not_abnormal & post_abnormal]),
    denom = n_patients(ids[bl_not_abnormal])
  )

  labels <- d_count_abnormal_by_baseline(abnormal)
  list(fraction = list(
    not_abnormal = formatters::with_label(not_abn, labels$not_abnormal),
    abnormal = formatters::with_label(abn, labels$abnormal),
    total = formatters::with_label(total, labels$total)
  ))
}
143
144		#' @describeIn abnormal_by_baseline Formatted analysis function which is used as `afun`
145		#' in `count_abnormal_by_baseline()`.
146		#'
147		#' @return
148		#' * `a_count_abnormal_by_baseline()` returns the corresponding list with formatted [rtables::CellValue()].
149		#'
150		#' @examples
151		#' # Internal function - a_count_abnormal_by_baseline
152		#' \dontrun{
153		#' # Use the Formatted Analysis function for `analyze()`. We need to ungroup `fraction` first
154		#' # so that the `rtables` formatting function `format_fraction()` can be applied correctly.
155		#' afun <- make_afun(a_count_abnormal_by_baseline, .ungroup_stats = "fraction")
156		#' afun(df, .var = "ANRIND", abnormal = "LOW")
157		#' }
158		#'
159		#' @keywords internal
a_count_abnormal_by_baseline <- make_afun(
  # Statistics function being wrapped.
  s_count_abnormal_by_baseline,
  # The `fraction` statistic is formatted with `format_fraction`.
  .formats = c(fraction = format_fraction)
)
164
165		#' @describeIn abnormal_by_baseline Layout-creating function which can take statistics function arguments
166		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
167		#'
168		#' @return
169		#' * `count_abnormal_by_baseline()` returns a layout object suitable for passing to further layouting functions,
170		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
171		#' the statistics from `s_count_abnormal_by_baseline()` to the table layout.
172		#'
173		#' @examples
174		#' # Layout creating function.
175		#' basic_table() %>%
176		#' count_abnormal_by_baseline(var = "ANRIND", abnormal = c(High = "HIGH")) %>%
177		#' build_table(df)
178		#'
179		#' # Passing of statistics function and formatting arguments.
180		#' df2 <- data.frame(
181		#' ID = as.character(c(1, 2, 3, 4)),
182		#' RANGE = factor(c("NORMAL", "LOW", "HIGH", "HIGH")),
183		#' BLRANGE = factor(c("LOW", "HIGH", "HIGH", "NORMAL"))
184		#' )
185		#'
186		#' basic_table() %>%
187		#' count_abnormal_by_baseline(
188		#' var = "RANGE",
189		#' abnormal = c(Low = "LOW"),
190		#' variables = list(id = "ID", baseline = "BLRANGE"),
191		#' .formats = c(fraction = "xx / xx"),
192		#' .indent_mods = c(fraction = 2L)
193		#' ) %>%
194		#' build_table(df2)
195		#'
196		#' @export
count_abnormal_by_baseline <- function(lyt,
                                       var,
                                       abnormal,
                                       ...,
                                       table_names = abnormal,
                                       .stats = NULL,
                                       .formats = NULL,
                                       .labels = NULL,
                                       .indent_mods = NULL) {
  # Adds one `analyze()` step to `lyt` per element of `abnormal`. Each step is labelled
  # with the element's name and registered under the matching entry of `table_names`.
  checkmate::assert_character(abnormal, len = length(table_names), names = "named")
  checkmate::assert_string(var)

  # Formatted analysis function shared by all steps; `fraction` is ungrouped so that
  # each of its components becomes its own row.
  formatted_afun <- make_afun(
    a_count_abnormal_by_baseline,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .ungroup_stats = "fraction"
  )

  for (idx in seq_along(abnormal)) {
    current <- abnormal[idx]
    lyt <- analyze(
      lyt = lyt,
      vars = var,
      var_labels = names(current),
      afun = formatted_afun,
      table_names = table_names[idx],
      # Additional arguments in `...` are forwarded to the analysis function.
      extra_args = c(list(abnormal = current), list(...)),
      show_labels = "visible"
    )
  }

  lyt
}

1		#' Sort Data by `PK PARAM` Variable
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' @param pk_data (`data.frame`)\cr Pharmacokinetics dataframe
6		#' @param key_var (`character`)\cr key variable used to merge pk_data and metadata created by `d_pkparam()`
7		#'
8		#' @return A PK `data.frame` sorted by a `PARAM` variable.
9		#'
10		#' @examples
11		#' library(dplyr)
12		#'
13		#' adpp <- tern_ex_adpp %>% mutate(PKPARAM = factor(paste0(PARAM, " (", AVALU, ")")))
14		#' pk_ordered_data <- h_pkparam_sort(adpp)
15		#'
16		#' @export
h_pkparam_sort <- function(pk_data, key_var = "PARAMCD") {
  # Joins `pk_data` with the metadata returned by `d_pkparam()` and turns `PARAM` and
  # `TLG_DISPLAY` into ordered factors whose levels follow the numeric `TLG_ORDER`.
  assert_df_with_variables(pk_data, list(key_var = key_var))
  pk_data$PARAMCD <- pk_data[[key_var]]

  ordered_pk_data <- d_pkparam()

  # Columns present in both inputs keep the `pk_data` name; the metadata duplicate
  # receives a ".y" suffix. The argument is spelled `suffixes` in full to avoid
  # relying on partial argument matching.
  joined_data <- merge(pk_data, ordered_pk_data, by = "PARAMCD", suffixes = c("", ".y"))

  # Drop the duplicated metadata columns. A logical mask is used instead of `-grep()`
  # so that nothing is dropped when there is no match (negating an empty index would
  # remove every column), and the dot is escaped so only a literal ".y" suffix matches.
  keep_cols <- !grepl("\\.y$", colnames(joined_data))
  joined_data <- joined_data[, keep_cols, drop = FALSE]

  joined_data$TLG_ORDER <- as.numeric(joined_data$TLG_ORDER)

  # Order the factor levels by `TLG_ORDER`.
  row_order <- order(joined_data$TLG_ORDER)
  joined_data$PARAM <- factor(
    joined_data$PARAM,
    levels = unique(joined_data$PARAM[row_order]),
    ordered = TRUE
  )
  joined_data$TLG_DISPLAY <- factor(
    joined_data$TLG_DISPLAY,
    levels = unique(joined_data$TLG_DISPLAY[row_order]),
    ordered = TRUE
  )

  joined_data
}

1		#' Horizontal Waterfall Plot
2		#'
3		#' This basic waterfall plot visualizes a quantity `height` ordered by value with some markup.
4		#'
5		#' @description `r lifecycle::badge("stable")`
6		#'
7		#' @param height (`numeric`)\cr vector containing values to be plotted as the waterfall bars.
8		#' @param id (`character`)\cr vector containing IDs to use as the x-axis label for the waterfall bars.
9		#' @param col (`character`)\cr colors.
10		#' @param col_var (`factor`, `character` or `NULL`)\cr categorical variable for bar coloring. `NULL` by default.
11		#' @param xlab (`character`)\cr x label. Default is `"ID"`.
12		#' @param ylab (`character`)\cr y label. Default is `"Value"`.
13		#' @param title (`character`)\cr text to be displayed as plot title.
14		#' @param col_legend_title (`character`)\cr text to be displayed as legend title.
15		#'
16		#' @return A `ggplot` waterfall plot.
17		#'
18		#' @examples
19		#' library(dplyr)
20		#' library(nestcolor)
21		#'
22		#' g_waterfall(height = c(3, 5, -1), id = letters[1:3])
23		#'
24		#' g_waterfall(
25		#' height = c(3, 5, -1),
26		#' id = letters[1:3],
27		#' col_var = letters[1:3]
28		#' )
29		#'
30		#' adsl_f <- tern_ex_adsl %>%
31		#' select(USUBJID, STUDYID, ARM, ARMCD, SEX)
32		#'
33		#' adrs_f <- tern_ex_adrs %>%
34		#' filter(PARAMCD == "OVRINV") %>%
35		#' mutate(pchg = rnorm(n(), 10, 50))
36		#'
37		#' adrs_f <- head(adrs_f, 30)
38		#' adrs_f <- adrs_f[!duplicated(adrs_f$USUBJID), ]
39		#' head(adrs_f)
40		#'
41		#' g_waterfall(
42		#' height = adrs_f$pchg,
43		#' id = adrs_f$USUBJID,
44		#' col_var = adrs_f$AVALC
45		#' )
46		#'
47		#' g_waterfall(
48		#' height = adrs_f$pchg,
49		#' id = paste("asdfdsfdsfsd", adrs_f$USUBJID),
50		#' col_var = adrs_f$SEX
51		#' )
52		#'
53		#' g_waterfall(
54		#' height = adrs_f$pchg,
55		#' id = paste("asdfdsfdsfsd", adrs_f$USUBJID),
56		#' xlab = "ID",
57		#' ylab = "Percentage Change",
58		#' title = "Waterfall plot"
59		#' )
60		#'
61		#' @export
g_waterfall <- function(height,
                        id,
                        col_var = NULL,
                        col = getOption("ggplot2.discrete.colour"),
                        xlab = NULL,
                        ylab = NULL,
                        col_legend_title = NULL,
                        title = NULL) {
  # Draws one bar per `id`, sorted by decreasing `height`, optionally filled by `col_var`.
  if (!is.null(col_var)) {
    check_same_n(height = height, id = id, col_var = col_var)
  } else {
    check_same_n(height = height, id = id)
  }

  checkmate::assert_multi_class(col_var, c("character", "factor"), null.ok = TRUE)
  checkmate::assert_character(col, null.ok = TRUE)

  # Default labels are the expressions supplied by the caller. `deparse()` splits long
  # expressions into several strings, so they are collapsed into one label each.
  expr_label <- function(expr) paste(trimws(deparse(expr)), collapse = " ")
  xlabel <- expr_label(substitute(id))
  ylabel <- expr_label(substitute(height))

  col_label <- if (!missing(col_var)) {
    expr_label(substitute(col_var))
  }

  xlab <- if (is.null(xlab)) xlabel else xlab
  ylab <- if (is.null(ylab)) ylabel else ylab
  col_legend_title <- if (is.null(col_legend_title)) col_label else col_legend_title

  plot_data <- data.frame(
    height = height,
    id = as.character(id),
    col_var = if (is.null(col_var)) "x" else to_n(col_var, length(height)),
    stringsAsFactors = FALSE
  )

  # Sort bars from the largest to the smallest value.
  plot_data_ord <- plot_data[order(plot_data$height, decreasing = TRUE), ]

  # `factor(id, levels = id)` fixes the x-axis order to the sorted row order.
  p <- ggplot2::ggplot(plot_data_ord, ggplot2::aes(x = factor(id, levels = id), y = height)) +
    ggplot2::geom_col() +
    ggplot2::geom_text(
      label = format(plot_data_ord$height, digits = 2),
      # Place the value above positive bars and below negative ones.
      vjust = ifelse(plot_data_ord$height >= 0, -0.5, 1.5)
    ) +
    ggplot2::xlab(xlab) +
    ggplot2::ylab(ylab) +
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, hjust = 0, vjust = .5))

  if (!is.null(col_var)) {
    p <- p +
      ggplot2::aes(fill = col_var) +
      ggplot2::labs(fill = col_legend_title) +
      ggplot2::theme(
        legend.position = "bottom",
        legend.background = ggplot2::element_blank(),
        legend.title = ggplot2::element_text(face = "bold"),
        legend.box.background = ggplot2::element_rect(colour = "black")
      )
  }

  if (!is.null(col)) {
    p <- p +
      ggplot2::scale_fill_manual(values = col)
  }

  if (!is.null(title)) {
    p <- p +
      ggplot2::labs(title = title) +
      ggplot2::theme(plot.title = ggplot2::element_text(face = "bold"))
  }

  p
}

1		#' Pairwise Formula Special Term
2		#'
3		#' @description `r lifecycle::badge("deprecated")`
4		#'
5		#' The special term `pairwise` indicate that the model should be fitted individually for
6		#' every tested level in comparison to the reference level.
7		#'
8		#' @param x the variable for which pairwise result is expected.
9		#'
10		#' @return Variable "paired".
11		#'
12		#' @details Let's `ARM` being a factor with level A, B, C; let's be B the reference level,
13		#' a model calling the formula including `pairwise(ARM)` will result in two models:
14		#' * A model including only levels A and B, and effect of A estimated in reference to B.
15		#' * A model including only levels C and B, the effect of C estimated in reference to B.
16		#'
17		#' @export
pairwise <- function(x) {
  # Deprecated in favour of `univariate()`; still tags `x` with the expression it was
  # called with, stored in the `varname` attribute.
  lifecycle::deprecate_warn("0.8.1.9013", "pairwise()", "univariate()")
  var_name <- deparse(substitute(x))
  structure(x, varname = var_name)
}
22
23		#' Univariate Formula Special Term
24		#'
25		#' @description `r lifecycle::badge("stable")`
26		#'
27		#' The special term `univariate` indicate that the model should be fitted individually for
28		#' every variable included in univariate.
29		#'
30		#' @param x A vector of variable name separated by commas.
31		#'
32		#' @return When used within a model formula, produces univariate models for each variable provided.
33		#'
34		#' @details
35		#' If provided alongside with pairwise specification, the model
36		#' `y ~ ARM + univariate(SEX, AGE, RACE)` lead to the study and comparison of the models
37		#' + `y ~ ARM`
38		#' + `y ~ ARM + SEX`
39		#' + `y ~ ARM + AGE`
40		#' + `y ~ ARM + RACE`
41		#'
42		#' @export
univariate <- function(x) {
  # Tag `x` with the expression it was called with, stored in the `varname` attribute.
  var_name <- deparse(substitute(x))
  structure(x, varname = var_name)
}
46
# Get the right-hand-term of a formula.
# Returns the last element of the formula object coerced to character.
rht <- function(x) {
  checkmate::assert_formula(x)
  as.character(x[[length(x)]])
}
53
54		#' Hazard Ratio Estimation in Interactions
55		#'
56		#' This function estimates the hazard ratios between arms when an interaction variable is given with
57		#' specific values.
58		#'
59		#' @param variable,given Names of two variable in interaction. We seek the estimation of the levels of `variable`
60		#' given the levels of `given`.
61		#' @param lvl_var,lvl_given corresponding levels as given by `levels`.
62		#' @param mmat A named numeric vector filled with 0, used as a template to obtain the design matrix.
63		#' @param coef Numeric of estimated coefficients.
64		#' @param vcov Variance-covariance matrix of underlying model.
65		#' @param conf_level Single numeric for the confidence level of estimate intervals.
66		#'
67		#' @details Given the cox regression investigating the effect of Arm (A, B, C; reference A)
68		#' and Sex (F, M; reference Female). The model is abbreviated: y ~ Arm + Sex + Arm x Sex.
69		#' The cox regression estimates the coefficients along with a variance-covariance matrix for:
70		#'
71		#' - b1 (arm b), b2 (arm c)
72		#' - b3 (sex m)
73		#' - b4 (arm b: sex m), b5 (arm c: sex m)
74		#'
75		#' Given that I want an estimation of the Hazard Ratio for arm C/sex M, the estimation
76		#' will be given in reference to arm A/Sex M by exp(b2 + b3 + b5)/ exp(b3) = exp(b2 + b5),
77		#' therefore the interaction coefficient is given by b2 + b5 while the standard error is obtained
78		#' as $1.96 * sqrt(Var b2 + Var b5 + 2 * covariance (b2,b5))$ for a confidence level of 0.95.
79		#'
80		#' @return A list of matrix (one per level of variable) with rows corresponding to the combinations of
81		#' `variable` and `given`, with columns:
82		#' * `coef_hat`: Estimation of the coefficient.
83		#' * `coef_se`: Standard error of the estimation.
84		#' * `hr`: Hazard ratio.
85		#' * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio.
86		#'
87		#' @seealso [s_cox_multivariate()].
88		#'
89		#' @examples
90		#' library(dplyr)
91		#' library(survival)
92		#'
93		#' ADSL <- tern_ex_adsl %>%
94		#' filter(SEX %in% c("F", "M"))
95		#'
96		#' adtte <- tern_ex_adtte %>% filter(PARAMCD == "PFS")
97		#' adtte$ARMCD <- droplevels(adtte$ARMCD)
98		#' adtte$SEX <- droplevels(adtte$SEX)
99		#'
100		#' mod <- coxph(
101		#' formula = Surv(time = AVAL, event = 1 - CNSR) ~ (SEX + ARMCD)^2,
102		#' data = adtte
103		#' )
104		#'
105		#' mmat <- stats::model.matrix(mod)[1, ]
106		#' mmat[!mmat == 0] <- 0
107		#'
108		#' # Internal function - estimate_coef
109		#' \dontrun{
110		#' estimate_coef(
111		#' variable = "ARMCD", given = "SEX", lvl_var = "ARM A", lvl_given = "M",
112		#' coef = stats::coef(mod), mmat = mmat, vcov = stats::vcov(mod), conf_level = .95
113		#' )
114		#' }
115		#'
116		#' @keywords internal
estimate_coef <- function(variable, given,
                          lvl_var, lvl_given,
                          coef,
                          mmat,
                          vcov,
                          conf_level = 0.95) {
  # Coefficient names are built as "<name><level>"; the first level of `variable`
  # is the reference level and therefore has no coefficient.
  var_lvl <- paste0(variable, lvl_var[-1])
  giv_lvl <- paste0(given, lvl_given)

  # One row per (variable level, given level) combination, sorted by variable then given,
  # together with both possible spellings of the interaction coefficient name.
  combos <- expand.grid(variable = var_lvl, given = giv_lvl)
  combos <- combos[order(combos$variable, combos$given), ]
  combos <- within(
    data = combos,
    expr = {
      inter <- paste0(variable, ":", given)
      rev_inter <- paste0(given, ":", variable)
    }
  )

  split_by_variable <- combos$variable
  interaction_names <- paste(combos$variable, combos$given, sep = "/")

  # Each column of `design_mat` is a copy of the `mmat` template with a 1 for the main
  # effect of the variable level and for the matching interaction term. The main effect
  # of `given` is deliberately left out of the contrast.
  design_mat <- apply(
    X = combos, MARGIN = 1, FUN = function(row) {
      active_terms <- row[names(row) != "given"]
      indicator <- mmat
      indicator[names(indicator) %in% active_terms] <- 1
      indicator
    }
  )
  colnames(design_mat) <- interaction_names

  # Point estimate: sum of the selected coefficients.
  coef_hat <- t(design_mat) %*% as.matrix(coef)
  dimnames(coef_hat)[2] <- "coef"

  # Standard error: square root of the sum over the selected block of `vcov`.
  coef_se <- apply(design_mat, 2, function(indicator) {
    selected <- as.logical(indicator)
    sqrt(sum(vcov[selected, selected]))
  })

  q_norm <- stats::qnorm((1 + conf_level) / 2)
  estimates <- cbind(coef_hat, `se(coef)` = coef_se)

  # Append hazard ratio and its confidence limits to every row.
  estimates <- apply(estimates, 1, function(row) {
    row["hr"] <- exp(row["coef"])
    row["lcl"] <- exp(row["coef"] - q_norm * row["se(coef)"])
    row["ucl"] <- exp(row["coef"] + q_norm * row["se(coef)"])
    row
  })
  estimates <- t(estimates)

  # One matrix per level of `variable`.
  result <- by(estimates, split_by_variable, identity)
  result <- lapply(result, as.matrix)

  attr(result, "details") <- paste0(
    "Estimations of ", variable,
    " hazard ratio given the level of ", given, " compared to ",
    variable, " level ", lvl_var[1], "."
  )
  result
}
182
183		#' `tryCatch` around `car::Anova`
184		#'
185		#' Captures warnings when executing [car::Anova].
186		#'
187		#' @inheritParams car::Anova
188		#'
189		#' @return A list with item `aov` for the result of the model and `warn_text` for the captured warnings.
190		#'
191		#' @examples
192		#' # `car::Anova` on cox regression model including strata and expected
193		#' # a likelihood ratio test triggers a warning as only Wald method is
194		#' # accepted.
195		#'
196		#' library(survival)
197		#'
198		#' mod <- coxph(
199		#' formula = Surv(time = futime, event = fustat) ~ factor(rx) + strata(ecog.ps),
200		#' data = ovarian
201		#' )
202		#'
203		#' # Internal function - try_car_anova
204		#' \dontrun{
205		#' with_wald <- try_car_anova(mod = mod, test.statistic = "Wald")
206		#' with_lr <- try_car_anova(mod = mod, test.statistic = "LR")
207		#' }
208		#'
209		#' @keywords internal
try_car_anova <- function(mod,
                          test.statistic) { # nolint
  # Runs a type III `car::Anova()` on `mod` and collects warnings instead of letting
  # them reach the caller. Returns a list with `aov` (the Anova result) and `warn_text`
  # (one string per captured warning, or `NULL` when there was none).
  warn_text <- NULL

  aov <- withCallingHandlers(
    expr = car::Anova(
      mod,
      test.statistic = test.statistic,
      type = "III"
    ),
    warning = function(w) {
      # Keep only the message of the condition (pasting the condition object itself
      # would also paste its call) and append, so earlier warnings are not lost.
      warn_text <<- c(
        warn_text,
        trimws(paste0("Warning in `try_car_anova`: ", conditionMessage(w)))
      )

      # A warning is sometimes expected; resume the computation without emitting it.
      invokeRestart("muffleWarning")
    }
  )

  list(aov = aov, warn_text = warn_text)
}
240
241		#' Fit the Cox Regression Model and Anova
242		#'
243		#' The functions allows to derive from the [survival::coxph()] results the effect p.values using [car::Anova()].
244		#' This last package introduces more flexibility to get the effect p.values.
245		#'
246		#' @inheritParams t_coxreg
247		#'
248		#' @return A list with items `mod` (results of [survival::coxph()]), `msum` (result of `summary`) and
249		#' `aov` (result of [car::Anova()]).
250		#'
251		#' @noRd
fit_n_aov <- function(formula,
                      data,
                      conf_level = 0.95,
                      pval_method = c("wald", "likelihood"),
                      ...) {
  # Fits `survival::coxph()` and derives its summary and the `car::Anova()` table.
  # `data` is required. The former defaults `data = data` and `conf_level = conf_level`
  # were self-referential and always failed when relied upon; `conf_level` now defaults
  # to 0.95.
  pval_method <- match.arg(pval_method)

  # Re-home the formula in this frame so that the model is evaluated against the
  # objects of this call.
  environment(formula) <- environment()
  suppressWarnings({
    # We expect some warnings due to coxph which fails strict programming.
    mod <- survival::coxph(formula, data = data, ...)
    msum <- summary(mod, conf.int = conf_level)
  })

  anova_res <- try_car_anova(
    mod,
    test.statistic = switch(pval_method,
      "wald" = "Wald",
      "likelihood" = "LR"
    )
  )

  # Report captured warnings as a message and keep them on the result.
  warn_attr <- anova_res$warn_text
  if (!is.null(warn_attr)) message(warn_attr)

  y <- list(mod = mod, msum = msum, aov = anova_res$aov)
  attr(y, "message") <- warn_attr

  y
}
283
284		# argument_checks
check_formula <- function(formula) {
  # Stop unless `formula` inherits from class "formula"; otherwise return invisibly.
  is_formula <- inherits(formula, "formula")
  if (!is_formula) {
    stop("Check `formula`. A formula should resemble `Surv(time = AVAL, event = 1 - CNSR) ~ study_arm(ARMCD)`.")
  }

  invisible()
}
292
check_covariate_formulas <- function(covariates) {
  # Stop when `covariates` is NULL or when any of its elements is not a formula.
  all_formulas <- all(vapply(X = covariates, FUN = inherits, what = "formula", FUN.VALUE = TRUE))
  if (is.null(covariates) || !all_formulas) {
    stop("Check `covariates`, it should be a list of right-hand-term formulas, e.g. list(Age = ~AGE).")
  }

  invisible()
}
300
name_covariate_names <- function(covariates) {
  # Fill in missing names of `covariates` with the right-hand term of each formula.
  current_names <- names(covariates)
  if (is.null(current_names)) {
    # No names at all: name every element.
    names(covariates) <- vapply(covariates, FUN = rht, FUN.VALUE = "name")
  } else {
    # Some names present: only fill the empty ones.
    blank <- current_names == ""
    if (any(blank)) {
      names(covariates)[blank] <- vapply(covariates[blank], FUN = rht, FUN.VALUE = "name")
    }
  }
  covariates
}
308
check_increments <- function(increments, covariates) {
  # Warn for every name in `increments` that is not the right-hand term of one of the
  # `covariates` formulas. Nothing is checked when `increments` is NULL.
  if (is.null(increments)) {
    return(invisible())
  }

  covariate_terms <- vapply(covariates, FUN = rht, FUN.VALUE = "name")
  lapply(
    X = names(increments), FUN = function(x) {
      if (!(x %in% covariate_terms)) {
        warning(
          paste(
            "Check `increments`, the `increment` for ", x,
            "doesn't match any names in investigated covariate(s)."
          )
        )
      }
    }
  )

  invisible()
}
328
329		#' Multivariate Cox Model - Summarized Results
330		#'
331		#' Analyses based on multivariate Cox model are usually not performed for the Controlled Substance Reporting or
332		#' regulatory documents but serve exploratory purposes only (e.g., for publication). In practice, the model usually
333		#' includes only the main effects (without interaction terms). It produces the hazard ratio estimates for each of the
334		#' covariates included in the model.
335		#' The analysis follows the same principles (e.g., stratified vs. unstratified analysis and tie handling) as the
336		#' usual Cox model analysis. Since there is usually no pre-specified hypothesis testing for such analysis,
337		#' the p.values need to be interpreted with caution. (Statistical Analysis of Clinical Trials Data with R,
338		#' `NEST's bookdown`)
339		#'
340		#' @param formula (`formula`)\cr A formula corresponding to the investigated [survival::Surv()] survival model
341		#' including covariates.
342		#' @param data (`data.frame`)\cr A data frame which includes the variable in formula and covariates.
343		#' @param conf_level (`proportion`)\cr The confidence level for the hazard ratio interval estimations. Default is 0.95.
344		#' @param pval_method (`character`)\cr The method used for the estimation of p-values, should be one of
345		#' "wald" (default) or "likelihood".
346		#' @param ... Optional parameters passed to [survival::coxph()]. Can include `ties`, a character string specifying the
347		#' method for tie handling, one of `exact` (default), `efron`, `breslow`.
348		#'
349		#' @return A `list` with elements `mod`, `msum`, `aov`, and `coef_inter`.
350		#'
351		#' @details The output is limited to single effect terms. Work is ongoing for estimation of interaction terms
352		#' but is out of scope as defined by the Global Data Standards Repository
353		#' (`GDS_Standard_TLG_Specs_Tables_2.doc`).
354		#'
355		#' @seealso [estimate_coef()].
356		#'
357		#' @examples
358		#' library(dplyr)
359		#'
360		#' adtte <- tern_ex_adtte
361		#' adtte_f <- subset(adtte, PARAMCD == "OS") # _f: filtered
362		#' adtte_f <- filter(
363		#' adtte_f,
364		#' PARAMCD == "OS" &
365		#' SEX %in% c("F", "M") &
366		#' RACE %in% c("ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE")
367		#' )
368		#' adtte_f$SEX <- droplevels(adtte_f$SEX)
369		#' adtte_f$RACE <- droplevels(adtte_f$RACE)
370		#'
371		#' # Internal function - s_cox_multivariate
372		#' \dontrun{
373		#' s_cox_multivariate(
374		#' formula = Surv(time = AVAL, event = 1 - CNSR) ~ (ARMCD + RACE + AGE)^2, data = adtte_f
375		#' )
376		#' }
377		#'
378		#' @keywords internal
s_cox_multivariate <- function(formula, data,
                               conf_level = 0.95,
                               pval_method = c("wald", "likelihood"),
                               ...) {
  # Fits the Cox model via `fit_n_aov()` and, when the model contains interaction terms,
  # estimates hazard ratios for each interaction with `estimate_coef()`.
  # Returns a list with `mod`, `msum`, `aov` and `coef_inter` (NULL without interactions).
  tf <- stats::terms(formula, specials = c("strata"))
  # Variables of the formula, without the response (row 1) and the `strata()` specials.
  covariates <- rownames(attr(tf, "factors"))[-c(1, unlist(attr(tf, "specials")))]

  # Convert character covariates to factors on the local copy of `data`.
  for (covariate in covariates) {
    if (is.character(data[[covariate]])) {
      data[[covariate]] <- as.factor(data[[covariate]])
    }
  }
  pval_method <- match.arg(pval_method)

  fit <- fit_n_aov(
    formula = formula,
    data = data,
    conf_level = conf_level,
    pval_method = pval_method,
    ...
  )
  mod <- fit$mod
  aov <- fit$aov
  msum <- fit$msum

  term_orders <- attr(mod$terms, "order")
  all_term_labs <- attr(mod$terms, "term.labels")

  coef_inter <- NULL
  if (any(term_orders > 1)) {
    for_inter <- all_term_labs[term_orders > 1]
    names(for_inter) <- for_inter

    # All-zero template with the names of the model matrix columns.
    mmat <- stats::model.matrix(mod)[1, ]
    mmat[mmat != 0] <- 0
    mcoef <- stats::coef(mod)
    mvcov <- stats::vcov(mod)

    estimate_coef_local <- function(variable, given) {
      estimate_coef(
        variable, given,
        coef = mcoef, mmat = mmat, vcov = mvcov, conf_level = conf_level,
        lvl_var = levels(data[[variable]]), lvl_given = levels(data[[given]])
      )
    }

    coef_inter <- lapply(
      for_inter, function(x) {
        # Variables involved in interaction term `x`; the attribute name is spelled
        # out in full ("factors") instead of relying on partial matching.
        involved <- attr(mod$terms, "factors")[, x]
        involved <- names(involved[involved > 0])
        # Estimate each variable given the other one.
        Map(estimate_coef_local, variable = involved, given = rev(involved))
      }
    )
  }

  list(mod = mod, msum = msum, aov = aov, coef_inter = coef_inter)
}

1		#' Helper Functions for Cox Proportional Hazards Regression
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Helper functions used in [fit_coxreg_univar()] and [fit_coxreg_multivar()].
6		#'
7		#' @inheritParams argument_convention
8		#' @inheritParams h_coxreg_univar_extract
9		#' @inheritParams cox_regression_inter
10		#' @inheritParams control_coxreg
11		#'
12		#' @seealso [cox_regression]
13		#'
14		#' @name h_cox_regression
15		NULL
16
17		#' @describeIn h_cox_regression Helper for Cox regression formula. Creates a list of formulas. It is used
18		#' internally by [fit_coxreg_univar()] for the comparison of univariate Cox regression models.
19		#'
20		#' @return
21		#' * `h_coxreg_univar_formulas()` returns a `character` vector coercible into formulas (e.g [stats::as.formula()]).
22		#'
23		#' @examples
24		#' # `h_coxreg_univar_formulas`
25		#'
26		#' ## Simple formulas.
27		#' h_coxreg_univar_formulas(
28		#' variables = list(
29		#' time = "time", event = "status", arm = "armcd", covariates = c("X", "y")
30		#' )
31		#' )
32		#'
33		#' ## Addition of an optional strata.
34		#' h_coxreg_univar_formulas(
35		#' variables = list(
36		#' time = "time", event = "status", arm = "armcd", covariates = c("X", "y"),
37		#' strata = "SITE"
38		#' )
39		#' )
40		#'
41		#' ## Inclusion of the interaction term.
42		#' h_coxreg_univar_formulas(
43		#' variables = list(
44		#' time = "time", event = "status", arm = "armcd", covariates = c("X", "y"),
45		#' strata = "SITE"
46		#' ),
47		#' interaction = TRUE
48		#' )
49		#'
50		#' ## Only covariates fitted in separate models.
51		#' h_coxreg_univar_formulas(
52		#' variables = list(
53		#' time = "time", event = "status", covariates = c("X", "y")
54		#' )
55		#' )
56		#'
57		#' @export
h_coxreg_univar_formulas <- function(variables,
                                     interaction = FALSE) {
  # Builds one formula string per covariate, plus a reference formula named "ref"
  # (arm only) when an arm variable is supplied.
  checkmate::assert_list(variables, names = "named")
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  checkmate::assert_flag(interaction)

  # An interaction needs both an arm and at least one covariate.
  if (!has_arm || is.null(variables$covariates)) {
    checkmate::assert_false(interaction)
  }

  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  covariates <- variables$covariates

  # Pieces shared by every formula.
  response <- paste0("survival::Surv(", variables$time, ", ", variables$event, ") ~ ")
  strata_term <- if (!is.null(variables$strata)) {
    paste0(" + strata(", paste0(variables$strata, collapse = ", "), ")")
  } else {
    ""
  }

  forms <- if (!is.null(covariates)) {
    paste0(
      response,
      ifelse(has_arm, variables$arm, "1"),
      if (interaction) " * " else " + ",
      covariates,
      strata_term
    )
  } else {
    NULL
  }
  nams <- covariates

  if (has_arm) {
    # Reference model: arm (and strata) only.
    forms <- c(paste0(response, variables$arm, strata_term), forms)
    nams <- c("ref", nams)
  }

  stats::setNames(forms, nams)
}
107
108		#' @describeIn h_cox_regression Helper for multivariate Cox regression formula. Creates a formulas
109		#' string. It is used internally by [fit_coxreg_multivar()] for the comparison of multivariate Cox
110		#' regression models. Interactions will not be included in multivariate Cox regression model.
111		#'
112		#' @return
113		#' * `h_coxreg_multivar_formula()` returns a `string` coercible into a formula (e.g [stats::as.formula()]).
114		#'
115		#' @examples
116		#' # `h_coxreg_multivar_formula`
117		#'
118		#' h_coxreg_multivar_formula(
119		#' variables = list(
120		#' time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE")
121		#' )
122		#' )
123		#'
124		#' # Addition of an optional strata.
125		#' h_coxreg_multivar_formula(
126		#' variables = list(
127		#' time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE"),
128		#' strata = "SITE"
129		#' )
130		#' )
131		#'
132		#' # Example without treatment arm.
133		#' h_coxreg_multivar_formula(
134		#' variables = list(
135		#' time = "AVAL", event = "event", covariates = c("RACE", "AGE"),
136		#' strata = "SITE"
137		#' )
138		#' )
139		#'
140		#' @export
h_coxreg_multivar_formula <- function(variables) {
  # Builds a single formula string: response ~ arm (or 1) + covariates + strata(...).
  checkmate::assert_list(variables, names = "named")
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  form <- paste0(
    "survival::Surv(", variables$time, ", ", variables$event, ") ~ ",
    ifelse(has_arm, variables$arm, "1")
  )

  covariates <- variables$covariates
  if (length(covariates) > 0) {
    covariate_terms <- paste(covariates, collapse = " + ")
    form <- paste(form, covariate_terms, sep = " + ")
  }

  strata <- variables$strata
  if (!is.null(strata)) {
    form <- paste0(form, " + strata(", paste0(strata, collapse = ", "), ")")
  }

  form
}
162
163		#' @describeIn h_cox_regression Utility function to help tabulate the result of
164		#' a univariate Cox regression model.
165		#'
166		#' @param effect (`string`)\cr the treatment variable.
167		#' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()].
168		#'
169		#' @return
170		#' * `h_coxreg_univar_extract()` returns a `data.frame` with variables `effect`, `term`, `term_label`, `level`,
171		#' `n`, `hr`, `lcl`, `ucl`, and `pval`.
172		#'
173		#' @examples
174		#' library(survival)
175		#'
176		#' dta_simple <- data.frame(
177		#' time = c(5, 5, 10, 10, 5, 5, 10, 10),
178		#' status = c(0, 0, 1, 0, 0, 1, 1, 1),
179		#' armcd = factor(LETTERS[c(1, 1, 1, 1, 2, 2, 2, 2)], levels = c("A", "B")),
180		#' var1 = c(45, 55, 65, 75, 55, 65, 85, 75),
181		#' var2 = c("F", "M", "F", "M", "F", "M", "F", "U")
182		#' )
183		#' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple)
184		#' result <- h_coxreg_univar_extract(
185		#' effect = "armcd", covar = "armcd", mod = mod, data = dta_simple
186		#' )
187		#' result
188		#'
189		#' @export
h_coxreg_univar_extract <- function(effect,
                                    covar,
                                    data,
                                    mod,
                                    control = control_coxreg()) {
  # Collects, for the term `effect` of the fitted model `mod`, the hazard ratio, its
  # confidence limits and the Anova p-value into a one-row-per-estimate data frame.
  checkmate::assert_string(covar)
  checkmate::assert_string(effect)
  checkmate::assert_class(mod, "coxph")
  test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method]

  mod_aov <- muffled_car_anova(mod, test_statistic)
  msum <- summary(mod, conf.int = control$conf_level)
  sum_cox <- broom::tidy(msum)

  # Combine results together.
  effect_aov <- mod_aov[effect, , drop = TRUE]
  pval <- effect_aov[[grep(pattern = "Pr", x = names(effect_aov)), drop = TRUE]]
  # Match the variable name literally: `effect` is a name, not a regular expression
  # (e.g. a "." in the name must not act as a wildcard).
  sum_main <- sum_cox[grepl(effect, sum_cox$level, fixed = TRUE), ]

  is_treatment <- effect == covar
  term_label <- if (is_treatment) {
    paste0(
      levels(data[[covar]])[2],
      " vs control (",
      levels(data[[covar]])[1],
      ")"
    )
  } else {
    unname(labels_or_names(data[covar]))
  }

  data.frame(
    effect = if (is_treatment) "Treatment:" else "Covariate:",
    term = covar,
    term_label = term_label,
    level = levels(data[[effect]])[2],
    n = mod[["n"]],
    hr = unname(sum_main["exp(coef)"]),
    lcl = unname(sum_main[grep("lower", names(sum_main))]),
    ucl = unname(sum_main[grep("upper", names(sum_main))]),
    pval = pval,
    stringsAsFactors = FALSE
  )
}
232
233		#' @describeIn h_cox_regression Tabulation of multivariate Cox regressions. Utility function to help
234		#' tabulate the result of a multivariate Cox regression model for a treatment/covariate variable.
235		#'
236		#' @return
237		#' * `h_coxreg_multivar_extract()` returns a `data.frame` with variables `pval`, `hr`, `lcl`, `ucl`, `level`,
238		#' `n`, `term`, and `term_label`.
239		#'
240		#' @examples
241		#' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple)
242		#' result <- h_coxreg_multivar_extract(
243		#' var = "var1", mod = mod, data = dta_simple
244		#' )
245		#' result
246		#'
247		#' @export
h_coxreg_multivar_extract <- function(var,
                                      data,
                                      mod,
                                      control = control_coxreg()) {
  # Collects the estimates of `mod` that belong to the variable `var`. Numeric variables
  # give their coefficient row(s) only; other variables give a header row (Anova
  # p-value, reference level in the label) followed by one row per coefficient.
  test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method]
  mod_aov <- muffled_car_anova(mod, test_statistic)

  msum <- summary(mod, conf.int = control$conf_level)
  sum_anova <- broom::tidy(mod_aov)
  sum_cox <- broom::tidy(msum)

  ret_anova <- sum_anova[sum_anova$term == var, c("term", "p.value")]
  names(ret_anova)[2] <- "pval"

  keep_cols <- !(names(sum_cox) %in% "exp(-coef)")
  if (is.factor(data[[var]])) {
    # Factor coefficients are named "<var><level>".
    ret_cox <- sum_cox[startsWith(prefix = var, x = sum_cox$level), keep_cols]
  } else {
    ret_cox <- sum_cox[(var == sum_cox$level), keep_cols]
  }
  names(ret_cox)[1:4] <- c("pval", "hr", "lcl", "ucl")
  varlab <- unname(labels_or_names(data[var]))
  ret_cox$term <- varlab

  if (is.numeric(data[[var]])) {
    ret <- ret_cox
    ret$term_label <- ret$term
  } else {
    var_levels <- levels(data[[var]])
    # With at most two levels the header row carries no p-value of its own.
    if (length(var_levels) <= 2) {
      ret_anova$pval <- NA
    }
    ret_anova$term_label <- paste0(varlab, " (reference = ", var_levels[1], ")")
    # Strip only the leading variable name. A global regex replacement would also
    # remove the name from inside the level itself (e.g. variable "ARM", level "ARM A").
    ret_cox$level <- substring(ret_cox$level, nchar(var) + 1L)
    ret_cox$term_label <- ret_cox$level
    ret <- dplyr::bind_rows(ret_anova, ret_cox)
  }

  as.data.frame(ret)
}

1		#' Number of Patients
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Count the number of unique and non-unique patients in a column (variable).
6		#'
7		#' @inheritParams argument_convention
8		#' @param x (`character` or `factor`)\cr vector of patient IDs.
9		#' @param count_by (`character` or `factor`)\cr optional vector to be combined with `x` when counting
10		#' `nonunique` records.
11		#' @param unique_count_suffix (`logical`)\cr should `"(n)"` suffix be added to `unique_count` labels.
12		#' Defaults to `TRUE`.
13		#'
14		#' @name summarize_num_patients
15		NULL
16
#' @describeIn summarize_num_patients Statistics function which counts the number of
#'   unique patients, the corresponding percentage taken with respect to the
#'   total number of patients, and the number of non-unique patients.
#'
#' @return
#' * `s_num_patients()` returns a named `list` of 3 statistics:
#'   * `unique`: Vector of counts and percentages.
#'   * `nonunique`: Vector of counts.
#'   * `unique_count`: Counts.
#'
#' @examples
#' # Use the statistics function to count number of unique and nonunique patients.
#' s_num_patients(x = as.character(c(1, 1, 1, 2, 4, NA)), labelstr = "", .N_col = 6L)
#' s_num_patients(
#'   x = as.character(c(1, 1, 1, 2, 4, NA)),
#'   labelstr = "",
#'   .N_col = 6L,
#'   count_by = as.character(c(1, 1, 2, 1, 1, 1))
#' )
#'
#' @export
s_num_patients <- function(x, labelstr, .N_col, count_by = NULL, unique_count_suffix = TRUE) { # nolint
  checkmate::assert_string(labelstr)
  checkmate::assert_count(.N_col)
  checkmate::assert_multi_class(x, classes = c("factor", "character"))
  checkmate::assert_flag(unique_count_suffix)

  n_unique <- n_available(unique(x))

  # Without `count_by` every available record counts; with it, only distinct
  # (`x`, `count_by`) combinations are counted.
  n_records <- if (is.null(count_by)) {
    n_available(x)
  } else {
    checkmate::assert_vector(count_by, len = length(x))
    checkmate::assert_multi_class(count_by, classes = c("factor", "character"))
    n_available(unique(interaction(x, count_by)))
  }

  # Report a fraction of 0 instead of 0 / 0 when both the count and the column total are zero.
  fraction <- if (n_unique == 0 && .N_col == 0) {
    0
  } else {
    n_unique / .N_col
  }

  count_label <- if (unique_count_suffix) {
    paste(labelstr, "(n)")
  } else {
    labelstr
  }

  list(
    unique = formatters::with_label(c(n_unique, fraction), labelstr),
    nonunique = formatters::with_label(n_records, labelstr),
    unique_count = formatters::with_label(n_unique, count_label)
  )
}
62
#' @describeIn summarize_num_patients Statistics function which counts the number of unique patients
#'   in a column (variable), the corresponding percentage taken with respect to the total number of
#'   patients, and the number of non-unique patients in the column.
#'
#' @param required (`character` or `NULL`)\cr optional name of a variable that is required to be non-missing.
#'
#' @return
#' * `s_num_patients_content()` returns the same values as `s_num_patients()`.
#'
#' @examples
#' # Count number of unique and non-unique patients.
#' df <- data.frame(
#'   USUBJID = as.character(c(1, 2, 1, 4, NA)),
#'   EVENT = as.character(c(10, 15, 10, 17, 8))
#' )
#' s_num_patients_content(df, .N_col = 5, .var = "USUBJID")
#'
#' df_by_event <- data.frame(
#'   USUBJID = as.character(c(1, 2, 1, 4, NA)),
#'   EVENT = as.character(c(10, 15, 10, 17, 8))
#' )
#' s_num_patients_content(df_by_event, .N_col = 5, .var = "USUBJID")
#' s_num_patients_content(df_by_event, .N_col = 5, .var = "USUBJID", count_by = "EVENT")
#'
#' @export
s_num_patients_content <- function(df,
                                   labelstr = "",
                                   .N_col, # nolint
                                   .var,
                                   required = NULL,
                                   count_by = NULL,
                                   unique_count_suffix = TRUE) {
  checkmate::assert_string(.var)
  checkmate::assert_data_frame(df)

  # In this function `count_by` is the name of a column of `df`, not the vector itself;
  # it is only checked against `df` when supplied.
  has_count_by <- !is.null(count_by)
  needed_vars <- list(id = .var)
  if (has_count_by) {
    needed_vars[["count_by"]] <- count_by
  }
  assert_df_with_variables(df, needed_vars)

  # Optionally keep only the rows where the `required` variable is non-missing.
  if (!is.null(required)) {
    checkmate::assert_string(required)
    assert_df_with_variables(df, list(required = required))
    keep <- !is.na(df[[required]])
    df <- df[keep, , drop = FALSE]
  }

  ids <- df[[.var]]
  by_values <- if (has_count_by) {
    df[[count_by]]
  } else {
    NULL
  }

  s_num_patients(
    x = ids,
    labelstr = labelstr,
    .N_col = .N_col,
    count_by = by_values,
    unique_count_suffix = unique_count_suffix
  )
}
122
# Formatted analysis function wrapping `s_num_patients_content()`: exposes all three statistics,
# with `unique` rendered as a count/fraction and the two plain counts rendered as `"xx"`.
c_num_patients <- make_afun(
  s_num_patients_content,
  .stats = c("unique", "nonunique", "unique_count"),
  .formats = c(
    unique = format_count_fraction_fixed_dp,
    nonunique = "xx",
    unique_count = "xx"
  )
)
128
#' @describeIn summarize_num_patients Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @return
#' * `summarize_num_patients()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_num_patients_content()` to the table layout.
#'
#' @export
summarize_num_patients <- function(lyt,
                                   var,
                                   .stats = NULL,
                                   .formats = NULL,
                                   .labels = c(
                                     unique = "Number of patients with at least one event",
                                     nonunique = "Number of events"
                                   ),
                                   indent_mod = lifecycle::deprecated(),
                                   .indent_mods = 0L,
                                   ...) {
  # `indent_mod` is deprecated in favour of `.indent_mods`; honour it with a warning when supplied.
  if (lifecycle::is_present(indent_mod)) {
    lifecycle::deprecate_warn("0.8.2", "summarize_num_patients(indent_mod)", "summarize_num_patients(.indent_mods)")
    .indent_mods <- indent_mod
  }

  if (is.null(.stats)) {
    .stats <- c("unique", "nonunique", "unique_count")
  }

  # Drop labels that belong to statistics which were not requested. Named labels are always
  # filtered by name: previously this only happened when there were more labels than statistics,
  # so e.g. `.stats = c("unique", "unique_count")` still forwarded the unused `nonunique` label.
  # Unnamed labels keep the earlier length-based rule.
  if (!is.null(names(.labels)) || length(.labels) > length(.stats)) {
    .labels <- .labels[names(.labels) %in% .stats]
  }

  cfun <- make_afun(
    c_num_patients,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels
  )

  summarize_row_groups(
    lyt = lyt,
    var = var,
    cfun = cfun,
    extra_args = list(...),
    indent_mod = .indent_mods
  )
}
172
#' @describeIn summarize_num_patients Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `analyze_num_patients()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_num_patients_content()` to the table layout.
#'
#' @details In general, functions that start with `analyze*` are expected to
#'   work like [rtables::analyze()], while functions that start with `summarize*`
#'   are based upon [rtables::summarize_row_groups()]. The latter provides a
#'   value for each dividing split in the row and column space but, because it is
#'   bound to the fundamental splits, it is repeated by design in every page
#'   when pagination is involved.
#'
#' @note As opposed to [summarize_num_patients()], this function does not repeat the produced rows.
#'
#' @examples
#' df_tmp <- data.frame(
#'   USUBJID = as.character(c(1, 2, 1, 4, NA, 6, 6, 8, 9)),
#'   ARM = c("A", "A", "A", "A", "A", "B", "B", "B", "B"),
#'   AGE = c(10, 15, 10, 17, 8, 11, 11, 19, 17)
#' )
#' tbl <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   analyze_num_patients("USUBJID", .stats = c("unique")) %>%
#'   build_table(df_tmp)
#' tbl
#'
#' @export
analyze_num_patients <- function(lyt,
                                 vars,
                                 .stats = NULL,
                                 .formats = NULL,
                                 .labels = c(
                                   unique = "Number of patients with at least one event",
                                   nonunique = "Number of events"
                                 ),
                                 show_labels = c("default", "visible", "hidden"),
                                 indent_mod = lifecycle::deprecated(),
                                 .indent_mods = 0L,
                                 ...) {
  # `indent_mod` is deprecated in favour of `.indent_mods`; honour it with a warning when supplied.
  if (lifecycle::is_present(indent_mod)) {
    lifecycle::deprecate_warn("0.8.2", "analyze_num_patients(indent_mod)", "analyze_num_patients(.indent_mods)")
    .indent_mods <- indent_mod
  }

  if (is.null(.stats)) {
    .stats <- c("unique", "nonunique", "unique_count")
  }

  # Drop labels that belong to statistics which were not requested. Named labels are always
  # filtered by name: previously this only happened when there were more labels than statistics,
  # so e.g. `.stats = c("unique", "unique_count")` still forwarded the unused `nonunique` label.
  # Unnamed labels keep the earlier length-based rule.
  if (!is.null(names(.labels)) || length(.labels) > length(.stats)) {
    .labels <- .labels[names(.labels) %in% .stats]
  }

  afun <- make_afun(
    c_num_patients,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels
  )

  analyze(
    afun = afun,
    lyt = lyt,
    vars = vars,
    extra_args = list(...),
    show_labels = show_labels,
    indent_mod = .indent_mods
  )
}