cardx coverage - 96.71%

Files
Source

#' Functions for Calculating Proportion Confidence Intervals
#'
#' Functions to calculate different proportion confidence intervals for use in `ard_proportion()`.
#'
#' @inheritParams ard_categorical_ci
#' @param x (binary `numeric`/`logical`)\cr
#'   vector of a binary values, i.e. a logical vector, or numeric with values `c(0, 1)`
#' @return Confidence interval of a proportion.
#'
#' @name proportion_ci
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
#' x <- c(
#'   TRUE, TRUE, TRUE, TRUE, TRUE,
#'   FALSE, FALSE, FALSE, FALSE, FALSE
#' )
#'
#' proportion_ci_wald(x, conf.level = 0.9)
#' proportion_ci_wilson(x, correct = TRUE)
#' proportion_ci_clopper_pearson(x)
#' proportion_ci_agresti_coull(x)
#' proportion_ci_jeffreys(x)
NULL

#' @describeIn proportion_ci Calculates the Wald interval by following the usual textbook definition
#'   for a single proportion confidence interval using the normal approximation.
#'
#' \deqn{\hat{p} \pm z_{\alpha/2} \sqrt{\frac{\hat{p}(1 - \hat{p})}{n}}}
#'
#' @param correct (`logical`)\cr apply continuity correction.
#'
#' @export
proportion_ci_wald <- function(x, conf.level = 0.95, correct = FALSE) {
  set_cli_abort_call()

  # check inputs ---------------------------------------------------------------
  check_not_missing(x)
  check_binary(x)
  check_range(conf.level, range = c(0, 1), include_bounds = c(FALSE, FALSE))
  check_scalar(conf.level)
  check_class(x = correct, "logical")
  check_scalar(correct)

  x <- stats::na.omit(x)

  n <- length(x)
  p_hat <- mean(x)
  z <- stats::qnorm((1 + conf.level) / 2)
  q_hat <- 1 - p_hat
  correction_factor <- ifelse(correct, 1 / (2 * n), 0)

  err <- z * sqrt(p_hat * q_hat) / sqrt(n) + correction_factor
  l_ci <- max(0, p_hat - err)
  u_ci <- min(1, p_hat + err)

  list(
    N = n,
    n = sum(x),
    estimate = p_hat,
    conf.low = l_ci,
    conf.high = u_ci,
    conf.level = conf.level,
    method =
      glue::glue("Wald Confidence Interval {ifelse(correct, 'with', 'without')} continuity correction")
  )
}


#' @describeIn proportion_ci Calculates the Wilson interval by calling [stats::prop.test()].
#'  Also referred to as Wilson score interval.
#'
#' \deqn{\frac{\hat{p} +
#' \frac{z^2_{\alpha/2}}{2n} \pm z_{\alpha/2} \sqrt{\frac{\hat{p}(1 - \hat{p})}{n} +
#' \frac{z^2_{\alpha/2}}{4n^2}}}{1 + \frac{z^2_{\alpha/2}}{n}}}
#'
#' @export
proportion_ci_wilson <- function(x, conf.level = 0.95, correct = FALSE) {
  set_cli_abort_call()

  # check installed packages ---------------------------------------------------
  check_pkg_installed(pkg = "broom")

  # check inputs ---------------------------------------------------------------
  check_not_missing(x)
  check_binary(x)
  check_class(x = correct, "logical")
  check_scalar(correct)
  check_range(conf.level, range = c(0, 1), include_bounds = c(FALSE, FALSE))
  check_scalar(conf.level)

  x <- stats::na.omit(x)

  n <- length(x)
  y <- stats::prop.test(x = sum(x), n = n, correct = correct, conf.level = conf.level)

  list(
    N = n,
    n = sum(x),
    conf.level = conf.level
  ) |>
    utils::modifyList(val = broom::tidy(y) |> as.list()) |>
    utils::modifyList(
      list(
        method =
          glue::glue("Wilson Confidence Interval {ifelse(correct, 'with', 'without')} continuity correction")
      )
    )
}

#' @describeIn proportion_ci Calculates the Clopper-Pearson interval by calling [stats::binom.test()].
#'   Also referred to as the `exact` method.
#'
#' \deqn{
#' \left( \frac{k}{n} \pm z_{\alpha/2} \sqrt{\frac{\frac{k}{n}(1-\frac{k}{n})}{n} +
#' \frac{z^2_{\alpha/2}}{4n^2}} \right)
#' / \left( 1 + \frac{z^2_{\alpha/2}}{n} \right)}
#'
#' @export
proportion_ci_clopper_pearson <- function(x, conf.level = 0.95) {
  set_cli_abort_call()

  # check installed packages ---------------------------------------------------
  check_pkg_installed(pkg = "broom")

  # check inputs ---------------------------------------------------------------
  check_not_missing(x)
  check_binary(x)
  check_range(conf.level, range = c(0, 1), include_bounds = c(FALSE, FALSE))
  check_scalar(conf.level)

  x <- stats::na.omit(x)
  n <- length(x)

  y <- stats::binom.test(x = sum(x), n = n, conf.level = conf.level)

  list(N = n, n = sum(x), conf.level = conf.level) |>
    utils::modifyList(val = broom::tidy(y) |> as.list()) |>
    utils::modifyList(list(method = "Clopper-Pearson Confidence Interval"))
}

#' @describeIn proportion_ci Calculates the `Agresti-Coull` interval (created by `Alan Agresti` and `Brent Coull`) by
#'   (for 95% CI) adding two successes and two failures to the data and then using the Wald formula to construct a CI.
#'
#' \deqn{
#' \left( \frac{\tilde{p} + z^2_{\alpha/2}/2}{n + z^2_{\alpha/2}} \pm
#' z_{\alpha/2} \sqrt{\frac{\tilde{p}(1 - \tilde{p})}{n} +
#' \frac{z^2_{\alpha/2}}{4n^2}} \right)}
#'
#' @export
proportion_ci_agresti_coull <- function(x, conf.level = 0.95) {
  set_cli_abort_call()

  # check inputs ---------------------------------------------------------------
  check_not_missing(x)
  check_binary(x)
  check_range(conf.level, range = c(0, 1), include_bounds = c(FALSE, FALSE))
  check_scalar(conf.level)

  x <- stats::na.omit(x)

  n <- length(x)
  x_sum <- sum(x)
  z <- stats::qnorm((1 + conf.level) / 2)

  # Add here both z^2 / 2 successes and failures.
  x_sum_tilde <- x_sum + z^2 / 2
  n_tilde <- n + z^2

  # Then proceed as with the Wald interval.
  p_tilde <- x_sum_tilde / n_tilde
  q_tilde <- 1 - p_tilde
  err <- z * sqrt(p_tilde * q_tilde) / sqrt(n_tilde)
  l_ci <- max(0, p_tilde - err)
  u_ci <- min(1, p_tilde + err)

  list(
    N = n,
    n = sum(x),
    estimate = mean(x),
    conf.low = l_ci,
    conf.high = u_ci,
    conf.level = conf.level,
    method = "Agresti-Coull Confidence Interval"
  )
}

#' @describeIn proportion_ci Calculates the Jeffreys interval, an equal-tailed interval based on the
#'   non-informative Jeffreys prior for a binomial proportion.
#'
#' \deqn{\left( \text{Beta}\left(\frac{k}{2} + \frac{1}{2}, \frac{n - k}{2} + \frac{1}{2}\right)_\alpha,
#' \text{Beta}\left(\frac{k}{2} + \frac{1}{2}, \frac{n - k}{2} + \frac{1}{2}\right)_{1-\alpha} \right)}
#'
#' @export
proportion_ci_jeffreys <- function(x, conf.level = 0.95) {
  set_cli_abort_call()

  # check inputs ---------------------------------------------------------------
  check_not_missing(x)
  check_binary(x)
  check_range(conf.level, range = c(0, 1), include_bounds = c(FALSE, FALSE))
  check_scalar(conf.level)
  x <- stats::na.omit(x)

  n <- length(x)
  x_sum <- sum(x)

  alpha <- 1 - conf.level
  l_ci <- ifelse(
    x_sum == 0,
    0,
    stats::qbeta(alpha / 2, x_sum + 0.5, n - x_sum + 0.5)
  )

  u_ci <- ifelse(
    x_sum == n,
    1,
    stats::qbeta(1 - alpha / 2, x_sum + 0.5, n - x_sum + 0.5)
  )

  list(
    N = n,
    n = sum(x),
    estimate = mean(x),
    conf.low = l_ci,
    conf.high = u_ci,
    conf.level = conf.level,
    method = glue::glue("Jeffreys Interval")
  )
}


#' @describeIn proportion_ci Calculates the stratified Wilson confidence
#'   interval for unequal proportions as described in
#'   Xin YA, Su XG. Stratified Wilson and Newcombe confidence intervals
#'   for multiple binomial proportions. _Statistics in Biopharmaceutical Research_. 2010;2(3).
#'
#' \deqn{\frac{\hat{p}_j + \frac{z^2_{\alpha/2}}{2n_j} \pm
#' z_{\alpha/2} \sqrt{\frac{\hat{p}_j(1 - \hat{p}_j)}{n_j} +
#' \frac{z^2_{\alpha/2}}{4n_j^2}}}{1 + \frac{z^2_{\alpha/2}}{n_j}}}
#'
#'
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `x`.
#' @param weights (`numeric`)\cr weights for each level of the strata. If `NULL`, they are
#'   estimated using the iterative algorithm that
#'   minimizes the weighted squared length of the confidence interval.
#' @param max.iterations (positive `integer`)\cr maximum number of iterations for the iterative procedure used
#'   to find estimates of optimal weights.
#' @param correct (scalar `logical`)\cr include the continuity correction. For further information, see for example
#'   [stats::prop.test()].
#'
#' @examples
#' # Stratified Wilson confidence interval with unequal probabilities
#'
#' set.seed(1)
#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
#' strata_data <- data.frame(
#'   "f1" = sample(c("a", "b"), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' )
#' strata <- interaction(strata_data)
#' n_strata <- ncol(table(rsp, strata)) # Number of strata
#'
#' proportion_ci_strat_wilson(
#'   x = rsp, strata = strata,
#'   conf.level = 0.90
#' )
#'
#' # Not automatic setting of weights
#' proportion_ci_strat_wilson(
#'   x = rsp, strata = strata,
#'   weights = rep(1 / n_strata, n_strata),
#'   conf.level = 0.90
#' )
#'
#' @export
proportion_ci_strat_wilson <- function(x,
                                       strata,
                                       weights = NULL,
                                       conf.level = 0.95,
                                       max.iterations = 10L,
                                       correct = FALSE) {
  set_cli_abort_call()

  # check inputs ---------------------------------------------------------------
  check_not_missing(x)
  check_not_missing(strata)
  check_binary(x)
  check_class(correct, "logical")
  check_scalar(correct)
  check_class(strata, "factor")
  check_range(conf.level, range = c(0, 1), include_bounds = c(FALSE, FALSE))
  check_scalar(conf.level)

  # remove missing values from x and strata
  is_na <- is.na(x) | is.na(strata)
  x <- x[!is_na]
  strata <- strata[!is_na]
  if (!inherits(x, "logical")) x <- as.logical(x)
  # check all TRUE/FALSE, if so, not calculable
  if (all(x) || all(!x)) {
    cli::cli_abort("All values in {.arg x} argument are either {.code TRUE} or {.code FALSE} and CI is not estimable.")
  }

  tbl <- table(factor(x, levels = c(FALSE, TRUE)), strata, useNA = "no")
  n_strata <- length(unique(strata))

  # Checking the weights and maximum number of iterations.
  do_iter <- FALSE
  if (is.null(weights)) {
    weights <- rep(1 / n_strata, n_strata) # Initialization for iterative procedure
    do_iter <- TRUE

    # Iteration parameters
    if (!is_scalar_integerish(max.iterations) || max.iterations < 1) {
      cli::cli_abort("Argument {.arg max.iterations} must be a positive integer.")
    }
  }
  check_range(weights, range = c(0, 1), include_bounds = c(TRUE, TRUE))
  sum_weights <- sum(weights) |>
    round() |>
    as.integer()
  if (sum_weights != 1L || abs(sum_weights - sum(weights)) > sqrt(.Machine$double.eps)) {
    cli::cli_abort("The sum of the {.arg weights} argument must be {.val {1L}}")
  }

  xs <- tbl["TRUE", ]
  ns <- colSums(tbl)
  use_stratum <- (ns > 0)
  ns <- ns[use_stratum]
  xs <- xs[use_stratum]
  ests <- xs / ns
  vars <- ests * (1 - ests) / ns

  strata_qnorm <- .strata_normal_quantile(vars, weights, conf.level)

  # Iterative setting of weights if they were not passed in `weights` argument
  weights_new <- if (do_iter) {
    .update_weights_strat_wilson(vars, strata_qnorm, weights, ns, max.iterations, conf.level)$weights
  } else {
    weights
  }

  strata_conf.level <- 2 * stats::pnorm(strata_qnorm) - 1

  ci_by_strata <- Map(
    function(x, n) {
      # Classic Wilson's confidence interval
      suppressWarnings(stats::prop.test(x, n, correct = correct, conf.level = strata_conf.level)$conf.int)
    },
    x = xs,
    n = ns
  )
  lower_by_strata <- sapply(ci_by_strata, "[", 1L)
  upper_by_strata <- sapply(ci_by_strata, "[", 2L)

  lower <- sum(weights_new * lower_by_strata)
  upper <- sum(weights_new * upper_by_strata)

  # Return values
  list(
    N = length(x),
    n = sum(x),
    estimate = mean(x),
    conf.low = lower,
    conf.high = upper,
    conf.level = conf.level,
    weights = if (do_iter) weights_new else NULL,
    method =
      glue::glue("Stratified Wilson Confidence Interval {ifelse(correct, 'with', 'without')} continuity correction")
  ) |>
    compact()
}

#' @describeIn proportion_ci Helper to determine if vector is binary (logical or 0/1)
#'
#' @export
is_binary <- function(x) {
  is.logical(x) || (is_integerish(x) && is_empty(setdiff(x, c(0, 1, NA))))
}

#' Helper Function for the Estimation of Stratified Quantiles
#'
#' This function wraps the estimation of stratified percentiles when we assume
#' the approximation for large numbers. This is necessary only in the case
#' proportions for each strata are unequal.
#'
#' @inheritParams proportion_ci_strat_wilson
#'
#' @return Stratified quantile.
#'
#' @seealso [proportion_ci_strat_wilson()]
#'
#' @keywords internal
#'
#' @examples
#' strata_data <- table(data.frame(
#'   "f1" = sample(c(TRUE, FALSE), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' ))
#' ns <- colSums(strata_data)
#' ests <- strata_data["TRUE", ] / ns
#' vars <- ests * (1 - ests) / ns
#' weights <- rep(1 / length(ns), length(ns))
#'
#' cardx:::.strata_normal_quantile(vars, weights, 0.95)
.strata_normal_quantile <- function(vars, weights, conf.level) {
  summands <- weights^2 * vars
  # Stratified quantile
  sqrt(sum(summands)) / sum(sqrt(summands)) * stats::qnorm((1 + conf.level) / 2)
}

#' Helper Function for the Estimation of Weights for `proportion_ci_strat_wilson()`
#'
#' This function wraps the iteration procedure that allows you to estimate
#' the weights for each proportional strata. This assumes to minimize the
#' weighted squared length of the confidence interval.
#'
#' @keywords internal
#' @inheritParams proportion_ci_strat_wilson
#' @param vars (`numeric`)\cr normalized proportions for each strata.
#' @param strata_qnorm (`numeric`)\cr initial estimation with identical weights of the quantiles.
#' @param initial_weights (`numeric`)\cr initial weights used to calculate `strata_qnorm`. This can
#'   be optimized in the future if we need to estimate better initial weights.
#' @param n_per_strata (`numeric`)\cr number of elements in each strata.
#' @param max.iterations (`count`)\cr maximum number of iterations to be tried. Convergence is always checked.
#' @param tol (`number`)\cr tolerance threshold for convergence.
#'
#' @return A `list` of 3 elements: `n_it`, `weights`, and `diff_v`.
#'
#' @seealso For references and details see [`proportion_ci_strat_wilson()`].
#'
#' @examples
#' vs <- c(0.011, 0.013, 0.012, 0.014, 0.017, 0.018)
#' sq <- 0.674
#' ws <- rep(1 / length(vs), length(vs))
#' ns <- c(22, 18, 17, 17, 14, 12)
#'
#' cardx:::.update_weights_strat_wilson(vs, sq, ws, ns, 100, 0.95, 0.001)
.update_weights_strat_wilson <- function(vars,
                                         strata_qnorm,
                                         initial_weights,
                                         n_per_strata,
                                         max.iterations = 50,
                                         conf.level = 0.95,
                                         tol = 0.001) {
  it <- 0
  diff_v <- NULL

  while (it < max.iterations) {
    it <- it + 1
    weights_new_t <- (1 + strata_qnorm^2 / n_per_strata)^2
    weights_new_b <- (vars + strata_qnorm^2 / (4 * n_per_strata^2))
    weights_new <- weights_new_t / weights_new_b
    weights_new <- weights_new / sum(weights_new)
    strata_qnorm <- .strata_normal_quantile(vars, weights_new, conf.level)
    diff_v <- c(diff_v, sum(abs(weights_new - initial_weights)))
    if (diff_v[length(diff_v)] < tol) break
    initial_weights <- weights_new
  }

  if (it == max.iterations) {
    warning("The heuristic to find weights did not converge with max.iterations = ", max.iterations)
  }

  list(
    "n_it" = it,
    "weights" = weights_new,
    "diff_v" = diff_v
  )
}

#' Regression VIF ARD
#'
#' @description
#' Function takes a regression model object and returns the variance inflation factor (VIF)
#' using [`car::vif()`] and converts it to a ARD structure
#'
#' @param x regression model object
#' See car::vif() for details
#'
#' @param ... arguments passed to `car::vif(...)`
#'
#' @return data frame
#' @name ard_car_vif
#' @rdname ard_car_vif
#' @export
#'
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "car"))
#' lm(AGE ~ ARM + SEX, data = cards::ADSL) |>
#'   ard_car_vif()
ard_car_vif <- function(x, ...) {
  set_cli_abort_call()

  # check installed packages ---------------------------------------------------
  check_pkg_installed("car")

  # check inputs ---------------------------------------------------------------
  check_not_missing(x)

  vif <- cards::eval_capture_conditions(car::vif(x, ...))

  # if vif failed, set result as NULL, error will be kept through eval_capture_conditions()
  if (is.null(vif$result)) {
    # try to capture variable names from `terms()`
    lst_terms <- cards::eval_capture_conditions(attr(stats::terms(x), "term.labels"))
    # we cannot get variable names, error out
    if (!is.null(lst_terms[["error"]])) {
      cli::cli_abort(
        c("There was an error running {.fun car::vif}. See below.", x = vif[["error"]]),
        call = get_cli_abort_call()
      )
    }
    vif$result <- dplyr::tibble(
      variable = lst_terms[["result"]],
      VIF = list(NULL),
      GVIF = list(NULL),
      aGVIF = list(NULL),
      df = list(NULL)
    )
  }
  # if VIF is returned
  else if (!is.matrix(vif$result)) {
    vif$result <- dplyr::tibble(variable = names(vif$result), VIF = vif$result)
  }
  # if Generalized VIF is returned
  else if (is.matrix(vif$result)) {
    vif$result <-
      vif$result |>
      as.data.frame() %>%
      dplyr::mutate(., variable = rownames(.), .before = 1L) |>
      dplyr::rename(
        aGVIF = "GVIF^(1/(2*Df))",
        df = "Df"
      ) |>
      dplyr::tibble()
  }

  # Clean-up the result to fit the ard structure through pivot
  vif$result <-
    vif$result |>
    tidyr::pivot_longer(
      cols = -c("variable"),
      names_to = "stat_name",
      values_to = "stat"
    ) |>
    dplyr::mutate(
      context = "car_vif",
      stat = as.list(.data$stat),
      stat_label = ifelse(
        .data$stat_name == "aGVIF",
        "Adjusted GVIF",
        .data$stat_name
      ),
      fmt_fun = map(
        .data$stat,
        function(.x) {
          # styler: off
          if (is.integer(.x)) return(0L)
          if (is.numeric(.x)) return(1L)
          # styler: on
          NULL
        }
      )
    )

  # Bind the results and possible warning/errors together
  vif_return <- dplyr::tibble(
    vif$result,
    warning = vif["warning"],
    error = vif["error"]
  )

  # Clean up return object
  vif_return |>
    cards::as_card() |>
    cards::tidy_ard_column_order()
}

#' ARD One-way Test
#'
#' @description
#' Analysis results data for Testing Equal Means in a One-Way Layout.
#' calculated with `oneway.test()`
#'
#' @inheritParams stats::oneway.test
#' @param ... additional arguments passed to `oneway.test(...)`
#'
#' @return ARD data frame
#' @export
#'
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
#' ard_stats_oneway_test(AGE ~ ARM, data = cards::ADSL)
ard_stats_oneway_test <- function(formula, data, ...) {
  set_cli_abort_call()

  # check installed packages ---------------------------------------------------
  check_pkg_installed(c("broom"))

  # check/process inputs -------------------------------------------------------
  check_not_missing(formula)
  check_not_missing(data)
  check_data_frame(data)
  check_class(formula, cls = "formula")

  # build ARD ------------------------------------------------------------------
  df_results <-
    cards::tidy_as_ard(
      lst_tidy =
        cards::eval_capture_conditions(
          stats::oneway.test(formula, data = data, ...) |>
            broom::tidy()
        ),
      tidy_result_names = c("num.df", "den.df", "statistic", "p.value", "method"),
      fun_args_to_record =
        c("var.equal"),
      formals = formals(stats::oneway.test),
      passed_args = dots_list(...),
      lst_ard_columns = list(context = "stats_oneway_test")
    ) |>
    dplyr::mutate(
      .after = "stat_name",
      stat_label =
        dplyr::case_when(
          .data$stat_name %in% "num.df" ~ "Degrees of Freedom",
          .data$stat_name %in% "den.df" ~ "Denominator Degrees of Freedom",
          .data$stat_name %in% "statistic" ~ "F Statistic",
          .data$stat_name %in% "p.value" ~ "p-value",
          .data$stat_name %in% "method" ~ "Method",
          TRUE ~ .data$stat_name,
        )
    )

  # add variable/groups to results and return result
  df_results |>
    dplyr::bind_cols(
      dplyr::tibble(!!!map(as.list(attr(stats::terms(formula), "variables"))[-1], as_label)) %>%
        set_names(., c("variable", paste0("group", seq_len(length(.) - 1L))))
    ) |>
    cards::as_card() |>
    cards::tidy_ard_column_order()
}

#' Deprecated functions
#'
#' @description
#' `r lifecycle::badge('deprecated')`\cr
#' Some functions have been deprecated and are no longer being actively
#' supported.
#'
#' @name deprecated
#' @keywords internal
NULL

# v0.3.0 -----------------------------------------------------------------------
#' @importFrom cards ard_continuous
#' @export
cards::ard_continuous

#' @importFrom cards ard_categorical
#' @export
cards::ard_categorical

#' @importFrom cards ard_dichotomous
#' @export
cards::ard_dichotomous

#' @rdname deprecated
#' @export
ard_continuous.survey.design <- function(data, ...) {
  lifecycle::deprecate_soft(
    when = "0.3.0",
    what = "cardx::ard_continuous()",
    with = "cardx::ard_summary()"
  )

  ard_summary(data = data, ...) |>
    dplyr::mutate(context = "continuous")
}

#' @rdname deprecated
#' @export
ard_categorical.survey.design <- function(data, ...) {
  lifecycle::deprecate_soft(
    when = "0.3.0",
    what = "cardx::ard_categorical()",
    with = "cardx::ard_tabulate()"
  )

  ard_tabulate(data = data, ...) |>
    dplyr::mutate(context = "categorical")
}

#' @rdname deprecated
#' @export
ard_dichotomous.survey.design <- function(data, ...) {
  lifecycle::deprecate_soft(
    when = "0.3.0",
    what = "cardx::ard_dichotomous()",
    with = "cardx::ard_tabulate_value()",
    details = "The `value` argument no longer has a default value and must be specified."
  )

  ard_tabulate_value(data = data, ...) |>
    dplyr::mutate(context = "dichotomous")
}

#' @rdname deprecated
#' @export
ard_categorical_max <- function(...) {
  lifecycle::deprecate_soft(
    when = "0.3.0",
    what = "cardx::ard_categorical_max()",
    with = "cardx::ard_tabulate_max()"
  )

  ard_tabulate_max(...)
}

#' @rdname deprecated
#' @export
ard_emmeans_mean_difference <- function(...) {
  lifecycle::deprecate_soft(
    when = "0.3.1",
    what = "cardx::ard_emmeans_mean_difference()",
    with = "cardx::ard_emmeans_contrast()"
  )

  ard_emmeans_contrast(...)
}

#' ARD Survival Differences
#'
#' Calculate differences in the Kaplan-Meier estimator of survival using the
#' results from [`survival::survfit()`].
#'
#' @param x (`survift`)\cr
#'   object of class `'survfit'` typically created with [`survival::survfit()`]
#' @param conf.level (scalar `numeric`)\cr
#'   confidence level for confidence interval. Default is `0.95`.
#' @inheritParams ard_survival_survfit
#'
#' @return an ARD data frame of class 'card'
#' @export
#'
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("survival", "ggsurvfit")))
#' library(ggsurvfit)
#' library(survival)
#'
#' survfit(Surv_CNSR() ~ TRTA, data = cards::ADTTE) |>
#'   ard_survival_survfit_diff(times = c(25, 50))
ard_survival_survfit_diff <- function(x, times, conf.level = 0.95) {
  set_cli_abort_call()

  # check installed packages ---------------------------------------------------
  check_pkg_installed(c("survival", "broom"))
  check_not_missing(x)
  check_not_missing(times)
  check_class(x, "survfit")

  if (inherits(x, c("survfitms", "survfitcox"))) {
    cli::cli_abort(
      "Argument {.arg x} cannot be class {.cls {c('survfitms', 'survfitcox')}}.",
      call = get_cli_abort_call()
    )
  }
  check_scalar_range(conf.level, range = c(0, 1))
  check_length(
    as.list(x$call)[["formula"]] |> stats::as.formula() |> stats::terms() |> attr("term.labels"),
    length = 1L,
    message = "The {.cls survfit} object passed in argument {.arg x} must be stratified by a single variable."
  )
  if (length(x$strata) < 2) {
    cli::cli_abort(
      "The {.cls survfit} object's stratifying variable must have 2 or more levels.",
      call = get_cli_abort_call()
    )
  }

  # calculate the survival at the specified times
  ard_survival_survfit <-
    ard_survival_survfit(x = x, times = times) |>
    dplyr::filter(.data$stat_name %in% c("estimate", "std.error")) |>
    dplyr::select(-c("stat_label", "context", "fmt_fun"))

  # transform the survival ARD into a cards object with the survival difference
  card <-
    ard_survival_survfit %>%
    {dplyr::left_join( # styler: off
        # remove the first group from the data frame (this is our reference group)
        dplyr::filter(., .by = cards::all_ard_groups(), dplyr::cur_group_id() > 1L) |>
          dplyr::rename(stat1 = "stat"),
        # merge the reference group data
        dplyr::filter(., .by = cards::all_ard_groups(), dplyr::cur_group_id() == 1L) |>
          dplyr::select(stat0 = "stat", everything(), -c("group1_level", "error", "warning")),
        by = c("group1", "variable", "variable_level", "stat_name")
    )} |> # styler: off
    # reshape to put the stats that need to be combined on the same row
    tidyr::pivot_wider(
      id_cols = c("group1", "group1_level", "variable", "variable_level"),
      names_from = "stat_name",
      values_from = c("stat0", "stat1"),
      values_fn = unlist
    ) |>
    # calcualte the primary statistics to return
    dplyr::mutate(
      # reference level
      reference_level = ard_survival_survfit[["group1_level"]][1],
      # short description of method
      method = "Survival Difference (Z-test)",
      # survival difference
      estimate = .data$stat0_estimate - .data$stat1_estimate,
      # survival difference standard error
      std.error = sqrt(.data$stat0_std.error^2 + .data$stat1_std.error^2),
      # Z test statistic
      statistic = .data$estimate / .data$std.error,
      # confidence limits of the survival difference
      conf.low = .data$estimate - .data$std.error * stats::qnorm(1 - (1 - .env$conf.level) / 2),
      conf.high = .data$estimate + .data$std.error * stats::qnorm(1 - (1 - .env$conf.level) / 2),
      # p-value for test where H0: no difference
      p.value = 2 * (1 - stats::pnorm(abs(.data$statistic))),
      across(c("reference_level", "estimate", "std.error", "statistic", "conf.low", "conf.high", "p.value", "method"), as.list)
    ) |>
    # reshape into the cards structure
    dplyr::select(-starts_with("stat0_"), -starts_with("stat1_")) |>
    tidyr::pivot_longer(
      cols = -c(cards::all_ard_groups(), cards::all_ard_variables()),
      names_to = "stat_name",
      values_to = "stat"
    )

  # final prepping of the cards object -----------------------------------------
  card |>
    dplyr::mutate(
      warning = ard_survival_survfit[["warning"]][1],
      error = ard_survival_survfit[["error"]][1],
      fmt_fun = list(1L),
      stat_label =
        dplyr::case_when(
          .data$stat_name %in% "estimate" ~ "Survival Difference",
          .data$stat_name %in% "std.error" ~ "Survival Difference Standard Error",
          .data$stat_name %in% "conf.low" ~ "CI Lower Bound",
          .data$stat_name %in% "conf.high" ~ "CI Upper Bound",
          .data$stat_name %in% "statistic" ~ "z statistic",
          .data$stat_name %in% "p.value" ~ "p-value",
          .default = .data$stat_name
        ),
      context = "survival_survfit_diff",
    ) |>
    cards::as_card() |>
    cards::tidy_ard_column_order()
}

#' ARD Wald Test
#'
#' @description
#' Function takes a regression model object and calculates Wald
#' statistical test using [`aod::wald.test()`].
#'
#' @param x regression model object
#' @param ... arguments passed to `aod::wald.test(...)`
#' @inheritParams ard_regression
#'
#' @return data frame
#' @export
#'
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("aod", "broom.helpers")))
#' lm(AGE ~ ARM, data = cards::ADSL) |>
#'   ard_aod_wald_test()
ard_aod_wald_test <- function(x, tidy_fun = broom.helpers::tidy_with_broom_or_parameters, ...) {
  set_cli_abort_call()

  # check installed packages ---------------------------------------------------
  check_pkg_installed(c("aod", "broom.helpers"))

  # check inputs ---------------------------------------------------------------
  check_not_missing(x)

  # run regression() -----------------------------------------------------------
  reg_model <- cards::eval_capture_conditions(
    ard_regression_basic(x, tidy_fun = tidy_fun, intercept = TRUE, stats_to_remove = c(
      "var_type",
      "var_label",
      "var_class", "label",
      "contrasts_type", "contrasts", "var_nlevels", "std.error",
      "conf.low", "conf.high", "statistic", "p.value", "estimate"
    ))
  )

  if (!is.null(reg_model[["error"]])) {
    cli::cli_abort(
      c("Unable to identify underlying variable names in regression model.",
        i = "Is this model type supported by {.fun broom.helpers::tidy_plus_plus}, which is the function used to identify variable names?"
      ),
      call = get_cli_abort_call()
    )
  }
  aod <-
    reg_model[["result"]] %>%
    dplyr::select(c(
      variable = "variable",
      model_terms = "stat"
    )) %>%
    dplyr::mutate(term_id = dplyr::row_number()) %>%
    tidyr::nest(data = -"variable") %>%
    dplyr::rowwise() %>%
    dplyr::mutate(
      model_terms = unlist(.data$data[["model_terms"]]) %>% list(),
      model_terms_id = rlang::set_names(.data$data[["term_id"]]) %>% list()
    )
  # run wald.test() -----------------------------------------------------------
  wald_test <-
    cards::eval_capture_conditions(lapply(seq_len(length(aod$model_terms_id)), function(terms_id) {
      aod::wald.test(
        Sigma = stats::vcov(x),
        b = stats::coef(x), Terms = aod$model_terms_id[[terms_id]]
      )
    }))


  df_list <- do.call(rbind, lapply(wald_test$result, .extract_wald_results))

  cbind(aod$variable, df_list) %>%
    tidyr::pivot_longer(
      cols = !"aod$variable",
      names_to = "stat_name",
      values_to = "stat"
    ) %>%
    dplyr::rename(
      "variable" = "aod$variable"
    ) |>
    dplyr::mutate(
      stat = as.list(.data$stat),
      stat_label =
        dplyr::case_when(
          .data$stat_name %in% "statistic" ~ "Statistic",
          .data$stat_name %in% "df" ~ "Degrees of Freedom",
          .data$stat_name %in% "p.value" ~ "p-value",
          TRUE ~ .data$stat_name
        ),
      fmt_fun =
        map(
          .data$stat,
          function(.x) {
            # styler: off
            if (is.integer(.x)) return(0L)
            if (is.numeric(.x)) return(1L)
            # styler: on
            NULL
          }
        ),
      context = "aod_wald_test",
      warning = wald_test["warning"],
      error = wald_test["error"]
    ) |>
    cards::as_card() |>
    cards::tidy_ard_column_order()
}

#' Extract data from wald.test object
#'
#' @param wald_test (`data.frame`)\cr wald test object object from `aod::wald.test()`
#'
#' @return a data frame containing the wald test results.
#' @keywords internal
.extract_wald_results <- function(wald_test) {
  df <- wald_test$result$chi2[("df")]
  statistic <- wald_test$result$chi2[("chi2")]
  p.value <- wald_test$result$chi2[("P")]
  data.frame(df, statistic, p.value)
}

#' ARD for Difference in Survival
#'
#' @description
#' Analysis results data for comparison of survival using [survival::survdiff()].
#'
#' @param formula (`formula`)\cr
#'   a formula
#' @param data (`data.frame`)\cr
#'  a data frame
#' @param rho (`scalar numeric`)\cr
#'   numeric scalar passed to `survival::survdiff(rho)`. Default is `rho=0`.
#' @param ... additional arguments passed to `survival::survdiff()`
#'
#' @return an ARD data frame of class 'card'
#' @export
#'
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("survival", "broom", "ggsurvfit")))
#' library(survival)
#' library(ggsurvfit)
#'
#' ard_survival_survdiff(Surv_CNSR(AVAL, CNSR) ~ TRTA, data = cards::ADTTE)
ard_survival_survdiff <- function(formula, data, rho = 0, ...) {
  set_cli_abort_call()

  # check installed packages ---------------------------------------------------
  check_pkg_installed(c("survival", "broom"))

  # check/process inputs -------------------------------------------------------
  check_not_missing(formula)
  check_class(formula, cls = "formula")
  if (!missing(data)) check_class(data, cls = "data.frame")
  check_scalar(rho)
  check_class(rho, cls = "numeric")

  # assign method
  method <- dplyr::case_when(
    rho == 0 ~ "Log-rank test",
    rho == 1.5 ~ "Tarone-Ware test",
    rho == 1 ~ "Peto & Peto modification of Gehan-Wilcoxon test",
    .default = glue::glue("G-rho test (\U03C1 = {rho})")
  ) |>
    as.character()

  # calculate survdiff() results -----------------------------------------------
  lst_glance <-
    cards::eval_capture_conditions(
      survival::survdiff(formula = formula, data = data, rho = rho, ...) |>
        broom::glance() |>
        dplyr::mutate(method = .env$method)
    )

  # tidy results up in an ARD format -------------------------------------------
  # extract variable names from formula
  variables <- stats::terms(formula) |>
    attr("term.labels") |>
    .strip_backticks()

  # if there was an error, return results early
  if (is.null(lst_glance[["result"]])) {
    # if no variables in formula, then return an error
    # otherwise, if we do have variable names, then we can construct an empty ARD which will be done below
    if (is_empty(variables)) {
      cli::cli_abort(
        message =
          c("There was an error in {.fun survival::survdiff}. See below:",
            "x" = lst_glance[["error"]]
          ),
        call = get_cli_abort_call()
      )
    }
  }

  .variables_to_survdiff_ard(
    variables = variables,
    method = method,
    # styler: off
    stat_names =
      if (!is.null(lst_glance[["result"]])) names(lst_glance[["result"]])
      else c("statistic", "df", "p.value", "method"),
    stats =
      if (!is.null(lst_glance[["result"]])) unname(as.list(lst_glance[["result"]]))
      else rep_along(c("statistic", "df", "p.value"), list(NULL)) |> c(list(method = method))
    # styler: on
  ) |>
    .add_survdiff_stat_labels() |>
    dplyr::mutate(
      context = "survival_survdiff",
      warning = lst_glance["warning"],
      error = lst_glance["error"],
      fmt_fun = map(
        .data$stat,
        function(x) {
          if (is.numeric(x)) return(1L) # styler: off
          NULL
        }
      )
    ) |>
    cards::as_card() |>
    cards::tidy_ard_column_order()
}

.variables_to_survdiff_ard <- function(variables,
                                       method,
                                       stat_names,
                                       stats) {
  len <- length(variables)

  df_vars <- dplyr::tibble(!!!rev(variables)) |>
    set_names(
      ifelse(
        len > 1L,
        c(paste0("group_", rev(seq_len(len - 1L))), "variable"),
        "variable"
      )
    )

  dplyr::bind_cols(
    df_vars,
    dplyr::tibble(
      stat_name = .env$stat_names,
      stat = .env$stats
    )
  )
}

.add_survdiff_stat_labels <- function(x) {
  x |>
    dplyr::left_join(
      dplyr::tribble(
        ~stat_name, ~stat_label,
        "statistic", "X^2 Statistic",
        "df", "Degrees of Freedom",
        "p.value", "p-value"
      ),
      by = "stat_name"
    ) |>
    dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name))
}

.strip_backticks <- function(x) {
  ifelse(
    str_detect(x, "^`.*`$"),
    substr(x, 2, nchar(x) - 1),
    x
  )
}

#' ARD Total N
#'
#' Returns the total N for a survey object.
#' The placeholder variable name returned in the object is `"..ard_total_n.."`
#'
#' @inheritParams ard_tabulate_value.survey.design
#' @inheritParams rlang::args_dots_empty
#'
#' @return an ARD data frame of class 'card'
#' @export
#'
#' @examplesIf cardx:::is_pkg_installed("survey")
#' svy_titanic <- survey::svydesign(~1, data = as.data.frame(Titanic), weights = ~Freq)
#'
#' ard_total_n(svy_titanic)
ard_total_n.survey.design <- function(data, ...) {
  # process inputs -------------------------------------------------------------
  set_cli_abort_call()
  check_dots_empty()

  # calculate total N ----------------------------------------------------------
  data <- stats::update(data, ..ard_total_n.. = TRUE)

  data |>
    ard_tabulate_value(
      variables = "..ard_total_n..",
      statistic = list(..ard_total_n.. = c("N", "N_unweighted"))
    ) |>
    dplyr::mutate(context = "total_n") |>
    dplyr::select(-cards::all_ard_variables("levels"))
}

#' ARD ANOVA from car Package
#'
#' Function takes a regression model object and calculated ANOVA using [`car::Anova()`].
#'
#' @param x regression model object
#' @param ... arguments passed to `car::Anova(...)`
#'
#' @return data frame
#' @export
#'
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("broom.helpers", "car", "parameters")))
#' lm(AGE ~ ARM, data = cards::ADSL) |>
#'   ard_car_anova()
#'
#' glm(vs ~ factor(cyl) + factor(am), data = mtcars, family = binomial) |>
#'   ard_car_anova(test.statistic = "Wald")
ard_car_anova <- function(x, ...) {
  set_cli_abort_call()

  # check installed packages ---------------------------------------------------
  check_pkg_installed(pkg = c("broom.helpers", "car", "parameters"))

  # check inputs ---------------------------------------------------------------
  check_not_missing(x)

  # run car::Anova() -----------------------------------------------------------
  car_anova <- cards::eval_capture_conditions(car::Anova(x, ...))

  if (!is.null(car_anova[["error"]])) {
    cli::cli_abort(
      c(
        "There was an error running {.fun car::Anova}. See error message below.",
        x = car_anova[["error"]]
      ),
      call = get_cli_abort_call()
    )
  }

  car_anova[["result"]] |>
    broom.helpers::tidy_parameters(conf.int = FALSE) |> # using broom.helpers, because it handle non-syntactic names for us
    dplyr::filter(!(dplyr::row_number() == dplyr::n() & .data$term %in% "Residuals")) |> # removing Residual rows
    dplyr::rename(variable = "term") |>
    tidyr::pivot_longer(
      cols = -"variable",
      names_to = "stat_name",
      values_to = "stat"
    ) |>
    dplyr::mutate(
      stat = as.list(.data$stat),
      stat_label =
        dplyr::case_when(
          .data$stat_name %in% "statistic" ~ "Statistic",
          .data$stat_name %in% "df" ~ "Degrees of Freedom",
          .data$stat_name %in% "p.value" ~ "p-value",
          TRUE ~ .data$stat_name
        ),
      fmt_fun =
        map(
          .data$stat,
          function(.x) {
            # styler: off
            if (is.integer(.x)) return(0L)
            if (is.numeric(.x)) return(1L)
            # styler: on
            NULL
          }
        ),
      context = "car_anova",
      warning = car_anova["warning"],
      error = car_anova["error"]
    ) |>
    cards::as_card() |>
    cards::tidy_ard_column_order()
}

#' ARD ANOVA
#'
#' @description
#' Analysis results data for Analysis of Variance.
#' Calculated with `stats::aov()`
#'
#' @inheritParams stats::aov
#' @param ... arguments passed to `stats::aov(...)`
#'
#' @return ARD data frame
#' @export
#'
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("broom.helpers", "parameters")))
#' ard_stats_aov(AGE ~ ARM, data = cards::ADSL)
ard_stats_aov <- function(formula, data, ...) {
  set_cli_abort_call()

  # check installed packages ---------------------------------------------------
  check_pkg_installed(c("broom.helpers", "parameters"))

  # check/process inputs -------------------------------------------------------
  check_not_missing(formula)
  check_not_missing(data)
  check_data_frame(data)
  check_class(formula, cls = "formula")

  # build ARD ------------------------------------------------------------------
  aov <-
    cards::eval_capture_conditions(
      stats::aov(formula, data, ...)
    )
  aov[["result"]] |>
    broom.helpers::tidy_parameters() |> # using broom.helpers, because it handle non-syntactic names
    dplyr::filter(!(dplyr::row_number() == dplyr::n() & .data$term %in% "Residuals")) |> # removing Residual rows
    dplyr::rename(variable = "term") |>
    tidyr::pivot_longer(
      cols = -"variable",
      names_to = "stat_name",
      values_to = "stat"
    ) |>
    dplyr::mutate(
      stat = as.list(.data$stat),
      stat_label =
        dplyr::case_when(
          .data$stat_name %in% "statistic" ~ "Statistic",
          .data$stat_name %in% "df" ~ "Degrees of Freedom",
          .data$stat_name %in% "p.value" ~ "p-value",
          .data$stat_name %in% "sumsq" ~ "Sum of Squares",
          .data$stat_name %in% "meansq" ~ "Mean of Sum of Squares",
          TRUE ~ .data$stat_name
        ),
      context = "stats_aov",
      fmt_fun = lapply(
        .data$stat,
        function(x) {
          switch(is.integer(x),
            0L
          ) %||% switch(is.numeric(x),
            1L
          )
        }
      ),
      warning = aov["warning"],
      error = aov["error"]
    ) |>
    cards::as_card() |>
    cards::tidy_ard_column_order()
}

1		#' ARD Mood Test
2		#'
3		#' @description
4		#' Analysis results data for Mood two sample test of scale. Note this not to be confused with
5		#' the Brown-Mood test of medians.
6		#'
7		#' @param data (`data.frame`)\cr
8		#' a data frame. See below for details.
9		#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
10		#' column name to compare by.
11		#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
12		#' column name to be compared. Independent tests will
13		#' be run for each variable.
14		#' @param ... arguments passed to `mood.test(...)`
15		#'
16		#' @return ARD data frame
17		#' @name ard_stats_mood_test
18		#'
19		#' @details
20		#' For the `ard_stats_mood_test()` function, the data is expected to be one row per subject.
21		#' The data is passed as `mood.test(data[[variable]] ~ data[[by]], ...)`.
22		#' @rdname ard_stats_mood_test
23		#' @export
24		#'
25		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
26		#' cards::ADSL \|>
27		#' ard_stats_mood_test(by = "SEX", variables = "AGE")
28		ard_stats_mood_test <- function(data, by, variables, ...) {
29	5x	set_cli_abort_call()
30
31		# check installed packages ---------------------------------------------------
32	5x	check_pkg_installed("broom")
33
34		# check/process inputs -------------------------------------------------------
35	5x	check_not_missing(data)
36	5x	check_not_missing(variables)
37	5x	check_not_missing(by)
38	5x	check_data_frame(data)
39	5x	data <- dplyr::ungroup(data)
40	5x	cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
41	5x	check_scalar(by)
42
43		# return empty ARD if no variables selected ----------------------------------
44	5x	if (is_empty(variables)) {
45	!	return(dplyr::tibble() \|> cards::as_card())
46		}
47
48		# build ARD ------------------------------------------------------------------
49	5x	lapply(
50	5x	variables,
51	5x	function(variable) {
52	6x	.format_moodtest_results(
53	6x	by = by,
54	6x	variable = variable,
55	6x	lst_tidy =
56	6x	cards::eval_capture_conditions(
57	6x	stats::mood.test(data[[variable]] ~ data[[by]], ...) \|>
58	6x	broom::tidy()
59		),
60		...
61		)
62		}
63		) \|>
64	5x	dplyr::bind_rows()
65		}
66		#' Convert mood test results to ARD
67		#'
68		#' @inheritParams cards::tidy_as_ard
69		#' @inheritParams stats::mood.test
70		#' @param by (`string`)\cr by column name
71		#' @param variable (`string`)\cr variable column name
72		#' @param ... passed to `mood.test(...)`
73		#'
74		#' @return ARD data frame
75		#' @keywords internal
76		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
77		#' cardx:::.format_moodtest_results(
78		#' by = "SEX",
79		#' variable = "AGE",
80		#' lst_tidy =
81		#' cards::eval_capture_conditions(
82		#' stats::mood.test(ADSL[["AGE"]] ~ ADSL[["SEX"]]) \|>
83		#' broom::tidy()
84		#' )
85		#' )
86		.format_moodtest_results <- function(by, variable, lst_tidy, ...) {
87		# build ARD ------------------------------------------------------------------
88	6x	ret <-
89	6x	cards::tidy_as_ard(
90	6x	lst_tidy = lst_tidy,
91	6x	tidy_result_names = c("statistic", "p.value", "method", "alternative"),
92	6x	formals = formals(asNamespace("stats")[["mood.test.default"]]),
93	6x	passed_args = c(dots_list(...)),
94	6x	lst_ard_columns = list(group1 = by, variable = variable, context = "stats_mood_test")
95		)
96
97		# add the stat label ---------------------------------------------------------
98	6x	ret \|>
99	6x	dplyr::left_join(
100	6x	.df_moodtest_stat_labels(),
101	6x	by = "stat_name"
102		) \|>
103	6x	dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) \|>
104	6x	cards::as_card() \|>
105	6x	cards::tidy_ard_column_order()
106		}
107
108		.df_moodtest_stat_labels <- function() {
109	6x	dplyr::tribble(
110	6x	~stat_name, ~stat_label,
111	6x	"statistic", "Z-Statistic",
112	6x	"p.value", "p-value",
113	6x	"alternative", "Alternative Hypothesis"
114		)
115		}

1		#' ARD one-sample Wilcox Rank-sum
2		#'
3		#' @description
4		#' Analysis results data for one-sample Wilcox Rank-sum.
5		#' Result may be stratified by including the `by` argument.
6		#'
7		#' @param data (`data.frame`)\cr
8		#' a data frame. See below for details.
9		#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
10		#' column names to be analyzed. Independent Wilcox Rank-sum tests will be computed for
11		#' each variable.
12		#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
13		#' optional column name to stratify results by.
14		#' @inheritParams ard_stats_wilcox_test
15		#'
16		#' @return ARD data frame
17		#' @export
18		#'
19		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
20		#' cards::ADSL \|>
21		#' ard_stats_wilcox_test_onesample(by = ARM, variables = AGE)
22		ard_stats_wilcox_test_onesample <- function(data, variables, by = dplyr::group_vars(data), conf.level = 0.95, ...) {
23	6x	set_cli_abort_call()
24
25		# check installed packages ---------------------------------------------------
26	6x	check_pkg_installed("broom")
27
28		# check/process inputs -------------------------------------------------------
29	6x	check_not_missing(data)
30	6x	check_not_missing(variables)
31	6x	check_data_frame(data)
32	6x	data <- dplyr::ungroup(data)
33	6x	cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
34	6x	check_scalar_range(conf.level, range = c(0, 1))
35
36		# return empty ARD if no variables selected ----------------------------------
37	6x	if (is_empty(variables)) {
38	1x	return(dplyr::tibble() \|> cards::as_card())
39		}
40
41	5x	cards::ard_summary(
42	5x	data = data,
43	5x	variables = all_of(variables),
44	5x	by = all_of(by),
45	5x	statistic = all_of(variables) ~ list(wilcox_test_onesample = \(x) stats::wilcox.test(x = x, conf.level = conf.level, ...) \|> broom::tidy())
46		) \|>
47	5x	cards::bind_ard(
48	5x	cards::ard_summary(
49	5x	data = data,
50	5x	variables = all_of(variables),
51	5x	by = all_of(by),
52	5x	statistic =
53	5x	all_of(variables) ~
54	5x	list(conf.level = \(x) {
55	9x	formals(asNamespace("stats")[["wilcox.test.default"]])[c("mu", "exact", "conf.int", "tol.root", "digits.rank")] \|>
56	9x	utils::modifyList(list(conf.level = conf.level, ...)) \|>
57	9x	compact()
58		})
59		)
60		) \|>
61	5x	dplyr::select(-"stat_label") \|>
62	5x	dplyr::left_join(
63	5x	.df_ttest_stat_labels(by = NULL),
64	5x	by = "stat_name"
65		) \|>
66	5x	dplyr::mutate(
67	5x	stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name),
68	5x	context = "stats_wilcox_test_onesample",
69		) \|>
70	5x	cards::as_card() \|>
71	5x	cards::tidy_ard_column_order() \|>
72	5x	cards::tidy_ard_row_order()
73		}

1		#' ARD McNemar's Test
2		#'
3		#' @description
4		#' Analysis results data for McNemar's statistical test.
5		#' We have two functions depending on the structure of the data.
6		#' - `ard_stats_mcnemar_test()` is the structure expected by [`stats::mcnemar.test()`]
7		#' - `ard_stats_mcnemar_test_long()` is one row per ID per group
8		#'
9		#' @param data (`data.frame`)\cr
10		#' a data frame. See below for details.
11		#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
12		#' column name to compare by.
13		#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
14		#' column names to be compared. Independent tests will
15		#' be computed for each variable.
16		#' @param ... arguments passed to `stats::mcnemar.test(...)`
17		#' @param id ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
18		#' column name of the subject or participant ID
19		#'
20		#' @return ARD data frame
21		#' @name ard_stats_mcnemar_test
22		#'
23		#' @details
24		#' For the `ard_stats_mcnemar_test()` function, the data is expected to be one row per subject.
25		#' The data is passed as `stats::mcnemar.test(x = data[[variable]], y = data[[by]], ...)`.
26		#' Please use `table(x = data[[variable]], y = data[[by]])` to check the contingency table.
27		#'
28		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
29		#' cards::ADSL \|>
30		#' ard_stats_mcnemar_test(by = "SEX", variables = "EFFFL")
31		#'
32		#' set.seed(1234)
33		#' cards::ADSL[c("USUBJID", "TRT01P")] \|>
34		#' dplyr::mutate(TYPE = "PLANNED") \|>
35		#' dplyr::rename(TRT01 = TRT01P) %>%
36		#' dplyr::bind_rows(dplyr::mutate(., TYPE = "ACTUAL", TRT01 = sample(TRT01))) \|>
37		#' ard_stats_mcnemar_test_long(
38		#' by = TYPE,
39		#' variable = TRT01,
40		#' id = USUBJID
41		#' )
42		NULL
43
44		#' @rdname ard_stats_mcnemar_test
45		#' @export
46		ard_stats_mcnemar_test <- function(data, by, variables, ...) {
47	7x	set_cli_abort_call()
48
49		# check installed packages ---------------------------------------------------
50	7x	check_pkg_installed("broom")
51
52		# check/process inputs -------------------------------------------------------
53	7x	check_not_missing(data)
54	7x	check_not_missing(variables)
55	7x	check_not_missing(by)
56	7x	check_data_frame(data)
57	7x	data <- dplyr::ungroup(data)
58	7x	cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
59	7x	check_scalar(by)
60
61		# return empty ARD if no variables selected ----------------------------------
62	7x	if (is_empty(variables)) {
63	!	return(dplyr::tibble() \|> cards::as_card())
64		}
65
66		# build ARD ------------------------------------------------------------------
67	7x	lapply(
68	7x	variables,
69	7x	function(variable) {
70	8x	.format_mcnemartest_results(
71	8x	by = by,
72	8x	variable = variable,
73	8x	lst_tidy =
74	8x	cards::eval_capture_conditions(
75	8x	stats::mcnemar.test(x = data[[variable]], y = data[[by]], ...) \|>
76	8x	broom::tidy()
77		),
78		...
79		)
80		}
81		) \|>
82	7x	dplyr::bind_rows()
83		}
84
85		#' @rdname ard_stats_mcnemar_test
86		#' @export
87		ard_stats_mcnemar_test_long <- function(data, by, variables, id, ...) {
88	1x	set_cli_abort_call()
89
90		# check installed packages ---------------------------------------------------
91	1x	check_pkg_installed("broom")
92
93		# check/process inputs -------------------------------------------------------
94	1x	check_not_missing(data)
95	1x	check_not_missing(variables)
96	1x	check_not_missing(by)
97	1x	check_not_missing(id)
98	1x	check_data_frame(data)
99	1x	data <- dplyr::ungroup(data)
100	1x	cards::process_selectors(data, by = {{ by }}, variables = {{ variables }}, id = {{ id }})
101	1x	check_scalar(by)
102	1x	check_scalar(id)
103
104		# return empty ARD if no variables selected ----------------------------------
105	1x	if (is_empty(variables)) {
106	!	return(dplyr::tibble() \|> cards::as_card())
107		}
108
109		# build ARD ------------------------------------------------------------------
110	1x	lapply(
111	1x	variables,
112	1x	function(variable) {
113	1x	.format_mcnemartest_results(
114	1x	by = by,
115	1x	variable = variable,
116	1x	lst_tidy =
117	1x	cards::eval_capture_conditions({
118		# adding this reshape inside the eval, so if there is an error it's captured in the ARD object
119	1x	data_wide <- .paired_data_pivot_wider(data, by = by, variable = variable, id = id)
120		# performing McNemars test
121	1x	stats::mcnemar.test(x = data_wide[["by1"]], y = data_wide[["by2"]], ...) \|>
122	1x	broom::tidy()
123		}),
124		...
125		)
126		}
127		) \|>
128	1x	dplyr::bind_rows()
129		}
130
131		#' Convert McNemar's test to ARD
132		#'
133		#' @inheritParams cards::tidy_as_ard
134		#' @inheritParams stats::mcnemar.test
135		#' @param by (`string`)\cr by column name
136		#' @param variable (`string`)\cr variable column name
137		#' @param ... passed to `stats::mcnemar.test(...)`
138		#'
139		#' @return ARD data frame
140		#'
141		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
142		#' cardx:::.format_mcnemartest_results(
143		#' by = "ARM",
144		#' variable = "AGE",
145		#' lst_tidy =
146		#' cards::eval_capture_conditions(
147		#' stats::mcnemar.test(cards::ADSL[["SEX"]], cards::ADSL[["EFFFL"]]) \|>
148		#' broom::tidy()
149		#' )
150		#' )
151		#'
152		#' @keywords internal
153		.format_mcnemartest_results <- function(by, variable, lst_tidy, ...) {
154		# build ARD ------------------------------------------------------------------
155	9x	ret <-
156	9x	cards::tidy_as_ard(
157	9x	lst_tidy = lst_tidy,
158	9x	tidy_result_names = c("statistic", "p.value", "method"),
159	9x	fun_args_to_record = c("correct"),
160	9x	formals = formals(asNamespace("stats")[["mcnemar.test"]]),
161	9x	passed_args = dots_list(...),
162	9x	lst_ard_columns = list(group1 = by, variable = variable, context = "stats_mcnemar_test")
163		)
164
165		# add the stat label ---------------------------------------------------------
166	9x	ret \|>
167	9x	dplyr::left_join(
168	9x	.df_mcnemar_stat_labels(),
169	9x	by = "stat_name"
170		) \|>
171	9x	dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) \|>
172	9x	cards::as_card() \|>
173	9x	cards::tidy_ard_column_order()
174		}
175
176		.df_mcnemar_stat_labels <- function() {
177	9x	dplyr::tribble(
178	9x	~stat_name, ~stat_label,
179	9x	"statistic", "X-squared Statistic",
180	9x	"parameter", "Degrees of Freedom",
181	9x	"p.value", "p-value",
182		)
183		}

1		#' Regression VIF ARD
2		#'
3		#' @description
4		#' Function takes a regression model object and returns the variance inflation factor (VIF)
5		#' using [`car::vif()`] and converts it to a ARD structure
6		#'
7		#' @param x regression model object
8		#' See car::vif() for details
9		#'
10		#' @param ... arguments passed to `car::vif(...)`
11		#'
12		#' @return data frame
13		#' @name ard_car_vif
14		#' @rdname ard_car_vif
15		#' @export
16		#'
17		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "car"))
18		#' lm(AGE ~ ARM + SEX, data = cards::ADSL) \|>
19		#' ard_car_vif()
20		ard_car_vif <- function(x, ...) {
21	5x	set_cli_abort_call()
22
23		# check installed packages ---------------------------------------------------
24	5x	check_pkg_installed("car")
25
26		# check inputs ---------------------------------------------------------------
27	5x	check_not_missing(x)
28
29	5x	vif <- cards::eval_capture_conditions(car::vif(x, ...))
30
31		# if vif failed, set result as NULL, error will be kept through eval_capture_conditions()
32	5x	if (is.null(vif$result)) {
33		# try to capture variable names from `terms()`
34	2x	lst_terms <- cards::eval_capture_conditions(attr(stats::terms(x), "term.labels"))
35		# we cannot get variable names, error out
36	2x	if (!is.null(lst_terms[["error"]])) {
37	1x	cli::cli_abort(
38	1x	c("There was an error running {.fun car::vif}. See below.", x = vif[["error"]]),
39	1x	call = get_cli_abort_call()
40		)
41		}
42	1x	vif$result <- dplyr::tibble(
43	1x	variable = lst_terms[["result"]],
44	1x	VIF = list(NULL),
45	1x	GVIF = list(NULL),
46	1x	aGVIF = list(NULL),
47	1x	df = list(NULL)
48		)
49		}
50		# if VIF is returned
51	3x	else if (!is.matrix(vif$result)) {
52	1x	vif$result <- dplyr::tibble(variable = names(vif$result), VIF = vif$result)
53		}
54		# if Generalized VIF is returned
55	2x	else if (is.matrix(vif$result)) {
56	2x	vif$result <-
57	2x	vif$result \|>
58	2x	as.data.frame() %>%
59	2x	dplyr::mutate(., variable = rownames(.), .before = 1L) \|>
60	2x	dplyr::rename(
61	2x	aGVIF = "GVIF^(1/(2*Df))",
62	2x	df = "Df"
63		) \|>
64	2x	dplyr::tibble()
65		}
66
67		# Clean-up the result to fit the ard structure through pivot
68	4x	vif$result <-
69	4x	vif$result \|>
70	4x	tidyr::pivot_longer(
71	4x	cols = -c("variable"),
72	4x	names_to = "stat_name",
73	4x	values_to = "stat"
74		) \|>
75	4x	dplyr::mutate(
76	4x	context = "car_vif",
77	4x	stat = as.list(.data$stat),
78	4x	stat_label = ifelse(
79	4x	.data$stat_name == "aGVIF",
80	4x	"Adjusted GVIF",
81	4x	.data$stat_name
82		),
83	4x	fmt_fun = map(
84	4x	.data$stat,
85	4x	function(.x) {
86		# styler: off
87	!	if (is.integer(.x)) return(0L)
88	14x	if (is.numeric(.x)) return(1L)
89		# styler: on
90	4x	NULL
91		}
92		)
93		)
94
95		# Bind the results and possible warning/errors together
96	4x	vif_return <- dplyr::tibble(
97	4x	vif$result,
98	4x	warning = vif["warning"],
99	4x	error = vif["error"]
100		)
101
102		# Clean up return object
103	4x	vif_return \|>
104	4x	cards::as_card() \|>
105	4x	cards::tidy_ard_column_order()
106		}

1		#' ARD Dichotomous Survey Statistics
2		#'
3		#' Compute Analysis Results Data (ARD) for dichotomous summary statistics.
4		#'
5		#' @inheritParams ard_tabulate.survey.design
6		#' @param value (named `list`)\cr
7		#' named list of dichotomous values to tabulate.
8		#' Default is `cards::maximum_variable_value(data$variables)`,
9		#' which returns the largest/last value after a sort.
10		#'
11		#' @return an ARD data frame of class 'card'
12		#' @export
13		#'
14		#' @examplesIf cardx:::is_pkg_installed("survey")
15		#' survey::svydesign(ids = ~1, data = mtcars, weights = ~1) \|>
16		#' ard_tabulate_value(by = vs, variables = c(cyl, am), value = list(cyl = 4))
17		ard_tabulate_value.survey.design <- function(data,
18		variables,
19		by = NULL,
20		value = cards::maximum_variable_value(data$variables[variables]),
21		statistic = everything() ~ c("n", "N", "p", "p.std.error", "n_unweighted", "N_unweighted", "p_unweighted"),
22		denominator = c("column", "row", "cell"),
23		fmt_fun = NULL,
24		stat_label = everything() ~ list(
25		p = "%",
26		p.std.error = "SE(%)",
27		deff = "Design Effect",
28		"n_unweighted" = "Unweighted n",
29		"N_unweighted" = "Unweighted N",
30		"p_unweighted" = "Unweighted %"
31		),
32		fmt_fn = deprecated(),
33		...) {
34	22x	set_cli_abort_call()
35	22x	check_dots_empty()
36	22x	check_pkg_installed(pkg = "survey")
37
38		# deprecated args ------------------------------------------------------------
39	22x	if (lifecycle::is_present(fmt_fn)) {
40	!	lifecycle::deprecate_soft(
41	!	when = "0.2.5",
42	!	what = "ard_tabulate_value(fmt_fn)",
43	!	with = "ard_tabulate_value(fmt_fun)"
44		)
45	!	fmt_fun <- fmt_fn
46		}
47
48		# check inputs ---------------------------------------------------------------
49	22x	check_not_missing(variables)
50
51		# process inputs -------------------------------------------------------------
52	22x	cards::process_selectors(data$variables, variables = {{ variables }})
53	22x	cards::process_formula_selectors(data$variables[variables], value = value)
54	22x	cards::fill_formula_selectors(
55	22x	data$variables[variables],
56	22x	value = formals(asNamespace("cardx")[["ard_tabulate_value.survey.design"]])[["value"]] \|> eval()
57		)
58	22x	.check_dichotomous_value(data$variables, value)
59
60		# return empty ARD if no variables selected ----------------------------------
61	19x	if (is_empty(variables)) {
62	!	return(dplyr::tibble() \|> cards::as_card())
63		}
64
65		# calculate summary statistics -----------------------------------------------
66	19x	ard_tabulate(
67	19x	data = data,
68	19x	variables = all_of(variables),
69	19x	by = {{ by }},
70	19x	statistic = statistic,
71	19x	denominator = denominator,
72	19x	fmt_fun = fmt_fun,
73	19x	stat_label = stat_label
74		) \|>
75	19x	dplyr::filter(
76	19x	pmap(
77	19x	list(.data$variable, .data$variable_level),
78	19x	function(variable, variable_level) {
79	1015x	variable_level %in% .env$value[[variable]]
80		}
81		) \|>
82	19x	unlist()
83		) \|>
84	19x	dplyr::mutate(context = "dichotomous")
85		}
86
87		#' Perform Value Checks
88		#'
89		#' Check the validity of the values passed in `ard_tabulate_value(value)`.
90		#'
91		#' @param data (`data.frame`)\cr
92		#' a data frame
93		#' @param value (named `list`)\cr
94		#' a named list
95		#'
96		#' @return returns invisible if check is successful, throws an error message if not.
97		#' @keywords internal
98		#'
99		#' @examples
100		#' cardx:::.check_dichotomous_value(mtcars, list(cyl = 4))
101		.check_dichotomous_value <- function(data, value) {
102	22x	imap(
103	22x	value,
104	22x	function(value, column) {
105	39x	accepted_values <- .unique_and_sorted(data[[column]])
106	39x	if (length(value) != 1L \|\| !value %in% accepted_values) {
107	3x	message <- "Error in argument {.arg value} for variable {.val {column}}."
108	3x	message <-
109	3x	case_switch(
110	3x	length(value) != 1L ~ c(message, "i" = "The value must be one of {.val {accepted_values}}."),
111	3x	.default = c(message, "i" = "A value of {.val {value}} was passed, but must be one of {.val {accepted_values}}.")
112		)
113	3x	if (length(value) == 1L) {
114	3x	message <-
115	3x	case_switch(
116	3x	inherits(data[[column]], "factor") ~
117	3x	c(message, i = "To summarize this value, use {.fun forcats::fct_expand} to add {.val {value}} as a level."),
118	3x	.default = c(message, i = "To summarize this value, make the column a factor and include {.val {value}} as a level.")
119		)
120		}
121
122
123	3x	cli::cli_abort(
124	3x	message = message,
125	3x	call = get_cli_abort_call()
126		)
127		}
128		}
129		) \|>
130	22x	invisible()
131		}
132
133		#' ARD-flavor of unique()
134		#'
135		#' Essentially a wrapper for `unique(x) \|> sort()` with `NA` levels removed.
136		#' For factors, all levels are returned even if they are unobserved.
137		#' Similarly, logical vectors always return `c(TRUE, FALSE)`, even if
138		#' both levels are not observed.
139		#'
140		#' @param x (`any`)\cr
141		#' a vector
142		#'
143		#' @return a vector
144		#' @keywords internal
145		#'
146		#' @examples
147		#' cards:::.unique_and_sorted(factor(letters[c(5, 5:1)], levels = letters))
148		#'
149		#' cards:::.unique_and_sorted(c(FALSE, TRUE, TRUE, FALSE))
150		#'
151		#' cards:::.unique_and_sorted(c(5, 5:1))
152		.unique_and_sorted <- function(x, useNA = c("no", "always")) {
153		# styler: off
154	369x	useNA <- match.arg(useNA)
155		# if a factor return a factor that includes the same levels (including unobserved levels)
156	369x	if (inherits(x, "factor")) {
157	277x	return(
158	277x	factor(
159	277x	if (useNA == "no") levels(x)
160	277x	else c(levels(x), NA_character_),
161	277x	levels = levels(x)
162		)
163		)
164		}
165	92x	if (inherits(x, "logical")) {
166	52x	if (useNA == "no") return(c(TRUE, FALSE))
167	!	else return(c(TRUE, FALSE, NA))
168		}
169
170		# otherwise, return a simple unique and sort of the vector
171	40x	if (useNA == "no") return(unique(x) \|> sort())
172	!	else return(unique(x) \|> sort() \|> c(NA))
173		# styler: on
174		}

1		#' Construction Helpers
2		#'
3		#' These functions help construct calls to various types of models.
4		#'
5		#' - `construct_model()`: Builds models of the form `method(data = data, formula = formula, method.args!!!)`.
6		#' If the `package` argument is specified, that package is temporarily attached
7		#' when the model is evaluated.
8		#'
9		#' - `reformulate2()`: This is a copy of `reformulate()` except that variable
10		#' names that contain a space are wrapped in backticks.
11		#'
12		#' - `bt()`: Adds backticks to a character vector.
13		#'
14		#' - `bt_strip()`: Removes backticks from a string if it begins and ends with a backtick.
15		#'
16		#' @param data
17		#' - `construct_model.data.frame()` (`data.frame`) a data frame
18		#' - `construct_model.survey.design()` (`survey.design`) a survey design object
19		#' @param x (`character`)\cr
20		#' character vector, typically of variable names
21		#' @param formula (`formula`)\cr
22		#' a formula
23		#' @param method (`string`)\cr
24		#' string of function naming the function to be called, e.g. `"glm"`.
25		#' If function belongs to a library that is not attached, the package name
26		#' must be specified in the `package` argument.
27		#' @param method.args (named `list`)\cr
28		#' named list of arguments that will be passed to `method`.
29		#'
30		#' Note that this list may contain non-standard evaluation components.
31		#' If you are wrapping this function in other functions, the argument
32		#' must be passed in a way that does not evaluate the list, e.g.
33		#' using rlang's embrace operator `{{ . }}`.
34		#' @param package (`string`)\cr
35		#' a package name that will be temporarily loaded when function
36		#' specified in `method` is executed.
37		#' @param pattern,pattern_term,pattern_response DEPRECATED
38		#' @inheritParams rlang::eval_tidy
39		#' @inheritParams stats::reformulate
40		#' @inheritParams rlang::args_dots_empty
41		#'
42		#' @return depends on the calling function
43		#' @name construction_helpers
44		#'
45		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("withr", "lme4", "broom.helpers", "broom.mixed")))
46		#' construct_model(
47		#' data = mtcars,
48		#' formula = am ~ mpg + (1 \| vs),
49		#' method = "glmer",
50		#' method.args = list(family = binomial),
51		#' package = "lme4"
52		#' ) \|>
53		#' broom.mixed::tidy()
54		#'
55		#' construct_model(
56		#' data = mtcars \|> dplyr::rename(`M P G` = mpg),
57		#' formula = reformulate2(c("M P G", "cyl"), response = "hp"),
58		#' method = "lm"
59		#' ) \|>
60		#' ard_regression() \|>
61		#' dplyr::filter(stat_name %in% c("term", "estimate", "p.value"))
62		NULL
63
64		#' @rdname construction_helpers
65		#' @export
66		construct_model <- function(data, ...) {
67	39x	UseMethod("construct_model")
68		}
69
70		#' @rdname construction_helpers
71		#' @export
72		construct_model.data.frame <- function(data, formula, method, method.args = list(), package = "base", env = caller_env(), ...) {
73	34x	set_cli_abort_call()
74		# check pkg installations ----------------------------------------------------
75	34x	check_dots_empty()
76	34x	check_pkg_installed(c("withr", package))
77
78	34x	check_not_missing(formula)
79	34x	check_class(formula, cls = "formula")
80
81	34x	check_not_missing(method)
82	34x	check_string_or_function(method)
83	30x	if (is_string(method)) check_not_namespaced(method)
84
85		# convert method.args to list of expressions (to account for NSE inputs) -----
86	31x	method.args <- .as_list_of_exprs({{ method.args }})
87
88		# build model ----------------------------------------------------------------
89	31x	call_to_run <- call2(.fn = method, formula = formula, data = data, !!!method.args)
90
91	31x	try_fetch(
92	31x	withr::with_namespace(
93	31x	package = package,
94	31x	eval_tidy(call_to_run, env = env)
95		),
96	31x	error = function(e) {
97	4x	msg <- "There was an error evaluating the model"
98	4x	if (is_string(method)) {
99	3x	call_to_run$data <- expr(.)
100	3x	msg <- paste(msg, "{.code {truncate_call(call_to_run)}}")
101		}
102
103	4x	cli::cli_abort(
104	4x	message = msg,
105	4x	parent = e,
106	4x	call = get_cli_abort_call()
107		)
108		}
109		)
110		}
111
112		#' @rdname construction_helpers
113		#' @export
114		construct_model.survey.design <- function(data, formula, method, method.args = list(), package = "survey", env = caller_env(), ...) {
115	5x	set_cli_abort_call()
116		# check pkg installations ----------------------------------------------------
117	5x	check_dots_empty()
118	5x	check_pkg_installed(c("withr", package))
119
120	5x	check_not_missing(formula)
121	5x	check_class(formula, cls = "formula")
122
123	5x	check_not_missing(method)
124	5x	check_string_or_function(method)
125	5x	if (is_string(method)) check_not_namespaced(method)
126
127		# convert method.args to list of expressions (to account for NSE inputs) -----
128	5x	method.args <- .as_list_of_exprs({{ method.args }})
129
130		# build model ----------------------------------------------------------------
131	5x	call_to_run <- call2(.fn = method, formula = formula, design = data, !!!method.args)
132
133	5x	try_fetch(
134	5x	withr::with_namespace(
135	5x	package = package,
136	5x	eval_tidy(call_to_run, env = env)
137		),
138	5x	error = function(e) {
139	2x	msg <- "There was an error evaluating the model"
140	2x	if (is_string(method)) {
141	2x	call_to_run$design <- expr(.)
142	2x	msg <- paste(msg, "{.code {truncate_call(call_to_run)}}")
143		}
144
145	2x	cli::cli_abort(
146	2x	message = msg,
147	2x	parent = e,
148	2x	call = get_cli_abort_call()
149		)
150		}
151		)
152		}
153
154		.as_list_of_exprs <- function(x, arg_name = "method.args") {
155	37x	x_enexpr <- enexpr(x)
156	37x	if (is_call_simple(x_enexpr)) {
157	37x	return(call_args(x_enexpr))
158		}
159
160	!	cli::cli_abort(
161	!	c("There was an error processing the {.arg {arg_name}} argument.",
162	!	i = "Expecting a simple call. See {.help rlang::is_call_simple} for details."
163		),
164	!	call = get_cli_abort_call()
165		)
166		}
167
168		#' @rdname construction_helpers
169		#' @export
170		reformulate2 <- function(termlabels, response = NULL, intercept = TRUE,
171		env = parent.frame(),
172		pattern_term = NULL, pattern_response = NULL) {
173		# deprecated argument --------------------------------------------------------
174	!	if (!missing(pattern_term)) lifecycle::deprecate_warn("0.2.1", what = "cardx::reformulate2(pattern_term)", details = "Argument has been ignored.") # styler: off
175	!	if (!missing(pattern_response)) lifecycle::deprecate_warn("0.2.1", what = "cardx::reformulate2(pattern_response)", details = "Argument has been ignored.") # styler: off
176
177	1089x	stats::reformulate(
178	1089x	termlabels = bt(termlabels),
179	1089x	response = bt(response),
180	1089x	intercept = intercept,
181	1089x	env = env
182		)
183		}
184
185		#' @rdname construction_helpers
186		#' @export
187		bt <- function(x, pattern = NULL) {
188		# deprecated argument --------------------------------------------------------
189	!	if (!missing(pattern)) lifecycle::deprecate_warn("0.2.1", what = "cardx::bt(pattern)", details = "Argument has been ignored.") # styler: off
190
191	2248x	if (is_empty(x)) {
192	1066x	return(x)
193		}
194
195	1182x	ifelse(
196	1182x	make.names(x) != x & !str_detect(x, "^`.*`$"),
197	1182x	paste0("`", x, "`"),
198	1182x	x
199		)
200		}
201
202		#' @rdname construction_helpers
203		#' @export
204		bt_strip <- function(x) {
205	1x	ifelse(
206	1x	str_detect(x, "^`.*`$"),
207	1x	substr(x, 2, nchar(x) - 1),
208	1x	x
209		)
210		}
211
212		check_not_namespaced <- function(x,
213		arg_name = rlang::caller_arg(x),
214		class = "check_not_namespaced",
215		call = get_cli_abort_call()) {
216	36x	check_string(x, arg_name = arg_name, call = call, class = "check_not_namespaced")
217
218	36x	if (str_detect(x, "::")) {
219	3x	cli::cli_abort(
220	3x	"Argument {.arg {arg_name}} cannot be namespaced when passed as a {.cls string}.",
221	3x	call = call,
222	3x	class = class
223		)
224		}
225
226	33x	invisible(x)
227		}
228
229
230		check_string_or_function <- function(x,
231		arg_name = rlang::caller_arg(x),
232		class = "check_string_or_function",
233		call = get_cli_abort_call()) {
234	39x	if (!is.function(x) && !is_string(x)) {
235	1x	cli::cli_abort(
236	1x	c("Argument {.arg {arg_name}} must be a {.cls string} or {.cls function}."),
237	1x	call = call,
238	1x	class = class
239		)
240		}
241
242	38x	invisible(x)
243		}
244
245		truncate_call <- function(call, max_out = 100) {
246	5x	call_text <- expr_text(call)
247	5x	if (nchar(call_text) > max_out) {
248	!	call_text <- paste(substr(call_text, 1, max_out), "...")
249		}
250	5x	call_text
251		}

1		#' ARD survey continuous CIs
2		#'
3		#' One-sample confidence intervals for continuous variables' means and medians.
4		#' Confidence limits are calculated with `survey::svymean()` and `survey::svyquantile()`.
5		#'
6		#'
7		#' @inheritParams ard_summary.survey.design
8		#' @param method (`string`)\cr
9		#' Method for confidence interval calculation.
10		#' When `"svymean"`, the calculation is computed via `survey::svymean()`.
11		#' Otherwise, it is calculated via`survey::svyquantile(interval.type=method)`
12		#' @param conf.level (scalar `numeric`)\cr
13		#' confidence level for confidence interval. Default is `0.95`.
14		#' @param df (`numeric`)\cr
15		#' denominator degrees of freedom, passed to `survey::confint(df)`.
16		#' Default is `survey::degf(data)`.
17		#' @param ... arguments passed to `survey::confint()`
18		#'
19		#' @return ARD data frame
20		#' @export
21		#'
22		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "survey"))
23		#' data(api, package = "survey")
24		#' dclus1 <- survey::svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc)
25		#'
26		#' ard_continuous_ci(dclus1, variables = api00)
27		#' ard_continuous_ci(dclus1, variables = api00, method = "svymedian.xlogit")
28		ard_continuous_ci.survey.design <- function(data,
29		variables,
30		by = NULL,
31		method = c("svymean", "svymedian.mean", "svymedian.beta", "svymedian.xlogit", "svymedian.asin", "svymedian.score"),
32		conf.level = 0.95,
33		df = survey::degf(data),
34		...) {
35	16x	set_cli_abort_call()
36
37		# check inputs ---------------------------------------------------------------
38	16x	check_not_missing(data)
39	16x	check_class(data, "survey.design")
40	16x	check_not_missing(variables)
41
42	16x	cards::process_selectors(
43	16x	data = data$variables,
44	16x	variables = {{ variables }},
45	16x	by = {{ by }}
46		)
47	16x	check_scalar(by, allow_empty = TRUE)
48	16x	check_scalar_range(conf.level, range = c(0, 1))
49	16x	method <- arg_match(method)
50
51	16x	walk(
52	16x	variables,
53	16x	\(variable) {
54	27x	if (!is.numeric(data$variables[[variable]])) {
55	1x	cli::cli_inform(
56	1x	"Column {.val {variable}} is not {.cls numeric} and results may be an unexpected format."
57		)
58		}
59		}
60		)
61
62		# return empty ARD if no variables selected ----------------------------------
63	16x	if (is_empty(variables)) {
64	1x	return(dplyr::tibble() \|> cards::as_card())
65		}
66
67		# calculate and return ARD of one sample CI ----------------------------------
68	15x	.calculate_ard_continuous_survey_ci(
69	15x	FUN = ifelse(method == "svymean", .svymean_confint_wrapper, .svyquantile_confint_wrapper),
70	15x	data = data,
71	15x	variables = variables,
72	15x	by = by,
73	15x	conf.level = conf.level,
74	15x	method = method,
75	15x	df = df,
76		...
77		) \|>
78	15x	.restore_original_column_types(data = data$variables)
79		}
80
81		.calculate_ard_continuous_survey_ci <- function(FUN, data, variables, by, conf.level, ...) {
82		# calculate results ----------------------------------------------------------
83	15x	map(
84	15x	variables,
85	15x	function(variable) {
86	27x	.calculate_one_ard_continuous_survey_ci(
87	27x	FUN = FUN,
88	27x	data = data,
89	27x	variable = variable,
90	27x	by = by,
91	27x	conf.level = conf.level,
92		...
93		)
94		}
95		) \|>
96	15x	dplyr::bind_rows()
97		}
98
99		.calculate_one_ard_continuous_survey_ci <- function(FUN, data, variable, by, conf.level, ...) {
100	27x	if (!is_empty(by)) {
101	8x	by_levels <- .unique_values_sort(data$variables, variable = by)
102	8x	lst_data <-
103	8x	map(
104	8x	by_levels,
105	8x	~ call2("subset", expr(data), expr(!!sym(by) == !!.x)) \|> eval()
106		) \|>
107	8x	set_names(as.character(by_levels))
108		}
109
110	27x	df_full <-
111	27x	case_switch(
112	27x	!is_empty(by) ~
113	27x	tidyr::expand_grid(
114	27x	group1_level = as.character(by_levels) \|> as.list()
115		) \|>
116	27x	dplyr::mutate(group1 = .env$by, variable = .env$variable),
117	27x	.default =
118	27x	dplyr::tibble(variable = .env$variable)
119		) \|>
120	27x	dplyr::rowwise() \|>
121	27x	dplyr::mutate(
122	27x	lst_result =
123	27x	FUN(
124	27x	data =
125	27x	case_switch(
126	27x	is_empty(.env$by) ~ data,
127	27x	.default = lst_data[[.data$group1_level]]
128		),
129	27x	variable = .data$variable,
130	27x	conf.level = .env$conf.level,
131		...
132		) \|>
133	27x	list(),
134	27x	result =
135	27x	.data$lst_result[["result"]] \|>
136	27x	enframe("stat_name", "stat") \|>
137	27x	list(),
138	27x	warning = .data$lst_result["warning"] \|> unname(),
139	27x	error = .data$lst_result["error"] \|> unname(),
140	27x	context = "survey_continuous_ci"
141		) \|>
142	27x	dplyr::select(-"lst_result") \|>
143	27x	dplyr::ungroup() \|>
144	27x	tidyr::unnest("result") \|>
145	27x	dplyr::mutate(
146	27x	stat_label = .data$stat_name,
147	27x	fmt_fun = map(.data$stat, ~ case_switch(is.numeric(.x) ~ 2L, .default = as.character))
148		) \|>
149	27x	cards::as_card() \|>
150	27x	cards::tidy_ard_column_order()
151		}
152
153		.svymean_confint_wrapper <- function(data, variable, conf.level, df, ...) {
154	26x	lst_results <-
155	26x	cards::eval_capture_conditions({
156	26x	svymean <-
157	26x	survey::svymean(x = reformulate2(variable), design = data, na.rm = TRUE)
158
159	26x	lst_svymean <- as.data.frame(svymean) \|>
160	26x	as.list() \|>
161	26x	set_names(c("estimate", "std.error"))
162
163	26x	lst_confint <- stats::confint(svymean, level = conf.level, df = df, ...) \|>
164	26x	as.data.frame() \|>
165	26x	as.list() \|>
166	26x	set_names(c("conf.low", "conf.high"))
167
168	24x	c(lst_svymean, lst_confint)
169		})
170
171		# add NULL results if error
172	26x	if (is_empty(lst_results[["result"]])) {
173	2x	lst_results[["result"]] <- rep_named(c("estimate", "std.error", "conf.low", "conf.high"), list(NULL))
174		}
175
176		# add other args
177	26x	lst_results[["result"]] <- lst_results[["result"]] \|> append(list(conf.level = conf.level))
178
179		# return list result
180	26x	lst_results
181		}
182
183		.svyquantile_confint_wrapper <- function(data, variable, conf.level, method, df, ...) {
184	9x	lst_results <-
185	9x	cards::eval_capture_conditions({
186	9x	svyquantile <-
187	9x	survey::svyquantile(
188	9x	x = reformulate2(variable), design = data, quantiles = 0.5,
189	9x	na.rm = TRUE, interval.type = str_remove(method, pattern = "^svymedian\\.")
190		)
191
192	8x	lst_svyquantile <- svyquantile \|>
193	8x	getElement(1L) \|>
194	8x	as.data.frame() \|>
195	8x	dplyr::select(1L, last_col()) \|>
196	8x	as.list() \|>
197	8x	set_names(c("estimate", "std.error"))
198
199	8x	lst_confint <- stats::confint(svyquantile, level = conf.level, df = df, ...) \|>
200	8x	as.data.frame() \|>
201	8x	as.list() \|>
202	8x	set_names(c("conf.low", "conf.high"))
203
204	8x	c(lst_svyquantile, lst_confint)
205		})
206
207		# add NULL results if error
208	9x	if (is_empty(lst_results[["result"]])) {
209	1x	lst_results[["result"]] <- rep_named(c("estimate", "std.error", "conf.low", "conf.high"), list(NULL))
210		}
211
212		# add other args
213	9x	lst_results[["result"]] <- lst_results[["result"]] \|> append(list(conf.level = conf.level))
214
215		# return list result
216	9x	lst_results
217		}

1		#' Functions for Calculating Proportion Confidence Intervals
2		#'
3		#' Functions to calculate different proportion confidence intervals for use in `ard_proportion()`.
4		#'
5		#' @inheritParams ard_categorical_ci
6		#' @param x (binary `numeric`/`logical`)\cr
7		#' vector of a binary values, i.e. a logical vector, or numeric with values `c(0, 1)`
8		#' @return Confidence interval of a proportion.
9		#'
10		#' @name proportion_ci
11		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
12		#' x <- c(
13		#' TRUE, TRUE, TRUE, TRUE, TRUE,
14		#' FALSE, FALSE, FALSE, FALSE, FALSE
15		#' )
16		#'
17		#' proportion_ci_wald(x, conf.level = 0.9)
18		#' proportion_ci_wilson(x, correct = TRUE)
19		#' proportion_ci_clopper_pearson(x)
20		#' proportion_ci_agresti_coull(x)
21		#' proportion_ci_jeffreys(x)
22		NULL
23
24		#' @describeIn proportion_ci Calculates the Wald interval by following the usual textbook definition
25		#' for a single proportion confidence interval using the normal approximation.
26		#'
27		#' \deqn{\hat{p} \pm z_{\alpha/2} \sqrt{\frac{\hat{p}(1 - \hat{p})}{n}}}
28		#'
29		#' @param correct (`logical`)\cr apply continuity correction.
30		#'
31		#' @export
32		proportion_ci_wald <- function(x, conf.level = 0.95, correct = FALSE) {
33	85x	set_cli_abort_call()
34
35		# check inputs ---------------------------------------------------------------
36	85x	check_not_missing(x)
37	85x	check_binary(x)
38	85x	check_range(conf.level, range = c(0, 1), include_bounds = c(FALSE, FALSE))
39	85x	check_scalar(conf.level)
40	85x	check_class(x = correct, "logical")
41	85x	check_scalar(correct)
42
43	85x	x <- stats::na.omit(x)
44
45	85x	n <- length(x)
46	85x	p_hat <- mean(x)
47	85x	z <- stats::qnorm((1 + conf.level) / 2)
48	85x	q_hat <- 1 - p_hat
49	85x	correction_factor <- ifelse(correct, 1 / (2 * n), 0)
50
51	85x	err <- z * sqrt(p_hat * q_hat) / sqrt(n) + correction_factor
52	85x	l_ci <- max(0, p_hat - err)
53	85x	u_ci <- min(1, p_hat + err)
54
55	85x	list(
56	85x	N = n,
57	85x	n = sum(x),
58	85x	estimate = p_hat,
59	85x	conf.low = l_ci,
60	85x	conf.high = u_ci,
61	85x	conf.level = conf.level,
62	85x	method =
63	85x	glue::glue("Wald Confidence Interval {ifelse(correct, 'with', 'without')} continuity correction")
64		)
65		}
66
67
68		#' @describeIn proportion_ci Calculates the Wilson interval by calling [stats::prop.test()].
69		#' Also referred to as Wilson score interval.
70		#'
71		#' \deqn{\frac{\hat{p} +
72		#' \frac{z^2_{\alpha/2}}{2n} \pm z_{\alpha/2} \sqrt{\frac{\hat{p}(1 - \hat{p})}{n} +
73		#' \frac{z^2_{\alpha/2}}{4n^2}}}{1 + \frac{z^2_{\alpha/2}}{n}}}
74		#'
75		#' @export
76		proportion_ci_wilson <- function(x, conf.level = 0.95, correct = FALSE) {
77	24x	set_cli_abort_call()
78
79		# check installed packages ---------------------------------------------------
80	24x	check_pkg_installed(pkg = "broom")
81
82		# check inputs ---------------------------------------------------------------
83	24x	check_not_missing(x)
84	24x	check_binary(x)
85	23x	check_class(x = correct, "logical")
86	23x	check_scalar(correct)
87	23x	check_range(conf.level, range = c(0, 1), include_bounds = c(FALSE, FALSE))
88	23x	check_scalar(conf.level)
89
90	22x	x <- stats::na.omit(x)
91
92	22x	n <- length(x)
93	22x	y <- stats::prop.test(x = sum(x), n = n, correct = correct, conf.level = conf.level)
94
95	21x	list(
96	21x	N = n,
97	21x	n = sum(x),
98	21x	conf.level = conf.level
99		) \|>
100	21x	utils::modifyList(val = broom::tidy(y) \|> as.list()) \|>
101	21x	utils::modifyList(
102	21x	list(
103	21x	method =
104	21x	glue::glue("Wilson Confidence Interval {ifelse(correct, 'with', 'without')} continuity correction")
105		)
106		)
107		}
108
109		#' @describeIn proportion_ci Calculates the Clopper-Pearson interval by calling [stats::binom.test()].
110		#' Also referred to as the `exact` method.
111		#'
112		#' \deqn{
113		#' \left( \frac{k}{n} \pm z_{\alpha/2} \sqrt{\frac{\frac{k}{n}(1-\frac{k}{n})}{n} +
114		#' \frac{z^2_{\alpha/2}}{4n^2}} \right)
115		#' / \left( 1 + \frac{z^2_{\alpha/2}}{n} \right)}
116		#'
117		#' @export
118		proportion_ci_clopper_pearson <- function(x, conf.level = 0.95) {
119	7x	set_cli_abort_call()
120
121		# check installed packages ---------------------------------------------------
122	7x	check_pkg_installed(pkg = "broom")
123
124		# check inputs ---------------------------------------------------------------
125	7x	check_not_missing(x)
126	7x	check_binary(x)
127	7x	check_range(conf.level, range = c(0, 1), include_bounds = c(FALSE, FALSE))
128	7x	check_scalar(conf.level)
129
130	7x	x <- stats::na.omit(x)
131	7x	n <- length(x)
132
133	7x	y <- stats::binom.test(x = sum(x), n = n, conf.level = conf.level)
134
135	7x	list(N = n, n = sum(x), conf.level = conf.level) \|>
136	7x	utils::modifyList(val = broom::tidy(y) \|> as.list()) \|>
137	7x	utils::modifyList(list(method = "Clopper-Pearson Confidence Interval"))
138		}
139
140		#' @describeIn proportion_ci Calculates the `Agresti-Coull` interval (created by `Alan Agresti` and `Brent Coull`) by
141		#' (for 95% CI) adding two successes and two failures to the data and then using the Wald formula to construct a CI.
142		#'
143		#' \deqn{
144		#' \left( \frac{\tilde{p} + z^2_{\alpha/2}/2}{n + z^2_{\alpha/2}} \pm
145		#' z_{\alpha/2} \sqrt{\frac{\tilde{p}(1 - \tilde{p})}{n} +
146		#' \frac{z^2_{\alpha/2}}{4n^2}} \right)}
147		#'
148		#' @export
149		proportion_ci_agresti_coull <- function(x, conf.level = 0.95) {
150	9x	set_cli_abort_call()
151
152		# check inputs ---------------------------------------------------------------
153	9x	check_not_missing(x)
154	9x	check_binary(x)
155	9x	check_range(conf.level, range = c(0, 1), include_bounds = c(FALSE, FALSE))
156	9x	check_scalar(conf.level)
157
158	9x	x <- stats::na.omit(x)
159
160	9x	n <- length(x)
161	9x	x_sum <- sum(x)
162	9x	z <- stats::qnorm((1 + conf.level) / 2)
163
164		# Add here both z^2 / 2 successes and failures.
165	9x	x_sum_tilde <- x_sum + z^2 / 2
166	9x	n_tilde <- n + z^2
167
168		# Then proceed as with the Wald interval.
169	9x	p_tilde <- x_sum_tilde / n_tilde
170	9x	q_tilde <- 1 - p_tilde
171	9x	err <- z * sqrt(p_tilde * q_tilde) / sqrt(n_tilde)
172	9x	l_ci <- max(0, p_tilde - err)
173	9x	u_ci <- min(1, p_tilde + err)
174
175	9x	list(
176	9x	N = n,
177	9x	n = sum(x),
178	9x	estimate = mean(x),
179	9x	conf.low = l_ci,
180	9x	conf.high = u_ci,
181	9x	conf.level = conf.level,
182	9x	method = "Agresti-Coull Confidence Interval"
183		)
184		}
185
186		#' @describeIn proportion_ci Calculates the Jeffreys interval, an equal-tailed interval based on the
187		#' non-informative Jeffreys prior for a binomial proportion.
188		#'
189		#' \deqn{\left( \text{Beta}\left(\frac{k}{2} + \frac{1}{2}, \frac{n - k}{2} + \frac{1}{2}\right)_\alpha,
190		#' \text{Beta}\left(\frac{k}{2} + \frac{1}{2}, \frac{n - k}{2} + \frac{1}{2}\right)_{1-\alpha} \right)}
191		#'
192		#' @export
193		proportion_ci_jeffreys <- function(x, conf.level = 0.95) {
194	10x	set_cli_abort_call()
195
196		# check inputs ---------------------------------------------------------------
197	10x	check_not_missing(x)
198	10x	check_binary(x)
199	10x	check_range(conf.level, range = c(0, 1), include_bounds = c(FALSE, FALSE))
200	10x	check_scalar(conf.level)
201	10x	x <- stats::na.omit(x)
202
203	10x	n <- length(x)
204	10x	x_sum <- sum(x)
205
206	10x	alpha <- 1 - conf.level
207	10x	l_ci <- ifelse(
208	10x	x_sum == 0,
209	10x	0,
210	10x	stats::qbeta(alpha / 2, x_sum + 0.5, n - x_sum + 0.5)
211		)
212
213	10x	u_ci <- ifelse(
214	10x	x_sum == n,
215	10x	1,
216	10x	stats::qbeta(1 - alpha / 2, x_sum + 0.5, n - x_sum + 0.5)
217		)
218
219	10x	list(
220	10x	N = n,
221	10x	n = sum(x),
222	10x	estimate = mean(x),
223	10x	conf.low = l_ci,
224	10x	conf.high = u_ci,
225	10x	conf.level = conf.level,
226	10x	method = glue::glue("Jeffreys Interval")
227		)
228		}
229
230
231		#' @describeIn proportion_ci Calculates the stratified Wilson confidence
232		#' interval for unequal proportions as described in
233		#' Xin YA, Su XG. Stratified Wilson and Newcombe confidence intervals
234		#' for multiple binomial proportions. _Statistics in Biopharmaceutical Research_. 2010;2(3).
235		#'
236		#' \deqn{\frac{\hat{p}_j + \frac{z^2_{\alpha/2}}{2n_j} \pm
237		#' z_{\alpha/2} \sqrt{\frac{\hat{p}_j(1 - \hat{p}_j)}{n_j} +
238		#' \frac{z^2_{\alpha/2}}{4n_j^2}}}{1 + \frac{z^2_{\alpha/2}}{n_j}}}
239		#'
240		#'
241		#' @param strata (`factor`)\cr variable with one level per stratum and same length as `x`.
242		#' @param weights (`numeric`)\cr weights for each level of the strata. If `NULL`, they are
243		#' estimated using the iterative algorithm that
244		#' minimizes the weighted squared length of the confidence interval.
245		#' @param max.iterations (positive `integer`)\cr maximum number of iterations for the iterative procedure used
246		#' to find estimates of optimal weights.
247		#' @param correct (scalar `logical`)\cr include the continuity correction. For further information, see for example
248		#' [stats::prop.test()].
249		#'
250		#' @examples
251		#' # Stratified Wilson confidence interval with unequal probabilities
252		#'
253		#' set.seed(1)
254		#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
255		#' strata_data <- data.frame(
256		#' "f1" = sample(c("a", "b"), 100, TRUE),
257		#' "f2" = sample(c("x", "y", "z"), 100, TRUE),
258		#' stringsAsFactors = TRUE
259		#' )
260		#' strata <- interaction(strata_data)
261		#' n_strata <- ncol(table(rsp, strata)) # Number of strata
262		#'
263		#' proportion_ci_strat_wilson(
264		#' x = rsp, strata = strata,
265		#' conf.level = 0.90
266		#' )
267		#'
268		#' # Not automatic setting of weights
269		#' proportion_ci_strat_wilson(
270		#' x = rsp, strata = strata,
271		#' weights = rep(1 / n_strata, n_strata),
272		#' conf.level = 0.90
273		#' )
274		#'
275		#' @export
276		proportion_ci_strat_wilson <- function(x,
277		strata,
278		weights = NULL,
279		conf.level = 0.95,
280		max.iterations = 10L,
281		correct = FALSE) {
282	25x	set_cli_abort_call()
283
284		# check inputs ---------------------------------------------------------------
285	25x	check_not_missing(x)
286	25x	check_not_missing(strata)
287	25x	check_binary(x)
288	25x	check_class(correct, "logical")
289	25x	check_scalar(correct)
290	25x	check_class(strata, "factor")
291	25x	check_range(conf.level, range = c(0, 1), include_bounds = c(FALSE, FALSE))
292	25x	check_scalar(conf.level)
293
294		# remove missing values from x and strata
295	25x	is_na <- is.na(x) \| is.na(strata)
296	25x	x <- x[!is_na]
297	25x	strata <- strata[!is_na]
298	8x	if (!inherits(x, "logical")) x <- as.logical(x)
299		# check all TRUE/FALSE, if so, not calculable
300	25x	if (all(x) \|\| all(!x)) {
301	2x	cli::cli_abort("All values in {.arg x} argument are either {.code TRUE} or {.code FALSE} and CI is not estimable.")
302		}
303
304	23x	tbl <- table(factor(x, levels = c(FALSE, TRUE)), strata, useNA = "no")
305	23x	n_strata <- length(unique(strata))
306
307		# Checking the weights and maximum number of iterations.
308	23x	do_iter <- FALSE
309	23x	if (is.null(weights)) {
310	13x	weights <- rep(1 / n_strata, n_strata) # Initialization for iterative procedure
311	13x	do_iter <- TRUE
312
313		# Iteration parameters
314	13x	if (!is_scalar_integerish(max.iterations) \|\| max.iterations < 1) {
315	2x	cli::cli_abort("Argument {.arg max.iterations} must be a positive integer.")
316		}
317		}
318	21x	check_range(weights, range = c(0, 1), include_bounds = c(TRUE, TRUE))
319	19x	sum_weights <- sum(weights) \|>
320	19x	round() \|>
321	19x	as.integer()
322	19x	if (sum_weights != 1L \|\| abs(sum_weights - sum(weights)) > sqrt(.Machine$double.eps)) {
323	1x	cli::cli_abort("The sum of the {.arg weights} argument must be {.val {1L}}")
324		}
325
326	18x	xs <- tbl["TRUE", ]
327	18x	ns <- colSums(tbl)
328	18x	use_stratum <- (ns > 0)
329	18x	ns <- ns[use_stratum]
330	18x	xs <- xs[use_stratum]
331	18x	ests <- xs / ns
332	18x	vars <- ests * (1 - ests) / ns
333
334	18x	strata_qnorm <- .strata_normal_quantile(vars, weights, conf.level)
335
336		# Iterative setting of weights if they were not passed in `weights` argument
337	18x	weights_new <- if (do_iter) {
338	11x	.update_weights_strat_wilson(vars, strata_qnorm, weights, ns, max.iterations, conf.level)$weights
339		} else {
340	7x	weights
341		}
342
343	18x	strata_conf.level <- 2 * stats::pnorm(strata_qnorm) - 1
344
345	18x	ci_by_strata <- Map(
346	18x	function(x, n) {
347		# Classic Wilson's confidence interval
348	75x	suppressWarnings(stats::prop.test(x, n, correct = correct, conf.level = strata_conf.level)$conf.int)
349		},
350	18x	x = xs,
351	18x	n = ns
352		)
353	18x	lower_by_strata <- sapply(ci_by_strata, "[", 1L)
354	18x	upper_by_strata <- sapply(ci_by_strata, "[", 2L)
355
356	18x	lower <- sum(weights_new * lower_by_strata)
357	18x	upper <- sum(weights_new * upper_by_strata)
358
359		# Return values
360	18x	list(
361	18x	N = length(x),
362	18x	n = sum(x),
363	18x	estimate = mean(x),
364	18x	conf.low = lower,
365	18x	conf.high = upper,
366	18x	conf.level = conf.level,
367	18x	weights = if (do_iter) weights_new else NULL,
368	18x	method =
369	18x	glue::glue("Stratified Wilson Confidence Interval {ifelse(correct, 'with', 'without')} continuity correction")
370		) \|>
371	18x	compact()
372		}
373
374		#' @describeIn proportion_ci Helper to determine if vector is binary (logical or 0/1)
375		#'
376		#' @export
377		is_binary <- function(x) {
378	631x	is.logical(x) \|\| (is_integerish(x) && is_empty(setdiff(x, c(0, 1, NA))))
379		}
380
381		#' Helper Function for the Estimation of Stratified Quantiles
382		#'
383		#' This function wraps the estimation of stratified percentiles when we assume
384		#' the approximation for large numbers. This is necessary only in the case
385		#' proportions for each strata are unequal.
386		#'
387		#' @inheritParams proportion_ci_strat_wilson
388		#'
389		#' @return Stratified quantile.
390		#'
391		#' @seealso [proportion_ci_strat_wilson()]
392		#'
393		#' @keywords internal
394		#'
395		#' @examples
396		#' strata_data <- table(data.frame(
397		#' "f1" = sample(c(TRUE, FALSE), 100, TRUE),
398		#' "f2" = sample(c("x", "y", "z"), 100, TRUE),
399		#' stringsAsFactors = TRUE
400		#' ))
401		#' ns <- colSums(strata_data)
402		#' ests <- strata_data["TRUE", ] / ns
403		#' vars <- ests * (1 - ests) / ns
404		#' weights <- rep(1 / length(ns), length(ns))
405		#'
406		#' cardx:::.strata_normal_quantile(vars, weights, 0.95)
407		.strata_normal_quantile <- function(vars, weights, conf.level) {
408	42x	summands <- weights^2 * vars
409		# Stratified quantile
410	42x	sqrt(sum(summands)) / sum(sqrt(summands)) * stats::qnorm((1 + conf.level) / 2)
411		}
412
413		#' Helper Function for the Estimation of Weights for `proportion_ci_strat_wilson()`
414		#'
415		#' This function wraps the iteration procedure that allows you to estimate
416		#' the weights for each proportional strata. This assumes to minimize the
417		#' weighted squared length of the confidence interval.
418		#'
419		#' @keywords internal
420		#' @inheritParams proportion_ci_strat_wilson
421		#' @param vars (`numeric`)\cr normalized proportions for each strata.
422		#' @param strata_qnorm (`numeric`)\cr initial estimation with identical weights of the quantiles.
423		#' @param initial_weights (`numeric`)\cr initial weights used to calculate `strata_qnorm`. This can
424		#' be optimized in the future if we need to estimate better initial weights.
425		#' @param n_per_strata (`numeric`)\cr number of elements in each strata.
426		#' @param max.iterations (`count`)\cr maximum number of iterations to be tried. Convergence is always checked.
427		#' @param tol (`number`)\cr tolerance threshold for convergence.
428		#'
429		#' @return A `list` of 3 elements: `n_it`, `weights`, and `diff_v`.
430		#'
431		#' @seealso For references and details see [`proportion_ci_strat_wilson()`].
432		#'
433		#' @examples
434		#' vs <- c(0.011, 0.013, 0.012, 0.014, 0.017, 0.018)
435		#' sq <- 0.674
436		#' ws <- rep(1 / length(vs), length(vs))
437		#' ns <- c(22, 18, 17, 17, 14, 12)
438		#'
439		#' cardx:::.update_weights_strat_wilson(vs, sq, ws, ns, 100, 0.95, 0.001)
440		.update_weights_strat_wilson <- function(vars,
441		strata_qnorm,
442		initial_weights,
443		n_per_strata,
444		max.iterations = 50,
445		conf.level = 0.95,
446		tol = 0.001) {
447	11x	it <- 0
448	11x	diff_v <- NULL
449
450	11x	while (it < max.iterations) {
451	24x	it <- it + 1
452	24x	weights_new_t <- (1 + strata_qnorm^2 / n_per_strata)^2
453	24x	weights_new_b <- (vars + strata_qnorm^2 / (4 * n_per_strata^2))
454	24x	weights_new <- weights_new_t / weights_new_b
455	24x	weights_new <- weights_new / sum(weights_new)
456	24x	strata_qnorm <- .strata_normal_quantile(vars, weights_new, conf.level)
457	24x	diff_v <- c(diff_v, sum(abs(weights_new - initial_weights)))
458	11x	if (diff_v[length(diff_v)] < tol) break
459	13x	initial_weights <- weights_new
460		}
461
462	11x	if (it == max.iterations) {
463	!	warning("The heuristic to find weights did not converge with max.iterations = ", max.iterations)
464		}
465
466	11x	list(
467	11x	"n_it" = it,
468	11x	"weights" = weights_new,
469	11x	"diff_v" = diff_v
470		)
471		}

1		#' ARD Survival Estimates
2		#'
3		#' @description
4		#' Analysis results data for survival quantiles and x-year survival estimates, extracted
5		#' from a [survival::survfit()] model.
6		#'
7		#' @param x (`survfit` or `data.frame`)\cr
8		#' an object of class `survfit` created with [survival::survfit()] or a data frame. See below for details.
9		#' @param times (`numeric`)\cr
10		#' a vector of times for which to return survival probabilities.
11		#' @param probs (`numeric`)\cr
12		#' a vector of probabilities with values in (0,1) specifying the survival quantiles to return.
13		#' @param type (`string` or `NULL`)\cr
14		#' type of statistic to report. Available for Kaplan-Meier time estimates only, otherwise `type`
15		#' is ignored. Default is `NULL`.
16		#' Must be one of the following:
17		#' ```{r, echo = FALSE}
18		#' dplyr::tribble(
19		#' ~type, ~transformation,
20		#' '`"survival"`', '`x`',
21		#' '`"risk"`', '`1 - x`',
22		#' '`"cumhaz"`', '`-log(x)`',
23		#' ) %>%
24		#' knitr::kable()
25		#' ```
26		#' @param y (`Surv` or `string`)\cr
27		#' an object of class `Surv` created using [survival::Surv()]. This object will be passed as the left-hand side of
28		#' the formula constructed and passed to [survival::survfit()]. This object can also be passed as a string.
29		#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
30		#' stratification variables to be passed as the right-hand side of the formula constructed and passed to
31		#' [survival::survfit()]. Default is `NULL` for an unstratified model, e.g. `Surv() ~ 1`.
32		#' @param method.args (named `list`)\cr
33		#' named list of arguments that will be passed to [survival::survfit()].
34		#' @inheritParams rlang::args_dots_empty
35		#'
36		#' @section Formula Specification:
37		#' When passing a [`survival::survfit()`] object to `ard_survival_survfit()`,
38		#' the `survfit()` call must use an evaluated formula and not a stored formula.
39		#' Including a proper formula in the call allows the function to accurately
40		#' identify all variables included in the estimation. See below for examples:
41		#'
42		#' ```r
43		#' library(cardx)
44		#' library(survival)
45		#'
46		#' # include formula in `survfit()` call
47		#' survfit(Surv(time, status) ~ sex, lung) \|> ard_survival_survfit(time = 500)
48		#'
49		#' # you can also pass a data frame to `ard_survival_survfit()` as well.
50		#' lung \|>
51		#' ard_survival_survfit(y = Surv(time, status), variables = "sex", time = 500)
52		#' ```
53		#' You cannot, however, pass a stored formula, e.g. `survfit(my_formula, lung)`,
54		#' but you can use stored formulas with `rlang::inject(survfit(!!my_formula, lung))`.
55		#'
56		#' @section Variable Classes:
57		#' When the `survfit` method is called, the class of the stratifying variables
58		#' will be returned as a factor.
59		#'
60		#' When the data frame method is called, the original classes are retained in the
61		#' resulting ARD.
62		#'
63		#' @return an ARD data frame of class 'card'
64		#' @name ard_survival_survfit
65		#'
66		#' @details
67		#' * Only one of either the `times` or `probs` parameters can be specified.
68		#' * Times should be provided using the same scale as the time variable used to fit the provided
69		#' survival fit model.
70		#'
71		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("survival", "broom", "ggsurvfit")))
72		#' library(survival)
73		#' library(ggsurvfit)
74		#'
75		#' survfit(Surv_CNSR(AVAL, CNSR) ~ TRTA, data = cards::ADTTE) \|>
76		#' ard_survival_survfit(times = c(60, 180))
77		#'
78		#' survfit(Surv_CNSR(AVAL, CNSR) ~ TRTA, data = cards::ADTTE, conf.int = 0.90) \|>
79		#' ard_survival_survfit(probs = c(0.25, 0.5, 0.75))
80		#'
81		#' cards::ADTTE \|>
82		#' ard_survival_survfit(y = Surv_CNSR(AVAL, CNSR), variables = c("TRTA", "SEX"), times = 90)
83		#'
84		#' # Competing Risks Example ---------------------------
85		#' set.seed(1)
86		#' ADTTE_MS <- cards::ADTTE %>%
87		#' dplyr::mutate(
88		#' CNSR = dplyr::case_when(
89		#' CNSR == 0 ~ "censor",
90		#' runif(dplyr::n()) < 0.5 ~ "death from cancer",
91		#' TRUE ~ "death other causes"
92		#' ) %>% factor()
93		#' )
94		#'
95		#' survfit(Surv(AVAL, CNSR) ~ TRTA, data = ADTTE_MS) %>%
96		#' ard_survival_survfit(times = c(60, 180))
97		NULL
98
99		#' @rdname ard_survival_survfit
100		#' @export
101		ard_survival_survfit <- function(x, ...) {
102	36x	set_cli_abort_call()
103
104	36x	check_not_missing(x)
105	35x	UseMethod("ard_survival_survfit")
106		}
107
108		#' @rdname ard_survival_survfit
109		#' @export
110		ard_survival_survfit.survfit <- function(x, times = NULL, probs = NULL, type = NULL, ...) {
111	27x	set_cli_abort_call()
112
113		# check installed packages ---------------------------------------------------
114	27x	check_pkg_installed(c("survival", "broom"))
115
116		# check/process inputs -------------------------------------------------------
117	27x	if (is.name(x$call$formula)) {
118	1x	cli::cli_abort(
119	1x	message = paste(
120	1x	"The call in the survfit object {.arg x} must be an evaluated formula.",
121	1x	"Please see {.help [{.fun ard_survival_survfit}](cardx::ard_survival_survfit)} documentation for details on properly specifying formulas."
122		),
123	1x	call = get_cli_abort_call()
124		)
125		}
126	26x	if (inherits(x, "survfitcox")) {
127	1x	cli::cli_abort("Argument {.arg x} cannot be class {.cls survfitcox}.",
128	1x	call = get_cli_abort_call()
129		)
130		}
131
132		# competing risks models cannot use the type argument
133	25x	if (inherits(x, c("survfitms", "survfitcoxms")) && !is.null(type)) {
134	1x	cli::cli_abort("Cannot use {.arg type} argument with {.code survfit} models with class {.cls {c('survfitms', 'survfitcoxms')}}.",
135	1x	call = get_cli_abort_call()
136		)
137		}
138	5x	if (!is.null(probs)) check_range(probs, c(0, 1))
139	24x	if (sum(is.null(times), is.null(probs)) != 1) {
140	1x	cli::cli_abort("One and only one of {.arg times} and {.arg probs} must be specified.")
141		}
142
143		# for regular KM estimators, we allow the type argument
144	23x	if (!inherits(x, "survfitms") && !is.null(type)) {
145	3x	type <- arg_match(type, values = c("survival", "risk", "cumhaz"))
146		}
147
148		# cannot specify type arg when probs supplied
149	22x	if (!is.null(probs) && !is.null(type)) {
150	1x	cli::cli_abort("Cannot use {.arg type} argument when {.arg probs} argument specifed.",
151	1x	call = get_cli_abort_call()
152		)
153		}
154
155		# build ARD ------------------------------------------------------------------
156	21x	est_type <- ifelse(is.null(probs), "times", "probs")
157	21x	tidy_survfit <- switch(est_type,
158	21x	"times" = .process_survfit_time(x, times, type %\|\|% "survival"),
159	21x	"probs" = .process_survfit_probs(x, probs)
160		)
161
162	21x	.format_survfit_results(tidy_survfit)
163		}
164
165		#' @rdname ard_survival_survfit
166		#' @export
167		ard_survival_survfit.data.frame <- function(x, y,
168		variables = NULL,
169		times = NULL, probs = NULL, type = NULL,
170		method.args = list(conf.int = 0.95, conf.type = "log"), ...) {
171	7x	set_cli_abort_call()
172
173		# check/process inputs -------------------------------------------------------
174	7x	check_not_missing(y)
175	6x	cards::process_selectors(x, variables = {{ variables }})
176
177		# process outcome as string --------------------------------------------------
178	6x	y <- enquo(y)
179		# if a character was passed, return it as is
180	3x	if (tryCatch(is.character(eval_tidy(y)), error = \(e) FALSE)) y <- eval_tidy(y) # styler: off
181		# otherwise, convert expr to string
182	3x	else y <- expr_deparse(quo_get_expr(y)) # styler: off
183	6x	check_class(
184	6x	with(x, eval(parse_expr(y))),
185	6x	cls = "Surv",
186	6x	message =
187	6x	"The {.arg y} argument must be a string or expression that evaluates to an object of class {.cls Surv}
188	6x	most often created with {.fun survival::Surv} or {.fun ggsurvfit::Surv_CNSR}."
189		)
190
191
192		# build model ----------------------------------------------------------------
193	5x	survfit_formula <-
194	5x	case_switch(
195	5x	!is_empty(variables) ~ stats::reformulate(termlabels = bt(variables), response = y),
196	5x	.default = stats::reformulate(termlabels = "1", response = y)
197		)
198
199	5x	ard <- construct_model(
200	5x	data = x,
201	5x	formula = survfit_formula,
202	5x	method = "survfit",
203	5x	package = "survival",
204	5x	method.args = {{ method.args }}
205		) \|>
206	5x	ard_survival_survfit(times = times, probs = probs, type = type)
207
208	5x	ard_overall <- ard[ard$variable == "..ard_survival_survfit..", ]
209
210	5x	ard \|>
211	5x	dplyr::filter(ard$variable != "..ard_survival_survfit..") \|>
212	5x	.restore_original_column_types(data = x) \|>
213	5x	dplyr::bind_rows(ard_overall)
214		}
215
216		#' Process Survival Fit For Time Estimates
217		#'
218		#' @inheritParams cards::tidy_as_ard
219		#' @inheritParams ard_survival_survfit
220		#' @param start.time (`numeric`)\cr
221		#' default starting time. See [survival::survfit0()] for more details.
222		#'
223		#' @return a `tibble`
224		#'
225		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("survival", "broom")))
226		#' survival::survfit(survival::Surv(AVAL, CNSR) ~ TRTA, cards::ADTTE) \|>
227		#' cardx:::.process_survfit_time(times = c(60, 180), type = "risk")
228		#'
229		#' @keywords internal
230		.process_survfit_time <- function(x, times, type, start.time = NULL) {
231		# add start time
232	18x	min_time <- min(x$time)
233	18x	if (is.null(start.time) && min_time < 0) {
234	!	cli::cli_inform(paste(
235	!	"The {.arg start.time} argument has not been set and negative times have been observed. Please set start",
236	!	"time via the {.arg start.time} argument, otherwise the minimum observed time will be used by default."
237		))
238	!	start.time <- min_time
239	18x	} else if (is.null(start.time)) {
240	18x	start.time <- 0
241		}
242	18x	x <- survival::survfit0(x, start.time) %>%
243	18x	summary(times, extend = TRUE)
244
245		# process competing risks/multi-state models
246	18x	multi_state <- inherits(x, "summary.survfitms")
247
248	18x	if (multi_state) {
249		# selecting state to show
250	1x	state <- setdiff(unique(x$states), "(s0)")[[1]]
251	1x	cli::cli_inform("Multi-state model detected. Showing probabilities into state '{state}'.")
252	1x	x$n.risk <- x$n.risk[, 1]
253	1x	ms_cols <- c("pstate", "std.err", "upper", "lower")
254	1x	state_col <- which(colnames(x$pstate) == state)
255	1x	x[ms_cols] <- lapply(x[ms_cols], function(m) m[, state_col])
256	1x	x$surv <- x$pstate
257		}
258
259		# tidy survfit results
260	18x	x_cols <- intersect(names(x), c("time", "n.risk", "surv", "std.err", "upper", "lower", "strata"))
261	18x	tidy_x <- data.frame(x[x_cols]) %>%
262	18x	dplyr::rename(estimate = "surv", std.error = "std.err", conf.high = "upper", conf.low = "lower") %>%
263	18x	dplyr::mutate(
264	18x	conf.level = x$conf.int,
265	18x	conf.type = x$conf.type
266		)
267
268	18x	strat <- "strata" %in% names(tidy_x)
269
270		# get requested estimates
271	18x	df_stat <- tidy_x %>%
272		# find max time
273	18x	dplyr::group_by_at(., dplyr::vars(dplyr::any_of("strata"))) %>%
274	18x	dplyr::mutate(time_max = max(.data$time)) %>%
275	18x	dplyr::ungroup() %>%
276		# add requested timepoints
277	18x	dplyr::full_join(
278	18x	tidy_x %>%
279	18x	dplyr::select(any_of("strata")) %>%
280	18x	dplyr::distinct() %>%
281	18x	dplyr::mutate(
282	18x	time = list(.env$times),
283	18x	col_name = list(paste("stat", seq_len(length(.env$times)), sep = "_"))
284		) %>%
285	18x	tidyr::unnest(cols = c("time", "col_name")),
286	18x	by = unlist(intersect(c("strata", "time"), names(tidy_x)))
287		)
288
289	18x	if (strat) {
290	15x	df_stat <- df_stat %>% dplyr::arrange(.data$strata)
291		}
292
293	18x	df_stat <- df_stat %>%
294	18x	dplyr::arrange(.data$time) %>%
295		# if user-specified time is after max time, make estimate NA
296	18x	dplyr::mutate_at(
297	18x	dplyr::vars("estimate", "conf.high", "conf.low"),
298	18x	~ ifelse(.data$time > .data$time_max, NA_real_, .)
299		) %>%
300	18x	dplyr::mutate(context = type) %>%
301	18x	dplyr::select(!dplyr::any_of(c("time_max", "col_name")))
302
303		# convert estimates to requested type
304	18x	if (type != "survival") {
305	1x	df_stat <- df_stat %>%
306	1x	dplyr::mutate(dplyr::across(
307	1x	any_of(c("estimate", "conf.low", "conf.high")),
308	1x	if (type == "cumhaz") ~ -log(.x) else ~ 1 - .x
309		)) %>%
310	1x	dplyr::rename(conf.low = "conf.high", conf.high = "conf.low")
311		}
312
313	18x	df_stat <- extract_strata(x, df_stat)
314
315	18x	df_stat
316		}
317
318		#' Process Survival Fit For Quantile Estimates
319		#'
320		#' @inheritParams cards::tidy_as_ard
321		#' @inheritParams ard_survival_survfit
322		#'
323		#' @return a `tibble`
324		#'
325		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "survival"))
326		#' survival::survfit(survival::Surv(AVAL, CNSR) ~ TRTA, cards::ADTTE) \|>
327		#' cardx:::.process_survfit_probs(probs = c(0.25, 0.75))
328		#'
329		#' @keywords internal
330		.process_survfit_probs <- function(x, probs) {
331		# calculate survival quantiles and add estimates to df
332	3x	df_stat <- map2(
333	3x	probs,
334	3x	seq_along(probs),
335	3x	~ stats::quantile(x, probs = .x) %>%
336	3x	as.data.frame() %>%
337	3x	set_names(c("estimate", "conf.low", "conf.high")) %>%
338	3x	dplyr::mutate(strata = row.names(.)) %>%
339	3x	dplyr::select(dplyr::any_of(c("n.risk", "strata", "estimate", "std.error", "conf.low", "conf.high"))) %>%
340	3x	dplyr::mutate(
341	3x	conf.level = x$conf.int,
342	3x	conf.type = x$conf.type,
343	3x	prob = .x
344		)
345		) %>%
346	3x	dplyr::bind_rows() %>%
347	3x	`rownames<-`(NULL) %>%
348	3x	dplyr::mutate(context = "survival_survfit") %>%
349	3x	dplyr::as_tibble()
350
351	1x	if (length(x$n) == 1) df_stat <- df_stat %>% dplyr::select(-"strata")
352
353	3x	df_stat <- extract_strata(x, df_stat)
354
355	3x	df_stat
356		}
357
358		# process stratifying variables
359		extract_strata <- function(x, df_stat) {
360	21x	x_terms <- attr(stats::terms(stats::as.formula(x$call$formula)), "term.labels")
361	21x	x_terms <- gsub(".*\\(", "", gsub("\\)", "", x_terms))
362	21x	if (length(x_terms) > 0L) {
363	17x	strata_lvls <- data.frame()
364
365	17x	for (i in df_stat[["strata"]]) {
366	111x	i <- gsub(".*\\(", "", gsub("\\)", "", i))
367	111x	terms_str <- strsplit(i, paste(c(paste0(x_terms, "="), paste0(", ", x_terms, "=")), collapse = "\|"))[[1]]
368	111x	s_lvl <- terms_str[nchar(terms_str) > 0]
369	111x	strata_lvls <- rbind(strata_lvls, s_lvl)
370		}
371	17x	if (nrow(strata_lvls) > 0) {
372	17x	strata_lvls <- cbind(strata_lvls, t(x_terms))
373	17x	names(strata_lvls) <- c(
374	17x	t(sapply(seq_along(x_terms), function(i) c(paste0("group", i, "_level"), paste0("group", i))))
375		)
376	17x	df_stat <- cbind(df_stat, strata_lvls) %>%
377	17x	dplyr::select(-"strata")
378		}
379		}
380	21x	df_stat
381		}
382
383		#' Convert Tidied Survival Fit to ARD
384		#'
385		#' @inheritParams cards::tidy_as_ard
386		#'
387		#' @return an ARD data frame of class 'card'
388		#'
389		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("survival", "broom")))
390		#' cardx:::.format_survfit_results(
391		#' broom::tidy(survival::survfit(survival::Surv(AVAL, CNSR) ~ TRTA, cards::ADTTE))
392		#' )
393		#'
394		#' @keywords internal
395		.format_survfit_results <- function(tidy_survfit) {
396	21x	est <- if ("time" %in% names(tidy_survfit)) "time" else "prob"
397	21x	conf.level <- tidy_survfit[["conf.level"]][1]
398	21x	conf.type <- tidy_survfit[["conf.type"]][1]
399
400	21x	ret <- tidy_survfit %>%
401	21x	dplyr::select(-dplyr::any_of(c("conf.level", "conf.type"))) %>%
402	21x	dplyr::mutate(dplyr::across(
403	21x	dplyr::any_of(
404	21x	c("n.risk", "estimate", "std.error", "conf.high", "conf.low", "time", "prob")
405		),
406	21x	~ as.list(.)
407		)) %>%
408	21x	tidyr::pivot_longer(
409	21x	cols = dplyr::any_of(c("n.risk", "estimate", "std.error", "conf.high", "conf.low")),
410	21x	names_to = "stat_name",
411	21x	values_to = "stat"
412		) %>%
413	21x	dplyr::mutate(
414	21x	variable = est,
415	21x	variable_level = .data[[est]]
416		) %>%
417	21x	dplyr::select(-all_of(est))
418
419		# statistics applicable to all calculations
420	21x	if (!is.null(conf.level) && !is.null(conf.type)) {
421	21x	ret <- ret %>%
422	21x	dplyr::bind_rows(
423	21x	dplyr::tibble(
424	21x	context = "survival",
425	21x	stat_name = c("conf.level", "conf.type"),
426	21x	stat = list(conf.level, conf.type),
427	21x	variable = "..ard_survival_survfit.."
428		)
429		)
430		}
431
432	21x	ret %>%
433	21x	dplyr::left_join(
434	21x	.df_survfit_stat_labels(),
435	21x	by = "stat_name"
436		) %>%
437	21x	dplyr::mutate(
438	21x	fmt_fun = lapply(
439	21x	.data$stat,
440	21x	function(x) {
441	583x	switch(is.integer(x),
442	583x	0L
443	583x	) %\|\|% switch(is.numeric(x),
444	583x	1L
445		)
446		}
447		),
448	21x	stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)
449		) %>%
450	21x	dplyr::mutate(dplyr::across(matches("group[0-9]*_level"), ~ as.list(as.factor(.x)))) %>%
451	21x	dplyr::mutate(
452	21x	warning = list(NULL),
453	21x	error = list(NULL)
454		) %>%
455	21x	cards::as_card() %>%
456	21x	cards::tidy_ard_column_order() %>%
457	21x	cards::tidy_ard_row_order()
458		}
459
460		.df_survfit_stat_labels <- function() {
461	21x	dplyr::tribble(
462	21x	~stat_name, ~stat_label,
463	21x	"n.risk", "Number of Subjects at Risk",
464	21x	"estimate", "Survival Probability",
465	21x	"std.error", "Standard Error (untransformed)",
466	21x	"conf.low", "CI Lower Bound",
467	21x	"conf.high", "CI Upper Bound",
468	21x	"prob", "Quantile",
469	21x	"time", "Time",
470	21x	"conf.level", "CI Confidence Level",
471	21x	"conf.type", "CI Type"
472		)
473		}

1		#' ARD Proportion Confidence Intervals
2		#'
3		#' Calculate confidence intervals for proportions.
4		#'
5		#' @inheritParams cards::ard_tabulate
6		#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
7		#' columns to include in summaries. Columns must be class `<logical>`
8		#' or `<numeric>` values coded as `c(0,1)`.
9		#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
10		#' columns to stratify calculations by.
11		#' @param denominator (`string`)\cr
12		#' Must be one of `'column'` (default), `'row'`, and `'cell'`, which specifies
13		#' the direction of the calculation/denominator. Argument is similar to
14		#' `cards::ard_tabulate(denominator)`.
15		#' @param conf.level (scalar `numeric`)\cr
16		#' a scalar in `(0,1)` indicating the confidence level.
17		#' Default is `0.95`
18		#' @param method (`string`)\cr
19		#' string indicating the type of confidence interval to calculate.
20		#' Must be one of `r formals(ard_categorical_ci)[["method"]] \|> eval() \|> shQuote("sh")`.
21		#' See `?proportion_ci` for details.
22		#' @param strata,weights,max.iterations arguments passed to `proportion_ci_strat_wilson()`,
23		#' when `method='strat_wilson'`
24		#' @param value ([`formula-list-selector`][cards::syntax])\cr
25		#' function will calculate the CIs for all levels of the variables specified.
26		#' Use this argument to instead request only a single level by summarized.
27		#' Default is `list(where(is_binary) ~ 1L, where(is.logical) ~ TRUE)`, where
28		#' columns coded as `0`/`1` and `TRUE`/`FALSE` will summarize the `1` and `TRUE` levels.
29		#'
30		#' @return an ARD data frame
31		#' @name ard_categorical_ci
32		#'
33		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
34		#' # compute CI for binary variables
35		#' ard_categorical_ci(mtcars, variables = c(vs, am), method = "wilson")
36		#'
37		#' # compute CIs for each level of a categorical variable
38		#' ard_categorical_ci(mtcars, variables = cyl, method = "jeffreys")
39		NULL
40
41		#' @rdname ard_categorical_ci
42		#' @export
43		ard_categorical_ci <- function(data, ...) {
44	58x	check_not_missing(data)
45	58x	UseMethod("ard_categorical_ci")
46		}
47
48		#' @rdname ard_categorical_ci
49		#' @export
50		ard_categorical_ci.data.frame <- function(data,
51		variables,
52		by = dplyr::group_vars(data),
53		method = c(
54		"waldcc", "wald", "clopper-pearson",
55		"wilson", "wilsoncc",
56		"strat_wilson", "strat_wilsoncc",
57		"agresti-coull", "jeffreys"
58		),
59		denominator = c("column", "row", "cell"),
60		conf.level = 0.95,
61		value = list(where(is_binary) ~ 1L, where(is.logical) ~ TRUE),
62		strata = NULL,
63		weights = NULL,
64		max.iterations = 10,
65		...) {
66	41x	set_cli_abort_call()
67	41x	check_dots_empty()
68
69		# check installed packages ---------------------------------------------------
70	41x	check_pkg_installed(pkg = "broom")
71
72		# process inputs -------------------------------------------------------------
73	41x	check_not_missing(variables)
74	41x	cards::process_selectors(data, variables = {{ variables }}, by = {{ by }})
75	41x	denominator <- arg_match(denominator, call = get_cli_abort_call())
76	41x	method <- arg_match(method)
77	41x	if (method %in% c("strat_wilson", "strat_wilsoncc")) {
78	9x	cards::process_selectors(data, strata = {{ strata }})
79	9x	check_scalar(strata)
80		}
81
82		# if the method is strat_wilson, `weights` and `strata` cannot contain NA values
83	41x	if (method %in% c("strat_wilson")) {
84	7x	if (any(is.na({{ weights }}))) {
85	1x	cli::cli_warn("{.field weights} cannot contain {.val NA} values.")
86		}
87		}
88	41x	cards::process_formula_selectors(
89	41x	data[variables],
90	41x	value = value
91		)
92
93		# if there is no by variable, then treat cell as column because it's the same.
94	41x	if (denominator == "cell" && is_empty(by)) {
95	1x	denominator <- "column"
96		}
97
98		# return empty ARD if no variables selected ----------------------------------
99	41x	if (is_empty(variables)) {
100	!	return(dplyr::tibble() \|> cards::as_card())
101		}
102
103		# calculate confidence intervals ---------------------------------------------
104	41x	map(
105	41x	variables,
106	41x	function(variable) {
107	49x	switch(denominator,
108	41x	"column" =
109	36x	.calculate_ard_proportion_column(
110	36x	data = data,
111	36x	variable = variable,
112	36x	by = by,
113	36x	value = value,
114	36x	method = method,
115	36x	conf.level = conf.level,
116	36x	strata = strata,
117	36x	weights = weights,
118	36x	max.iterations = max.iterations
119		),
120	41x	"row" =
121	7x	.calculate_ard_proportion_row(
122	7x	data = data,
123	7x	variable = variable,
124	7x	by = by,
125	7x	value = value,
126	7x	method = method,
127	7x	conf.level = conf.level,
128	7x	strata = strata,
129	7x	weights = weights,
130	7x	max.iterations = max.iterations
131		),
132	41x	"cell" =
133	6x	.calculate_ard_proportion_cell(
134	6x	data = data,
135	6x	variable = variable,
136	6x	by = by,
137	6x	value = value,
138	6x	method = method,
139	6x	conf.level = conf.level,
140	6x	strata = strata,
141	6x	weights = weights,
142	6x	max.iterations = max.iterations
143		)
144		)
145		}
146		) \|>
147	41x	dplyr::bind_rows() \|>
148	41x	dplyr::mutate(
149	41x	context = "proportion_ci"
150		) \|>
151	41x	cards::as_card() \|>
152	41x	cards::tidy_ard_column_order() \|>
153	41x	cards::tidy_ard_row_order()
154		}
155
156		.calculate_ard_proportion_column <- function(data, variable, varname, by, value, method, conf.level, strata, weights, max.iterations) {
157	36x	levels <- .unique_values_sort(data, variable = variable, value = value[[variable]])
158	36x	data <- .as_dummy_column(data, variable = variable, levels = levels, by = by, strata = strata)
159
160	36x	cards::ard_mvsummary(
161	36x	data = data,
162	36x	variables = c(everything(), -all_of(c(by, strata))),
163	36x	by = all_of(by),
164	36x	statistic =
165	36x	~ list(
166	36x	prop_ci =
167	36x	.calculate_prop_ci_fun(
168	36x	data = data, method = method, conf.level = conf.level,
169	36x	strata = strata, weights = weights, max.iterations = max.iterations
170		)
171		)
172		) %>%
173		# merge in the variable levels
174	36x	dplyr::left_join(
175	36x	dplyr::select(., "variable") \|>
176	36x	dplyr::distinct() \|>
177	36x	dplyr::mutate(variable_level = as.list(.env$levels)),
178	36x	by = "variable"
179		) \|>
180		# rename variable column
181	36x	dplyr::mutate(variable = .env$variable)
182		}
183
184		.calculate_prop_ci_fun <- function(data, method, conf.level, strata, weights, max.iterations) {
185	52x	switch(method,
186	52x	"waldcc" = \(x, ...) proportion_ci_wald(x, conf.level = conf.level, correct = TRUE),
187	52x	"wald" = \(x, ...) proportion_ci_wald(x, conf.level = conf.level, correct = FALSE),
188	52x	"wilsoncc" = \(x, ...) proportion_ci_wilson(x, conf.level = conf.level, correct = TRUE),
189	52x	"wilson" = \(x, ...) proportion_ci_wilson(x, conf.level = conf.level, correct = FALSE),
190	52x	"clopper-pearson" = \(x, ...) proportion_ci_clopper_pearson(x, conf.level = conf.level),
191	52x	"agresti-coull" = \(x, ...) proportion_ci_agresti_coull(x, conf.level = conf.level),
192	52x	"jeffreys" = \(x, ...) proportion_ci_jeffreys(x, conf.level = conf.level),
193	52x	"strat_wilsoncc" = \(x, data, ...) {
194	2x	proportion_ci_strat_wilson(x,
195	2x	strata = data[[strata]], weights = weights,
196	2x	max.iterations = max.iterations,
197	2x	conf.level = conf.level, correct = TRUE
198		)
199		},
200	52x	"strat_wilson" = \(x, data, ...) {
201	9x	proportion_ci_strat_wilson(x,
202	9x	strata = data[[strata]], weights = weights,
203	9x	max.iterations = max.iterations,
204	9x	conf.level = conf.level, correct = FALSE
205		)
206		}
207		) \|>
208	52x	cards::as_cards_fn(
209	52x	stat_names =
210	52x	case_switch(
211	52x	method %in% c("strat_wilsoncc", "strat_wilsoncc") ~
212	52x	c("N", "n", "estimate", "conf.low", "conf.high", "conf.level", "weights", "method"),
213	52x	.default = c("N", "n", "estimate", "conf.low", "conf.high", "conf.level", "method")
214		)
215		)
216		}
217
218		.unique_values_sort <- function(data, variable, value = NULL) {
219	289x	unique_levels <-
220		# styler: off
221	289x	if (is.logical(data[[variable]])) c(TRUE, FALSE)
222	289x	else if (is.factor(data[[variable]])) factor(levels(data[[variable]]), levels = levels(data[[variable]]))
223	289x	else unique(data[[variable]]) \|> sort()
224		# styler: on
225
226	289x	if (!is_empty(value) && !value %in% unique_levels) {
227	1x	cli::cli_warn(
228	1x	c("A value of {.code value={.val {value}}} for variable {.val {variable}}
229	1x	was passed, but is not one of the observed levels: {.val {unique_levels}}.",
230	1x	i = "This may be an error.",
231	1x	i = "If value is a valid, convert variable to factor with all levels specified to avoid this message."
232		)
233		)
234		}
235	289x	if (!is_empty(value)) {
236	34x	unique_levels <- value
237		}
238
239	289x	unique_levels
240		}
241
242		.as_dummy_column <- function(data, variable, levels, by, strata) {
243		# define dummy variables and return tibble
244	36x	map(levels, ~ data[[variable]] == .x) \|>
245	36x	set_names(paste0("this_is_not_a_column_name_anyone_would_choose_", variable, "_", levels, "...")) %>%
246	36x	{dplyr::tibble(!!!.)} \|> # styler: off
247	36x	dplyr::bind_cols(data[c(by, strata)])
248		}
249
250		.levels_for_row <- function(data, by) {
251	6x	suppressMessages(cards::nest_for_ard(data = data, by = by, include_data = FALSE)) \|>
252	6x	dplyr::select(cards::all_ard_groups(types = "levels")) \|>
253	6x	map(unlist) \|>
254	6x	reduce(.f = \(.x, .y) paste0(.x, .y)) \|>
255	6x	as.character()
256		}
257
258		.as_dummy_row <- function(data, levels, by, strata) {
259	9x	data_col <- data[, by, drop = FALSE] \|>
260	9x	reduce(.f = \(.x, .y) paste0(.x, .y)) \|>
261	9x	as.character()
262
263		# define dummy variables and return tibble
264	9x	df_res <- map(levels, ~ data_col == .x) \|>
265	9x	set_names(paste0("this_is_not_a_column_name_anyone_would_choose_", levels, "...")) \|>
266	9x	dplyr::bind_cols()
267
268	9x	if (!is_empty(strata)) {
269	1x	df_res <- df_res \|> dplyr::bind_cols(data[, strata, drop = FALSE])
270		}
271
272	9x	df_res
273		}
274
275
276		.calculate_ard_proportion_row <- function(data, variable, by, value, method,
277		conf.level, strata, weights, max.iterations) {
278		# if there are no by variables, then all row percents are 100%
279	7x	if (is_empty(by)) {
280	1x	df_res <-
281	1x	suppressMessages(
282	1x	cards::nest_for_ard(
283	1x	data = data[c(variable, by, strata)],
284	1x	by = variable
285		)
286		) \|>
287	1x	dplyr::rename(variable = "group1", variable_level = "group1_level") %>%
288		{
289	1x	case_switch(
290	1x	!is_empty(value[[variable]]) ~ dplyr::filter(., .data$variable_level %in% !!value[[variable]]),
291	1x	.default = .
292		)
293		} \|>
294	1x	dplyr::mutate(
295	1x	data = map(data, ~ dplyr::mutate(.x, ....ard_all_true.... = TRUE)),
296	1x	prop_ci_fun =
297	1x	map(
298	1x	.data$data,
299	1x	~ .calculate_prop_ci_fun(
300	1x	data = .x,
301	1x	method = .env$method,
302	1x	conf.level = .env$conf.level,
303	1x	strata = .env$strata,
304	1x	weights = .env$weights,
305	1x	max.iterations = .env$max.iterations
306		)
307		),
308	1x	result =
309	1x	map2(
310	1x	.data$data, .data$prop_ci_fun,
311	1x	~ cards::ard_mvsummary(
312	1x	data = .x,
313	1x	variables = "....ard_all_true....",
314	1x	statistic = list("....ard_all_true...." = list(prop_ci = .y))
315		) \|>
316	1x	tidyr::nest(res = -cards::all_ard_variables()) \|>
317	1x	dplyr::select(-cards::all_ard_variables())
318		)
319		) \|>
320	1x	dplyr::select(-c("data", "prop_ci_fun")) \|>
321	1x	tidyr::unnest(cols = "result") \|>
322	1x	tidyr::unnest(cols = "res")
323
324	1x	return(df_res)
325		}
326
327
328	6x	df_grouping_cols <- suppressMessages(cards::nest_for_ard(data, by = by, include_data = FALSE))
329	6x	levels <- .levels_for_row(data = data, by = by)
330
331	6x	suppressMessages(
332	6x	cards::nest_for_ard(
333	6x	data = data[c(variable, by, strata)],
334	6x	by = variable
335		)
336		) \|>
337	6x	dplyr::rename(variable = "group1", variable_level = "group1_level") %>%
338		{
339	6x	case_switch(
340	6x	!is_empty(value[[variable]]) ~ dplyr::filter(., .data$variable_level %in% !!value[[variable]]),
341	6x	.default = .
342		)
343		} \|>
344	6x	dplyr::mutate(
345	6x	df_grouping_cols = list(.env$df_grouping_cols),
346	6x	prop_ci_fun =
347	6x	map(
348	6x	.data$data,
349	6x	~ .calculate_prop_ci_fun(
350	6x	data = .x,
351	6x	method = .env$method,
352	6x	conf.level = .env$conf.level,
353	6x	strata = .env$strata,
354	6x	weights = .env$weights,
355	6x	max.iterations = .env$max.iterations
356		)
357		),
358	6x	result =
359	6x	map2(
360	6x	.data$data, .data$prop_ci_fun,
361	6x	~ .as_dummy_row(data = .x, levels = levels, by = by, strata = strata) \|>
362	6x	cards::ard_mvsummary(
363	6x	variables = c(everything(), -any_of(strata)),
364	6x	statistic = everything() ~ list(prop_ci = .y)
365		) \|>
366	6x	tidyr::nest(res = -cards::all_ard_variables()) \|>
367	6x	dplyr::select(-cards::all_ard_variables())
368		)
369		) \|>
370	6x	dplyr::select(-c("data", "prop_ci_fun")) \|>
371	6x	tidyr::unnest(cols = c("df_grouping_cols", "result")) \|>
372	6x	tidyr::unnest(cols = "res")
373		}
374
375		.calculate_ard_proportion_cell <- function(data, variable, by, value, method,
376		conf.level, strata, weights, max.iterations) {
377		# create the base of what the grouping and variable ARD will look like
378	6x	df_groups_variable <-
379	6x	suppressMessages(
380	6x	cards::nest_for_ard(data, by = c(by, variable), include_data = FALSE)
381		) \|>
382	6x	dplyr::rename(
383	6x	variable = glue::glue("group{length(c(variable, by))}"),
384	6x	variable_level = glue::glue("group{length(c(variable, by))}_level")
385		) %>%
386		{
387	6x	case_switch(
388	6x	!is_empty(value[[variable]]) ~ dplyr::filter(., .data$variable_level %in% !!value[[variable]]),
389	6x	.default = .
390		)
391		}
392
393		# create a vector of all the unique values of by and variable pasted together
394	6x	levels <-
395	6x	df_groups_variable \|>
396	6x	dplyr::select(cards::all_ard_groups("levels"), cards::all_ard_variables("levels")) \|>
397	6x	map(unlist) \|>
398	6x	reduce(.f = \(.x, .y) paste0(.x, .y)) \|>
399	6x	as.character()
400
401		# create a data frame of dummy columns indicating if each of the levels is obs in that row
402		# data frame also contains the strata column is supplied
403	6x	dummy_data <-
404	6x	df_groups_variable \|>
405	6x	dplyr::select(cards::all_ard_groups("levels"), cards::all_ard_variables("levels")) \|>
406	6x	dplyr::mutate(across(everything(), unlist)) \|>
407	6x	stats::setNames(
408	6x	df_groups_variable \|>
409	6x	dplyr::select(cards::all_ard_groups("names"), cards::all_ard_variables("names")) \|>
410	6x	dplyr::slice(1L) \|>
411	6x	unlist() \|>
412	6x	unname()
413		) \|>
414	6x	dplyr::bind_cols(data.frame(...a_name_anyone_would_ever_pick... = levels)) \|>
415	6x	dplyr::left_join(
416	6x	x = data[c(variable, by, strata)],
417	6x	y = _,
418	6x	by = c(variable, by)
419		) \|>
420	6x	dplyr::mutate(
421	6x	...a_name_anyone_would_ever_pick... =
422	6x	dplyr::coalesce(.data$...a_name_anyone_would_ever_pick..., paste(levels, collapse = ""))
423		)
424
425		# make dummy vector missing if any of the other variables are missing
426	6x	dummy_data[["...a_name_anyone_would_ever_pick..."]][apply(is.na(dummy_data[c(variable, by, strata)]), MARGIN = 1, FUN = any)] <- NA
427
428		# finish processing dummy data
429	6x	dummy_data <- dummy_data \|>
430	6x	dplyr::select(-any_of(c(variable, by))) %>%
431		# styler: off
432	6x	{dplyr::bind_cols(
433		.,
434	6x	map(
435	6x	levels,
436	6x	\(level) {
437	25x	(.[["...a_name_anyone_would_ever_pick..."]] == level)
438		}
439		) \|>
440	6x	stats::setNames(paste0("level_", levels)) \|>
441	6x	dplyr::as_tibble()
442		)} \|>
443		# styler: on
444	6x	dplyr::select(-"...a_name_anyone_would_ever_pick...")
445
446	6x	prop_ci_fun <-
447	6x	.calculate_prop_ci_fun(
448	6x	data = dummy_data,
449	6x	method = method,
450	6x	conf.level = conf.level,
451	6x	strata = strata,
452	6x	weights = weights,
453	6x	max.iterations = max.iterations
454		)
455
456	6x	df_res <-
457	6x	cards::ard_mvsummary(
458	6x	data = dummy_data,
459	6x	variables = -any_of(strata),
460	6x	statistic = everything() ~ list(prop_ci = prop_ci_fun)
461		) \|>
462	6x	tidyr::nest(res = -"variable") \|>
463	6x	dplyr::select(-"variable")
464
465	6x	dplyr::bind_cols(df_groups_variable, df_res) \|>
466	6x	tidyr::unnest("res")
467		}

1		#' ARD One-way Test
2		#'
3		#' @description
4		#' Analysis results data for Testing Equal Means in a One-Way Layout.
5		#' calculated with `oneway.test()`
6		#'
7		#' @inheritParams stats::oneway.test
8		#' @param ... additional arguments passed to `oneway.test(...)`
9		#'
10		#' @return ARD data frame
11		#' @export
12		#'
13		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
14		#' ard_stats_oneway_test(AGE ~ ARM, data = cards::ADSL)
15		ard_stats_oneway_test <- function(formula, data, ...) {
16	3x	set_cli_abort_call()
17
18		# check installed packages ---------------------------------------------------
19	3x	check_pkg_installed(c("broom"))
20
21		# check/process inputs -------------------------------------------------------
22	3x	check_not_missing(formula)
23	3x	check_not_missing(data)
24	3x	check_data_frame(data)
25	3x	check_class(formula, cls = "formula")
26
27		# build ARD ------------------------------------------------------------------
28	3x	df_results <-
29	3x	cards::tidy_as_ard(
30	3x	lst_tidy =
31	3x	cards::eval_capture_conditions(
32	3x	stats::oneway.test(formula, data = data, ...) \|>
33	3x	broom::tidy()
34		),
35	3x	tidy_result_names = c("num.df", "den.df", "statistic", "p.value", "method"),
36	3x	fun_args_to_record =
37	3x	c("var.equal"),
38	3x	formals = formals(stats::oneway.test),
39	3x	passed_args = dots_list(...),
40	3x	lst_ard_columns = list(context = "stats_oneway_test")
41		) \|>
42	3x	dplyr::mutate(
43	3x	.after = "stat_name",
44	3x	stat_label =
45	3x	dplyr::case_when(
46	3x	.data$stat_name %in% "num.df" ~ "Degrees of Freedom",
47	3x	.data$stat_name %in% "den.df" ~ "Denominator Degrees of Freedom",
48	3x	.data$stat_name %in% "statistic" ~ "F Statistic",
49	3x	.data$stat_name %in% "p.value" ~ "p-value",
50	3x	.data$stat_name %in% "method" ~ "Method",
51	3x	TRUE ~ .data$stat_name,
52		)
53		)
54
55		# add variable/groups to results and return result
56	3x	df_results \|>
57	3x	dplyr::bind_cols(
58	3x	dplyr::tibble(!!!map(as.list(attr(stats::terms(formula), "variables"))[-1], as_label)) %>%
59	3x	set_names(., c("variable", paste0("group", seq_len(length(.) - 1L))))
60		) \|>
61	3x	cards::as_card() \|>
62	3x	cards::tidy_ard_column_order()
63		}

1		#' ARD Standardized Mean Difference
2		#'
3		#' @description
4		#' Standardized mean difference calculated via [`smd::smd()`] with `na.rm = TRUE`.
5		#' Additionally, this function add a confidence interval to the SMD when
6		#' `std.error=TRUE`, which the original `smd::smd()` does not include.
7		#'
8		#' @param data (`data.frame`/`survey.design`)\cr
9		#' a data frame or object of class 'survey.design'
10		#' (typically created with [`survey::svydesign()`]).
11		#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
12		#' column name to compare by.
13		#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
14		#' column names to be compared. Independent tests will be computed for
15		#' each variable.
16		#' @param conf.level (scalar `numeric`)\cr
17		#' confidence level for confidence interval. Default is `0.95`.
18		#' @param std.error (scalar `logical`)\cr
19		#' Logical indicator for computing standard errors using `smd::compute_smd_var()`.
20		#' Default is `TRUE`.
21		#' @param ... arguments passed to `smd::smd()`
22		#'
23		#' @return ARD data frame
24		#' @export
25		#'
26		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "smd"))
27		#' ard_smd_smd(cards::ADSL, by = SEX, variables = AGE)
28		#' ard_smd_smd(cards::ADSL, by = SEX, variables = AGEGR1)
29		ard_smd_smd <- function(data, by, variables, std.error = TRUE, conf.level = 0.95, ...) {
30	6x	set_cli_abort_call()
31
32		# check installed packages ---------------------------------------------------
33	6x	check_pkg_installed("smd")
34
35		# check/process inputs -------------------------------------------------------
36	6x	check_not_missing(data)
37	6x	check_not_missing(variables)
38	6x	check_not_missing(by)
39
40		# grab design object if from `survey` ----------------------------------------
41	6x	is_survey <- inherits(data, "survey.design")
42	6x	if (is_survey) {
43	1x	design <- data
44	1x	data <- design$variables
45		}
46
47		# continue check/process inputs ----------------------------------------------
48	6x	check_data_frame(data)
49	6x	data <- dplyr::ungroup(data)
50	6x	cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
51	6x	check_scalar(by)
52		# This check can be relaxed, but would require some changes to handle multi-row outputs
53	6x	check_n_levels(data[[by]], 2L, message = "The {.arg by} column must have {.val {length}} levels.")
54
55		# return empty ARD if no variables selected ----------------------------------
56	6x	if (is_empty(variables)) {
57	!	return(dplyr::tibble() \|> cards::as_card())
58		}
59
60		# build ARD ------------------------------------------------------------------
61	6x	lapply(
62	6x	variables,
63	6x	function(variable) {
64	7x	.format_smd_results(
65	7x	by = by,
66	7x	variable = variable,
67	7x	lst_tidy =
68	7x	cards::eval_capture_conditions(
69	7x	switch(as.character(is_survey),
70	7x	"TRUE" = smd::smd(x = data[[variable]], g = data[[by]], w = stats::weights(design), na.rm = TRUE, std.error = std.error, ...),
71	7x	"FALSE" = smd::smd(x = data[[variable]], g = data[[by]], na.rm = TRUE, std.error = std.error, ...)
72		) \|>
73	7x	dplyr::select(-any_of("term")) %>%
74		# styler: off
75	6x	{if (isTRUE(std.error))
76	6x	dplyr::mutate(
77		.,
78	6x	conf.low = .data$estimate + stats::qnorm((1 - .env$conf.level) / 2) * .data$std.error,
79	6x	conf.high = .data$estimate - stats::qnorm((1 - .env$conf.level) / 2) * .data$std.error,
80	6x	method = "Standardized Mean Difference"
81		)
82		else
83	!	dplyr::mutate(
84		.,
85	!	method = "Standardized Mean Difference"
86		)}
87		# styler: on
88		),
89		...
90		)
91		}
92		) \|>
93	6x	dplyr::bind_rows()
94		}
95
96
97		.format_smd_results <- function(by, variable, lst_tidy, ...) {
98		# build ARD ------------------------------------------------------------------
99	7x	ret <-
100	7x	cards::tidy_as_ard(
101	7x	lst_tidy = lst_tidy,
102	7x	tidy_result_names = c("estimate", "std.error"),
103	7x	fun_args_to_record = c("gref"),
104	7x	formals = formals(smd::smd)[c("gref")],
105		# removing the `std.error` ARGUMENT (not the result)
106	7x	passed_args = dots_list(...) \|> utils::modifyList(list(std.error = NULL)),
107	7x	lst_ard_columns = list(group1 = by, variable = variable, context = "smd_smd")
108		)
109
110		# add the stat label ---------------------------------------------------------
111	7x	ret \|>
112	7x	dplyr::left_join(
113	7x	dplyr::tribble(
114	7x	~stat_name, ~stat_label,
115	7x	"estimate", "Standardized Mean Difference",
116	7x	"std.error", "Standard Error",
117	7x	"gref", "Integer Reference Group Level"
118		),
119	7x	by = "stat_name"
120		) \|>
121	7x	dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) \|>
122	7x	cards::as_card() \|>
123	7x	cards::tidy_ard_column_order()
124		}

1		#' ARD Poisson Test
2		#'
3		#' @description
4		#' Analysis results data for exact tests of a simple null hypothesis about the rate parameter
5		#' in Poisson distribution, or the comparison of two rate parameters.
6		#'
7		#' @param data (`data.frame`)\cr
8		#' a data frame. See below for details.
9		#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
10		#' names of the event and time variables (in that order) to be used in computations. Must be of length 2.
11		#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
12		#' optional column name to compare by.
13		#' @param conf.level (scalar `numeric`)\cr
14		#' confidence level for confidence interval. Default is `0.95`.
15		#' @param na.rm (scalar `logical`)\cr
16		#' whether missing values should be removed before computations. Default is `TRUE`.
17		#' @param ... arguments passed to [poisson.test()].
18		#' @return an ARD data frame of class 'card'
19		#' @name ard_stats_poisson_test
20		#'
21		#' @details
22		#' * For the `ard_stats_poisson_test()` function, the data is expected to be one row per subject.
23		#' * If `by` is not specified, an exact Poisson test of the rate parameter will be performed. Otherwise, a
24		#' Poisson comparison of two rate parameters will be performed on the levels of `by`. If `by` has more than 2
25		#' levels, an error will occur.
26		#'
27		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
28		#' # Exact test of rate parameter against null hypothesis
29		#' cards::ADTTE \|>
30		#' ard_stats_poisson_test(variables = c(CNSR, AVAL))
31		#'
32		#' # Comparison test of ratio of 2 rate parameters against null hypothesis
33		#' cards::ADTTE \|>
34		#' dplyr::filter(TRTA %in% c("Placebo", "Xanomeline High Dose")) \|>
35		#' ard_stats_poisson_test(by = TRTA, variables = c(CNSR, AVAL))
36		NULL
37
38		#' @rdname ard_stats_poisson_test
39		#' @export
40		ard_stats_poisson_test <- function(data, variables, na.rm = TRUE, by = NULL, conf.level = 0.95, ...) {
41	5x	set_cli_abort_call()
42
43		# check installed packages ---------------------------------------------------
44	5x	check_pkg_installed("broom")
45
46		# check/process inputs -------------------------------------------------------
47	5x	check_not_missing(data)
48	5x	check_not_missing(variables)
49	5x	check_data_frame(data)
50	5x	data <- dplyr::ungroup(data)
51	5x	cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
52	5x	check_length(variables, 2)
53	5x	check_logical(na.rm)
54	5x	check_scalar(by, allow_empty = TRUE)
55	5x	check_range(conf.level, range = c(0, 1))
56
57		# return empty ARD if no variables selected ----------------------
58	5x	if (is_empty(variables)) {
59	!	return(dplyr::tibble() \|> cards::as_card())
60		}
61
62		# check number of levels in `by`
63	5x	if (!is_empty(by) && dplyr::n_distinct(data[[by]], na.rm = TRUE) != 2L) {
64	1x	cli::cli_abort(
65	1x	"The {.arg by} argument must have a maximum of two levels.",
66	1x	call = get_cli_abort_call()
67		)
68		}
69
70		# calculate numerator and denominator values
71	4x	if (!is_empty(by)) {
72	1x	num <- data \|>
73	1x	dplyr::group_by(.data[[by]]) \|>
74	1x	dplyr::summarise(sum = sum(.data[[variables[1]]], na.rm = na.rm)) \|>
75	1x	dplyr::pull(sum)
76	1x	denom <- data \|>
77	1x	dplyr::group_by(.data[[by]]) \|>
78	1x	dplyr::summarise(sum = sum(.data[[variables[2]]], na.rm = na.rm)) \|>
79	1x	dplyr::pull(sum)
80		} else {
81	3x	num <- sum(data[[variables[1]]], na.rm = na.rm)
82	3x	denom <- sum(data[[variables[2]]], na.rm = na.rm)
83		}
84
85		# build ARD ------------------------------------------------------------------
86	4x	.format_poissontest_results(
87	4x	by = by,
88	4x	variables = variables,
89	4x	lst_tidy =
90	4x	cards::eval_capture_conditions(
91	4x	stats::poisson.test(x = num, T = denom, conf.level = conf.level, ...) \|> broom::tidy()
92		),
93		...
94		)
95		}
96
97		#' Convert Poisson test to ARD
98		#'
99		#' @inheritParams cards::tidy_as_ard
100		#' @inheritParams stats::poisson.test
101		#' @param by (`string`)\cr by column name
102		#' @param variables (`character`)\cr names of the event and time variables
103		#' @param ... passed to [poisson.test()]
104		#'
105		#' @return ARD data frame
106		#' @keywords internal
107		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
108		#' cardx:::.format_poissontest_results(
109		#' by = "ARM",
110		#' variables = c("CNSR", "AVAL"),
111		#' lst_tidy =
112		#' cards::eval_capture_conditions(
113		#' stats::poisson.test(sum(cards::ADTTE[["CNSR"]]), sum(cards::ADTTE[["AVAL"]])) \|>
114		#' broom::tidy()
115		#' )
116		#' )
117		.format_poissontest_results <- function(by = NULL, variables, lst_tidy, ...) {
118		# build ARD ------------------------------------------------------------------
119	4x	ret <-
120	4x	cards::tidy_as_ard(
121	4x	lst_tidy = lst_tidy,
122	4x	tidy_result_names =
123	4x	c(
124	4x	"estimate", "statistic",
125	4x	"p.value", "parameter", "conf.low", "conf.high",
126	4x	"method", "alternative"
127		),
128	4x	fun_args_to_record = c("conf.level", "r"),
129	4x	formals = formals(asNamespace("stats")[["poisson.test"]]),
130	4x	passed_args = dots_list(...),
131	4x	lst_ard_columns = list(context = "stats_poisson_test", variable = variables[2])
132		) \|>
133	4x	dplyr::distinct()
134
135		# rename "r" statistic to "mu"
136	4x	ret$stat_name[ret$stat_name == "r"] <- "mu"
137
138	4x	if (!is_empty(by)) {
139	1x	ret <- ret \|>
140	1x	dplyr::mutate(group1 = by)
141		}
142
143		# add the stat label ---------------------------------------------------------
144	4x	ret \|>
145	4x	dplyr::left_join(
146	4x	.df_poissontest_stat_labels(by = by),
147	4x	by = "stat_name"
148		) \|>
149	4x	dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) \|>
150	4x	cards::as_card() \|>
151	4x	cards::tidy_ard_column_order()
152		}
153
154		.df_poissontest_stat_labels <- function(by = NULL) {
155	4x	dplyr::tribble(
156	4x	~stat_name, ~stat_label,
157	4x	"estimate", ifelse(is_empty(by), "Estimated Rate", "Estimated Rate Ratio"),
158	4x	"statistic", ifelse(is_empty(by), "Number of Events", "Number of Events in First Sample"),
159	4x	"p.value", "p-value",
160	4x	"parameter", "Expected Count",
161	4x	"conf.low", "CI Lower Bound",
162	4x	"conf.high", "CI Upper Bound",
163	4x	"mu", "H0 Mean",
164	4x	"conf.level", "CI Confidence Level"
165		)
166		}

1		#' ARD Wilcoxon Rank-Sum Test
2		#'
3		#' @description
4		#' Analysis results data for paired and non-paired Wilcoxon Rank-Sum tests.
5		#'
6		#' @param data (`data.frame`)\cr
7		#' a data frame. See below for details.
8		#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
9		#' optional column name to compare by.
10		#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
11		#' column names to be compared. Independent tests will be computed for
12		#' each variable.
13		#' @param id ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
14		#' column name of the subject or participant ID.
15		#' @param conf.level (scalar `numeric`)\cr
16		#' confidence level for confidence interval. Default is `0.95`.
17		#' @param ... arguments passed to `wilcox.test(...)`
18		#'
19		#' @return ARD data frame
20		#' @name ard_stats_wilcox_test
21		#'
22		#' @details
23		#' For the `ard_stats_wilcox_test()` function, the data is expected to be one row per subject.
24		#' The data is passed as `wilcox.test(data[[variable]] ~ data[[by]], paired = FALSE, ...)`.
25		#'
26		#' For the `ard_stats_paired_wilcox_test()` function, the data is expected to be one row
27		#' per subject per by level. Before the test is calculated, the data are
28		#' reshaped to a wide format to be one row per subject.
29		#' The data are then passed as
30		#' `wilcox.test(x = data_wide[[<by level 1>]], y = data_wide[[<by level 2>]], paired = TRUE, ...)`.
31		#'
32		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
33		#' cards::ADSL \|>
34		#' dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")) \|>
35		#' ard_stats_wilcox_test(by = "ARM", variables = "AGE")
36		#'
37		#' # constructing a paired data set,
38		#' # where patients receive both treatments
39		#' cards::ADSL[c("ARM", "AGE")] \|>
40		#' dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")) \|>
41		#' dplyr::mutate(.by = ARM, USUBJID = dplyr::row_number()) \|>
42		#' dplyr::arrange(USUBJID, ARM) \|>
43		#' ard_stats_paired_wilcox_test(by = ARM, variables = AGE, id = USUBJID)
44		NULL
45
46		#' @rdname ard_stats_wilcox_test
47		#' @export
48		ard_stats_wilcox_test <- function(data, variables, by = NULL, conf.level = 0.95, ...) {
49	6x	set_cli_abort_call()
50
51		# check installed packages ---------------------------------------------------
52	6x	check_pkg_installed("broom")
53
54		# check/process inputs -------------------------------------------------------
55	6x	check_not_missing(data)
56	6x	check_not_missing(variables)
57	6x	check_data_frame(data)
58	6x	data <- dplyr::ungroup(data)
59	6x	cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
60	6x	check_scalar(by, allow_empty = TRUE)
61	6x	check_range(conf.level, range = c(0, 1))
62
63		# return empty ARD if no variables selected ----------------------------------
64	6x	if (is_empty(variables)) {
65	!	return(dplyr::tibble() \|> cards::as_card())
66		}
67
68		# build ARD ------------------------------------------------------------------
69	6x	lapply(
70	6x	variables,
71	6x	function(variable) {
72	7x	.format_wilcoxtest_results(
73	7x	by = by,
74	7x	variable = variable,
75	7x	lst_tidy =
76		# styler: off
77	7x	cards::eval_capture_conditions(
78	7x	if (!is_empty(by)) {
79	6x	stats::wilcox.test(data[[variable]] ~ data[[by]], conf.level = conf.level, ...) \|>
80	6x	broom::tidy()
81		}
82		else {
83	1x	stats::wilcox.test(data[[variable]], ...) \|>
84	1x	broom::tidy()
85		}
86		),
87		# styler: on
88	7x	paired = FALSE,
89		...
90		)
91		}
92		) \|>
93	6x	dplyr::bind_rows()
94		}
95
96		#' @rdname ard_stats_wilcox_test
97		#' @export
98		ard_stats_paired_wilcox_test <- function(data, by, variables, id, conf.level = 0.95, ...) {
99	2x	set_cli_abort_call()
100
101		# check installed packages ---------------------------------------------------
102	2x	check_pkg_installed("broom")
103
104		# check/process inputs -------------------------------------------------------
105	2x	check_not_missing(data)
106	2x	check_not_missing(variables)
107	2x	check_not_missing(by)
108	2x	check_not_missing(id)
109	2x	check_data_frame(data)
110	2x	data <- dplyr::ungroup(data)
111	2x	cards::process_selectors(data, by = {{ by }}, variables = {{ variables }}, id = {{ id }})
112	2x	check_scalar(by)
113	2x	check_scalar(id)
114
115		# return empty ARD if no variables selected ----------------------------------
116	2x	if (is_empty(variables)) {
117	!	return(dplyr::tibble() \|> cards::as_card())
118		}
119
120		# build ARD ------------------------------------------------------------------
121	2x	lapply(
122	2x	variables,
123	2x	function(variable) {
124	2x	.format_wilcoxtest_results(
125	2x	by = by,
126	2x	variable = variable,
127	2x	lst_tidy =
128	2x	cards::eval_capture_conditions({
129		# adding this reshape inside the eval, so if there is an error it's captured in the ARD object
130	2x	data_wide <- .paired_data_pivot_wider(data, by = by, variable = variable, id = id)
131		# perform paired wilcox test
132	1x	stats::wilcox.test(x = data_wide[["by1"]], y = data_wide[["by2"]], paired = TRUE, conf.level = conf.level, ...) \|>
133	1x	broom::tidy()
134		}),
135	2x	paired = TRUE,
136		...
137		)
138		}
139		) \|>
140	2x	dplyr::bind_rows()
141		}
142
143
144		#' Convert Wilcoxon test to ARD
145		#'
146		#' @inheritParams cards::tidy_as_ard
147		#' @inheritParams stats::wilcox.test
148		#' @param by (`string`)\cr by column name
149		#' @param variable (`string`)\cr variable column name
150		#' @param ... passed to `stats::wilcox.test(...)`
151		#'
152		#' @return ARD data frame
153		#'
154		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
155		#' # Pre-processing ADSL to have grouping factor (ARM here) with 2 levels
156		#' ADSL <- cards::ADSL \|>
157		#' dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")) \|>
158		#' ard_stats_wilcox_test(by = "ARM", variables = "AGE")
159		#'
160		#' cardx:::.format_wilcoxtest_results(
161		#' by = "ARM",
162		#' variable = "AGE",
163		#' paired = FALSE,
164		#' lst_tidy =
165		#' cards::eval_capture_conditions(
166		#' stats::wilcox.test(ADSL[["AGE"]] ~ ADSL[["ARM"]], paired = FALSE) \|>
167		#' broom::tidy()
168		#' )
169		#' )
170		#'
171		#' @keywords internal
172		.format_wilcoxtest_results <- function(by = NULL, variable, lst_tidy, paired, ...) {
173		# build ARD ------------------------------------------------------------------
174	9x	ret <-
175	9x	cards::tidy_as_ard(
176	9x	lst_tidy = lst_tidy,
177	9x	tidy_result_names = c("statistic", "p.value", "method", "alternative"),
178	9x	fun_args_to_record = c(
179	9x	"mu", "paired", "exact", "correct", "conf.int",
180	9x	"conf.level", "tol.root", "digits.rank"
181		),
182	9x	formals = formals(asNamespace("stats")[["wilcox.test.default"]]),
183	9x	passed_args = c(list(paired = paired), dots_list(...)),
184	9x	lst_ard_columns = list(variable = variable, context = "stats_wilcox_test")
185		)
186
187	9x	if (!is_empty(by)) {
188	8x	ret <- ret \|>
189	8x	dplyr::mutate(group1 = by)
190		}
191
192		# add the stat label ---------------------------------------------------------
193	9x	ret \|>
194	9x	dplyr::left_join(
195	9x	.df_wilcoxtest_stat_labels(by),
196	9x	by = "stat_name"
197		) \|>
198	9x	dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) \|>
199	9x	cards::as_card() \|>
200	9x	cards::tidy_ard_column_order()
201		}
202
203
204		.df_wilcoxtest_stat_labels <- function(by = NULL) {
205	9x	dplyr::tribble(
206	9x	~stat_name, ~stat_label,
207	9x	"statistic", ifelse(is.null(by), "V Statistic", "X-squared Statistic"),
208	9x	"parameter", "Degrees of Freedom",
209	9x	"estimate", "Median of the Difference",
210	9x	"p.value", "p-value",
211	9x	"conf.low", "CI Lower Bound",
212	9x	"conf.high", "CI Upper Bound",
213	9x	"paired", "Paired test",
214	9x	"conf.level", "CI Confidence Level",
215		)
216		}

1		#' Deprecated functions
2		#'
3		#' @description
4		#' `r lifecycle::badge('deprecated')`\cr
5		#' Some functions have been deprecated and are no longer being actively
6		#' supported.
7		#'
8		#' @name deprecated
9		#' @keywords internal
10		NULL
11
12		# v0.3.0 -----------------------------------------------------------------------
13		#' @importFrom cards ard_continuous
14		#' @export
15		cards::ard_continuous
16
17		#' @importFrom cards ard_categorical
18		#' @export
19		cards::ard_categorical
20
21		#' @importFrom cards ard_dichotomous
22		#' @export
23		cards::ard_dichotomous
24
25		#' @rdname deprecated
26		#' @export
27		ard_continuous.survey.design <- function(data, ...) {
28	!	lifecycle::deprecate_soft(
29	!	when = "0.3.0",
30	!	what = "cardx::ard_continuous()",
31	!	with = "cardx::ard_summary()"
32		)
33
34	!	ard_summary(data = data, ...) \|>
35	!	dplyr::mutate(context = "continuous")
36		}
37
38		#' @rdname deprecated
39		#' @export
40		ard_categorical.survey.design <- function(data, ...) {
41	!	lifecycle::deprecate_soft(
42	!	when = "0.3.0",
43	!	what = "cardx::ard_categorical()",
44	!	with = "cardx::ard_tabulate()"
45		)
46
47	!	ard_tabulate(data = data, ...) \|>
48	!	dplyr::mutate(context = "categorical")
49		}
50
51		#' @rdname deprecated
52		#' @export
53		ard_dichotomous.survey.design <- function(data, ...) {
54	!	lifecycle::deprecate_soft(
55	!	when = "0.3.0",
56	!	what = "cardx::ard_dichotomous()",
57	!	with = "cardx::ard_tabulate_value()",
58	!	details = "The `value` argument no longer has a default value and must be specified."
59		)
60
61	!	ard_tabulate_value(data = data, ...) \|>
62	!	dplyr::mutate(context = "dichotomous")
63		}
64
65		#' @rdname deprecated
66		#' @export
67		ard_categorical_max <- function(...) {
68	!	lifecycle::deprecate_soft(
69	!	when = "0.3.0",
70	!	what = "cardx::ard_categorical_max()",
71	!	with = "cardx::ard_tabulate_max()"
72		)
73
74	!	ard_tabulate_max(...)
75		}
76
77		#' @rdname deprecated
78		#' @export
79		ard_emmeans_mean_difference <- function(...) {
80	!	lifecycle::deprecate_soft(
81	!	when = "0.3.1",
82	!	what = "cardx::ard_emmeans_mean_difference()",
83	!	with = "cardx::ard_emmeans_contrast()"
84		)
85
86	!	ard_emmeans_contrast(...)
87		}

1		#' ARDs for LS Mean Difference and LS Means
2		#'
3		#' @description
4		#' The `ard_emmeans_contrast()` function calculates least-squares mean differences using the 'emmeans'
5		#' package using the following
6		#'
7		#' ```r
8		#' emmeans::emmeans(object = <regression model>, specs = ~ <primary covariate>) \|>
9		#' emmeans::contrast(method = "pairwise") \|>
10		#' summary(infer = TRUE, level = <confidence level>)
11		#' ```
12		#'
13		#' @param data (`data.frame`/`survey.design`)\cr
14		#' a data frame or survey design object
15		#' @inheritParams construct_model
16		#' @param response_type (`string`)
17		#' string indicating whether the model outcome is `'continuous'`
18		#' or `'dichotomous'`. When `'dichotomous'`, the call to `emmeans::emmeans()` is
19		#' supplemented with argument `regrid="response"`.
20		#' @param conf.level (scalar `numeric`)\cr
21		#' confidence level for confidence interval. Default is `0.95`.
22		#' @param primary_covariate (`string`)\cr
23		#' string indicating the primary covariate (typically the dichotomous treatment variable).
24		#' Default is the first covariate listed in the formula.
25		#'
26		#' @return ARD data frame
27		#' @export
28		#' @rdname ard_emmeans
29		#'
30		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "emmeans"))
31		#' # LS Mean Difference
32		#' ard_emmeans_contrast(
33		#' data = mtcars,
34		#' formula = mpg ~ am + cyl,
35		#' method = "lm"
36		#' )
37		#'
38		#' ard_emmeans_contrast(
39		#' data = mtcars,
40		#' formula = vs ~ am + mpg,
41		#' method = "glm",
42		#' method.args = list(family = binomial),
43		#' response_type = "dichotomous"
44		#' )
45		ard_emmeans_contrast <- function(data, formula, method,
46		method.args = list(),
47		package = "base",
48		response_type = c("continuous", "dichotomous"),
49		conf.level = 0.95,
50		primary_covariate =
51		stats::terms(formula) \|>
52		attr("term.labels") \|>
53		getElement(1L)) {
54	5x	set_cli_abort_call()
55
56		# check package installation -------------------------------------------------
57	5x	check_pkg_installed(c("emmeans", package))
58	5x	check_not_missing(data)
59	5x	check_not_missing(formula)
60	5x	check_not_missing(method)
61	5x	check_class(data, c("data.frame", "survey.design"))
62	5x	check_class(formula, cls = "formula")
63	5x	check_string(package)
64	5x	check_string(primary_covariate)
65	5x	check_scalar(conf.level)
66	5x	check_range(conf.level, range = c(0, 1))
67	5x	response_type <- arg_match(response_type, error_call = get_cli_abort_call())
68
69	5x	data_in <- if (dplyr::last(class(data)) == "survey.design") data$variables else data
70
71		# build ARD ------------------------------------------------------------------
72	5x	result <- cards::ard_mvsummary(
73	5x	data = data_in,
74	5x	variables = all_of(primary_covariate),
75	5x	statistic = all_of(primary_covariate) ~ list(
76	5x	emmeans =
77	5x	.calc_emmeans_contrast(
78	5x	data, formula, method, {{ method.args }}, package, response_type, conf.level, primary_covariate
79		)
80		)
81		)
82
83	5x	result \|>
84	5x	dplyr::select(-"stat_label") \|>
85	5x	dplyr::left_join(
86	5x	.df_emmeans_stat_labels("contrast"),
87	5x	by = "stat_name"
88		) \|>
89	5x	dplyr::mutate(
90	5x	variable = "contrast",
91	5x	variable_level = if ("variable_level" %in% .data$stat_name) {
92	5x	.data$stat[.data$stat_name == "variable_level"]
93		} else {
94	5x	NA
95		},
96	5x	group1 = .env$primary_covariate,
97	5x	stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name),
98	5x	context = "emmeans_contrast",
99		) \|>
100	5x	dplyr::filter(.data$stat_name != "variable_level") \|>
101	5x	cards::as_card() \|>
102	5x	cards::tidy_ard_column_order() \|>
103	5x	cards::tidy_ard_row_order()
104		}
105
106		# function to perform calculations ---------------------------------------------
107		.calc_emmeans_contrast <- function(data, formula, method,
108		method.args,
109		package,
110		response_type,
111		conf.level,
112		primary_covariate) {
113	5x	cards::as_cards_fn(
114	5x	\(x, ...) {
115		# construct primary model ------------------------------------------------
116	5x	mod <-
117	5x	construct_model(
118	5x	data = data, formula = formula, method = method,
119	5x	method.args = {{ method.args }},
120	5x	package = package, env = caller_env()
121		)
122
123		# emmeans ----------------------------------------------------------------
124	4x	emmeans_args <- list(object = mod, specs = reformulate2(primary_covariate))
125	3x	if (response_type %in% "dichotomous") emmeans_args <- c(emmeans_args, list(regrid = "response"))
126	4x	emmeans <-
127	4x	withr::with_namespace(
128	4x	package = "emmeans",
129	4x	code = do.call("emmeans", args = emmeans_args)
130		)
131
132		# calculate mean difference estimate -----------------------------------
133	4x	results <-
134	4x	emmeans \|>
135	4x	emmeans::contrast(method = "pairwise") \|>
136	4x	summary(infer = TRUE, level = conf.level) \|>
137	4x	dplyr::rename(variable_level = "contrast")
138
139		# convert results to ARD format ----------------------------------------
140	4x	results \|>
141	4x	dplyr::as_tibble() \|>
142	4x	dplyr::rename(
143	4x	conf.low = any_of("asymp.LCL"),
144	4x	conf.high = any_of("asymp.UCL"),
145	4x	conf.low = any_of("lower.CL"),
146	4x	conf.high = any_of("upper.CL"),
147	4x	std.error = any_of("SE")
148		) \|>
149	4x	dplyr::select(any_of(c(
150	4x	"variable_level", "estimate",
151	4x	"std.error", "df",
152	4x	"conf.low", "conf.high", "p.value"
153		))) \|>
154	4x	dplyr::mutate(
155	4x	conf.level = .env$conf.level,
156	4x	method = ifelse(
157	4x	length(attr(stats::terms(formula), "term.labels") \|> discard(~ startsWith(., "1 \|"))) == 1L,
158	4x	"Least-squares mean difference",
159	4x	"Least-squares adjusted mean difference"
160		)
161		)
162		},
163	5x	stat_names = c("variable_level", "estimate", "std.error", "df", "conf.low", "conf.high", "p.value", "conf.level", "method")
164		)
165		}
166
167		.df_emmeans_stat_labels <- function(estimate) {
168	10x	dplyr::tribble(
169	10x	~stat_name, ~stat_label,
170	10x	"estimate", if (estimate == "contrast") "Mean Difference" else "Mean",
171	10x	"std.error", "Standard Error",
172	10x	"df", "Degrees of Freedom",
173	10x	"conf.low", "CI Lower Bound",
174	10x	"conf.high", "CI Upper Bound",
175	10x	"p.value", "p-value",
176	10x	"conf.level", "CI Confidence Level",
177		)
178		}

1		#' ARD survey categorical CIs
2		#'
3		#' Confidence intervals for categorical variables calculated via
4		#' [`survey::svyciprop()`].
5		#'
6		#' @inheritParams ard_summary.survey.design
7		#' @inheritParams ard_categorical_ci.data.frame
8		#' @param method (`string`)\cr
9		#' Method passed to `survey::svyciprop(method)`
10		#' @param df (`numeric`)\cr
11		#' denominator degrees of freedom, passed to `survey::svyciprop(df)`.
12		#' Default is `survey::degf(data)`.
13		#' @param ... arguments passed to `survey::svyciprop()`
14		#'
15		#' @return ARD data frame
16		#' @export
17		#'
18		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "survey"))
19		#' data(api, package = "survey")
20		#' dclus1 <- survey::svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc)
21		#'
22		#' ard_categorical_ci(dclus1, variables = sch.wide)
23		#' ard_categorical_ci(dclus1, variables = sch.wide, value = sch.wide ~ "Yes", method = "xlogit")
24		ard_categorical_ci.survey.design <- function(data,
25		variables,
26		by = NULL,
27		method = c("logit", "likelihood", "asin", "beta", "mean", "xlogit"),
28		conf.level = 0.95,
29		value = list(where(is_binary) ~ 1L, where(is.logical) ~ TRUE),
30		df = survey::degf(data),
31		...) {
32	17x	set_cli_abort_call()
33	17x	check_dots_empty()
34
35		# check inputs ---------------------------------------------------------------
36	17x	check_not_missing(data)
37	17x	check_class(data, "survey.design")
38	17x	check_not_missing(variables)
39
40	17x	cards::process_selectors(
41	17x	data = data$variables,
42	17x	variables = {{ variables }},
43	17x	by = {{ by }}
44		)
45	17x	cards::process_formula_selectors(
46	17x	data = data$variables,
47	17x	value = value
48		)
49	17x	check_scalar(by, allow_empty = TRUE)
50	17x	check_scalar_range(conf.level, range = c(0, 1))
51	17x	method <- arg_match(method)
52
53		# return empty ARD if no variables selected ----------------------------------
54	17x	if (is_empty(variables)) {
55	1x	return(dplyr::tibble() \|> cards::as_card())
56		}
57
58		# calculate and return ARD of one sample CI ----------------------------------
59	16x	.calculate_ard_onesample_survey_ci(
60	16x	FUN = .svyciprop_wrapper,
61	16x	data = data,
62	16x	variables = variables,
63	16x	by = by,
64	16x	conf.level = conf.level,
65	16x	method = method,
66	16x	df = df,
67	16x	value = value,
68		...
69		)
70		}
71
72		.calculate_ard_onesample_survey_ci <- function(FUN, data, variables, by, conf.level, value, ...) {
73		# calculate results ----------------------------------------------------------
74	16x	map(
75	16x	variables,
76	16x	function(variable) {
77	26x	.calculate_one_ard_categorical_survey_ci(
78	26x	FUN = FUN,
79	26x	data = data,
80	26x	variable = variable,
81	26x	by = by,
82	26x	conf.level = conf.level,
83	26x	value = value[[variable]],
84		...
85		)
86		}
87		) \|>
88	16x	dplyr::bind_rows()
89		}
90
91		.calculate_one_ard_categorical_survey_ci <- function(FUN, data, variable, by, conf.level, value, ...) {
92	26x	variable_levels <- .unique_values_sort(data$variables, variable = variable)
93	26x	if (!is_empty(by)) {
94	6x	by_levels <- .unique_values_sort(data$variables, variable = by)
95	6x	lst_data <-
96	6x	map(
97	6x	by_levels,
98	6x	~ call2("subset", expr(data), expr(!!sym(by) == !!.x)) \|> eval()
99		) \|>
100	6x	set_names(as.character(by_levels))
101		}
102
103	26x	df_full <-
104	26x	case_switch(
105	26x	!is_empty(by) ~
106	26x	tidyr::expand_grid(
107	26x	group1_level = as.character(by_levels) \|> as.list(),
108	26x	variable_level = as.character(variable_levels) \|> as.list()
109		) \|>
110	26x	dplyr::mutate(group1 = .env$by, variable = .env$variable),
111	26x	.default =
112	26x	dplyr::tibble(
113	26x	variable = .env$variable,
114	26x	variable_level = as.character(variable_levels) \|> as.list()
115		)
116		) \|>
117	26x	dplyr::rowwise() \|>
118	26x	dplyr::mutate(
119	26x	lst_result =
120	26x	FUN(
121	26x	data =
122	26x	case_switch(
123	26x	is_empty(.env$by) ~ data,
124	26x	.default = lst_data[[.data$group1_level]]
125		),
126	26x	variable = .data$variable,
127	26x	variable_level = .data$variable_level,
128	26x	conf.level = .env$conf.level,
129		...
130		) \|>
131	26x	list(),
132	26x	result =
133	26x	.data$lst_result[["result"]] \|>
134	26x	enframe("stat_name", "stat") \|>
135	26x	list(),
136	26x	warning = .data$lst_result["warning"] \|> unname(),
137	26x	error = .data$lst_result["error"] \|> unname(),
138	26x	context = "categorical_ci"
139		) \|>
140	26x	dplyr::select(-"lst_result") \|>
141	26x	dplyr::ungroup() \|>
142	26x	tidyr::unnest("result") \|>
143	26x	dplyr::mutate(
144	26x	stat_label = .data$stat_name,
145	26x	fmt_fun = map(.data$stat, ~ case_switch(is.numeric(.x) ~ 2L, .default = as.character))
146		) \|>
147	26x	cards::as_card() \|>
148	26x	cards::tidy_ard_column_order() \|>
149	26x	.restore_original_column_types(data = data$variables)
150
151		# if a value was passed for the variable, subset on those results
152	26x	if (!is_empty(value)) {
153	3x	df_full <- df_full \|>
154	3x	dplyr::filter(unlist(.data$variable_level) %in% .env$value)
155		}
156
157	26x	df_full
158		}
159
160
161		.svyciprop_wrapper <- function(data, variable, variable_level, conf.level, method, df, ...) {
162	64x	lst_results <-
163	64x	cards::eval_capture_conditions(
164	64x	survey::svyciprop(
165	64x	formula = inject(~ I(!!sym(variable) == !!variable_level)),
166	64x	design = data,
167	64x	method = method,
168	64x	level = conf.level,
169	64x	df = df,
170		...
171		) %>%
172	64x	{list(.[[1]], attr(., "ci"))} \|> # styler: off
173	64x	unlist() \|>
174	64x	set_names(c("estimate", "conf.low", "conf.high")) \|>
175	64x	as.list()
176		)
177
178		# add NULL results if error
179	64x	if (is_empty(lst_results[["result"]])) {
180	!	lst_results[["result"]] <- rep_named(c("estimate", "conf.low", "conf.high"), list(NULL))
181		}
182
183		# add other args
184	64x	lst_results[["result"]] <- lst_results[["result"]] \|> append(list(method = method, conf.level = conf.level))
185
186		# return list result
187	64x	lst_results
188		}
189
190
191		case_switch <- function(..., .default = NULL) {
192		dots <- dots_list(...)
193
194		for (f in dots) {
195		if (isTRUE(eval(f_lhs(f), envir = attr(f, ".Environment")))) {
196		return(eval(f_rhs(f), envir = attr(f, ".Environment")))
197		}
198		}
199
200		return(.default)
201		}

1		#' ARD Continuous Survey Statistics
2		#'
3		#' Returns an ARD of weighted statistics using the `{survey}` package.
4		#'
5		#' @param data (`survey.design`)\cr
6		#' a design object often created with [`survey::svydesign()`].
7		#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
8		#' columns to include in summaries.
9		#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
10		#' results are calculated for all combinations of the columns specified,
11		#' including unobserved combinations and unobserved factor levels.
12		#' @param statistic ([`formula-list-selector`][cards::syntax])\cr
13		#' a named list, a list of formulas,
14		#' or a single formula where the list element is a character vector of
15		#' statistic names to include. See below for options.
16		#' @param fmt_fun ([`formula-list-selector`][cards::syntax])\cr
17		#' a named list, a list of formulas,
18		#' or a single formula where the list element is a named list of functions
19		#' (or the RHS of a formula),
20		#' e.g. `list(mpg = list(mean = \(x) round(x, digits = 2) \|> as.character))`.
21		#' @param stat_label ([`formula-list-selector`][cards::syntax])\cr
22		#' a named list, a list of formulas, or a single formula where
23		#' the list element is either a named list or a list of formulas defining the
24		#' statistic labels, e.g. `everything() ~ list(mean = "Mean", sd = "SD")` or
25		#' `everything() ~ list(mean ~ "Mean", sd ~ "SD")`.
26		#' @inheritParams ard_tabulate.survey.design
27		#' @inheritParams rlang::args_dots_empty
28		#'
29		#' @section statistic argument:
30		#'
31		#' The following statistics are available:
32		#' `r cardx:::accepted_svy_stats(FALSE) \|> shQuote("sh") \|> paste(collapse = ", ")`,
33		#' where 'p##' is are the percentiles and `##` is an integer between 0 and 100.
34		#'
35		#' The design effect (`"deff"`) is calculated only when requested in the `statistic` argument.
36		#'
37		#'
38		#' @return an ARD data frame of class 'card'
39		#' @export
40		#'
41		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "survey"))
42		#' data(api, package = "survey")
43		#' dclus1 <- survey::svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc)
44		#'
45		#' ard_summary(
46		#' data = dclus1,
47		#' variables = api00,
48		#' by = stype
49		#' )
50		ard_summary.survey.design <- function(data, variables, by = NULL,
51		statistic = everything() ~ c("median", "p25", "p75"),
52		fmt_fun = NULL,
53		stat_label = NULL,
54		fmt_fn = deprecated(),
55		...) {
56	50x	set_cli_abort_call()
57	50x	check_dots_empty()
58
59		# deprecated args ------------------------------------------------------------
60	50x	if (lifecycle::is_present(fmt_fn)) {
61	!	lifecycle::deprecate_soft(
62	!	when = "0.2.5",
63	!	what = "ard_summary(fmt_fn)",
64	!	with = "ard_summary(fmt_fun)"
65		)
66	!	fmt_fun <- fmt_fn
67		}
68
69		# check installed packages ---------------------------------------------------
70	50x	check_pkg_installed(pkg = "survey")
71
72		# check inputs ---------------------------------------------------------------
73	50x	check_not_missing(variables)
74
75		# process inputs -------------------------------------------------------------
76	50x	cards::process_selectors(data$variables, variables = {{ variables }}, by = {{ by }})
77	50x	variables <- setdiff(variables, by)
78	50x	check_na_factor_levels(data$variables, by)
79
80	50x	cards::process_formula_selectors(
81	50x	data$variables[variables],
82	50x	statistic = statistic,
83	50x	fmt_fun = fmt_fun,
84	50x	stat_label = stat_label
85		)
86	50x	cards::fill_formula_selectors(
87	50x	data$variables[variables],
88	50x	statistic = formals(asNamespace("cardx")[["ard_summary.survey.design"]])[["statistic"]] \|> eval()
89		)
90	50x	cards::check_list_elements(
91	50x	x = statistic,
92	50x	predicate = \(x) all(x %in% accepted_svy_stats()),
93	50x	error_msg = c("Error in the values of the {.arg statistic} argument for variable {.val {variable}}.",
94	50x	i = "Values must be in {.val {cardx:::accepted_svy_stats(FALSE)}}"
95		)
96		)
97
98		# return empty ARD if no variables selected ----------------------------------
99	50x	if (is_empty(variables)) {
100	!	return(dplyr::tibble() \|> cards::as_card())
101		}
102
103		# compute the weighted statistics --------------------------------------------
104	50x	df_stats <-
105	50x	map(
106	50x	names(statistic),
107	50x	function(variable) {
108	92x	map(
109	92x	statistic[[variable]],
110	92x	function(statistic) {
111	324x	.compute_svy_stat(data, variable = variable, by = by, stat_name = statistic)
112		}
113		)
114		}
115		) \|>
116	50x	dplyr::bind_rows() \|>
117	50x	.restore_original_column_types(data = data$variables)
118
119		# add stat_labels ------------------------------------------------------------
120	50x	df_stats <-
121	50x	df_stats \|>
122	50x	dplyr::left_join(
123	50x	.default_svy_stat_labels(),
124	50x	by = "stat_name"
125		) \|>
126	50x	dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name))
127	50x	if (!is_empty(stat_label)) {
128	1x	df_stats <-
129	1x	dplyr::rows_update(
130	1x	df_stats,
131	1x	dplyr::tibble(
132	1x	variable = names(stat_label),
133	1x	stat_name = map(.data$variable, ~ names(stat_label[[.x]])),
134	1x	stat_label = map(.data$variable, ~ stat_label[[.x]] \|>
135	1x	unname() \|>
136	1x	unlist())
137		) \|>
138	1x	tidyr::unnest(cols = c("stat_name", "stat_label")),
139	1x	by = c("variable", "stat_name"),
140	1x	unmatched = "ignore"
141		)
142		}
143
144		# add formatting stats -------------------------------------------------------
145	50x	df_stats$fmt_fun <- list(1L)
146	50x	if (!is_empty(fmt_fun)) {
147	1x	df_stats <-
148	1x	dplyr::rows_update(
149	1x	df_stats,
150	1x	dplyr::tibble(
151	1x	variable = names(fmt_fun),
152	1x	stat_name = map(.data$variable, ~ names(fmt_fun[[.x]])),
153	1x	fmt_fun = map(.data$variable, ~ fmt_fun[[.x]] \|> unname())
154		) \|>
155	1x	tidyr::unnest(cols = c("stat_name", "fmt_fun")),
156	1x	by = c("variable", "stat_name"),
157	1x	unmatched = "ignore"
158		)
159		}
160
161		# add class and return ARD object --------------------------------------------
162	50x	df_stats \|>
163	50x	dplyr::mutate(context = "continuous") \|>
164	50x	cards::as_card() \|>
165	50x	cards::tidy_ard_column_order()
166		}
167
168		.default_svy_stat_labels <- function(stat_label = NULL) {
169	50x	dplyr::tribble(
170	50x	~stat_name, ~stat_label,
171	50x	"mean", "Mean",
172	50x	"median", "Median",
173	50x	"var", "Variance",
174	50x	"sd", "Standard Deviation",
175	50x	"sum", "Sum",
176	50x	"deff", "Design Effect",
177	50x	"mean.std.error", "SE(Mean)",
178	50x	"min", "Minimum",
179	50x	"max", "Maximum",
180	50x	"p25", "25% Percentile",
181	50x	"p75", "75% Percentile"
182		)
183		}
184
185		accepted_svy_stats <- function(expand_quantiles = TRUE) {
186	92x	base_stats <-
187	92x	c("mean", "median", "min", "max", "sum", "var", "sd", "mean.std.error", "deff")
188	92x	if (expand_quantiles) {
189	92x	return(c(base_stats, paste0("p", 0:100)))
190		}
191	!	c(base_stats, "p##")
192		}
193
194
195
196		# this function calculates the summary for a single variable, single statistic
197		# and for all `by` levels. it returns an ARD data frame
198		.compute_svy_stat <- function(data, variable, by = NULL, stat_name) {
199		# difftime variable needs to be transformed into numeric for svyquantile
200	324x	if (inherits(data$variables[[variable]], "difftime")) {
201	!	data$variables[[variable]] <- unclass(data$variables[[variable]])
202		}
203
204		# styler: off
205	12x	if (stat_name %in% "mean") args <- list(FUN = survey::svymean)
206	6x	else if (stat_name %in% "sum") args <- list(FUN = survey::svytotal)
207	6x	else if (stat_name %in% "var") args <- list(FUN = survey::svyvar)
208	6x	else if (stat_name %in% "sd") args <- list(FUN = \(...) survey::svyvar(...) \|> sqrt())
209	6x	else if (stat_name %in% "mean.std.error") args <- list(FUN = \(...) survey::svymean(...) \|> survey::SE())
210	6x	else if (stat_name %in% "deff") args <- list(FUN = \(...) survey::svymean(..., deff = TRUE) \|> survey::deff())
211	12x	else if (stat_name %in% "min") args <- list(FUN = \(x, design, na.rm, ...) min(design$variables[[all.vars(x)]], na.rm = na.rm))
212	12x	else if (stat_name %in% "max") args <- list(FUN = \(x, design, na.rm, ...) max(design$variables[[all.vars(x)]], na.rm = na.rm))
213		# define functions for the quantiles
214	258x	else if (stat_name %in% c("median", paste0("p", 0:100))) {
215	258x	quantile <- ifelse(stat_name %in% "median", 0.5, as.numeric(substr(stat_name, 2, nchar(stat_name))) / 100)
216		# univariate results are returned in a different format from stratified.
217	258x	args <-
218	258x	if (is_empty(by)) list(FUN = \(...) survey::svyquantile(...)[[1]], quantiles = quantile)
219	258x	else list(FUN = \(...) survey::svyquantile(...), quantiles = quantile)
220		}
221		# styler: on
222
223		# adding additional args to pass
224	324x	args <-
225	324x	args \|>
226	324x	append(
227	324x	list(
228	324x	design = data,
229		# if all values are NA, turn na.rm to FALSE to avoid error
230	324x	na.rm = !all(is.na(data$variables[[variable]])),
231	324x	keep.var = FALSE
232		)
233		)
234
235
236		# if no by variable, calculate univariate statistics
237	324x	if (is_empty(by)) {
238	46x	args$x <- reformulate2(variable)
239		# calculate statistic (and remove FUN from the argument list)
240	46x	stat <-
241	46x	cards::eval_capture_conditions(
242	46x	do.call(args$FUN, args = args \|> utils::modifyList(list(FUN = NULL)))
243		)
244		# if the result was calculated, then put it into a tibble
245	46x	if (!is.null(stat[["result"]])) {
246	40x	df_stat <-
247	40x	dplyr::tibble(variable, stat[["result"]][1]) \|>
248	40x	set_names(c("variable", "stat")) \|>
249	40x	dplyr::mutate(
250	40x	stat = as.list(unname(.data$stat)),
251	40x	warning = list(stat[["warning"]]),
252	40x	error = list(stat[["error"]])
253		)
254		}
255		# otherwise, if there was an error return tibble with error message
256		else {
257	6x	df_stat <-
258	6x	dplyr::tibble(
259	6x	variable = .env$variable,
260	6x	stat = list(NULL),
261	6x	warning = list(.env$stat[["warning"]]),
262	6x	error = list(.env$stat[["error"]])
263		)
264		}
265		}
266
267		# if there is by variable(s), calculate statistics for the combinations
268		else {
269	278x	args$formula <- reformulate2(variable)
270	278x	args$by <- reformulate2(by)
271	278x	stat <-
272	278x	if (stat_name %in% c("median", paste0("p", 0:100))) {
273	248x	cards::eval_capture_conditions(
274	248x	do.call(survey::svyby, args) \|> set_names(c(by, "quantile", "ci.2.5", "ci.97.5", "se"))
275		)
276	278x	} else if (stat_name %in% "deff") {
277	3x	stat <-
278	3x	cards::eval_capture_conditions(
279	3x	do.call(
280	3x	survey::svyby,
281	3x	args \|> utils::modifyList(list(FUN = survey::svymean, deff = TRUE))
282		) \|>
283	3x	dplyr::select(all_of(by), dplyr::last_col()) # the last column is DEff
284		)
285		} else {
286	27x	cards::eval_capture_conditions(do.call(survey::svyby, args))
287		}
288
289		# if the result was calculated, then put it into a tibble
290	278x	if (!is.null(stat[["result"]])) {
291	122x	df_stat <- stat[["result"]][seq_len(length(by) + 1L)] \|>
292	122x	dplyr::as_tibble() %>%
293		# adding unobserved combinations of "by" variables
294		{
295	122x	dplyr::full_join(
296	122x	cards::nest_for_ard(data$variables, by = by, key = "...ard_no_one_will_ever_pick_this...", rename = FALSE, list_columns = FALSE) \|>
297	122x	dplyr::select(-"...ard_no_one_will_ever_pick_this..."),
298		.,
299	122x	by = by
300		)
301		} \|>
302	122x	set_names(paste0("group", seq_along(by), "_level"), "stat") \|>
303	122x	dplyr::bind_cols(
304	122x	dplyr::tibble(!!!c(by, variable)) \|>
305	122x	set_names(paste0("group", seq_along(by)), "variable")
306		) \|>
307	122x	dplyr::mutate(
308	122x	dplyr::across(c(cards::all_ard_groups("levels"), "stat"), as.list),
309	122x	warning = list(.env$stat[["warning"]]),
310	122x	error = list(.env$stat[["error"]])
311		)
312		}
313		# otherwise, if there was an error return tibble with error message
314		else {
315	156x	df_stat <-
316	156x	cards::nest_for_ard(data$variables, by = by, key = "...ard_no_one_will_ever_pick_this...") \|>
317	156x	dplyr::select(-"...ard_no_one_will_ever_pick_this...") \|>
318	156x	dplyr::mutate(
319	156x	variable = .env$variable,
320	156x	stat = list(NULL),
321	156x	warning = list(.env$stat[["warning"]]),
322	156x	error = list(.env$stat[["error"]])
323		)
324		}
325		}
326
327	324x	df_stat \|>
328	324x	dplyr::mutate(
329	324x	stat_name = .env$stat_name,
330	324x	across(
331	324x	c(cards::all_ard_groups("levels"), cards::all_ard_variables("levels")),
332	324x	~ map(.x, as.character)
333		)
334		)
335		}
336
337
338		# some operations coerce the variable types to character.
339		# this function will convert the `_level` values to their original types
340		.restore_original_column_types <- function(ard, data) {
341		# identify grouping variable names with associated levels --------------------
342	179x	by <- character()
343	179x	for (v in names(dplyr::select(ard, cards::all_ard_groups("names")))) {
344	120x	if (paste0(v, "_level") %in% names(ard)) by <- c(by, ard[[v]][1]) # styler: off
345		}
346	179x	variables <- character()
347	179x	if ("variable" %in% names(ard)) {
348	179x	variables <- ard[["variable"]] \|> unique()
349		}
350
351		# if there are no levels to correct, then return ard as it is
352	!	if (is_empty(variables) && is_empty(by)) return(ard) # styler: off
353
354		# add an ID for sorting
355	179x	ard$...ard_id_for_sorting... <- seq_len(nrow(ard))
356
357		# nest the raw data with original types --------------------------------------
358	179x	if (!is_empty(variables)) {
359	65x	if (!"variable_level" %in% names(ard)) df_variable_orginal_types <- unique(ard["variable"]) # styler: off
360	114x	else if (!all(variables %in% names(data))) { # for survfit summaries, the times/probs var is not in the data
361	5x	df_variable_orginal_types <- unique(ard[c("variable", "variable_level")])
362		} else {
363	109x	df_variable_orginal_types <-
364	109x	map(
365	109x	variables,
366	109x	~ cards::nest_for_ard(tidyr::drop_na(data[.x]), by = .x, include_data = FALSE) \|>
367	109x	stats::setNames(c("variable", "variable_level"))
368		) \|>
369	109x	dplyr::bind_rows()
370		}
371		}
372	179x	if (!is_empty(by)) {
373	117x	df_by_orginal_types <-
374	117x	cards::nest_for_ard(tidyr::drop_na(data[by]), by = by, include_data = FALSE)
375		}
376
377		# combine groups and variables together
378	179x	if (!is_empty(variables) && !is_empty(by)) {
379	117x	df_original_types <-
380	117x	dplyr::cross_join(df_by_orginal_types, df_variable_orginal_types)
381	62x	} else if (!is_empty(variables)) {
382	62x	df_original_types <- df_variable_orginal_types
383	!	} else if (!is_empty(by)) {
384	!	df_original_types <- df_by_orginal_types
385		}
386
387		# unlisting the sorting according the character value
388	179x	df_original_types <- df_original_types \|>
389	179x	dplyr::arrange(across(everything(), ~ map(., as.character) \|> unlist()))
390
391	179x	ard_nested <- ard \|>
392	179x	tidyr::nest(..ard_data... = -c(cards::all_ard_groups(), cards::all_ard_variables())) \|>
393	179x	dplyr::arrange(across(
394	179x	c(cards::all_ard_groups(), cards::all_ard_variables()),
395	179x	~ map(., as.character) \|> unlist()
396		))
397
398		# if all columns match, then replace the coerced character cols with their original type/class
399	179x	all_cols_equal <-
400	179x	every(
401	179x	names(df_original_types) \|> setdiff("variable_level"),
402	179x	~ all(
403	179x	unlist(ard_nested[[.x]]) == as.character(unlist(df_original_types[[.x]])) \|
404	179x	(is.na(unlist(ard_nested[[.x]])) & is.na(unlist(df_original_types[[.x]])))
405		)
406		)
407		# the variable level needs to be handled separately because there can be mixed type and we can't unlist
408	179x	if (isTRUE(all_cols_equal) && "variable_level" %in% names(df_original_types)) {
409	114x	all_cols_equal <-
410	114x	seq_len(nrow(df_original_types)) \|>
411	114x	map_lgl(
412	114x	~ identical(
413	114x	as.character(df_original_types[["variable_level"]][[.x]]),
414	114x	as.character(ard_nested[["variable_level"]][[.x]])
415		)
416		) \|>
417	114x	all()
418		}
419
420	179x	if (isTRUE(all_cols_equal)) {
421	179x	return(
422	179x	dplyr::bind_cols(
423	179x	df_original_types,
424	179x	dplyr::select(ard_nested, -all_of(names(df_original_types))),
425	179x	.name_repair = "minimal"
426		) \|>
427	179x	tidyr::unnest(cols = "..ard_data...") \|>
428	179x	dplyr::arrange(.data$...ard_id_for_sorting...) \|>
429	179x	dplyr::select(-"...ard_id_for_sorting...") \|>
430	179x	cards::as_card()
431		)
432		}
433
434		# I hope this message is never triggered!
435	!	cli::cli_inform(c(
436	!	"If you see this message, variable levels have been coerced to character, which could cause downstream issues.",
437	!	"*" = "Please post a reproducible example to {.url https://github.com/insightsengineering/cardx/issues/new}, so we can address in the next release.",
438	!	"i" = "You can create a minimal reproducible example with {.fun reprex::reprex}."
439		))
440
441	!	ard \|>
442	!	dplyr::arrange(.data$...ard_id_for_sorting...) \|>
443	!	dplyr::select(-"...ard_id_for_sorting...")
444		}

1		#' ARD Incidence Rate
2		#'
3		#' @description
4		#'
5		#' Function takes a time at risk variable (`time`) and event count variable (`count`) and calculates the incidence
6		#' rate in person-years.
7		#'
8		#' Incidence rate is calculated as: Total number of events that occurred / Total person-time at risk
9		#'
10		#' @param data (`data.frame`)\cr
11		#' a data frame.
12		#' @param time ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
13		#' column name of time at risk variable.
14		#' @param count ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
15		#' column name of variable indicating count of events that occurred. If `NULL`, each row in `data` is assumed to
16		#' correspond to a single event occurrence.
17		#' @param id ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
18		#' column name used to identify unique subjects in `data`. If `NULL`, each row in `data` is assumed to correspond to
19		#' a unique subject.
20		#' @param n_person_time (`numeric`)\cr
21		#' amount of person-time to estimate incidence rate for. Defaults to 100.
22		#' @param unit_label (`string`)\cr
23		#' label for the unit of values in `time` and estimated person-time output (e.g. `"years"` for person-years,
24		#' `"days"` for person-days, etc.). If the desired person-time estimate unit does not match the current `time` unit,
25		#' values of `time` should be converted to the correct unit during pre-processing. Defaults to `"time"` (person-time).
26		#' @param conf.level (`numeric`)\cr
27		#' confidence level for the estimated incidence rate.
28		#' @param conf.type (`string`)\cr
29		#' confidence interval type for the estimated incidence rate.
30		#'
31		#' One of: `normal` (default), `normal-log`, `exact`, or `byar`.
32		#' @inheritParams cards::ard_summary
33		#'
34		#' @return an ARD data frame of class 'card'
35		#' @export
36		#'
37		#' @details
38		#' The formulas used to calculate the confidence interval for each CI type are as
39		#' follows, where \eqn{x_i} and \eqn{t_i} represent the number of events and follow-up
40		#' time for subject \eqn{i}, respectively.
41		#'
42		#' * `byar`: Byar's approximation of a Poisson CI. A continuity correction of 0.5 is included in the calculation.
43		#'
44		#' \deqn{CI = (\sum{x_i} + 0.5) (1 - 1 / (9 \times (\sum{x_i} + 0.5)) \pm Z_{1 - \alpha / 2} / (3 \sqrt{\sum{x_i} + 0.5}))^3 / \sum{t_i}}
45		#'
46		#' * `normal`: Normal CI.
47		#'
48		#' \deqn{CI = \sum{x_i} / \sum{t_i} \pm Z_{1 - \alpha / 2} \sqrt{\sum{x_i}} / \sum{t_i}}
49		#'
50		#' * `normal-log`: Normal-Log CI.
51		#'
52		#' \deqn{CI = \exp(\log(\sum{x_i} / \sum{t_i}) \pm Z_{1 - \alpha / 2} / \sqrt{\sum{x_i}})}
53		#'
54		#' * `exact`: Exact CI for a Poisson mean.
55		#'
56		#' \deqn{CI_{lower} = \chi^2_{\alpha / 2, 2\sum{x_i} + 2} / {2 \sum{t_i}}}
57		#' \deqn{CI_{upper} = \chi^2_{1 - \alpha / 2, 2\sum{x_i} + 2} / {2 \sum{t_i}}}
58		#'
59		#' @examples
60		#' set.seed(1)
61		#' data <- data.frame(
62		#' USUBJID = 1:100,
63		#' TRTA = sample(LETTERS[1:3], 100, replace = TRUE),
64		#' AETTE1 = abs(rnorm(100, mean = 0.5)),
65		#' AETOT1 = sample(0:20, 100, replace = TRUE)
66		#' )
67		#'
68		#' data \|>
69		#' ard_incidence_rate(time = AETTE1, count = AETOT1, id = USUBJID, by = TRTA, unit_label = "years")
70		ard_incidence_rate <- function(data,
71		time,
72		count = NULL,
73		id = NULL,
74		by = NULL,
75		strata = NULL,
76		n_person_time = 100,
77		unit_label = "time",
78		conf.level = 0.95,
79		conf.type = c("normal", "normal-log", "exact", "byar")) {
80	10x	set_cli_abort_call()
81
82		# check inputs ---------------------------------------------------------------
83	10x	check_data_frame(data)
84	10x	cards::process_selectors(
85	10x	data,
86	10x	time = {{ time }}, by = {{ by }}, strata = {{ strata }}, count = {{ count }}, id = {{ id }}
87		)
88	10x	check_scalar(time)
89	10x	check_string(unit_label)
90	9x	check_scalar(count, allow_empty = TRUE)
91	9x	check_scalar(id, allow_empty = TRUE)
92	9x	check_scalar_range(conf.level, c(0, 1))
93	9x	check_numeric(n_person_time)
94	9x	check_scalar(n_person_time)
95	9x	if (!class(data[[time]]) %in% c("numeric", "integer")) {
96	1x	cli::cli_abort(
97	1x	message = paste(
98	1x	"The {.arg time} variable must be of type {.cls numeric/integer} but {.arg {time}} is",
99	1x	"{.obj_type_friendly {data[[time]]}}."
100		),
101	1x	call = get_cli_abort_call()
102		)
103		}
104
105
106	8x	conf.type <- arg_match(conf.type, error_call = get_cli_abort_call())
107
108		# build ARD ------------------------------------------------------------------
109	7x	cards::ard_mvsummary(
110	7x	data = data,
111	7x	variables = all_of(time),
112	7x	by = any_of(by),
113	7x	strata = any_of(strata),
114	7x	statistic = all_of(time) ~ list(
115	7x	incidence_rate =
116	7x	.calc_incidence_rate(data, time, count, id, by, strata, n_person_time, unit_label, conf.level, conf.type)
117		)
118		) \|>
119	7x	dplyr::select(-"stat_label") \|>
120	7x	dplyr::left_join(
121	7x	.df_incidence_rate_stat_labels(n_person_time, unit_label),
122	7x	by = "stat_name"
123		) \|>
124	7x	dplyr::mutate(
125	7x	stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name),
126	7x	context = "incidence_rate",
127		) \|>
128	7x	cards::as_card() \|>
129	7x	cards::tidy_ard_column_order() \|>
130	7x	cards::tidy_ard_row_order()
131		}
132
133		# function to perform calculations -------------------------------------------
134		.calc_incidence_rate <- function(data, time, count, id, by, strata, n_person_time, unit_label, conf.level, conf.type) {
135	7x	cards::as_cards_fn(
136	7x	\(x, data, ...) {
137		# calculate number of unique IDs with >=1 event
138	7x	N <- if (!is_empty(id)) {
139	6x	sum(!is.na(unique(data[[id]])))
140		} else {
141	1x	nrow(data)
142		}
143
144		# calculate total person-years
145	7x	tot_person_time <- sum(x, na.rm = TRUE)
146
147		# calculate total number of events
148	7x	n_events <- if (!is_empty(count)) sum(data[[count]], na.rm = TRUE) else nrow(data)
149
150	7x	rate_est <- n_events / tot_person_time
151	7x	rate_se <- sqrt(rate_est / tot_person_time)
152	7x	alpha <- 1 - conf.level
153	7x	if (conf.type %in% c("normal", "normal-log")) {
154	5x	rate_ci <- if (conf.type == "normal") {
155	4x	rate_est + c(-1, 1) * stats::qnorm(1 - alpha / 2) * rate_se
156		} else {
157	1x	exp(log(rate_est) + c(-1, 1) * stats::qnorm(1 - alpha / 2) * rate_se / rate_est)
158		}
159	5x	conf.low <- rate_ci[1]
160	5x	conf.high <- rate_ci[2]
161	2x	} else if (conf.type == "exact") {
162	1x	conf.low <- stats::qchisq(p = alpha / 2, df = 2 * n_events) / (2 * tot_person_time)
163	1x	conf.high <- stats::qchisq(p = 1 - alpha / 2, df = 2 * n_events + 2) / (2 * tot_person_time)
164	1x	} else if (conf.type == "byar") {
165	1x	seg_1 <- n_events + 0.5
166	1x	seg_2 <- 1 - 1 / (9 * (n_events + 0.5))
167	1x	seg_3 <- stats::qnorm(1 - alpha / 2) * sqrt(1 / (n_events + 0.5)) / 3
168	1x	conf.low <- seg_1 * ((seg_2 - seg_3)^3) / tot_person_time
169	1x	conf.high <- seg_1 * ((seg_2 + seg_3)^3) / tot_person_time
170		}
171
172	7x	dplyr::tibble(
173	7x	estimate = rate_est * n_person_time,
174	7x	std.error = rate_se,
175	7x	conf.low = conf.low * n_person_time,
176	7x	conf.high = conf.high * n_person_time,
177	7x	conf.type = conf.type,
178	7x	conf.level = conf.level,
179	7x	tot_person_time = tot_person_time,
180	7x	n_events = n_events,
181	7x	N = N
182		)
183		},
184	7x	stat_names = c(
185	7x	"estimate", "std.error", "conf.low", "conf.high", "conf.type", "conf.level",
186	7x	"tot_person_time", "n_events", "N"
187		)
188		)
189		}
190
191		.df_incidence_rate_stat_labels <- function(n_person_time, unit_label) {
192	7x	time_unit <- paste0("Person-", str_replace(unit_label, "([[:alpha:]])", substr(toupper(unit_label), 1, 1)))
193
194	7x	dplyr::tribble(
195	7x	~stat_name, ~stat_label,
196	7x	"estimate", paste("Incidence rate per", n_person_time, time_unit),
197	7x	"std.error", "Standard Error",
198	7x	"conf.low", "CI Lower Bound",
199	7x	"conf.high", "CI Upper Bound",
200	7x	"conf.type", "CI Type",
201	7x	"conf.level", "CI Confidence Level",
202	7x	"tot_person_time", paste(time_unit, "at Risk"),
203	7x	"n_events", "Number of Events Observed",
204	7x	"N", "Number of Subjects Observed"
205		)
206		}

1		#' ARD Hedge's G Test
2		#'
3		#' @description
4		#' Analysis results data for paired and non-paired Hedge's G Effect Size Test
5		#' using [`effectsize::hedges_g()`].
6		#'
7		#' @param data (`data.frame`)\cr
8		#' a data frame. See below for details.
9		#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
10		#' column name to compare by. Must be a categorical variable with exactly two levels.
11		#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
12		#' column names to be compared. Must be a continuous variable. Independent
13		#' tests will be run for each variable
14		#' @param id ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
15		#' column name of the subject or participant ID
16		#' @param conf.level (scalar `numeric`)\cr
17		#' confidence level for confidence interval. Default is `0.95`.
18		#' @param ... arguments passed to `effectsize::hedges_g(...)`
19		#'
20		#' @return ARD data frame
21		#' @name ard_effectsize_hedges_g
22		#'
23		#' @details
24		#' For the `ard_effectsize_hedges_g()` function, the data is expected to be one row per subject.
25		#' The data is passed as `effectsize::hedges_g(data[[variable]]~data[[by]], data, paired = FALSE, ...)`.
26		#'
27		#' For the `ard_effectsize_paired_hedges_g()` function, the data is expected to be one row
28		#' per subject per by level. Before the effect size is calculated, the data are
29		#' reshaped to a wide format to be one row per subject.
30		#' The data are then passed as
31		#' `effectsize::hedges_g(x = data_wide[[<by level 1>]], y = data_wide[[<by level 2>]], paired = TRUE, ...)`.
32		#'
33		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("effectsize", "parameters")))
34		#' cards::ADSL \|>
35		#' dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")) \|>
36		#' ard_effectsize_hedges_g(by = ARM, variables = AGE)
37		#'
38		#' # constructing a paired data set,
39		#' # where patients receive both treatments
40		#' cards::ADSL[c("ARM", "AGE")] \|>
41		#' dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")) \|>
42		#' dplyr::mutate(.by = ARM, USUBJID = dplyr::row_number()) \|>
43		#' dplyr::arrange(USUBJID, ARM) \|>
44		#' dplyr::group_by(USUBJID) \|>
45		#' dplyr::filter(dplyr::n() > 1) \|>
46		#' ard_effectsize_paired_hedges_g(by = ARM, variables = AGE, id = USUBJID)
47		NULL
48
49		#' @rdname ard_effectsize_hedges_g
50		#' @export
51		ard_effectsize_hedges_g <- function(data, by, variables, conf.level = 0.95, ...) {
52	4x	set_cli_abort_call()
53
54		# check installed packages ---------------------------------------------------
55	4x	check_pkg_installed(c("effectsize", "parameters"))
56
57		# check/process inputs -------------------------------------------------------
58	4x	check_not_missing(data)
59	4x	check_not_missing(variables)
60	4x	check_data_frame(data)
61	4x	data <- dplyr::ungroup(data)
62	4x	cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
63	4x	check_scalar(by)
64	4x	check_range(conf.level, range = c(0, 1))
65
66		# return empty ARD if no variables selected ----------------------------------
67	4x	if (is_empty(variables)) {
68	!	return(dplyr::tibble() \|> cards::as_card())
69		}
70
71		# build ARD ------------------------------------------------------------------
72	4x	lapply(
73	4x	variables,
74	4x	function(variable) {
75	5x	.format_hedges_g_results(
76	5x	by = by,
77	5x	variable = variable,
78	5x	lst_tidy =
79	5x	cards::eval_capture_conditions(
80	5x	effectsize::hedges_g(
81	5x	reformulate2(by, response = variable),
82	5x	data = data \|> tidyr::drop_na(all_of(c(by, variable))),
83	5x	paired = FALSE,
84	5x	ci = conf.level,
85		...
86		) \|>
87	5x	parameters::standardize_names(style = "broom") \|>
88	5x	dplyr::mutate(method = "Hedge's G")
89		),
90	5x	paired = FALSE,
91		...
92		)
93		}
94		) \|>
95	4x	dplyr::bind_rows()
96		}
97
98		#' @rdname ard_effectsize_hedges_g
99		#' @export
100		ard_effectsize_paired_hedges_g <- function(data, by, variables, id, conf.level = 0.95, ...) {
101	3x	set_cli_abort_call()
102
103		# check installed packages ---------------------------------------------------
104	3x	check_pkg_installed(c("effectsize", "parameters"))
105
106		# check/process inputs -------------------------------------------------------
107	3x	check_not_missing(data)
108	3x	check_not_missing(variables)
109	3x	check_not_missing(by)
110	3x	check_not_missing(id)
111	3x	check_data_frame(data)
112	3x	data <- dplyr::ungroup(data)
113	3x	cards::process_selectors(data, by = {{ by }}, variables = {{ variables }}, id = {{ id }})
114	3x	check_scalar(by)
115	3x	check_scalar(id)
116	3x	check_range(conf.level, range = c(0, 1))
117
118		# return empty ARD if no variables selected ----------------------------------
119	3x	if (is_empty(variables)) {
120	!	return(dplyr::tibble() \|> cards::as_card())
121		}
122
123		# build ARD ------------------------------------------------------------------
124	3x	lapply(
125	3x	variables,
126	3x	function(variable) {
127	3x	.format_hedges_g_results(
128	3x	by = by,
129	3x	variable = variable,
130	3x	lst_tidy =
131	3x	cards::eval_capture_conditions({
132		# adding this reshape inside the eval, so if there is an error it's captured in the ARD object
133	3x	data_wide <-
134	3x	data \|>
135	3x	tidyr::drop_na(all_of(c(id, by, variable))) \|>
136	3x	.paired_data_pivot_wider(by = by, variable = variable, id = id) \|>
137	3x	tidyr::drop_na(any_of(c("by1", "by2")))
138		# perform paired cohen's d test
139	2x	effectsize::hedges_g(x = data_wide[["by1"]], y = data_wide[["by2"]], paired = TRUE, ci = conf.level, ...) \|>
140	2x	parameters::standardize_names(style = "broom") \|>
141	2x	dplyr::mutate(method = "Paired Hedge's G")
142		}),
143	3x	paired = TRUE,
144		...
145		)
146		}
147		) \|>
148	3x	dplyr::bind_rows()
149		}
150
151		#' Convert Hedge's G Test to ARD
152		#'
153		#' @inheritParams cards::tidy_as_ard
154		#' @inheritParams effectsize::hedges_g
155		#' @param by (`string`)\cr by column name
156		#' @param variable (`string`)\cr variable column name
157		#' @param ... passed to `hedges_g(...)`
158		#'
159		#' @return ARD data frame
160		#' @keywords internal
161		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("effectsize", "parameters")))
162		#' cardx:::.format_hedges_g_results(
163		#' by = "ARM",
164		#' variable = "AGE",
165		#' paired = FALSE,
166		#' lst_tidy =
167		#' cards::eval_capture_conditions(
168		#' effectsize::hedges_g(data[[variable]] ~ data[[by]], paired = FALSE) \|>
169		#' parameters::standardize_names(style = "broom")
170		#' )
171		#' )
172		.format_hedges_g_results <- function(by, variable, lst_tidy, paired, ...) {
173		# build ARD ------------------------------------------------------------------
174	8x	ret <-
175	8x	cards::tidy_as_ard(
176	8x	lst_tidy = lst_tidy,
177	8x	tidy_result_names = c(
178	8x	"estimate", "conf.level", "conf.low", "conf.high"
179		),
180	8x	fun_args_to_record = c("mu", "paired", "pooled_sd", "alternative"),
181	8x	formals = formals(asNamespace("effectsize")[["hedges_g"]]),
182	8x	passed_args = c(list(paired = paired), dots_list(...)),
183	8x	lst_ard_columns = list(group1 = by, variable = variable, context = "effectsize_hedges_g")
184		)
185
186		# add the stat label ---------------------------------------------------------
187	8x	ret \|>
188	8x	dplyr::left_join(
189	8x	.df_effectsize_stat_labels(),
190	8x	by = "stat_name"
191		) \|>
192	8x	dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) \|>
193	8x	cards::as_card() \|>
194	8x	cards::tidy_ard_column_order()
195		}

1		#' ARD continuous CIs
2		#'
3		#' One-sample confidence intervals for continuous variable means and medians.
4		#'
5		#' @inheritParams ard_stats_t_test
6		#' @param method (`string`)\cr
7		#' a string indicating the method to use for the confidence interval
8		#' calculation. Must be one of `"t.test"` or `"wilcox.test"`
9		#' @param ... arguments passed to `t.test()` or `wilcox.test()`
10		#'
11		#' @return ARD data frame
12		#' @name ard_continuous_ci
13		#'
14		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
15		#' ard_continuous_ci(mtcars, variables = c(mpg, hp), method = "wilcox.test")
16		#' ard_continuous_ci(mtcars, variables = mpg, by = am, method = "t.test")
17		NULL
18
19		#' @rdname ard_continuous_ci
20		#' @export
21		ard_continuous_ci <- function(data, ...) {
22	19x	check_not_missing(data)
23	19x	UseMethod("ard_continuous_ci")
24		}
25
26		#' @rdname ard_continuous_ci
27		#' @export
28		ard_continuous_ci.data.frame <- function(data, variables, by = dplyr::group_vars(data), conf.level = 0.95, method = c("t.test", "wilcox.test"), ...) {
29	3x	set_cli_abort_call()
30
31		# check inputs ---------------------------------------------------------------
32	3x	method <- arg_match(method)
33	3x	check_not_missing(variables)
34	3x	cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
35
36		# return empty ARD if no variables selected ----------------------------------
37	3x	if (is_empty(variables)) {
38	!	return(dplyr::tibble() \|> cards::as_card())
39		}
40
41		# calculate CIs --------------------------------------------------------------
42	3x	switch(method,
43	3x	"t.test" =
44	3x	ard_stats_t_test_onesample(
45	3x	data = data,
46	3x	variables = {{ variables }},
47	3x	by = {{ by }},
48	3x	conf.level = conf.level,
49		...
50		),
51	3x	"wilcox.test" =
52	3x	ard_stats_wilcox_test_onesample(
53	3x	data = data,
54	3x	variables = {{ variables }},
55	3x	by = {{ by }},
56	3x	conf.level = conf.level,
57	3x	conf.int = TRUE,
58		...
59		)
60		) \|>
61	3x	dplyr::mutate(context = "continuous_ci")
62		}

1		#' ARD to Calculate Categorical Occurrence Rates by Maximum Level Per Unique ID
2		#'
3		#' Function calculates categorical variable level occurrences rates by maximum level per unique ID.
4		#' Each variable in `variables` is evaluated independently and then results for all variables are stacked.
5		#' Only the highest-ordered level will be counted for each unique ID.
6		#' Unordered, non-numeric variables will be converted to factor and the default level order used for ordering.
7		#'
8		#' @inheritParams cards::ard_tabulate
9		#' @inheritParams cards::ard_stack
10		#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
11		#' The categorical variables for which occurrence rates per unique ID (by maximum level) will be calculated.
12		#' @param id ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
13		#' Argument used to subset `data` to identify rows in `data` to calculate categorical variable level occurrence rates.
14		#' @param denominator (`data.frame`, `integer`)\cr
15		#' An optional argument to change the denominator used for `"N"` and `"p"` statistic calculations.
16		#' Defaults to `NULL`, in which case `dplyr::distinct(data, dplyr::pick(all_of(c(id, by))))` is used for these
17		#' calculations. See [cards::ard_tabulate()] for more details on specifying denominators.
18		#' @param quiet (scalar `logical`)\cr
19		#' Logical indicating whether to suppress additional messaging. Default is `FALSE`.
20		#' @param fmt_fn `r lifecycle::badge("deprecated")`
21		#'
22		#' @return an ARD data frame of class 'card'
23		#' @name ard_tabulate_max
24		#'
25		#' @examples
26		#' # Occurrence Rates by Max Level (Highest Severity) --------------------------
27		#' ard_tabulate_max(
28		#' cards::ADAE,
29		#' variables = c(AESER, AESEV),
30		#' id = USUBJID,
31		#' by = TRTA,
32		#' denominator = cards::ADSL
33		#' )
34		NULL
35
36		#' @rdname ard_tabulate_max
37		#' @export
38		ard_tabulate_max <- function(data,
39		variables,
40		id,
41		by = dplyr::group_vars(data),
42		statistic = everything() ~ c("n", "p", "N"),
43		denominator = NULL,
44		strata = NULL,
45		fmt_fun = NULL,
46		stat_label = everything() ~ cards::default_stat_labels(),
47		quiet = FALSE,
48		fmt_fn = deprecated(),
49		...) {
50	17x	set_cli_abort_call()
51
52		# deprecated args ------------------------------------------------------------
53	17x	if (lifecycle::is_present(fmt_fn)) {
54	!	lifecycle::deprecate_soft(
55	!	when = "0.2.5",
56	!	what = "ard_tabulate_max(fmt_fn)",
57	!	with = "ard_tabulate_max(fmt_fun)"
58		)
59	!	fmt_fun <- fmt_fn
60		}
61
62		# check inputs ---------------------------------------------------------------
63	17x	check_not_missing(data)
64	17x	check_not_missing(variables)
65	17x	check_not_missing(id)
66	17x	cards::process_selectors(data,
67	17x	variables = {{ variables }}, id = {{ id }},
68	17x	by = {{ by }}, strata = {{ strata }}
69		)
70	17x	data <- dplyr::ungroup(data)
71
72		# check the id argument is not empty
73	17x	if (is_empty(id)) {
74	!	cli::cli_abort("Argument {.arg id} cannot be empty.", call = get_cli_abort_call())
75		}
76
77		# return empty ARD if no variables selected ----------------------------------
78	17x	if (is_empty(variables)) {
79	1x	return(dplyr::tibble() \|> cards::as_card())
80		}
81
82	16x	lst_results <- lapply(
83	16x	variables,
84	16x	function(x) {
85	20x	ard_categorical(
86	20x	data = data \|>
87	20x	arrange_using_order(c(id, by, strata, x)) \|>
88	20x	dplyr::slice_tail(n = 1L, by = all_of(c(id, by, strata))),
89	20x	variables = all_of(x),
90	20x	by = all_of(by),
91	20x	strata = all_of(strata),
92	20x	statistic = statistic,
93	20x	denominator = denominator,
94	20x	fmt_fun = fmt_fun,
95	20x	stat_label = stat_label
96		)
97		}
98		)
99
100		# print default order of variable levels -------------------------------------
101	14x	for (v in variables) {
102	18x	lvls <- .unique_and_sorted(data[[v]])
103	18x	vec <- cli::cli_vec(
104	18x	lvls,
105	18x	style = list("vec-sep" = " < ", "vec-sep2" = " < ", "vec-last" = " < ", "vec-trunc" = 3)
106		)
107	16x	if (!quiet) cli::cli_inform("{.var {v}}: {.val {vec}}")
108		}
109
110		# combine results ------------------------------------------------------------
111	14x	result <- lst_results \|>
112	14x	dplyr::bind_rows() \|>
113	14x	dplyr::mutate(context = "categorical_max") \|>
114	14x	cards::tidy_ard_column_order() \|>
115	14x	cards::tidy_ard_row_order()
116
117		# return final result --------------------------------------------------------
118	14x	result
119		}
120
121		# internal function copied from cards
122		# like `dplyr::arrange()`, but uses base R's `order()` to keep consistency in some edge cases
123		arrange_using_order <- function(data, columns) {
124	20x	inject(data[with(data, order(!!!syms(columns))), ])
125		}

1		#' ARD Survival Differences
2		#'
3		#' Calculate differences in the Kaplan-Meier estimator of survival using the
4		#' results from [`survival::survfit()`].
5		#'
6		#' @param x (`survift`)\cr
7		#' object of class `'survfit'` typically created with [`survival::survfit()`]
8		#' @param conf.level (scalar `numeric`)\cr
9		#' confidence level for confidence interval. Default is `0.95`.
10		#' @inheritParams ard_survival_survfit
11		#'
12		#' @return an ARD data frame of class 'card'
13		#' @export
14		#'
15		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("survival", "ggsurvfit")))
16		#' library(ggsurvfit)
17		#' library(survival)
18		#'
19		#' survfit(Surv_CNSR() ~ TRTA, data = cards::ADTTE) \|>
20		#' ard_survival_survfit_diff(times = c(25, 50))
21		ard_survival_survfit_diff <- function(x, times, conf.level = 0.95) {
22	5x	set_cli_abort_call()
23
24		# check installed packages ---------------------------------------------------
25	5x	check_pkg_installed(c("survival", "broom"))
26	5x	check_not_missing(x)
27	5x	check_not_missing(times)
28	5x	check_class(x, "survfit")
29
30	5x	if (inherits(x, c("survfitms", "survfitcox"))) {
31	1x	cli::cli_abort(
32	1x	"Argument {.arg x} cannot be class {.cls {c('survfitms', 'survfitcox')}}.",
33	1x	call = get_cli_abort_call()
34		)
35		}
36	4x	check_scalar_range(conf.level, range = c(0, 1))
37	4x	check_length(
38	4x	as.list(x$call)[["formula"]] \|> stats::as.formula() \|> stats::terms() \|> attr("term.labels"),
39	4x	length = 1L,
40	4x	message = "The {.cls survfit} object passed in argument {.arg x} must be stratified by a single variable."
41		)
42	3x	if (length(x$strata) < 2) {
43	1x	cli::cli_abort(
44	1x	"The {.cls survfit} object's stratifying variable must have 2 or more levels.",
45	1x	call = get_cli_abort_call()
46		)
47		}
48
49		# calculate the survival at the specified times
50	2x	ard_survival_survfit <-
51	2x	ard_survival_survfit(x = x, times = times) \|>
52	2x	dplyr::filter(.data$stat_name %in% c("estimate", "std.error")) \|>
53	2x	dplyr::select(-c("stat_label", "context", "fmt_fun"))
54
55		# transform the survival ARD into a cards object with the survival difference
56	2x	card <-
57	2x	ard_survival_survfit %>%
58	2x	{dplyr::left_join( # styler: off
59		# remove the first group from the data frame (this is our reference group)
60	2x	dplyr::filter(., .by = cards::all_ard_groups(), dplyr::cur_group_id() > 1L) \|>
61	2x	dplyr::rename(stat1 = "stat"),
62		# merge the reference group data
63	2x	dplyr::filter(., .by = cards::all_ard_groups(), dplyr::cur_group_id() == 1L) \|>
64	2x	dplyr::select(stat0 = "stat", everything(), -c("group1_level", "error", "warning")),
65	2x	by = c("group1", "variable", "variable_level", "stat_name")
66	2x	)} \|> # styler: off
67		# reshape to put the stats that need to be combined on the same row
68	2x	tidyr::pivot_wider(
69	2x	id_cols = c("group1", "group1_level", "variable", "variable_level"),
70	2x	names_from = "stat_name",
71	2x	values_from = c("stat0", "stat1"),
72	2x	values_fn = unlist
73		) \|>
74		# calcualte the primary statistics to return
75	2x	dplyr::mutate(
76		# reference level
77	2x	reference_level = ard_survival_survfit[["group1_level"]][1],
78		# short description of method
79	2x	method = "Survival Difference (Z-test)",
80		# survival difference
81	2x	estimate = .data$stat0_estimate - .data$stat1_estimate,
82		# survival difference standard error
83	2x	std.error = sqrt(.data$stat0_std.error^2 + .data$stat1_std.error^2),
84		# Z test statistic
85	2x	statistic = .data$estimate / .data$std.error,
86		# confidence limits of the survival difference
87	2x	conf.low = .data$estimate - .data$std.error * stats::qnorm(1 - (1 - .env$conf.level) / 2),
88	2x	conf.high = .data$estimate + .data$std.error * stats::qnorm(1 - (1 - .env$conf.level) / 2),
89		# p-value for test where H0: no difference
90	2x	p.value = 2 * (1 - stats::pnorm(abs(.data$statistic))),
91	2x	across(c("reference_level", "estimate", "std.error", "statistic", "conf.low", "conf.high", "p.value", "method"), as.list)
92		) \|>
93		# reshape into the cards structure
94	2x	dplyr::select(-starts_with("stat0_"), -starts_with("stat1_")) \|>
95	2x	tidyr::pivot_longer(
96	2x	cols = -c(cards::all_ard_groups(), cards::all_ard_variables()),
97	2x	names_to = "stat_name",
98	2x	values_to = "stat"
99		)
100
101		# final prepping of the cards object -----------------------------------------
102	2x	card \|>
103	2x	dplyr::mutate(
104	2x	warning = ard_survival_survfit[["warning"]][1],
105	2x	error = ard_survival_survfit[["error"]][1],
106	2x	fmt_fun = list(1L),
107	2x	stat_label =
108	2x	dplyr::case_when(
109	2x	.data$stat_name %in% "estimate" ~ "Survival Difference",
110	2x	.data$stat_name %in% "std.error" ~ "Survival Difference Standard Error",
111	2x	.data$stat_name %in% "conf.low" ~ "CI Lower Bound",
112	2x	.data$stat_name %in% "conf.high" ~ "CI Upper Bound",
113	2x	.data$stat_name %in% "statistic" ~ "z statistic",
114	2x	.data$stat_name %in% "p.value" ~ "p-value",
115	2x	.default = .data$stat_name
116		),
117	2x	context = "survival_survfit_diff",
118		) \|>
119	2x	cards::as_card() \|>
120	2x	cards::tidy_ard_column_order()
121		}

1		#' ARD Cohen's D Test
2		#'
3		#' @description
4		#' Analysis results data for paired and non-paired Cohen's D Effect Size Test
5		#' using [`effectsize::cohens_d()`].
6		#'
7		#' @param data (`data.frame`)\cr
8		#' a data frame. See below for details.
9		#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
10		#' column name to compare by. Must be a categorical variable with exactly two levels.
11		#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
12		#' column names to be compared. Must be a continuous variables.
13		#' Independent tests will be run for each variable.
14		#' @param id ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
15		#' column name of the subject or participant ID
16		#' @param conf.level (scalar `numeric`)\cr
17		#' confidence level for confidence interval. Default is `0.95`.
18		#' @param ... arguments passed to `effectsize::cohens_d(...)`
19		#'
20		#' @return ARD data frame
21		#' @name ard_effectsize_cohens_d
22		#'
23		#' @details
24		#' For the `ard_effectsize_cohens_d()` function, the data is expected to be one row per subject.
25		#' The data is passed as `effectsize::cohens_d(data[[variable]]~data[[by]], data, paired = FALSE, ...)`.
26		#'
27		#' For the `ard_effectsize_paired_cohens_d()` function, the data is expected to be one row
28		#' per subject per by level. Before the effect size is calculated, the data are
29		#' reshaped to a wide format to be one row per subject.
30		#' The data are then passed as
31		#' `effectsize::cohens_d(x = data_wide[[<by level 1>]], y = data_wide[[<by level 2>]], paired = TRUE, ...)`.
32		#'
33		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("effectsize", "parameters")))
34		#' cards::ADSL \|>
35		#' dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")) \|>
36		#' ard_effectsize_cohens_d(by = ARM, variables = AGE)
37		#'
38		#' # constructing a paired data set,
39		#' # where patients receive both treatments
40		#' cards::ADSL[c("ARM", "AGE")] \|>
41		#' dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")) \|>
42		#' dplyr::mutate(.by = ARM, USUBJID = dplyr::row_number()) \|>
43		#' dplyr::arrange(USUBJID, ARM) \|>
44		#' dplyr::group_by(USUBJID) \|>
45		#' dplyr::filter(dplyr::n() > 1) \|>
46		#' ard_effectsize_paired_cohens_d(by = ARM, variables = AGE, id = USUBJID)
47		NULL
48
49		#' @rdname ard_effectsize_cohens_d
50		#' @export
51		ard_effectsize_cohens_d <- function(data, by, variables, conf.level = 0.95, ...) {
52	4x	set_cli_abort_call()
53
54		# check installed packages ---------------------------------------------------
55	4x	check_pkg_installed(c("effectsize", "parameters"))
56
57		# check/process inputs -------------------------------------------------------
58	4x	check_not_missing(data)
59	4x	check_not_missing(variables)
60	4x	check_not_missing(by)
61	4x	check_data_frame(data)
62	4x	data <- dplyr::ungroup(data)
63	4x	cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
64	4x	check_scalar(by)
65	4x	check_range(conf.level, range = c(0, 1))
66
67		# return empty ARD if no variables selected ----------------------------------
68	4x	if (is_empty(variables)) {
69	!	return(dplyr::tibble() \|> cards::as_card())
70		}
71
72		# build ARD ------------------------------------------------------------------
73	4x	lapply(
74	4x	variables,
75	4x	function(variable) {
76	5x	.format_cohens_d_results(
77	5x	by = by,
78	5x	variable = variable,
79	5x	lst_tidy =
80	5x	cards::eval_capture_conditions(
81	5x	effectsize::cohens_d(
82	5x	reformulate2(by, response = variable),
83	5x	data = data \|> tidyr::drop_na(all_of(c(by, variable))),
84	5x	paired = FALSE,
85	5x	ci = conf.level,
86		...
87		) \|>
88	5x	parameters::standardize_names(style = "broom") \|>
89	5x	dplyr::mutate(method = "Cohen's D")
90		),
91	5x	paired = FALSE,
92		...
93		)
94		}
95		) \|>
96	4x	dplyr::bind_rows()
97		}
98
99
100		#' @rdname ard_effectsize_cohens_d
101		#' @export
102		ard_effectsize_paired_cohens_d <- function(data, by, variables, id, conf.level = 0.95, ...) {
103	3x	set_cli_abort_call()
104
105		# check installed packages ---------------------------------------------------
106	3x	check_pkg_installed(c("effectsize", "parameters"))
107
108		# check/process inputs -------------------------------------------------------
109	3x	check_not_missing(data)
110	3x	check_not_missing(variables)
111	3x	check_not_missing(by)
112	3x	check_not_missing(id)
113	3x	check_data_frame(data)
114	3x	data <- dplyr::ungroup(data)
115	3x	cards::process_selectors(data, by = {{ by }}, variables = {{ variables }}, id = {{ id }})
116	3x	check_scalar(by)
117	3x	check_scalar(id)
118	3x	check_range(conf.level, range = c(0, 1))
119
120		# return empty ARD if no variables selected ----------------------------------
121	3x	if (is_empty(variables)) {
122	!	return(dplyr::tibble() \|> cards::as_card())
123		}
124
125		# build ARD ------------------------------------------------------------------
126	3x	lapply(
127	3x	variables,
128	3x	function(variable) {
129	3x	.format_cohens_d_results(
130	3x	by = by,
131	3x	variable = variable,
132	3x	lst_tidy =
133	3x	cards::eval_capture_conditions({
134		# adding this reshape inside the eval, so if there is an error it's captured in the ARD object
135	3x	data_wide <-
136	3x	data \|>
137	3x	tidyr::drop_na(all_of(c(id, by, variable))) \|>
138	3x	.paired_data_pivot_wider(by = by, variable = variable, id = id) \|>
139	3x	tidyr::drop_na(any_of(c("by1", "by2")))
140		# perform paired cohen's d test
141	2x	effectsize::cohens_d(x = data_wide[["by1"]], y = data_wide[["by2"]], paired = TRUE, ci = conf.level, ...) \|>
142	2x	parameters::standardize_names(style = "broom") \|>
143	2x	dplyr::mutate(method = "Paired Cohen's D")
144		}),
145	3x	paired = TRUE,
146		...
147		)
148		}
149		) \|>
150	3x	dplyr::bind_rows()
151		}
152
153		.df_effectsize_stat_labels <- function() {
154	16x	dplyr::tribble(
155	16x	~stat_name, ~stat_label,
156	16x	"estimate", "Effect Size Estimate",
157	16x	"conf.low", "CI Lower Bound",
158	16x	"conf.high", "CI Upper Bound",
159	16x	"conf.level", "CI Confidence Level",
160	16x	"mu", "H0 Mean",
161	16x	"paired", "Paired test",
162	16x	"pooled_sd", "Pooled Standard Deviation",
163	16x	"alternative", "Alternative Hypothesis"
164		)
165		}
166
167
168		#' Convert Cohen's D Test to ARD
169		#'
170		#' @inheritParams cards::tidy_as_ard
171		#' @inheritParams effectsize::cohens_d
172		#' @param by (`string`)\cr by column name
173		#' @param variable (`string`)\cr variable column name
174		#' @param ... passed to `cohens_d(...)`
175		#'
176		#' @return ARD data frame
177		#' @keywords internal
178		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("effectsize", "parameters")))
179		#' cardx:::.format_cohens_d_results(
180		#' by = "ARM",
181		#' variable = "AGE",
182		#' paired = FALSE,
183		#' lst_tidy =
184		#' cards::eval_capture_conditions(
185		#' effectsize::hedges_g(data[[variable]] ~ data[[by]], paired = FALSE) \|>
186		#' parameters::standardize_names(style = "broom")
187		#' )
188		#' )
189		.format_cohens_d_results <- function(by, variable, lst_tidy, paired, ...) {
190		# build ARD ------------------------------------------------------------------
191	8x	ret <-
192	8x	cards::tidy_as_ard(
193	8x	lst_tidy = lst_tidy,
194	8x	tidy_result_names = c(
195	8x	"estimate", "conf.level", "conf.low", "conf.high"
196		),
197	8x	fun_args_to_record = c("mu", "paired", "pooled_sd", "alternative"),
198	8x	formals = formals(asNamespace("effectsize")[["cohens_d"]]),
199	8x	passed_args = c(list(paired = paired), dots_list(...)),
200	8x	lst_ard_columns = list(group1 = by, variable = variable, context = "effectsize_cohens_d")
201		)
202
203		# add the stat label ---------------------------------------------------------
204	8x	ret \|>
205	8x	dplyr::left_join(
206	8x	.df_effectsize_stat_labels(),
207	8x	by = "stat_name"
208		) \|>
209	8x	dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) \|>
210	8x	cards::as_card() \|>
211	8x	cards::tidy_ard_column_order()
212		}

1		#' Basic Regression ARD
2		#'
3		#' @description
4		#' A function that takes a regression model and provides basic statistics in an
5		#' ARD structure.
6		#' The default output is simpler than [`ard_regression()`].
7		#' The function primarily matches regression terms to underlying variable names
8		#' and levels.
9		#' The default arguments used are
10		#'
11		#' ```r
12		#' broom.helpers::tidy_plus_plus(
13		#' add_reference_rows = FALSE,
14		#' add_estimate_to_reference_rows = FALSE,
15		#' add_n = FALSE,
16		#' intercept = FALSE
17		#' )
18		#' ```
19		#'
20		#' @inheritParams ard_regression
21		#' @param stats_to_remove (`character`)\cr
22		#' character vector of statistic names to remove. Default is
23		#' `c("term", "var_type", "var_label", "var_class", "label", "contrasts_type", "contrasts", "var_nlevels")`.
24		#'
25		#' @return data frame
26		#' @name ard_regression_basic
27		#'
28		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom.helpers"))
29		#' lm(AGE ~ ARM, data = cards::ADSL) \|>
30		#' ard_regression_basic()
31		#'
32		#' ard_regression_basic(
33		#' x = cards::ADSL,
34		#' formula = AGE ~ ARM,
35		#' method = "lm"
36		#' )
37		NULL
38
39		#' @rdname ard_regression_basic
40		#' @export
41		ard_regression_basic <- function(x, ...) {
42	8x	UseMethod("ard_regression_basic")
43		}
44
45		#' @rdname ard_regression_basic
46		#' @export
47		ard_regression_basic.default <- function(x, tidy_fun = broom.helpers::tidy_with_broom_or_parameters,
48		stats_to_remove = c(
49		"term", "var_type", "var_label", "var_class",
50		"label", "contrasts_type", "contrasts", "var_nlevels"
51		),
52		...) {
53	6x	set_cli_abort_call()
54
55		# check installed packages ---------------------------------------------------
56	6x	check_pkg_installed(pkg = "broom.helpers")
57
58		# check inputs ---------------------------------------------------------------
59	6x	check_not_missing(x)
60	6x	check_class(stats_to_remove, cls = "character", allow_empty = TRUE)
61	!	if (is_empty(stats_to_remove)) stats_to_remove <- character(0L) # styler: off
62
63	6x	args <-
64	6x	list(
65	6x	add_reference_rows = FALSE,
66	6x	add_estimate_to_reference_rows = FALSE,
67	6x	add_n = FALSE,
68	6x	intercept = FALSE
69		) \|>
70	6x	utils::modifyList(val = rlang::dots_list(...))
71
72	6x	rlang::inject(ard_regression(x = x, tidy_fun = tidy_fun, !!!args)) \|>
73	6x	dplyr::filter(!.data$stat_name %in% .env$stats_to_remove) \|>
74	6x	dplyr::select(-(cards::all_ard_variables("levels") & dplyr::where(\(x) all(is.na(x)))))
75		}
76
77		#' @rdname ard_regression_basic
78		#' @export
79		ard_regression_basic.data.frame <- function(x, formula, method, method.args = list(), package = "base",
80		tidy_fun = broom.helpers::tidy_with_broom_or_parameters,
81		stats_to_remove = c(
82		"term", "var_type", "var_label", "var_class",
83		"label", "contrasts_type", "contrasts", "var_nlevels"
84		),
85		...) {
86		# check inputs ---------------------------------------------------------------
87	2x	set_cli_abort_call()
88	2x	check_not_missing(x)
89	2x	check_not_missing(formula)
90	1x	check_not_missing(method)
91	1x	check_class(formula, cls = "formula")
92
93		# build model ----------------------------------------------------------------
94	1x	model <-
95	1x	construct_model(
96	1x	data = x,
97	1x	formula = formula,
98	1x	method = method,
99	1x	method.args = {{ method.args }},
100	1x	package = package
101		)
102
103		# summarize model ------------------------------------------------------------
104	1x	ard_regression_basic(x = model, tidy_fun = tidy_fun, stats_to_remove = stats_to_remove, ...)
105		}

1		#' ARD Wald Test
2		#'
3		#' @description
4		#' Function takes a regression model object and calculates Wald
5		#' statistical test using [`aod::wald.test()`].
6		#'
7		#' @param x regression model object
8		#' @param ... arguments passed to `aod::wald.test(...)`
9		#' @inheritParams ard_regression
10		#'
11		#' @return data frame
12		#' @export
13		#'
14		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("aod", "broom.helpers")))
15		#' lm(AGE ~ ARM, data = cards::ADSL) \|>
16		#' ard_aod_wald_test()
17		ard_aod_wald_test <- function(x, tidy_fun = broom.helpers::tidy_with_broom_or_parameters, ...) {
18	3x	set_cli_abort_call()
19
20		# check installed packages ---------------------------------------------------
21	3x	check_pkg_installed(c("aod", "broom.helpers"))
22
23		# check inputs ---------------------------------------------------------------
24	3x	check_not_missing(x)
25
26		# run regression() -----------------------------------------------------------
27	3x	reg_model <- cards::eval_capture_conditions(
28	3x	ard_regression_basic(x, tidy_fun = tidy_fun, intercept = TRUE, stats_to_remove = c(
29	3x	"var_type",
30	3x	"var_label",
31	3x	"var_class", "label",
32	3x	"contrasts_type", "contrasts", "var_nlevels", "std.error",
33	3x	"conf.low", "conf.high", "statistic", "p.value", "estimate"
34		))
35		)
36
37	3x	if (!is.null(reg_model[["error"]])) {
38	1x	cli::cli_abort(
39	1x	c("Unable to identify underlying variable names in regression model.",
40	1x	i = "Is this model type supported by {.fun broom.helpers::tidy_plus_plus}, which is the function used to identify variable names?"
41		),
42	1x	call = get_cli_abort_call()
43		)
44		}
45	2x	aod <-
46	2x	reg_model[["result"]] %>%
47	2x	dplyr::select(c(
48	2x	variable = "variable",
49	2x	model_terms = "stat"
50		)) %>%
51	2x	dplyr::mutate(term_id = dplyr::row_number()) %>%
52	2x	tidyr::nest(data = -"variable") %>%
53	2x	dplyr::rowwise() %>%
54	2x	dplyr::mutate(
55	2x	model_terms = unlist(.data$data[["model_terms"]]) %>% list(),
56	2x	model_terms_id = rlang::set_names(.data$data[["term_id"]]) %>% list()
57		)
58		# run wald.test() -----------------------------------------------------------
59	2x	wald_test <-
60	2x	cards::eval_capture_conditions(lapply(seq_len(length(aod$model_terms_id)), function(terms_id) {
61	4x	aod::wald.test(
62	4x	Sigma = stats::vcov(x),
63	4x	b = stats::coef(x), Terms = aod$model_terms_id[[terms_id]]
64		)
65		}))
66
67
68	2x	df_list <- do.call(rbind, lapply(wald_test$result, .extract_wald_results))
69
70	2x	cbind(aod$variable, df_list) %>%
71	2x	tidyr::pivot_longer(
72	2x	cols = !"aod$variable",
73	2x	names_to = "stat_name",
74	2x	values_to = "stat"
75		) %>%
76	2x	dplyr::rename(
77	2x	"variable" = "aod$variable"
78		) \|>
79	2x	dplyr::mutate(
80	2x	stat = as.list(.data$stat),
81	2x	stat_label =
82	2x	dplyr::case_when(
83	2x	.data$stat_name %in% "statistic" ~ "Statistic",
84	2x	.data$stat_name %in% "df" ~ "Degrees of Freedom",
85	2x	.data$stat_name %in% "p.value" ~ "p-value",
86	2x	TRUE ~ .data$stat_name
87		),
88	2x	fmt_fun =
89	2x	map(
90	2x	.data$stat,
91	2x	function(.x) {
92		# styler: off
93	!	if (is.integer(.x)) return(0L)
94	12x	if (is.numeric(.x)) return(1L)
95		# styler: on
96	!	NULL
97		}
98		),
99	2x	context = "aod_wald_test",
100	2x	warning = wald_test["warning"],
101	2x	error = wald_test["error"]
102		) \|>
103	2x	cards::as_card() \|>
104	2x	cards::tidy_ard_column_order()
105		}
106
107		#' Extract data from wald.test object
108		#'
109		#' @param wald_test (`data.frame`)\cr wald test object object from `aod::wald.test()`
110		#'
111		#' @return a data frame containing the wald test results.
112		#' @keywords internal
113		.extract_wald_results <- function(wald_test) {
114	4x	df <- wald_test$result$chi2[("df")]
115	4x	statistic <- wald_test$result$chi2[("chi2")]
116	4x	p.value <- wald_test$result$chi2[("P")]
117	4x	data.frame(df, statistic, p.value)
118		}

1		#' ARD Survey t-test
2		#'
3		#' @description
4		#' Analysis results data for survey t-test using [`survey::svyttest()`].
5		#'
6		#' @param data (`survey.design`)\cr
7		#' a survey design object often created with [`survey::svydesign()`]
8		#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
9		#' column name to compare by
10		#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
11		#' column names to be compared. Independent tests will be run for each variable.
12		#' @param conf.level (`double`)\cr
13		#' confidence level of the returned confidence interval. Must be between `c(0, 1)`.
14		#' Default is `0.95`
15		#' @param ... arguments passed to [`survey::svyttest()`]
16		#'
17		#' @return ARD data frame
18		#' @export
19		#'
20		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("survey", "broom")))
21		#' data(api, package = "survey")
22		#' dclus2 <- survey::svydesign(id = ~ dnum + snum, fpc = ~ fpc1 + fpc2, data = apiclus2)
23		#'
24		#' ard_survey_svyttest(dclus2, variables = enroll, by = comp.imp, conf.level = 0.9)
25		ard_survey_svyttest <- function(data, by, variables, conf.level = 0.95, ...) {
26	5x	set_cli_abort_call()
27
28		# check installed packages ---------------------------------------------------
29	5x	check_pkg_installed(c("survey", "broom"))
30
31		# check/process inputs -------------------------------------------------------
32	5x	check_not_missing(data)
33	5x	check_not_missing(variables)
34	5x	check_not_missing(by)
35	5x	check_range(conf.level, range = c(0, 1))
36	5x	check_class(data, cls = "survey.design")
37	5x	cards::process_selectors(data[["variables"]], by = {{ by }}, variables = {{ variables }})
38	5x	check_scalar(by)
39
40		# return empty ARD if no variables selected ----------------------------------
41	5x	if (is_empty(variables)) {
42	!	return(dplyr::tibble() \|> cards::as_card())
43		}
44
45		# build ARD ------------------------------------------------------------------
46	5x	lapply(
47	5x	variables,
48	5x	function(variable) {
49	6x	.format_svyttest_results(
50	6x	by = by,
51	6x	variable = variable,
52	6x	lst_tidy =
53	6x	cards::eval_capture_conditions(
54	6x	survey::svyttest(reformulate2(termlabels = by, response = variable), design = data, ...) %>%
55		# a slightly enhanced tidier that allows us to specify the conf.level
56		{
57	5x	dplyr::bind_cols(
58	5x	broom::tidy(.) \|> dplyr::select(-c("conf.low", "conf.high")),
59	5x	dplyr::tibble(!!!stats::confint(., level = conf.level) \|> set_names(c("conf.low", "conf.high"))) \|>
60	5x	dplyr::mutate(conf.level = conf.level)
61		)
62		}
63		),
64		...
65		)
66		}
67		) \|>
68	5x	dplyr::bind_rows()
69		}
70
71		.format_svyttest_results <- function(by, variable, lst_tidy, ...) {
72		# build ARD ------------------------------------------------------------------
73	6x	ret <-
74	6x	cards::tidy_as_ard(
75	6x	lst_tidy = lst_tidy,
76	6x	tidy_result_names = c(
77	6x	"estimate", "statistic",
78	6x	"p.value", "parameter",
79	6x	"conf.low", "conf.high",
80	6x	"conf.level", "method", "alternative"
81		),
82	6x	passed_args = dots_list(...),
83	6x	lst_ard_columns = list(group1 = by, variable = variable, context = "survey_svyttest")
84		)
85
86		# add the stat label ---------------------------------------------------------
87	6x	ret \|>
88	6x	dplyr::left_join(
89	6x	.df_ttest_stat_labels(),
90	6x	by = "stat_name"
91		) \|>
92	6x	dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) \|>
93	6x	cards::as_card() \|>
94	6x	cards::tidy_ard_column_order()
95		}

1		#' Regression ARD
2		#'
3		#' Function takes a regression model object and converts it to a ARD
4		#' structure using the `broom.helpers` package.
5		#'
6		#' @inheritParams construct_model
7		#' @param x (regression model/`data.frame`)\cr
8		#' regression model object or a data frame
9		#' @param tidy_fun (`function`)\cr
10		#' a tidier. Default is [`broom.helpers::tidy_with_broom_or_parameters`]
11		#' @param ... Arguments passed to [`broom.helpers::tidy_plus_plus()`]
12		#'
13		#' @return data frame
14		#' @name ard_regression
15		#'
16		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom.helpers"))
17		#' lm(AGE ~ ARM, data = cards::ADSL) \|>
18		#' ard_regression(add_estimate_to_reference_rows = TRUE)
19		#'
20		#' ard_regression(
21		#' x = cards::ADSL,
22		#' formula = AGE ~ ARM,
23		#' method = "lm"
24		#' )
25		NULL
26
27		#' @rdname ard_regression
28		#' @export
29		ard_regression <- function(x, ...) {
30	20x	UseMethod("ard_regression")
31		}
32
33		#' @rdname ard_regression
34		#' @export
35		ard_regression.default <- function(x, tidy_fun = broom.helpers::tidy_with_broom_or_parameters, ...) {
36	19x	set_cli_abort_call()
37
38		# check installed packages ---------------------------------------------------
39	19x	check_pkg_installed(pkg = "broom.helpers")
40
41		# check inputs ---------------------------------------------------------------
42	19x	check_not_missing(x)
43
44		# summarize model ------------------------------------------------------------
45	19x	lst_results <- cards::eval_capture_conditions(
46	19x	broom.helpers::tidy_plus_plus(
47	19x	model = x,
48	19x	tidy_fun = tidy_fun,
49		...
50		)
51		)
52
53		# final tidying up of cards data frame ---------------------------------------
54	19x	.regression_final_ard_prep(lst_results)
55		}
56
57		#' @rdname ard_regression
58		#' @export
59		ard_regression.data.frame <- function(x, formula, method, method.args = list(), package = "base",
60		tidy_fun = broom.helpers::tidy_with_broom_or_parameters, ...) {
61		# check inputs ---------------------------------------------------------------
62	1x	set_cli_abort_call()
63	1x	check_not_missing(x)
64	1x	check_not_missing(formula)
65	1x	check_not_missing(method)
66	1x	check_class(formula, cls = "formula")
67
68		# build model ----------------------------------------------------------------
69	1x	model <-
70	1x	construct_model(
71	1x	data = x,
72	1x	formula = formula,
73	1x	method = method,
74	1x	method.args = {{ method.args }},
75	1x	package = package
76		)
77
78		# summarize model ------------------------------------------------------------
79	1x	ard_regression(x = model, tidy_fun = tidy_fun, ...)
80		}
81
82		.regression_final_ard_prep <- function(lst_results) {
83		# saving the results in data frame -------------------------------------------
84	19x	df_card <-
85	19x	if (!is.null(lst_results[["result"]])) {
86	18x	lst_results[["result"]] \|>
87	18x	dplyr::mutate(
88	18x	variable_level = as.list(dplyr::if_else(.data$var_type %in% "continuous", NA_character_, .data$label)),
89	18x	dplyr::across(-c("variable", "variable_level"), .fns = as.list)
90		) \|>
91	18x	tidyr::pivot_longer(
92	18x	cols = -c("variable", "variable_level"),
93	18x	names_to = "stat_name",
94	18x	values_to = "stat"
95		) \|>
96	18x	dplyr::filter(map_lgl(.data$stat, Negate(is.na))) \|>
97	18x	dplyr::select(-(cards::all_ard_variables("levels") & dplyr::where(\(x) all(is.na(x)))))
98	19x	} else { # if there was an error return a shell of an ARD data frame
99	1x	dplyr::tibble(
100	1x	variable = "model_1",
101	1x	stat_name = "estimate",
102	1x	stat = list(NULL)
103		)
104		}
105
106		# final tidying up of ARD data frame ---------------------------------------
107	19x	df_card \|>
108	19x	dplyr::mutate(
109	19x	warning = lst_results["warning"],
110	19x	error = lst_results["error"],
111	19x	fmt_fun = lapply(
112	19x	.data$stat,
113	19x	function(x) {
114	499x	switch(is.integer(x),
115	499x	0L
116	499x	) %\|\|% switch(is.numeric(x),
117	499x	1L
118		)
119		}
120		),
121	19x	context = "regression"
122		) \|>
123	19x	dplyr::left_join(
124	19x	.df_regression_stat_labels(),
125	19x	by = "stat_name"
126		) \|>
127	19x	dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) \|>
128	19x	cards::as_card() \|>
129	19x	cards::tidy_ard_column_order()
130		}
131
132		.df_regression_stat_labels <- function() {
133	19x	dplyr::tribble(
134	19x	~stat_name, ~stat_label,
135	19x	"var_label", "Label",
136	19x	"var_class", "Class",
137	19x	"var_type", "Type",
138	19x	"var_nlevels", "N Levels",
139	19x	"contrasts_type", "Contrast Type",
140	19x	"label", "Level Label",
141	19x	"n_obs", "N Obs.",
142	19x	"n_event", "N Events",
143	19x	"exposure", "Exposure Time",
144	19x	"estimate", "Coefficient",
145	19x	"std.error", "Standard Error",
146	19x	"p.value", "p-value",
147	19x	"conf.low", "CI Lower Bound",
148	19x	"conf.high", "CI Upper Bound",
149		)
150		}

1		#' @description
2		#' The `ard_emmeans_emmeans()` function calculates least-squares means using the 'emmeans'
3		#' package using the following
4		#'
5		#' ```r
6		#' emmeans::emmeans(object = <regression model>, specs = ~ <primary covariate>) \|>
7		#' summary(emmeans, calc = c(n = ".wgt."))
8		#' ```
9		#'
10		#' The arguments `data`, `formula`, `method`, `method.args`, `package` are used
11		#' to construct the regression model via `cardx::construct_model()`.
12		#'
13		#' @export
14		#' @rdname ard_emmeans
15		#'
16		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "emmeans"))
17		#' # LS Means
18		#' ard_emmeans_emmeans(
19		#' data = mtcars,
20		#' formula = mpg ~ am + cyl,
21		#' method = "lm"
22		#' )
23		#'
24		#' ard_emmeans_emmeans(
25		#' data = mtcars,
26		#' formula = vs ~ am + mpg,
27		#' method = "glm",
28		#' method.args = list(family = binomial),
29		#' response_type = "dichotomous"
30		#' )
31		ard_emmeans_emmeans <- function(data,
32		formula,
33		method,
34		method.args = list(),
35		package = "base",
36		response_type = c("continuous", "dichotomous"),
37		conf.level = 0.95,
38		primary_covariate =
39		stats::terms(formula) \|>
40		attr("term.labels") \|>
41		getElement(1L)) {
42	5x	set_cli_abort_call()
43
44		# check package installation -------------------------------------------------
45	5x	check_pkg_installed(c("emmeans", package))
46	5x	check_not_missing(data)
47	5x	check_not_missing(formula)
48	5x	check_not_missing(method)
49	5x	check_class(data, c("data.frame", "survey.design"))
50	5x	check_class(formula, cls = "formula")
51	5x	check_string(package)
52	5x	check_string(primary_covariate)
53	5x	check_scalar(conf.level)
54	5x	check_range(conf.level, range = c(0, 1))
55	5x	response_type <- arg_match(response_type, error_call = get_cli_abort_call())
56
57	5x	data_in <- if (dplyr::last(class(data)) == "survey.design") data$variables else data
58
59		# build ARD ------------------------------------------------------------------
60	5x	result <- cards::ard_mvsummary(
61	5x	data = data_in,
62	5x	variables = all_of(primary_covariate),
63	5x	statistic = all_of(primary_covariate) ~ list(
64	5x	emmeans =
65	5x	.calc_emmeans(
66	5x	data = data, formula = formula, method = method,
67	5x	method.args = {{ method.args }}, package = package,
68	5x	response_type = response_type, conf.level = conf.level,
69	5x	primary_covariate = primary_covariate
70		)
71		)
72		)
73		# unlist stat column
74	5x	if (length(result$stat[[which(result$stat_label == "variable_level")]]) > 1) {
75	4x	result <- result \|> tidyr::unnest_longer(col = "stat")
76		}
77
78	5x	result \|>
79	5x	dplyr::select(-"stat_label") \|>
80	5x	dplyr::left_join(
81	5x	.df_emmeans_stat_labels("emmeans"),
82	5x	by = "stat_name"
83		) \|>
84	5x	dplyr::mutate(
85	5x	variable = "contrast",
86	5x	variable_level = if ("variable_level" %in% .data$stat_name) {
87	5x	rep_len(.data$stat[.data$stat_name == "variable_level"], length.out = nrow(result))
88		} else {
89	5x	NA
90		},
91	5x	group1 = .env$primary_covariate,
92	5x	stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name),
93	5x	context = "emmeans_emmeans"
94		) \|>
95	5x	dplyr::filter(!is.na(.data$stat)) \|>
96	5x	dplyr::filter(.data$stat_name != "variable_level") \|>
97	5x	dplyr::arrange(.data$variable_level) \|>
98	5x	cards::as_card() \|>
99	5x	cards::tidy_ard_column_order() \|>
100	5x	cards::tidy_ard_row_order()
101		}
102
103		# function to perform calculations ---------------------------------------------
104		.calc_emmeans <- function(data, formula, method,
105		method.args,
106		package,
107		response_type,
108		conf.level,
109		primary_covariate) {
110	5x	cards::as_cards_fn(
111	5x	\(x, ...) {
112		# construct primary model ------------------------------------------------
113	5x	mod <-
114	5x	construct_model(
115	5x	data = data, formula = formula, method = method,
116	5x	method.args = {{ method.args }},
117	5x	package = package, env = caller_env()
118		)
119
120		# emmeans ----------------------------------------------------------------
121	4x	emmeans_args <- list(object = mod, specs = reformulate2(primary_covariate))
122	3x	if (response_type %in% "dichotomous") emmeans_args <- c(emmeans_args, list(regrid = "response"))
123	4x	emmeans <-
124	4x	withr::with_namespace(
125	4x	package = "emmeans",
126	4x	code = do.call("emmeans", args = emmeans_args)
127		)
128
129		# calculate mean estimates ---------------------------------------------
130	4x	results <-
131	4x	summary(emmeans, calc = c(n = ".wgt.")) \|>
132	4x	dplyr::as_tibble() \|>
133	4x	dplyr::rename(
134	4x	estimate = any_of(c("emmean", "prob")),
135	4x	n = any_of("n")
136		) \|>
137	4x	dplyr::rename(variable_level = all_of(primary_covariate)) \|>
138	4x	dplyr::mutate(variable_level = as.character(.data$variable_level))
139
140		# convert results to ARD format ------------------------------------------
141	4x	results \|>
142	4x	dplyr::as_tibble() \|>
143	4x	dplyr::rename(
144	4x	conf.low = any_of("asymp.LCL"),
145	4x	conf.high = any_of("asymp.UCL"),
146	4x	conf.low = any_of("lower.CL"),
147	4x	conf.high = any_of("upper.CL"),
148	4x	std.error = any_of("SE")
149		) \|>
150	4x	dplyr::select(any_of(c(
151	4x	"variable_level", "estimate",
152	4x	"std.error", "df", "n",
153	4x	"conf.low", "conf.high", "p.value"
154		))) \|>
155	4x	dplyr::mutate(
156	4x	conf.level = .env$conf.level,
157	4x	method = "Least-squares means"
158		) \|>
159	4x	dplyr::mutate(across(everything(), ~ .x \|> as.list()))
160		},
161	5x	stat_names = c("variable_level", "estimate", "std.error", "df", "conf.low", "conf.high", "p.value", "conf.level", "method", "n")
162		)
163		}

1		#' ARD for Difference in Survival
2		#'
3		#' @description
4		#' Analysis results data for comparison of survival using [survival::survdiff()].
5		#'
6		#' @param formula (`formula`)\cr
7		#' a formula
8		#' @param data (`data.frame`)\cr
9		#' a data frame
10		#' @param rho (`scalar numeric`)\cr
11		#' numeric scalar passed to `survival::survdiff(rho)`. Default is `rho=0`.
12		#' @param ... additional arguments passed to `survival::survdiff()`
13		#'
14		#' @return an ARD data frame of class 'card'
15		#' @export
16		#'
17		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("survival", "broom", "ggsurvfit")))
18		#' library(survival)
19		#' library(ggsurvfit)
20		#'
21		#' ard_survival_survdiff(Surv_CNSR(AVAL, CNSR) ~ TRTA, data = cards::ADTTE)
22		ard_survival_survdiff <- function(formula, data, rho = 0, ...) {
23	5x	set_cli_abort_call()
24
25		# check installed packages ---------------------------------------------------
26	5x	check_pkg_installed(c("survival", "broom"))
27
28		# check/process inputs -------------------------------------------------------
29	5x	check_not_missing(formula)
30	5x	check_class(formula, cls = "formula")
31	5x	if (!missing(data)) check_class(data, cls = "data.frame")
32	5x	check_scalar(rho)
33	5x	check_class(rho, cls = "numeric")
34
35		# assign method
36	5x	method <- dplyr::case_when(
37	5x	rho == 0 ~ "Log-rank test",
38	5x	rho == 1.5 ~ "Tarone-Ware test",
39	5x	rho == 1 ~ "Peto & Peto modification of Gehan-Wilcoxon test",
40	5x	.default = glue::glue("G-rho test (\U03C1 = {rho})")
41		) \|>
42	5x	as.character()
43
44		# calculate survdiff() results -----------------------------------------------
45	5x	lst_glance <-
46	5x	cards::eval_capture_conditions(
47	5x	survival::survdiff(formula = formula, data = data, rho = rho, ...) \|>
48	5x	broom::glance() \|>
49	5x	dplyr::mutate(method = .env$method)
50		)
51
52		# tidy results up in an ARD format -------------------------------------------
53		# extract variable names from formula
54	5x	variables <- stats::terms(formula) \|>
55	5x	attr("term.labels") \|>
56	5x	.strip_backticks()
57
58		# if there was an error, return results early
59	5x	if (is.null(lst_glance[["result"]])) {
60		# if no variables in formula, then return an error
61		# otherwise, if we do have variable names, then we can construct an empty ARD which will be done below
62	2x	if (is_empty(variables)) {
63	1x	cli::cli_abort(
64	1x	message =
65	1x	c("There was an error in {.fun survival::survdiff}. See below:",
66	1x	"x" = lst_glance[["error"]]
67		),
68	1x	call = get_cli_abort_call()
69		)
70		}
71		}
72
73	4x	.variables_to_survdiff_ard(
74	4x	variables = variables,
75	4x	method = method,
76		# styler: off
77	4x	stat_names =
78	4x	if (!is.null(lst_glance[["result"]])) names(lst_glance[["result"]])
79	4x	else c("statistic", "df", "p.value", "method"),
80	4x	stats =
81	4x	if (!is.null(lst_glance[["result"]])) unname(as.list(lst_glance[["result"]]))
82	4x	else rep_along(c("statistic", "df", "p.value"), list(NULL)) \|> c(list(method = method))
83		# styler: on
84		) \|>
85	4x	.add_survdiff_stat_labels() \|>
86	4x	dplyr::mutate(
87	4x	context = "survival_survdiff",
88	4x	warning = lst_glance["warning"],
89	4x	error = lst_glance["error"],
90	4x	fmt_fun = map(
91	4x	.data$stat,
92	4x	function(x) {
93	9x	if (is.numeric(x)) return(1L) # styler: off
94	7x	NULL
95		}
96		)
97		) \|>
98	4x	cards::as_card() \|>
99	4x	cards::tidy_ard_column_order()
100		}
101
102		.variables_to_survdiff_ard <- function(variables,
103		method,
104		stat_names,
105		stats) {
106	4x	len <- length(variables)
107
108	4x	df_vars <- dplyr::tibble(!!!rev(variables)) \|>
109	4x	set_names(
110	4x	ifelse(
111	4x	len > 1L,
112	4x	c(paste0("group_", rev(seq_len(len - 1L))), "variable"),
113	4x	"variable"
114		)
115		)
116
117	4x	dplyr::bind_cols(
118	4x	df_vars,
119	4x	dplyr::tibble(
120	4x	stat_name = .env$stat_names,
121	4x	stat = .env$stats
122		)
123		)
124		}
125
126		.add_survdiff_stat_labels <- function(x) {
127	4x	x \|>
128	4x	dplyr::left_join(
129	4x	dplyr::tribble(
130	4x	~stat_name, ~stat_label,
131	4x	"statistic", "X^2 Statistic",
132	4x	"df", "Degrees of Freedom",
133	4x	"p.value", "p-value"
134		),
135	4x	by = "stat_name"
136		) \|>
137	4x	dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name))
138		}
139
140		.strip_backticks <- function(x) {
141	5x	ifelse(
142	5x	str_detect(x, "^`.*`$"),
143	5x	substr(x, 2, nchar(x) - 1),
144	5x	x
145		)
146		}

1		#' ARD Total N
2		#'
3		#' Returns the total N for a survey object.
4		#' The placeholder variable name returned in the object is `"..ard_total_n.."`
5		#'
6		#' @inheritParams ard_tabulate_value.survey.design
7		#' @inheritParams rlang::args_dots_empty
8		#'
9		#' @return an ARD data frame of class 'card'
10		#' @export
11		#'
12		#' @examplesIf cardx:::is_pkg_installed("survey")
13		#' svy_titanic <- survey::svydesign(~1, data = as.data.frame(Titanic), weights = ~Freq)
14		#'
15		#' ard_total_n(svy_titanic)
16		ard_total_n.survey.design <- function(data, ...) {
17		# process inputs -------------------------------------------------------------
18	3x	set_cli_abort_call()
19	3x	check_dots_empty()
20
21		# calculate total N ----------------------------------------------------------
22	3x	data <- stats::update(data, ..ard_total_n.. = TRUE)
23
24	3x	data \|>
25	3x	ard_tabulate_value(
26	3x	variables = "..ard_total_n..",
27	3x	statistic = list(..ard_total_n.. = c("N", "N_unweighted"))
28		) \|>
29	3x	dplyr::mutate(context = "total_n") \|>
30	3x	dplyr::select(-cards::all_ard_variables("levels"))
31		}

1		#' ARD 2-sample proportion test
2		#'
3		#' @description
4		#' Analysis results data for a 2-sample test or proportions using [`stats::prop.test()`].
5		#'
6		#' @param data (`data.frame`)\cr
7		#' a data frame.
8		#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
9		#' column name to compare by
10		#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
11		#' column names to be compared. Must be a binary column coded as `TRUE`/`FALSE`
12		#' or `1`/`0`. Independent tests will be computed for each variable.
13		#' @param conf.level (scalar `numeric`)\cr
14		#' confidence level for confidence interval. Default is `0.95`.
15		#' @param ... arguments passed to `prop.test(...)`
16		#'
17		#' @return ARD data frame
18		#' @export
19		#'
20		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
21		#' mtcars \|>
22		#' ard_stats_prop_test(by = vs, variables = am)
23		ard_stats_prop_test <- function(data, by, variables, conf.level = 0.95, ...) {
24	6x	set_cli_abort_call()
25
26		# check installed packages ---------------------------------------------------
27	6x	check_pkg_installed(pkg = "broom")
28
29		# check inputs ---------------------------------------------------------------
30	6x	check_not_missing(data)
31	6x	check_not_missing(variables)
32	6x	check_not_missing(by)
33	6x	check_data_frame(data)
34	6x	check_scalar_range(conf.level, range = c(0, 1))
35
36		# process inputs -------------------------------------------------------------
37	6x	cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
38	6x	check_scalar(by)
39	6x	data <- data[c(by, variables)] \|> dplyr::ungroup() \|> tidyr::drop_na() # styler: off
40
41		# return empty ARD if no variables selected ----------------------------------
42	6x	if (is_empty(variables)) {
43	!	return(dplyr::tibble() \|> cards::as_card())
44		}
45
46		# build ARD ------------------------------------------------------------------
47	6x	lapply(
48	6x	variables,
49	6x	function(variable) {
50	7x	.format_proptest_results(
51	7x	by = by,
52	7x	variable = variable,
53	7x	lst_tidy =
54	7x	cards::eval_capture_conditions({
55	7x	check_binary(data[[variable]], arg_name = "variable")
56
57	4x	data_counts <-
58	4x	dplyr::arrange(data, .data[[by]]) \|>
59	4x	dplyr::summarise(
60	4x	.by = all_of(by),
61	4x	x = sum(.data[[variable]]),
62	4x	n = length(.data[[variable]])
63		)
64
65	4x	if (nrow(data_counts) != 2) {
66	1x	cli::cli_abort(
67	1x	c(
68	1x	"The {.arg by} column must have exactly 2 levels.",
69	1x	"The levels are {.val {data_counts[[by]]}}"
70		),
71	1x	call = get_cli_abort_call()
72		)
73		}
74
75	3x	stats::prop.test(
76	3x	x = data_counts[["x"]],
77	3x	n = data_counts[["n"]],
78	3x	conf.level = conf.level,
79		...
80		) \|>
81	3x	broom::tidy() \|>
82		# add central estimate for difference
83	3x	dplyr::mutate(estimate = .data$estimate1 - .data$estimate2, .before = 1L)
84		}),
85		...
86		)
87		}
88		) \|>
89	6x	dplyr::bind_rows()
90		}
91
92
93		#' Convert prop.test to ARD
94		#'
95		#' @inheritParams cards::tidy_as_ard
96		#' @param by (`string`)\cr by column name
97		#' @param variable (`string`)\cr variable column name
98		#' @param ... passed to `prop.test(...)`
99		#'
100		#' @return ARD data frame
101		#' @keywords internal
102		.format_proptest_results <- function(by, variable, lst_tidy, ...) {
103		# build ARD ------------------------------------------------------------------
104	7x	ret <-
105	7x	cards::tidy_as_ard(
106	7x	lst_tidy = lst_tidy,
107	7x	tidy_result_names = c(
108	7x	"estimate", "estimate1", "estimate2", "statistic",
109	7x	"p.value", "parameter", "conf.low", "conf.high",
110	7x	"method", "alternative"
111		),
112	7x	fun_args_to_record = c("p", "conf.level", "correct"),
113	7x	formals = formals(stats::prop.test),
114	7x	passed_args = dots_list(...),
115	7x	lst_ard_columns = list(group1 = by, variable = variable, context = "stats_prop_test")
116		)
117
118		# add the stat label ---------------------------------------------------------
119	7x	ret \|>
120	7x	dplyr::left_join(
121	7x	.df_proptest_stat_labels(),
122	7x	by = "stat_name"
123		) \|>
124	7x	dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) \|>
125	7x	cards::as_card() \|>
126	7x	cards::tidy_ard_column_order()
127		}
128
129		.df_proptest_stat_labels <- function() {
130	7x	dplyr::tribble(
131	7x	~stat_name, ~stat_label,
132	7x	"estimate1", "Group 1 Rate",
133	7x	"estimate2", "Group 2 Rate",
134	7x	"estimate", "Rate Difference",
135	7x	"p.value", "p-value",
136	7x	"statistic", "X-squared Statistic",
137	7x	"parameter", "Degrees of Freedom",
138	7x	"conf.low", "CI Lower Bound",
139	7x	"conf.high", "CI Upper Bound",
140	7x	"conf.level", "CI Confidence Level",
141	7x	"correct", "Yates' continuity correction",
142		)
143		}

1		#' ARD one-sample t-test
2		#'
3		#' @description
4		#' Analysis results data for one-sample t-tests.
5		#' Result may be stratified by including the `by` argument.
6		#'
7		#' @param data (`data.frame`)\cr
8		#' a data frame. See below for details.
9		#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
10		#' column names to be analyzed. Independent t-tests will be computed for
11		#' each variable.
12		#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
13		#' optional column name to stratify results by.
14		#' @inheritParams ard_stats_t_test
15		#'
16		#' @return ARD data frame
17		#' @export
18		#'
19		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
20		#' cards::ADSL \|>
21		#' ard_stats_t_test_onesample(by = ARM, variables = AGE)
22		ard_stats_t_test_onesample <- function(data, variables, by = dplyr::group_vars(data), conf.level = 0.95, ...) {
23	5x	set_cli_abort_call()
24
25		# check installed packages ---------------------------------------------------
26	5x	check_pkg_installed("broom")
27
28		# check/process inputs -------------------------------------------------------
29	5x	check_not_missing(data)
30	5x	check_not_missing(variables)
31	5x	check_data_frame(data)
32	5x	data <- dplyr::ungroup(data)
33	5x	cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
34	5x	check_scalar_range(conf.level, range = c(0, 1))
35
36		# return empty ARD if no variables selected ----------------------------------
37	5x	if (is_empty(variables)) {
38	1x	return(dplyr::tibble() \|> cards::as_card())
39		}
40
41	4x	cards::ard_summary(
42	4x	data = data,
43	4x	variables = all_of(variables),
44	4x	by = all_of(by),
45	4x	statistic = all_of(variables) ~ list(t_test_onesample = \(x) stats::t.test(x = x, conf.level = conf.level, ...) \|> broom::tidy())
46		) \|>
47	4x	cards::bind_ard(
48	4x	cards::ard_summary(
49	4x	data = data,
50	4x	variables = all_of(variables),
51	4x	by = all_of(by),
52	4x	statistic =
53	4x	all_of(variables) ~
54	4x	list(conf.level = \(x) {
55	8x	formals(asNamespace("stats")[["t.test.default"]])["mu"] \|>
56	8x	utils::modifyList(list(conf.level = conf.level, ...))
57		})
58		)
59		) \|>
60	4x	dplyr::select(-"stat_label") \|>
61	4x	dplyr::left_join(
62	4x	.df_ttest_stat_labels(by = NULL),
63	4x	by = "stat_name"
64		) \|>
65	4x	dplyr::mutate(
66	4x	stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name),
67	4x	context = "stats_t_test_onesample",
68		) \|>
69	4x	cards::as_card() \|>
70	4x	cards::tidy_ard_column_order() \|>
71	4x	cards::tidy_ard_row_order()
72		}

1		#' ARD Chi-squared Test
2		#'
3		#' @description
4		#' Analysis results data for Pearson's Chi-squared Test.
5		#' Calculated with `chisq.test(x = data[[variable]], y = data[[by]], ...)`
6		#'
7		#'
8		#' @param data (`data.frame`)\cr
9		#' a data frame.
10		#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
11		#' column name to compare by.
12		#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
13		#' column names to be compared. Independent tests will be computed for
14		#' each variable.
15		#' @param ... additional arguments passed to `chisq.test(...)`
16		#'
17		#' @return ARD data frame
18		#' @export
19		#'
20		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
21		#' cards::ADSL \|>
22		#' ard_stats_chisq_test(by = "ARM", variables = "AGEGR1")
23		ard_stats_chisq_test <- function(data, by, variables, ...) {
24	4x	set_cli_abort_call()
25
26		# check installed packages ---------------------------------------------------
27	4x	check_pkg_installed("broom")
28
29		# check/process inputs -------------------------------------------------------
30	4x	check_not_missing(data)
31	4x	check_not_missing(variables)
32	4x	check_not_missing(by)
33	4x	check_data_frame(data)
34	4x	cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
35	4x	check_scalar(by)
36
37		# return empty ARD if no variables selected ----------------------------------
38	4x	if (is_empty(variables)) {
39	!	return(dplyr::tibble() \|> cards::as_card())
40		}
41
42		# build ARD ------------------------------------------------------------------
43	4x	lapply(
44	4x	variables,
45	4x	function(variable) {
46	5x	cards::tidy_as_ard(
47	5x	lst_tidy =
48	5x	cards::eval_capture_conditions(
49	5x	stats::chisq.test(x = data[[variable]], y = data[[by]], ...) \|>
50	5x	broom::tidy()
51		),
52	5x	tidy_result_names = c("statistic", "p.value", "parameter", "method"),
53	5x	fun_args_to_record =
54	5x	c("correct", "p", "rescale.p", "simulate.p.value", "B"),
55	5x	formals = formals(stats::chisq.test),
56	5x	passed_args = dots_list(...),
57	5x	lst_ard_columns = list(group1 = by, variable = variable, context = "stats_chisq_test")
58		) \|>
59	5x	dplyr::mutate(
60	5x	.after = "stat_name",
61	5x	stat_label =
62	5x	dplyr::case_when(
63	5x	.data$stat_name %in% "statistic" ~ "X-squared Statistic",
64	5x	.data$stat_name %in% "p.value" ~ "p-value",
65	5x	.data$stat_name %in% "parameter" ~ "Degrees of Freedom",
66	5x	TRUE ~ .data$stat_name,
67		)
68		)
69		}
70		) \|>
71	4x	dplyr::bind_rows() \|>
72	4x	cards::as_card()
73		}

1		#' ARD Fisher's Exact Test
2		#'
3		#' @description
4		#' Analysis results data for Fisher's Exact Test.
5		#' Calculated with `fisher.test(x = data[[variable]], y = data[[by]], ...)`
6		#'
7		#'
8		#' @param data (`data.frame`)\cr
9		#' a data frame.
10		#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
11		#' column name to compare by
12		#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
13		#' column names to be compared. Independent tests will be computed for
14		#' each variable.
15		#' @param conf.level (scalar `numeric`)\cr
16		#' confidence level for confidence interval. Default is `0.95`.
17		#' @param ... additional arguments passed to `fisher.test(...)`
18		#'
19		#' @return ARD data frame
20		#' @export
21		#'
22		#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
23		#' cards::ADSL[1:30, ] \|>
24		#' ard_stats_fisher_test(by = "ARM", variables = "AGEGR1")
25		ard_stats_fisher_test <- function(data, by, variables, conf.level = 0.95, ...) {
26	4x	set_cli_abort_call()
27
28		# check installed packages ---------------------------------------------------
29	4x	check_pkg_installed("broom")
30
31		# check/process inputs -------------------------------------------------------
32	4x	check_not_missing(data)
33	4x	check_not_missing(variables)
34	4x	check_not_missing(by)
35	4x	check_data_frame(data)
36	4x	cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
37	4x	check_scalar(by)
38	4x	check_range(conf.level, range = c(0, 1))
39
40		# return empty ARD if no variables selected ----------------------------------
41	4x	if (is_empty(variables)) {
42	!	return(dplyr::tibble() \|> cards::as_card())
43		}
44
45		# build ARD ------------------------------------------------------------------
46	4x	lapply(
47	4x	variables,
48	4x	function(variable) {
49	5x	cards::tidy_as_ard(
50	5x	lst_tidy =
51	5x	cards::eval_capture_conditions(
52	5x	stats::fisher.test(x = data[[variable]], y = data[[by]], conf.level = conf.level, ...) \|>
53	5x	broom::tidy()
54		),
55	5x	tidy_result_names =
56	5x	c("estimate", "p.value", "conf.low", "conf.high", "method", "alternative"),
57	5x	fun_args_to_record =
58	5x	c(
59	5x	"workspace", "hybrid", "hybridPars", "control", "or",
60	5x	"conf.int", "conf.level", "simulate.p.value", "B"
61		),
62	5x	formals = formals(stats::fisher.test),
63	5x	passed_args = dots_list(...),
64	5x	lst_ard_columns = list(group1 = by, variable = variable, context = "stats_fisher_test")
65		) \|>
66	5x	dplyr::mutate(
67	5x	.after = "stat_name",
68	5x	stat_label =
69	5x	dplyr::case_when(
70	5x	.data$stat_name %in% "p.value" ~ "p-value",
71	5x	TRUE ~ .data$stat_name,
72		)
73		)
74		}
75		) \|>
76	4x	dplyr::bind_rows() \|>
77	4x	cards::as_card()
78		}