| 1 |
#' ARD Mood Test |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Analysis results data for Mood two sample test of scale. Note this not to be confused with |
|
| 5 |
#' the Brown-Mood test of medians. |
|
| 6 |
#' |
|
| 7 |
#' @param data (`data.frame`)\cr |
|
| 8 |
#' a data frame. See below for details. |
|
| 9 |
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 10 |
#' column name to compare by. |
|
| 11 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 12 |
#' column name to be compared. Independent tests will |
|
| 13 |
#' be run for each variable. |
|
| 14 |
#' @param ... arguments passed to `mood.test(...)` |
|
| 15 |
#' |
|
| 16 |
#' @return ARD data frame |
|
| 17 |
#' @name ard_stats_mood_test |
|
| 18 |
#' |
|
| 19 |
#' @details |
|
| 20 |
#' For the `ard_stats_mood_test()` function, the data is expected to be one row per subject. |
|
| 21 |
#' The data is passed as `mood.test(data[[variable]] ~ data[[by]], ...)`. |
|
| 22 |
#' @rdname ard_stats_mood_test |
|
| 23 |
#' @export |
|
| 24 |
#' |
|
| 25 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
|
|
| 26 |
#' cards::ADSL |> |
|
| 27 |
#' ard_stats_mood_test(by = "SEX", variables = "AGE") |
|
| 28 |
ard_stats_mood_test <- function(data, by, variables, ...) {
|
|
| 29 | 5x |
set_cli_abort_call() |
| 30 | ||
| 31 |
# check installed packages --------------------------------------------------- |
|
| 32 | 5x |
check_pkg_installed("broom")
|
| 33 | ||
| 34 |
# check/process inputs ------------------------------------------------------- |
|
| 35 | 5x |
check_not_missing(data) |
| 36 | 5x |
check_not_missing(variables) |
| 37 | 5x |
check_not_missing(by) |
| 38 | 5x |
check_data_frame(data) |
| 39 | 5x |
data <- dplyr::ungroup(data) |
| 40 | 5x |
cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
|
| 41 | 5x |
check_scalar(by) |
| 42 | ||
| 43 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 44 | 5x |
if (is_empty(variables)) {
|
| 45 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 46 |
} |
|
| 47 | ||
| 48 |
# build ARD ------------------------------------------------------------------ |
|
| 49 | 5x |
lapply( |
| 50 | 5x |
variables, |
| 51 | 5x |
function(variable) {
|
| 52 | 6x |
.format_moodtest_results( |
| 53 | 6x |
by = by, |
| 54 | 6x |
variable = variable, |
| 55 | 6x |
lst_tidy = |
| 56 | 6x |
cards::eval_capture_conditions( |
| 57 | 6x |
stats::mood.test(data[[variable]] ~ data[[by]], ...) |> |
| 58 | 6x |
broom::tidy() |
| 59 |
), |
|
| 60 |
... |
|
| 61 |
) |
|
| 62 |
} |
|
| 63 |
) |> |
|
| 64 | 5x |
dplyr::bind_rows() |
| 65 |
} |
|
| 66 |
#' Convert mood test results to ARD |
|
| 67 |
#' |
|
| 68 |
#' @inheritParams cards::tidy_as_ard |
|
| 69 |
#' @inheritParams stats::mood.test |
|
| 70 |
#' @param by (`string`)\cr by column name |
|
| 71 |
#' @param variable (`string`)\cr variable column name |
|
| 72 |
#' @param ... passed to `mood.test(...)` |
|
| 73 |
#' |
|
| 74 |
#' @return ARD data frame |
|
| 75 |
#' @keywords internal |
|
| 76 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
|
|
| 77 |
#' cardx:::.format_moodtest_results( |
|
| 78 |
#' by = "SEX", |
|
| 79 |
#' variable = "AGE", |
|
| 80 |
#' lst_tidy = |
|
| 81 |
#' cards::eval_capture_conditions( |
|
| 82 |
#' stats::mood.test(ADSL[["AGE"]] ~ ADSL[["SEX"]]) |> |
|
| 83 |
#' broom::tidy() |
|
| 84 |
#' ) |
|
| 85 |
#' ) |
|
| 86 |
.format_moodtest_results <- function(by, variable, lst_tidy, ...) {
|
|
| 87 |
# build ARD ------------------------------------------------------------------ |
|
| 88 | 6x |
ret <- |
| 89 | 6x |
cards::tidy_as_ard( |
| 90 | 6x |
lst_tidy = lst_tidy, |
| 91 | 6x |
tidy_result_names = c("statistic", "p.value", "method", "alternative"),
|
| 92 | 6x |
formals = formals(asNamespace("stats")[["mood.test.default"]]),
|
| 93 | 6x |
passed_args = c(dots_list(...)), |
| 94 | 6x |
lst_ard_columns = list(group1 = by, variable = variable, context = "stats_mood_test") |
| 95 |
) |
|
| 96 | ||
| 97 |
# add the stat label --------------------------------------------------------- |
|
| 98 | 6x |
ret |> |
| 99 | 6x |
dplyr::left_join( |
| 100 | 6x |
.df_moodtest_stat_labels(), |
| 101 | 6x |
by = "stat_name" |
| 102 |
) |> |
|
| 103 | 6x |
dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) |> |
| 104 | 6x |
cards::as_card() |> |
| 105 | 6x |
cards::tidy_ard_column_order() |
| 106 |
} |
|
| 107 | ||
| 108 |
.df_moodtest_stat_labels <- function() {
|
|
| 109 | 6x |
dplyr::tribble( |
| 110 | 6x |
~stat_name, ~stat_label, |
| 111 | 6x |
"statistic", "Z-Statistic", |
| 112 | 6x |
"p.value", "p-value", |
| 113 | 6x |
"alternative", "Alternative Hypothesis" |
| 114 |
) |
|
| 115 |
} |
| 1 |
#' ARD Categorical Survey Statistics |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Compute tabulations on survey-weighted data. |
|
| 5 |
#' |
|
| 6 |
#' The counts and proportion (`"N"`, `"n"`, `"p"`) are calculated using `survey::svytable()`, |
|
| 7 |
#' and the standard errors and design effect (`"p.std.error"`, `"deff"`) are |
|
| 8 |
#' calculated using `survey::svymean()`. |
|
| 9 |
#' |
|
| 10 |
#' The design effect (`"deff"`) is calculated only when requested in the `statistic` argument. |
|
| 11 |
#' |
|
| 12 |
#' The unweighted statistics are calculated with `cards::ard_tabulate.data.frame()`. |
|
| 13 |
#' |
|
| 14 |
#' @param data (`survey.design`)\cr |
|
| 15 |
#' a design object often created with [`survey::svydesign()`]. |
|
| 16 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 17 |
#' columns to include in summaries. |
|
| 18 |
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 19 |
#' results are calculated for **all combinations** of the column specified |
|
| 20 |
#' and the variables. A single column may be specified. |
|
| 21 |
#' @param denominator (`string`)\cr |
|
| 22 |
#' a string indicating the type proportions to calculate. Must be one of |
|
| 23 |
#' `"column"` (the default), `"row"`, and `"cell"`. |
|
| 24 |
#' @param statistic ([`formula-list-selector`][cards::syntax])\cr |
|
| 25 |
#' a named list, a list of formulas, |
|
| 26 |
#' or a single formula where the list element is a character vector of |
|
| 27 |
#' statistic names to include. See default value for options. |
|
| 28 |
#' @param fmt_fun ([`formula-list-selector`][cards::syntax])\cr |
|
| 29 |
#' a named list, a list of formulas, |
|
| 30 |
#' or a single formula where the list element is a named list of functions |
|
| 31 |
#' (or the RHS of a formula), |
|
| 32 |
#' e.g. `list(mpg = list(mean = \(x) round(x, digits = 2) |> as.character()))`. |
|
| 33 |
#' @param stat_label ([`formula-list-selector`][cards::syntax])\cr |
|
| 34 |
#' a named list, a list of formulas, or a single formula where |
|
| 35 |
#' the list element is either a named list or a list of formulas defining the |
|
| 36 |
#' statistic labels, e.g. `everything() ~ list(mean = "Mean", sd = "SD")` or |
|
| 37 |
#' `everything() ~ list(mean ~ "Mean", sd ~ "SD")`. |
|
| 38 |
#' @param fmt_fn `r lifecycle::badge("deprecated")`
|
|
| 39 |
#' @inheritParams rlang::args_dots_empty |
|
| 40 |
#' |
|
| 41 |
#' @return an ARD data frame of class 'card' |
|
| 42 |
#' @export |
|
| 43 |
#' |
|
| 44 |
#' @examplesIf cardx:::is_pkg_installed("survey")
|
|
| 45 |
#' svy_titanic <- survey::svydesign(~1, data = as.data.frame(Titanic), weights = ~Freq) |
|
| 46 |
#' |
|
| 47 |
#' ard_tabulate(svy_titanic, variables = c(Class, Age), by = Survived) |
|
| 48 |
ard_tabulate.survey.design <- function(data, |
|
| 49 |
variables, |
|
| 50 |
by = NULL, |
|
| 51 |
statistic = everything() ~ c("n", "N", "p", "p.std.error", "n_unweighted", "N_unweighted", "p_unweighted"),
|
|
| 52 |
denominator = c("column", "row", "cell"),
|
|
| 53 |
fmt_fun = NULL, |
|
| 54 |
stat_label = everything() ~ list( |
|
| 55 |
p = "%", |
|
| 56 |
p.std.error = "SE(%)", |
|
| 57 |
deff = "Design Effect", |
|
| 58 |
"n_unweighted" = "Unweighted n", |
|
| 59 |
"N_unweighted" = "Unweighted N", |
|
| 60 |
"p_unweighted" = "Unweighted %" |
|
| 61 |
), |
|
| 62 |
fmt_fn = deprecated(), |
|
| 63 |
...) {
|
|
| 64 | 91x |
set_cli_abort_call() |
| 65 | 91x |
check_pkg_installed(pkg = "survey") |
| 66 | 91x |
check_dots_empty() |
| 67 | ||
| 68 |
# deprecated args ------------------------------------------------------------ |
|
| 69 | 91x |
if (lifecycle::is_present(fmt_fn)) {
|
| 70 | ! |
lifecycle::deprecate_soft( |
| 71 | ! |
when = "0.2.5", |
| 72 | ! |
what = "ard_tabulate(fmt_fn)", |
| 73 | ! |
with = "ard_tabulate(fmt_fun)" |
| 74 |
) |
|
| 75 | ! |
fmt_fun <- fmt_fn |
| 76 |
} |
|
| 77 | ||
| 78 |
# process arguments ---------------------------------------------------------- |
|
| 79 | 91x |
check_not_missing(variables) |
| 80 | 91x |
cards::process_selectors( |
| 81 | 91x |
data = data$variables, |
| 82 | 91x |
variables = {{ variables }},
|
| 83 | 91x |
by = {{ by }}
|
| 84 |
) |
|
| 85 | 91x |
variables <- setdiff(variables, by) |
| 86 | 91x |
check_scalar(by, allow_empty = TRUE) |
| 87 | ||
| 88 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 89 | 91x |
if (is_empty(variables)) {
|
| 90 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 91 |
} |
|
| 92 | ||
| 93 | 91x |
check_na_factor_levels(data$variables, c(by, variables)) |
| 94 | ||
| 95 | 91x |
cards::process_formula_selectors( |
| 96 | 91x |
data = data$variables[variables], |
| 97 | 91x |
statistic = statistic, |
| 98 | 91x |
fmt_fun = fmt_fun, |
| 99 | 91x |
stat_label = stat_label |
| 100 |
) |
|
| 101 | 91x |
cards::fill_formula_selectors( |
| 102 | 91x |
data = data$variables[variables], |
| 103 | 91x |
statistic = formals(asNamespace("cardx")[["ard_tabulate.survey.design"]])[["statistic"]] |> eval(),
|
| 104 |
) |
|
| 105 | 91x |
accepted_svy_stats <- c("n", "N", "p", "p.std.error", "deff", "n_unweighted", "N_unweighted", "p_unweighted")
|
| 106 | 91x |
cards::check_list_elements( |
| 107 | 91x |
x = statistic, |
| 108 | 91x |
predicate = \(x) all(x %in% accepted_svy_stats), |
| 109 | 91x |
error_msg = c("Error in the values of the {.arg statistic} argument.",
|
| 110 | 91x |
i = "Values must be in {.val {accepted_svy_stats}}"
|
| 111 |
) |
|
| 112 |
) |
|
| 113 | 91x |
denominator <- arg_match(denominator) |
| 114 | ||
| 115 |
# Check if deff is in any of the requested statistics |
|
| 116 | 91x |
deff <- any(map_lgl(statistic, ~ "deff" %in% .x)) |
| 117 | ||
| 118 |
# check the missingness |
|
| 119 | 91x |
walk( |
| 120 | 91x |
variables, |
| 121 | 91x |
\(.x) {
|
| 122 | 163x |
if (all(is.na(data$variables[[.x]])) && |
| 123 | 163x |
!inherits(data$variables[[.x]], "factor")) {
|
| 124 | 4x |
cli::cli_abort( |
| 125 | 4x |
c("Column {.val {.x}} is all missing and cannot be tabulated.",
|
| 126 | 4x |
i = "Only columns of class {.cls factor} can be tabulated when all values are missing."
|
| 127 |
), |
|
| 128 | 4x |
call = get_cli_abort_call() |
| 129 |
) |
|
| 130 |
} |
|
| 131 |
} |
|
| 132 |
) |
|
| 133 | ||
| 134 |
# return note about column names that result in errors ----------------------- |
|
| 135 | 87x |
if (any(by %in% c("variable", "variable_level", "group1_level", "p", "n"))) {
|
| 136 | 2x |
cli::cli_abort( |
| 137 | 2x |
"The {.arg by} argument cannot include variables named {.val {c('variable', 'variable_level', 'group1_level', 'p', 'n')}}.",
|
| 138 | 2x |
call = get_cli_abort_call() |
| 139 |
) |
|
| 140 |
} |
|
| 141 | ||
| 142 | 85x |
if (any(variables %in% c("by", "name", "n", "p", "p.std.error"))) {
|
| 143 | 2x |
cli::cli_abort( |
| 144 | 2x |
"The {.arg variables} argument cannot include variables named {.val {c('by', 'name', 'n', 'p', 'p.std.error')}}.",
|
| 145 | 2x |
call = get_cli_abort_call() |
| 146 |
) |
|
| 147 |
} |
|
| 148 | ||
| 149 | ||
| 150 | ||
| 151 |
# calculate counts ----------------------------------------------------------- |
|
| 152 |
# this tabulation accounts for unobserved combinations |
|
| 153 | 83x |
svytable_counts <- .svytable_counts(data, variables, by, denominator) |
| 154 | ||
| 155 |
# calculate rate SE and DEFF ------------------------------------------------- |
|
| 156 | 83x |
svytable_rates <- .svytable_rate_stats(data, variables, by, denominator, deff) |
| 157 | ||
| 158 |
# convert results into a proper ARD object ----------------------------------- |
|
| 159 | 83x |
cards <- |
| 160 | 83x |
svytable_counts |> |
| 161 |
# merge in the SE(p) and DEFF |
|
| 162 | 83x |
dplyr::left_join( |
| 163 | 83x |
svytable_rates |> dplyr::select(-"p"), |
| 164 | 83x |
by = intersect(c("group1", "group1_level", "variable", "variable_level"), names(svytable_counts))
|
| 165 |
) |> |
|
| 166 |
# make columns list columns |
|
| 167 | 83x |
dplyr::mutate(across(-any_of(c("group1", "variable")), as.list)) |>
|
| 168 | 83x |
tidyr::pivot_longer( |
| 169 | 83x |
cols = -c(cards::all_ard_groups(), cards::all_ard_variables()), |
| 170 | 83x |
names_to = "stat_name", |
| 171 | 83x |
values_to = "stat" |
| 172 |
) |> |
|
| 173 |
# keep statistics requested by user |
|
| 174 | 83x |
dplyr::inner_join( |
| 175 | 83x |
statistic |> enframe("variable", "stat_name") |> tidyr::unnest(cols = "stat_name"),
|
| 176 | 83x |
by = c("variable", "stat_name")
|
| 177 |
) |
|
| 178 | ||
| 179 |
# add unweighted statistics -------------------------------------------------- |
|
| 180 | 83x |
statistic_unweighted <- statistic |> |
| 181 | 83x |
lapply(\(x) keep(x, ~ endsWith(.x, "_unweighted")) |> str_remove("_unweighted$")) |>
|
| 182 | 83x |
compact() |
| 183 | ||
| 184 | 83x |
if (!is_empty(statistic_unweighted)) {
|
| 185 | 83x |
cards_unweighted <- |
| 186 | 83x |
ard_tabulate( |
| 187 | 83x |
data = data[["variables"]], |
| 188 | 83x |
variables = all_of(names(statistic_unweighted)), |
| 189 | 83x |
by = any_of(by), |
| 190 | 83x |
statistic = statistic_unweighted, |
| 191 | 83x |
denominator = denominator |
| 192 |
) |> |
|
| 193 |
# all the survey levels are reported as character, so we do the same here. |
|
| 194 | 83x |
dplyr::mutate( |
| 195 | 83x |
across( |
| 196 | 83x |
c(cards::all_ard_groups("levels"), cards::all_ard_variables("levels")),
|
| 197 | 83x |
~ map(.x, as.character) |
| 198 |
) |
|
| 199 |
) |> |
|
| 200 | 83x |
dplyr::select(-c("stat_label", "fmt_fun", "warning", "error")) |>
|
| 201 | 83x |
dplyr::mutate( |
| 202 | 83x |
stat_name = |
| 203 | 83x |
dplyr::case_match(.data$stat_name, "n" ~ "n_unweighted", "N" ~ "N_unweighted", "p" ~ "p_unweighted") |
| 204 |
) |
|
| 205 | 83x |
cards <- cards |> dplyr::bind_rows(cards_unweighted) # styler: off |
| 206 |
} |
|
| 207 | ||
| 208 |
# final processing of fmt_fun ------------------------------------------------ |
|
| 209 | 83x |
cards <- cards |> |
| 210 | 83x |
.process_nested_list_as_df( |
| 211 | 83x |
arg = fmt_fun, |
| 212 | 83x |
new_column = "fmt_fun" |
| 213 |
) |> |
|
| 214 | 83x |
.default_svy_cat_fmt_fun() |
| 215 | ||
| 216 |
# merge in statistic labels -------------------------------------------------- |
|
| 217 | 83x |
cards <- cards |> |
| 218 | 83x |
.process_nested_list_as_df( |
| 219 | 83x |
arg = stat_label, |
| 220 | 83x |
new_column = "stat_label", |
| 221 | 83x |
unlist = TRUE |
| 222 |
) |> |
|
| 223 | 83x |
dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) |
| 224 | ||
| 225 |
# return final object -------------------------------------------------------- |
|
| 226 | 83x |
cards |> |
| 227 | 83x |
.restore_original_column_types(data = data$variables) |> |
| 228 | 83x |
dplyr::mutate( |
| 229 | 83x |
context = "categorical", |
| 230 | 83x |
warning = list(NULL), |
| 231 | 83x |
error = list(NULL), |
| 232 |
) |> |
|
| 233 | 83x |
cards::as_card() |> |
| 234 | 83x |
cards::tidy_ard_column_order() |> |
| 235 | 83x |
cards::tidy_ard_row_order() |
| 236 |
} |
|
| 237 | ||
| 238 |
# check for functions with NA factor levels (these are not allowed) |
|
| 239 |
check_na_factor_levels <- function(data, variables) {
|
|
| 240 | 141x |
walk( |
| 241 | 141x |
variables, |
| 242 | 141x |
\(variable) {
|
| 243 | 279x |
if (is.factor(data[[variable]]) && any(is.na(levels(data[[variable]])))) {
|
| 244 | ! |
cli::cli_abort( |
| 245 | ! |
"Column {.val {variable}} is a factor with {.val {NA}} levels, which are not allowed.",
|
| 246 | ! |
call = get_cli_abort_call() |
| 247 |
) |
|
| 248 |
} |
|
| 249 |
} |
|
| 250 |
) |
|
| 251 |
} |
|
| 252 | ||
| 253 |
# this function returns a tibble with the SE(p) and DEFF |
|
| 254 |
.svytable_rate_stats <- function(data, variables, by, denominator, deff) {
|
|
| 255 | 58x |
if (!is_empty(by)) by_lvls <- .unique_values_sort(data$variables, by) # styler: off |
| 256 | 83x |
if (!is_empty(by) && length(by_lvls) == 1L) {
|
| 257 | 6x |
data$variables[[by]] <- |
| 258 | 6x |
case_switch( |
| 259 | 6x |
inherits(data$variables[[by]], "factor") ~ fct_expand(data$variables[[by]], paste("not", by_lvls)),
|
| 260 | 6x |
.default = factor(data$variables[[by]], levels = c(by_lvls, paste("not", by_lvls)))
|
| 261 |
) |
|
| 262 |
} |
|
| 263 | 83x |
if (!is_empty(by) && inherits(data$variables[[by]], "logical")) {
|
| 264 | 9x |
data$variables[[by]] <- factor(data$variables[[by]], levels = c(TRUE, FALSE)) |
| 265 |
} |
|
| 266 | 83x |
if (!is_empty(by) && !inherits(data$variables[[by]], "factor")) {
|
| 267 | 3x |
data$variables[[by]] <- factor(data$variables[[by]]) |
| 268 |
} |
|
| 269 | ||
| 270 | 83x |
lapply( |
| 271 | 83x |
variables, |
| 272 | 83x |
\(variable) {
|
| 273 |
# convert the variable to a factor if not already one or a lgl, so we get the correct rate stats from svymean |
|
| 274 | 155x |
if (!inherits(data$variables[[variable]], c("factor", "logical"))) {
|
| 275 | 8x |
data$variables[[variable]] <- factor(data$variables[[variable]]) |
| 276 |
} |
|
| 277 | ||
| 278 |
# there are issues with svymean() when a variable has only one level. adding a second as needed |
|
| 279 | 155x |
variable_lvls <- .unique_values_sort(data$variables, variable) |
| 280 | 155x |
if (length(variable_lvls) == 1L) {
|
| 281 | 6x |
data$variables[[variable]] <- |
| 282 | 6x |
case_switch( |
| 283 | 6x |
inherits(data$variables[[variable]], "factor") ~ fct_expand(data$variables[[variable]], paste("not", variable_lvls)),
|
| 284 | 6x |
.default = factor(data$variables[[variable]], levels = c(variable_lvls, paste("not", variable_lvls)))
|
| 285 |
) |
|
| 286 |
} |
|
| 287 | 155x |
if (inherits(data$variables[[variable]], "logical")) {
|
| 288 | 24x |
data$variables[[variable]] <- factor(data$variables[[variable]], levels = c(TRUE, FALSE)) |
| 289 |
} |
|
| 290 | 155x |
if (!inherits(data$variables[[variable]], "factor")) {
|
| 291 | ! |
data$variables[[variable]] <- factor(data$variables[[variable]]) |
| 292 |
} |
|
| 293 | ||
| 294 |
# each combination of denominator and whether there is a by variable is handled separately |
|
| 295 | 155x |
result <- |
| 296 | 155x |
case_switch( |
| 297 |
# by variable and column percentages |
|
| 298 | 155x |
!is_empty(by) && denominator == "column" ~ |
| 299 | 155x |
.one_svytable_rates_by_column(data, variable, by, deff), |
| 300 |
# by variable and row percentages |
|
| 301 | 155x |
!is_empty(by) && denominator == "row" ~ |
| 302 | 155x |
.one_svytable_rates_by_row(data, variable, by, deff), |
| 303 |
# by variable and cell percentages |
|
| 304 | 155x |
!is_empty(by) && denominator == "cell" ~ |
| 305 | 155x |
.one_svytable_rates_by_cell(data, variable, by, deff), |
| 306 |
# no by variable and column/cell percentages |
|
| 307 | 155x |
denominator %in% c("column", "cell") ~
|
| 308 | 155x |
.one_svytable_rates_no_by_column_and_cell(data, variable, deff), |
| 309 |
# no by variable and row percentages |
|
| 310 | 155x |
denominator == "row" ~ |
| 311 | 155x |
.one_svytable_rates_no_by_row(data, variable, deff) |
| 312 |
) |
|
| 313 | ||
| 314 |
# if a level was added, remove the fake level |
|
| 315 | 155x |
if (length(variable_lvls) == 1L) {
|
| 316 | 6x |
result <- result |> dplyr::filter(.data$variable_level %in% variable_lvls) |
| 317 |
} |
|
| 318 | 155x |
if (!is_empty(by) && length(by_lvls) == 1L) {
|
| 319 | 12x |
result <- result |> dplyr::filter(.data$group1_level %in% by_lvls) |
| 320 |
} |
|
| 321 | ||
| 322 | 155x |
result |
| 323 |
} |
|
| 324 |
) |> |
|
| 325 | 83x |
dplyr::bind_rows() |
| 326 |
} |
|
| 327 | ||
| 328 |
.one_svytable_rates_no_by_row <- function(data, variable, deff) {
|
|
| 329 | 10x |
result <- dplyr::tibble( |
| 330 | 10x |
variable = .env$variable, |
| 331 | 10x |
variable_level = unique(data$variables[[variable]]) |> sort() |> as.character(), |
| 332 | 10x |
p = 1, |
| 333 | 10x |
p.std.error = 0 |
| 334 |
) |
|
| 335 | 10x |
if (isTRUE(deff)) {
|
| 336 | ! |
result$deff <- NaN |
| 337 |
} |
|
| 338 | 10x |
result |
| 339 |
} |
|
| 340 | ||
| 341 |
.one_svytable_rates_no_by_column_and_cell <- function(data, variable, deff) {
|
|
| 342 | 32x |
survey::svymean(reformulate2(variable), design = data, na.rm = TRUE, deff = deff) |> |
| 343 | 32x |
dplyr::as_tibble(rownames = "var_level") |> |
| 344 | 32x |
dplyr::mutate( |
| 345 | 32x |
variable_level = str_remove(.data$var_level, pattern = paste0("^", .env$variable)),
|
| 346 | 32x |
variable = .env$variable |
| 347 |
) |> |
|
| 348 | 32x |
dplyr::select("variable", "variable_level", p = "mean", p.std.error = "SE", any_of("deff"))
|
| 349 |
} |
|
| 350 | ||
| 351 |
.one_svytable_rates_by_cell <- function(data, variable, by, deff) {
|
|
| 352 | 22x |
df_interaction_id <- |
| 353 | 22x |
.df_all_combos(data, variable, by) |> |
| 354 | 22x |
dplyr::mutate( |
| 355 | 22x |
var_level = |
| 356 | 22x |
glue::glue("interaction({.env$by}, {.env$variable}){.data$group1_level}.{.data$variable_level}")
|
| 357 |
) |
|
| 358 | ||
| 359 | 22x |
survey::svymean( |
| 360 | 22x |
x = inject(~ interaction(!!sym(bt(by)), !!sym(bt(variable)))), |
| 361 | 22x |
design = data, |
| 362 | 22x |
na.rm = TRUE, |
| 363 | 22x |
deff = deff |
| 364 |
) |> |
|
| 365 | 22x |
dplyr::as_tibble(rownames = "var_level") |> |
| 366 | 22x |
dplyr::left_join(df_interaction_id, by = "var_level") |> |
| 367 | 22x |
dplyr::select( |
| 368 | 22x |
cards::all_ard_groups(), cards::all_ard_variables(), |
| 369 | 22x |
p = "mean", p.std.error = "SE", any_of("deff")
|
| 370 |
) |
|
| 371 |
} |
|
| 372 | ||
| 373 |
.one_svytable_rates_by_row <- function(data, variable, by, deff) {
|
|
| 374 | 62x |
survey::svyby( |
| 375 | 62x |
formula = reformulate2(by), |
| 376 | 62x |
by = reformulate2(variable), |
| 377 | 62x |
design = data, |
| 378 | 62x |
FUN = survey::svymean, |
| 379 | 62x |
na.rm = TRUE, |
| 380 | 62x |
deff = deff |
| 381 |
) |> |
|
| 382 | 62x |
dplyr::as_tibble() |> |
| 383 | 62x |
tidyr::pivot_longer(-all_of(variable)) |> |
| 384 | 62x |
dplyr::mutate( |
| 385 | 62x |
stat = |
| 386 | 62x |
dplyr::case_when( |
| 387 | 62x |
startsWith(.data$name, paste0("se.", by)) | startsWith(.data$name, paste0("se.`", by, "`")) ~ "p.std.error",
|
| 388 | 62x |
startsWith(.data$name, paste0("DEff.", by)) | startsWith(.data$name, paste0("DEff.`", by, "`")) ~ "deff",
|
| 389 | 62x |
TRUE ~ "p" |
| 390 |
), |
|
| 391 | 62x |
name = |
| 392 | 62x |
str_remove_all(.data$name, "se\\.") %>% |
| 393 | 62x |
str_remove_all("DEff\\.") %>%
|
| 394 | 62x |
str_remove_all(by) %>% |
| 395 | 62x |
str_remove_all("`")
|
| 396 |
) |> |
|
| 397 | 62x |
tidyr::pivot_wider(names_from = "stat", values_from = "value") |> |
| 398 | 62x |
(\(x) set_names(x, c("variable_level", "group1_level", names(x)[-c(1:2)])))() |>
|
| 399 | 62x |
dplyr::mutate( |
| 400 | 62x |
group1 = .env$by, |
| 401 | 62x |
variable = .env$variable, |
| 402 | 62x |
across(c("group1_level", "variable_level"), as.character)
|
| 403 |
) |
|
| 404 |
} |
|
| 405 | ||
| 406 |
.one_svytable_rates_by_column <- function(data, variable, by, deff) {
|
|
| 407 | 29x |
survey::svyby( |
| 408 | 29x |
formula = reformulate2(variable), |
| 409 | 29x |
by = reformulate2(by), |
| 410 | 29x |
design = data, |
| 411 | 29x |
FUN = survey::svymean, |
| 412 | 29x |
na.rm = TRUE, |
| 413 | 29x |
deff = deff |
| 414 |
) |> |
|
| 415 | 29x |
dplyr::as_tibble() |> |
| 416 | 29x |
tidyr::pivot_longer(-all_of(by)) |> |
| 417 | 29x |
dplyr::mutate( |
| 418 | 29x |
stat = |
| 419 | 29x |
dplyr::case_when( |
| 420 | 29x |
startsWith(.data$name, paste0("se.", variable)) | startsWith(.data$name, paste0("se.`", variable, "`")) ~ "p.std.error",
|
| 421 | 29x |
startsWith(.data$name, paste0("DEff.", variable)) | startsWith(.data$name, paste0("DEff.`", variable, "`")) ~ "deff",
|
| 422 | 29x |
TRUE ~ "p" |
| 423 |
), |
|
| 424 | 29x |
name = |
| 425 | 29x |
str_remove_all(.data$name, "se\\.") %>% |
| 426 | 29x |
str_remove_all("DEff\\.") %>%
|
| 427 | 29x |
str_remove_all(variable) %>% |
| 428 | 29x |
str_remove_all("`")
|
| 429 |
) |> |
|
| 430 | 29x |
tidyr::pivot_wider(names_from = "stat", values_from = "value") |> |
| 431 | 29x |
(\(x) set_names(x, c("group1_level", "variable_level", names(x)[-c(1:2)])))() |>
|
| 432 | 29x |
dplyr::mutate( |
| 433 | 29x |
group1 = .env$by, |
| 434 | 29x |
variable = .env$variable, |
| 435 | 29x |
across(c("group1_level", "variable_level"), as.character)
|
| 436 |
) |
|
| 437 |
} |
|
| 438 | ||
| 439 |
.svytable_counts <- function(data, variables, by, denominator) {
|
|
| 440 | 83x |
df_counts <- |
| 441 | 83x |
lapply( |
| 442 | 83x |
variables, |
| 443 | 83x |
\(variable) {
|
| 444 |
# perform weighted tabulation |
|
| 445 | 155x |
df_count <- |
| 446 | 155x |
survey::svytable(formula = reformulate2(c(by, variable)), design = data) |> |
| 447 | 155x |
dplyr::as_tibble() |
| 448 | 155x |
if (is_empty(by)) {
|
| 449 | 42x |
names(df_count) <- c("variable_level", "n")
|
| 450 | 42x |
df_count$variable <- variable |
| 451 |
} else {
|
|
| 452 | 113x |
names(df_count) <- c("group1_level", "variable_level", "n")
|
| 453 | 113x |
df_count$variable <- variable |
| 454 | 113x |
df_count$group1 <- by |
| 455 |
} |
|
| 456 | ||
| 457 |
# adding unobserved levels |
|
| 458 | 155x |
.df_all_combos(data, variable, by) %>% |
| 459 | 155x |
dplyr::left_join( |
| 460 | 155x |
df_count, |
| 461 | 155x |
by = names(.) |
| 462 |
) |> |
|
| 463 | 155x |
tidyr::replace_na(list(n = 0)) # unobserved levels assigned zero count |
| 464 |
} |
|
| 465 |
) |> |
|
| 466 | 83x |
dplyr::bind_rows() |
| 467 | ||
| 468 |
# add big N and p, then return data frame of results |
|
| 469 | 83x |
switch(denominator, |
| 470 |
"column" = |
|
| 471 | 29x |
df_counts |> |
| 472 | 29x |
dplyr::mutate( |
| 473 | 29x |
.by = c(cards::all_ard_groups(), cards::all_ard_variables("names")),
|
| 474 | 29x |
N = sum(.data$n), |
| 475 | 29x |
p = .data$n / .data$N |
| 476 |
), |
|
| 477 |
"row" = |
|
| 478 | 38x |
df_counts |> |
| 479 | 38x |
dplyr::mutate( |
| 480 | 38x |
.by = cards::all_ard_variables(), |
| 481 | 38x |
N = sum(.data$n), |
| 482 | 38x |
p = .data$n / .data$N |
| 483 |
), |
|
| 484 |
"cell" = |
|
| 485 | 16x |
df_counts |> |
| 486 | 16x |
dplyr::mutate( |
| 487 | 16x |
.by = c(cards::all_ard_groups("names"), cards::all_ard_variables("names")),
|
| 488 | 16x |
N = sum(.data$n), |
| 489 | 16x |
p = .data$n / .data$N |
| 490 |
) |
|
| 491 |
) |
|
| 492 |
} |
|
| 493 | ||
| 494 |
.df_all_combos <- function(data, variable, by) {
|
|
| 495 | 177x |
df <- |
| 496 | 177x |
tidyr::expand_grid( |
| 497 | 177x |
group1_level = switch(!is_empty(by), |
| 498 | 177x |
.unique_and_sorted(data$variables[[by]]) |
| 499 |
), |
|
| 500 | 177x |
variable_level = .unique_and_sorted(data$variables[[variable]]) |
| 501 |
) |> |
|
| 502 | 177x |
dplyr::mutate(variable = .env$variable) |
| 503 | 135x |
if (!is_empty(by)) df$group1 <- by |
| 504 | 177x |
df <- dplyr::relocate(df, any_of(c("group1", "group1_level", "variable", "variable_level")))
|
| 505 | ||
| 506 |
# convert levels to character for merging later |
|
| 507 | 177x |
df |> |
| 508 | 177x |
dplyr::mutate( |
| 509 | 177x |
across( |
| 510 | 177x |
c(cards::all_ard_groups("levels"), cards::all_ard_variables("levels")),
|
| 511 | 177x |
as.character |
| 512 |
) |
|
| 513 |
) |
|
| 514 |
} |
|
| 515 | ||
| 516 |
case_switch <- function(..., .default = NULL) {
|
|
| 517 | 890x |
dots <- dots_list(...) |
| 518 | ||
| 519 | 890x |
for (f in dots) {
|
| 520 | 1132x |
if (isTRUE(eval(f_lhs(f), envir = attr(f, ".Environment")))) {
|
| 521 | 672x |
return(eval(f_rhs(f), envir = attr(f, ".Environment"))) |
| 522 |
} |
|
| 523 |
} |
|
| 524 | ||
| 525 | 218x |
return(.default) |
| 526 |
} |
|
| 527 | ||
| 528 |
.default_svy_cat_fmt_fun <- function(x) {
|
|
| 529 | 89x |
x |> |
| 530 | 89x |
dplyr::mutate( |
| 531 | 89x |
fmt_fun = |
| 532 | 89x |
pmap( |
| 533 | 89x |
list(.data$stat_name, .data$stat, .data$fmt_fun), |
| 534 | 89x |
function(stat_name, stat, fmt_fun) {
|
| 535 | 5301x |
if (!is_empty(fmt_fun)) {
|
| 536 | ! |
return(fmt_fun) |
| 537 |
} |
|
| 538 | 5301x |
if (stat_name %in% c("p", "p_miss", "p_nonmiss", "p_unweighted")) {
|
| 539 | 1498x |
return(cards::label_round(digits = 1, scale = 100)) |
| 540 |
} |
|
| 541 | 3803x |
if (stat_name %in% c("n", "N", "N_miss", "N_nonmiss", "N_obs", "n_unweighted", "N_unweighted")) {
|
| 542 | 3000x |
return(cards::label_round(digits = 0)) |
| 543 |
} |
|
| 544 | 803x |
if (is.integer(stat)) {
|
| 545 | 36x |
return(0L) |
| 546 |
} |
|
| 547 | 767x |
if (is.numeric(stat)) {
|
| 548 | 767x |
return(1L) |
| 549 |
} |
|
| 550 | ! |
return(as.character) |
| 551 |
} |
|
| 552 |
) |
|
| 553 |
) |
|
| 554 |
} |
|
| 555 | ||
| 556 |
#' Convert Nested Lists to Column |
|
| 557 |
#' |
|
| 558 |
#' Some arguments, such as `stat_label`, are passed as nested lists. This |
|
| 559 |
#' function properly unnests these lists and adds them to the results data frame. |
|
| 560 |
#' |
|
| 561 |
#' @param x (`data.frame`)\cr |
|
| 562 |
#' result data frame |
|
| 563 |
#' @param arg (`list`)\cr |
|
| 564 |
#' the nested list |
|
| 565 |
#' @param new_column (`string`)\cr |
|
| 566 |
#' new column name |
|
| 567 |
#' @param unlist (`logical`)\cr |
|
| 568 |
#' whether to fully unlist final results |
|
| 569 |
#' |
|
| 570 |
#' @return a data frame |
|
| 571 |
#' @keywords internal |
|
| 572 |
#' |
|
| 573 |
#' @examples |
|
| 574 |
#' ard <- ard_tabulate(cards::ADSL, by = "ARM", variables = "AGEGR1") |
|
| 575 |
#' |
|
| 576 |
#' cardx:::.process_nested_list_as_df(ard, NULL, "new_col") |
|
| 577 |
.process_nested_list_as_df <- function(x, arg, new_column, unlist = FALSE) {
|
|
| 578 |
# add column if not already present |
|
| 579 | 178x |
if (!new_column %in% names(x)) {
|
| 580 | 178x |
x[[new_column]] <- list(NULL) |
| 581 |
} |
|
| 582 | ||
| 583 |
# process argument if not NULL, and update new column |
|
| 584 | 178x |
if (!is_empty(arg)) {
|
| 585 | 89x |
df_argument <- |
| 586 | 89x |
imap( |
| 587 | 89x |
arg, |
| 588 | 89x |
function(enlst_arg, variable) {
|
| 589 | 164x |
lst_stat_names <- |
| 590 | 164x |
x[c("variable", "stat_name")] |>
|
| 591 | 164x |
dplyr::filter(.data$variable %in% .env$variable) |> |
| 592 | 164x |
unique() %>% |
| 593 | 164x |
{stats::setNames(as.list(.[["stat_name"]]), .[["stat_name"]])} # styler: off
|
| 594 | ||
| 595 | 164x |
cards::compute_formula_selector( |
| 596 | 164x |
data = lst_stat_names, |
| 597 | 164x |
x = enlst_arg |
| 598 |
) %>% |
|
| 599 |
# styler: off |
|
| 600 | 164x |
{dplyr::tibble(
|
| 601 | 164x |
variable = variable, |
| 602 | 164x |
stat_name = names(.), |
| 603 | 164x |
"{new_column}" := unname(.)
|
| 604 |
)} |
|
| 605 |
# styler: on |
|
| 606 |
} |
|
| 607 |
) |> |
|
| 608 | 89x |
dplyr::bind_rows() |
| 609 | ||
| 610 | 89x |
x <- x |> dplyr::rows_update(df_argument, by = c("variable", "stat_name"), unmatched = "ignore")
|
| 611 |
} |
|
| 612 | ||
| 613 | 178x |
if (isTRUE(unlist)) {
|
| 614 | 89x |
x[[new_column]] <- lapply(x[[new_column]], function(x) x %||% NA) |> unlist() |
| 615 |
} |
|
| 616 | ||
| 617 | 178x |
x |
| 618 |
} |
| 1 |
#' Construction Helpers |
|
| 2 |
#' |
|
| 3 |
#' These functions help construct calls to various types of models. |
|
| 4 |
#' |
|
| 5 |
#' - `construct_model()`: Builds models of the form `method(data = data, formula = formula, method.args!!!)`. |
|
| 6 |
#' If the `package` argument is specified, that package is temporarily attached |
|
| 7 |
#' when the model is evaluated. |
|
| 8 |
#' |
|
| 9 |
#' - `reformulate2()`: This is a copy of `reformulate()` except that variable |
|
| 10 |
#' names that contain a space are wrapped in backticks. |
|
| 11 |
#' |
|
| 12 |
#' - `bt()`: Adds backticks to a character vector. |
|
| 13 |
#' |
|
| 14 |
#' - `bt_strip()`: Removes backticks from a string if it begins and ends with a backtick. |
|
| 15 |
#' |
|
| 16 |
#' @param data |
|
| 17 |
#' - `construct_model.data.frame()` (`data.frame`) a data frame |
|
| 18 |
#' - `construct_model.survey.design()` (`survey.design`) a survey design object |
|
| 19 |
#' @param x (`character`)\cr |
|
| 20 |
#' character vector, typically of variable names |
|
| 21 |
#' @param formula (`formula`)\cr |
|
| 22 |
#' a formula |
|
| 23 |
#' @param method (`string`)\cr |
|
| 24 |
#' string of function naming the function to be called, e.g. `"glm"`. |
|
| 25 |
#' If function belongs to a library that is not attached, the package name |
|
| 26 |
#' must be specified in the `package` argument. |
|
| 27 |
#' @param method.args (named `list`)\cr |
|
| 28 |
#' named list of arguments that will be passed to `method`. |
|
| 29 |
#' |
|
| 30 |
#' Note that this list may contain non-standard evaluation components. |
|
| 31 |
#' If you are wrapping this function in other functions, the argument |
|
| 32 |
#' must be passed in a way that does not evaluate the list, e.g. |
|
| 33 |
#' using rlang's embrace operator `{{ . }}`.
|
|
| 34 |
#' @param package (`string`)\cr |
|
| 35 |
#' a package name that will be temporarily loaded when function |
|
| 36 |
#' specified in `method` is executed. |
|
| 37 |
#' @param pattern,pattern_term,pattern_response DEPRECATED |
|
| 38 |
#' @inheritParams rlang::eval_tidy |
|
| 39 |
#' @inheritParams stats::reformulate |
|
| 40 |
#' @inheritParams rlang::args_dots_empty |
|
| 41 |
#' |
|
| 42 |
#' @return depends on the calling function |
|
| 43 |
#' @name construction_helpers |
|
| 44 |
#' |
|
| 45 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("withr", "lme4", "broom.helpers", "broom.mixed")))
|
|
| 46 |
#' construct_model( |
|
| 47 |
#' data = mtcars, |
|
| 48 |
#' formula = am ~ mpg + (1 | vs), |
|
| 49 |
#' method = "glmer", |
|
| 50 |
#' method.args = list(family = binomial), |
|
| 51 |
#' package = "lme4" |
|
| 52 |
#' ) |> |
|
| 53 |
#' broom.mixed::tidy() |
|
| 54 |
#' |
|
| 55 |
#' construct_model( |
|
| 56 |
#' data = mtcars |> dplyr::rename(`M P G` = mpg), |
|
| 57 |
#' formula = reformulate2(c("M P G", "cyl"), response = "hp"),
|
|
| 58 |
#' method = "lm" |
|
| 59 |
#' ) |> |
|
| 60 |
#' ard_regression() |> |
|
| 61 |
#' dplyr::filter(stat_name %in% c("term", "estimate", "p.value"))
|
|
| 62 |
NULL |
|
| 63 | ||
| 64 |
#' @rdname construction_helpers |
|
| 65 |
#' @export |
|
| 66 |
construct_model <- function(data, ...) {
|
|
| 67 | 39x |
UseMethod("construct_model")
|
| 68 |
} |
|
| 69 | ||
| 70 |
#' @rdname construction_helpers |
|
| 71 |
#' @export |
|
| 72 |
construct_model.data.frame <- function(data, formula, method, method.args = list(), package = "base", env = caller_env(), ...) {
|
|
| 73 | 34x |
set_cli_abort_call() |
| 74 |
# check pkg installations ---------------------------------------------------- |
|
| 75 | 34x |
check_dots_empty() |
| 76 | 34x |
check_pkg_installed(c("withr", package))
|
| 77 | ||
| 78 | 34x |
check_not_missing(formula) |
| 79 | 34x |
check_class(formula, cls = "formula") |
| 80 | ||
| 81 | 34x |
check_not_missing(method) |
| 82 | 34x |
check_string_or_function(method) |
| 83 | 30x |
if (is_string(method)) check_not_namespaced(method) |
| 84 | ||
| 85 |
# convert method.args to list of expressions (to account for NSE inputs) ----- |
|
| 86 | 31x |
method.args <- .as_list_of_exprs({{ method.args }})
|
| 87 | ||
| 88 |
# build model ---------------------------------------------------------------- |
|
| 89 | 31x |
call_to_run <- call2(.fn = method, formula = formula, data = data, !!!method.args) |
| 90 | ||
| 91 | 31x |
try_fetch( |
| 92 | 31x |
withr::with_namespace( |
| 93 | 31x |
package = package, |
| 94 | 31x |
eval_tidy(call_to_run, env = env) |
| 95 |
), |
|
| 96 | 31x |
error = function(e) {
|
| 97 | 4x |
msg <- "There was an error evaluating the model" |
| 98 | 4x |
if (is_string(method)) {
|
| 99 | 3x |
call_to_run$data <- expr(.) |
| 100 | 3x |
msg <- paste(msg, "{.code {truncate_call(call_to_run)}}")
|
| 101 |
} |
|
| 102 | ||
| 103 | 4x |
cli::cli_abort( |
| 104 | 4x |
message = msg, |
| 105 | 4x |
parent = e, |
| 106 | 4x |
call = get_cli_abort_call() |
| 107 |
) |
|
| 108 |
} |
|
| 109 |
) |
|
| 110 |
} |
|
| 111 | ||
| 112 |
#' @rdname construction_helpers |
|
| 113 |
#' @export |
|
| 114 |
construct_model.survey.design <- function(data, formula, method, method.args = list(), package = "survey", env = caller_env(), ...) {
|
|
| 115 | 5x |
set_cli_abort_call() |
| 116 |
# check pkg installations ---------------------------------------------------- |
|
| 117 | 5x |
check_dots_empty() |
| 118 | 5x |
check_pkg_installed(c("withr", package))
|
| 119 | ||
| 120 | 5x |
check_not_missing(formula) |
| 121 | 5x |
check_class(formula, cls = "formula") |
| 122 | ||
| 123 | 5x |
check_not_missing(method) |
| 124 | 5x |
check_string_or_function(method) |
| 125 | 5x |
if (is_string(method)) check_not_namespaced(method) |
| 126 | ||
| 127 |
# convert method.args to list of expressions (to account for NSE inputs) ----- |
|
| 128 | 5x |
method.args <- .as_list_of_exprs({{ method.args }})
|
| 129 | ||
| 130 |
# build model ---------------------------------------------------------------- |
|
| 131 | 5x |
call_to_run <- call2(.fn = method, formula = formula, design = data, !!!method.args) |
| 132 | ||
| 133 | 5x |
try_fetch( |
| 134 | 5x |
withr::with_namespace( |
| 135 | 5x |
package = package, |
| 136 | 5x |
eval_tidy(call_to_run, env = env) |
| 137 |
), |
|
| 138 | 5x |
error = function(e) {
|
| 139 | 2x |
msg <- "There was an error evaluating the model" |
| 140 | 2x |
if (is_string(method)) {
|
| 141 | 2x |
call_to_run$design <- expr(.) |
| 142 | 2x |
msg <- paste(msg, "{.code {truncate_call(call_to_run)}}")
|
| 143 |
} |
|
| 144 | ||
| 145 | 2x |
cli::cli_abort( |
| 146 | 2x |
message = msg, |
| 147 | 2x |
parent = e, |
| 148 | 2x |
call = get_cli_abort_call() |
| 149 |
) |
|
| 150 |
} |
|
| 151 |
) |
|
| 152 |
} |
|
| 153 | ||
| 154 |
.as_list_of_exprs <- function(x, arg_name = "method.args") {
|
|
| 155 | 37x |
x_enexpr <- enexpr(x) |
| 156 | 37x |
if (is_call_simple(x_enexpr)) {
|
| 157 | 37x |
return(call_args(x_enexpr)) |
| 158 |
} |
|
| 159 | ||
| 160 | ! |
cli::cli_abort( |
| 161 | ! |
c("There was an error processing the {.arg {arg_name}} argument.",
|
| 162 | ! |
i = "Expecting a simple call. See {.help rlang::is_call_simple} for details."
|
| 163 |
), |
|
| 164 | ! |
call = get_cli_abort_call() |
| 165 |
) |
|
| 166 |
} |
|
| 167 | ||
| 168 |
#' @rdname construction_helpers |
|
| 169 |
#' @export |
|
| 170 |
reformulate2 <- function(termlabels, response = NULL, intercept = TRUE, |
|
| 171 |
env = parent.frame(), |
|
| 172 |
pattern_term = NULL, pattern_response = NULL) {
|
|
| 173 |
# deprecated argument -------------------------------------------------------- |
|
| 174 | ! |
if (!missing(pattern_term)) lifecycle::deprecate_warn("0.2.1", what = "cardx::reformulate2(pattern_term)", details = "Argument has been ignored.") # styler: off
|
| 175 | ! |
if (!missing(pattern_response)) lifecycle::deprecate_warn("0.2.1", what = "cardx::reformulate2(pattern_response)", details = "Argument has been ignored.") # styler: off
|
| 176 | ||
| 177 | 1089x |
stats::reformulate( |
| 178 | 1089x |
termlabels = bt(termlabels), |
| 179 | 1089x |
response = bt(response), |
| 180 | 1089x |
intercept = intercept, |
| 181 | 1089x |
env = env |
| 182 |
) |
|
| 183 |
} |
|
| 184 | ||
| 185 |
#' @rdname construction_helpers |
|
| 186 |
#' @export |
|
| 187 |
bt <- function(x, pattern = NULL) {
|
|
| 188 |
# deprecated argument -------------------------------------------------------- |
|
| 189 | ! |
if (!missing(pattern)) lifecycle::deprecate_warn("0.2.1", what = "cardx::bt(pattern)", details = "Argument has been ignored.") # styler: off
|
| 190 | ||
| 191 | 2248x |
if (is_empty(x)) {
|
| 192 | 1066x |
return(x) |
| 193 |
} |
|
| 194 | ||
| 195 | 1182x |
ifelse( |
| 196 | 1182x |
make.names(x) != x & !str_detect(x, "^`.*`$"), |
| 197 | 1182x |
paste0("`", x, "`"),
|
| 198 | 1182x |
x |
| 199 |
) |
|
| 200 |
} |
|
| 201 | ||
| 202 |
#' @rdname construction_helpers |
|
| 203 |
#' @export |
|
| 204 |
bt_strip <- function(x) {
|
|
| 205 | 1x |
ifelse( |
| 206 | 1x |
str_detect(x, "^`.*`$"), |
| 207 | 1x |
substr(x, 2, nchar(x) - 1), |
| 208 | 1x |
x |
| 209 |
) |
|
| 210 |
} |
|
| 211 | ||
| 212 |
check_not_namespaced <- function(x, |
|
| 213 |
arg_name = rlang::caller_arg(x), |
|
| 214 |
class = "check_not_namespaced", |
|
| 215 |
call = get_cli_abort_call()) {
|
|
| 216 | 36x |
check_string(x, arg_name = arg_name, call = call, class = "check_not_namespaced") |
| 217 | ||
| 218 | 36x |
if (str_detect(x, "::")) {
|
| 219 | 3x |
cli::cli_abort( |
| 220 | 3x |
"Argument {.arg {arg_name}} cannot be namespaced when passed as a {.cls string}.",
|
| 221 | 3x |
call = call, |
| 222 | 3x |
class = class |
| 223 |
) |
|
| 224 |
} |
|
| 225 | ||
| 226 | 33x |
invisible(x) |
| 227 |
} |
|
| 228 | ||
| 229 | ||
| 230 |
check_string_or_function <- function(x, |
|
| 231 |
arg_name = rlang::caller_arg(x), |
|
| 232 |
class = "check_string_or_function", |
|
| 233 |
call = get_cli_abort_call()) {
|
|
| 234 | 39x |
if (!is.function(x) && !is_string(x)) {
|
| 235 | 1x |
cli::cli_abort( |
| 236 | 1x |
c("Argument {.arg {arg_name}} must be a {.cls string} or {.cls function}."),
|
| 237 | 1x |
call = call, |
| 238 | 1x |
class = class |
| 239 |
) |
|
| 240 |
} |
|
| 241 | ||
| 242 | 38x |
invisible(x) |
| 243 |
} |
|
| 244 | ||
| 245 |
truncate_call <- function(call, max_out = 100) {
|
|
| 246 | 5x |
call_text <- expr_text(call) |
| 247 | 5x |
if (nchar(call_text) > max_out) {
|
| 248 | ! |
call_text <- paste(substr(call_text, 1, max_out), "...") |
| 249 |
} |
|
| 250 | 5x |
call_text |
| 251 |
} |
| 1 |
#' ARD survey continuous CIs |
|
| 2 |
#' |
|
| 3 |
#' One-sample confidence intervals for continuous variables' means and medians. |
|
| 4 |
#' Confidence limits are calculated with `survey::svymean()` and `survey::svyquantile()`. |
|
| 5 |
#' |
|
| 6 |
#' |
|
| 7 |
#' @inheritParams ard_summary.survey.design |
|
| 8 |
#' @param method (`string`)\cr |
|
| 9 |
#' Method for confidence interval calculation. |
|
| 10 |
#' When `"svymean"`, the calculation is computed via `survey::svymean()`. |
|
| 11 |
#' Otherwise, it is calculated via`survey::svyquantile(interval.type=method)` |
|
| 12 |
#' @param conf.level (scalar `numeric`)\cr |
|
| 13 |
#' confidence level for confidence interval. Default is `0.95`. |
|
| 14 |
#' @param df (`numeric`)\cr |
|
| 15 |
#' denominator degrees of freedom, passed to `survey::confint(df)`. |
|
| 16 |
#' Default is `survey::degf(data)`. |
|
| 17 |
#' @param ... arguments passed to `survey::confint()` |
|
| 18 |
#' |
|
| 19 |
#' @return ARD data frame |
|
| 20 |
#' @export |
|
| 21 |
#' |
|
| 22 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "survey"))
|
|
| 23 |
#' data(api, package = "survey") |
|
| 24 |
#' dclus1 <- survey::svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc) |
|
| 25 |
#' |
|
| 26 |
#' ard_continuous_ci(dclus1, variables = api00) |
|
| 27 |
#' ard_continuous_ci(dclus1, variables = api00, method = "svymedian.xlogit") |
|
| 28 |
ard_continuous_ci.survey.design <- function(data, |
|
| 29 |
variables, |
|
| 30 |
by = NULL, |
|
| 31 |
method = c("svymean", "svymedian.mean", "svymedian.beta", "svymedian.xlogit", "svymedian.asin", "svymedian.score"),
|
|
| 32 |
conf.level = 0.95, |
|
| 33 |
df = survey::degf(data), |
|
| 34 |
...) {
|
|
| 35 | 16x |
set_cli_abort_call() |
| 36 | ||
| 37 |
# check inputs --------------------------------------------------------------- |
|
| 38 | 16x |
check_not_missing(data) |
| 39 | 16x |
check_class(data, "survey.design") |
| 40 | 16x |
check_not_missing(variables) |
| 41 | ||
| 42 | 16x |
cards::process_selectors( |
| 43 | 16x |
data = data$variables, |
| 44 | 16x |
variables = {{ variables }},
|
| 45 | 16x |
by = {{ by }}
|
| 46 |
) |
|
| 47 | 16x |
check_scalar(by, allow_empty = TRUE) |
| 48 | 16x |
check_scalar_range(conf.level, range = c(0, 1)) |
| 49 | 16x |
method <- arg_match(method) |
| 50 | ||
| 51 | 16x |
walk( |
| 52 | 16x |
variables, |
| 53 | 16x |
\(variable) {
|
| 54 | 27x |
if (!is.numeric(data$variables[[variable]])) {
|
| 55 | 1x |
cli::cli_inform( |
| 56 | 1x |
"Column {.val {variable}} is not {.cls numeric} and results may be an unexpected format."
|
| 57 |
) |
|
| 58 |
} |
|
| 59 |
} |
|
| 60 |
) |
|
| 61 | ||
| 62 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 63 | 16x |
if (is_empty(variables)) {
|
| 64 | 1x |
return(dplyr::tibble() |> cards::as_card()) |
| 65 |
} |
|
| 66 | ||
| 67 |
# calculate and return ARD of one sample CI ---------------------------------- |
|
| 68 | 15x |
.calculate_ard_continuous_survey_ci( |
| 69 | 15x |
FUN = ifelse(method == "svymean", .svymean_confint_wrapper, .svyquantile_confint_wrapper), |
| 70 | 15x |
data = data, |
| 71 | 15x |
variables = variables, |
| 72 | 15x |
by = by, |
| 73 | 15x |
conf.level = conf.level, |
| 74 | 15x |
method = method, |
| 75 | 15x |
df = df, |
| 76 |
... |
|
| 77 |
) |> |
|
| 78 | 15x |
.restore_original_column_types(data = data$variables) |
| 79 |
} |
|
| 80 | ||
| 81 |
.calculate_ard_continuous_survey_ci <- function(FUN, data, variables, by, conf.level, ...) {
|
|
| 82 |
# calculate results ---------------------------------------------------------- |
|
| 83 | 15x |
map( |
| 84 | 15x |
variables, |
| 85 | 15x |
function(variable) {
|
| 86 | 27x |
.calculate_one_ard_continuous_survey_ci( |
| 87 | 27x |
FUN = FUN, |
| 88 | 27x |
data = data, |
| 89 | 27x |
variable = variable, |
| 90 | 27x |
by = by, |
| 91 | 27x |
conf.level = conf.level, |
| 92 |
... |
|
| 93 |
) |
|
| 94 |
} |
|
| 95 |
) |> |
|
| 96 | 15x |
dplyr::bind_rows() |
| 97 |
} |
|
| 98 | ||
| 99 |
.calculate_one_ard_continuous_survey_ci <- function(FUN, data, variable, by, conf.level, ...) {
|
|
| 100 | 27x |
if (!is_empty(by)) {
|
| 101 | 8x |
by_levels <- .unique_values_sort(data$variables, variable = by) |
| 102 | 8x |
lst_data <- |
| 103 | 8x |
map( |
| 104 | 8x |
by_levels, |
| 105 | 8x |
~ call2("subset", expr(data), expr(!!sym(by) == !!.x)) |> eval()
|
| 106 |
) |> |
|
| 107 | 8x |
set_names(as.character(by_levels)) |
| 108 |
} |
|
| 109 | ||
| 110 | 27x |
df_full <- |
| 111 | 27x |
case_switch( |
| 112 | 27x |
!is_empty(by) ~ |
| 113 | 27x |
tidyr::expand_grid( |
| 114 | 27x |
group1_level = as.character(by_levels) |> as.list() |
| 115 |
) |> |
|
| 116 | 27x |
dplyr::mutate(group1 = .env$by, variable = .env$variable), |
| 117 | 27x |
.default = |
| 118 | 27x |
dplyr::tibble(variable = .env$variable) |
| 119 |
) |> |
|
| 120 | 27x |
dplyr::rowwise() |> |
| 121 | 27x |
dplyr::mutate( |
| 122 | 27x |
lst_result = |
| 123 | 27x |
FUN( |
| 124 | 27x |
data = |
| 125 | 27x |
case_switch( |
| 126 | 27x |
is_empty(.env$by) ~ data, |
| 127 | 27x |
.default = lst_data[[.data$group1_level]] |
| 128 |
), |
|
| 129 | 27x |
variable = .data$variable, |
| 130 | 27x |
conf.level = .env$conf.level, |
| 131 |
... |
|
| 132 |
) |> |
|
| 133 | 27x |
list(), |
| 134 | 27x |
result = |
| 135 | 27x |
.data$lst_result[["result"]] |> |
| 136 | 27x |
enframe("stat_name", "stat") |>
|
| 137 | 27x |
list(), |
| 138 | 27x |
warning = .data$lst_result["warning"] |> unname(), |
| 139 | 27x |
error = .data$lst_result["error"] |> unname(), |
| 140 | 27x |
context = "survey_continuous_ci" |
| 141 |
) |> |
|
| 142 | 27x |
dplyr::select(-"lst_result") |> |
| 143 | 27x |
dplyr::ungroup() |> |
| 144 | 27x |
tidyr::unnest("result") |>
|
| 145 | 27x |
dplyr::mutate( |
| 146 | 27x |
stat_label = .data$stat_name, |
| 147 | 27x |
fmt_fun = map(.data$stat, ~ case_switch(is.numeric(.x) ~ 2L, .default = as.character)) |
| 148 |
) |> |
|
| 149 | 27x |
cards::as_card() |> |
| 150 | 27x |
cards::tidy_ard_column_order() |
| 151 |
} |
|
| 152 | ||
| 153 |
.svymean_confint_wrapper <- function(data, variable, conf.level, df, ...) {
|
|
| 154 | 26x |
lst_results <- |
| 155 | 26x |
cards::eval_capture_conditions({
|
| 156 | 26x |
svymean <- |
| 157 | 26x |
survey::svymean(x = reformulate2(variable), design = data, na.rm = TRUE) |
| 158 | ||
| 159 | 26x |
lst_svymean <- as.data.frame(svymean) |> |
| 160 | 26x |
as.list() |> |
| 161 | 26x |
set_names(c("estimate", "std.error"))
|
| 162 | ||
| 163 | 26x |
lst_confint <- stats::confint(svymean, level = conf.level, df = df, ...) |> |
| 164 | 26x |
as.data.frame() |> |
| 165 | 26x |
as.list() |> |
| 166 | 26x |
set_names(c("conf.low", "conf.high"))
|
| 167 | ||
| 168 | 24x |
c(lst_svymean, lst_confint) |
| 169 |
}) |
|
| 170 | ||
| 171 |
# add NULL results if error |
|
| 172 | 26x |
if (is_empty(lst_results[["result"]])) {
|
| 173 | 2x |
lst_results[["result"]] <- rep_named(c("estimate", "std.error", "conf.low", "conf.high"), list(NULL))
|
| 174 |
} |
|
| 175 | ||
| 176 |
# add other args |
|
| 177 | 26x |
lst_results[["result"]] <- lst_results[["result"]] |> append(list(conf.level = conf.level)) |
| 178 | ||
| 179 |
# return list result |
|
| 180 | 26x |
lst_results |
| 181 |
} |
|
| 182 | ||
| 183 |
.svyquantile_confint_wrapper <- function(data, variable, conf.level, method, df, ...) {
|
|
| 184 | 9x |
lst_results <- |
| 185 | 9x |
cards::eval_capture_conditions({
|
| 186 | 9x |
svyquantile <- |
| 187 | 9x |
survey::svyquantile( |
| 188 | 9x |
x = reformulate2(variable), design = data, quantiles = 0.5, |
| 189 | 9x |
na.rm = TRUE, interval.type = str_remove(method, pattern = "^svymedian\\.") |
| 190 |
) |
|
| 191 | ||
| 192 | 8x |
lst_svyquantile <- svyquantile |> |
| 193 | 8x |
getElement(1L) |> |
| 194 | 8x |
as.data.frame() |> |
| 195 | 8x |
dplyr::select(1L, last_col()) |> |
| 196 | 8x |
as.list() |> |
| 197 | 8x |
set_names(c("estimate", "std.error"))
|
| 198 | ||
| 199 | 8x |
lst_confint <- stats::confint(svyquantile, level = conf.level, df = df, ...) |> |
| 200 | 8x |
as.data.frame() |> |
| 201 | 8x |
as.list() |> |
| 202 | 8x |
set_names(c("conf.low", "conf.high"))
|
| 203 | ||
| 204 | 8x |
c(lst_svyquantile, lst_confint) |
| 205 |
}) |
|
| 206 | ||
| 207 |
# add NULL results if error |
|
| 208 | 9x |
if (is_empty(lst_results[["result"]])) {
|
| 209 | 1x |
lst_results[["result"]] <- rep_named(c("estimate", "std.error", "conf.low", "conf.high"), list(NULL))
|
| 210 |
} |
|
| 211 | ||
| 212 |
# add other args |
|
| 213 | 9x |
lst_results[["result"]] <- lst_results[["result"]] |> append(list(conf.level = conf.level)) |
| 214 | ||
| 215 |
# return list result |
|
| 216 | 9x |
lst_results |
| 217 |
} |
| 1 |
#' ARD one-sample Wilcox Rank-sum |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Analysis results data for one-sample Wilcox Rank-sum. |
|
| 5 |
#' Result may be stratified by including the `by` argument. |
|
| 6 |
#' |
|
| 7 |
#' @param data (`data.frame`)\cr |
|
| 8 |
#' a data frame. See below for details. |
|
| 9 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 10 |
#' column names to be analyzed. Independent Wilcox Rank-sum tests will be computed for |
|
| 11 |
#' each variable. |
|
| 12 |
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 13 |
#' optional column name to stratify results by. |
|
| 14 |
#' @inheritParams ard_stats_wilcox_test |
|
| 15 |
#' |
|
| 16 |
#' @return ARD data frame |
|
| 17 |
#' @export |
|
| 18 |
#' |
|
| 19 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
|
|
| 20 |
#' cards::ADSL |> |
|
| 21 |
#' ard_stats_wilcox_test_onesample(by = ARM, variables = AGE) |
|
| 22 |
ard_stats_wilcox_test_onesample <- function(data, variables, by = dplyr::group_vars(data), conf.level = 0.95, ...) {
|
|
| 23 | 6x |
set_cli_abort_call() |
| 24 | ||
| 25 |
# check installed packages --------------------------------------------------- |
|
| 26 | 6x |
check_pkg_installed("broom")
|
| 27 | ||
| 28 |
# check/process inputs ------------------------------------------------------- |
|
| 29 | 6x |
check_not_missing(data) |
| 30 | 6x |
check_not_missing(variables) |
| 31 | 6x |
check_data_frame(data) |
| 32 | 6x |
data <- dplyr::ungroup(data) |
| 33 | 6x |
cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
|
| 34 | 6x |
check_scalar_range(conf.level, range = c(0, 1)) |
| 35 | ||
| 36 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 37 | 6x |
if (is_empty(variables)) {
|
| 38 | 1x |
return(dplyr::tibble() |> cards::as_card()) |
| 39 |
} |
|
| 40 | ||
| 41 | 5x |
cards::ard_summary( |
| 42 | 5x |
data = data, |
| 43 | 5x |
variables = all_of(variables), |
| 44 | 5x |
by = all_of(by), |
| 45 | 5x |
statistic = all_of(variables) ~ list(wilcox_test_onesample = \(x) stats::wilcox.test(x = x, conf.level = conf.level, ...) |> broom::tidy()) |
| 46 |
) |> |
|
| 47 | 5x |
cards::bind_ard( |
| 48 | 5x |
cards::ard_summary( |
| 49 | 5x |
data = data, |
| 50 | 5x |
variables = all_of(variables), |
| 51 | 5x |
by = all_of(by), |
| 52 | 5x |
statistic = |
| 53 | 5x |
all_of(variables) ~ |
| 54 | 5x |
list(conf.level = \(x) {
|
| 55 | 9x |
formals(asNamespace("stats")[["wilcox.test.default"]])[c("mu", "exact", "conf.int", "tol.root", "digits.rank")] |>
|
| 56 | 9x |
utils::modifyList(list(conf.level = conf.level, ...)) |> |
| 57 | 9x |
compact() |
| 58 |
}) |
|
| 59 |
) |
|
| 60 |
) |> |
|
| 61 | 5x |
dplyr::select(-"stat_label") |> |
| 62 | 5x |
dplyr::left_join( |
| 63 | 5x |
.df_ttest_stat_labels(by = NULL), |
| 64 | 5x |
by = "stat_name" |
| 65 |
) |> |
|
| 66 | 5x |
dplyr::mutate( |
| 67 | 5x |
stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name), |
| 68 | 5x |
context = "stats_wilcox_test_onesample", |
| 69 |
) |> |
|
| 70 | 5x |
cards::as_card() |> |
| 71 | 5x |
cards::tidy_ard_column_order() |> |
| 72 | 5x |
cards::tidy_ard_row_order() |
| 73 |
} |
| 1 |
#' Functions for Calculating Proportion Confidence Intervals |
|
| 2 |
#' |
|
| 3 |
#' Functions to calculate different proportion confidence intervals for use in `ard_proportion()`. |
|
| 4 |
#' |
|
| 5 |
#' @inheritParams ard_categorical_ci |
|
| 6 |
#' @param x (binary `numeric`/`logical`)\cr |
|
| 7 |
#' vector of a binary values, i.e. a logical vector, or numeric with values `c(0, 1)` |
|
| 8 |
#' @return Confidence interval of a proportion. |
|
| 9 |
#' |
|
| 10 |
#' @name proportion_ci |
|
| 11 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
|
|
| 12 |
#' x <- c( |
|
| 13 |
#' TRUE, TRUE, TRUE, TRUE, TRUE, |
|
| 14 |
#' FALSE, FALSE, FALSE, FALSE, FALSE |
|
| 15 |
#' ) |
|
| 16 |
#' |
|
| 17 |
#' proportion_ci_wald(x, conf.level = 0.9) |
|
| 18 |
#' proportion_ci_wilson(x, correct = TRUE) |
|
| 19 |
#' proportion_ci_clopper_pearson(x) |
|
| 20 |
#' proportion_ci_agresti_coull(x) |
|
| 21 |
#' proportion_ci_jeffreys(x) |
|
| 22 |
NULL |
|
| 23 | ||
| 24 |
#' @describeIn proportion_ci Calculates the Wald interval by following the usual textbook definition |
|
| 25 |
#' for a single proportion confidence interval using the normal approximation. |
|
| 26 |
#' |
|
| 27 |
#' \deqn{\hat{p} \pm z_{\alpha/2} \sqrt{\frac{\hat{p}(1 - \hat{p})}{n}}}
|
|
| 28 |
#' |
|
| 29 |
#' @param correct (`logical`)\cr apply continuity correction. |
|
| 30 |
#' |
|
| 31 |
#' @export |
|
| 32 |
proportion_ci_wald <- function(x, conf.level = 0.95, correct = FALSE) {
|
|
| 33 | 85x |
set_cli_abort_call() |
| 34 | ||
| 35 |
# check inputs --------------------------------------------------------------- |
|
| 36 | 85x |
check_not_missing(x) |
| 37 | 85x |
check_binary(x) |
| 38 | 85x |
check_range(conf.level, range = c(0, 1), include_bounds = c(FALSE, FALSE)) |
| 39 | 85x |
check_scalar(conf.level) |
| 40 | 85x |
check_class(x = correct, "logical") |
| 41 | 85x |
check_scalar(correct) |
| 42 | ||
| 43 | 85x |
x <- stats::na.omit(x) |
| 44 | ||
| 45 | 85x |
n <- length(x) |
| 46 | 85x |
p_hat <- mean(x) |
| 47 | 85x |
z <- stats::qnorm((1 + conf.level) / 2) |
| 48 | 85x |
q_hat <- 1 - p_hat |
| 49 | 85x |
correction_factor <- ifelse(correct, 1 / (2 * n), 0) |
| 50 | ||
| 51 | 85x |
err <- z * sqrt(p_hat * q_hat) / sqrt(n) + correction_factor |
| 52 | 85x |
l_ci <- max(0, p_hat - err) |
| 53 | 85x |
u_ci <- min(1, p_hat + err) |
| 54 | ||
| 55 | 85x |
list( |
| 56 | 85x |
N = n, |
| 57 | 85x |
n = sum(x), |
| 58 | 85x |
estimate = p_hat, |
| 59 | 85x |
conf.low = l_ci, |
| 60 | 85x |
conf.high = u_ci, |
| 61 | 85x |
conf.level = conf.level, |
| 62 | 85x |
method = |
| 63 | 85x |
glue::glue("Wald Confidence Interval {ifelse(correct, 'with', 'without')} continuity correction")
|
| 64 |
) |
|
| 65 |
} |
|
| 66 | ||
| 67 | ||
| 68 |
#' @describeIn proportion_ci Calculates the Wilson interval by calling [stats::prop.test()]. |
|
| 69 |
#' Also referred to as Wilson score interval. |
|
| 70 |
#' |
|
| 71 |
#' \deqn{\frac{\hat{p} +
|
|
| 72 |
#' \frac{z^2_{\alpha/2}}{2n} \pm z_{\alpha/2} \sqrt{\frac{\hat{p}(1 - \hat{p})}{n} +
|
|
| 73 |
#' \frac{z^2_{\alpha/2}}{4n^2}}}{1 + \frac{z^2_{\alpha/2}}{n}}}
|
|
| 74 |
#' |
|
| 75 |
#' @export |
|
| 76 |
proportion_ci_wilson <- function(x, conf.level = 0.95, correct = FALSE) {
|
|
| 77 | 24x |
set_cli_abort_call() |
| 78 | ||
| 79 |
# check installed packages --------------------------------------------------- |
|
| 80 | 24x |
check_pkg_installed(pkg = "broom") |
| 81 | ||
| 82 |
# check inputs --------------------------------------------------------------- |
|
| 83 | 24x |
check_not_missing(x) |
| 84 | 24x |
check_binary(x) |
| 85 | 23x |
check_class(x = correct, "logical") |
| 86 | 23x |
check_scalar(correct) |
| 87 | 23x |
check_range(conf.level, range = c(0, 1), include_bounds = c(FALSE, FALSE)) |
| 88 | 23x |
check_scalar(conf.level) |
| 89 | ||
| 90 | 22x |
x <- stats::na.omit(x) |
| 91 | ||
| 92 | 22x |
n <- length(x) |
| 93 | 22x |
y <- stats::prop.test(x = sum(x), n = n, correct = correct, conf.level = conf.level) |
| 94 | ||
| 95 | 21x |
list( |
| 96 | 21x |
N = n, |
| 97 | 21x |
n = sum(x), |
| 98 | 21x |
conf.level = conf.level |
| 99 |
) |> |
|
| 100 | 21x |
utils::modifyList(val = broom::tidy(y) |> as.list()) |> |
| 101 | 21x |
utils::modifyList( |
| 102 | 21x |
list( |
| 103 | 21x |
method = |
| 104 | 21x |
glue::glue("Wilson Confidence Interval {ifelse(correct, 'with', 'without')} continuity correction")
|
| 105 |
) |
|
| 106 |
) |
|
| 107 |
} |
|
| 108 | ||
| 109 |
#' @describeIn proportion_ci Calculates the Clopper-Pearson interval by calling [stats::binom.test()]. |
|
| 110 |
#' Also referred to as the `exact` method. |
|
| 111 |
#' |
|
| 112 |
#' \deqn{
|
|
| 113 |
#' \left( \frac{k}{n} \pm z_{\alpha/2} \sqrt{\frac{\frac{k}{n}(1-\frac{k}{n})}{n} +
|
|
| 114 |
#' \frac{z^2_{\alpha/2}}{4n^2}} \right)
|
|
| 115 |
#' / \left( 1 + \frac{z^2_{\alpha/2}}{n} \right)}
|
|
| 116 |
#' |
|
| 117 |
#' @export |
|
| 118 |
proportion_ci_clopper_pearson <- function(x, conf.level = 0.95) {
|
|
| 119 | 7x |
set_cli_abort_call() |
| 120 | ||
| 121 |
# check installed packages --------------------------------------------------- |
|
| 122 | 7x |
check_pkg_installed(pkg = "broom") |
| 123 | ||
| 124 |
# check inputs --------------------------------------------------------------- |
|
| 125 | 7x |
check_not_missing(x) |
| 126 | 7x |
check_binary(x) |
| 127 | 7x |
check_range(conf.level, range = c(0, 1), include_bounds = c(FALSE, FALSE)) |
| 128 | 7x |
check_scalar(conf.level) |
| 129 | ||
| 130 | 7x |
x <- stats::na.omit(x) |
| 131 | 7x |
n <- length(x) |
| 132 | ||
| 133 | 7x |
y <- stats::binom.test(x = sum(x), n = n, conf.level = conf.level) |
| 134 | ||
| 135 | 7x |
list(N = n, n = sum(x), conf.level = conf.level) |> |
| 136 | 7x |
utils::modifyList(val = broom::tidy(y) |> as.list()) |> |
| 137 | 7x |
utils::modifyList(list(method = "Clopper-Pearson Confidence Interval")) |
| 138 |
} |
|
| 139 | ||
| 140 |
#' @describeIn proportion_ci Calculates the `Agresti-Coull` interval (created by `Alan Agresti` and `Brent Coull`) by |
|
| 141 |
#' (for 95% CI) adding two successes and two failures to the data and then using the Wald formula to construct a CI. |
|
| 142 |
#' |
|
| 143 |
#' \deqn{
|
|
| 144 |
#' \left( \frac{\tilde{p} + z^2_{\alpha/2}/2}{n + z^2_{\alpha/2}} \pm
|
|
| 145 |
#' z_{\alpha/2} \sqrt{\frac{\tilde{p}(1 - \tilde{p})}{n} +
|
|
| 146 |
#' \frac{z^2_{\alpha/2}}{4n^2}} \right)}
|
|
| 147 |
#' |
|
| 148 |
#' @export |
|
| 149 |
proportion_ci_agresti_coull <- function(x, conf.level = 0.95) {
|
|
| 150 | 9x |
set_cli_abort_call() |
| 151 | ||
| 152 |
# check inputs --------------------------------------------------------------- |
|
| 153 | 9x |
check_not_missing(x) |
| 154 | 9x |
check_binary(x) |
| 155 | 9x |
check_range(conf.level, range = c(0, 1), include_bounds = c(FALSE, FALSE)) |
| 156 | 9x |
check_scalar(conf.level) |
| 157 | ||
| 158 | 9x |
x <- stats::na.omit(x) |
| 159 | ||
| 160 | 9x |
n <- length(x) |
| 161 | 9x |
x_sum <- sum(x) |
| 162 | 9x |
z <- stats::qnorm((1 + conf.level) / 2) |
| 163 | ||
| 164 |
# Add here both z^2 / 2 successes and failures. |
|
| 165 | 9x |
x_sum_tilde <- x_sum + z^2 / 2 |
| 166 | 9x |
n_tilde <- n + z^2 |
| 167 | ||
| 168 |
# Then proceed as with the Wald interval. |
|
| 169 | 9x |
p_tilde <- x_sum_tilde / n_tilde |
| 170 | 9x |
q_tilde <- 1 - p_tilde |
| 171 | 9x |
err <- z * sqrt(p_tilde * q_tilde) / sqrt(n_tilde) |
| 172 | 9x |
l_ci <- max(0, p_tilde - err) |
| 173 | 9x |
u_ci <- min(1, p_tilde + err) |
| 174 | ||
| 175 | 9x |
list( |
| 176 | 9x |
N = n, |
| 177 | 9x |
n = sum(x), |
| 178 | 9x |
estimate = mean(x), |
| 179 | 9x |
conf.low = l_ci, |
| 180 | 9x |
conf.high = u_ci, |
| 181 | 9x |
conf.level = conf.level, |
| 182 | 9x |
method = "Agresti-Coull Confidence Interval" |
| 183 |
) |
|
| 184 |
} |
|
| 185 | ||
| 186 |
#' @describeIn proportion_ci Calculates the Jeffreys interval, an equal-tailed interval based on the |
|
| 187 |
#' non-informative Jeffreys prior for a binomial proportion. |
|
| 188 |
#' |
|
| 189 |
#' \deqn{\left( \text{Beta}\left(\frac{k}{2} + \frac{1}{2}, \frac{n - k}{2} + \frac{1}{2}\right)_\alpha,
|
|
| 190 |
#' \text{Beta}\left(\frac{k}{2} + \frac{1}{2}, \frac{n - k}{2} + \frac{1}{2}\right)_{1-\alpha} \right)}
|
|
| 191 |
#' |
|
| 192 |
#' @export |
|
| 193 |
proportion_ci_jeffreys <- function(x, conf.level = 0.95) {
|
|
| 194 | 10x |
set_cli_abort_call() |
| 195 | ||
| 196 |
# check inputs --------------------------------------------------------------- |
|
| 197 | 10x |
check_not_missing(x) |
| 198 | 10x |
check_binary(x) |
| 199 | 10x |
check_range(conf.level, range = c(0, 1), include_bounds = c(FALSE, FALSE)) |
| 200 | 10x |
check_scalar(conf.level) |
| 201 | 10x |
x <- stats::na.omit(x) |
| 202 | ||
| 203 | 10x |
n <- length(x) |
| 204 | 10x |
x_sum <- sum(x) |
| 205 | ||
| 206 | 10x |
alpha <- 1 - conf.level |
| 207 | 10x |
l_ci <- ifelse( |
| 208 | 10x |
x_sum == 0, |
| 209 | 10x |
0, |
| 210 | 10x |
stats::qbeta(alpha / 2, x_sum + 0.5, n - x_sum + 0.5) |
| 211 |
) |
|
| 212 | ||
| 213 | 10x |
u_ci <- ifelse( |
| 214 | 10x |
x_sum == n, |
| 215 | 10x |
1, |
| 216 | 10x |
stats::qbeta(1 - alpha / 2, x_sum + 0.5, n - x_sum + 0.5) |
| 217 |
) |
|
| 218 | ||
| 219 | 10x |
list( |
| 220 | 10x |
N = n, |
| 221 | 10x |
n = sum(x), |
| 222 | 10x |
estimate = mean(x), |
| 223 | 10x |
conf.low = l_ci, |
| 224 | 10x |
conf.high = u_ci, |
| 225 | 10x |
conf.level = conf.level, |
| 226 | 10x |
method = glue::glue("Jeffreys Interval")
|
| 227 |
) |
|
| 228 |
} |
|
| 229 | ||
| 230 | ||
| 231 |
#' @describeIn proportion_ci Calculates the stratified Wilson confidence |
|
| 232 |
#' interval for unequal proportions as described in |
|
| 233 |
#' Xin YA, Su XG. Stratified Wilson and Newcombe confidence intervals |
|
| 234 |
#' for multiple binomial proportions. _Statistics in Biopharmaceutical Research_. 2010;2(3). |
|
| 235 |
#' |
|
| 236 |
#' \deqn{\frac{\hat{p}_j + \frac{z^2_{\alpha/2}}{2n_j} \pm
|
|
| 237 |
#' z_{\alpha/2} \sqrt{\frac{\hat{p}_j(1 - \hat{p}_j)}{n_j} +
|
|
| 238 |
#' \frac{z^2_{\alpha/2}}{4n_j^2}}}{1 + \frac{z^2_{\alpha/2}}{n_j}}}
|
|
| 239 |
#' |
|
| 240 |
#' |
|
| 241 |
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `x`. |
|
| 242 |
#' @param weights (`numeric`)\cr weights for each level of the strata. If `NULL`, they are |
|
| 243 |
#' estimated using the iterative algorithm that |
|
| 244 |
#' minimizes the weighted squared length of the confidence interval. |
|
| 245 |
#' @param max.iterations (positive `integer`)\cr maximum number of iterations for the iterative procedure used |
|
| 246 |
#' to find estimates of optimal weights. |
|
| 247 |
#' @param correct (scalar `logical`)\cr include the continuity correction. For further information, see for example |
|
| 248 |
#' [stats::prop.test()]. |
|
| 249 |
#' |
|
| 250 |
#' @examples |
|
| 251 |
#' # Stratified Wilson confidence interval with unequal probabilities |
|
| 252 |
#' |
|
| 253 |
#' set.seed(1) |
|
| 254 |
#' rsp <- sample(c(TRUE, FALSE), 100, TRUE) |
|
| 255 |
#' strata_data <- data.frame( |
|
| 256 |
#' "f1" = sample(c("a", "b"), 100, TRUE),
|
|
| 257 |
#' "f2" = sample(c("x", "y", "z"), 100, TRUE),
|
|
| 258 |
#' stringsAsFactors = TRUE |
|
| 259 |
#' ) |
|
| 260 |
#' strata <- interaction(strata_data) |
|
| 261 |
#' n_strata <- ncol(table(rsp, strata)) # Number of strata |
|
| 262 |
#' |
|
| 263 |
#' proportion_ci_strat_wilson( |
|
| 264 |
#' x = rsp, strata = strata, |
|
| 265 |
#' conf.level = 0.90 |
|
| 266 |
#' ) |
|
| 267 |
#' |
|
| 268 |
#' # Not automatic setting of weights |
|
| 269 |
#' proportion_ci_strat_wilson( |
|
| 270 |
#' x = rsp, strata = strata, |
|
| 271 |
#' weights = rep(1 / n_strata, n_strata), |
|
| 272 |
#' conf.level = 0.90 |
|
| 273 |
#' ) |
|
| 274 |
#' |
|
| 275 |
#' @export |
|
| 276 |
proportion_ci_strat_wilson <- function(x, |
|
| 277 |
strata, |
|
| 278 |
weights = NULL, |
|
| 279 |
conf.level = 0.95, |
|
| 280 |
max.iterations = 10L, |
|
| 281 |
correct = FALSE) {
|
|
| 282 | 25x |
set_cli_abort_call() |
| 283 | ||
| 284 |
# check inputs --------------------------------------------------------------- |
|
| 285 | 25x |
check_not_missing(x) |
| 286 | 25x |
check_not_missing(strata) |
| 287 | 25x |
check_binary(x) |
| 288 | 25x |
check_class(correct, "logical") |
| 289 | 25x |
check_scalar(correct) |
| 290 | 25x |
check_class(strata, "factor") |
| 291 | 25x |
check_range(conf.level, range = c(0, 1), include_bounds = c(FALSE, FALSE)) |
| 292 | 25x |
check_scalar(conf.level) |
| 293 | ||
| 294 |
# remove missing values from x and strata |
|
| 295 | 25x |
is_na <- is.na(x) | is.na(strata) |
| 296 | 25x |
x <- x[!is_na] |
| 297 | 25x |
strata <- strata[!is_na] |
| 298 | 8x |
if (!inherits(x, "logical")) x <- as.logical(x) |
| 299 |
# check all TRUE/FALSE, if so, not calculable |
|
| 300 | 25x |
if (all(x) || all(!x)) {
|
| 301 | 2x |
cli::cli_abort("All values in {.arg x} argument are either {.code TRUE} or {.code FALSE} and CI is not estimable.")
|
| 302 |
} |
|
| 303 | ||
| 304 | 23x |
tbl <- table(factor(x, levels = c(FALSE, TRUE)), strata, useNA = "no") |
| 305 | 23x |
n_strata <- length(unique(strata)) |
| 306 | ||
| 307 |
# Checking the weights and maximum number of iterations. |
|
| 308 | 23x |
do_iter <- FALSE |
| 309 | 23x |
if (is.null(weights)) {
|
| 310 | 13x |
weights <- rep(1 / n_strata, n_strata) # Initialization for iterative procedure |
| 311 | 13x |
do_iter <- TRUE |
| 312 | ||
| 313 |
# Iteration parameters |
|
| 314 | 13x |
if (!is_scalar_integerish(max.iterations) || max.iterations < 1) {
|
| 315 | 2x |
cli::cli_abort("Argument {.arg max.iterations} must be a positive integer.")
|
| 316 |
} |
|
| 317 |
} |
|
| 318 | 21x |
check_range(weights, range = c(0, 1), include_bounds = c(TRUE, TRUE)) |
| 319 | 19x |
sum_weights <- sum(weights) |> |
| 320 | 19x |
round() |> |
| 321 | 19x |
as.integer() |
| 322 | 19x |
if (sum_weights != 1L || abs(sum_weights - sum(weights)) > sqrt(.Machine$double.eps)) {
|
| 323 | 1x |
cli::cli_abort("The sum of the {.arg weights} argument must be {.val {1L}}")
|
| 324 |
} |
|
| 325 | ||
| 326 | 18x |
xs <- tbl["TRUE", ] |
| 327 | 18x |
ns <- colSums(tbl) |
| 328 | 18x |
use_stratum <- (ns > 0) |
| 329 | 18x |
ns <- ns[use_stratum] |
| 330 | 18x |
xs <- xs[use_stratum] |
| 331 | 18x |
ests <- xs / ns |
| 332 | 18x |
vars <- ests * (1 - ests) / ns |
| 333 | ||
| 334 | 18x |
strata_qnorm <- .strata_normal_quantile(vars, weights, conf.level) |
| 335 | ||
| 336 |
# Iterative setting of weights if they were not passed in `weights` argument |
|
| 337 | 18x |
weights_new <- if (do_iter) {
|
| 338 | 11x |
.update_weights_strat_wilson(vars, strata_qnorm, weights, ns, max.iterations, conf.level)$weights |
| 339 |
} else {
|
|
| 340 | 7x |
weights |
| 341 |
} |
|
| 342 | ||
| 343 | 18x |
strata_conf.level <- 2 * stats::pnorm(strata_qnorm) - 1 |
| 344 | ||
| 345 | 18x |
ci_by_strata <- Map( |
| 346 | 18x |
function(x, n) {
|
| 347 |
# Classic Wilson's confidence interval |
|
| 348 | 75x |
suppressWarnings(stats::prop.test(x, n, correct = correct, conf.level = strata_conf.level)$conf.int) |
| 349 |
}, |
|
| 350 | 18x |
x = xs, |
| 351 | 18x |
n = ns |
| 352 |
) |
|
| 353 | 18x |
lower_by_strata <- sapply(ci_by_strata, "[", 1L) |
| 354 | 18x |
upper_by_strata <- sapply(ci_by_strata, "[", 2L) |
| 355 | ||
| 356 | 18x |
lower <- sum(weights_new * lower_by_strata) |
| 357 | 18x |
upper <- sum(weights_new * upper_by_strata) |
| 358 | ||
| 359 |
# Return values |
|
| 360 | 18x |
list( |
| 361 | 18x |
N = length(x), |
| 362 | 18x |
n = sum(x), |
| 363 | 18x |
estimate = mean(x), |
| 364 | 18x |
conf.low = lower, |
| 365 | 18x |
conf.high = upper, |
| 366 | 18x |
conf.level = conf.level, |
| 367 | 18x |
weights = if (do_iter) weights_new else NULL, |
| 368 | 18x |
method = |
| 369 | 18x |
glue::glue("Stratified Wilson Confidence Interval {ifelse(correct, 'with', 'without')} continuity correction")
|
| 370 |
) |> |
|
| 371 | 18x |
compact() |
| 372 |
} |
|
| 373 | ||
| 374 |
#' @describeIn proportion_ci Helper to determine if vector is binary (logical or 0/1) |
|
| 375 |
#' |
|
| 376 |
#' @export |
|
| 377 |
is_binary <- function(x) {
|
|
| 378 | 631x |
is.logical(x) || (is_integerish(x) && is_empty(setdiff(x, c(0, 1, NA)))) |
| 379 |
} |
|
| 380 | ||
| 381 |
#' Helper Function for the Estimation of Stratified Quantiles |
|
| 382 |
#' |
|
| 383 |
#' This function wraps the estimation of stratified percentiles when we assume |
|
| 384 |
#' the approximation for large numbers. This is necessary only in the case |
|
| 385 |
#' proportions for each strata are unequal. |
|
| 386 |
#' |
|
| 387 |
#' @inheritParams proportion_ci_strat_wilson |
|
| 388 |
#' |
|
| 389 |
#' @return Stratified quantile. |
|
| 390 |
#' |
|
| 391 |
#' @seealso [proportion_ci_strat_wilson()] |
|
| 392 |
#' |
|
| 393 |
#' @keywords internal |
|
| 394 |
#' |
|
| 395 |
#' @examples |
|
| 396 |
#' strata_data <- table(data.frame( |
|
| 397 |
#' "f1" = sample(c(TRUE, FALSE), 100, TRUE), |
|
| 398 |
#' "f2" = sample(c("x", "y", "z"), 100, TRUE),
|
|
| 399 |
#' stringsAsFactors = TRUE |
|
| 400 |
#' )) |
|
| 401 |
#' ns <- colSums(strata_data) |
|
| 402 |
#' ests <- strata_data["TRUE", ] / ns |
|
| 403 |
#' vars <- ests * (1 - ests) / ns |
|
| 404 |
#' weights <- rep(1 / length(ns), length(ns)) |
|
| 405 |
#' |
|
| 406 |
#' cardx:::.strata_normal_quantile(vars, weights, 0.95) |
|
| 407 |
.strata_normal_quantile <- function(vars, weights, conf.level) {
|
|
| 408 | 42x |
summands <- weights^2 * vars |
| 409 |
# Stratified quantile |
|
| 410 | 42x |
sqrt(sum(summands)) / sum(sqrt(summands)) * stats::qnorm((1 + conf.level) / 2) |
| 411 |
} |
|
| 412 | ||
| 413 |
#' Helper Function for the Estimation of Weights for `proportion_ci_strat_wilson()` |
|
| 414 |
#' |
|
| 415 |
#' This function wraps the iteration procedure that allows you to estimate |
|
| 416 |
#' the weights for each proportional strata. This assumes to minimize the |
|
| 417 |
#' weighted squared length of the confidence interval. |
|
| 418 |
#' |
|
| 419 |
#' @keywords internal |
|
| 420 |
#' @inheritParams proportion_ci_strat_wilson |
|
| 421 |
#' @param vars (`numeric`)\cr normalized proportions for each strata. |
|
| 422 |
#' @param strata_qnorm (`numeric`)\cr initial estimation with identical weights of the quantiles. |
|
| 423 |
#' @param initial_weights (`numeric`)\cr initial weights used to calculate `strata_qnorm`. This can |
|
| 424 |
#' be optimized in the future if we need to estimate better initial weights. |
|
| 425 |
#' @param n_per_strata (`numeric`)\cr number of elements in each strata. |
|
| 426 |
#' @param max.iterations (`count`)\cr maximum number of iterations to be tried. Convergence is always checked. |
|
| 427 |
#' @param tol (`number`)\cr tolerance threshold for convergence. |
|
| 428 |
#' |
|
| 429 |
#' @return A `list` of 3 elements: `n_it`, `weights`, and `diff_v`. |
|
| 430 |
#' |
|
| 431 |
#' @seealso For references and details see [`proportion_ci_strat_wilson()`]. |
|
| 432 |
#' |
|
| 433 |
#' @examples |
|
| 434 |
#' vs <- c(0.011, 0.013, 0.012, 0.014, 0.017, 0.018) |
|
| 435 |
#' sq <- 0.674 |
|
| 436 |
#' ws <- rep(1 / length(vs), length(vs)) |
|
| 437 |
#' ns <- c(22, 18, 17, 17, 14, 12) |
|
| 438 |
#' |
|
| 439 |
#' cardx:::.update_weights_strat_wilson(vs, sq, ws, ns, 100, 0.95, 0.001) |
|
| 440 |
.update_weights_strat_wilson <- function(vars, |
|
| 441 |
strata_qnorm, |
|
| 442 |
initial_weights, |
|
| 443 |
n_per_strata, |
|
| 444 |
max.iterations = 50, |
|
| 445 |
conf.level = 0.95, |
|
| 446 |
tol = 0.001) {
|
|
| 447 | 11x |
it <- 0 |
| 448 | 11x |
diff_v <- NULL |
| 449 | ||
| 450 | 11x |
while (it < max.iterations) {
|
| 451 | 24x |
it <- it + 1 |
| 452 | 24x |
weights_new_t <- (1 + strata_qnorm^2 / n_per_strata)^2 |
| 453 | 24x |
weights_new_b <- (vars + strata_qnorm^2 / (4 * n_per_strata^2)) |
| 454 | 24x |
weights_new <- weights_new_t / weights_new_b |
| 455 | 24x |
weights_new <- weights_new / sum(weights_new) |
| 456 | 24x |
strata_qnorm <- .strata_normal_quantile(vars, weights_new, conf.level) |
| 457 | 24x |
diff_v <- c(diff_v, sum(abs(weights_new - initial_weights))) |
| 458 | 11x |
if (diff_v[length(diff_v)] < tol) break |
| 459 | 13x |
initial_weights <- weights_new |
| 460 |
} |
|
| 461 | ||
| 462 | 11x |
if (it == max.iterations) {
|
| 463 | ! |
warning("The heuristic to find weights did not converge with max.iterations = ", max.iterations)
|
| 464 |
} |
|
| 465 | ||
| 466 | 11x |
list( |
| 467 | 11x |
"n_it" = it, |
| 468 | 11x |
"weights" = weights_new, |
| 469 | 11x |
"diff_v" = diff_v |
| 470 |
) |
|
| 471 |
} |
| 1 |
#' ARD Survival Estimates |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Analysis results data for survival quantiles and x-year survival estimates, extracted |
|
| 5 |
#' from a [survival::survfit()] model. |
|
| 6 |
#' |
|
| 7 |
#' @param x (`survfit` or `data.frame`)\cr |
|
| 8 |
#' an object of class `survfit` created with [survival::survfit()] or a data frame. See below for details. |
|
| 9 |
#' @param times (`numeric`)\cr |
|
| 10 |
#' a vector of times for which to return survival probabilities. |
|
| 11 |
#' @param probs (`numeric`)\cr |
|
| 12 |
#' a vector of probabilities with values in (0,1) specifying the survival quantiles to return. |
|
| 13 |
#' @param type (`string` or `NULL`)\cr |
|
| 14 |
#' type of statistic to report. Available for Kaplan-Meier time estimates only, otherwise `type` |
|
| 15 |
#' is ignored. Default is `NULL`. |
|
| 16 |
#' Must be one of the following: |
|
| 17 |
#' ```{r, echo = FALSE}
|
|
| 18 |
#' dplyr::tribble( |
|
| 19 |
#' ~type, ~transformation, |
|
| 20 |
#' '`"survival"`', '`x`', |
|
| 21 |
#' '`"risk"`', '`1 - x`', |
|
| 22 |
#' '`"cumhaz"`', '`-log(x)`', |
|
| 23 |
#' ) %>% |
|
| 24 |
#' knitr::kable() |
|
| 25 |
#' ``` |
|
| 26 |
#' @param y (`Surv` or `string`)\cr |
|
| 27 |
#' an object of class `Surv` created using [survival::Surv()]. This object will be passed as the left-hand side of |
|
| 28 |
#' the formula constructed and passed to [survival::survfit()]. This object can also be passed as a string. |
|
| 29 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 30 |
#' stratification variables to be passed as the right-hand side of the formula constructed and passed to |
|
| 31 |
#' [survival::survfit()]. Default is `NULL` for an unstratified model, e.g. `Surv() ~ 1`. |
|
| 32 |
#' @param method.args (named `list`)\cr |
|
| 33 |
#' named list of arguments that will be passed to [survival::survfit()]. |
|
| 34 |
#' @inheritParams rlang::args_dots_empty |
|
| 35 |
#' |
|
| 36 |
#' @section Formula Specification: |
|
| 37 |
#' When passing a [`survival::survfit()`] object to `ard_survival_survfit()`, |
|
| 38 |
#' the `survfit()` call must use an evaluated formula and not a stored formula. |
|
| 39 |
#' Including a proper formula in the call allows the function to accurately |
|
| 40 |
#' identify all variables included in the estimation. See below for examples: |
|
| 41 |
#' |
|
| 42 |
#' ```r |
|
| 43 |
#' library(cardx) |
|
| 44 |
#' library(survival) |
|
| 45 |
#' |
|
| 46 |
#' # include formula in `survfit()` call |
|
| 47 |
#' survfit(Surv(time, status) ~ sex, lung) |> ard_survival_survfit(time = 500) |
|
| 48 |
#' |
|
| 49 |
#' # you can also pass a data frame to `ard_survival_survfit()` as well. |
|
| 50 |
#' lung |> |
|
| 51 |
#' ard_survival_survfit(y = Surv(time, status), variables = "sex", time = 500) |
|
| 52 |
#' ``` |
|
| 53 |
#' You **cannot**, however, pass a stored formula, e.g. `survfit(my_formula, lung)`, |
|
| 54 |
#' but you can use stored formulas with `rlang::inject(survfit(!!my_formula, lung))`. |
|
| 55 |
#' |
|
| 56 |
#' @section Variable Classes: |
|
| 57 |
#' When the `survfit` method is called, the class of the stratifying variables |
|
| 58 |
#' will be returned as a factor. |
|
| 59 |
#' |
|
| 60 |
#' When the data frame method is called, the original classes are retained in the |
|
| 61 |
#' resulting ARD. |
|
| 62 |
#' |
|
| 63 |
#' @return an ARD data frame of class 'card' |
|
| 64 |
#' @name ard_survival_survfit |
|
| 65 |
#' |
|
| 66 |
#' @details |
|
| 67 |
#' * Only one of either the `times` or `probs` parameters can be specified. |
|
| 68 |
#' * Times should be provided using the same scale as the time variable used to fit the provided |
|
| 69 |
#' survival fit model. |
|
| 70 |
#' |
|
| 71 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("survival", "broom", "ggsurvfit")))
|
|
| 72 |
#' library(survival) |
|
| 73 |
#' library(ggsurvfit) |
|
| 74 |
#' |
|
| 75 |
#' survfit(Surv_CNSR(AVAL, CNSR) ~ TRTA, data = cards::ADTTE) |> |
|
| 76 |
#' ard_survival_survfit(times = c(60, 180)) |
|
| 77 |
#' |
|
| 78 |
#' survfit(Surv_CNSR(AVAL, CNSR) ~ TRTA, data = cards::ADTTE, conf.int = 0.90) |> |
|
| 79 |
#' ard_survival_survfit(probs = c(0.25, 0.5, 0.75)) |
|
| 80 |
#' |
|
| 81 |
#' cards::ADTTE |> |
|
| 82 |
#' ard_survival_survfit(y = Surv_CNSR(AVAL, CNSR), variables = c("TRTA", "SEX"), times = 90)
|
|
| 83 |
#' |
|
| 84 |
#' # Competing Risks Example --------------------------- |
|
| 85 |
#' set.seed(1) |
|
| 86 |
#' ADTTE_MS <- cards::ADTTE %>% |
|
| 87 |
#' dplyr::mutate( |
|
| 88 |
#' CNSR = dplyr::case_when( |
|
| 89 |
#' CNSR == 0 ~ "censor", |
|
| 90 |
#' runif(dplyr::n()) < 0.5 ~ "death from cancer", |
|
| 91 |
#' TRUE ~ "death other causes" |
|
| 92 |
#' ) %>% factor() |
|
| 93 |
#' ) |
|
| 94 |
#' |
|
| 95 |
#' survfit(Surv(AVAL, CNSR) ~ TRTA, data = ADTTE_MS) %>% |
|
| 96 |
#' ard_survival_survfit(times = c(60, 180)) |
|
| 97 |
NULL |
|
| 98 | ||
| 99 |
#' @rdname ard_survival_survfit |
|
| 100 |
#' @export |
|
| 101 |
ard_survival_survfit <- function(x, ...) {
|
|
| 102 | 36x |
set_cli_abort_call() |
| 103 | ||
| 104 | 36x |
check_not_missing(x) |
| 105 | 35x |
UseMethod("ard_survival_survfit")
|
| 106 |
} |
|
| 107 | ||
| 108 |
#' @rdname ard_survival_survfit |
|
| 109 |
#' @export |
|
| 110 |
ard_survival_survfit.survfit <- function(x, times = NULL, probs = NULL, type = NULL, ...) {
|
|
| 111 | 27x |
set_cli_abort_call() |
| 112 | ||
| 113 |
# check installed packages --------------------------------------------------- |
|
| 114 | 27x |
check_pkg_installed(c("survival", "broom"))
|
| 115 | ||
| 116 |
# check/process inputs ------------------------------------------------------- |
|
| 117 | 27x |
if (is.name(x$call$formula)) {
|
| 118 | 1x |
cli::cli_abort( |
| 119 | 1x |
message = paste( |
| 120 | 1x |
"The call in the survfit object {.arg x} must be an evaluated formula.",
|
| 121 | 1x |
"Please see {.help [{.fun ard_survival_survfit}](cardx::ard_survival_survfit)} documentation for details on properly specifying formulas."
|
| 122 |
), |
|
| 123 | 1x |
call = get_cli_abort_call() |
| 124 |
) |
|
| 125 |
} |
|
| 126 | 26x |
if (inherits(x, "survfitcox")) {
|
| 127 | 1x |
cli::cli_abort("Argument {.arg x} cannot be class {.cls survfitcox}.",
|
| 128 | 1x |
call = get_cli_abort_call() |
| 129 |
) |
|
| 130 |
} |
|
| 131 | ||
| 132 |
# competing risks models cannot use the type argument |
|
| 133 | 25x |
if (inherits(x, c("survfitms", "survfitcoxms")) && !is.null(type)) {
|
| 134 | 1x |
cli::cli_abort("Cannot use {.arg type} argument with {.code survfit} models with class {.cls {c('survfitms', 'survfitcoxms')}}.",
|
| 135 | 1x |
call = get_cli_abort_call() |
| 136 |
) |
|
| 137 |
} |
|
| 138 | 5x |
if (!is.null(probs)) check_range(probs, c(0, 1)) |
| 139 | 24x |
if (sum(is.null(times), is.null(probs)) != 1) {
|
| 140 | 1x |
cli::cli_abort("One and only one of {.arg times} and {.arg probs} must be specified.")
|
| 141 |
} |
|
| 142 | ||
| 143 |
# for regular KM estimators, we allow the type argument |
|
| 144 | 23x |
if (!inherits(x, "survfitms") && !is.null(type)) {
|
| 145 | 3x |
type <- arg_match(type, values = c("survival", "risk", "cumhaz"))
|
| 146 |
} |
|
| 147 | ||
| 148 |
# cannot specify type arg when probs supplied |
|
| 149 | 22x |
if (!is.null(probs) && !is.null(type)) {
|
| 150 | 1x |
cli::cli_abort("Cannot use {.arg type} argument when {.arg probs} argument specifed.",
|
| 151 | 1x |
call = get_cli_abort_call() |
| 152 |
) |
|
| 153 |
} |
|
| 154 | ||
| 155 |
# build ARD ------------------------------------------------------------------ |
|
| 156 | 21x |
est_type <- ifelse(is.null(probs), "times", "probs") |
| 157 | 21x |
tidy_survfit <- switch(est_type, |
| 158 | 21x |
"times" = .process_survfit_time(x, times, type %||% "survival"), |
| 159 | 21x |
"probs" = .process_survfit_probs(x, probs) |
| 160 |
) |
|
| 161 | ||
| 162 | 21x |
.format_survfit_results(tidy_survfit) |
| 163 |
} |
|
| 164 | ||
| 165 |
#' @rdname ard_survival_survfit |
|
| 166 |
#' @export |
|
| 167 |
ard_survival_survfit.data.frame <- function(x, y, |
|
| 168 |
variables = NULL, |
|
| 169 |
times = NULL, probs = NULL, type = NULL, |
|
| 170 |
method.args = list(conf.int = 0.95, conf.type = "log"), ...) {
|
|
| 171 | 7x |
set_cli_abort_call() |
| 172 | ||
| 173 |
# check/process inputs ------------------------------------------------------- |
|
| 174 | 7x |
check_not_missing(y) |
| 175 | 6x |
cards::process_selectors(x, variables = {{ variables }})
|
| 176 | ||
| 177 |
# process outcome as string -------------------------------------------------- |
|
| 178 | 6x |
y <- enquo(y) |
| 179 |
# if a character was passed, return it as is |
|
| 180 | 3x |
if (tryCatch(is.character(eval_tidy(y)), error = \(e) FALSE)) y <- eval_tidy(y) # styler: off |
| 181 |
# otherwise, convert expr to string |
|
| 182 | 3x |
else y <- expr_deparse(quo_get_expr(y)) # styler: off |
| 183 | 6x |
check_class( |
| 184 | 6x |
with(x, eval(parse_expr(y))), |
| 185 | 6x |
cls = "Surv", |
| 186 | 6x |
message = |
| 187 | 6x |
"The {.arg y} argument must be a string or expression that evaluates to an object of class {.cls Surv}
|
| 188 | 6x |
most often created with {.fun survival::Surv} or {.fun ggsurvfit::Surv_CNSR}."
|
| 189 |
) |
|
| 190 | ||
| 191 | ||
| 192 |
# build model ---------------------------------------------------------------- |
|
| 193 | 5x |
survfit_formula <- |
| 194 | 5x |
case_switch( |
| 195 | 5x |
!is_empty(variables) ~ stats::reformulate(termlabels = bt(variables), response = y), |
| 196 | 5x |
.default = stats::reformulate(termlabels = "1", response = y) |
| 197 |
) |
|
| 198 | ||
| 199 | 5x |
ard <- construct_model( |
| 200 | 5x |
data = x, |
| 201 | 5x |
formula = survfit_formula, |
| 202 | 5x |
method = "survfit", |
| 203 | 5x |
package = "survival", |
| 204 | 5x |
method.args = {{ method.args }}
|
| 205 |
) |> |
|
| 206 | 5x |
ard_survival_survfit(times = times, probs = probs, type = type) |
| 207 | ||
| 208 | 5x |
ard_overall <- ard[ard$variable == "..ard_survival_survfit..", ] |
| 209 | ||
| 210 | 5x |
ard |> |
| 211 | 5x |
dplyr::filter(ard$variable != "..ard_survival_survfit..") |> |
| 212 | 5x |
.restore_original_column_types(data = x) |> |
| 213 | 5x |
dplyr::bind_rows(ard_overall) |
| 214 |
} |
|
| 215 | ||
| 216 |
#' Process Survival Fit For Time Estimates |
|
| 217 |
#' |
|
| 218 |
#' @inheritParams cards::tidy_as_ard |
|
| 219 |
#' @inheritParams ard_survival_survfit |
|
| 220 |
#' @param start.time (`numeric`)\cr |
|
| 221 |
#' default starting time. See [survival::survfit0()] for more details. |
|
| 222 |
#' |
|
| 223 |
#' @return a `tibble` |
|
| 224 |
#' |
|
| 225 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("survival", "broom")))
|
|
| 226 |
#' survival::survfit(survival::Surv(AVAL, CNSR) ~ TRTA, cards::ADTTE) |> |
|
| 227 |
#' cardx:::.process_survfit_time(times = c(60, 180), type = "risk") |
|
| 228 |
#' |
|
| 229 |
#' @keywords internal |
|
| 230 |
.process_survfit_time <- function(x, times, type, start.time = NULL) {
|
|
| 231 |
# add start time |
|
| 232 | 18x |
min_time <- min(x$time) |
| 233 | 18x |
if (is.null(start.time) && min_time < 0) {
|
| 234 | ! |
cli::cli_inform(paste( |
| 235 | ! |
"The {.arg start.time} argument has not been set and negative times have been observed. Please set start",
|
| 236 | ! |
"time via the {.arg start.time} argument, otherwise the minimum observed time will be used by default."
|
| 237 |
)) |
|
| 238 | ! |
start.time <- min_time |
| 239 | 18x |
} else if (is.null(start.time)) {
|
| 240 | 18x |
start.time <- 0 |
| 241 |
} |
|
| 242 | 18x |
x <- survival::survfit0(x, start.time) %>% |
| 243 | 18x |
summary(times, extend = TRUE) |
| 244 | ||
| 245 |
# process competing risks/multi-state models |
|
| 246 | 18x |
multi_state <- inherits(x, "summary.survfitms") |
| 247 | ||
| 248 | 18x |
if (multi_state) {
|
| 249 |
# selecting state to show |
|
| 250 | 1x |
state <- setdiff(unique(x$states), "(s0)")[[1]] |
| 251 | 1x |
cli::cli_inform("Multi-state model detected. Showing probabilities into state '{state}'.")
|
| 252 | 1x |
x$n.risk <- x$n.risk[, 1] |
| 253 | 1x |
ms_cols <- c("pstate", "std.err", "upper", "lower")
|
| 254 | 1x |
state_col <- which(colnames(x$pstate) == state) |
| 255 | 1x |
x[ms_cols] <- lapply(x[ms_cols], function(m) m[, state_col]) |
| 256 | 1x |
x$surv <- x$pstate |
| 257 |
} |
|
| 258 | ||
| 259 |
# tidy survfit results |
|
| 260 | 18x |
x_cols <- intersect(names(x), c("time", "n.risk", "surv", "std.err", "upper", "lower", "strata"))
|
| 261 | 18x |
tidy_x <- data.frame(x[x_cols]) %>% |
| 262 | 18x |
dplyr::rename(estimate = "surv", std.error = "std.err", conf.high = "upper", conf.low = "lower") %>% |
| 263 | 18x |
dplyr::mutate( |
| 264 | 18x |
conf.level = x$conf.int, |
| 265 | 18x |
conf.type = x$conf.type |
| 266 |
) |
|
| 267 | ||
| 268 | 18x |
strat <- "strata" %in% names(tidy_x) |
| 269 | ||
| 270 |
# get requested estimates |
|
| 271 | 18x |
df_stat <- tidy_x %>% |
| 272 |
# find max time |
|
| 273 | 18x |
dplyr::group_by_at(., dplyr::vars(dplyr::any_of("strata"))) %>%
|
| 274 | 18x |
dplyr::mutate(time_max = max(.data$time)) %>% |
| 275 | 18x |
dplyr::ungroup() %>% |
| 276 |
# add requested timepoints |
|
| 277 | 18x |
dplyr::full_join( |
| 278 | 18x |
tidy_x %>% |
| 279 | 18x |
dplyr::select(any_of("strata")) %>%
|
| 280 | 18x |
dplyr::distinct() %>% |
| 281 | 18x |
dplyr::mutate( |
| 282 | 18x |
time = list(.env$times), |
| 283 | 18x |
col_name = list(paste("stat", seq_len(length(.env$times)), sep = "_"))
|
| 284 |
) %>% |
|
| 285 | 18x |
tidyr::unnest(cols = c("time", "col_name")),
|
| 286 | 18x |
by = unlist(intersect(c("strata", "time"), names(tidy_x)))
|
| 287 |
) |
|
| 288 | ||
| 289 | 18x |
if (strat) {
|
| 290 | 15x |
df_stat <- df_stat %>% dplyr::arrange(.data$strata) |
| 291 |
} |
|
| 292 | ||
| 293 | 18x |
df_stat <- df_stat %>% |
| 294 | 18x |
dplyr::arrange(.data$time) %>% |
| 295 |
# if user-specified time is after max time, make estimate NA |
|
| 296 | 18x |
dplyr::mutate_at( |
| 297 | 18x |
dplyr::vars("estimate", "conf.high", "conf.low"),
|
| 298 | 18x |
~ ifelse(.data$time > .data$time_max, NA_real_, .) |
| 299 |
) %>% |
|
| 300 | 18x |
dplyr::mutate(context = type) %>% |
| 301 | 18x |
dplyr::select(!dplyr::any_of(c("time_max", "col_name")))
|
| 302 | ||
| 303 |
# convert estimates to requested type |
|
| 304 | 18x |
if (type != "survival") {
|
| 305 | 1x |
df_stat <- df_stat %>% |
| 306 | 1x |
dplyr::mutate(dplyr::across( |
| 307 | 1x |
any_of(c("estimate", "conf.low", "conf.high")),
|
| 308 | 1x |
if (type == "cumhaz") ~ -log(.x) else ~ 1 - .x |
| 309 |
)) %>% |
|
| 310 | 1x |
dplyr::rename(conf.low = "conf.high", conf.high = "conf.low") |
| 311 |
} |
|
| 312 | ||
| 313 | 18x |
df_stat <- extract_strata(x, df_stat) |
| 314 | ||
| 315 | 18x |
df_stat |
| 316 |
} |
|
| 317 | ||
| 318 |
#' Process Survival Fit For Quantile Estimates |
|
| 319 |
#' |
|
| 320 |
#' @inheritParams cards::tidy_as_ard |
|
| 321 |
#' @inheritParams ard_survival_survfit |
|
| 322 |
#' |
|
| 323 |
#' @return a `tibble` |
|
| 324 |
#' |
|
| 325 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "survival"))
|
|
| 326 |
#' survival::survfit(survival::Surv(AVAL, CNSR) ~ TRTA, cards::ADTTE) |> |
|
| 327 |
#' cardx:::.process_survfit_probs(probs = c(0.25, 0.75)) |
|
| 328 |
#' |
|
| 329 |
#' @keywords internal |
|
| 330 |
.process_survfit_probs <- function(x, probs) {
|
|
| 331 |
# calculate survival quantiles and add estimates to df |
|
| 332 | 3x |
df_stat <- map2( |
| 333 | 3x |
probs, |
| 334 | 3x |
seq_along(probs), |
| 335 | 3x |
~ stats::quantile(x, probs = .x) %>% |
| 336 | 3x |
as.data.frame() %>% |
| 337 | 3x |
set_names(c("estimate", "conf.low", "conf.high")) %>%
|
| 338 | 3x |
dplyr::mutate(strata = row.names(.)) %>% |
| 339 | 3x |
dplyr::select(dplyr::any_of(c("n.risk", "strata", "estimate", "std.error", "conf.low", "conf.high"))) %>%
|
| 340 | 3x |
dplyr::mutate( |
| 341 | 3x |
conf.level = x$conf.int, |
| 342 | 3x |
conf.type = x$conf.type, |
| 343 | 3x |
prob = .x |
| 344 |
) |
|
| 345 |
) %>% |
|
| 346 | 3x |
dplyr::bind_rows() %>% |
| 347 | 3x |
`rownames<-`(NULL) %>% |
| 348 | 3x |
dplyr::mutate(context = "survival_survfit") %>% |
| 349 | 3x |
dplyr::as_tibble() |
| 350 | ||
| 351 | 1x |
if (length(x$n) == 1) df_stat <- df_stat %>% dplyr::select(-"strata") |
| 352 | ||
| 353 | 3x |
df_stat <- extract_strata(x, df_stat) |
| 354 | ||
| 355 | 3x |
df_stat |
| 356 |
} |
|
| 357 | ||
| 358 |
# process stratifying variables |
|
| 359 |
extract_strata <- function(x, df_stat) {
|
|
| 360 | 21x |
x_terms <- attr(stats::terms(stats::as.formula(x$call$formula)), "term.labels") |
| 361 | 21x |
x_terms <- gsub(".*\\(", "", gsub("\\)", "", x_terms))
|
| 362 | 21x |
if (length(x_terms) > 0L) {
|
| 363 | 17x |
strata_lvls <- data.frame() |
| 364 | ||
| 365 | 17x |
for (i in df_stat[["strata"]]) {
|
| 366 | 111x |
i <- gsub(".*\\(", "", gsub("\\)", "", i))
|
| 367 | 111x |
terms_str <- strsplit(i, paste(c(paste0(x_terms, "="), paste0(", ", x_terms, "=")), collapse = "|"))[[1]]
|
| 368 | 111x |
s_lvl <- terms_str[nchar(terms_str) > 0] |
| 369 | 111x |
strata_lvls <- rbind(strata_lvls, s_lvl) |
| 370 |
} |
|
| 371 | 17x |
if (nrow(strata_lvls) > 0) {
|
| 372 | 17x |
strata_lvls <- cbind(strata_lvls, t(x_terms)) |
| 373 | 17x |
names(strata_lvls) <- c( |
| 374 | 17x |
t(sapply(seq_along(x_terms), function(i) c(paste0("group", i, "_level"), paste0("group", i))))
|
| 375 |
) |
|
| 376 | 17x |
df_stat <- cbind(df_stat, strata_lvls) %>% |
| 377 | 17x |
dplyr::select(-"strata") |
| 378 |
} |
|
| 379 |
} |
|
| 380 | 21x |
df_stat |
| 381 |
} |
|
| 382 | ||
| 383 |
#' Convert Tidied Survival Fit to ARD |
|
| 384 |
#' |
|
| 385 |
#' @inheritParams cards::tidy_as_ard |
|
| 386 |
#' |
|
| 387 |
#' @return an ARD data frame of class 'card' |
|
| 388 |
#' |
|
| 389 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("survival", "broom")))
|
|
| 390 |
#' cardx:::.format_survfit_results( |
|
| 391 |
#' broom::tidy(survival::survfit(survival::Surv(AVAL, CNSR) ~ TRTA, cards::ADTTE)) |
|
| 392 |
#' ) |
|
| 393 |
#' |
|
| 394 |
#' @keywords internal |
|
| 395 |
.format_survfit_results <- function(tidy_survfit) {
|
|
| 396 | 21x |
est <- if ("time" %in% names(tidy_survfit)) "time" else "prob"
|
| 397 | 21x |
conf.level <- tidy_survfit[["conf.level"]][1] |
| 398 | 21x |
conf.type <- tidy_survfit[["conf.type"]][1] |
| 399 | ||
| 400 | 21x |
ret <- tidy_survfit %>% |
| 401 | 21x |
dplyr::select(-dplyr::any_of(c("conf.level", "conf.type"))) %>%
|
| 402 | 21x |
dplyr::mutate(dplyr::across( |
| 403 | 21x |
dplyr::any_of( |
| 404 | 21x |
c("n.risk", "estimate", "std.error", "conf.high", "conf.low", "time", "prob")
|
| 405 |
), |
|
| 406 | 21x |
~ as.list(.) |
| 407 |
)) %>% |
|
| 408 | 21x |
tidyr::pivot_longer( |
| 409 | 21x |
cols = dplyr::any_of(c("n.risk", "estimate", "std.error", "conf.high", "conf.low")),
|
| 410 | 21x |
names_to = "stat_name", |
| 411 | 21x |
values_to = "stat" |
| 412 |
) %>% |
|
| 413 | 21x |
dplyr::mutate( |
| 414 | 21x |
variable = est, |
| 415 | 21x |
variable_level = .data[[est]] |
| 416 |
) %>% |
|
| 417 | 21x |
dplyr::select(-all_of(est)) |
| 418 | ||
| 419 |
# statistics applicable to all calculations |
|
| 420 | 21x |
if (!is.null(conf.level) && !is.null(conf.type)) {
|
| 421 | 21x |
ret <- ret %>% |
| 422 | 21x |
dplyr::bind_rows( |
| 423 | 21x |
dplyr::tibble( |
| 424 | 21x |
context = "survival", |
| 425 | 21x |
stat_name = c("conf.level", "conf.type"),
|
| 426 | 21x |
stat = list(conf.level, conf.type), |
| 427 | 21x |
variable = "..ard_survival_survfit.." |
| 428 |
) |
|
| 429 |
) |
|
| 430 |
} |
|
| 431 | ||
| 432 | 21x |
ret %>% |
| 433 | 21x |
dplyr::left_join( |
| 434 | 21x |
.df_survfit_stat_labels(), |
| 435 | 21x |
by = "stat_name" |
| 436 |
) %>% |
|
| 437 | 21x |
dplyr::mutate( |
| 438 | 21x |
fmt_fun = lapply( |
| 439 | 21x |
.data$stat, |
| 440 | 21x |
function(x) {
|
| 441 | 583x |
switch(is.integer(x), |
| 442 | 583x |
0L |
| 443 | 583x |
) %||% switch(is.numeric(x), |
| 444 | 583x |
1L |
| 445 |
) |
|
| 446 |
} |
|
| 447 |
), |
|
| 448 | 21x |
stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name) |
| 449 |
) %>% |
|
| 450 | 21x |
dplyr::mutate(dplyr::across(matches("group[0-9]*_level"), ~ as.list(as.factor(.x)))) %>%
|
| 451 | 21x |
dplyr::mutate( |
| 452 | 21x |
warning = list(NULL), |
| 453 | 21x |
error = list(NULL) |
| 454 |
) %>% |
|
| 455 | 21x |
cards::as_card() %>% |
| 456 | 21x |
cards::tidy_ard_column_order() %>% |
| 457 | 21x |
cards::tidy_ard_row_order() |
| 458 |
} |
|
| 459 | ||
| 460 |
.df_survfit_stat_labels <- function() {
|
|
| 461 | 21x |
dplyr::tribble( |
| 462 | 21x |
~stat_name, ~stat_label, |
| 463 | 21x |
"n.risk", "Number of Subjects at Risk", |
| 464 | 21x |
"estimate", "Survival Probability", |
| 465 | 21x |
"std.error", "Standard Error (untransformed)", |
| 466 | 21x |
"conf.low", "CI Lower Bound", |
| 467 | 21x |
"conf.high", "CI Upper Bound", |
| 468 | 21x |
"prob", "Quantile", |
| 469 | 21x |
"time", "Time", |
| 470 | 21x |
"conf.level", "CI Confidence Level", |
| 471 | 21x |
"conf.type", "CI Type" |
| 472 |
) |
|
| 473 |
} |
| 1 |
#' ARD McNemar's Test |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Analysis results data for McNemar's statistical test. |
|
| 5 |
#' We have two functions depending on the structure of the data. |
|
| 6 |
#' - `ard_stats_mcnemar_test()` is the structure expected by [`stats::mcnemar.test()`] |
|
| 7 |
#' - `ard_stats_mcnemar_test_long()` is one row per ID per group |
|
| 8 |
#' |
|
| 9 |
#' @param data (`data.frame`)\cr |
|
| 10 |
#' a data frame. See below for details. |
|
| 11 |
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 12 |
#' column name to compare by. |
|
| 13 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 14 |
#' column names to be compared. Independent tests will |
|
| 15 |
#' be computed for each variable. |
|
| 16 |
#' @param ... arguments passed to `stats::mcnemar.test(...)` |
|
| 17 |
#' @param id ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 18 |
#' column name of the subject or participant ID |
|
| 19 |
#' |
|
| 20 |
#' @return ARD data frame |
|
| 21 |
#' @name ard_stats_mcnemar_test |
|
| 22 |
#' |
|
| 23 |
#' @details |
|
| 24 |
#' For the `ard_stats_mcnemar_test()` function, the data is expected to be one row per subject. |
|
| 25 |
#' The data is passed as `stats::mcnemar.test(x = data[[variable]], y = data[[by]], ...)`. |
|
| 26 |
#' Please use `table(x = data[[variable]], y = data[[by]])` to check the contingency table. |
|
| 27 |
#' |
|
| 28 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
|
|
| 29 |
#' cards::ADSL |> |
|
| 30 |
#' ard_stats_mcnemar_test(by = "SEX", variables = "EFFFL") |
|
| 31 |
#' |
|
| 32 |
#' set.seed(1234) |
|
| 33 |
#' cards::ADSL[c("USUBJID", "TRT01P")] |>
|
|
| 34 |
#' dplyr::mutate(TYPE = "PLANNED") |> |
|
| 35 |
#' dplyr::rename(TRT01 = TRT01P) %>% |
|
| 36 |
#' dplyr::bind_rows(dplyr::mutate(., TYPE = "ACTUAL", TRT01 = sample(TRT01))) |> |
|
| 37 |
#' ard_stats_mcnemar_test_long( |
|
| 38 |
#' by = TYPE, |
|
| 39 |
#' variable = TRT01, |
|
| 40 |
#' id = USUBJID |
|
| 41 |
#' ) |
|
| 42 |
NULL |
|
| 43 | ||
| 44 |
#' @rdname ard_stats_mcnemar_test |
|
| 45 |
#' @export |
|
| 46 |
ard_stats_mcnemar_test <- function(data, by, variables, ...) {
|
|
| 47 | 7x |
set_cli_abort_call() |
| 48 | ||
| 49 |
# check installed packages --------------------------------------------------- |
|
| 50 | 7x |
check_pkg_installed("broom")
|
| 51 | ||
| 52 |
# check/process inputs ------------------------------------------------------- |
|
| 53 | 7x |
check_not_missing(data) |
| 54 | 7x |
check_not_missing(variables) |
| 55 | 7x |
check_not_missing(by) |
| 56 | 7x |
check_data_frame(data) |
| 57 | 7x |
data <- dplyr::ungroup(data) |
| 58 | 7x |
cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
|
| 59 | 7x |
check_scalar(by) |
| 60 | ||
| 61 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 62 | 7x |
if (is_empty(variables)) {
|
| 63 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 64 |
} |
|
| 65 | ||
| 66 |
# build ARD ------------------------------------------------------------------ |
|
| 67 | 7x |
lapply( |
| 68 | 7x |
variables, |
| 69 | 7x |
function(variable) {
|
| 70 | 8x |
.format_mcnemartest_results( |
| 71 | 8x |
by = by, |
| 72 | 8x |
variable = variable, |
| 73 | 8x |
lst_tidy = |
| 74 | 8x |
cards::eval_capture_conditions( |
| 75 | 8x |
stats::mcnemar.test(x = data[[variable]], y = data[[by]], ...) |> |
| 76 | 8x |
broom::tidy() |
| 77 |
), |
|
| 78 |
... |
|
| 79 |
) |
|
| 80 |
} |
|
| 81 |
) |> |
|
| 82 | 7x |
dplyr::bind_rows() |
| 83 |
} |
|
| 84 | ||
| 85 |
#' @rdname ard_stats_mcnemar_test |
|
| 86 |
#' @export |
|
| 87 |
ard_stats_mcnemar_test_long <- function(data, by, variables, id, ...) {
|
|
| 88 | 1x |
set_cli_abort_call() |
| 89 | ||
| 90 |
# check installed packages --------------------------------------------------- |
|
| 91 | 1x |
check_pkg_installed("broom")
|
| 92 | ||
| 93 |
# check/process inputs ------------------------------------------------------- |
|
| 94 | 1x |
check_not_missing(data) |
| 95 | 1x |
check_not_missing(variables) |
| 96 | 1x |
check_not_missing(by) |
| 97 | 1x |
check_not_missing(id) |
| 98 | 1x |
check_data_frame(data) |
| 99 | 1x |
data <- dplyr::ungroup(data) |
| 100 | 1x |
cards::process_selectors(data, by = {{ by }}, variables = {{ variables }}, id = {{ id }})
|
| 101 | 1x |
check_scalar(by) |
| 102 | 1x |
check_scalar(id) |
| 103 | ||
| 104 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 105 | 1x |
if (is_empty(variables)) {
|
| 106 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 107 |
} |
|
| 108 | ||
| 109 |
# build ARD ------------------------------------------------------------------ |
|
| 110 | 1x |
lapply( |
| 111 | 1x |
variables, |
| 112 | 1x |
function(variable) {
|
| 113 | 1x |
.format_mcnemartest_results( |
| 114 | 1x |
by = by, |
| 115 | 1x |
variable = variable, |
| 116 | 1x |
lst_tidy = |
| 117 | 1x |
cards::eval_capture_conditions({
|
| 118 |
# adding this reshape inside the eval, so if there is an error it's captured in the ARD object |
|
| 119 | 1x |
data_wide <- .paired_data_pivot_wider(data, by = by, variable = variable, id = id) |
| 120 |
# performing McNemars test |
|
| 121 | 1x |
stats::mcnemar.test(x = data_wide[["by1"]], y = data_wide[["by2"]], ...) |> |
| 122 | 1x |
broom::tidy() |
| 123 |
}), |
|
| 124 |
... |
|
| 125 |
) |
|
| 126 |
} |
|
| 127 |
) |> |
|
| 128 | 1x |
dplyr::bind_rows() |
| 129 |
} |
|
| 130 | ||
| 131 |
#' Convert McNemar's test to ARD |
|
| 132 |
#' |
|
| 133 |
#' @inheritParams cards::tidy_as_ard |
|
| 134 |
#' @inheritParams stats::mcnemar.test |
|
| 135 |
#' @param by (`string`)\cr by column name |
|
| 136 |
#' @param variable (`string`)\cr variable column name |
|
| 137 |
#' @param ... passed to `stats::mcnemar.test(...)` |
|
| 138 |
#' |
|
| 139 |
#' @return ARD data frame |
|
| 140 |
#' |
|
| 141 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
|
|
| 142 |
#' cardx:::.format_mcnemartest_results( |
|
| 143 |
#' by = "ARM", |
|
| 144 |
#' variable = "AGE", |
|
| 145 |
#' lst_tidy = |
|
| 146 |
#' cards::eval_capture_conditions( |
|
| 147 |
#' stats::mcnemar.test(cards::ADSL[["SEX"]], cards::ADSL[["EFFFL"]]) |> |
|
| 148 |
#' broom::tidy() |
|
| 149 |
#' ) |
|
| 150 |
#' ) |
|
| 151 |
#' |
|
| 152 |
#' @keywords internal |
|
| 153 |
.format_mcnemartest_results <- function(by, variable, lst_tidy, ...) {
|
|
| 154 |
# build ARD ------------------------------------------------------------------ |
|
| 155 | 9x |
ret <- |
| 156 | 9x |
cards::tidy_as_ard( |
| 157 | 9x |
lst_tidy = lst_tidy, |
| 158 | 9x |
tidy_result_names = c("statistic", "p.value", "method"),
|
| 159 | 9x |
fun_args_to_record = c("correct"),
|
| 160 | 9x |
formals = formals(asNamespace("stats")[["mcnemar.test"]]),
|
| 161 | 9x |
passed_args = dots_list(...), |
| 162 | 9x |
lst_ard_columns = list(group1 = by, variable = variable, context = "stats_mcnemar_test") |
| 163 |
) |
|
| 164 | ||
| 165 |
# add the stat label --------------------------------------------------------- |
|
| 166 | 9x |
ret |> |
| 167 | 9x |
dplyr::left_join( |
| 168 | 9x |
.df_mcnemar_stat_labels(), |
| 169 | 9x |
by = "stat_name" |
| 170 |
) |> |
|
| 171 | 9x |
dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) |> |
| 172 | 9x |
cards::as_card() |> |
| 173 | 9x |
cards::tidy_ard_column_order() |
| 174 |
} |
|
| 175 | ||
| 176 |
.df_mcnemar_stat_labels <- function() {
|
|
| 177 | 9x |
dplyr::tribble( |
| 178 | 9x |
~stat_name, ~stat_label, |
| 179 | 9x |
"statistic", "X-squared Statistic", |
| 180 | 9x |
"parameter", "Degrees of Freedom", |
| 181 | 9x |
"p.value", "p-value", |
| 182 |
) |
|
| 183 |
} |
| 1 |
#' Regression VIF ARD |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Function takes a regression model object and returns the variance inflation factor (VIF) |
|
| 5 |
#' using [`car::vif()`] and converts it to a ARD structure |
|
| 6 |
#' |
|
| 7 |
#' @param x regression model object |
|
| 8 |
#' See car::vif() for details |
|
| 9 |
#' |
|
| 10 |
#' @param ... arguments passed to `car::vif(...)` |
|
| 11 |
#' |
|
| 12 |
#' @return data frame |
|
| 13 |
#' @name ard_car_vif |
|
| 14 |
#' @rdname ard_car_vif |
|
| 15 |
#' @export |
|
| 16 |
#' |
|
| 17 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "car"))
|
|
| 18 |
#' lm(AGE ~ ARM + SEX, data = cards::ADSL) |> |
|
| 19 |
#' ard_car_vif() |
|
| 20 |
ard_car_vif <- function(x, ...) {
|
|
| 21 | 5x |
set_cli_abort_call() |
| 22 | ||
| 23 |
# check installed packages --------------------------------------------------- |
|
| 24 | 5x |
check_pkg_installed("car")
|
| 25 | ||
| 26 |
# check inputs --------------------------------------------------------------- |
|
| 27 | 5x |
check_not_missing(x) |
| 28 | ||
| 29 | 5x |
vif <- cards::eval_capture_conditions(car::vif(x, ...)) |
| 30 | ||
| 31 |
# if vif failed, set result as NULL, error will be kept through eval_capture_conditions() |
|
| 32 | 5x |
if (is.null(vif$result)) {
|
| 33 |
# try to capture variable names from `terms()` |
|
| 34 | 2x |
lst_terms <- cards::eval_capture_conditions(attr(stats::terms(x), "term.labels")) |
| 35 |
# we cannot get variable names, error out |
|
| 36 | 2x |
if (!is.null(lst_terms[["error"]])) {
|
| 37 | 1x |
cli::cli_abort( |
| 38 | 1x |
c("There was an error running {.fun car::vif}. See below.", x = vif[["error"]]),
|
| 39 | 1x |
call = get_cli_abort_call() |
| 40 |
) |
|
| 41 |
} |
|
| 42 | 1x |
vif$result <- dplyr::tibble( |
| 43 | 1x |
variable = lst_terms[["result"]], |
| 44 | 1x |
VIF = list(NULL), |
| 45 | 1x |
GVIF = list(NULL), |
| 46 | 1x |
aGVIF = list(NULL), |
| 47 | 1x |
df = list(NULL) |
| 48 |
) |
|
| 49 |
} |
|
| 50 |
# if VIF is returned |
|
| 51 | 3x |
else if (!is.matrix(vif$result)) {
|
| 52 | 1x |
vif$result <- dplyr::tibble(variable = names(vif$result), VIF = vif$result) |
| 53 |
} |
|
| 54 |
# if Generalized VIF is returned |
|
| 55 | 2x |
else if (is.matrix(vif$result)) {
|
| 56 | 2x |
vif$result <- |
| 57 | 2x |
vif$result |> |
| 58 | 2x |
as.data.frame() %>% |
| 59 | 2x |
dplyr::mutate(., variable = rownames(.), .before = 1L) |> |
| 60 | 2x |
dplyr::rename( |
| 61 | 2x |
aGVIF = "GVIF^(1/(2*Df))", |
| 62 | 2x |
df = "Df" |
| 63 |
) |> |
|
| 64 | 2x |
dplyr::tibble() |
| 65 |
} |
|
| 66 | ||
| 67 |
# Clean-up the result to fit the ard structure through pivot |
|
| 68 | 4x |
vif$result <- |
| 69 | 4x |
vif$result |> |
| 70 | 4x |
tidyr::pivot_longer( |
| 71 | 4x |
cols = -c("variable"),
|
| 72 | 4x |
names_to = "stat_name", |
| 73 | 4x |
values_to = "stat" |
| 74 |
) |> |
|
| 75 | 4x |
dplyr::mutate( |
| 76 | 4x |
context = "car_vif", |
| 77 | 4x |
stat = as.list(.data$stat), |
| 78 | 4x |
stat_label = ifelse( |
| 79 | 4x |
.data$stat_name == "aGVIF", |
| 80 | 4x |
"Adjusted GVIF", |
| 81 | 4x |
.data$stat_name |
| 82 |
), |
|
| 83 | 4x |
fmt_fun = map( |
| 84 | 4x |
.data$stat, |
| 85 | 4x |
function(.x) {
|
| 86 |
# styler: off |
|
| 87 | ! |
if (is.integer(.x)) return(0L) |
| 88 | 14x |
if (is.numeric(.x)) return(1L) |
| 89 |
# styler: on |
|
| 90 | 4x |
NULL |
| 91 |
} |
|
| 92 |
) |
|
| 93 |
) |
|
| 94 | ||
| 95 |
# Bind the results and possible warning/errors together |
|
| 96 | 4x |
vif_return <- dplyr::tibble( |
| 97 | 4x |
vif$result, |
| 98 | 4x |
warning = vif["warning"], |
| 99 | 4x |
error = vif["error"] |
| 100 |
) |
|
| 101 | ||
| 102 |
# Clean up return object |
|
| 103 | 4x |
vif_return |> |
| 104 | 4x |
cards::as_card() |> |
| 105 | 4x |
cards::tidy_ard_column_order() |
| 106 |
} |
| 1 |
#' ARD Proportion Confidence Intervals |
|
| 2 |
#' |
|
| 3 |
#' Calculate confidence intervals for proportions. |
|
| 4 |
#' |
|
| 5 |
#' @inheritParams cards::ard_tabulate |
|
| 6 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 7 |
#' columns to include in summaries. Columns must be class `<logical>` |
|
| 8 |
#' or `<numeric>` values coded as `c(0,1)`. |
|
| 9 |
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 10 |
#' columns to stratify calculations by. |
|
| 11 |
#' @param denominator (`string`)\cr |
|
| 12 |
#' Must be one of `'column'` (default), `'row'`, and `'cell'`, which specifies |
|
| 13 |
#' the direction of the calculation/denominator. Argument is similar to |
|
| 14 |
#' `cards::ard_tabulate(denominator)`. |
|
| 15 |
#' @param conf.level (scalar `numeric`)\cr |
|
| 16 |
#' a scalar in `(0,1)` indicating the confidence level. |
|
| 17 |
#' Default is `0.95` |
|
| 18 |
#' @param method (`string`)\cr |
|
| 19 |
#' string indicating the type of confidence interval to calculate. |
|
| 20 |
#' Must be one of `r formals(ard_categorical_ci)[["method"]] |> eval() |> shQuote("sh")`.
|
|
| 21 |
#' See `?proportion_ci` for details. |
|
| 22 |
#' @param strata,weights,max.iterations arguments passed to `proportion_ci_strat_wilson()`, |
|
| 23 |
#' when `method='strat_wilson'` |
|
| 24 |
#' @param value ([`formula-list-selector`][cards::syntax])\cr |
|
| 25 |
#' function will calculate the CIs for all levels of the variables specified. |
|
| 26 |
#' Use this argument to instead request only a single level by summarized. |
|
| 27 |
#' Default is `list(where(is_binary) ~ 1L, where(is.logical) ~ TRUE)`, where |
|
| 28 |
#' columns coded as `0`/`1` and `TRUE`/`FALSE` will summarize the `1` and `TRUE` levels. |
|
| 29 |
#' |
|
| 30 |
#' @return an ARD data frame |
|
| 31 |
#' @name ard_categorical_ci |
|
| 32 |
#' |
|
| 33 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
|
|
| 34 |
#' # compute CI for binary variables |
|
| 35 |
#' ard_categorical_ci(mtcars, variables = c(vs, am), method = "wilson") |
|
| 36 |
#' |
|
| 37 |
#' # compute CIs for each level of a categorical variable |
|
| 38 |
#' ard_categorical_ci(mtcars, variables = cyl, method = "jeffreys") |
|
| 39 |
NULL |
|
| 40 | ||
| 41 |
#' @rdname ard_categorical_ci |
|
| 42 |
#' @export |
|
| 43 |
ard_categorical_ci <- function(data, ...) {
|
|
| 44 | 58x |
check_not_missing(data) |
| 45 | 58x |
UseMethod("ard_categorical_ci")
|
| 46 |
} |
|
| 47 | ||
| 48 |
#' @rdname ard_categorical_ci |
|
| 49 |
#' @export |
|
| 50 |
ard_categorical_ci.data.frame <- function(data, |
|
| 51 |
variables, |
|
| 52 |
by = dplyr::group_vars(data), |
|
| 53 |
method = c( |
|
| 54 |
"waldcc", "wald", "clopper-pearson", |
|
| 55 |
"wilson", "wilsoncc", |
|
| 56 |
"strat_wilson", "strat_wilsoncc", |
|
| 57 |
"agresti-coull", "jeffreys" |
|
| 58 |
), |
|
| 59 |
denominator = c("column", "row", "cell"),
|
|
| 60 |
conf.level = 0.95, |
|
| 61 |
value = list(where(is_binary) ~ 1L, where(is.logical) ~ TRUE), |
|
| 62 |
strata = NULL, |
|
| 63 |
weights = NULL, |
|
| 64 |
max.iterations = 10, |
|
| 65 |
...) {
|
|
| 66 | 41x |
set_cli_abort_call() |
| 67 | 41x |
check_dots_empty() |
| 68 | ||
| 69 |
# check installed packages --------------------------------------------------- |
|
| 70 | 41x |
check_pkg_installed(pkg = "broom") |
| 71 | ||
| 72 |
# process inputs ------------------------------------------------------------- |
|
| 73 | 41x |
check_not_missing(variables) |
| 74 | 41x |
cards::process_selectors(data, variables = {{ variables }}, by = {{ by }})
|
| 75 | 41x |
denominator <- arg_match(denominator, call = get_cli_abort_call()) |
| 76 | 41x |
method <- arg_match(method) |
| 77 | 41x |
if (method %in% c("strat_wilson", "strat_wilsoncc")) {
|
| 78 | 9x |
cards::process_selectors(data, strata = {{ strata }})
|
| 79 | 9x |
check_scalar(strata) |
| 80 |
} |
|
| 81 | ||
| 82 |
# if the method is strat_wilson, `weights` and `strata` cannot contain NA values |
|
| 83 | 41x |
if (method %in% c("strat_wilson")) {
|
| 84 | 7x |
if (any(is.na({{ weights }}))) {
|
| 85 | 1x |
cli::cli_warn("{.field weights} cannot contain {.val NA} values.")
|
| 86 |
} |
|
| 87 |
} |
|
| 88 | 41x |
cards::process_formula_selectors( |
| 89 | 41x |
data[variables], |
| 90 | 41x |
value = value |
| 91 |
) |
|
| 92 | ||
| 93 |
# if there is no by variable, then treat cell as column because it's the same. |
|
| 94 | 41x |
if (denominator == "cell" && is_empty(by)) {
|
| 95 | 1x |
denominator <- "column" |
| 96 |
} |
|
| 97 | ||
| 98 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 99 | 41x |
if (is_empty(variables)) {
|
| 100 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 101 |
} |
|
| 102 | ||
| 103 |
# calculate confidence intervals --------------------------------------------- |
|
| 104 | 41x |
map( |
| 105 | 41x |
variables, |
| 106 | 41x |
function(variable) {
|
| 107 | 49x |
switch(denominator, |
| 108 | 41x |
"column" = |
| 109 | 36x |
.calculate_ard_proportion_column( |
| 110 | 36x |
data = data, |
| 111 | 36x |
variable = variable, |
| 112 | 36x |
by = by, |
| 113 | 36x |
value = value, |
| 114 | 36x |
method = method, |
| 115 | 36x |
conf.level = conf.level, |
| 116 | 36x |
strata = strata, |
| 117 | 36x |
weights = weights, |
| 118 | 36x |
max.iterations = max.iterations |
| 119 |
), |
|
| 120 | 41x |
"row" = |
| 121 | 7x |
.calculate_ard_proportion_row( |
| 122 | 7x |
data = data, |
| 123 | 7x |
variable = variable, |
| 124 | 7x |
by = by, |
| 125 | 7x |
value = value, |
| 126 | 7x |
method = method, |
| 127 | 7x |
conf.level = conf.level, |
| 128 | 7x |
strata = strata, |
| 129 | 7x |
weights = weights, |
| 130 | 7x |
max.iterations = max.iterations |
| 131 |
), |
|
| 132 | 41x |
"cell" = |
| 133 | 6x |
.calculate_ard_proportion_cell( |
| 134 | 6x |
data = data, |
| 135 | 6x |
variable = variable, |
| 136 | 6x |
by = by, |
| 137 | 6x |
value = value, |
| 138 | 6x |
method = method, |
| 139 | 6x |
conf.level = conf.level, |
| 140 | 6x |
strata = strata, |
| 141 | 6x |
weights = weights, |
| 142 | 6x |
max.iterations = max.iterations |
| 143 |
) |
|
| 144 |
) |
|
| 145 |
} |
|
| 146 |
) |> |
|
| 147 | 41x |
dplyr::bind_rows() |> |
| 148 | 41x |
dplyr::mutate( |
| 149 | 41x |
context = "proportion_ci" |
| 150 |
) |> |
|
| 151 | 41x |
cards::as_card() |> |
| 152 | 41x |
cards::tidy_ard_column_order() |> |
| 153 | 41x |
cards::tidy_ard_row_order() |
| 154 |
} |
|
| 155 | ||
| 156 |
.calculate_ard_proportion_column <- function(data, variable, varname, by, value, method, conf.level, strata, weights, max.iterations) {
|
|
| 157 | 36x |
levels <- .unique_values_sort(data, variable = variable, value = value[[variable]]) |
| 158 | 36x |
data <- .as_dummy_column(data, variable = variable, levels = levels, by = by, strata = strata) |
| 159 | ||
| 160 | 36x |
cards::ard_mvsummary( |
| 161 | 36x |
data = data, |
| 162 | 36x |
variables = c(everything(), -all_of(c(by, strata))), |
| 163 | 36x |
by = all_of(by), |
| 164 | 36x |
statistic = |
| 165 | 36x |
~ list( |
| 166 | 36x |
prop_ci = |
| 167 | 36x |
.calculate_prop_ci_fun( |
| 168 | 36x |
data = data, method = method, conf.level = conf.level, |
| 169 | 36x |
strata = strata, weights = weights, max.iterations = max.iterations |
| 170 |
) |
|
| 171 |
) |
|
| 172 |
) %>% |
|
| 173 |
# merge in the variable levels |
|
| 174 | 36x |
dplyr::left_join( |
| 175 | 36x |
dplyr::select(., "variable") |> |
| 176 | 36x |
dplyr::distinct() |> |
| 177 | 36x |
dplyr::mutate(variable_level = as.list(.env$levels)), |
| 178 | 36x |
by = "variable" |
| 179 |
) |> |
|
| 180 |
# rename variable column |
|
| 181 | 36x |
dplyr::mutate(variable = .env$variable) |
| 182 |
} |
|
| 183 | ||
| 184 |
.calculate_prop_ci_fun <- function(data, method, conf.level, strata, weights, max.iterations) {
|
|
| 185 | 52x |
switch(method, |
| 186 | 52x |
"waldcc" = \(x, ...) proportion_ci_wald(x, conf.level = conf.level, correct = TRUE), |
| 187 | 52x |
"wald" = \(x, ...) proportion_ci_wald(x, conf.level = conf.level, correct = FALSE), |
| 188 | 52x |
"wilsoncc" = \(x, ...) proportion_ci_wilson(x, conf.level = conf.level, correct = TRUE), |
| 189 | 52x |
"wilson" = \(x, ...) proportion_ci_wilson(x, conf.level = conf.level, correct = FALSE), |
| 190 | 52x |
"clopper-pearson" = \(x, ...) proportion_ci_clopper_pearson(x, conf.level = conf.level), |
| 191 | 52x |
"agresti-coull" = \(x, ...) proportion_ci_agresti_coull(x, conf.level = conf.level), |
| 192 | 52x |
"jeffreys" = \(x, ...) proportion_ci_jeffreys(x, conf.level = conf.level), |
| 193 | 52x |
"strat_wilsoncc" = \(x, data, ...) {
|
| 194 | 2x |
proportion_ci_strat_wilson(x, |
| 195 | 2x |
strata = data[[strata]], weights = weights, |
| 196 | 2x |
max.iterations = max.iterations, |
| 197 | 2x |
conf.level = conf.level, correct = TRUE |
| 198 |
) |
|
| 199 |
}, |
|
| 200 | 52x |
"strat_wilson" = \(x, data, ...) {
|
| 201 | 9x |
proportion_ci_strat_wilson(x, |
| 202 | 9x |
strata = data[[strata]], weights = weights, |
| 203 | 9x |
max.iterations = max.iterations, |
| 204 | 9x |
conf.level = conf.level, correct = FALSE |
| 205 |
) |
|
| 206 |
} |
|
| 207 |
) |> |
|
| 208 | 52x |
cards::as_cards_fn( |
| 209 | 52x |
stat_names = |
| 210 | 52x |
case_switch( |
| 211 | 52x |
method %in% c("strat_wilsoncc", "strat_wilsoncc") ~
|
| 212 | 52x |
c("N", "n", "estimate", "conf.low", "conf.high", "conf.level", "weights", "method"),
|
| 213 | 52x |
.default = c("N", "n", "estimate", "conf.low", "conf.high", "conf.level", "method")
|
| 214 |
) |
|
| 215 |
) |
|
| 216 |
} |
|
| 217 | ||
| 218 |
.unique_values_sort <- function(data, variable, value = NULL) {
|
|
| 219 | 289x |
unique_levels <- |
| 220 |
# styler: off |
|
| 221 | 289x |
if (is.logical(data[[variable]])) c(TRUE, FALSE) |
| 222 | 289x |
else if (is.factor(data[[variable]])) factor(levels(data[[variable]]), levels = levels(data[[variable]])) |
| 223 | 289x |
else unique(data[[variable]]) |> sort() |
| 224 |
# styler: on |
|
| 225 | ||
| 226 | 289x |
if (!is_empty(value) && !value %in% unique_levels) {
|
| 227 | 1x |
cli::cli_warn( |
| 228 | 1x |
c("A value of {.code value={.val {value}}} for variable {.val {variable}}
|
| 229 | 1x |
was passed, but is not one of the observed levels: {.val {unique_levels}}.",
|
| 230 | 1x |
i = "This may be an error.", |
| 231 | 1x |
i = "If value is a valid, convert variable to factor with all levels specified to avoid this message." |
| 232 |
) |
|
| 233 |
) |
|
| 234 |
} |
|
| 235 | 289x |
if (!is_empty(value)) {
|
| 236 | 34x |
unique_levels <- value |
| 237 |
} |
|
| 238 | ||
| 239 | 289x |
unique_levels |
| 240 |
} |
|
| 241 | ||
| 242 |
.as_dummy_column <- function(data, variable, levels, by, strata) {
|
|
| 243 |
# define dummy variables and return tibble |
|
| 244 | 36x |
map(levels, ~ data[[variable]] == .x) |> |
| 245 | 36x |
set_names(paste0("this_is_not_a_column_name_anyone_would_choose_", variable, "_", levels, "...")) %>%
|
| 246 | 36x |
{dplyr::tibble(!!!.)} |> # styler: off
|
| 247 | 36x |
dplyr::bind_cols(data[c(by, strata)]) |
| 248 |
} |
|
| 249 | ||
| 250 |
.levels_for_row <- function(data, by) {
|
|
| 251 | 6x |
suppressMessages(cards::nest_for_ard(data = data, by = by, include_data = FALSE)) |> |
| 252 | 6x |
dplyr::select(cards::all_ard_groups(types = "levels")) |> |
| 253 | 6x |
map(unlist) |> |
| 254 | 6x |
reduce(.f = \(.x, .y) paste0(.x, .y)) |> |
| 255 | 6x |
as.character() |
| 256 |
} |
|
| 257 | ||
| 258 |
.as_dummy_row <- function(data, levels, by, strata) {
|
|
| 259 | 9x |
data_col <- data[, by, drop = FALSE] |> |
| 260 | 9x |
reduce(.f = \(.x, .y) paste0(.x, .y)) |> |
| 261 | 9x |
as.character() |
| 262 | ||
| 263 |
# define dummy variables and return tibble |
|
| 264 | 9x |
df_res <- map(levels, ~ data_col == .x) |> |
| 265 | 9x |
set_names(paste0("this_is_not_a_column_name_anyone_would_choose_", levels, "...")) |>
|
| 266 | 9x |
dplyr::bind_cols() |
| 267 | ||
| 268 | 9x |
if (!is_empty(strata)) {
|
| 269 | 1x |
df_res <- df_res |> dplyr::bind_cols(data[, strata, drop = FALSE]) |
| 270 |
} |
|
| 271 | ||
| 272 | 9x |
df_res |
| 273 |
} |
|
| 274 | ||
| 275 | ||
| 276 |
.calculate_ard_proportion_row <- function(data, variable, by, value, method, |
|
| 277 |
conf.level, strata, weights, max.iterations) {
|
|
| 278 |
# if there are no by variables, then all row percents are 100% |
|
| 279 | 7x |
if (is_empty(by)) {
|
| 280 | 1x |
df_res <- |
| 281 | 1x |
suppressMessages( |
| 282 | 1x |
cards::nest_for_ard( |
| 283 | 1x |
data = data[c(variable, by, strata)], |
| 284 | 1x |
by = variable |
| 285 |
) |
|
| 286 |
) |> |
|
| 287 | 1x |
dplyr::rename(variable = "group1", variable_level = "group1_level") %>% |
| 288 |
{
|
|
| 289 | 1x |
case_switch( |
| 290 | 1x |
!is_empty(value[[variable]]) ~ dplyr::filter(., .data$variable_level %in% !!value[[variable]]), |
| 291 | 1x |
.default = . |
| 292 |
) |
|
| 293 |
} |> |
|
| 294 | 1x |
dplyr::mutate( |
| 295 | 1x |
data = map(data, ~ dplyr::mutate(.x, ....ard_all_true.... = TRUE)), |
| 296 | 1x |
prop_ci_fun = |
| 297 | 1x |
map( |
| 298 | 1x |
.data$data, |
| 299 | 1x |
~ .calculate_prop_ci_fun( |
| 300 | 1x |
data = .x, |
| 301 | 1x |
method = .env$method, |
| 302 | 1x |
conf.level = .env$conf.level, |
| 303 | 1x |
strata = .env$strata, |
| 304 | 1x |
weights = .env$weights, |
| 305 | 1x |
max.iterations = .env$max.iterations |
| 306 |
) |
|
| 307 |
), |
|
| 308 | 1x |
result = |
| 309 | 1x |
map2( |
| 310 | 1x |
.data$data, .data$prop_ci_fun, |
| 311 | 1x |
~ cards::ard_mvsummary( |
| 312 | 1x |
data = .x, |
| 313 | 1x |
variables = "....ard_all_true....", |
| 314 | 1x |
statistic = list("....ard_all_true...." = list(prop_ci = .y))
|
| 315 |
) |> |
|
| 316 | 1x |
tidyr::nest(res = -cards::all_ard_variables()) |> |
| 317 | 1x |
dplyr::select(-cards::all_ard_variables()) |
| 318 |
) |
|
| 319 |
) |> |
|
| 320 | 1x |
dplyr::select(-c("data", "prop_ci_fun")) |>
|
| 321 | 1x |
tidyr::unnest(cols = "result") |> |
| 322 | 1x |
tidyr::unnest(cols = "res") |
| 323 | ||
| 324 | 1x |
return(df_res) |
| 325 |
} |
|
| 326 | ||
| 327 | ||
| 328 | 6x |
df_grouping_cols <- suppressMessages(cards::nest_for_ard(data, by = by, include_data = FALSE)) |
| 329 | 6x |
levels <- .levels_for_row(data = data, by = by) |
| 330 | ||
| 331 | 6x |
suppressMessages( |
| 332 | 6x |
cards::nest_for_ard( |
| 333 | 6x |
data = data[c(variable, by, strata)], |
| 334 | 6x |
by = variable |
| 335 |
) |
|
| 336 |
) |> |
|
| 337 | 6x |
dplyr::rename(variable = "group1", variable_level = "group1_level") %>% |
| 338 |
{
|
|
| 339 | 6x |
case_switch( |
| 340 | 6x |
!is_empty(value[[variable]]) ~ dplyr::filter(., .data$variable_level %in% !!value[[variable]]), |
| 341 | 6x |
.default = . |
| 342 |
) |
|
| 343 |
} |> |
|
| 344 | 6x |
dplyr::mutate( |
| 345 | 6x |
df_grouping_cols = list(.env$df_grouping_cols), |
| 346 | 6x |
prop_ci_fun = |
| 347 | 6x |
map( |
| 348 | 6x |
.data$data, |
| 349 | 6x |
~ .calculate_prop_ci_fun( |
| 350 | 6x |
data = .x, |
| 351 | 6x |
method = .env$method, |
| 352 | 6x |
conf.level = .env$conf.level, |
| 353 | 6x |
strata = .env$strata, |
| 354 | 6x |
weights = .env$weights, |
| 355 | 6x |
max.iterations = .env$max.iterations |
| 356 |
) |
|
| 357 |
), |
|
| 358 | 6x |
result = |
| 359 | 6x |
map2( |
| 360 | 6x |
.data$data, .data$prop_ci_fun, |
| 361 | 6x |
~ .as_dummy_row(data = .x, levels = levels, by = by, strata = strata) |> |
| 362 | 6x |
cards::ard_mvsummary( |
| 363 | 6x |
variables = c(everything(), -any_of(strata)), |
| 364 | 6x |
statistic = everything() ~ list(prop_ci = .y) |
| 365 |
) |> |
|
| 366 | 6x |
tidyr::nest(res = -cards::all_ard_variables()) |> |
| 367 | 6x |
dplyr::select(-cards::all_ard_variables()) |
| 368 |
) |
|
| 369 |
) |> |
|
| 370 | 6x |
dplyr::select(-c("data", "prop_ci_fun")) |>
|
| 371 | 6x |
tidyr::unnest(cols = c("df_grouping_cols", "result")) |>
|
| 372 | 6x |
tidyr::unnest(cols = "res") |
| 373 |
} |
|
| 374 | ||
| 375 |
.calculate_ard_proportion_cell <- function(data, variable, by, value, method, |
|
| 376 |
conf.level, strata, weights, max.iterations) {
|
|
| 377 |
# create the base of what the grouping and variable ARD will look like |
|
| 378 | 6x |
df_groups_variable <- |
| 379 | 6x |
suppressMessages( |
| 380 | 6x |
cards::nest_for_ard(data, by = c(by, variable), include_data = FALSE) |
| 381 |
) |> |
|
| 382 | 6x |
dplyr::rename( |
| 383 | 6x |
variable = glue::glue("group{length(c(variable, by))}"),
|
| 384 | 6x |
variable_level = glue::glue("group{length(c(variable, by))}_level")
|
| 385 |
) %>% |
|
| 386 |
{
|
|
| 387 | 6x |
case_switch( |
| 388 | 6x |
!is_empty(value[[variable]]) ~ dplyr::filter(., .data$variable_level %in% !!value[[variable]]), |
| 389 | 6x |
.default = . |
| 390 |
) |
|
| 391 |
} |
|
| 392 | ||
| 393 |
# create a vector of all the unique values of by and variable pasted together |
|
| 394 | 6x |
levels <- |
| 395 | 6x |
df_groups_variable |> |
| 396 | 6x |
dplyr::select(cards::all_ard_groups("levels"), cards::all_ard_variables("levels")) |>
|
| 397 | 6x |
map(unlist) |> |
| 398 | 6x |
reduce(.f = \(.x, .y) paste0(.x, .y)) |> |
| 399 | 6x |
as.character() |
| 400 | ||
| 401 |
# create a data frame of dummy columns indicating if each of the levels is obs in that row |
|
| 402 |
# data frame also contains the strata column is supplied |
|
| 403 | 6x |
dummy_data <- |
| 404 | 6x |
df_groups_variable |> |
| 405 | 6x |
dplyr::select(cards::all_ard_groups("levels"), cards::all_ard_variables("levels")) |>
|
| 406 | 6x |
dplyr::mutate(across(everything(), unlist)) |> |
| 407 | 6x |
stats::setNames( |
| 408 | 6x |
df_groups_variable |> |
| 409 | 6x |
dplyr::select(cards::all_ard_groups("names"), cards::all_ard_variables("names")) |>
|
| 410 | 6x |
dplyr::slice(1L) |> |
| 411 | 6x |
unlist() |> |
| 412 | 6x |
unname() |
| 413 |
) |> |
|
| 414 | 6x |
dplyr::bind_cols(data.frame(...a_name_anyone_would_ever_pick... = levels)) |> |
| 415 | 6x |
dplyr::left_join( |
| 416 | 6x |
x = data[c(variable, by, strata)], |
| 417 | 6x |
y = _, |
| 418 | 6x |
by = c(variable, by) |
| 419 |
) |> |
|
| 420 | 6x |
dplyr::mutate( |
| 421 | 6x |
...a_name_anyone_would_ever_pick... = |
| 422 | 6x |
dplyr::coalesce(.data$...a_name_anyone_would_ever_pick..., paste(levels, collapse = "")) |
| 423 |
) |
|
| 424 | ||
| 425 |
# make dummy vector missing if any of the other variables are missing |
|
| 426 | 6x |
dummy_data[["...a_name_anyone_would_ever_pick..."]][apply(is.na(dummy_data[c(variable, by, strata)]), MARGIN = 1, FUN = any)] <- NA |
| 427 | ||
| 428 |
# finish processing dummy data |
|
| 429 | 6x |
dummy_data <- dummy_data |> |
| 430 | 6x |
dplyr::select(-any_of(c(variable, by))) %>% |
| 431 |
# styler: off |
|
| 432 | 6x |
{dplyr::bind_cols(
|
| 433 |
., |
|
| 434 | 6x |
map( |
| 435 | 6x |
levels, |
| 436 | 6x |
\(level) {
|
| 437 | 25x |
(.[["...a_name_anyone_would_ever_pick..."]] == level) |
| 438 |
} |
|
| 439 |
) |> |
|
| 440 | 6x |
stats::setNames(paste0("level_", levels)) |>
|
| 441 | 6x |
dplyr::as_tibble() |
| 442 |
)} |> |
|
| 443 |
# styler: on |
|
| 444 | 6x |
dplyr::select(-"...a_name_anyone_would_ever_pick...") |
| 445 | ||
| 446 | 6x |
prop_ci_fun <- |
| 447 | 6x |
.calculate_prop_ci_fun( |
| 448 | 6x |
data = dummy_data, |
| 449 | 6x |
method = method, |
| 450 | 6x |
conf.level = conf.level, |
| 451 | 6x |
strata = strata, |
| 452 | 6x |
weights = weights, |
| 453 | 6x |
max.iterations = max.iterations |
| 454 |
) |
|
| 455 | ||
| 456 | 6x |
df_res <- |
| 457 | 6x |
cards::ard_mvsummary( |
| 458 | 6x |
data = dummy_data, |
| 459 | 6x |
variables = -any_of(strata), |
| 460 | 6x |
statistic = everything() ~ list(prop_ci = prop_ci_fun) |
| 461 |
) |> |
|
| 462 | 6x |
tidyr::nest(res = -"variable") |> |
| 463 | 6x |
dplyr::select(-"variable") |
| 464 | ||
| 465 | 6x |
dplyr::bind_cols(df_groups_variable, df_res) |> |
| 466 | 6x |
tidyr::unnest("res")
|
| 467 |
} |
| 1 |
#' ARD Dichotomous Survey Statistics |
|
| 2 |
#' |
|
| 3 |
#' Compute Analysis Results Data (ARD) for dichotomous summary statistics. |
|
| 4 |
#' |
|
| 5 |
#' @inheritParams ard_tabulate.survey.design |
|
| 6 |
#' @param value (named `list`)\cr |
|
| 7 |
#' named list of dichotomous values to tabulate. |
|
| 8 |
#' Default is `cards::maximum_variable_value(data$variables)`, |
|
| 9 |
#' which returns the largest/last value after a sort. |
|
| 10 |
#' |
|
| 11 |
#' @return an ARD data frame of class 'card' |
|
| 12 |
#' @export |
|
| 13 |
#' |
|
| 14 |
#' @examplesIf cardx:::is_pkg_installed("survey")
|
|
| 15 |
#' survey::svydesign(ids = ~1, data = mtcars, weights = ~1) |> |
|
| 16 |
#' ard_tabulate_value(by = vs, variables = c(cyl, am), value = list(cyl = 4)) |
|
| 17 |
ard_tabulate_value.survey.design <- function(data, |
|
| 18 |
variables, |
|
| 19 |
by = NULL, |
|
| 20 |
value = cards::maximum_variable_value(data$variables[variables]), |
|
| 21 |
statistic = everything() ~ c("n", "N", "p", "p.std.error", "n_unweighted", "N_unweighted", "p_unweighted"),
|
|
| 22 |
denominator = c("column", "row", "cell"),
|
|
| 23 |
fmt_fun = NULL, |
|
| 24 |
stat_label = everything() ~ list( |
|
| 25 |
p = "%", |
|
| 26 |
p.std.error = "SE(%)", |
|
| 27 |
deff = "Design Effect", |
|
| 28 |
"n_unweighted" = "Unweighted n", |
|
| 29 |
"N_unweighted" = "Unweighted N", |
|
| 30 |
"p_unweighted" = "Unweighted %" |
|
| 31 |
), |
|
| 32 |
fmt_fn = deprecated(), |
|
| 33 |
...) {
|
|
| 34 | 22x |
set_cli_abort_call() |
| 35 | 22x |
check_dots_empty() |
| 36 | 22x |
check_pkg_installed(pkg = "survey") |
| 37 | ||
| 38 |
# deprecated args ------------------------------------------------------------ |
|
| 39 | 22x |
if (lifecycle::is_present(fmt_fn)) {
|
| 40 | ! |
lifecycle::deprecate_soft( |
| 41 | ! |
when = "0.2.5", |
| 42 | ! |
what = "ard_tabulate_value(fmt_fn)", |
| 43 | ! |
with = "ard_tabulate_value(fmt_fun)" |
| 44 |
) |
|
| 45 | ! |
fmt_fun <- fmt_fn |
| 46 |
} |
|
| 47 | ||
| 48 |
# check inputs --------------------------------------------------------------- |
|
| 49 | 22x |
check_not_missing(variables) |
| 50 | ||
| 51 |
# process inputs ------------------------------------------------------------- |
|
| 52 | 22x |
cards::process_selectors(data$variables, variables = {{ variables }})
|
| 53 | 22x |
cards::process_formula_selectors(data$variables[variables], value = value) |
| 54 | 22x |
cards::fill_formula_selectors( |
| 55 | 22x |
data$variables[variables], |
| 56 | 22x |
value = formals(asNamespace("cardx")[["ard_tabulate_value.survey.design"]])[["value"]] |> eval()
|
| 57 |
) |
|
| 58 | 22x |
.check_dichotomous_value(data$variables, value) |
| 59 | ||
| 60 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 61 | 19x |
if (is_empty(variables)) {
|
| 62 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 63 |
} |
|
| 64 | ||
| 65 |
# calculate summary statistics ----------------------------------------------- |
|
| 66 | 19x |
ard_tabulate( |
| 67 | 19x |
data = data, |
| 68 | 19x |
variables = all_of(variables), |
| 69 | 19x |
by = {{ by }},
|
| 70 | 19x |
statistic = statistic, |
| 71 | 19x |
denominator = denominator, |
| 72 | 19x |
fmt_fun = fmt_fun, |
| 73 | 19x |
stat_label = stat_label |
| 74 |
) |> |
|
| 75 | 19x |
dplyr::filter( |
| 76 | 19x |
pmap( |
| 77 | 19x |
list(.data$variable, .data$variable_level), |
| 78 | 19x |
function(variable, variable_level) {
|
| 79 | 1015x |
variable_level %in% .env$value[[variable]] |
| 80 |
} |
|
| 81 |
) |> |
|
| 82 | 19x |
unlist() |
| 83 |
) |> |
|
| 84 | 19x |
dplyr::mutate(context = "dichotomous") |
| 85 |
} |
|
| 86 | ||
| 87 |
#' Perform Value Checks |
|
| 88 |
#' |
|
| 89 |
#' Check the validity of the values passed in `ard_tabulate_value(value)`. |
|
| 90 |
#' |
|
| 91 |
#' @param data (`data.frame`)\cr |
|
| 92 |
#' a data frame |
|
| 93 |
#' @param value (named `list`)\cr |
|
| 94 |
#' a named list |
|
| 95 |
#' |
|
| 96 |
#' @return returns invisible if check is successful, throws an error message if not. |
|
| 97 |
#' @keywords internal |
|
| 98 |
#' |
|
| 99 |
#' @examples |
|
| 100 |
#' cardx:::.check_dichotomous_value(mtcars, list(cyl = 4)) |
|
| 101 |
.check_dichotomous_value <- function(data, value) {
|
|
| 102 | 22x |
imap( |
| 103 | 22x |
value, |
| 104 | 22x |
function(value, column) {
|
| 105 | 39x |
accepted_values <- .unique_and_sorted(data[[column]]) |
| 106 | 39x |
if (length(value) != 1L || !value %in% accepted_values) {
|
| 107 | 3x |
message <- "Error in argument {.arg value} for variable {.val {column}}."
|
| 108 | 3x |
message <- |
| 109 | 3x |
case_switch( |
| 110 | 3x |
length(value) != 1L ~ c(message, "i" = "The value must be one of {.val {accepted_values}}."),
|
| 111 | 3x |
.default = c(message, "i" = "A value of {.val {value}} was passed, but must be one of {.val {accepted_values}}.")
|
| 112 |
) |
|
| 113 | 3x |
if (length(value) == 1L) {
|
| 114 | 3x |
message <- |
| 115 | 3x |
case_switch( |
| 116 | 3x |
inherits(data[[column]], "factor") ~ |
| 117 | 3x |
c(message, i = "To summarize this value, use {.fun forcats::fct_expand} to add {.val {value}} as a level."),
|
| 118 | 3x |
.default = c(message, i = "To summarize this value, make the column a factor and include {.val {value}} as a level.")
|
| 119 |
) |
|
| 120 |
} |
|
| 121 | ||
| 122 | ||
| 123 | 3x |
cli::cli_abort( |
| 124 | 3x |
message = message, |
| 125 | 3x |
call = get_cli_abort_call() |
| 126 |
) |
|
| 127 |
} |
|
| 128 |
} |
|
| 129 |
) |> |
|
| 130 | 22x |
invisible() |
| 131 |
} |
|
| 132 | ||
| 133 |
#' ARD-flavor of unique() |
|
| 134 |
#' |
|
| 135 |
#' Essentially a wrapper for `unique(x) |> sort()` with `NA` levels removed. |
|
| 136 |
#' For factors, all levels are returned even if they are unobserved. |
|
| 137 |
#' Similarly, logical vectors always return `c(TRUE, FALSE)`, even if |
|
| 138 |
#' both levels are not observed. |
|
| 139 |
#' |
|
| 140 |
#' @param x (`any`)\cr |
|
| 141 |
#' a vector |
|
| 142 |
#' |
|
| 143 |
#' @return a vector |
|
| 144 |
#' @keywords internal |
|
| 145 |
#' |
|
| 146 |
#' @examples |
|
| 147 |
#' cards:::.unique_and_sorted(factor(letters[c(5, 5:1)], levels = letters)) |
|
| 148 |
#' |
|
| 149 |
#' cards:::.unique_and_sorted(c(FALSE, TRUE, TRUE, FALSE)) |
|
| 150 |
#' |
|
| 151 |
#' cards:::.unique_and_sorted(c(5, 5:1)) |
|
| 152 |
.unique_and_sorted <- function(x, useNA = c("no", "always")) {
|
|
| 153 |
# styler: off |
|
| 154 | 369x |
useNA <- match.arg(useNA) |
| 155 |
# if a factor return a factor that includes the same levels (including unobserved levels) |
|
| 156 | 369x |
if (inherits(x, "factor")) {
|
| 157 | 277x |
return( |
| 158 | 277x |
factor( |
| 159 | 277x |
if (useNA == "no") levels(x) |
| 160 | 277x |
else c(levels(x), NA_character_), |
| 161 | 277x |
levels = levels(x) |
| 162 |
) |
|
| 163 |
) |
|
| 164 |
} |
|
| 165 | 92x |
if (inherits(x, "logical")) {
|
| 166 | 52x |
if (useNA == "no") return(c(TRUE, FALSE)) |
| 167 | ! |
else return(c(TRUE, FALSE, NA)) |
| 168 |
} |
|
| 169 | ||
| 170 |
# otherwise, return a simple unique and sort of the vector |
|
| 171 | 40x |
if (useNA == "no") return(unique(x) |> sort()) |
| 172 | ! |
else return(unique(x) |> sort() |> c(NA)) |
| 173 |
# styler: on |
|
| 174 |
} |
| 1 |
#' ARD One-way Test |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Analysis results data for Testing Equal Means in a One-Way Layout. |
|
| 5 |
#' calculated with `oneway.test()` |
|
| 6 |
#' |
|
| 7 |
#' @inheritParams stats::oneway.test |
|
| 8 |
#' @param ... additional arguments passed to `oneway.test(...)` |
|
| 9 |
#' |
|
| 10 |
#' @return ARD data frame |
|
| 11 |
#' @export |
|
| 12 |
#' |
|
| 13 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
|
|
| 14 |
#' ard_stats_oneway_test(AGE ~ ARM, data = cards::ADSL) |
|
| 15 |
ard_stats_oneway_test <- function(formula, data, ...) {
|
|
| 16 | 3x |
set_cli_abort_call() |
| 17 | ||
| 18 |
# check installed packages --------------------------------------------------- |
|
| 19 | 3x |
check_pkg_installed(c("broom"))
|
| 20 | ||
| 21 |
# check/process inputs ------------------------------------------------------- |
|
| 22 | 3x |
check_not_missing(formula) |
| 23 | 3x |
check_not_missing(data) |
| 24 | 3x |
check_data_frame(data) |
| 25 | 3x |
check_class(formula, cls = "formula") |
| 26 | ||
| 27 |
# build ARD ------------------------------------------------------------------ |
|
| 28 | 3x |
df_results <- |
| 29 | 3x |
cards::tidy_as_ard( |
| 30 | 3x |
lst_tidy = |
| 31 | 3x |
cards::eval_capture_conditions( |
| 32 | 3x |
stats::oneway.test(formula, data = data, ...) |> |
| 33 | 3x |
broom::tidy() |
| 34 |
), |
|
| 35 | 3x |
tidy_result_names = c("num.df", "den.df", "statistic", "p.value", "method"),
|
| 36 | 3x |
fun_args_to_record = |
| 37 | 3x |
c("var.equal"),
|
| 38 | 3x |
formals = formals(stats::oneway.test), |
| 39 | 3x |
passed_args = dots_list(...), |
| 40 | 3x |
lst_ard_columns = list(context = "stats_oneway_test") |
| 41 |
) |> |
|
| 42 | 3x |
dplyr::mutate( |
| 43 | 3x |
.after = "stat_name", |
| 44 | 3x |
stat_label = |
| 45 | 3x |
dplyr::case_when( |
| 46 | 3x |
.data$stat_name %in% "num.df" ~ "Degrees of Freedom", |
| 47 | 3x |
.data$stat_name %in% "den.df" ~ "Denominator Degrees of Freedom", |
| 48 | 3x |
.data$stat_name %in% "statistic" ~ "F Statistic", |
| 49 | 3x |
.data$stat_name %in% "p.value" ~ "p-value", |
| 50 | 3x |
.data$stat_name %in% "method" ~ "Method", |
| 51 | 3x |
TRUE ~ .data$stat_name, |
| 52 |
) |
|
| 53 |
) |
|
| 54 | ||
| 55 |
# add variable/groups to results and return result |
|
| 56 | 3x |
df_results |> |
| 57 | 3x |
dplyr::bind_cols( |
| 58 | 3x |
dplyr::tibble(!!!map(as.list(attr(stats::terms(formula), "variables"))[-1], as_label)) %>% |
| 59 | 3x |
set_names(., c("variable", paste0("group", seq_len(length(.) - 1L))))
|
| 60 |
) |> |
|
| 61 | 3x |
cards::as_card() |> |
| 62 | 3x |
cards::tidy_ard_column_order() |
| 63 |
} |
| 1 |
#' ARD Standardized Mean Difference |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Standardized mean difference calculated via [`smd::smd()`] with `na.rm = TRUE`. |
|
| 5 |
#' Additionally, this function add a confidence interval to the SMD when |
|
| 6 |
#' `std.error=TRUE`, which the original `smd::smd()` does not include. |
|
| 7 |
#' |
|
| 8 |
#' @param data (`data.frame`/`survey.design`)\cr |
|
| 9 |
#' a data frame or object of class 'survey.design' |
|
| 10 |
#' (typically created with [`survey::svydesign()`]). |
|
| 11 |
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 12 |
#' column name to compare by. |
|
| 13 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 14 |
#' column names to be compared. Independent tests will be computed for |
|
| 15 |
#' each variable. |
|
| 16 |
#' @param conf.level (scalar `numeric`)\cr |
|
| 17 |
#' confidence level for confidence interval. Default is `0.95`. |
|
| 18 |
#' @param std.error (scalar `logical`)\cr |
|
| 19 |
#' Logical indicator for computing standard errors using `smd::compute_smd_var()`. |
|
| 20 |
#' Default is `TRUE`. |
|
| 21 |
#' @param ... arguments passed to `smd::smd()` |
|
| 22 |
#' |
|
| 23 |
#' @return ARD data frame |
|
| 24 |
#' @export |
|
| 25 |
#' |
|
| 26 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "smd"))
|
|
| 27 |
#' ard_smd_smd(cards::ADSL, by = SEX, variables = AGE) |
|
| 28 |
#' ard_smd_smd(cards::ADSL, by = SEX, variables = AGEGR1) |
|
| 29 |
ard_smd_smd <- function(data, by, variables, std.error = TRUE, conf.level = 0.95, ...) {
|
|
| 30 | 6x |
set_cli_abort_call() |
| 31 | ||
| 32 |
# check installed packages --------------------------------------------------- |
|
| 33 | 6x |
check_pkg_installed("smd")
|
| 34 | ||
| 35 |
# check/process inputs ------------------------------------------------------- |
|
| 36 | 6x |
check_not_missing(data) |
| 37 | 6x |
check_not_missing(variables) |
| 38 | 6x |
check_not_missing(by) |
| 39 | ||
| 40 |
# grab design object if from `survey` ---------------------------------------- |
|
| 41 | 6x |
is_survey <- inherits(data, "survey.design") |
| 42 | 6x |
if (is_survey) {
|
| 43 | 1x |
design <- data |
| 44 | 1x |
data <- design$variables |
| 45 |
} |
|
| 46 | ||
| 47 |
# continue check/process inputs ---------------------------------------------- |
|
| 48 | 6x |
check_data_frame(data) |
| 49 | 6x |
data <- dplyr::ungroup(data) |
| 50 | 6x |
cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
|
| 51 | 6x |
check_scalar(by) |
| 52 |
# This check can be relaxed, but would require some changes to handle multi-row outputs |
|
| 53 | 6x |
check_n_levels(data[[by]], 2L, message = "The {.arg by} column must have {.val {length}} levels.")
|
| 54 | ||
| 55 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 56 | 6x |
if (is_empty(variables)) {
|
| 57 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 58 |
} |
|
| 59 | ||
| 60 |
# build ARD ------------------------------------------------------------------ |
|
| 61 | 6x |
lapply( |
| 62 | 6x |
variables, |
| 63 | 6x |
function(variable) {
|
| 64 | 7x |
.format_smd_results( |
| 65 | 7x |
by = by, |
| 66 | 7x |
variable = variable, |
| 67 | 7x |
lst_tidy = |
| 68 | 7x |
cards::eval_capture_conditions( |
| 69 | 7x |
switch(as.character(is_survey), |
| 70 | 7x |
"TRUE" = smd::smd(x = data[[variable]], g = data[[by]], w = stats::weights(design), na.rm = TRUE, std.error = std.error, ...), |
| 71 | 7x |
"FALSE" = smd::smd(x = data[[variable]], g = data[[by]], na.rm = TRUE, std.error = std.error, ...) |
| 72 |
) |> |
|
| 73 | 7x |
dplyr::select(-any_of("term")) %>%
|
| 74 |
# styler: off |
|
| 75 | 6x |
{if (isTRUE(std.error))
|
| 76 | 6x |
dplyr::mutate( |
| 77 |
., |
|
| 78 | 6x |
conf.low = .data$estimate + stats::qnorm((1 - .env$conf.level) / 2) * .data$std.error, |
| 79 | 6x |
conf.high = .data$estimate - stats::qnorm((1 - .env$conf.level) / 2) * .data$std.error, |
| 80 | 6x |
method = "Standardized Mean Difference" |
| 81 |
) |
|
| 82 |
else |
|
| 83 | ! |
dplyr::mutate( |
| 84 |
., |
|
| 85 | ! |
method = "Standardized Mean Difference" |
| 86 |
)} |
|
| 87 |
# styler: on |
|
| 88 |
), |
|
| 89 |
... |
|
| 90 |
) |
|
| 91 |
} |
|
| 92 |
) |> |
|
| 93 | 6x |
dplyr::bind_rows() |
| 94 |
} |
|
| 95 | ||
| 96 | ||
| 97 |
.format_smd_results <- function(by, variable, lst_tidy, ...) {
|
|
| 98 |
# build ARD ------------------------------------------------------------------ |
|
| 99 | 7x |
ret <- |
| 100 | 7x |
cards::tidy_as_ard( |
| 101 | 7x |
lst_tidy = lst_tidy, |
| 102 | 7x |
tidy_result_names = c("estimate", "std.error"),
|
| 103 | 7x |
fun_args_to_record = c("gref"),
|
| 104 | 7x |
formals = formals(smd::smd)[c("gref")],
|
| 105 |
# removing the `std.error` ARGUMENT (not the result) |
|
| 106 | 7x |
passed_args = dots_list(...) |> utils::modifyList(list(std.error = NULL)), |
| 107 | 7x |
lst_ard_columns = list(group1 = by, variable = variable, context = "smd_smd") |
| 108 |
) |
|
| 109 | ||
| 110 |
# add the stat label --------------------------------------------------------- |
|
| 111 | 7x |
ret |> |
| 112 | 7x |
dplyr::left_join( |
| 113 | 7x |
dplyr::tribble( |
| 114 | 7x |
~stat_name, ~stat_label, |
| 115 | 7x |
"estimate", "Standardized Mean Difference", |
| 116 | 7x |
"std.error", "Standard Error", |
| 117 | 7x |
"gref", "Integer Reference Group Level" |
| 118 |
), |
|
| 119 | 7x |
by = "stat_name" |
| 120 |
) |> |
|
| 121 | 7x |
dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) |> |
| 122 | 7x |
cards::as_card() |> |
| 123 | 7x |
cards::tidy_ard_column_order() |
| 124 |
} |
| 1 |
#' ARD Poisson Test |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Analysis results data for exact tests of a simple null hypothesis about the rate parameter |
|
| 5 |
#' in Poisson distribution, or the comparison of two rate parameters. |
|
| 6 |
#' |
|
| 7 |
#' @param data (`data.frame`)\cr |
|
| 8 |
#' a data frame. See below for details. |
|
| 9 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 10 |
#' names of the event and time variables (in that order) to be used in computations. Must be of length 2. |
|
| 11 |
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 12 |
#' optional column name to compare by. |
|
| 13 |
#' @param conf.level (scalar `numeric`)\cr |
|
| 14 |
#' confidence level for confidence interval. Default is `0.95`. |
|
| 15 |
#' @param na.rm (scalar `logical`)\cr |
|
| 16 |
#' whether missing values should be removed before computations. Default is `TRUE`. |
|
| 17 |
#' @param ... arguments passed to [poisson.test()]. |
|
| 18 |
#' @return an ARD data frame of class 'card' |
|
| 19 |
#' @name ard_stats_poisson_test |
|
| 20 |
#' |
|
| 21 |
#' @details |
|
| 22 |
#' * For the `ard_stats_poisson_test()` function, the data is expected to be one row per subject. |
|
| 23 |
#' * If `by` is not specified, an exact Poisson test of the rate parameter will be performed. Otherwise, a |
|
| 24 |
#' Poisson comparison of two rate parameters will be performed on the levels of `by`. If `by` has more than 2 |
|
| 25 |
#' levels, an error will occur. |
|
| 26 |
#' |
|
| 27 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
|
|
| 28 |
#' # Exact test of rate parameter against null hypothesis |
|
| 29 |
#' cards::ADTTE |> |
|
| 30 |
#' ard_stats_poisson_test(variables = c(CNSR, AVAL)) |
|
| 31 |
#' |
|
| 32 |
#' # Comparison test of ratio of 2 rate parameters against null hypothesis |
|
| 33 |
#' cards::ADTTE |> |
|
| 34 |
#' dplyr::filter(TRTA %in% c("Placebo", "Xanomeline High Dose")) |>
|
|
| 35 |
#' ard_stats_poisson_test(by = TRTA, variables = c(CNSR, AVAL)) |
|
| 36 |
NULL |
|
| 37 | ||
| 38 |
#' @rdname ard_stats_poisson_test |
|
| 39 |
#' @export |
|
| 40 |
ard_stats_poisson_test <- function(data, variables, na.rm = TRUE, by = NULL, conf.level = 0.95, ...) {
|
|
| 41 | 5x |
set_cli_abort_call() |
| 42 | ||
| 43 |
# check installed packages --------------------------------------------------- |
|
| 44 | 5x |
check_pkg_installed("broom")
|
| 45 | ||
| 46 |
# check/process inputs ------------------------------------------------------- |
|
| 47 | 5x |
check_not_missing(data) |
| 48 | 5x |
check_not_missing(variables) |
| 49 | 5x |
check_data_frame(data) |
| 50 | 5x |
data <- dplyr::ungroup(data) |
| 51 | 5x |
cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
|
| 52 | 5x |
check_length(variables, 2) |
| 53 | 5x |
check_logical(na.rm) |
| 54 | 5x |
check_scalar(by, allow_empty = TRUE) |
| 55 | 5x |
check_range(conf.level, range = c(0, 1)) |
| 56 | ||
| 57 |
# return empty ARD if no variables selected ---------------------- |
|
| 58 | 5x |
if (is_empty(variables)) {
|
| 59 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 60 |
} |
|
| 61 | ||
| 62 |
# check number of levels in `by` |
|
| 63 | 5x |
if (!is_empty(by) && dplyr::n_distinct(data[[by]], na.rm = TRUE) != 2L) {
|
| 64 | 1x |
cli::cli_abort( |
| 65 | 1x |
"The {.arg by} argument must have a maximum of two levels.",
|
| 66 | 1x |
call = get_cli_abort_call() |
| 67 |
) |
|
| 68 |
} |
|
| 69 | ||
| 70 |
# calculate numerator and denominator values |
|
| 71 | 4x |
if (!is_empty(by)) {
|
| 72 | 1x |
num <- data |> |
| 73 | 1x |
dplyr::group_by(.data[[by]]) |> |
| 74 | 1x |
dplyr::summarise(sum = sum(.data[[variables[1]]], na.rm = na.rm)) |> |
| 75 | 1x |
dplyr::pull(sum) |
| 76 | 1x |
denom <- data |> |
| 77 | 1x |
dplyr::group_by(.data[[by]]) |> |
| 78 | 1x |
dplyr::summarise(sum = sum(.data[[variables[2]]], na.rm = na.rm)) |> |
| 79 | 1x |
dplyr::pull(sum) |
| 80 |
} else {
|
|
| 81 | 3x |
num <- sum(data[[variables[1]]], na.rm = na.rm) |
| 82 | 3x |
denom <- sum(data[[variables[2]]], na.rm = na.rm) |
| 83 |
} |
|
| 84 | ||
| 85 |
# build ARD ------------------------------------------------------------------ |
|
| 86 | 4x |
.format_poissontest_results( |
| 87 | 4x |
by = by, |
| 88 | 4x |
variables = variables, |
| 89 | 4x |
lst_tidy = |
| 90 | 4x |
cards::eval_capture_conditions( |
| 91 | 4x |
stats::poisson.test(x = num, T = denom, conf.level = conf.level, ...) |> broom::tidy() |
| 92 |
), |
|
| 93 |
... |
|
| 94 |
) |
|
| 95 |
} |
|
| 96 | ||
| 97 |
#' Convert Poisson test to ARD |
|
| 98 |
#' |
|
| 99 |
#' @inheritParams cards::tidy_as_ard |
|
| 100 |
#' @inheritParams stats::poisson.test |
|
| 101 |
#' @param by (`string`)\cr by column name |
|
| 102 |
#' @param variables (`character`)\cr names of the event and time variables |
|
| 103 |
#' @param ... passed to [poisson.test()] |
|
| 104 |
#' |
|
| 105 |
#' @return ARD data frame |
|
| 106 |
#' @keywords internal |
|
| 107 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
|
|
| 108 |
#' cardx:::.format_poissontest_results( |
|
| 109 |
#' by = "ARM", |
|
| 110 |
#' variables = c("CNSR", "AVAL"),
|
|
| 111 |
#' lst_tidy = |
|
| 112 |
#' cards::eval_capture_conditions( |
|
| 113 |
#' stats::poisson.test(sum(cards::ADTTE[["CNSR"]]), sum(cards::ADTTE[["AVAL"]])) |> |
|
| 114 |
#' broom::tidy() |
|
| 115 |
#' ) |
|
| 116 |
#' ) |
|
| 117 |
.format_poissontest_results <- function(by = NULL, variables, lst_tidy, ...) {
|
|
| 118 |
# build ARD ------------------------------------------------------------------ |
|
| 119 | 4x |
ret <- |
| 120 | 4x |
cards::tidy_as_ard( |
| 121 | 4x |
lst_tidy = lst_tidy, |
| 122 | 4x |
tidy_result_names = |
| 123 | 4x |
c( |
| 124 | 4x |
"estimate", "statistic", |
| 125 | 4x |
"p.value", "parameter", "conf.low", "conf.high", |
| 126 | 4x |
"method", "alternative" |
| 127 |
), |
|
| 128 | 4x |
fun_args_to_record = c("conf.level", "r"),
|
| 129 | 4x |
formals = formals(asNamespace("stats")[["poisson.test"]]),
|
| 130 | 4x |
passed_args = dots_list(...), |
| 131 | 4x |
lst_ard_columns = list(context = "stats_poisson_test", variable = variables[2]) |
| 132 |
) |> |
|
| 133 | 4x |
dplyr::distinct() |
| 134 | ||
| 135 |
# rename "r" statistic to "mu" |
|
| 136 | 4x |
ret$stat_name[ret$stat_name == "r"] <- "mu" |
| 137 | ||
| 138 | 4x |
if (!is_empty(by)) {
|
| 139 | 1x |
ret <- ret |> |
| 140 | 1x |
dplyr::mutate(group1 = by) |
| 141 |
} |
|
| 142 | ||
| 143 |
# add the stat label --------------------------------------------------------- |
|
| 144 | 4x |
ret |> |
| 145 | 4x |
dplyr::left_join( |
| 146 | 4x |
.df_poissontest_stat_labels(by = by), |
| 147 | 4x |
by = "stat_name" |
| 148 |
) |> |
|
| 149 | 4x |
dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) |> |
| 150 | 4x |
cards::as_card() |> |
| 151 | 4x |
cards::tidy_ard_column_order() |
| 152 |
} |
|
| 153 | ||
| 154 |
.df_poissontest_stat_labels <- function(by = NULL) {
|
|
| 155 | 4x |
dplyr::tribble( |
| 156 | 4x |
~stat_name, ~stat_label, |
| 157 | 4x |
"estimate", ifelse(is_empty(by), "Estimated Rate", "Estimated Rate Ratio"), |
| 158 | 4x |
"statistic", ifelse(is_empty(by), "Number of Events", "Number of Events in First Sample"), |
| 159 | 4x |
"p.value", "p-value", |
| 160 | 4x |
"parameter", "Expected Count", |
| 161 | 4x |
"conf.low", "CI Lower Bound", |
| 162 | 4x |
"conf.high", "CI Upper Bound", |
| 163 | 4x |
"mu", "H0 Mean", |
| 164 | 4x |
"conf.level", "CI Confidence Level" |
| 165 |
) |
|
| 166 |
} |
| 1 |
#' ARD Wilcoxon Rank-Sum Test |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Analysis results data for paired and non-paired Wilcoxon Rank-Sum tests. |
|
| 5 |
#' |
|
| 6 |
#' @param data (`data.frame`)\cr |
|
| 7 |
#' a data frame. See below for details. |
|
| 8 |
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 9 |
#' optional column name to compare by. |
|
| 10 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 11 |
#' column names to be compared. Independent tests will be computed for |
|
| 12 |
#' each variable. |
|
| 13 |
#' @param id ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 14 |
#' column name of the subject or participant ID. |
|
| 15 |
#' @param conf.level (scalar `numeric`)\cr |
|
| 16 |
#' confidence level for confidence interval. Default is `0.95`. |
|
| 17 |
#' @param ... arguments passed to `wilcox.test(...)` |
|
| 18 |
#' |
|
| 19 |
#' @return ARD data frame |
|
| 20 |
#' @name ard_stats_wilcox_test |
|
| 21 |
#' |
|
| 22 |
#' @details |
|
| 23 |
#' For the `ard_stats_wilcox_test()` function, the data is expected to be one row per subject. |
|
| 24 |
#' The data is passed as `wilcox.test(data[[variable]] ~ data[[by]], paired = FALSE, ...)`. |
|
| 25 |
#' |
|
| 26 |
#' For the `ard_stats_paired_wilcox_test()` function, the data is expected to be one row |
|
| 27 |
#' per subject per by level. Before the test is calculated, the data are |
|
| 28 |
#' reshaped to a wide format to be one row per subject. |
|
| 29 |
#' The data are then passed as |
|
| 30 |
#' `wilcox.test(x = data_wide[[<by level 1>]], y = data_wide[[<by level 2>]], paired = TRUE, ...)`. |
|
| 31 |
#' |
|
| 32 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
|
|
| 33 |
#' cards::ADSL |> |
|
| 34 |
#' dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")) |>
|
|
| 35 |
#' ard_stats_wilcox_test(by = "ARM", variables = "AGE") |
|
| 36 |
#' |
|
| 37 |
#' # constructing a paired data set, |
|
| 38 |
#' # where patients receive both treatments |
|
| 39 |
#' cards::ADSL[c("ARM", "AGE")] |>
|
|
| 40 |
#' dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")) |>
|
|
| 41 |
#' dplyr::mutate(.by = ARM, USUBJID = dplyr::row_number()) |> |
|
| 42 |
#' dplyr::arrange(USUBJID, ARM) |> |
|
| 43 |
#' ard_stats_paired_wilcox_test(by = ARM, variables = AGE, id = USUBJID) |
|
| 44 |
NULL |
|
| 45 | ||
| 46 |
#' @rdname ard_stats_wilcox_test |
|
| 47 |
#' @export |
|
| 48 |
ard_stats_wilcox_test <- function(data, variables, by = NULL, conf.level = 0.95, ...) {
|
|
| 49 | 6x |
set_cli_abort_call() |
| 50 | ||
| 51 |
# check installed packages --------------------------------------------------- |
|
| 52 | 6x |
check_pkg_installed("broom")
|
| 53 | ||
| 54 |
# check/process inputs ------------------------------------------------------- |
|
| 55 | 6x |
check_not_missing(data) |
| 56 | 6x |
check_not_missing(variables) |
| 57 | 6x |
check_data_frame(data) |
| 58 | 6x |
data <- dplyr::ungroup(data) |
| 59 | 6x |
cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
|
| 60 | 6x |
check_scalar(by, allow_empty = TRUE) |
| 61 | 6x |
check_range(conf.level, range = c(0, 1)) |
| 62 | ||
| 63 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 64 | 6x |
if (is_empty(variables)) {
|
| 65 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 66 |
} |
|
| 67 | ||
| 68 |
# build ARD ------------------------------------------------------------------ |
|
| 69 | 6x |
lapply( |
| 70 | 6x |
variables, |
| 71 | 6x |
function(variable) {
|
| 72 | 7x |
.format_wilcoxtest_results( |
| 73 | 7x |
by = by, |
| 74 | 7x |
variable = variable, |
| 75 | 7x |
lst_tidy = |
| 76 |
# styler: off |
|
| 77 | 7x |
cards::eval_capture_conditions( |
| 78 | 7x |
if (!is_empty(by)) {
|
| 79 | 6x |
stats::wilcox.test(data[[variable]] ~ data[[by]], conf.level = conf.level, ...) |> |
| 80 | 6x |
broom::tidy() |
| 81 |
} |
|
| 82 |
else {
|
|
| 83 | 1x |
stats::wilcox.test(data[[variable]], ...) |> |
| 84 | 1x |
broom::tidy() |
| 85 |
} |
|
| 86 |
), |
|
| 87 |
# styler: on |
|
| 88 | 7x |
paired = FALSE, |
| 89 |
... |
|
| 90 |
) |
|
| 91 |
} |
|
| 92 |
) |> |
|
| 93 | 6x |
dplyr::bind_rows() |
| 94 |
} |
|
| 95 | ||
| 96 |
#' @rdname ard_stats_wilcox_test |
|
| 97 |
#' @export |
|
| 98 |
ard_stats_paired_wilcox_test <- function(data, by, variables, id, conf.level = 0.95, ...) {
|
|
| 99 | 2x |
set_cli_abort_call() |
| 100 | ||
| 101 |
# check installed packages --------------------------------------------------- |
|
| 102 | 2x |
check_pkg_installed("broom")
|
| 103 | ||
| 104 |
# check/process inputs ------------------------------------------------------- |
|
| 105 | 2x |
check_not_missing(data) |
| 106 | 2x |
check_not_missing(variables) |
| 107 | 2x |
check_not_missing(by) |
| 108 | 2x |
check_not_missing(id) |
| 109 | 2x |
check_data_frame(data) |
| 110 | 2x |
data <- dplyr::ungroup(data) |
| 111 | 2x |
cards::process_selectors(data, by = {{ by }}, variables = {{ variables }}, id = {{ id }})
|
| 112 | 2x |
check_scalar(by) |
| 113 | 2x |
check_scalar(id) |
| 114 | ||
| 115 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 116 | 2x |
if (is_empty(variables)) {
|
| 117 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 118 |
} |
|
| 119 | ||
| 120 |
# build ARD ------------------------------------------------------------------ |
|
| 121 | 2x |
lapply( |
| 122 | 2x |
variables, |
| 123 | 2x |
function(variable) {
|
| 124 | 2x |
.format_wilcoxtest_results( |
| 125 | 2x |
by = by, |
| 126 | 2x |
variable = variable, |
| 127 | 2x |
lst_tidy = |
| 128 | 2x |
cards::eval_capture_conditions({
|
| 129 |
# adding this reshape inside the eval, so if there is an error it's captured in the ARD object |
|
| 130 | 2x |
data_wide <- .paired_data_pivot_wider(data, by = by, variable = variable, id = id) |
| 131 |
# perform paired wilcox test |
|
| 132 | 1x |
stats::wilcox.test(x = data_wide[["by1"]], y = data_wide[["by2"]], paired = TRUE, conf.level = conf.level, ...) |> |
| 133 | 1x |
broom::tidy() |
| 134 |
}), |
|
| 135 | 2x |
paired = TRUE, |
| 136 |
... |
|
| 137 |
) |
|
| 138 |
} |
|
| 139 |
) |> |
|
| 140 | 2x |
dplyr::bind_rows() |
| 141 |
} |
|
| 142 | ||
| 143 | ||
| 144 |
#' Convert Wilcoxon test to ARD |
|
| 145 |
#' |
|
| 146 |
#' @inheritParams cards::tidy_as_ard |
|
| 147 |
#' @inheritParams stats::wilcox.test |
|
| 148 |
#' @param by (`string`)\cr by column name |
|
| 149 |
#' @param variable (`string`)\cr variable column name |
|
| 150 |
#' @param ... passed to `stats::wilcox.test(...)` |
|
| 151 |
#' |
|
| 152 |
#' @return ARD data frame |
|
| 153 |
#' |
|
| 154 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
|
|
| 155 |
#' # Pre-processing ADSL to have grouping factor (ARM here) with 2 levels |
|
| 156 |
#' ADSL <- cards::ADSL |> |
|
| 157 |
#' dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")) |>
|
|
| 158 |
#' ard_stats_wilcox_test(by = "ARM", variables = "AGE") |
|
| 159 |
#' |
|
| 160 |
#' cardx:::.format_wilcoxtest_results( |
|
| 161 |
#' by = "ARM", |
|
| 162 |
#' variable = "AGE", |
|
| 163 |
#' paired = FALSE, |
|
| 164 |
#' lst_tidy = |
|
| 165 |
#' cards::eval_capture_conditions( |
|
| 166 |
#' stats::wilcox.test(ADSL[["AGE"]] ~ ADSL[["ARM"]], paired = FALSE) |> |
|
| 167 |
#' broom::tidy() |
|
| 168 |
#' ) |
|
| 169 |
#' ) |
|
| 170 |
#' |
|
| 171 |
#' @keywords internal |
|
| 172 |
.format_wilcoxtest_results <- function(by = NULL, variable, lst_tidy, paired, ...) {
|
|
| 173 |
# build ARD ------------------------------------------------------------------ |
|
| 174 | 9x |
ret <- |
| 175 | 9x |
cards::tidy_as_ard( |
| 176 | 9x |
lst_tidy = lst_tidy, |
| 177 | 9x |
tidy_result_names = c("statistic", "p.value", "method", "alternative"),
|
| 178 | 9x |
fun_args_to_record = c( |
| 179 | 9x |
"mu", "paired", "exact", "correct", "conf.int", |
| 180 | 9x |
"conf.level", "tol.root", "digits.rank" |
| 181 |
), |
|
| 182 | 9x |
formals = formals(asNamespace("stats")[["wilcox.test.default"]]),
|
| 183 | 9x |
passed_args = c(list(paired = paired), dots_list(...)), |
| 184 | 9x |
lst_ard_columns = list(variable = variable, context = "stats_wilcox_test") |
| 185 |
) |
|
| 186 | ||
| 187 | 9x |
if (!is_empty(by)) {
|
| 188 | 8x |
ret <- ret |> |
| 189 | 8x |
dplyr::mutate(group1 = by) |
| 190 |
} |
|
| 191 | ||
| 192 |
# add the stat label --------------------------------------------------------- |
|
| 193 | 9x |
ret |> |
| 194 | 9x |
dplyr::left_join( |
| 195 | 9x |
.df_wilcoxtest_stat_labels(by), |
| 196 | 9x |
by = "stat_name" |
| 197 |
) |> |
|
| 198 | 9x |
dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) |> |
| 199 | 9x |
cards::as_card() |> |
| 200 | 9x |
cards::tidy_ard_column_order() |
| 201 |
} |
|
| 202 | ||
| 203 | ||
| 204 |
.df_wilcoxtest_stat_labels <- function(by = NULL) {
|
|
| 205 | 9x |
dplyr::tribble( |
| 206 | 9x |
~stat_name, ~stat_label, |
| 207 | 9x |
"statistic", ifelse(is.null(by), "V Statistic", "X-squared Statistic"), |
| 208 | 9x |
"parameter", "Degrees of Freedom", |
| 209 | 9x |
"estimate", "Median of the Difference", |
| 210 | 9x |
"p.value", "p-value", |
| 211 | 9x |
"conf.low", "CI Lower Bound", |
| 212 | 9x |
"conf.high", "CI Upper Bound", |
| 213 | 9x |
"paired", "Paired test", |
| 214 | 9x |
"conf.level", "CI Confidence Level", |
| 215 |
) |
|
| 216 |
} |
| 1 |
#' Deprecated functions |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' `r lifecycle::badge('deprecated')`\cr
|
|
| 5 |
#' Some functions have been deprecated and are no longer being actively |
|
| 6 |
#' supported. |
|
| 7 |
#' |
|
| 8 |
#' @name deprecated |
|
| 9 |
#' @keywords internal |
|
| 10 |
NULL |
|
| 11 | ||
| 12 |
# v0.3.0 ----------------------------------------------------------------------- |
|
| 13 |
#' @importFrom cards ard_continuous |
|
| 14 |
#' @export |
|
| 15 |
cards::ard_continuous |
|
| 16 | ||
| 17 |
#' @importFrom cards ard_categorical |
|
| 18 |
#' @export |
|
| 19 |
cards::ard_categorical |
|
| 20 | ||
| 21 |
#' @importFrom cards ard_dichotomous |
|
| 22 |
#' @export |
|
| 23 |
cards::ard_dichotomous |
|
| 24 | ||
| 25 |
#' @rdname deprecated |
|
| 26 |
#' @export |
|
| 27 |
ard_continuous.survey.design <- function(data, ...) {
|
|
| 28 | ! |
lifecycle::deprecate_soft( |
| 29 | ! |
when = "0.3.0", |
| 30 | ! |
what = "cardx::ard_continuous()", |
| 31 | ! |
with = "cardx::ard_summary()" |
| 32 |
) |
|
| 33 | ||
| 34 | ! |
ard_summary(data = data, ...) |> |
| 35 | ! |
dplyr::mutate(context = "continuous") |
| 36 |
} |
|
| 37 | ||
| 38 |
#' @rdname deprecated |
|
| 39 |
#' @export |
|
| 40 |
ard_categorical.survey.design <- function(data, ...) {
|
|
| 41 | ! |
lifecycle::deprecate_soft( |
| 42 | ! |
when = "0.3.0", |
| 43 | ! |
what = "cardx::ard_categorical()", |
| 44 | ! |
with = "cardx::ard_tabulate()" |
| 45 |
) |
|
| 46 | ||
| 47 | ! |
ard_tabulate(data = data, ...) |> |
| 48 | ! |
dplyr::mutate(context = "categorical") |
| 49 |
} |
|
| 50 | ||
| 51 |
#' @rdname deprecated |
|
| 52 |
#' @export |
|
| 53 |
ard_dichotomous.survey.design <- function(data, ...) {
|
|
| 54 | ! |
lifecycle::deprecate_soft( |
| 55 | ! |
when = "0.3.0", |
| 56 | ! |
what = "cardx::ard_dichotomous()", |
| 57 | ! |
with = "cardx::ard_tabulate_value()", |
| 58 | ! |
details = "The `value` argument no longer has a default value and must be specified." |
| 59 |
) |
|
| 60 | ||
| 61 | ! |
ard_tabulate_value(data = data, ...) |> |
| 62 | ! |
dplyr::mutate(context = "dichotomous") |
| 63 |
} |
|
| 64 | ||
| 65 |
#' @rdname deprecated |
|
| 66 |
#' @export |
|
| 67 |
ard_categorical_max <- function(...) {
|
|
| 68 | ! |
lifecycle::deprecate_soft( |
| 69 | ! |
when = "0.3.0", |
| 70 | ! |
what = "cardx::ard_categorical_max()", |
| 71 | ! |
with = "cardx::ard_tabulate_max()" |
| 72 |
) |
|
| 73 | ||
| 74 | ! |
ard_tabulate_max(...) |
| 75 |
} |
|
| 76 | ||
| 77 |
#' @rdname deprecated |
|
| 78 |
#' @export |
|
| 79 |
ard_emmeans_mean_difference <- function(...) {
|
|
| 80 | ! |
lifecycle::deprecate_soft( |
| 81 | ! |
when = "0.3.1", |
| 82 | ! |
what = "cardx::ard_emmeans_mean_difference()", |
| 83 | ! |
with = "cardx::ard_emmeans_contrast()" |
| 84 |
) |
|
| 85 | ||
| 86 | ! |
ard_emmeans_contrast(...) |
| 87 |
} |
| 1 |
#' ARDs for LS Mean Difference and LS Means |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' The `ard_emmeans_contrast()` function calculates least-squares mean differences using the 'emmeans' |
|
| 5 |
#' package using the following |
|
| 6 |
#' |
|
| 7 |
#' ```r |
|
| 8 |
#' emmeans::emmeans(object = <regression model>, specs = ~ <primary covariate>) |> |
|
| 9 |
#' emmeans::contrast(method = "pairwise") |> |
|
| 10 |
#' summary(infer = TRUE, level = <confidence level>) |
|
| 11 |
#' ``` |
|
| 12 |
#' |
|
| 13 |
#' @param data (`data.frame`/`survey.design`)\cr |
|
| 14 |
#' a data frame or survey design object |
|
| 15 |
#' @inheritParams construct_model |
|
| 16 |
#' @param response_type (`string`) |
|
| 17 |
#' string indicating whether the model outcome is `'continuous'` |
|
| 18 |
#' or `'dichotomous'`. When `'dichotomous'`, the call to `emmeans::emmeans()` is |
|
| 19 |
#' supplemented with argument `regrid="response"`. |
|
| 20 |
#' @param conf.level (scalar `numeric`)\cr |
|
| 21 |
#' confidence level for confidence interval. Default is `0.95`. |
|
| 22 |
#' @param primary_covariate (`string`)\cr |
|
| 23 |
#' string indicating the primary covariate (typically the dichotomous treatment variable). |
|
| 24 |
#' Default is the first covariate listed in the formula. |
|
| 25 |
#' |
|
| 26 |
#' @return ARD data frame |
|
| 27 |
#' @export |
|
| 28 |
#' @rdname ard_emmeans |
|
| 29 |
#' |
|
| 30 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "emmeans"))
|
|
| 31 |
#' # LS Mean Difference |
|
| 32 |
#' ard_emmeans_contrast( |
|
| 33 |
#' data = mtcars, |
|
| 34 |
#' formula = mpg ~ am + cyl, |
|
| 35 |
#' method = "lm" |
|
| 36 |
#' ) |
|
| 37 |
#' |
|
| 38 |
#' ard_emmeans_contrast( |
|
| 39 |
#' data = mtcars, |
|
| 40 |
#' formula = vs ~ am + mpg, |
|
| 41 |
#' method = "glm", |
|
| 42 |
#' method.args = list(family = binomial), |
|
| 43 |
#' response_type = "dichotomous" |
|
| 44 |
#' ) |
|
| 45 |
ard_emmeans_contrast <- function(data, formula, method, |
|
| 46 |
method.args = list(), |
|
| 47 |
package = "base", |
|
| 48 |
response_type = c("continuous", "dichotomous"),
|
|
| 49 |
conf.level = 0.95, |
|
| 50 |
primary_covariate = |
|
| 51 |
stats::terms(formula) |> |
|
| 52 |
attr("term.labels") |>
|
|
| 53 |
getElement(1L)) {
|
|
| 54 | 5x |
set_cli_abort_call() |
| 55 | ||
| 56 |
# check package installation ------------------------------------------------- |
|
| 57 | 5x |
check_pkg_installed(c("emmeans", package))
|
| 58 | 5x |
check_not_missing(data) |
| 59 | 5x |
check_not_missing(formula) |
| 60 | 5x |
check_not_missing(method) |
| 61 | 5x |
check_class(data, c("data.frame", "survey.design"))
|
| 62 | 5x |
check_class(formula, cls = "formula") |
| 63 | 5x |
check_string(package) |
| 64 | 5x |
check_string(primary_covariate) |
| 65 | 5x |
check_scalar(conf.level) |
| 66 | 5x |
check_range(conf.level, range = c(0, 1)) |
| 67 | 5x |
response_type <- arg_match(response_type, error_call = get_cli_abort_call()) |
| 68 | ||
| 69 | 5x |
data_in <- if (dplyr::last(class(data)) == "survey.design") data$variables else data |
| 70 | ||
| 71 |
# build ARD ------------------------------------------------------------------ |
|
| 72 | 5x |
result <- cards::ard_mvsummary( |
| 73 | 5x |
data = data_in, |
| 74 | 5x |
variables = all_of(primary_covariate), |
| 75 | 5x |
statistic = all_of(primary_covariate) ~ list( |
| 76 | 5x |
emmeans = |
| 77 | 5x |
.calc_emmeans_contrast( |
| 78 | 5x |
data, formula, method, {{ method.args }}, package, response_type, conf.level, primary_covariate
|
| 79 |
) |
|
| 80 |
) |
|
| 81 |
) |
|
| 82 | ||
| 83 | 5x |
result |> |
| 84 | 5x |
dplyr::select(-"stat_label") |> |
| 85 | 5x |
dplyr::left_join( |
| 86 | 5x |
.df_emmeans_stat_labels("contrast"),
|
| 87 | 5x |
by = "stat_name" |
| 88 |
) |> |
|
| 89 | 5x |
dplyr::mutate( |
| 90 | 5x |
variable = "contrast", |
| 91 | 5x |
variable_level = if ("variable_level" %in% .data$stat_name) {
|
| 92 | 5x |
.data$stat[.data$stat_name == "variable_level"] |
| 93 |
} else {
|
|
| 94 | 5x |
NA |
| 95 |
}, |
|
| 96 | 5x |
group1 = .env$primary_covariate, |
| 97 | 5x |
stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name), |
| 98 | 5x |
context = "emmeans_contrast", |
| 99 |
) |> |
|
| 100 | 5x |
dplyr::filter(.data$stat_name != "variable_level") |> |
| 101 | 5x |
cards::as_card() |> |
| 102 | 5x |
cards::tidy_ard_column_order() |> |
| 103 | 5x |
cards::tidy_ard_row_order() |
| 104 |
} |
|
| 105 | ||
| 106 |
# function to perform calculations --------------------------------------------- |
|
| 107 |
.calc_emmeans_contrast <- function(data, formula, method, |
|
| 108 |
method.args, |
|
| 109 |
package, |
|
| 110 |
response_type, |
|
| 111 |
conf.level, |
|
| 112 |
primary_covariate) {
|
|
| 113 | 5x |
cards::as_cards_fn( |
| 114 | 5x |
\(x, ...) {
|
| 115 |
# construct primary model ------------------------------------------------ |
|
| 116 | 5x |
mod <- |
| 117 | 5x |
construct_model( |
| 118 | 5x |
data = data, formula = formula, method = method, |
| 119 | 5x |
method.args = {{ method.args }},
|
| 120 | 5x |
package = package, env = caller_env() |
| 121 |
) |
|
| 122 | ||
| 123 |
# emmeans ---------------------------------------------------------------- |
|
| 124 | 4x |
emmeans_args <- list(object = mod, specs = reformulate2(primary_covariate)) |
| 125 | 3x |
if (response_type %in% "dichotomous") emmeans_args <- c(emmeans_args, list(regrid = "response")) |
| 126 | 4x |
emmeans <- |
| 127 | 4x |
withr::with_namespace( |
| 128 | 4x |
package = "emmeans", |
| 129 | 4x |
code = do.call("emmeans", args = emmeans_args)
|
| 130 |
) |
|
| 131 | ||
| 132 |
# calculate mean difference estimate ----------------------------------- |
|
| 133 | 4x |
results <- |
| 134 | 4x |
emmeans |> |
| 135 | 4x |
emmeans::contrast(method = "pairwise") |> |
| 136 | 4x |
summary(infer = TRUE, level = conf.level) |> |
| 137 | 4x |
dplyr::rename(variable_level = "contrast") |
| 138 | ||
| 139 |
# convert results to ARD format ---------------------------------------- |
|
| 140 | 4x |
results |> |
| 141 | 4x |
dplyr::as_tibble() |> |
| 142 | 4x |
dplyr::rename( |
| 143 | 4x |
conf.low = any_of("asymp.LCL"),
|
| 144 | 4x |
conf.high = any_of("asymp.UCL"),
|
| 145 | 4x |
conf.low = any_of("lower.CL"),
|
| 146 | 4x |
conf.high = any_of("upper.CL"),
|
| 147 | 4x |
std.error = any_of("SE")
|
| 148 |
) |> |
|
| 149 | 4x |
dplyr::select(any_of(c( |
| 150 | 4x |
"variable_level", "estimate", |
| 151 | 4x |
"std.error", "df", |
| 152 | 4x |
"conf.low", "conf.high", "p.value" |
| 153 |
))) |> |
|
| 154 | 4x |
dplyr::mutate( |
| 155 | 4x |
conf.level = .env$conf.level, |
| 156 | 4x |
method = ifelse( |
| 157 | 4x |
length(attr(stats::terms(formula), "term.labels") |> discard(~ startsWith(., "1 |"))) == 1L, |
| 158 | 4x |
"Least-squares mean difference", |
| 159 | 4x |
"Least-squares adjusted mean difference" |
| 160 |
) |
|
| 161 |
) |
|
| 162 |
}, |
|
| 163 | 5x |
stat_names = c("variable_level", "estimate", "std.error", "df", "conf.low", "conf.high", "p.value", "conf.level", "method")
|
| 164 |
) |
|
| 165 |
} |
|
| 166 | ||
| 167 |
.df_emmeans_stat_labels <- function(estimate) {
|
|
| 168 | 10x |
dplyr::tribble( |
| 169 | 10x |
~stat_name, ~stat_label, |
| 170 | 10x |
"estimate", if (estimate == "contrast") "Mean Difference" else "Mean", |
| 171 | 10x |
"std.error", "Standard Error", |
| 172 | 10x |
"df", "Degrees of Freedom", |
| 173 | 10x |
"conf.low", "CI Lower Bound", |
| 174 | 10x |
"conf.high", "CI Upper Bound", |
| 175 | 10x |
"p.value", "p-value", |
| 176 | 10x |
"conf.level", "CI Confidence Level", |
| 177 |
) |
|
| 178 |
} |
| 1 |
#' ARD survey categorical CIs |
|
| 2 |
#' |
|
| 3 |
#' Confidence intervals for categorical variables calculated via |
|
| 4 |
#' [`survey::svyciprop()`]. |
|
| 5 |
#' |
|
| 6 |
#' @inheritParams ard_summary.survey.design |
|
| 7 |
#' @inheritParams ard_categorical_ci.data.frame |
|
| 8 |
#' @param method (`string`)\cr |
|
| 9 |
#' Method passed to `survey::svyciprop(method)` |
|
| 10 |
#' @param df (`numeric`)\cr |
|
| 11 |
#' denominator degrees of freedom, passed to `survey::svyciprop(df)`. |
|
| 12 |
#' Default is `survey::degf(data)`. |
|
| 13 |
#' @param ... arguments passed to `survey::svyciprop()` |
|
| 14 |
#' |
|
| 15 |
#' @return ARD data frame |
|
| 16 |
#' @export |
|
| 17 |
#' |
|
| 18 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "survey"))
|
|
| 19 |
#' data(api, package = "survey") |
|
| 20 |
#' dclus1 <- survey::svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc) |
|
| 21 |
#' |
|
| 22 |
#' ard_categorical_ci(dclus1, variables = sch.wide) |
|
| 23 |
#' ard_categorical_ci(dclus1, variables = sch.wide, value = sch.wide ~ "Yes", method = "xlogit") |
|
| 24 |
ard_categorical_ci.survey.design <- function(data, |
|
| 25 |
variables, |
|
| 26 |
by = NULL, |
|
| 27 |
method = c("logit", "likelihood", "asin", "beta", "mean", "xlogit"),
|
|
| 28 |
conf.level = 0.95, |
|
| 29 |
value = list(where(is_binary) ~ 1L, where(is.logical) ~ TRUE), |
|
| 30 |
df = survey::degf(data), |
|
| 31 |
...) {
|
|
| 32 | 17x |
set_cli_abort_call() |
| 33 | 17x |
check_dots_empty() |
| 34 | ||
| 35 |
# check inputs --------------------------------------------------------------- |
|
| 36 | 17x |
check_not_missing(data) |
| 37 | 17x |
check_class(data, "survey.design") |
| 38 | 17x |
check_not_missing(variables) |
| 39 | ||
| 40 | 17x |
cards::process_selectors( |
| 41 | 17x |
data = data$variables, |
| 42 | 17x |
variables = {{ variables }},
|
| 43 | 17x |
by = {{ by }}
|
| 44 |
) |
|
| 45 | 17x |
cards::process_formula_selectors( |
| 46 | 17x |
data = data$variables, |
| 47 | 17x |
value = value |
| 48 |
) |
|
| 49 | 17x |
check_scalar(by, allow_empty = TRUE) |
| 50 | 17x |
check_scalar_range(conf.level, range = c(0, 1)) |
| 51 | 17x |
method <- arg_match(method) |
| 52 | ||
| 53 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 54 | 17x |
if (is_empty(variables)) {
|
| 55 | 1x |
return(dplyr::tibble() |> cards::as_card()) |
| 56 |
} |
|
| 57 | ||
| 58 |
# calculate and return ARD of one sample CI ---------------------------------- |
|
| 59 | 16x |
.calculate_ard_onesample_survey_ci( |
| 60 | 16x |
FUN = .svyciprop_wrapper, |
| 61 | 16x |
data = data, |
| 62 | 16x |
variables = variables, |
| 63 | 16x |
by = by, |
| 64 | 16x |
conf.level = conf.level, |
| 65 | 16x |
method = method, |
| 66 | 16x |
df = df, |
| 67 | 16x |
value = value, |
| 68 |
... |
|
| 69 |
) |
|
| 70 |
} |
|
| 71 | ||
| 72 |
.calculate_ard_onesample_survey_ci <- function(FUN, data, variables, by, conf.level, value, ...) {
|
|
| 73 |
# calculate results ---------------------------------------------------------- |
|
| 74 | 16x |
map( |
| 75 | 16x |
variables, |
| 76 | 16x |
function(variable) {
|
| 77 | 26x |
.calculate_one_ard_categorical_survey_ci( |
| 78 | 26x |
FUN = FUN, |
| 79 | 26x |
data = data, |
| 80 | 26x |
variable = variable, |
| 81 | 26x |
by = by, |
| 82 | 26x |
conf.level = conf.level, |
| 83 | 26x |
value = value[[variable]], |
| 84 |
... |
|
| 85 |
) |
|
| 86 |
} |
|
| 87 |
) |> |
|
| 88 | 16x |
dplyr::bind_rows() |
| 89 |
} |
|
| 90 | ||
| 91 |
.calculate_one_ard_categorical_survey_ci <- function(FUN, data, variable, by, conf.level, value, ...) {
|
|
| 92 | 26x |
variable_levels <- .unique_values_sort(data$variables, variable = variable) |
| 93 | 26x |
if (!is_empty(by)) {
|
| 94 | 6x |
by_levels <- .unique_values_sort(data$variables, variable = by) |
| 95 | 6x |
lst_data <- |
| 96 | 6x |
map( |
| 97 | 6x |
by_levels, |
| 98 | 6x |
~ call2("subset", expr(data), expr(!!sym(by) == !!.x)) |> eval()
|
| 99 |
) |> |
|
| 100 | 6x |
set_names(as.character(by_levels)) |
| 101 |
} |
|
| 102 | ||
| 103 | 26x |
df_full <- |
| 104 | 26x |
case_switch( |
| 105 | 26x |
!is_empty(by) ~ |
| 106 | 26x |
tidyr::expand_grid( |
| 107 | 26x |
group1_level = as.character(by_levels) |> as.list(), |
| 108 | 26x |
variable_level = as.character(variable_levels) |> as.list() |
| 109 |
) |> |
|
| 110 | 26x |
dplyr::mutate(group1 = .env$by, variable = .env$variable), |
| 111 | 26x |
.default = |
| 112 | 26x |
dplyr::tibble( |
| 113 | 26x |
variable = .env$variable, |
| 114 | 26x |
variable_level = as.character(variable_levels) |> as.list() |
| 115 |
) |
|
| 116 |
) |> |
|
| 117 | 26x |
dplyr::rowwise() |> |
| 118 | 26x |
dplyr::mutate( |
| 119 | 26x |
lst_result = |
| 120 | 26x |
FUN( |
| 121 | 26x |
data = |
| 122 | 26x |
case_switch( |
| 123 | 26x |
is_empty(.env$by) ~ data, |
| 124 | 26x |
.default = lst_data[[.data$group1_level]] |
| 125 |
), |
|
| 126 | 26x |
variable = .data$variable, |
| 127 | 26x |
variable_level = .data$variable_level, |
| 128 | 26x |
conf.level = .env$conf.level, |
| 129 |
... |
|
| 130 |
) |> |
|
| 131 | 26x |
list(), |
| 132 | 26x |
result = |
| 133 | 26x |
.data$lst_result[["result"]] |> |
| 134 | 26x |
enframe("stat_name", "stat") |>
|
| 135 | 26x |
list(), |
| 136 | 26x |
warning = .data$lst_result["warning"] |> unname(), |
| 137 | 26x |
error = .data$lst_result["error"] |> unname(), |
| 138 | 26x |
context = "categorical_ci" |
| 139 |
) |> |
|
| 140 | 26x |
dplyr::select(-"lst_result") |> |
| 141 | 26x |
dplyr::ungroup() |> |
| 142 | 26x |
tidyr::unnest("result") |>
|
| 143 | 26x |
dplyr::mutate( |
| 144 | 26x |
stat_label = .data$stat_name, |
| 145 | 26x |
fmt_fun = map(.data$stat, ~ case_switch(is.numeric(.x) ~ 2L, .default = as.character)) |
| 146 |
) |> |
|
| 147 | 26x |
cards::as_card() |> |
| 148 | 26x |
cards::tidy_ard_column_order() |> |
| 149 | 26x |
.restore_original_column_types(data = data$variables) |
| 150 | ||
| 151 |
# if a value was passed for the variable, subset on those results |
|
| 152 | 26x |
if (!is_empty(value)) {
|
| 153 | 3x |
df_full <- df_full |> |
| 154 | 3x |
dplyr::filter(unlist(.data$variable_level) %in% .env$value) |
| 155 |
} |
|
| 156 | ||
| 157 | 26x |
df_full |
| 158 |
} |
|
| 159 | ||
| 160 | ||
| 161 |
.svyciprop_wrapper <- function(data, variable, variable_level, conf.level, method, df, ...) {
|
|
| 162 | 64x |
lst_results <- |
| 163 | 64x |
cards::eval_capture_conditions( |
| 164 | 64x |
survey::svyciprop( |
| 165 | 64x |
formula = inject(~ I(!!sym(variable) == !!variable_level)), |
| 166 | 64x |
design = data, |
| 167 | 64x |
method = method, |
| 168 | 64x |
level = conf.level, |
| 169 | 64x |
df = df, |
| 170 |
... |
|
| 171 |
) %>% |
|
| 172 | 64x |
{list(.[[1]], attr(., "ci"))} |> # styler: off
|
| 173 | 64x |
unlist() |> |
| 174 | 64x |
set_names(c("estimate", "conf.low", "conf.high")) |>
|
| 175 | 64x |
as.list() |
| 176 |
) |
|
| 177 | ||
| 178 |
# add NULL results if error |
|
| 179 | 64x |
if (is_empty(lst_results[["result"]])) {
|
| 180 | ! |
lst_results[["result"]] <- rep_named(c("estimate", "conf.low", "conf.high"), list(NULL))
|
| 181 |
} |
|
| 182 | ||
| 183 |
# add other args |
|
| 184 | 64x |
lst_results[["result"]] <- lst_results[["result"]] |> append(list(method = method, conf.level = conf.level)) |
| 185 | ||
| 186 |
# return list result |
|
| 187 | 64x |
lst_results |
| 188 |
} |
|
| 189 | ||
| 190 | ||
| 191 |
case_switch <- function(..., .default = NULL) {
|
|
| 192 |
dots <- dots_list(...) |
|
| 193 | ||
| 194 |
for (f in dots) {
|
|
| 195 |
if (isTRUE(eval(f_lhs(f), envir = attr(f, ".Environment")))) {
|
|
| 196 |
return(eval(f_rhs(f), envir = attr(f, ".Environment"))) |
|
| 197 |
} |
|
| 198 |
} |
|
| 199 | ||
| 200 |
return(.default) |
|
| 201 |
} |
| 1 |
#' ARD Continuous Survey Statistics |
|
| 2 |
#' |
|
| 3 |
#' Returns an ARD of weighted statistics using the `{survey}` package.
|
|
| 4 |
#' |
|
| 5 |
#' @param data (`survey.design`)\cr |
|
| 6 |
#' a design object often created with [`survey::svydesign()`]. |
|
| 7 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 8 |
#' columns to include in summaries. |
|
| 9 |
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 10 |
#' results are calculated for **all combinations** of the columns specified, |
|
| 11 |
#' including unobserved combinations and unobserved factor levels. |
|
| 12 |
#' @param statistic ([`formula-list-selector`][cards::syntax])\cr |
|
| 13 |
#' a named list, a list of formulas, |
|
| 14 |
#' or a single formula where the list element is a character vector of |
|
| 15 |
#' statistic names to include. See below for options. |
|
| 16 |
#' @param fmt_fun ([`formula-list-selector`][cards::syntax])\cr |
|
| 17 |
#' a named list, a list of formulas, |
|
| 18 |
#' or a single formula where the list element is a named list of functions |
|
| 19 |
#' (or the RHS of a formula), |
|
| 20 |
#' e.g. `list(mpg = list(mean = \(x) round(x, digits = 2) |> as.character))`. |
|
| 21 |
#' @param stat_label ([`formula-list-selector`][cards::syntax])\cr |
|
| 22 |
#' a named list, a list of formulas, or a single formula where |
|
| 23 |
#' the list element is either a named list or a list of formulas defining the |
|
| 24 |
#' statistic labels, e.g. `everything() ~ list(mean = "Mean", sd = "SD")` or |
|
| 25 |
#' `everything() ~ list(mean ~ "Mean", sd ~ "SD")`. |
|
| 26 |
#' @inheritParams ard_tabulate.survey.design |
|
| 27 |
#' @inheritParams rlang::args_dots_empty |
|
| 28 |
#' |
|
| 29 |
#' @section statistic argument: |
|
| 30 |
#' |
|
| 31 |
#' The following statistics are available: |
|
| 32 |
#' `r cardx:::accepted_svy_stats(FALSE) |> shQuote("sh") |> paste(collapse = ", ")`,
|
|
| 33 |
#' where 'p##' is are the percentiles and `##` is an integer between 0 and 100. |
|
| 34 |
#' |
|
| 35 |
#' The design effect (`"deff"`) is calculated only when requested in the `statistic` argument. |
|
| 36 |
#' |
|
| 37 |
#' |
|
| 38 |
#' @return an ARD data frame of class 'card' |
|
| 39 |
#' @export |
|
| 40 |
#' |
|
| 41 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "survey"))
|
|
| 42 |
#' data(api, package = "survey") |
|
| 43 |
#' dclus1 <- survey::svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc) |
|
| 44 |
#' |
|
| 45 |
#' ard_summary( |
|
| 46 |
#' data = dclus1, |
|
| 47 |
#' variables = api00, |
|
| 48 |
#' by = stype |
|
| 49 |
#' ) |
|
| 50 |
ard_summary.survey.design <- function(data, variables, by = NULL, |
|
| 51 |
statistic = everything() ~ c("median", "p25", "p75"),
|
|
| 52 |
fmt_fun = NULL, |
|
| 53 |
stat_label = NULL, |
|
| 54 |
fmt_fn = deprecated(), |
|
| 55 |
...) {
|
|
| 56 | 50x |
set_cli_abort_call() |
| 57 | 50x |
check_dots_empty() |
| 58 | ||
| 59 |
# deprecated args ------------------------------------------------------------ |
|
| 60 | 50x |
if (lifecycle::is_present(fmt_fn)) {
|
| 61 | ! |
lifecycle::deprecate_soft( |
| 62 | ! |
when = "0.2.5", |
| 63 | ! |
what = "ard_summary(fmt_fn)", |
| 64 | ! |
with = "ard_summary(fmt_fun)" |
| 65 |
) |
|
| 66 | ! |
fmt_fun <- fmt_fn |
| 67 |
} |
|
| 68 | ||
| 69 |
# check installed packages --------------------------------------------------- |
|
| 70 | 50x |
check_pkg_installed(pkg = "survey") |
| 71 | ||
| 72 |
# check inputs --------------------------------------------------------------- |
|
| 73 | 50x |
check_not_missing(variables) |
| 74 | ||
| 75 |
# process inputs ------------------------------------------------------------- |
|
| 76 | 50x |
cards::process_selectors(data$variables, variables = {{ variables }}, by = {{ by }})
|
| 77 | 50x |
variables <- setdiff(variables, by) |
| 78 | 50x |
check_na_factor_levels(data$variables, by) |
| 79 | ||
| 80 | 50x |
cards::process_formula_selectors( |
| 81 | 50x |
data$variables[variables], |
| 82 | 50x |
statistic = statistic, |
| 83 | 50x |
fmt_fun = fmt_fun, |
| 84 | 50x |
stat_label = stat_label |
| 85 |
) |
|
| 86 | 50x |
cards::fill_formula_selectors( |
| 87 | 50x |
data$variables[variables], |
| 88 | 50x |
statistic = formals(asNamespace("cardx")[["ard_summary.survey.design"]])[["statistic"]] |> eval()
|
| 89 |
) |
|
| 90 | 50x |
cards::check_list_elements( |
| 91 | 50x |
x = statistic, |
| 92 | 50x |
predicate = \(x) all(x %in% accepted_svy_stats()), |
| 93 | 50x |
error_msg = c("Error in the values of the {.arg statistic} argument for variable {.val {variable}}.",
|
| 94 | 50x |
i = "Values must be in {.val {cardx:::accepted_svy_stats(FALSE)}}"
|
| 95 |
) |
|
| 96 |
) |
|
| 97 | ||
| 98 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 99 | 50x |
if (is_empty(variables)) {
|
| 100 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 101 |
} |
|
| 102 | ||
| 103 |
# compute the weighted statistics -------------------------------------------- |
|
| 104 | 50x |
df_stats <- |
| 105 | 50x |
map( |
| 106 | 50x |
names(statistic), |
| 107 | 50x |
function(variable) {
|
| 108 | 92x |
map( |
| 109 | 92x |
statistic[[variable]], |
| 110 | 92x |
function(statistic) {
|
| 111 | 324x |
.compute_svy_stat(data, variable = variable, by = by, stat_name = statistic) |
| 112 |
} |
|
| 113 |
) |
|
| 114 |
} |
|
| 115 |
) |> |
|
| 116 | 50x |
dplyr::bind_rows() |> |
| 117 | 50x |
.restore_original_column_types(data = data$variables) |
| 118 | ||
| 119 |
# add stat_labels ------------------------------------------------------------ |
|
| 120 | 50x |
df_stats <- |
| 121 | 50x |
df_stats |> |
| 122 | 50x |
dplyr::left_join( |
| 123 | 50x |
.default_svy_stat_labels(), |
| 124 | 50x |
by = "stat_name" |
| 125 |
) |> |
|
| 126 | 50x |
dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) |
| 127 | 50x |
if (!is_empty(stat_label)) {
|
| 128 | 1x |
df_stats <- |
| 129 | 1x |
dplyr::rows_update( |
| 130 | 1x |
df_stats, |
| 131 | 1x |
dplyr::tibble( |
| 132 | 1x |
variable = names(stat_label), |
| 133 | 1x |
stat_name = map(.data$variable, ~ names(stat_label[[.x]])), |
| 134 | 1x |
stat_label = map(.data$variable, ~ stat_label[[.x]] |> |
| 135 | 1x |
unname() |> |
| 136 | 1x |
unlist()) |
| 137 |
) |> |
|
| 138 | 1x |
tidyr::unnest(cols = c("stat_name", "stat_label")),
|
| 139 | 1x |
by = c("variable", "stat_name"),
|
| 140 | 1x |
unmatched = "ignore" |
| 141 |
) |
|
| 142 |
} |
|
| 143 | ||
| 144 |
# add formatting stats ------------------------------------------------------- |
|
| 145 | 50x |
df_stats$fmt_fun <- list(1L) |
| 146 | 50x |
if (!is_empty(fmt_fun)) {
|
| 147 | 1x |
df_stats <- |
| 148 | 1x |
dplyr::rows_update( |
| 149 | 1x |
df_stats, |
| 150 | 1x |
dplyr::tibble( |
| 151 | 1x |
variable = names(fmt_fun), |
| 152 | 1x |
stat_name = map(.data$variable, ~ names(fmt_fun[[.x]])), |
| 153 | 1x |
fmt_fun = map(.data$variable, ~ fmt_fun[[.x]] |> unname()) |
| 154 |
) |> |
|
| 155 | 1x |
tidyr::unnest(cols = c("stat_name", "fmt_fun")),
|
| 156 | 1x |
by = c("variable", "stat_name"),
|
| 157 | 1x |
unmatched = "ignore" |
| 158 |
) |
|
| 159 |
} |
|
| 160 | ||
| 161 |
# add class and return ARD object -------------------------------------------- |
|
| 162 | 50x |
df_stats |> |
| 163 | 50x |
dplyr::mutate(context = "continuous") |> |
| 164 | 50x |
cards::as_card() |> |
| 165 | 50x |
cards::tidy_ard_column_order() |
| 166 |
} |
|
| 167 | ||
| 168 |
.default_svy_stat_labels <- function(stat_label = NULL) {
|
|
| 169 | 50x |
dplyr::tribble( |
| 170 | 50x |
~stat_name, ~stat_label, |
| 171 | 50x |
"mean", "Mean", |
| 172 | 50x |
"median", "Median", |
| 173 | 50x |
"var", "Variance", |
| 174 | 50x |
"sd", "Standard Deviation", |
| 175 | 50x |
"sum", "Sum", |
| 176 | 50x |
"deff", "Design Effect", |
| 177 | 50x |
"mean.std.error", "SE(Mean)", |
| 178 | 50x |
"min", "Minimum", |
| 179 | 50x |
"max", "Maximum", |
| 180 | 50x |
"p25", "25% Percentile", |
| 181 | 50x |
"p75", "75% Percentile" |
| 182 |
) |
|
| 183 |
} |
|
| 184 | ||
| 185 |
accepted_svy_stats <- function(expand_quantiles = TRUE) {
|
|
| 186 | 92x |
base_stats <- |
| 187 | 92x |
c("mean", "median", "min", "max", "sum", "var", "sd", "mean.std.error", "deff")
|
| 188 | 92x |
if (expand_quantiles) {
|
| 189 | 92x |
return(c(base_stats, paste0("p", 0:100)))
|
| 190 |
} |
|
| 191 | ! |
c(base_stats, "p##") |
| 192 |
} |
|
| 193 | ||
| 194 | ||
| 195 | ||
| 196 |
# this function calculates the summary for a single variable, single statistic |
|
| 197 |
# and for all `by` levels. it returns an ARD data frame |
|
| 198 |
.compute_svy_stat <- function(data, variable, by = NULL, stat_name) {
|
|
| 199 |
# difftime variable needs to be transformed into numeric for svyquantile |
|
| 200 | 324x |
if (inherits(data$variables[[variable]], "difftime")) {
|
| 201 | ! |
data$variables[[variable]] <- unclass(data$variables[[variable]]) |
| 202 |
} |
|
| 203 | ||
| 204 |
# styler: off |
|
| 205 | 12x |
if (stat_name %in% "mean") args <- list(FUN = survey::svymean) |
| 206 | 6x |
else if (stat_name %in% "sum") args <- list(FUN = survey::svytotal) |
| 207 | 6x |
else if (stat_name %in% "var") args <- list(FUN = survey::svyvar) |
| 208 | 6x |
else if (stat_name %in% "sd") args <- list(FUN = \(...) survey::svyvar(...) |> sqrt()) |
| 209 | 6x |
else if (stat_name %in% "mean.std.error") args <- list(FUN = \(...) survey::svymean(...) |> survey::SE()) |
| 210 | 6x |
else if (stat_name %in% "deff") args <- list(FUN = \(...) survey::svymean(..., deff = TRUE) |> survey::deff()) |
| 211 | 12x |
else if (stat_name %in% "min") args <- list(FUN = \(x, design, na.rm, ...) min(design$variables[[all.vars(x)]], na.rm = na.rm)) |
| 212 | 12x |
else if (stat_name %in% "max") args <- list(FUN = \(x, design, na.rm, ...) max(design$variables[[all.vars(x)]], na.rm = na.rm)) |
| 213 |
# define functions for the quantiles |
|
| 214 | 258x |
else if (stat_name %in% c("median", paste0("p", 0:100))) {
|
| 215 | 258x |
quantile <- ifelse(stat_name %in% "median", 0.5, as.numeric(substr(stat_name, 2, nchar(stat_name))) / 100) |
| 216 |
# univariate results are returned in a different format from stratified. |
|
| 217 | 258x |
args <- |
| 218 | 258x |
if (is_empty(by)) list(FUN = \(...) survey::svyquantile(...)[[1]], quantiles = quantile) |
| 219 | 258x |
else list(FUN = \(...) survey::svyquantile(...), quantiles = quantile) |
| 220 |
} |
|
| 221 |
# styler: on |
|
| 222 | ||
| 223 |
# adding additional args to pass |
|
| 224 | 324x |
args <- |
| 225 | 324x |
args |> |
| 226 | 324x |
append( |
| 227 | 324x |
list( |
| 228 | 324x |
design = data, |
| 229 |
# if all values are NA, turn na.rm to FALSE to avoid error |
|
| 230 | 324x |
na.rm = !all(is.na(data$variables[[variable]])), |
| 231 | 324x |
keep.var = FALSE |
| 232 |
) |
|
| 233 |
) |
|
| 234 | ||
| 235 | ||
| 236 |
# if no by variable, calculate univariate statistics |
|
| 237 | 324x |
if (is_empty(by)) {
|
| 238 | 46x |
args$x <- reformulate2(variable) |
| 239 |
# calculate statistic (and remove FUN from the argument list) |
|
| 240 | 46x |
stat <- |
| 241 | 46x |
cards::eval_capture_conditions( |
| 242 | 46x |
do.call(args$FUN, args = args |> utils::modifyList(list(FUN = NULL))) |
| 243 |
) |
|
| 244 |
# if the result was calculated, then put it into a tibble |
|
| 245 | 46x |
if (!is.null(stat[["result"]])) {
|
| 246 | 40x |
df_stat <- |
| 247 | 40x |
dplyr::tibble(variable, stat[["result"]][1]) |> |
| 248 | 40x |
set_names(c("variable", "stat")) |>
|
| 249 | 40x |
dplyr::mutate( |
| 250 | 40x |
stat = as.list(unname(.data$stat)), |
| 251 | 40x |
warning = list(stat[["warning"]]), |
| 252 | 40x |
error = list(stat[["error"]]) |
| 253 |
) |
|
| 254 |
} |
|
| 255 |
# otherwise, if there was an error return tibble with error message |
|
| 256 |
else {
|
|
| 257 | 6x |
df_stat <- |
| 258 | 6x |
dplyr::tibble( |
| 259 | 6x |
variable = .env$variable, |
| 260 | 6x |
stat = list(NULL), |
| 261 | 6x |
warning = list(.env$stat[["warning"]]), |
| 262 | 6x |
error = list(.env$stat[["error"]]) |
| 263 |
) |
|
| 264 |
} |
|
| 265 |
} |
|
| 266 | ||
| 267 |
# if there is by variable(s), calculate statistics for the combinations |
|
| 268 |
else {
|
|
| 269 | 278x |
args$formula <- reformulate2(variable) |
| 270 | 278x |
args$by <- reformulate2(by) |
| 271 | 278x |
stat <- |
| 272 | 278x |
if (stat_name %in% c("median", paste0("p", 0:100))) {
|
| 273 | 248x |
cards::eval_capture_conditions( |
| 274 | 248x |
do.call(survey::svyby, args) |> set_names(c(by, "quantile", "ci.2.5", "ci.97.5", "se")) |
| 275 |
) |
|
| 276 | 278x |
} else if (stat_name %in% "deff") {
|
| 277 | 3x |
stat <- |
| 278 | 3x |
cards::eval_capture_conditions( |
| 279 | 3x |
do.call( |
| 280 | 3x |
survey::svyby, |
| 281 | 3x |
args |> utils::modifyList(list(FUN = survey::svymean, deff = TRUE)) |
| 282 |
) |> |
|
| 283 | 3x |
dplyr::select(all_of(by), dplyr::last_col()) # the last column is DEff |
| 284 |
) |
|
| 285 |
} else {
|
|
| 286 | 27x |
cards::eval_capture_conditions(do.call(survey::svyby, args)) |
| 287 |
} |
|
| 288 | ||
| 289 |
# if the result was calculated, then put it into a tibble |
|
| 290 | 278x |
if (!is.null(stat[["result"]])) {
|
| 291 | 122x |
df_stat <- stat[["result"]][seq_len(length(by) + 1L)] |> |
| 292 | 122x |
dplyr::as_tibble() %>% |
| 293 |
# adding unobserved combinations of "by" variables |
|
| 294 |
{
|
|
| 295 | 122x |
dplyr::full_join( |
| 296 | 122x |
cards::nest_for_ard(data$variables, by = by, key = "...ard_no_one_will_ever_pick_this...", rename = FALSE, list_columns = FALSE) |> |
| 297 | 122x |
dplyr::select(-"...ard_no_one_will_ever_pick_this..."), |
| 298 |
., |
|
| 299 | 122x |
by = by |
| 300 |
) |
|
| 301 |
} |> |
|
| 302 | 122x |
set_names(paste0("group", seq_along(by), "_level"), "stat") |>
|
| 303 | 122x |
dplyr::bind_cols( |
| 304 | 122x |
dplyr::tibble(!!!c(by, variable)) |> |
| 305 | 122x |
set_names(paste0("group", seq_along(by)), "variable")
|
| 306 |
) |> |
|
| 307 | 122x |
dplyr::mutate( |
| 308 | 122x |
dplyr::across(c(cards::all_ard_groups("levels"), "stat"), as.list),
|
| 309 | 122x |
warning = list(.env$stat[["warning"]]), |
| 310 | 122x |
error = list(.env$stat[["error"]]) |
| 311 |
) |
|
| 312 |
} |
|
| 313 |
# otherwise, if there was an error return tibble with error message |
|
| 314 |
else {
|
|
| 315 | 156x |
df_stat <- |
| 316 | 156x |
cards::nest_for_ard(data$variables, by = by, key = "...ard_no_one_will_ever_pick_this...") |> |
| 317 | 156x |
dplyr::select(-"...ard_no_one_will_ever_pick_this...") |> |
| 318 | 156x |
dplyr::mutate( |
| 319 | 156x |
variable = .env$variable, |
| 320 | 156x |
stat = list(NULL), |
| 321 | 156x |
warning = list(.env$stat[["warning"]]), |
| 322 | 156x |
error = list(.env$stat[["error"]]) |
| 323 |
) |
|
| 324 |
} |
|
| 325 |
} |
|
| 326 | ||
| 327 | 324x |
df_stat |> |
| 328 | 324x |
dplyr::mutate( |
| 329 | 324x |
stat_name = .env$stat_name, |
| 330 | 324x |
across( |
| 331 | 324x |
c(cards::all_ard_groups("levels"), cards::all_ard_variables("levels")),
|
| 332 | 324x |
~ map(.x, as.character) |
| 333 |
) |
|
| 334 |
) |
|
| 335 |
} |
|
| 336 | ||
| 337 | ||
| 338 |
# some operations coerce the variable types to character. |
|
| 339 |
# this function will convert the `_level` values to their original types |
|
| 340 |
.restore_original_column_types <- function(ard, data) {
|
|
| 341 |
# identify grouping variable names with associated levels -------------------- |
|
| 342 | 179x |
by <- character() |
| 343 | 179x |
for (v in names(dplyr::select(ard, cards::all_ard_groups("names")))) {
|
| 344 | 120x |
if (paste0(v, "_level") %in% names(ard)) by <- c(by, ard[[v]][1]) # styler: off |
| 345 |
} |
|
| 346 | 179x |
variables <- character() |
| 347 | 179x |
if ("variable" %in% names(ard)) {
|
| 348 | 179x |
variables <- ard[["variable"]] |> unique() |
| 349 |
} |
|
| 350 | ||
| 351 |
# if there are no levels to correct, then return ard as it is |
|
| 352 | ! |
if (is_empty(variables) && is_empty(by)) return(ard) # styler: off |
| 353 | ||
| 354 |
# add an ID for sorting |
|
| 355 | 179x |
ard$...ard_id_for_sorting... <- seq_len(nrow(ard)) |
| 356 | ||
| 357 |
# nest the raw data with original types -------------------------------------- |
|
| 358 | 179x |
if (!is_empty(variables)) {
|
| 359 | 65x |
if (!"variable_level" %in% names(ard)) df_variable_orginal_types <- unique(ard["variable"]) # styler: off |
| 360 | 114x |
else if (!all(variables %in% names(data))) { # for survfit summaries, the times/probs var is not in the data
|
| 361 | 5x |
df_variable_orginal_types <- unique(ard[c("variable", "variable_level")])
|
| 362 |
} else {
|
|
| 363 | 109x |
df_variable_orginal_types <- |
| 364 | 109x |
map( |
| 365 | 109x |
variables, |
| 366 | 109x |
~ cards::nest_for_ard(tidyr::drop_na(data[.x]), by = .x, include_data = FALSE) |> |
| 367 | 109x |
stats::setNames(c("variable", "variable_level"))
|
| 368 |
) |> |
|
| 369 | 109x |
dplyr::bind_rows() |
| 370 |
} |
|
| 371 |
} |
|
| 372 | 179x |
if (!is_empty(by)) {
|
| 373 | 117x |
df_by_orginal_types <- |
| 374 | 117x |
cards::nest_for_ard(tidyr::drop_na(data[by]), by = by, include_data = FALSE) |
| 375 |
} |
|
| 376 | ||
| 377 |
# combine groups and variables together |
|
| 378 | 179x |
if (!is_empty(variables) && !is_empty(by)) {
|
| 379 | 117x |
df_original_types <- |
| 380 | 117x |
dplyr::cross_join(df_by_orginal_types, df_variable_orginal_types) |
| 381 | 62x |
} else if (!is_empty(variables)) {
|
| 382 | 62x |
df_original_types <- df_variable_orginal_types |
| 383 | ! |
} else if (!is_empty(by)) {
|
| 384 | ! |
df_original_types <- df_by_orginal_types |
| 385 |
} |
|
| 386 | ||
| 387 |
# unlisting the sorting according the character value |
|
| 388 | 179x |
df_original_types <- df_original_types |> |
| 389 | 179x |
dplyr::arrange(across(everything(), ~ map(., as.character) |> unlist())) |
| 390 | ||
| 391 | 179x |
ard_nested <- ard |> |
| 392 | 179x |
tidyr::nest(..ard_data... = -c(cards::all_ard_groups(), cards::all_ard_variables())) |> |
| 393 | 179x |
dplyr::arrange(across( |
| 394 | 179x |
c(cards::all_ard_groups(), cards::all_ard_variables()), |
| 395 | 179x |
~ map(., as.character) |> unlist() |
| 396 |
)) |
|
| 397 | ||
| 398 |
# if all columns match, then replace the coerced character cols with their original type/class |
|
| 399 | 179x |
all_cols_equal <- |
| 400 | 179x |
every( |
| 401 | 179x |
names(df_original_types) |> setdiff("variable_level"),
|
| 402 | 179x |
~ all( |
| 403 | 179x |
unlist(ard_nested[[.x]]) == as.character(unlist(df_original_types[[.x]])) | |
| 404 | 179x |
(is.na(unlist(ard_nested[[.x]])) & is.na(unlist(df_original_types[[.x]]))) |
| 405 |
) |
|
| 406 |
) |
|
| 407 |
# the variable level needs to be handled separately because there can be mixed type and we can't unlist |
|
| 408 | 179x |
if (isTRUE(all_cols_equal) && "variable_level" %in% names(df_original_types)) {
|
| 409 | 114x |
all_cols_equal <- |
| 410 | 114x |
seq_len(nrow(df_original_types)) |> |
| 411 | 114x |
map_lgl( |
| 412 | 114x |
~ identical( |
| 413 | 114x |
as.character(df_original_types[["variable_level"]][[.x]]), |
| 414 | 114x |
as.character(ard_nested[["variable_level"]][[.x]]) |
| 415 |
) |
|
| 416 |
) |> |
|
| 417 | 114x |
all() |
| 418 |
} |
|
| 419 | ||
| 420 | 179x |
if (isTRUE(all_cols_equal)) {
|
| 421 | 179x |
return( |
| 422 | 179x |
dplyr::bind_cols( |
| 423 | 179x |
df_original_types, |
| 424 | 179x |
dplyr::select(ard_nested, -all_of(names(df_original_types))), |
| 425 | 179x |
.name_repair = "minimal" |
| 426 |
) |> |
|
| 427 | 179x |
tidyr::unnest(cols = "..ard_data...") |> |
| 428 | 179x |
dplyr::arrange(.data$...ard_id_for_sorting...) |> |
| 429 | 179x |
dplyr::select(-"...ard_id_for_sorting...") |> |
| 430 | 179x |
cards::as_card() |
| 431 |
) |
|
| 432 |
} |
|
| 433 | ||
| 434 |
# I hope this message is never triggered! |
|
| 435 | ! |
cli::cli_inform(c( |
| 436 | ! |
"If you see this message, variable levels have been coerced to character, which could cause downstream issues.", |
| 437 | ! |
"*" = "Please post a reproducible example to {.url https://github.com/insightsengineering/cardx/issues/new}, so we can address in the next release.",
|
| 438 | ! |
"i" = "You can create a minimal reproducible example with {.fun reprex::reprex}."
|
| 439 |
)) |
|
| 440 | ||
| 441 | ! |
ard |> |
| 442 | ! |
dplyr::arrange(.data$...ard_id_for_sorting...) |> |
| 443 | ! |
dplyr::select(-"...ard_id_for_sorting...") |
| 444 |
} |
| 1 |
#' ARD Incidence Rate |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' |
|
| 5 |
#' Function takes a time at risk variable (`time`) and event count variable (`count`) and calculates the incidence |
|
| 6 |
#' rate in person-years. |
|
| 7 |
#' |
|
| 8 |
#' Incidence rate is calculated as: Total number of events that occurred / Total person-time at risk |
|
| 9 |
#' |
|
| 10 |
#' @param data (`data.frame`)\cr |
|
| 11 |
#' a data frame. |
|
| 12 |
#' @param time ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 13 |
#' column name of time at risk variable. |
|
| 14 |
#' @param count ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 15 |
#' column name of variable indicating count of events that occurred. If `NULL`, each row in `data` is assumed to |
|
| 16 |
#' correspond to a single event occurrence. |
|
| 17 |
#' @param id ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 18 |
#' column name used to identify unique subjects in `data`. If `NULL`, each row in `data` is assumed to correspond to |
|
| 19 |
#' a unique subject. |
|
| 20 |
#' @param n_person_time (`numeric`)\cr |
|
| 21 |
#' amount of person-time to estimate incidence rate for. Defaults to 100. |
|
| 22 |
#' @param unit_label (`string`)\cr |
|
| 23 |
#' label for the unit of values in `time` and estimated person-time output (e.g. `"years"` for person-years, |
|
| 24 |
#' `"days"` for person-days, etc.). If the desired person-time estimate unit does not match the current `time` unit, |
|
| 25 |
#' values of `time` should be converted to the correct unit during pre-processing. Defaults to `"time"` (person-time). |
|
| 26 |
#' @param conf.level (`numeric`)\cr |
|
| 27 |
#' confidence level for the estimated incidence rate. |
|
| 28 |
#' @param conf.type (`string`)\cr |
|
| 29 |
#' confidence interval type for the estimated incidence rate. |
|
| 30 |
#' |
|
| 31 |
#' One of: `normal` (default), `normal-log`, `exact`, or `byar`. |
|
| 32 |
#' @inheritParams cards::ard_summary |
|
| 33 |
#' |
|
| 34 |
#' @return an ARD data frame of class 'card' |
|
| 35 |
#' @export |
|
| 36 |
#' |
|
| 37 |
#' @details |
|
| 38 |
#' The formulas used to calculate the confidence interval for each CI type are as |
|
| 39 |
#' follows, where \eqn{x_i} and \eqn{t_i} represent the number of events and follow-up
|
|
| 40 |
#' time for subject \eqn{i}, respectively.
|
|
| 41 |
#' |
|
| 42 |
#' * `byar`: Byar's approximation of a Poisson CI. A continuity correction of 0.5 is included in the calculation. |
|
| 43 |
#' |
|
| 44 |
#' \deqn{CI = (\sum{x_i} + 0.5) (1 - 1 / (9 \times (\sum{x_i} + 0.5)) \pm Z_{1 - \alpha / 2} / (3 \sqrt{\sum{x_i} + 0.5}))^3 / \sum{t_i}}
|
|
| 45 |
#' |
|
| 46 |
#' * `normal`: Normal CI. |
|
| 47 |
#' |
|
| 48 |
#' \deqn{CI = \sum{x_i} / \sum{t_i} \pm Z_{1 - \alpha / 2} \sqrt{\sum{x_i}} / \sum{t_i}}
|
|
| 49 |
#' |
|
| 50 |
#' * `normal-log`: Normal-Log CI. |
|
| 51 |
#' |
|
| 52 |
#' \deqn{CI = \exp(\log(\sum{x_i} / \sum{t_i}) \pm Z_{1 - \alpha / 2} / \sqrt{\sum{x_i}})}
|
|
| 53 |
#' |
|
| 54 |
#' * `exact`: Exact CI for a Poisson mean. |
|
| 55 |
#' |
|
| 56 |
#' \deqn{CI_{lower} = \chi^2_{\alpha / 2, 2\sum{x_i} + 2} / {2 \sum{t_i}}}
|
|
| 57 |
#' \deqn{CI_{upper} = \chi^2_{1 - \alpha / 2, 2\sum{x_i} + 2} / {2 \sum{t_i}}}
|
|
| 58 |
#' |
|
| 59 |
#' @examples |
|
| 60 |
#' set.seed(1) |
|
| 61 |
#' data <- data.frame( |
|
| 62 |
#' USUBJID = 1:100, |
|
| 63 |
#' TRTA = sample(LETTERS[1:3], 100, replace = TRUE), |
|
| 64 |
#' AETTE1 = abs(rnorm(100, mean = 0.5)), |
|
| 65 |
#' AETOT1 = sample(0:20, 100, replace = TRUE) |
|
| 66 |
#' ) |
|
| 67 |
#' |
|
| 68 |
#' data |> |
|
| 69 |
#' ard_incidence_rate(time = AETTE1, count = AETOT1, id = USUBJID, by = TRTA, unit_label = "years") |
|
| 70 |
ard_incidence_rate <- function(data, |
|
| 71 |
time, |
|
| 72 |
count = NULL, |
|
| 73 |
id = NULL, |
|
| 74 |
by = NULL, |
|
| 75 |
strata = NULL, |
|
| 76 |
n_person_time = 100, |
|
| 77 |
unit_label = "time", |
|
| 78 |
conf.level = 0.95, |
|
| 79 |
conf.type = c("normal", "normal-log", "exact", "byar")) {
|
|
| 80 | 10x |
set_cli_abort_call() |
| 81 | ||
| 82 |
# check inputs --------------------------------------------------------------- |
|
| 83 | 10x |
check_data_frame(data) |
| 84 | 10x |
cards::process_selectors( |
| 85 | 10x |
data, |
| 86 | 10x |
time = {{ time }}, by = {{ by }}, strata = {{ strata }}, count = {{ count }}, id = {{ id }}
|
| 87 |
) |
|
| 88 | 10x |
check_scalar(time) |
| 89 | 10x |
check_string(unit_label) |
| 90 | 9x |
check_scalar(count, allow_empty = TRUE) |
| 91 | 9x |
check_scalar(id, allow_empty = TRUE) |
| 92 | 9x |
check_scalar_range(conf.level, c(0, 1)) |
| 93 | 9x |
check_numeric(n_person_time) |
| 94 | 9x |
check_scalar(n_person_time) |
| 95 | 9x |
if (!class(data[[time]]) %in% c("numeric", "integer")) {
|
| 96 | 1x |
cli::cli_abort( |
| 97 | 1x |
message = paste( |
| 98 | 1x |
"The {.arg time} variable must be of type {.cls numeric/integer} but {.arg {time}} is",
|
| 99 | 1x |
"{.obj_type_friendly {data[[time]]}}."
|
| 100 |
), |
|
| 101 | 1x |
call = get_cli_abort_call() |
| 102 |
) |
|
| 103 |
} |
|
| 104 | ||
| 105 | ||
| 106 | 8x |
conf.type <- arg_match(conf.type, error_call = get_cli_abort_call()) |
| 107 | ||
| 108 |
# build ARD ------------------------------------------------------------------ |
|
| 109 | 7x |
cards::ard_mvsummary( |
| 110 | 7x |
data = data, |
| 111 | 7x |
variables = all_of(time), |
| 112 | 7x |
by = any_of(by), |
| 113 | 7x |
strata = any_of(strata), |
| 114 | 7x |
statistic = all_of(time) ~ list( |
| 115 | 7x |
incidence_rate = |
| 116 | 7x |
.calc_incidence_rate(data, time, count, id, by, strata, n_person_time, unit_label, conf.level, conf.type) |
| 117 |
) |
|
| 118 |
) |> |
|
| 119 | 7x |
dplyr::select(-"stat_label") |> |
| 120 | 7x |
dplyr::left_join( |
| 121 | 7x |
.df_incidence_rate_stat_labels(n_person_time, unit_label), |
| 122 | 7x |
by = "stat_name" |
| 123 |
) |> |
|
| 124 | 7x |
dplyr::mutate( |
| 125 | 7x |
stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name), |
| 126 | 7x |
context = "incidence_rate", |
| 127 |
) |> |
|
| 128 | 7x |
cards::as_card() |> |
| 129 | 7x |
cards::tidy_ard_column_order() |> |
| 130 | 7x |
cards::tidy_ard_row_order() |
| 131 |
} |
|
| 132 | ||
| 133 |
# function to perform calculations ------------------------------------------- |
|
| 134 |
.calc_incidence_rate <- function(data, time, count, id, by, strata, n_person_time, unit_label, conf.level, conf.type) {
|
|
| 135 | 7x |
cards::as_cards_fn( |
| 136 | 7x |
\(x, data, ...) {
|
| 137 |
# calculate number of unique IDs with >=1 event |
|
| 138 | 7x |
N <- if (!is_empty(id)) {
|
| 139 | 6x |
sum(!is.na(unique(data[[id]]))) |
| 140 |
} else {
|
|
| 141 | 1x |
nrow(data) |
| 142 |
} |
|
| 143 | ||
| 144 |
# calculate total person-years |
|
| 145 | 7x |
tot_person_time <- sum(x, na.rm = TRUE) |
| 146 | ||
| 147 |
# calculate total number of events |
|
| 148 | 7x |
n_events <- if (!is_empty(count)) sum(data[[count]], na.rm = TRUE) else nrow(data) |
| 149 | ||
| 150 | 7x |
rate_est <- n_events / tot_person_time |
| 151 | 7x |
rate_se <- sqrt(rate_est / tot_person_time) |
| 152 | 7x |
alpha <- 1 - conf.level |
| 153 | 7x |
if (conf.type %in% c("normal", "normal-log")) {
|
| 154 | 5x |
rate_ci <- if (conf.type == "normal") {
|
| 155 | 4x |
rate_est + c(-1, 1) * stats::qnorm(1 - alpha / 2) * rate_se |
| 156 |
} else {
|
|
| 157 | 1x |
exp(log(rate_est) + c(-1, 1) * stats::qnorm(1 - alpha / 2) * rate_se / rate_est) |
| 158 |
} |
|
| 159 | 5x |
conf.low <- rate_ci[1] |
| 160 | 5x |
conf.high <- rate_ci[2] |
| 161 | 2x |
} else if (conf.type == "exact") {
|
| 162 | 1x |
conf.low <- stats::qchisq(p = alpha / 2, df = 2 * n_events) / (2 * tot_person_time) |
| 163 | 1x |
conf.high <- stats::qchisq(p = 1 - alpha / 2, df = 2 * n_events + 2) / (2 * tot_person_time) |
| 164 | 1x |
} else if (conf.type == "byar") {
|
| 165 | 1x |
seg_1 <- n_events + 0.5 |
| 166 | 1x |
seg_2 <- 1 - 1 / (9 * (n_events + 0.5)) |
| 167 | 1x |
seg_3 <- stats::qnorm(1 - alpha / 2) * sqrt(1 / (n_events + 0.5)) / 3 |
| 168 | 1x |
conf.low <- seg_1 * ((seg_2 - seg_3)^3) / tot_person_time |
| 169 | 1x |
conf.high <- seg_1 * ((seg_2 + seg_3)^3) / tot_person_time |
| 170 |
} |
|
| 171 | ||
| 172 | 7x |
dplyr::tibble( |
| 173 | 7x |
estimate = rate_est * n_person_time, |
| 174 | 7x |
std.error = rate_se, |
| 175 | 7x |
conf.low = conf.low * n_person_time, |
| 176 | 7x |
conf.high = conf.high * n_person_time, |
| 177 | 7x |
conf.type = conf.type, |
| 178 | 7x |
conf.level = conf.level, |
| 179 | 7x |
tot_person_time = tot_person_time, |
| 180 | 7x |
n_events = n_events, |
| 181 | 7x |
N = N |
| 182 |
) |
|
| 183 |
}, |
|
| 184 | 7x |
stat_names = c( |
| 185 | 7x |
"estimate", "std.error", "conf.low", "conf.high", "conf.type", "conf.level", |
| 186 | 7x |
"tot_person_time", "n_events", "N" |
| 187 |
) |
|
| 188 |
) |
|
| 189 |
} |
|
| 190 | ||
| 191 |
.df_incidence_rate_stat_labels <- function(n_person_time, unit_label) {
|
|
| 192 | 7x |
time_unit <- paste0("Person-", str_replace(unit_label, "([[:alpha:]])", substr(toupper(unit_label), 1, 1)))
|
| 193 | ||
| 194 | 7x |
dplyr::tribble( |
| 195 | 7x |
~stat_name, ~stat_label, |
| 196 | 7x |
"estimate", paste("Incidence rate per", n_person_time, time_unit),
|
| 197 | 7x |
"std.error", "Standard Error", |
| 198 | 7x |
"conf.low", "CI Lower Bound", |
| 199 | 7x |
"conf.high", "CI Upper Bound", |
| 200 | 7x |
"conf.type", "CI Type", |
| 201 | 7x |
"conf.level", "CI Confidence Level", |
| 202 | 7x |
"tot_person_time", paste(time_unit, "at Risk"), |
| 203 | 7x |
"n_events", "Number of Events Observed", |
| 204 | 7x |
"N", "Number of Subjects Observed" |
| 205 |
) |
|
| 206 |
} |
| 1 |
#' ARD Hedge's G Test |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Analysis results data for paired and non-paired Hedge's G Effect Size Test |
|
| 5 |
#' using [`effectsize::hedges_g()`]. |
|
| 6 |
#' |
|
| 7 |
#' @param data (`data.frame`)\cr |
|
| 8 |
#' a data frame. See below for details. |
|
| 9 |
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 10 |
#' column name to compare by. Must be a categorical variable with exactly two levels. |
|
| 11 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 12 |
#' column names to be compared. Must be a continuous variable. Independent |
|
| 13 |
#' tests will be run for each variable |
|
| 14 |
#' @param id ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 15 |
#' column name of the subject or participant ID |
|
| 16 |
#' @param conf.level (scalar `numeric`)\cr |
|
| 17 |
#' confidence level for confidence interval. Default is `0.95`. |
|
| 18 |
#' @param ... arguments passed to `effectsize::hedges_g(...)` |
|
| 19 |
#' |
|
| 20 |
#' @return ARD data frame |
|
| 21 |
#' @name ard_effectsize_hedges_g |
|
| 22 |
#' |
|
| 23 |
#' @details |
|
| 24 |
#' For the `ard_effectsize_hedges_g()` function, the data is expected to be one row per subject. |
|
| 25 |
#' The data is passed as `effectsize::hedges_g(data[[variable]]~data[[by]], data, paired = FALSE, ...)`. |
|
| 26 |
#' |
|
| 27 |
#' For the `ard_effectsize_paired_hedges_g()` function, the data is expected to be one row |
|
| 28 |
#' per subject per by level. Before the effect size is calculated, the data are |
|
| 29 |
#' reshaped to a wide format to be one row per subject. |
|
| 30 |
#' The data are then passed as |
|
| 31 |
#' `effectsize::hedges_g(x = data_wide[[<by level 1>]], y = data_wide[[<by level 2>]], paired = TRUE, ...)`. |
|
| 32 |
#' |
|
| 33 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("effectsize", "parameters")))
|
|
| 34 |
#' cards::ADSL |> |
|
| 35 |
#' dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")) |>
|
|
| 36 |
#' ard_effectsize_hedges_g(by = ARM, variables = AGE) |
|
| 37 |
#' |
|
| 38 |
#' # constructing a paired data set, |
|
| 39 |
#' # where patients receive both treatments |
|
| 40 |
#' cards::ADSL[c("ARM", "AGE")] |>
|
|
| 41 |
#' dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")) |>
|
|
| 42 |
#' dplyr::mutate(.by = ARM, USUBJID = dplyr::row_number()) |> |
|
| 43 |
#' dplyr::arrange(USUBJID, ARM) |> |
|
| 44 |
#' dplyr::group_by(USUBJID) |> |
|
| 45 |
#' dplyr::filter(dplyr::n() > 1) |> |
|
| 46 |
#' ard_effectsize_paired_hedges_g(by = ARM, variables = AGE, id = USUBJID) |
|
| 47 |
NULL |
|
| 48 | ||
| 49 |
#' @rdname ard_effectsize_hedges_g |
|
| 50 |
#' @export |
|
| 51 |
ard_effectsize_hedges_g <- function(data, by, variables, conf.level = 0.95, ...) {
|
|
| 52 | 4x |
set_cli_abort_call() |
| 53 | ||
| 54 |
# check installed packages --------------------------------------------------- |
|
| 55 | 4x |
check_pkg_installed(c("effectsize", "parameters"))
|
| 56 | ||
| 57 |
# check/process inputs ------------------------------------------------------- |
|
| 58 | 4x |
check_not_missing(data) |
| 59 | 4x |
check_not_missing(variables) |
| 60 | 4x |
check_data_frame(data) |
| 61 | 4x |
data <- dplyr::ungroup(data) |
| 62 | 4x |
cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
|
| 63 | 4x |
check_scalar(by) |
| 64 | 4x |
check_range(conf.level, range = c(0, 1)) |
| 65 | ||
| 66 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 67 | 4x |
if (is_empty(variables)) {
|
| 68 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 69 |
} |
|
| 70 | ||
| 71 |
# build ARD ------------------------------------------------------------------ |
|
| 72 | 4x |
lapply( |
| 73 | 4x |
variables, |
| 74 | 4x |
function(variable) {
|
| 75 | 5x |
.format_hedges_g_results( |
| 76 | 5x |
by = by, |
| 77 | 5x |
variable = variable, |
| 78 | 5x |
lst_tidy = |
| 79 | 5x |
cards::eval_capture_conditions( |
| 80 | 5x |
effectsize::hedges_g( |
| 81 | 5x |
reformulate2(by, response = variable), |
| 82 | 5x |
data = data |> tidyr::drop_na(all_of(c(by, variable))), |
| 83 | 5x |
paired = FALSE, |
| 84 | 5x |
ci = conf.level, |
| 85 |
... |
|
| 86 |
) |> |
|
| 87 | 5x |
parameters::standardize_names(style = "broom") |> |
| 88 | 5x |
dplyr::mutate(method = "Hedge's G") |
| 89 |
), |
|
| 90 | 5x |
paired = FALSE, |
| 91 |
... |
|
| 92 |
) |
|
| 93 |
} |
|
| 94 |
) |> |
|
| 95 | 4x |
dplyr::bind_rows() |
| 96 |
} |
|
| 97 | ||
| 98 |
#' @rdname ard_effectsize_hedges_g |
|
| 99 |
#' @export |
|
| 100 |
ard_effectsize_paired_hedges_g <- function(data, by, variables, id, conf.level = 0.95, ...) {
|
|
| 101 | 3x |
set_cli_abort_call() |
| 102 | ||
| 103 |
# check installed packages --------------------------------------------------- |
|
| 104 | 3x |
check_pkg_installed(c("effectsize", "parameters"))
|
| 105 | ||
| 106 |
# check/process inputs ------------------------------------------------------- |
|
| 107 | 3x |
check_not_missing(data) |
| 108 | 3x |
check_not_missing(variables) |
| 109 | 3x |
check_not_missing(by) |
| 110 | 3x |
check_not_missing(id) |
| 111 | 3x |
check_data_frame(data) |
| 112 | 3x |
data <- dplyr::ungroup(data) |
| 113 | 3x |
cards::process_selectors(data, by = {{ by }}, variables = {{ variables }}, id = {{ id }})
|
| 114 | 3x |
check_scalar(by) |
| 115 | 3x |
check_scalar(id) |
| 116 | 3x |
check_range(conf.level, range = c(0, 1)) |
| 117 | ||
| 118 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 119 | 3x |
if (is_empty(variables)) {
|
| 120 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 121 |
} |
|
| 122 | ||
| 123 |
# build ARD ------------------------------------------------------------------ |
|
| 124 | 3x |
lapply( |
| 125 | 3x |
variables, |
| 126 | 3x |
function(variable) {
|
| 127 | 3x |
.format_hedges_g_results( |
| 128 | 3x |
by = by, |
| 129 | 3x |
variable = variable, |
| 130 | 3x |
lst_tidy = |
| 131 | 3x |
cards::eval_capture_conditions({
|
| 132 |
# adding this reshape inside the eval, so if there is an error it's captured in the ARD object |
|
| 133 | 3x |
data_wide <- |
| 134 | 3x |
data |> |
| 135 | 3x |
tidyr::drop_na(all_of(c(id, by, variable))) |> |
| 136 | 3x |
.paired_data_pivot_wider(by = by, variable = variable, id = id) |> |
| 137 | 3x |
tidyr::drop_na(any_of(c("by1", "by2")))
|
| 138 |
# perform paired cohen's d test |
|
| 139 | 2x |
effectsize::hedges_g(x = data_wide[["by1"]], y = data_wide[["by2"]], paired = TRUE, ci = conf.level, ...) |> |
| 140 | 2x |
parameters::standardize_names(style = "broom") |> |
| 141 | 2x |
dplyr::mutate(method = "Paired Hedge's G") |
| 142 |
}), |
|
| 143 | 3x |
paired = TRUE, |
| 144 |
... |
|
| 145 |
) |
|
| 146 |
} |
|
| 147 |
) |> |
|
| 148 | 3x |
dplyr::bind_rows() |
| 149 |
} |
|
| 150 | ||
| 151 |
#' Convert Hedge's G Test to ARD |
|
| 152 |
#' |
|
| 153 |
#' @inheritParams cards::tidy_as_ard |
|
| 154 |
#' @inheritParams effectsize::hedges_g |
|
| 155 |
#' @param by (`string`)\cr by column name |
|
| 156 |
#' @param variable (`string`)\cr variable column name |
|
| 157 |
#' @param ... passed to `hedges_g(...)` |
|
| 158 |
#' |
|
| 159 |
#' @return ARD data frame |
|
| 160 |
#' @keywords internal |
|
| 161 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("effectsize", "parameters")))
|
|
| 162 |
#' cardx:::.format_hedges_g_results( |
|
| 163 |
#' by = "ARM", |
|
| 164 |
#' variable = "AGE", |
|
| 165 |
#' paired = FALSE, |
|
| 166 |
#' lst_tidy = |
|
| 167 |
#' cards::eval_capture_conditions( |
|
| 168 |
#' effectsize::hedges_g(data[[variable]] ~ data[[by]], paired = FALSE) |> |
|
| 169 |
#' parameters::standardize_names(style = "broom") |
|
| 170 |
#' ) |
|
| 171 |
#' ) |
|
| 172 |
.format_hedges_g_results <- function(by, variable, lst_tidy, paired, ...) {
|
|
| 173 |
# build ARD ------------------------------------------------------------------ |
|
| 174 | 8x |
ret <- |
| 175 | 8x |
cards::tidy_as_ard( |
| 176 | 8x |
lst_tidy = lst_tidy, |
| 177 | 8x |
tidy_result_names = c( |
| 178 | 8x |
"estimate", "conf.level", "conf.low", "conf.high" |
| 179 |
), |
|
| 180 | 8x |
fun_args_to_record = c("mu", "paired", "pooled_sd", "alternative"),
|
| 181 | 8x |
formals = formals(asNamespace("effectsize")[["hedges_g"]]),
|
| 182 | 8x |
passed_args = c(list(paired = paired), dots_list(...)), |
| 183 | 8x |
lst_ard_columns = list(group1 = by, variable = variable, context = "effectsize_hedges_g") |
| 184 |
) |
|
| 185 | ||
| 186 |
# add the stat label --------------------------------------------------------- |
|
| 187 | 8x |
ret |> |
| 188 | 8x |
dplyr::left_join( |
| 189 | 8x |
.df_effectsize_stat_labels(), |
| 190 | 8x |
by = "stat_name" |
| 191 |
) |> |
|
| 192 | 8x |
dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) |> |
| 193 | 8x |
cards::as_card() |> |
| 194 | 8x |
cards::tidy_ard_column_order() |
| 195 |
} |
| 1 |
#' ARD continuous CIs |
|
| 2 |
#' |
|
| 3 |
#' One-sample confidence intervals for continuous variable means and medians. |
|
| 4 |
#' |
|
| 5 |
#' @inheritParams ard_stats_t_test |
|
| 6 |
#' @param method (`string`)\cr |
|
| 7 |
#' a string indicating the method to use for the confidence interval |
|
| 8 |
#' calculation. Must be one of `"t.test"` or `"wilcox.test"` |
|
| 9 |
#' @param ... arguments passed to `t.test()` or `wilcox.test()` |
|
| 10 |
#' |
|
| 11 |
#' @return ARD data frame |
|
| 12 |
#' @name ard_continuous_ci |
|
| 13 |
#' |
|
| 14 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
|
|
| 15 |
#' ard_continuous_ci(mtcars, variables = c(mpg, hp), method = "wilcox.test") |
|
| 16 |
#' ard_continuous_ci(mtcars, variables = mpg, by = am, method = "t.test") |
|
| 17 |
NULL |
|
| 18 | ||
| 19 |
#' @rdname ard_continuous_ci |
|
| 20 |
#' @export |
|
| 21 |
ard_continuous_ci <- function(data, ...) {
|
|
| 22 | 19x |
check_not_missing(data) |
| 23 | 19x |
UseMethod("ard_continuous_ci")
|
| 24 |
} |
|
| 25 | ||
| 26 |
#' @rdname ard_continuous_ci |
|
| 27 |
#' @export |
|
| 28 |
ard_continuous_ci.data.frame <- function(data, variables, by = dplyr::group_vars(data), conf.level = 0.95, method = c("t.test", "wilcox.test"), ...) {
|
|
| 29 | 3x |
set_cli_abort_call() |
| 30 | ||
| 31 |
# check inputs --------------------------------------------------------------- |
|
| 32 | 3x |
method <- arg_match(method) |
| 33 | 3x |
check_not_missing(variables) |
| 34 | 3x |
cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
|
| 35 | ||
| 36 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 37 | 3x |
if (is_empty(variables)) {
|
| 38 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 39 |
} |
|
| 40 | ||
| 41 |
# calculate CIs -------------------------------------------------------------- |
|
| 42 | 3x |
switch(method, |
| 43 | 3x |
"t.test" = |
| 44 | 3x |
ard_stats_t_test_onesample( |
| 45 | 3x |
data = data, |
| 46 | 3x |
variables = {{ variables }},
|
| 47 | 3x |
by = {{ by }},
|
| 48 | 3x |
conf.level = conf.level, |
| 49 |
... |
|
| 50 |
), |
|
| 51 | 3x |
"wilcox.test" = |
| 52 | 3x |
ard_stats_wilcox_test_onesample( |
| 53 | 3x |
data = data, |
| 54 | 3x |
variables = {{ variables }},
|
| 55 | 3x |
by = {{ by }},
|
| 56 | 3x |
conf.level = conf.level, |
| 57 | 3x |
conf.int = TRUE, |
| 58 |
... |
|
| 59 |
) |
|
| 60 |
) |> |
|
| 61 | 3x |
dplyr::mutate(context = "continuous_ci") |
| 62 |
} |
| 1 |
#' ARD to Calculate Categorical Occurrence Rates by Maximum Level Per Unique ID |
|
| 2 |
#' |
|
| 3 |
#' Function calculates categorical variable level occurrences rates by maximum level per unique ID. |
|
| 4 |
#' Each variable in `variables` is evaluated independently and then results for all variables are stacked. |
|
| 5 |
#' Only the highest-ordered level will be counted for each unique ID. |
|
| 6 |
#' Unordered, non-numeric variables will be converted to factor and the default level order used for ordering. |
|
| 7 |
#' |
|
| 8 |
#' @inheritParams cards::ard_tabulate |
|
| 9 |
#' @inheritParams cards::ard_stack |
|
| 10 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 11 |
#' The categorical variables for which occurrence rates per unique ID (by maximum level) will be calculated. |
|
| 12 |
#' @param id ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 13 |
#' Argument used to subset `data` to identify rows in `data` to calculate categorical variable level occurrence rates. |
|
| 14 |
#' @param denominator (`data.frame`, `integer`)\cr |
|
| 15 |
#' An optional argument to change the denominator used for `"N"` and `"p"` statistic calculations. |
|
| 16 |
#' Defaults to `NULL`, in which case `dplyr::distinct(data, dplyr::pick(all_of(c(id, by))))` is used for these |
|
| 17 |
#' calculations. See [cards::ard_tabulate()] for more details on specifying denominators. |
|
| 18 |
#' @param quiet (scalar `logical`)\cr |
|
| 19 |
#' Logical indicating whether to suppress additional messaging. Default is `FALSE`. |
|
| 20 |
#' @param fmt_fn `r lifecycle::badge("deprecated")`
|
|
| 21 |
#' |
|
| 22 |
#' @return an ARD data frame of class 'card' |
|
| 23 |
#' @name ard_tabulate_max |
|
| 24 |
#' |
|
| 25 |
#' @examples |
|
| 26 |
#' # Occurrence Rates by Max Level (Highest Severity) -------------------------- |
|
| 27 |
#' ard_tabulate_max( |
|
| 28 |
#' cards::ADAE, |
|
| 29 |
#' variables = c(AESER, AESEV), |
|
| 30 |
#' id = USUBJID, |
|
| 31 |
#' by = TRTA, |
|
| 32 |
#' denominator = cards::ADSL |
|
| 33 |
#' ) |
|
| 34 |
NULL |
|
| 35 | ||
| 36 |
#' @rdname ard_tabulate_max |
|
| 37 |
#' @export |
|
| 38 |
ard_tabulate_max <- function(data, |
|
| 39 |
variables, |
|
| 40 |
id, |
|
| 41 |
by = dplyr::group_vars(data), |
|
| 42 |
statistic = everything() ~ c("n", "p", "N"),
|
|
| 43 |
denominator = NULL, |
|
| 44 |
strata = NULL, |
|
| 45 |
fmt_fun = NULL, |
|
| 46 |
stat_label = everything() ~ cards::default_stat_labels(), |
|
| 47 |
quiet = FALSE, |
|
| 48 |
fmt_fn = deprecated(), |
|
| 49 |
...) {
|
|
| 50 | 17x |
set_cli_abort_call() |
| 51 | ||
| 52 |
# deprecated args ------------------------------------------------------------ |
|
| 53 | 17x |
if (lifecycle::is_present(fmt_fn)) {
|
| 54 | ! |
lifecycle::deprecate_soft( |
| 55 | ! |
when = "0.2.5", |
| 56 | ! |
what = "ard_tabulate_max(fmt_fn)", |
| 57 | ! |
with = "ard_tabulate_max(fmt_fun)" |
| 58 |
) |
|
| 59 | ! |
fmt_fun <- fmt_fn |
| 60 |
} |
|
| 61 | ||
| 62 |
# check inputs --------------------------------------------------------------- |
|
| 63 | 17x |
check_not_missing(data) |
| 64 | 17x |
check_not_missing(variables) |
| 65 | 17x |
check_not_missing(id) |
| 66 | 17x |
cards::process_selectors(data, |
| 67 | 17x |
variables = {{ variables }}, id = {{ id }},
|
| 68 | 17x |
by = {{ by }}, strata = {{ strata }}
|
| 69 |
) |
|
| 70 | 17x |
data <- dplyr::ungroup(data) |
| 71 | ||
| 72 |
# check the id argument is not empty |
|
| 73 | 17x |
if (is_empty(id)) {
|
| 74 | ! |
cli::cli_abort("Argument {.arg id} cannot be empty.", call = get_cli_abort_call())
|
| 75 |
} |
|
| 76 | ||
| 77 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 78 | 17x |
if (is_empty(variables)) {
|
| 79 | 1x |
return(dplyr::tibble() |> cards::as_card()) |
| 80 |
} |
|
| 81 | ||
| 82 | 16x |
lst_results <- lapply( |
| 83 | 16x |
variables, |
| 84 | 16x |
function(x) {
|
| 85 | 20x |
ard_categorical( |
| 86 | 20x |
data = data |> |
| 87 | 20x |
arrange_using_order(c(id, by, strata, x)) |> |
| 88 | 20x |
dplyr::slice_tail(n = 1L, by = all_of(c(id, by, strata))), |
| 89 | 20x |
variables = all_of(x), |
| 90 | 20x |
by = all_of(by), |
| 91 | 20x |
strata = all_of(strata), |
| 92 | 20x |
statistic = statistic, |
| 93 | 20x |
denominator = denominator, |
| 94 | 20x |
fmt_fun = fmt_fun, |
| 95 | 20x |
stat_label = stat_label |
| 96 |
) |
|
| 97 |
} |
|
| 98 |
) |
|
| 99 | ||
| 100 |
# print default order of variable levels ------------------------------------- |
|
| 101 | 14x |
for (v in variables) {
|
| 102 | 18x |
lvls <- .unique_and_sorted(data[[v]]) |
| 103 | 18x |
vec <- cli::cli_vec( |
| 104 | 18x |
lvls, |
| 105 | 18x |
style = list("vec-sep" = " < ", "vec-sep2" = " < ", "vec-last" = " < ", "vec-trunc" = 3)
|
| 106 |
) |
|
| 107 | 16x |
if (!quiet) cli::cli_inform("{.var {v}}: {.val {vec}}")
|
| 108 |
} |
|
| 109 | ||
| 110 |
# combine results ------------------------------------------------------------ |
|
| 111 | 14x |
result <- lst_results |> |
| 112 | 14x |
dplyr::bind_rows() |> |
| 113 | 14x |
dplyr::mutate(context = "categorical_max") |> |
| 114 | 14x |
cards::tidy_ard_column_order() |> |
| 115 | 14x |
cards::tidy_ard_row_order() |
| 116 | ||
| 117 |
# return final result -------------------------------------------------------- |
|
| 118 | 14x |
result |
| 119 |
} |
|
| 120 | ||
| 121 |
# internal function copied from cards |
|
| 122 |
# like `dplyr::arrange()`, but uses base R's `order()` to keep consistency in some edge cases |
|
| 123 |
arrange_using_order <- function(data, columns) {
|
|
| 124 | 20x |
inject(data[with(data, order(!!!syms(columns))), ]) |
| 125 |
} |
| 1 |
#' ARD Survival Differences |
|
| 2 |
#' |
|
| 3 |
#' Calculate differences in the Kaplan-Meier estimator of survival using the |
|
| 4 |
#' results from [`survival::survfit()`]. |
|
| 5 |
#' |
|
| 6 |
#' @param x (`survift`)\cr |
|
| 7 |
#' object of class `'survfit'` typically created with [`survival::survfit()`] |
|
| 8 |
#' @param conf.level (scalar `numeric`)\cr |
|
| 9 |
#' confidence level for confidence interval. Default is `0.95`. |
|
| 10 |
#' @inheritParams ard_survival_survfit |
|
| 11 |
#' |
|
| 12 |
#' @return an ARD data frame of class 'card' |
|
| 13 |
#' @export |
|
| 14 |
#' |
|
| 15 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("survival", "ggsurvfit")))
|
|
| 16 |
#' library(ggsurvfit) |
|
| 17 |
#' library(survival) |
|
| 18 |
#' |
|
| 19 |
#' survfit(Surv_CNSR() ~ TRTA, data = cards::ADTTE) |> |
|
| 20 |
#' ard_survival_survfit_diff(times = c(25, 50)) |
|
| 21 |
ard_survival_survfit_diff <- function(x, times, conf.level = 0.95) {
|
|
| 22 | 5x |
set_cli_abort_call() |
| 23 | ||
| 24 |
# check installed packages --------------------------------------------------- |
|
| 25 | 5x |
check_pkg_installed(c("survival", "broom"))
|
| 26 | 5x |
check_not_missing(x) |
| 27 | 5x |
check_not_missing(times) |
| 28 | 5x |
check_class(x, "survfit") |
| 29 | ||
| 30 | 5x |
if (inherits(x, c("survfitms", "survfitcox"))) {
|
| 31 | 1x |
cli::cli_abort( |
| 32 | 1x |
"Argument {.arg x} cannot be class {.cls {c('survfitms', 'survfitcox')}}.",
|
| 33 | 1x |
call = get_cli_abort_call() |
| 34 |
) |
|
| 35 |
} |
|
| 36 | 4x |
check_scalar_range(conf.level, range = c(0, 1)) |
| 37 | 4x |
check_length( |
| 38 | 4x |
as.list(x$call)[["formula"]] |> stats::as.formula() |> stats::terms() |> attr("term.labels"),
|
| 39 | 4x |
length = 1L, |
| 40 | 4x |
message = "The {.cls survfit} object passed in argument {.arg x} must be stratified by a single variable."
|
| 41 |
) |
|
| 42 | 3x |
if (length(x$strata) < 2) {
|
| 43 | 1x |
cli::cli_abort( |
| 44 | 1x |
"The {.cls survfit} object's stratifying variable must have 2 or more levels.",
|
| 45 | 1x |
call = get_cli_abort_call() |
| 46 |
) |
|
| 47 |
} |
|
| 48 | ||
| 49 |
# calculate the survival at the specified times |
|
| 50 | 2x |
ard_survival_survfit <- |
| 51 | 2x |
ard_survival_survfit(x = x, times = times) |> |
| 52 | 2x |
dplyr::filter(.data$stat_name %in% c("estimate", "std.error")) |>
|
| 53 | 2x |
dplyr::select(-c("stat_label", "context", "fmt_fun"))
|
| 54 | ||
| 55 |
# transform the survival ARD into a cards object with the survival difference |
|
| 56 | 2x |
card <- |
| 57 | 2x |
ard_survival_survfit %>% |
| 58 | 2x |
{dplyr::left_join( # styler: off
|
| 59 |
# remove the first group from the data frame (this is our reference group) |
|
| 60 | 2x |
dplyr::filter(., .by = cards::all_ard_groups(), dplyr::cur_group_id() > 1L) |> |
| 61 | 2x |
dplyr::rename(stat1 = "stat"), |
| 62 |
# merge the reference group data |
|
| 63 | 2x |
dplyr::filter(., .by = cards::all_ard_groups(), dplyr::cur_group_id() == 1L) |> |
| 64 | 2x |
dplyr::select(stat0 = "stat", everything(), -c("group1_level", "error", "warning")),
|
| 65 | 2x |
by = c("group1", "variable", "variable_level", "stat_name")
|
| 66 | 2x |
)} |> # styler: off |
| 67 |
# reshape to put the stats that need to be combined on the same row |
|
| 68 | 2x |
tidyr::pivot_wider( |
| 69 | 2x |
id_cols = c("group1", "group1_level", "variable", "variable_level"),
|
| 70 | 2x |
names_from = "stat_name", |
| 71 | 2x |
values_from = c("stat0", "stat1"),
|
| 72 | 2x |
values_fn = unlist |
| 73 |
) |> |
|
| 74 |
# calcualte the primary statistics to return |
|
| 75 | 2x |
dplyr::mutate( |
| 76 |
# reference level |
|
| 77 | 2x |
reference_level = ard_survival_survfit[["group1_level"]][1], |
| 78 |
# short description of method |
|
| 79 | 2x |
method = "Survival Difference (Z-test)", |
| 80 |
# survival difference |
|
| 81 | 2x |
estimate = .data$stat0_estimate - .data$stat1_estimate, |
| 82 |
# survival difference standard error |
|
| 83 | 2x |
std.error = sqrt(.data$stat0_std.error^2 + .data$stat1_std.error^2), |
| 84 |
# Z test statistic |
|
| 85 | 2x |
statistic = .data$estimate / .data$std.error, |
| 86 |
# confidence limits of the survival difference |
|
| 87 | 2x |
conf.low = .data$estimate - .data$std.error * stats::qnorm(1 - (1 - .env$conf.level) / 2), |
| 88 | 2x |
conf.high = .data$estimate + .data$std.error * stats::qnorm(1 - (1 - .env$conf.level) / 2), |
| 89 |
# p-value for test where H0: no difference |
|
| 90 | 2x |
p.value = 2 * (1 - stats::pnorm(abs(.data$statistic))), |
| 91 | 2x |
across(c("reference_level", "estimate", "std.error", "statistic", "conf.low", "conf.high", "p.value", "method"), as.list)
|
| 92 |
) |> |
|
| 93 |
# reshape into the cards structure |
|
| 94 | 2x |
dplyr::select(-starts_with("stat0_"), -starts_with("stat1_")) |>
|
| 95 | 2x |
tidyr::pivot_longer( |
| 96 | 2x |
cols = -c(cards::all_ard_groups(), cards::all_ard_variables()), |
| 97 | 2x |
names_to = "stat_name", |
| 98 | 2x |
values_to = "stat" |
| 99 |
) |
|
| 100 | ||
| 101 |
# final prepping of the cards object ----------------------------------------- |
|
| 102 | 2x |
card |> |
| 103 | 2x |
dplyr::mutate( |
| 104 | 2x |
warning = ard_survival_survfit[["warning"]][1], |
| 105 | 2x |
error = ard_survival_survfit[["error"]][1], |
| 106 | 2x |
fmt_fun = list(1L), |
| 107 | 2x |
stat_label = |
| 108 | 2x |
dplyr::case_when( |
| 109 | 2x |
.data$stat_name %in% "estimate" ~ "Survival Difference", |
| 110 | 2x |
.data$stat_name %in% "std.error" ~ "Survival Difference Standard Error", |
| 111 | 2x |
.data$stat_name %in% "conf.low" ~ "CI Lower Bound", |
| 112 | 2x |
.data$stat_name %in% "conf.high" ~ "CI Upper Bound", |
| 113 | 2x |
.data$stat_name %in% "statistic" ~ "z statistic", |
| 114 | 2x |
.data$stat_name %in% "p.value" ~ "p-value", |
| 115 | 2x |
.default = .data$stat_name |
| 116 |
), |
|
| 117 | 2x |
context = "survival_survfit_diff", |
| 118 |
) |> |
|
| 119 | 2x |
cards::as_card() |> |
| 120 | 2x |
cards::tidy_ard_column_order() |
| 121 |
} |
| 1 |
#' ARD Cohen's D Test |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Analysis results data for paired and non-paired Cohen's D Effect Size Test |
|
| 5 |
#' using [`effectsize::cohens_d()`]. |
|
| 6 |
#' |
|
| 7 |
#' @param data (`data.frame`)\cr |
|
| 8 |
#' a data frame. See below for details. |
|
| 9 |
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 10 |
#' column name to compare by. Must be a categorical variable with exactly two levels. |
|
| 11 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 12 |
#' column names to be compared. Must be a continuous variables. |
|
| 13 |
#' Independent tests will be run for each variable. |
|
| 14 |
#' @param id ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 15 |
#' column name of the subject or participant ID |
|
| 16 |
#' @param conf.level (scalar `numeric`)\cr |
|
| 17 |
#' confidence level for confidence interval. Default is `0.95`. |
|
| 18 |
#' @param ... arguments passed to `effectsize::cohens_d(...)` |
|
| 19 |
#' |
|
| 20 |
#' @return ARD data frame |
|
| 21 |
#' @name ard_effectsize_cohens_d |
|
| 22 |
#' |
|
| 23 |
#' @details |
|
| 24 |
#' For the `ard_effectsize_cohens_d()` function, the data is expected to be one row per subject. |
|
| 25 |
#' The data is passed as `effectsize::cohens_d(data[[variable]]~data[[by]], data, paired = FALSE, ...)`. |
|
| 26 |
#' |
|
| 27 |
#' For the `ard_effectsize_paired_cohens_d()` function, the data is expected to be one row |
|
| 28 |
#' per subject per by level. Before the effect size is calculated, the data are |
|
| 29 |
#' reshaped to a wide format to be one row per subject. |
|
| 30 |
#' The data are then passed as |
|
| 31 |
#' `effectsize::cohens_d(x = data_wide[[<by level 1>]], y = data_wide[[<by level 2>]], paired = TRUE, ...)`. |
|
| 32 |
#' |
|
| 33 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("effectsize", "parameters")))
|
|
| 34 |
#' cards::ADSL |> |
|
| 35 |
#' dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")) |>
|
|
| 36 |
#' ard_effectsize_cohens_d(by = ARM, variables = AGE) |
|
| 37 |
#' |
|
| 38 |
#' # constructing a paired data set, |
|
| 39 |
#' # where patients receive both treatments |
|
| 40 |
#' cards::ADSL[c("ARM", "AGE")] |>
|
|
| 41 |
#' dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")) |>
|
|
| 42 |
#' dplyr::mutate(.by = ARM, USUBJID = dplyr::row_number()) |> |
|
| 43 |
#' dplyr::arrange(USUBJID, ARM) |> |
|
| 44 |
#' dplyr::group_by(USUBJID) |> |
|
| 45 |
#' dplyr::filter(dplyr::n() > 1) |> |
|
| 46 |
#' ard_effectsize_paired_cohens_d(by = ARM, variables = AGE, id = USUBJID) |
|
| 47 |
NULL |
|
| 48 | ||
| 49 |
#' @rdname ard_effectsize_cohens_d |
|
| 50 |
#' @export |
|
| 51 |
ard_effectsize_cohens_d <- function(data, by, variables, conf.level = 0.95, ...) {
|
|
| 52 | 4x |
set_cli_abort_call() |
| 53 | ||
| 54 |
# check installed packages --------------------------------------------------- |
|
| 55 | 4x |
check_pkg_installed(c("effectsize", "parameters"))
|
| 56 | ||
| 57 |
# check/process inputs ------------------------------------------------------- |
|
| 58 | 4x |
check_not_missing(data) |
| 59 | 4x |
check_not_missing(variables) |
| 60 | 4x |
check_not_missing(by) |
| 61 | 4x |
check_data_frame(data) |
| 62 | 4x |
data <- dplyr::ungroup(data) |
| 63 | 4x |
cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
|
| 64 | 4x |
check_scalar(by) |
| 65 | 4x |
check_range(conf.level, range = c(0, 1)) |
| 66 | ||
| 67 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 68 | 4x |
if (is_empty(variables)) {
|
| 69 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 70 |
} |
|
| 71 | ||
| 72 |
# build ARD ------------------------------------------------------------------ |
|
| 73 | 4x |
lapply( |
| 74 | 4x |
variables, |
| 75 | 4x |
function(variable) {
|
| 76 | 5x |
.format_cohens_d_results( |
| 77 | 5x |
by = by, |
| 78 | 5x |
variable = variable, |
| 79 | 5x |
lst_tidy = |
| 80 | 5x |
cards::eval_capture_conditions( |
| 81 | 5x |
effectsize::cohens_d( |
| 82 | 5x |
reformulate2(by, response = variable), |
| 83 | 5x |
data = data |> tidyr::drop_na(all_of(c(by, variable))), |
| 84 | 5x |
paired = FALSE, |
| 85 | 5x |
ci = conf.level, |
| 86 |
... |
|
| 87 |
) |> |
|
| 88 | 5x |
parameters::standardize_names(style = "broom") |> |
| 89 | 5x |
dplyr::mutate(method = "Cohen's D") |
| 90 |
), |
|
| 91 | 5x |
paired = FALSE, |
| 92 |
... |
|
| 93 |
) |
|
| 94 |
} |
|
| 95 |
) |> |
|
| 96 | 4x |
dplyr::bind_rows() |
| 97 |
} |
|
| 98 | ||
| 99 | ||
| 100 |
#' @rdname ard_effectsize_cohens_d |
|
| 101 |
#' @export |
|
| 102 |
ard_effectsize_paired_cohens_d <- function(data, by, variables, id, conf.level = 0.95, ...) {
|
|
| 103 | 3x |
set_cli_abort_call() |
| 104 | ||
| 105 |
# check installed packages --------------------------------------------------- |
|
| 106 | 3x |
check_pkg_installed(c("effectsize", "parameters"))
|
| 107 | ||
| 108 |
# check/process inputs ------------------------------------------------------- |
|
| 109 | 3x |
check_not_missing(data) |
| 110 | 3x |
check_not_missing(variables) |
| 111 | 3x |
check_not_missing(by) |
| 112 | 3x |
check_not_missing(id) |
| 113 | 3x |
check_data_frame(data) |
| 114 | 3x |
data <- dplyr::ungroup(data) |
| 115 | 3x |
cards::process_selectors(data, by = {{ by }}, variables = {{ variables }}, id = {{ id }})
|
| 116 | 3x |
check_scalar(by) |
| 117 | 3x |
check_scalar(id) |
| 118 | 3x |
check_range(conf.level, range = c(0, 1)) |
| 119 | ||
| 120 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 121 | 3x |
if (is_empty(variables)) {
|
| 122 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 123 |
} |
|
| 124 | ||
| 125 |
# build ARD ------------------------------------------------------------------ |
|
| 126 | 3x |
lapply( |
| 127 | 3x |
variables, |
| 128 | 3x |
function(variable) {
|
| 129 | 3x |
.format_cohens_d_results( |
| 130 | 3x |
by = by, |
| 131 | 3x |
variable = variable, |
| 132 | 3x |
lst_tidy = |
| 133 | 3x |
cards::eval_capture_conditions({
|
| 134 |
# adding this reshape inside the eval, so if there is an error it's captured in the ARD object |
|
| 135 | 3x |
data_wide <- |
| 136 | 3x |
data |> |
| 137 | 3x |
tidyr::drop_na(all_of(c(id, by, variable))) |> |
| 138 | 3x |
.paired_data_pivot_wider(by = by, variable = variable, id = id) |> |
| 139 | 3x |
tidyr::drop_na(any_of(c("by1", "by2")))
|
| 140 |
# perform paired cohen's d test |
|
| 141 | 2x |
effectsize::cohens_d(x = data_wide[["by1"]], y = data_wide[["by2"]], paired = TRUE, ci = conf.level, ...) |> |
| 142 | 2x |
parameters::standardize_names(style = "broom") |> |
| 143 | 2x |
dplyr::mutate(method = "Paired Cohen's D") |
| 144 |
}), |
|
| 145 | 3x |
paired = TRUE, |
| 146 |
... |
|
| 147 |
) |
|
| 148 |
} |
|
| 149 |
) |> |
|
| 150 | 3x |
dplyr::bind_rows() |
| 151 |
} |
|
| 152 | ||
| 153 |
.df_effectsize_stat_labels <- function() {
|
|
| 154 | 16x |
dplyr::tribble( |
| 155 | 16x |
~stat_name, ~stat_label, |
| 156 | 16x |
"estimate", "Effect Size Estimate", |
| 157 | 16x |
"conf.low", "CI Lower Bound", |
| 158 | 16x |
"conf.high", "CI Upper Bound", |
| 159 | 16x |
"conf.level", "CI Confidence Level", |
| 160 | 16x |
"mu", "H0 Mean", |
| 161 | 16x |
"paired", "Paired test", |
| 162 | 16x |
"pooled_sd", "Pooled Standard Deviation", |
| 163 | 16x |
"alternative", "Alternative Hypothesis" |
| 164 |
) |
|
| 165 |
} |
|
| 166 | ||
| 167 | ||
| 168 |
#' Convert Cohen's D Test to ARD |
|
| 169 |
#' |
|
| 170 |
#' @inheritParams cards::tidy_as_ard |
|
| 171 |
#' @inheritParams effectsize::cohens_d |
|
| 172 |
#' @param by (`string`)\cr by column name |
|
| 173 |
#' @param variable (`string`)\cr variable column name |
|
| 174 |
#' @param ... passed to `cohens_d(...)` |
|
| 175 |
#' |
|
| 176 |
#' @return ARD data frame |
|
| 177 |
#' @keywords internal |
|
| 178 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("effectsize", "parameters")))
|
|
| 179 |
#' cardx:::.format_cohens_d_results( |
|
| 180 |
#' by = "ARM", |
|
| 181 |
#' variable = "AGE", |
|
| 182 |
#' paired = FALSE, |
|
| 183 |
#' lst_tidy = |
|
| 184 |
#' cards::eval_capture_conditions( |
|
| 185 |
#' effectsize::hedges_g(data[[variable]] ~ data[[by]], paired = FALSE) |> |
|
| 186 |
#' parameters::standardize_names(style = "broom") |
|
| 187 |
#' ) |
|
| 188 |
#' ) |
|
| 189 |
.format_cohens_d_results <- function(by, variable, lst_tidy, paired, ...) {
|
|
| 190 |
# build ARD ------------------------------------------------------------------ |
|
| 191 | 8x |
ret <- |
| 192 | 8x |
cards::tidy_as_ard( |
| 193 | 8x |
lst_tidy = lst_tidy, |
| 194 | 8x |
tidy_result_names = c( |
| 195 | 8x |
"estimate", "conf.level", "conf.low", "conf.high" |
| 196 |
), |
|
| 197 | 8x |
fun_args_to_record = c("mu", "paired", "pooled_sd", "alternative"),
|
| 198 | 8x |
formals = formals(asNamespace("effectsize")[["cohens_d"]]),
|
| 199 | 8x |
passed_args = c(list(paired = paired), dots_list(...)), |
| 200 | 8x |
lst_ard_columns = list(group1 = by, variable = variable, context = "effectsize_cohens_d") |
| 201 |
) |
|
| 202 | ||
| 203 |
# add the stat label --------------------------------------------------------- |
|
| 204 | 8x |
ret |> |
| 205 | 8x |
dplyr::left_join( |
| 206 | 8x |
.df_effectsize_stat_labels(), |
| 207 | 8x |
by = "stat_name" |
| 208 |
) |> |
|
| 209 | 8x |
dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) |> |
| 210 | 8x |
cards::as_card() |> |
| 211 | 8x |
cards::tidy_ard_column_order() |
| 212 |
} |
| 1 |
#' Basic Regression ARD |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' A function that takes a regression model and provides basic statistics in an |
|
| 5 |
#' ARD structure. |
|
| 6 |
#' The default output is simpler than [`ard_regression()`]. |
|
| 7 |
#' The function primarily matches regression terms to underlying variable names |
|
| 8 |
#' and levels. |
|
| 9 |
#' The default arguments used are |
|
| 10 |
#' |
|
| 11 |
#' ```r |
|
| 12 |
#' broom.helpers::tidy_plus_plus( |
|
| 13 |
#' add_reference_rows = FALSE, |
|
| 14 |
#' add_estimate_to_reference_rows = FALSE, |
|
| 15 |
#' add_n = FALSE, |
|
| 16 |
#' intercept = FALSE |
|
| 17 |
#' ) |
|
| 18 |
#' ``` |
|
| 19 |
#' |
|
| 20 |
#' @inheritParams ard_regression |
|
| 21 |
#' @param stats_to_remove (`character`)\cr |
|
| 22 |
#' character vector of statistic names to remove. Default is |
|
| 23 |
#' `c("term", "var_type", "var_label", "var_class", "label", "contrasts_type", "contrasts", "var_nlevels")`.
|
|
| 24 |
#' |
|
| 25 |
#' @return data frame |
|
| 26 |
#' @name ard_regression_basic |
|
| 27 |
#' |
|
| 28 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom.helpers"))
|
|
| 29 |
#' lm(AGE ~ ARM, data = cards::ADSL) |> |
|
| 30 |
#' ard_regression_basic() |
|
| 31 |
#' |
|
| 32 |
#' ard_regression_basic( |
|
| 33 |
#' x = cards::ADSL, |
|
| 34 |
#' formula = AGE ~ ARM, |
|
| 35 |
#' method = "lm" |
|
| 36 |
#' ) |
|
| 37 |
NULL |
|
| 38 | ||
| 39 |
#' @rdname ard_regression_basic |
|
| 40 |
#' @export |
|
| 41 |
ard_regression_basic <- function(x, ...) {
|
|
| 42 | 8x |
UseMethod("ard_regression_basic")
|
| 43 |
} |
|
| 44 | ||
| 45 |
#' @rdname ard_regression_basic |
|
| 46 |
#' @export |
|
| 47 |
ard_regression_basic.default <- function(x, tidy_fun = broom.helpers::tidy_with_broom_or_parameters, |
|
| 48 |
stats_to_remove = c( |
|
| 49 |
"term", "var_type", "var_label", "var_class", |
|
| 50 |
"label", "contrasts_type", "contrasts", "var_nlevels" |
|
| 51 |
), |
|
| 52 |
...) {
|
|
| 53 | 6x |
set_cli_abort_call() |
| 54 | ||
| 55 |
# check installed packages --------------------------------------------------- |
|
| 56 | 6x |
check_pkg_installed(pkg = "broom.helpers") |
| 57 | ||
| 58 |
# check inputs --------------------------------------------------------------- |
|
| 59 | 6x |
check_not_missing(x) |
| 60 | 6x |
check_class(stats_to_remove, cls = "character", allow_empty = TRUE) |
| 61 | ! |
if (is_empty(stats_to_remove)) stats_to_remove <- character(0L) # styler: off |
| 62 | ||
| 63 | 6x |
args <- |
| 64 | 6x |
list( |
| 65 | 6x |
add_reference_rows = FALSE, |
| 66 | 6x |
add_estimate_to_reference_rows = FALSE, |
| 67 | 6x |
add_n = FALSE, |
| 68 | 6x |
intercept = FALSE |
| 69 |
) |> |
|
| 70 | 6x |
utils::modifyList(val = rlang::dots_list(...)) |
| 71 | ||
| 72 | 6x |
rlang::inject(ard_regression(x = x, tidy_fun = tidy_fun, !!!args)) |> |
| 73 | 6x |
dplyr::filter(!.data$stat_name %in% .env$stats_to_remove) |> |
| 74 | 6x |
dplyr::select(-(cards::all_ard_variables("levels") & dplyr::where(\(x) all(is.na(x)))))
|
| 75 |
} |
|
| 76 | ||
| 77 |
#' @rdname ard_regression_basic |
|
| 78 |
#' @export |
|
| 79 |
ard_regression_basic.data.frame <- function(x, formula, method, method.args = list(), package = "base", |
|
| 80 |
tidy_fun = broom.helpers::tidy_with_broom_or_parameters, |
|
| 81 |
stats_to_remove = c( |
|
| 82 |
"term", "var_type", "var_label", "var_class", |
|
| 83 |
"label", "contrasts_type", "contrasts", "var_nlevels" |
|
| 84 |
), |
|
| 85 |
...) {
|
|
| 86 |
# check inputs --------------------------------------------------------------- |
|
| 87 | 2x |
set_cli_abort_call() |
| 88 | 2x |
check_not_missing(x) |
| 89 | 2x |
check_not_missing(formula) |
| 90 | 1x |
check_not_missing(method) |
| 91 | 1x |
check_class(formula, cls = "formula") |
| 92 | ||
| 93 |
# build model ---------------------------------------------------------------- |
|
| 94 | 1x |
model <- |
| 95 | 1x |
construct_model( |
| 96 | 1x |
data = x, |
| 97 | 1x |
formula = formula, |
| 98 | 1x |
method = method, |
| 99 | 1x |
method.args = {{ method.args }},
|
| 100 | 1x |
package = package |
| 101 |
) |
|
| 102 | ||
| 103 |
# summarize model ------------------------------------------------------------ |
|
| 104 | 1x |
ard_regression_basic(x = model, tidy_fun = tidy_fun, stats_to_remove = stats_to_remove, ...) |
| 105 |
} |
| 1 |
#' ARD Wald Test |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Function takes a regression model object and calculates Wald |
|
| 5 |
#' statistical test using [`aod::wald.test()`]. |
|
| 6 |
#' |
|
| 7 |
#' @param x regression model object |
|
| 8 |
#' @param ... arguments passed to `aod::wald.test(...)` |
|
| 9 |
#' @inheritParams ard_regression |
|
| 10 |
#' |
|
| 11 |
#' @return data frame |
|
| 12 |
#' @export |
|
| 13 |
#' |
|
| 14 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("aod", "broom.helpers")))
|
|
| 15 |
#' lm(AGE ~ ARM, data = cards::ADSL) |> |
|
| 16 |
#' ard_aod_wald_test() |
|
| 17 |
ard_aod_wald_test <- function(x, tidy_fun = broom.helpers::tidy_with_broom_or_parameters, ...) {
|
|
| 18 | 3x |
set_cli_abort_call() |
| 19 | ||
| 20 |
# check installed packages --------------------------------------------------- |
|
| 21 | 3x |
check_pkg_installed(c("aod", "broom.helpers"))
|
| 22 | ||
| 23 |
# check inputs --------------------------------------------------------------- |
|
| 24 | 3x |
check_not_missing(x) |
| 25 | ||
| 26 |
# run regression() ----------------------------------------------------------- |
|
| 27 | 3x |
reg_model <- cards::eval_capture_conditions( |
| 28 | 3x |
ard_regression_basic(x, tidy_fun = tidy_fun, intercept = TRUE, stats_to_remove = c( |
| 29 | 3x |
"var_type", |
| 30 | 3x |
"var_label", |
| 31 | 3x |
"var_class", "label", |
| 32 | 3x |
"contrasts_type", "contrasts", "var_nlevels", "std.error", |
| 33 | 3x |
"conf.low", "conf.high", "statistic", "p.value", "estimate" |
| 34 |
)) |
|
| 35 |
) |
|
| 36 | ||
| 37 | 3x |
if (!is.null(reg_model[["error"]])) {
|
| 38 | 1x |
cli::cli_abort( |
| 39 | 1x |
c("Unable to identify underlying variable names in regression model.",
|
| 40 | 1x |
i = "Is this model type supported by {.fun broom.helpers::tidy_plus_plus}, which is the function used to identify variable names?"
|
| 41 |
), |
|
| 42 | 1x |
call = get_cli_abort_call() |
| 43 |
) |
|
| 44 |
} |
|
| 45 | 2x |
aod <- |
| 46 | 2x |
reg_model[["result"]] %>% |
| 47 | 2x |
dplyr::select(c( |
| 48 | 2x |
variable = "variable", |
| 49 | 2x |
model_terms = "stat" |
| 50 |
)) %>% |
|
| 51 | 2x |
dplyr::mutate(term_id = dplyr::row_number()) %>% |
| 52 | 2x |
tidyr::nest(data = -"variable") %>% |
| 53 | 2x |
dplyr::rowwise() %>% |
| 54 | 2x |
dplyr::mutate( |
| 55 | 2x |
model_terms = unlist(.data$data[["model_terms"]]) %>% list(), |
| 56 | 2x |
model_terms_id = rlang::set_names(.data$data[["term_id"]]) %>% list() |
| 57 |
) |
|
| 58 |
# run wald.test() ----------------------------------------------------------- |
|
| 59 | 2x |
wald_test <- |
| 60 | 2x |
cards::eval_capture_conditions(lapply(seq_len(length(aod$model_terms_id)), function(terms_id) {
|
| 61 | 4x |
aod::wald.test( |
| 62 | 4x |
Sigma = stats::vcov(x), |
| 63 | 4x |
b = stats::coef(x), Terms = aod$model_terms_id[[terms_id]] |
| 64 |
) |
|
| 65 |
})) |
|
| 66 | ||
| 67 | ||
| 68 | 2x |
df_list <- do.call(rbind, lapply(wald_test$result, .extract_wald_results)) |
| 69 | ||
| 70 | 2x |
cbind(aod$variable, df_list) %>% |
| 71 | 2x |
tidyr::pivot_longer( |
| 72 | 2x |
cols = !"aod$variable", |
| 73 | 2x |
names_to = "stat_name", |
| 74 | 2x |
values_to = "stat" |
| 75 |
) %>% |
|
| 76 | 2x |
dplyr::rename( |
| 77 | 2x |
"variable" = "aod$variable" |
| 78 |
) |> |
|
| 79 | 2x |
dplyr::mutate( |
| 80 | 2x |
stat = as.list(.data$stat), |
| 81 | 2x |
stat_label = |
| 82 | 2x |
dplyr::case_when( |
| 83 | 2x |
.data$stat_name %in% "statistic" ~ "Statistic", |
| 84 | 2x |
.data$stat_name %in% "df" ~ "Degrees of Freedom", |
| 85 | 2x |
.data$stat_name %in% "p.value" ~ "p-value", |
| 86 | 2x |
TRUE ~ .data$stat_name |
| 87 |
), |
|
| 88 | 2x |
fmt_fun = |
| 89 | 2x |
map( |
| 90 | 2x |
.data$stat, |
| 91 | 2x |
function(.x) {
|
| 92 |
# styler: off |
|
| 93 | ! |
if (is.integer(.x)) return(0L) |
| 94 | 12x |
if (is.numeric(.x)) return(1L) |
| 95 |
# styler: on |
|
| 96 | ! |
NULL |
| 97 |
} |
|
| 98 |
), |
|
| 99 | 2x |
context = "aod_wald_test", |
| 100 | 2x |
warning = wald_test["warning"], |
| 101 | 2x |
error = wald_test["error"] |
| 102 |
) |> |
|
| 103 | 2x |
cards::as_card() |> |
| 104 | 2x |
cards::tidy_ard_column_order() |
| 105 |
} |
|
| 106 | ||
| 107 |
#' Extract data from wald.test object |
|
| 108 |
#' |
|
| 109 |
#' @param wald_test (`data.frame`)\cr wald test object object from `aod::wald.test()` |
|
| 110 |
#' |
|
| 111 |
#' @return a data frame containing the wald test results. |
|
| 112 |
#' @keywords internal |
|
| 113 |
.extract_wald_results <- function(wald_test) {
|
|
| 114 | 4x |
df <- wald_test$result$chi2[("df")]
|
| 115 | 4x |
statistic <- wald_test$result$chi2[("chi2")]
|
| 116 | 4x |
p.value <- wald_test$result$chi2[("P")]
|
| 117 | 4x |
data.frame(df, statistic, p.value) |
| 118 |
} |
| 1 |
#' ARD Survey t-test |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Analysis results data for survey t-test using [`survey::svyttest()`]. |
|
| 5 |
#' |
|
| 6 |
#' @param data (`survey.design`)\cr |
|
| 7 |
#' a survey design object often created with [`survey::svydesign()`] |
|
| 8 |
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 9 |
#' column name to compare by |
|
| 10 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 11 |
#' column names to be compared. Independent tests will be run for each variable. |
|
| 12 |
#' @param conf.level (`double`)\cr |
|
| 13 |
#' confidence level of the returned confidence interval. Must be between `c(0, 1)`. |
|
| 14 |
#' Default is `0.95` |
|
| 15 |
#' @param ... arguments passed to [`survey::svyttest()`] |
|
| 16 |
#' |
|
| 17 |
#' @return ARD data frame |
|
| 18 |
#' @export |
|
| 19 |
#' |
|
| 20 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("survey", "broom")))
|
|
| 21 |
#' data(api, package = "survey") |
|
| 22 |
#' dclus2 <- survey::svydesign(id = ~ dnum + snum, fpc = ~ fpc1 + fpc2, data = apiclus2) |
|
| 23 |
#' |
|
| 24 |
#' ard_survey_svyttest(dclus2, variables = enroll, by = comp.imp, conf.level = 0.9) |
|
| 25 |
ard_survey_svyttest <- function(data, by, variables, conf.level = 0.95, ...) {
|
|
| 26 | 5x |
set_cli_abort_call() |
| 27 | ||
| 28 |
# check installed packages --------------------------------------------------- |
|
| 29 | 5x |
check_pkg_installed(c("survey", "broom"))
|
| 30 | ||
| 31 |
# check/process inputs ------------------------------------------------------- |
|
| 32 | 5x |
check_not_missing(data) |
| 33 | 5x |
check_not_missing(variables) |
| 34 | 5x |
check_not_missing(by) |
| 35 | 5x |
check_range(conf.level, range = c(0, 1)) |
| 36 | 5x |
check_class(data, cls = "survey.design") |
| 37 | 5x |
cards::process_selectors(data[["variables"]], by = {{ by }}, variables = {{ variables }})
|
| 38 | 5x |
check_scalar(by) |
| 39 | ||
| 40 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 41 | 5x |
if (is_empty(variables)) {
|
| 42 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 43 |
} |
|
| 44 | ||
| 45 |
# build ARD ------------------------------------------------------------------ |
|
| 46 | 5x |
lapply( |
| 47 | 5x |
variables, |
| 48 | 5x |
function(variable) {
|
| 49 | 6x |
.format_svyttest_results( |
| 50 | 6x |
by = by, |
| 51 | 6x |
variable = variable, |
| 52 | 6x |
lst_tidy = |
| 53 | 6x |
cards::eval_capture_conditions( |
| 54 | 6x |
survey::svyttest(reformulate2(termlabels = by, response = variable), design = data, ...) %>% |
| 55 |
# a slightly enhanced tidier that allows us to specify the conf.level |
|
| 56 |
{
|
|
| 57 | 5x |
dplyr::bind_cols( |
| 58 | 5x |
broom::tidy(.) |> dplyr::select(-c("conf.low", "conf.high")),
|
| 59 | 5x |
dplyr::tibble(!!!stats::confint(., level = conf.level) |> set_names(c("conf.low", "conf.high"))) |>
|
| 60 | 5x |
dplyr::mutate(conf.level = conf.level) |
| 61 |
) |
|
| 62 |
} |
|
| 63 |
), |
|
| 64 |
... |
|
| 65 |
) |
|
| 66 |
} |
|
| 67 |
) |> |
|
| 68 | 5x |
dplyr::bind_rows() |
| 69 |
} |
|
| 70 | ||
| 71 |
.format_svyttest_results <- function(by, variable, lst_tidy, ...) {
|
|
| 72 |
# build ARD ------------------------------------------------------------------ |
|
| 73 | 6x |
ret <- |
| 74 | 6x |
cards::tidy_as_ard( |
| 75 | 6x |
lst_tidy = lst_tidy, |
| 76 | 6x |
tidy_result_names = c( |
| 77 | 6x |
"estimate", "statistic", |
| 78 | 6x |
"p.value", "parameter", |
| 79 | 6x |
"conf.low", "conf.high", |
| 80 | 6x |
"conf.level", "method", "alternative" |
| 81 |
), |
|
| 82 | 6x |
passed_args = dots_list(...), |
| 83 | 6x |
lst_ard_columns = list(group1 = by, variable = variable, context = "survey_svyttest") |
| 84 |
) |
|
| 85 | ||
| 86 |
# add the stat label --------------------------------------------------------- |
|
| 87 | 6x |
ret |> |
| 88 | 6x |
dplyr::left_join( |
| 89 | 6x |
.df_ttest_stat_labels(), |
| 90 | 6x |
by = "stat_name" |
| 91 |
) |> |
|
| 92 | 6x |
dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) |> |
| 93 | 6x |
cards::as_card() |> |
| 94 | 6x |
cards::tidy_ard_column_order() |
| 95 |
} |
| 1 |
#' Regression ARD |
|
| 2 |
#' |
|
| 3 |
#' Function takes a regression model object and converts it to a ARD |
|
| 4 |
#' structure using the `broom.helpers` package. |
|
| 5 |
#' |
|
| 6 |
#' @inheritParams construct_model |
|
| 7 |
#' @param x (regression model/`data.frame`)\cr |
|
| 8 |
#' regression model object or a data frame |
|
| 9 |
#' @param tidy_fun (`function`)\cr |
|
| 10 |
#' a tidier. Default is [`broom.helpers::tidy_with_broom_or_parameters`] |
|
| 11 |
#' @param ... Arguments passed to [`broom.helpers::tidy_plus_plus()`] |
|
| 12 |
#' |
|
| 13 |
#' @return data frame |
|
| 14 |
#' @name ard_regression |
|
| 15 |
#' |
|
| 16 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom.helpers"))
|
|
| 17 |
#' lm(AGE ~ ARM, data = cards::ADSL) |> |
|
| 18 |
#' ard_regression(add_estimate_to_reference_rows = TRUE) |
|
| 19 |
#' |
|
| 20 |
#' ard_regression( |
|
| 21 |
#' x = cards::ADSL, |
|
| 22 |
#' formula = AGE ~ ARM, |
|
| 23 |
#' method = "lm" |
|
| 24 |
#' ) |
|
| 25 |
NULL |
|
| 26 | ||
| 27 |
#' @rdname ard_regression |
|
| 28 |
#' @export |
|
| 29 |
ard_regression <- function(x, ...) {
|
|
| 30 | 20x |
UseMethod("ard_regression")
|
| 31 |
} |
|
| 32 | ||
| 33 |
#' @rdname ard_regression |
|
| 34 |
#' @export |
|
| 35 |
ard_regression.default <- function(x, tidy_fun = broom.helpers::tidy_with_broom_or_parameters, ...) {
|
|
| 36 | 19x |
set_cli_abort_call() |
| 37 | ||
| 38 |
# check installed packages --------------------------------------------------- |
|
| 39 | 19x |
check_pkg_installed(pkg = "broom.helpers") |
| 40 | ||
| 41 |
# check inputs --------------------------------------------------------------- |
|
| 42 | 19x |
check_not_missing(x) |
| 43 | ||
| 44 |
# summarize model ------------------------------------------------------------ |
|
| 45 | 19x |
lst_results <- cards::eval_capture_conditions( |
| 46 | 19x |
broom.helpers::tidy_plus_plus( |
| 47 | 19x |
model = x, |
| 48 | 19x |
tidy_fun = tidy_fun, |
| 49 |
... |
|
| 50 |
) |
|
| 51 |
) |
|
| 52 | ||
| 53 |
# final tidying up of cards data frame --------------------------------------- |
|
| 54 | 19x |
.regression_final_ard_prep(lst_results) |
| 55 |
} |
|
| 56 | ||
| 57 |
#' @rdname ard_regression |
|
| 58 |
#' @export |
|
| 59 |
ard_regression.data.frame <- function(x, formula, method, method.args = list(), package = "base", |
|
| 60 |
tidy_fun = broom.helpers::tidy_with_broom_or_parameters, ...) {
|
|
| 61 |
# check inputs --------------------------------------------------------------- |
|
| 62 | 1x |
set_cli_abort_call() |
| 63 | 1x |
check_not_missing(x) |
| 64 | 1x |
check_not_missing(formula) |
| 65 | 1x |
check_not_missing(method) |
| 66 | 1x |
check_class(formula, cls = "formula") |
| 67 | ||
| 68 |
# build model ---------------------------------------------------------------- |
|
| 69 | 1x |
model <- |
| 70 | 1x |
construct_model( |
| 71 | 1x |
data = x, |
| 72 | 1x |
formula = formula, |
| 73 | 1x |
method = method, |
| 74 | 1x |
method.args = {{ method.args }},
|
| 75 | 1x |
package = package |
| 76 |
) |
|
| 77 | ||
| 78 |
# summarize model ------------------------------------------------------------ |
|
| 79 | 1x |
ard_regression(x = model, tidy_fun = tidy_fun, ...) |
| 80 |
} |
|
| 81 | ||
| 82 |
.regression_final_ard_prep <- function(lst_results) {
|
|
| 83 |
# saving the results in data frame ------------------------------------------- |
|
| 84 | 19x |
df_card <- |
| 85 | 19x |
if (!is.null(lst_results[["result"]])) {
|
| 86 | 18x |
lst_results[["result"]] |> |
| 87 | 18x |
dplyr::mutate( |
| 88 | 18x |
variable_level = as.list(dplyr::if_else(.data$var_type %in% "continuous", NA_character_, .data$label)), |
| 89 | 18x |
dplyr::across(-c("variable", "variable_level"), .fns = as.list)
|
| 90 |
) |> |
|
| 91 | 18x |
tidyr::pivot_longer( |
| 92 | 18x |
cols = -c("variable", "variable_level"),
|
| 93 | 18x |
names_to = "stat_name", |
| 94 | 18x |
values_to = "stat" |
| 95 |
) |> |
|
| 96 | 18x |
dplyr::filter(map_lgl(.data$stat, Negate(is.na))) |> |
| 97 | 18x |
dplyr::select(-(cards::all_ard_variables("levels") & dplyr::where(\(x) all(is.na(x)))))
|
| 98 | 19x |
} else { # if there was an error return a shell of an ARD data frame
|
| 99 | 1x |
dplyr::tibble( |
| 100 | 1x |
variable = "model_1", |
| 101 | 1x |
stat_name = "estimate", |
| 102 | 1x |
stat = list(NULL) |
| 103 |
) |
|
| 104 |
} |
|
| 105 | ||
| 106 |
# final tidying up of ARD data frame --------------------------------------- |
|
| 107 | 19x |
df_card |> |
| 108 | 19x |
dplyr::mutate( |
| 109 | 19x |
warning = lst_results["warning"], |
| 110 | 19x |
error = lst_results["error"], |
| 111 | 19x |
fmt_fun = lapply( |
| 112 | 19x |
.data$stat, |
| 113 | 19x |
function(x) {
|
| 114 | 499x |
switch(is.integer(x), |
| 115 | 499x |
0L |
| 116 | 499x |
) %||% switch(is.numeric(x), |
| 117 | 499x |
1L |
| 118 |
) |
|
| 119 |
} |
|
| 120 |
), |
|
| 121 | 19x |
context = "regression" |
| 122 |
) |> |
|
| 123 | 19x |
dplyr::left_join( |
| 124 | 19x |
.df_regression_stat_labels(), |
| 125 | 19x |
by = "stat_name" |
| 126 |
) |> |
|
| 127 | 19x |
dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) |> |
| 128 | 19x |
cards::as_card() |> |
| 129 | 19x |
cards::tidy_ard_column_order() |
| 130 |
} |
|
| 131 | ||
| 132 |
.df_regression_stat_labels <- function() {
|
|
| 133 | 19x |
dplyr::tribble( |
| 134 | 19x |
~stat_name, ~stat_label, |
| 135 | 19x |
"var_label", "Label", |
| 136 | 19x |
"var_class", "Class", |
| 137 | 19x |
"var_type", "Type", |
| 138 | 19x |
"var_nlevels", "N Levels", |
| 139 | 19x |
"contrasts_type", "Contrast Type", |
| 140 | 19x |
"label", "Level Label", |
| 141 | 19x |
"n_obs", "N Obs.", |
| 142 | 19x |
"n_event", "N Events", |
| 143 | 19x |
"exposure", "Exposure Time", |
| 144 | 19x |
"estimate", "Coefficient", |
| 145 | 19x |
"std.error", "Standard Error", |
| 146 | 19x |
"p.value", "p-value", |
| 147 | 19x |
"conf.low", "CI Lower Bound", |
| 148 | 19x |
"conf.high", "CI Upper Bound", |
| 149 |
) |
|
| 150 |
} |
| 1 |
#' @description |
|
| 2 |
#' The `ard_emmeans_emmeans()` function calculates least-squares means using the 'emmeans' |
|
| 3 |
#' package using the following |
|
| 4 |
#' |
|
| 5 |
#' ```r |
|
| 6 |
#' emmeans::emmeans(object = <regression model>, specs = ~ <primary covariate>) |> |
|
| 7 |
#' summary(emmeans, calc = c(n = ".wgt.")) |
|
| 8 |
#' ``` |
|
| 9 |
#' |
|
| 10 |
#' The arguments `data`, `formula`, `method`, `method.args`, `package` are used |
|
| 11 |
#' to construct the regression model via `cardx::construct_model()`. |
|
| 12 |
#' |
|
| 13 |
#' @export |
|
| 14 |
#' @rdname ard_emmeans |
|
| 15 |
#' |
|
| 16 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "emmeans"))
|
|
| 17 |
#' # LS Means |
|
| 18 |
#' ard_emmeans_emmeans( |
|
| 19 |
#' data = mtcars, |
|
| 20 |
#' formula = mpg ~ am + cyl, |
|
| 21 |
#' method = "lm" |
|
| 22 |
#' ) |
|
| 23 |
#' |
|
| 24 |
#' ard_emmeans_emmeans( |
|
| 25 |
#' data = mtcars, |
|
| 26 |
#' formula = vs ~ am + mpg, |
|
| 27 |
#' method = "glm", |
|
| 28 |
#' method.args = list(family = binomial), |
|
| 29 |
#' response_type = "dichotomous" |
|
| 30 |
#' ) |
|
| 31 |
ard_emmeans_emmeans <- function(data, |
|
| 32 |
formula, |
|
| 33 |
method, |
|
| 34 |
method.args = list(), |
|
| 35 |
package = "base", |
|
| 36 |
response_type = c("continuous", "dichotomous"),
|
|
| 37 |
conf.level = 0.95, |
|
| 38 |
primary_covariate = |
|
| 39 |
stats::terms(formula) |> |
|
| 40 |
attr("term.labels") |>
|
|
| 41 |
getElement(1L)) {
|
|
| 42 | 5x |
set_cli_abort_call() |
| 43 | ||
| 44 |
# check package installation ------------------------------------------------- |
|
| 45 | 5x |
check_pkg_installed(c("emmeans", package))
|
| 46 | 5x |
check_not_missing(data) |
| 47 | 5x |
check_not_missing(formula) |
| 48 | 5x |
check_not_missing(method) |
| 49 | 5x |
check_class(data, c("data.frame", "survey.design"))
|
| 50 | 5x |
check_class(formula, cls = "formula") |
| 51 | 5x |
check_string(package) |
| 52 | 5x |
check_string(primary_covariate) |
| 53 | 5x |
check_scalar(conf.level) |
| 54 | 5x |
check_range(conf.level, range = c(0, 1)) |
| 55 | 5x |
response_type <- arg_match(response_type, error_call = get_cli_abort_call()) |
| 56 | ||
| 57 | 5x |
data_in <- if (dplyr::last(class(data)) == "survey.design") data$variables else data |
| 58 | ||
| 59 |
# build ARD ------------------------------------------------------------------ |
|
| 60 | 5x |
result <- cards::ard_mvsummary( |
| 61 | 5x |
data = data_in, |
| 62 | 5x |
variables = all_of(primary_covariate), |
| 63 | 5x |
statistic = all_of(primary_covariate) ~ list( |
| 64 | 5x |
emmeans = |
| 65 | 5x |
.calc_emmeans( |
| 66 | 5x |
data = data, formula = formula, method = method, |
| 67 | 5x |
method.args = {{ method.args }}, package = package,
|
| 68 | 5x |
response_type = response_type, conf.level = conf.level, |
| 69 | 5x |
primary_covariate = primary_covariate |
| 70 |
) |
|
| 71 |
) |
|
| 72 |
) |
|
| 73 |
# unlist stat column |
|
| 74 | 5x |
if (length(result$stat[[which(result$stat_label == "variable_level")]]) > 1) {
|
| 75 | 4x |
result <- result |> tidyr::unnest_longer(col = "stat") |
| 76 |
} |
|
| 77 | ||
| 78 | 5x |
result |> |
| 79 | 5x |
dplyr::select(-"stat_label") |> |
| 80 | 5x |
dplyr::left_join( |
| 81 | 5x |
.df_emmeans_stat_labels("emmeans"),
|
| 82 | 5x |
by = "stat_name" |
| 83 |
) |> |
|
| 84 | 5x |
dplyr::mutate( |
| 85 | 5x |
variable = "contrast", |
| 86 | 5x |
variable_level = if ("variable_level" %in% .data$stat_name) {
|
| 87 | 5x |
rep_len(.data$stat[.data$stat_name == "variable_level"], length.out = nrow(result)) |
| 88 |
} else {
|
|
| 89 | 5x |
NA |
| 90 |
}, |
|
| 91 | 5x |
group1 = .env$primary_covariate, |
| 92 | 5x |
stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name), |
| 93 | 5x |
context = "emmeans_emmeans" |
| 94 |
) |> |
|
| 95 | 5x |
dplyr::filter(!is.na(.data$stat)) |> |
| 96 | 5x |
dplyr::filter(.data$stat_name != "variable_level") |> |
| 97 | 5x |
dplyr::arrange(.data$variable_level) |> |
| 98 | 5x |
cards::as_card() |> |
| 99 | 5x |
cards::tidy_ard_column_order() |> |
| 100 | 5x |
cards::tidy_ard_row_order() |
| 101 |
} |
|
| 102 | ||
| 103 |
# function to perform calculations --------------------------------------------- |
|
| 104 |
.calc_emmeans <- function(data, formula, method, |
|
| 105 |
method.args, |
|
| 106 |
package, |
|
| 107 |
response_type, |
|
| 108 |
conf.level, |
|
| 109 |
primary_covariate) {
|
|
| 110 | 5x |
cards::as_cards_fn( |
| 111 | 5x |
\(x, ...) {
|
| 112 |
# construct primary model ------------------------------------------------ |
|
| 113 | 5x |
mod <- |
| 114 | 5x |
construct_model( |
| 115 | 5x |
data = data, formula = formula, method = method, |
| 116 | 5x |
method.args = {{ method.args }},
|
| 117 | 5x |
package = package, env = caller_env() |
| 118 |
) |
|
| 119 | ||
| 120 |
# emmeans ---------------------------------------------------------------- |
|
| 121 | 4x |
emmeans_args <- list(object = mod, specs = reformulate2(primary_covariate)) |
| 122 | 3x |
if (response_type %in% "dichotomous") emmeans_args <- c(emmeans_args, list(regrid = "response")) |
| 123 | 4x |
emmeans <- |
| 124 | 4x |
withr::with_namespace( |
| 125 | 4x |
package = "emmeans", |
| 126 | 4x |
code = do.call("emmeans", args = emmeans_args)
|
| 127 |
) |
|
| 128 | ||
| 129 |
# calculate mean estimates --------------------------------------------- |
|
| 130 | 4x |
results <- |
| 131 | 4x |
summary(emmeans, calc = c(n = ".wgt.")) |> |
| 132 | 4x |
dplyr::as_tibble() |> |
| 133 | 4x |
dplyr::rename( |
| 134 | 4x |
estimate = any_of(c("emmean", "prob")),
|
| 135 | 4x |
n = any_of("n")
|
| 136 |
) |> |
|
| 137 | 4x |
dplyr::rename(variable_level = all_of(primary_covariate)) |> |
| 138 | 4x |
dplyr::mutate(variable_level = as.character(.data$variable_level)) |
| 139 | ||
| 140 |
# convert results to ARD format ------------------------------------------ |
|
| 141 | 4x |
results |> |
| 142 | 4x |
dplyr::as_tibble() |> |
| 143 | 4x |
dplyr::rename( |
| 144 | 4x |
conf.low = any_of("asymp.LCL"),
|
| 145 | 4x |
conf.high = any_of("asymp.UCL"),
|
| 146 | 4x |
conf.low = any_of("lower.CL"),
|
| 147 | 4x |
conf.high = any_of("upper.CL"),
|
| 148 | 4x |
std.error = any_of("SE")
|
| 149 |
) |> |
|
| 150 | 4x |
dplyr::select(any_of(c( |
| 151 | 4x |
"variable_level", "estimate", |
| 152 | 4x |
"std.error", "df", "n", |
| 153 | 4x |
"conf.low", "conf.high", "p.value" |
| 154 |
))) |> |
|
| 155 | 4x |
dplyr::mutate( |
| 156 | 4x |
conf.level = .env$conf.level, |
| 157 | 4x |
method = "Least-squares means" |
| 158 |
) |> |
|
| 159 | 4x |
dplyr::mutate(across(everything(), ~ .x |> as.list())) |
| 160 |
}, |
|
| 161 | 5x |
stat_names = c("variable_level", "estimate", "std.error", "df", "conf.low", "conf.high", "p.value", "conf.level", "method", "n")
|
| 162 |
) |
|
| 163 |
} |
| 1 |
#' ARD for Difference in Survival |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Analysis results data for comparison of survival using [survival::survdiff()]. |
|
| 5 |
#' |
|
| 6 |
#' @param formula (`formula`)\cr |
|
| 7 |
#' a formula |
|
| 8 |
#' @param data (`data.frame`)\cr |
|
| 9 |
#' a data frame |
|
| 10 |
#' @param rho (`scalar numeric`)\cr |
|
| 11 |
#' numeric scalar passed to `survival::survdiff(rho)`. Default is `rho=0`. |
|
| 12 |
#' @param ... additional arguments passed to `survival::survdiff()` |
|
| 13 |
#' |
|
| 14 |
#' @return an ARD data frame of class 'card' |
|
| 15 |
#' @export |
|
| 16 |
#' |
|
| 17 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("survival", "broom", "ggsurvfit")))
|
|
| 18 |
#' library(survival) |
|
| 19 |
#' library(ggsurvfit) |
|
| 20 |
#' |
|
| 21 |
#' ard_survival_survdiff(Surv_CNSR(AVAL, CNSR) ~ TRTA, data = cards::ADTTE) |
|
| 22 |
ard_survival_survdiff <- function(formula, data, rho = 0, ...) {
|
|
| 23 | 5x |
set_cli_abort_call() |
| 24 | ||
| 25 |
# check installed packages --------------------------------------------------- |
|
| 26 | 5x |
check_pkg_installed(c("survival", "broom"))
|
| 27 | ||
| 28 |
# check/process inputs ------------------------------------------------------- |
|
| 29 | 5x |
check_not_missing(formula) |
| 30 | 5x |
check_class(formula, cls = "formula") |
| 31 | 5x |
if (!missing(data)) check_class(data, cls = "data.frame") |
| 32 | 5x |
check_scalar(rho) |
| 33 | 5x |
check_class(rho, cls = "numeric") |
| 34 | ||
| 35 |
# assign method |
|
| 36 | 5x |
method <- dplyr::case_when( |
| 37 | 5x |
rho == 0 ~ "Log-rank test", |
| 38 | 5x |
rho == 1.5 ~ "Tarone-Ware test", |
| 39 | 5x |
rho == 1 ~ "Peto & Peto modification of Gehan-Wilcoxon test", |
| 40 | 5x |
.default = glue::glue("G-rho test (\U03C1 = {rho})")
|
| 41 |
) |> |
|
| 42 | 5x |
as.character() |
| 43 | ||
| 44 |
# calculate survdiff() results ----------------------------------------------- |
|
| 45 | 5x |
lst_glance <- |
| 46 | 5x |
cards::eval_capture_conditions( |
| 47 | 5x |
survival::survdiff(formula = formula, data = data, rho = rho, ...) |> |
| 48 | 5x |
broom::glance() |> |
| 49 | 5x |
dplyr::mutate(method = .env$method) |
| 50 |
) |
|
| 51 | ||
| 52 |
# tidy results up in an ARD format ------------------------------------------- |
|
| 53 |
# extract variable names from formula |
|
| 54 | 5x |
variables <- stats::terms(formula) |> |
| 55 | 5x |
attr("term.labels") |>
|
| 56 | 5x |
.strip_backticks() |
| 57 | ||
| 58 |
# if there was an error, return results early |
|
| 59 | 5x |
if (is.null(lst_glance[["result"]])) {
|
| 60 |
# if no variables in formula, then return an error |
|
| 61 |
# otherwise, if we do have variable names, then we can construct an empty ARD which will be done below |
|
| 62 | 2x |
if (is_empty(variables)) {
|
| 63 | 1x |
cli::cli_abort( |
| 64 | 1x |
message = |
| 65 | 1x |
c("There was an error in {.fun survival::survdiff}. See below:",
|
| 66 | 1x |
"x" = lst_glance[["error"]] |
| 67 |
), |
|
| 68 | 1x |
call = get_cli_abort_call() |
| 69 |
) |
|
| 70 |
} |
|
| 71 |
} |
|
| 72 | ||
| 73 | 4x |
.variables_to_survdiff_ard( |
| 74 | 4x |
variables = variables, |
| 75 | 4x |
method = method, |
| 76 |
# styler: off |
|
| 77 | 4x |
stat_names = |
| 78 | 4x |
if (!is.null(lst_glance[["result"]])) names(lst_glance[["result"]]) |
| 79 | 4x |
else c("statistic", "df", "p.value", "method"),
|
| 80 | 4x |
stats = |
| 81 | 4x |
if (!is.null(lst_glance[["result"]])) unname(as.list(lst_glance[["result"]])) |
| 82 | 4x |
else rep_along(c("statistic", "df", "p.value"), list(NULL)) |> c(list(method = method))
|
| 83 |
# styler: on |
|
| 84 |
) |> |
|
| 85 | 4x |
.add_survdiff_stat_labels() |> |
| 86 | 4x |
dplyr::mutate( |
| 87 | 4x |
context = "survival_survdiff", |
| 88 | 4x |
warning = lst_glance["warning"], |
| 89 | 4x |
error = lst_glance["error"], |
| 90 | 4x |
fmt_fun = map( |
| 91 | 4x |
.data$stat, |
| 92 | 4x |
function(x) {
|
| 93 | 9x |
if (is.numeric(x)) return(1L) # styler: off |
| 94 | 7x |
NULL |
| 95 |
} |
|
| 96 |
) |
|
| 97 |
) |> |
|
| 98 | 4x |
cards::as_card() |> |
| 99 | 4x |
cards::tidy_ard_column_order() |
| 100 |
} |
|
| 101 | ||
| 102 |
.variables_to_survdiff_ard <- function(variables, |
|
| 103 |
method, |
|
| 104 |
stat_names, |
|
| 105 |
stats) {
|
|
| 106 | 4x |
len <- length(variables) |
| 107 | ||
| 108 | 4x |
df_vars <- dplyr::tibble(!!!rev(variables)) |> |
| 109 | 4x |
set_names( |
| 110 | 4x |
ifelse( |
| 111 | 4x |
len > 1L, |
| 112 | 4x |
c(paste0("group_", rev(seq_len(len - 1L))), "variable"),
|
| 113 | 4x |
"variable" |
| 114 |
) |
|
| 115 |
) |
|
| 116 | ||
| 117 | 4x |
dplyr::bind_cols( |
| 118 | 4x |
df_vars, |
| 119 | 4x |
dplyr::tibble( |
| 120 | 4x |
stat_name = .env$stat_names, |
| 121 | 4x |
stat = .env$stats |
| 122 |
) |
|
| 123 |
) |
|
| 124 |
} |
|
| 125 | ||
| 126 |
.add_survdiff_stat_labels <- function(x) {
|
|
| 127 | 4x |
x |> |
| 128 | 4x |
dplyr::left_join( |
| 129 | 4x |
dplyr::tribble( |
| 130 | 4x |
~stat_name, ~stat_label, |
| 131 | 4x |
"statistic", "X^2 Statistic", |
| 132 | 4x |
"df", "Degrees of Freedom", |
| 133 | 4x |
"p.value", "p-value" |
| 134 |
), |
|
| 135 | 4x |
by = "stat_name" |
| 136 |
) |> |
|
| 137 | 4x |
dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) |
| 138 |
} |
|
| 139 | ||
| 140 |
.strip_backticks <- function(x) {
|
|
| 141 | 5x |
ifelse( |
| 142 | 5x |
str_detect(x, "^`.*`$"), |
| 143 | 5x |
substr(x, 2, nchar(x) - 1), |
| 144 | 5x |
x |
| 145 |
) |
|
| 146 |
} |
| 1 |
#' ARD Total N |
|
| 2 |
#' |
|
| 3 |
#' Returns the total N for a survey object. |
|
| 4 |
#' The placeholder variable name returned in the object is `"..ard_total_n.."` |
|
| 5 |
#' |
|
| 6 |
#' @inheritParams ard_tabulate_value.survey.design |
|
| 7 |
#' @inheritParams rlang::args_dots_empty |
|
| 8 |
#' |
|
| 9 |
#' @return an ARD data frame of class 'card' |
|
| 10 |
#' @export |
|
| 11 |
#' |
|
| 12 |
#' @examplesIf cardx:::is_pkg_installed("survey")
|
|
| 13 |
#' svy_titanic <- survey::svydesign(~1, data = as.data.frame(Titanic), weights = ~Freq) |
|
| 14 |
#' |
|
| 15 |
#' ard_total_n(svy_titanic) |
|
| 16 |
ard_total_n.survey.design <- function(data, ...) {
|
|
| 17 |
# process inputs ------------------------------------------------------------- |
|
| 18 | 3x |
set_cli_abort_call() |
| 19 | 3x |
check_dots_empty() |
| 20 | ||
| 21 |
# calculate total N ---------------------------------------------------------- |
|
| 22 | 3x |
data <- stats::update(data, ..ard_total_n.. = TRUE) |
| 23 | ||
| 24 | 3x |
data |> |
| 25 | 3x |
ard_tabulate_value( |
| 26 | 3x |
variables = "..ard_total_n..", |
| 27 | 3x |
statistic = list(..ard_total_n.. = c("N", "N_unweighted"))
|
| 28 |
) |> |
|
| 29 | 3x |
dplyr::mutate(context = "total_n") |> |
| 30 | 3x |
dplyr::select(-cards::all_ard_variables("levels"))
|
| 31 |
} |
| 1 |
#' ARD 2-sample proportion test |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Analysis results data for a 2-sample test or proportions using [`stats::prop.test()`]. |
|
| 5 |
#' |
|
| 6 |
#' @param data (`data.frame`)\cr |
|
| 7 |
#' a data frame. |
|
| 8 |
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 9 |
#' column name to compare by |
|
| 10 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 11 |
#' column names to be compared. Must be a binary column coded as `TRUE`/`FALSE` |
|
| 12 |
#' or `1`/`0`. Independent tests will be computed for each variable. |
|
| 13 |
#' @param conf.level (scalar `numeric`)\cr |
|
| 14 |
#' confidence level for confidence interval. Default is `0.95`. |
|
| 15 |
#' @param ... arguments passed to `prop.test(...)` |
|
| 16 |
#' |
|
| 17 |
#' @return ARD data frame |
|
| 18 |
#' @export |
|
| 19 |
#' |
|
| 20 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
|
|
| 21 |
#' mtcars |> |
|
| 22 |
#' ard_stats_prop_test(by = vs, variables = am) |
|
| 23 |
ard_stats_prop_test <- function(data, by, variables, conf.level = 0.95, ...) {
|
|
| 24 | 6x |
set_cli_abort_call() |
| 25 | ||
| 26 |
# check installed packages --------------------------------------------------- |
|
| 27 | 6x |
check_pkg_installed(pkg = "broom") |
| 28 | ||
| 29 |
# check inputs --------------------------------------------------------------- |
|
| 30 | 6x |
check_not_missing(data) |
| 31 | 6x |
check_not_missing(variables) |
| 32 | 6x |
check_not_missing(by) |
| 33 | 6x |
check_data_frame(data) |
| 34 | 6x |
check_scalar_range(conf.level, range = c(0, 1)) |
| 35 | ||
| 36 |
# process inputs ------------------------------------------------------------- |
|
| 37 | 6x |
cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
|
| 38 | 6x |
check_scalar(by) |
| 39 | 6x |
data <- data[c(by, variables)] |> dplyr::ungroup() |> tidyr::drop_na() # styler: off |
| 40 | ||
| 41 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 42 | 6x |
if (is_empty(variables)) {
|
| 43 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 44 |
} |
|
| 45 | ||
| 46 |
# build ARD ------------------------------------------------------------------ |
|
| 47 | 6x |
lapply( |
| 48 | 6x |
variables, |
| 49 | 6x |
function(variable) {
|
| 50 | 7x |
.format_proptest_results( |
| 51 | 7x |
by = by, |
| 52 | 7x |
variable = variable, |
| 53 | 7x |
lst_tidy = |
| 54 | 7x |
cards::eval_capture_conditions({
|
| 55 | 7x |
check_binary(data[[variable]], arg_name = "variable") |
| 56 | ||
| 57 | 4x |
data_counts <- |
| 58 | 4x |
dplyr::arrange(data, .data[[by]]) |> |
| 59 | 4x |
dplyr::summarise( |
| 60 | 4x |
.by = all_of(by), |
| 61 | 4x |
x = sum(.data[[variable]]), |
| 62 | 4x |
n = length(.data[[variable]]) |
| 63 |
) |
|
| 64 | ||
| 65 | 4x |
if (nrow(data_counts) != 2) {
|
| 66 | 1x |
cli::cli_abort( |
| 67 | 1x |
c( |
| 68 | 1x |
"The {.arg by} column must have exactly 2 levels.",
|
| 69 | 1x |
"The levels are {.val {data_counts[[by]]}}"
|
| 70 |
), |
|
| 71 | 1x |
call = get_cli_abort_call() |
| 72 |
) |
|
| 73 |
} |
|
| 74 | ||
| 75 | 3x |
stats::prop.test( |
| 76 | 3x |
x = data_counts[["x"]], |
| 77 | 3x |
n = data_counts[["n"]], |
| 78 | 3x |
conf.level = conf.level, |
| 79 |
... |
|
| 80 |
) |> |
|
| 81 | 3x |
broom::tidy() |> |
| 82 |
# add central estimate for difference |
|
| 83 | 3x |
dplyr::mutate(estimate = .data$estimate1 - .data$estimate2, .before = 1L) |
| 84 |
}), |
|
| 85 |
... |
|
| 86 |
) |
|
| 87 |
} |
|
| 88 |
) |> |
|
| 89 | 6x |
dplyr::bind_rows() |
| 90 |
} |
|
| 91 | ||
| 92 | ||
| 93 |
#' Convert prop.test to ARD |
|
| 94 |
#' |
|
| 95 |
#' @inheritParams cards::tidy_as_ard |
|
| 96 |
#' @param by (`string`)\cr by column name |
|
| 97 |
#' @param variable (`string`)\cr variable column name |
|
| 98 |
#' @param ... passed to `prop.test(...)` |
|
| 99 |
#' |
|
| 100 |
#' @return ARD data frame |
|
| 101 |
#' @keywords internal |
|
| 102 |
.format_proptest_results <- function(by, variable, lst_tidy, ...) {
|
|
| 103 |
# build ARD ------------------------------------------------------------------ |
|
| 104 | 7x |
ret <- |
| 105 | 7x |
cards::tidy_as_ard( |
| 106 | 7x |
lst_tidy = lst_tidy, |
| 107 | 7x |
tidy_result_names = c( |
| 108 | 7x |
"estimate", "estimate1", "estimate2", "statistic", |
| 109 | 7x |
"p.value", "parameter", "conf.low", "conf.high", |
| 110 | 7x |
"method", "alternative" |
| 111 |
), |
|
| 112 | 7x |
fun_args_to_record = c("p", "conf.level", "correct"),
|
| 113 | 7x |
formals = formals(stats::prop.test), |
| 114 | 7x |
passed_args = dots_list(...), |
| 115 | 7x |
lst_ard_columns = list(group1 = by, variable = variable, context = "stats_prop_test") |
| 116 |
) |
|
| 117 | ||
| 118 |
# add the stat label --------------------------------------------------------- |
|
| 119 | 7x |
ret |> |
| 120 | 7x |
dplyr::left_join( |
| 121 | 7x |
.df_proptest_stat_labels(), |
| 122 | 7x |
by = "stat_name" |
| 123 |
) |> |
|
| 124 | 7x |
dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) |> |
| 125 | 7x |
cards::as_card() |> |
| 126 | 7x |
cards::tidy_ard_column_order() |
| 127 |
} |
|
| 128 | ||
| 129 |
.df_proptest_stat_labels <- function() {
|
|
| 130 | 7x |
dplyr::tribble( |
| 131 | 7x |
~stat_name, ~stat_label, |
| 132 | 7x |
"estimate1", "Group 1 Rate", |
| 133 | 7x |
"estimate2", "Group 2 Rate", |
| 134 | 7x |
"estimate", "Rate Difference", |
| 135 | 7x |
"p.value", "p-value", |
| 136 | 7x |
"statistic", "X-squared Statistic", |
| 137 | 7x |
"parameter", "Degrees of Freedom", |
| 138 | 7x |
"conf.low", "CI Lower Bound", |
| 139 | 7x |
"conf.high", "CI Upper Bound", |
| 140 | 7x |
"conf.level", "CI Confidence Level", |
| 141 | 7x |
"correct", "Yates' continuity correction", |
| 142 |
) |
|
| 143 |
} |
| 1 |
#' ARD one-sample t-test |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Analysis results data for one-sample t-tests. |
|
| 5 |
#' Result may be stratified by including the `by` argument. |
|
| 6 |
#' |
|
| 7 |
#' @param data (`data.frame`)\cr |
|
| 8 |
#' a data frame. See below for details. |
|
| 9 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 10 |
#' column names to be analyzed. Independent t-tests will be computed for |
|
| 11 |
#' each variable. |
|
| 12 |
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 13 |
#' optional column name to stratify results by. |
|
| 14 |
#' @inheritParams ard_stats_t_test |
|
| 15 |
#' |
|
| 16 |
#' @return ARD data frame |
|
| 17 |
#' @export |
|
| 18 |
#' |
|
| 19 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
|
|
| 20 |
#' cards::ADSL |> |
|
| 21 |
#' ard_stats_t_test_onesample(by = ARM, variables = AGE) |
|
| 22 |
ard_stats_t_test_onesample <- function(data, variables, by = dplyr::group_vars(data), conf.level = 0.95, ...) {
|
|
| 23 | 5x |
set_cli_abort_call() |
| 24 | ||
| 25 |
# check installed packages --------------------------------------------------- |
|
| 26 | 5x |
check_pkg_installed("broom")
|
| 27 | ||
| 28 |
# check/process inputs ------------------------------------------------------- |
|
| 29 | 5x |
check_not_missing(data) |
| 30 | 5x |
check_not_missing(variables) |
| 31 | 5x |
check_data_frame(data) |
| 32 | 5x |
data <- dplyr::ungroup(data) |
| 33 | 5x |
cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
|
| 34 | 5x |
check_scalar_range(conf.level, range = c(0, 1)) |
| 35 | ||
| 36 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 37 | 5x |
if (is_empty(variables)) {
|
| 38 | 1x |
return(dplyr::tibble() |> cards::as_card()) |
| 39 |
} |
|
| 40 | ||
| 41 | 4x |
cards::ard_summary( |
| 42 | 4x |
data = data, |
| 43 | 4x |
variables = all_of(variables), |
| 44 | 4x |
by = all_of(by), |
| 45 | 4x |
statistic = all_of(variables) ~ list(t_test_onesample = \(x) stats::t.test(x = x, conf.level = conf.level, ...) |> broom::tidy()) |
| 46 |
) |> |
|
| 47 | 4x |
cards::bind_ard( |
| 48 | 4x |
cards::ard_summary( |
| 49 | 4x |
data = data, |
| 50 | 4x |
variables = all_of(variables), |
| 51 | 4x |
by = all_of(by), |
| 52 | 4x |
statistic = |
| 53 | 4x |
all_of(variables) ~ |
| 54 | 4x |
list(conf.level = \(x) {
|
| 55 | 8x |
formals(asNamespace("stats")[["t.test.default"]])["mu"] |>
|
| 56 | 8x |
utils::modifyList(list(conf.level = conf.level, ...)) |
| 57 |
}) |
|
| 58 |
) |
|
| 59 |
) |> |
|
| 60 | 4x |
dplyr::select(-"stat_label") |> |
| 61 | 4x |
dplyr::left_join( |
| 62 | 4x |
.df_ttest_stat_labels(by = NULL), |
| 63 | 4x |
by = "stat_name" |
| 64 |
) |> |
|
| 65 | 4x |
dplyr::mutate( |
| 66 | 4x |
stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name), |
| 67 | 4x |
context = "stats_t_test_onesample", |
| 68 |
) |> |
|
| 69 | 4x |
cards::as_card() |> |
| 70 | 4x |
cards::tidy_ard_column_order() |> |
| 71 | 4x |
cards::tidy_ard_row_order() |
| 72 |
} |
| 1 |
#' ARD Chi-squared Test |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Analysis results data for Pearson's Chi-squared Test. |
|
| 5 |
#' Calculated with `chisq.test(x = data[[variable]], y = data[[by]], ...)` |
|
| 6 |
#' |
|
| 7 |
#' |
|
| 8 |
#' @param data (`data.frame`)\cr |
|
| 9 |
#' a data frame. |
|
| 10 |
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 11 |
#' column name to compare by. |
|
| 12 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 13 |
#' column names to be compared. Independent tests will be computed for |
|
| 14 |
#' each variable. |
|
| 15 |
#' @param ... additional arguments passed to `chisq.test(...)` |
|
| 16 |
#' |
|
| 17 |
#' @return ARD data frame |
|
| 18 |
#' @export |
|
| 19 |
#' |
|
| 20 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
|
|
| 21 |
#' cards::ADSL |> |
|
| 22 |
#' ard_stats_chisq_test(by = "ARM", variables = "AGEGR1") |
|
| 23 |
ard_stats_chisq_test <- function(data, by, variables, ...) {
|
|
| 24 | 4x |
set_cli_abort_call() |
| 25 | ||
| 26 |
# check installed packages --------------------------------------------------- |
|
| 27 | 4x |
check_pkg_installed("broom")
|
| 28 | ||
| 29 |
# check/process inputs ------------------------------------------------------- |
|
| 30 | 4x |
check_not_missing(data) |
| 31 | 4x |
check_not_missing(variables) |
| 32 | 4x |
check_not_missing(by) |
| 33 | 4x |
check_data_frame(data) |
| 34 | 4x |
cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
|
| 35 | 4x |
check_scalar(by) |
| 36 | ||
| 37 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 38 | 4x |
if (is_empty(variables)) {
|
| 39 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 40 |
} |
|
| 41 | ||
| 42 |
# build ARD ------------------------------------------------------------------ |
|
| 43 | 4x |
lapply( |
| 44 | 4x |
variables, |
| 45 | 4x |
function(variable) {
|
| 46 | 5x |
cards::tidy_as_ard( |
| 47 | 5x |
lst_tidy = |
| 48 | 5x |
cards::eval_capture_conditions( |
| 49 | 5x |
stats::chisq.test(x = data[[variable]], y = data[[by]], ...) |> |
| 50 | 5x |
broom::tidy() |
| 51 |
), |
|
| 52 | 5x |
tidy_result_names = c("statistic", "p.value", "parameter", "method"),
|
| 53 | 5x |
fun_args_to_record = |
| 54 | 5x |
c("correct", "p", "rescale.p", "simulate.p.value", "B"),
|
| 55 | 5x |
formals = formals(stats::chisq.test), |
| 56 | 5x |
passed_args = dots_list(...), |
| 57 | 5x |
lst_ard_columns = list(group1 = by, variable = variable, context = "stats_chisq_test") |
| 58 |
) |> |
|
| 59 | 5x |
dplyr::mutate( |
| 60 | 5x |
.after = "stat_name", |
| 61 | 5x |
stat_label = |
| 62 | 5x |
dplyr::case_when( |
| 63 | 5x |
.data$stat_name %in% "statistic" ~ "X-squared Statistic", |
| 64 | 5x |
.data$stat_name %in% "p.value" ~ "p-value", |
| 65 | 5x |
.data$stat_name %in% "parameter" ~ "Degrees of Freedom", |
| 66 | 5x |
TRUE ~ .data$stat_name, |
| 67 |
) |
|
| 68 |
) |
|
| 69 |
} |
|
| 70 |
) |> |
|
| 71 | 4x |
dplyr::bind_rows() |> |
| 72 | 4x |
cards::as_card() |
| 73 |
} |
| 1 |
#' ARD Fisher's Exact Test |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Analysis results data for Fisher's Exact Test. |
|
| 5 |
#' Calculated with `fisher.test(x = data[[variable]], y = data[[by]], ...)` |
|
| 6 |
#' |
|
| 7 |
#' |
|
| 8 |
#' @param data (`data.frame`)\cr |
|
| 9 |
#' a data frame. |
|
| 10 |
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 11 |
#' column name to compare by |
|
| 12 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 13 |
#' column names to be compared. Independent tests will be computed for |
|
| 14 |
#' each variable. |
|
| 15 |
#' @param conf.level (scalar `numeric`)\cr |
|
| 16 |
#' confidence level for confidence interval. Default is `0.95`. |
|
| 17 |
#' @param ... additional arguments passed to `fisher.test(...)` |
|
| 18 |
#' |
|
| 19 |
#' @return ARD data frame |
|
| 20 |
#' @export |
|
| 21 |
#' |
|
| 22 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
|
|
| 23 |
#' cards::ADSL[1:30, ] |> |
|
| 24 |
#' ard_stats_fisher_test(by = "ARM", variables = "AGEGR1") |
|
| 25 |
ard_stats_fisher_test <- function(data, by, variables, conf.level = 0.95, ...) {
|
|
| 26 | 4x |
set_cli_abort_call() |
| 27 | ||
| 28 |
# check installed packages --------------------------------------------------- |
|
| 29 | 4x |
check_pkg_installed("broom")
|
| 30 | ||
| 31 |
# check/process inputs ------------------------------------------------------- |
|
| 32 | 4x |
check_not_missing(data) |
| 33 | 4x |
check_not_missing(variables) |
| 34 | 4x |
check_not_missing(by) |
| 35 | 4x |
check_data_frame(data) |
| 36 | 4x |
cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
|
| 37 | 4x |
check_scalar(by) |
| 38 | 4x |
check_range(conf.level, range = c(0, 1)) |
| 39 | ||
| 40 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 41 | 4x |
if (is_empty(variables)) {
|
| 42 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 43 |
} |
|
| 44 | ||
| 45 |
# build ARD ------------------------------------------------------------------ |
|
| 46 | 4x |
lapply( |
| 47 | 4x |
variables, |
| 48 | 4x |
function(variable) {
|
| 49 | 5x |
cards::tidy_as_ard( |
| 50 | 5x |
lst_tidy = |
| 51 | 5x |
cards::eval_capture_conditions( |
| 52 | 5x |
stats::fisher.test(x = data[[variable]], y = data[[by]], conf.level = conf.level, ...) |> |
| 53 | 5x |
broom::tidy() |
| 54 |
), |
|
| 55 | 5x |
tidy_result_names = |
| 56 | 5x |
c("estimate", "p.value", "conf.low", "conf.high", "method", "alternative"),
|
| 57 | 5x |
fun_args_to_record = |
| 58 | 5x |
c( |
| 59 | 5x |
"workspace", "hybrid", "hybridPars", "control", "or", |
| 60 | 5x |
"conf.int", "conf.level", "simulate.p.value", "B" |
| 61 |
), |
|
| 62 | 5x |
formals = formals(stats::fisher.test), |
| 63 | 5x |
passed_args = dots_list(...), |
| 64 | 5x |
lst_ard_columns = list(group1 = by, variable = variable, context = "stats_fisher_test") |
| 65 |
) |> |
|
| 66 | 5x |
dplyr::mutate( |
| 67 | 5x |
.after = "stat_name", |
| 68 | 5x |
stat_label = |
| 69 | 5x |
dplyr::case_when( |
| 70 | 5x |
.data$stat_name %in% "p.value" ~ "p-value", |
| 71 | 5x |
TRUE ~ .data$stat_name, |
| 72 |
) |
|
| 73 |
) |
|
| 74 |
} |
|
| 75 |
) |> |
|
| 76 | 4x |
dplyr::bind_rows() |> |
| 77 | 4x |
cards::as_card() |
| 78 |
} |
| 1 |
#' ARD ANOVA from car Package |
|
| 2 |
#' |
|
| 3 |
#' Function takes a regression model object and calculated ANOVA using [`car::Anova()`]. |
|
| 4 |
#' |
|
| 5 |
#' @param x regression model object |
|
| 6 |
#' @param ... arguments passed to `car::Anova(...)` |
|
| 7 |
#' |
|
| 8 |
#' @return data frame |
|
| 9 |
#' @export |
|
| 10 |
#' |
|
| 11 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("broom.helpers", "car", "parameters")))
|
|
| 12 |
#' lm(AGE ~ ARM, data = cards::ADSL) |> |
|
| 13 |
#' ard_car_anova() |
|
| 14 |
#' |
|
| 15 |
#' glm(vs ~ factor(cyl) + factor(am), data = mtcars, family = binomial) |> |
|
| 16 |
#' ard_car_anova(test.statistic = "Wald") |
|
| 17 |
ard_car_anova <- function(x, ...) {
|
|
| 18 | 3x |
set_cli_abort_call() |
| 19 | ||
| 20 |
# check installed packages --------------------------------------------------- |
|
| 21 | 3x |
check_pkg_installed(pkg = c("broom.helpers", "car", "parameters"))
|
| 22 | ||
| 23 |
# check inputs --------------------------------------------------------------- |
|
| 24 | 3x |
check_not_missing(x) |
| 25 | ||
| 26 |
# run car::Anova() ----------------------------------------------------------- |
|
| 27 | 3x |
car_anova <- cards::eval_capture_conditions(car::Anova(x, ...)) |
| 28 | ||
| 29 | 3x |
if (!is.null(car_anova[["error"]])) {
|
| 30 | 1x |
cli::cli_abort( |
| 31 | 1x |
c( |
| 32 | 1x |
"There was an error running {.fun car::Anova}. See error message below.",
|
| 33 | 1x |
x = car_anova[["error"]] |
| 34 |
), |
|
| 35 | 1x |
call = get_cli_abort_call() |
| 36 |
) |
|
| 37 |
} |
|
| 38 | ||
| 39 | 2x |
car_anova[["result"]] |> |
| 40 | 2x |
broom.helpers::tidy_parameters(conf.int = FALSE) |> # using broom.helpers, because it handle non-syntactic names for us |
| 41 | 2x |
dplyr::filter(!(dplyr::row_number() == dplyr::n() & .data$term %in% "Residuals")) |> # removing Residual rows |
| 42 | 2x |
dplyr::rename(variable = "term") |> |
| 43 | 2x |
tidyr::pivot_longer( |
| 44 | 2x |
cols = -"variable", |
| 45 | 2x |
names_to = "stat_name", |
| 46 | 2x |
values_to = "stat" |
| 47 |
) |> |
|
| 48 | 2x |
dplyr::mutate( |
| 49 | 2x |
stat = as.list(.data$stat), |
| 50 | 2x |
stat_label = |
| 51 | 2x |
dplyr::case_when( |
| 52 | 2x |
.data$stat_name %in% "statistic" ~ "Statistic", |
| 53 | 2x |
.data$stat_name %in% "df" ~ "Degrees of Freedom", |
| 54 | 2x |
.data$stat_name %in% "p.value" ~ "p-value", |
| 55 | 2x |
TRUE ~ .data$stat_name |
| 56 |
), |
|
| 57 | 2x |
fmt_fun = |
| 58 | 2x |
map( |
| 59 | 2x |
.data$stat, |
| 60 | 2x |
function(.x) {
|
| 61 |
# styler: off |
|
| 62 | ! |
if (is.integer(.x)) return(0L) |
| 63 | 12x |
if (is.numeric(.x)) return(1L) |
| 64 |
# styler: on |
|
| 65 | ! |
NULL |
| 66 |
} |
|
| 67 |
), |
|
| 68 | 2x |
context = "car_anova", |
| 69 | 2x |
warning = car_anova["warning"], |
| 70 | 2x |
error = car_anova["error"] |
| 71 |
) |> |
|
| 72 | 2x |
cards::as_card() |> |
| 73 | 2x |
cards::tidy_ard_column_order() |
| 74 |
} |
| 1 |
#' ARD ANOVA |
|
| 2 |
#' |
|
| 3 |
#' Prepare ANOVA results from the `stats::anova()` function. |
|
| 4 |
#' Users may pass a pre-calculated `stats::anova()` object or a list of |
|
| 5 |
#' formulas. In the latter case, the models will be constructed using the |
|
| 6 |
#' information passed and models will be passed to `stats::anova()`. |
|
| 7 |
#' |
|
| 8 |
#' @param x (`anova` or `data.frame`)\cr |
|
| 9 |
#' an object of class `'anova'` created with `stats::anova()` or |
|
| 10 |
#' a data frame |
|
| 11 |
#' @param formulas (`list`)\cr |
|
| 12 |
#' a list of formulas |
|
| 13 |
#' @param method_text (`string`)\cr |
|
| 14 |
#' string of the method used. Default is `"ANOVA results from `stats::anova()`"`. |
|
| 15 |
#' We provide the option to change this as `stats::anova()` can produce |
|
| 16 |
#' results from many types of models that may warrant a more precise |
|
| 17 |
#' description. |
|
| 18 |
#' @inheritParams rlang::args_dots_empty |
|
| 19 |
#' @inheritParams construction_helpers |
|
| 20 |
#' |
|
| 21 |
#' @details |
|
| 22 |
#' When a list of formulas is supplied to `ard_stats_anova()`, these formulas |
|
| 23 |
#' along with information from other arguments, are used to construct models |
|
| 24 |
#' and pass those models to `stats::anova()`. |
|
| 25 |
#' |
|
| 26 |
#' The models are constructed using `rlang::exec()`, which is similar to `do.call()`. |
|
| 27 |
#' |
|
| 28 |
#' ```r |
|
| 29 |
#' rlang::exec(.fn = method, formula = formula, data = data, !!!method.args) |
|
| 30 |
#' ``` |
|
| 31 |
#' |
|
| 32 |
#' The above function is executed in `withr::with_namespace(package)`, which |
|
| 33 |
#' allows for the use of `ard_stats_anova(method)` from packages, |
|
| 34 |
#' e.g. `package = 'lme4'` must be specified when `method = 'glmer'`. |
|
| 35 |
#' See example below. |
|
| 36 |
#' |
|
| 37 |
#' @return ARD data frame |
|
| 38 |
#' @name ard_stats_anova |
|
| 39 |
#' |
|
| 40 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("broom", "withr", "lme4")))
|
|
| 41 |
#' anova( |
|
| 42 |
#' lm(mpg ~ am, mtcars), |
|
| 43 |
#' lm(mpg ~ am + hp, mtcars) |
|
| 44 |
#' ) |> |
|
| 45 |
#' ard_stats_anova() |
|
| 46 |
#' |
|
| 47 |
#' ard_stats_anova( |
|
| 48 |
#' x = mtcars, |
|
| 49 |
#' formulas = list(am ~ mpg, am ~ mpg + hp), |
|
| 50 |
#' method = "glm", |
|
| 51 |
#' method.args = list(family = binomial) |
|
| 52 |
#' ) |
|
| 53 |
#' |
|
| 54 |
#' ard_stats_anova( |
|
| 55 |
#' x = mtcars, |
|
| 56 |
#' formulas = list(am ~ 1 + (1 | vs), am ~ mpg + (1 | vs)), |
|
| 57 |
#' method = "glmer", |
|
| 58 |
#' method.args = list(family = binomial), |
|
| 59 |
#' package = "lme4" |
|
| 60 |
#' ) |
|
| 61 |
NULL |
|
| 62 | ||
| 63 |
#' @rdname ard_stats_anova |
|
| 64 |
#' @export |
|
| 65 |
ard_stats_anova <- function(x, ...) {
|
|
| 66 | 9x |
UseMethod("ard_stats_anova")
|
| 67 |
} |
|
| 68 | ||
| 69 |
#' @rdname ard_stats_anova |
|
| 70 |
#' @export |
|
| 71 |
ard_stats_anova.anova <- function(x, method_text = "ANOVA results from `stats::anova()`", ...) {
|
|
| 72 | 3x |
set_cli_abort_call() |
| 73 | ||
| 74 |
# check inputs --------------------------------------------------------------- |
|
| 75 | 3x |
check_dots_empty() |
| 76 | 3x |
check_pkg_installed("broom")
|
| 77 | 3x |
check_string(method_text) |
| 78 | ||
| 79 |
# return df in cards formats ------------------------------------------------- |
|
| 80 | 3x |
lst_results <- |
| 81 | 3x |
cards::eval_capture_conditions( |
| 82 | 3x |
.anova_tidy_and_reshape(x, method_text = method_text) |
| 83 |
) |
|
| 84 | ||
| 85 |
# final tidying up of cards data frame --------------------------------------- |
|
| 86 | 3x |
.anova_final_ard_prep(lst_results, method_text = method_text) |
| 87 |
} |
|
| 88 | ||
| 89 | ||
| 90 |
#' @rdname ard_stats_anova |
|
| 91 |
#' @export |
|
| 92 |
ard_stats_anova.data.frame <- function(x, |
|
| 93 |
formulas, |
|
| 94 |
method, |
|
| 95 |
method.args = list(), |
|
| 96 |
package = "base", |
|
| 97 |
method_text = "ANOVA results from `stats::anova()`", |
|
| 98 |
...) {
|
|
| 99 | 6x |
set_cli_abort_call() |
| 100 | ||
| 101 |
# check inputs --------------------------------------------------------------- |
|
| 102 | 6x |
check_dots_empty() |
| 103 | 6x |
check_pkg_installed(c("broom", "withr", package))
|
| 104 | 6x |
check_not_missing(formulas) |
| 105 | 6x |
check_class(formulas, cls = "list") |
| 106 | 6x |
walk( |
| 107 | 6x |
formulas, |
| 108 | 6x |
~ check_class( |
| 109 | 6x |
.x, |
| 110 | 6x |
cls = "formula", |
| 111 | 6x |
arg_name = "formulas", |
| 112 | 6x |
message = "Each element of {.arg formulas} must be class {.cls formula}"
|
| 113 |
) |
|
| 114 |
) |
|
| 115 | ||
| 116 |
# calculate results and return df in cards formats --------------------------- |
|
| 117 |
# create models |
|
| 118 | 6x |
lst_results <- |
| 119 | 6x |
cards::eval_capture_conditions({
|
| 120 |
# first build the models |
|
| 121 | 6x |
models <- |
| 122 | 6x |
lapply( |
| 123 | 6x |
formulas, |
| 124 | 6x |
function(formula) {
|
| 125 | 11x |
construct_model(data = x, formula = formula, method = method, method.args = {{ method.args }}, package = package)
|
| 126 |
} |
|
| 127 |
) |
|
| 128 | ||
| 129 |
# now calculate `stats::anova()` and reshape results |
|
| 130 | 5x |
rlang::inject(stats::anova(!!!models)) |> |
| 131 | 5x |
.anova_tidy_and_reshape(method_text = method_text) |
| 132 |
}) |
|
| 133 | ||
| 134 |
# final tidying up of cards data frame --------------------------------------- |
|
| 135 | 6x |
.anova_final_ard_prep(lst_results, method_text = method_text) |
| 136 |
} |
|
| 137 | ||
| 138 |
.anova_tidy_and_reshape <- function(x, method_text) {
|
|
| 139 | 8x |
broom::tidy(x) |> |
| 140 | 8x |
dplyr::mutate( |
| 141 | 8x |
across(everything(), as.list), |
| 142 | 8x |
variable = paste0("model_", dplyr::row_number())
|
| 143 |
) |> |
|
| 144 | 8x |
tidyr::pivot_longer( |
| 145 | 8x |
cols = -"variable", |
| 146 | 8x |
names_to = "stat_name", |
| 147 | 8x |
values_to = "stat" |
| 148 |
) |> |
|
| 149 | 8x |
dplyr::filter(!is.na(.data$stat)) %>% |
| 150 |
# add one more row with the method |
|
| 151 |
{
|
|
| 152 | 8x |
dplyr::bind_rows( |
| 153 |
., |
|
| 154 | 8x |
dplyr::filter(., dplyr::n() == dplyr::row_number()) |> |
| 155 | 8x |
dplyr::mutate( |
| 156 | 8x |
stat_name = "method", |
| 157 | 8x |
stat = list(.env$method_text) |
| 158 |
) |
|
| 159 |
) |
|
| 160 |
} |
|
| 161 |
} |
|
| 162 | ||
| 163 |
.anova_final_ard_prep <- function(lst_results, method_text) {
|
|
| 164 |
# saving the results in data frame ------------------------------------------- |
|
| 165 | 9x |
df_card <- |
| 166 | 9x |
if (!is.null(lst_results[["result"]])) {
|
| 167 | 8x |
lst_results[["result"]] |
| 168 | 9x |
} else { # if there was an error return a shell of an ARD data frame
|
| 169 | 1x |
dplyr::tibble( |
| 170 | 1x |
variable = "model_1", |
| 171 | 1x |
stat_name = c("p.value", "method"),
|
| 172 | 1x |
stat = list(NULL, method_text) |
| 173 |
) |
|
| 174 |
} |
|
| 175 | ||
| 176 |
# final tidying up of cards data frame --------------------------------------- |
|
| 177 | 9x |
df_card |> |
| 178 | 9x |
dplyr::mutate( |
| 179 | 9x |
warning = lst_results["warning"], |
| 180 | 9x |
error = lst_results["error"], |
| 181 | 9x |
context = "stats_anova", |
| 182 | 9x |
fmt_fun = lapply( |
| 183 | 9x |
.data$stat, |
| 184 | 9x |
function(x) {
|
| 185 | 88x |
switch(is.integer(x), |
| 186 | 88x |
0L |
| 187 | 88x |
) %||% switch(is.numeric(x), |
| 188 | 88x |
1L |
| 189 |
) |
|
| 190 |
} |
|
| 191 |
), |
|
| 192 | 9x |
stat_label = |
| 193 | 9x |
dplyr::case_when( |
| 194 | 9x |
.data$stat_name %in% "p.value" ~ "p-value", |
| 195 | 9x |
.data$stat_name %in% "sumsq" ~ "Sum of Squares", |
| 196 | 9x |
.data$stat_name %in% "rss" ~ "Residual Sum of Squares", |
| 197 | 9x |
.data$stat_name %in% "df" ~ "Degrees of Freedom", |
| 198 | 9x |
.data$stat_name %in% "df.residual" ~ "df for residuals", |
| 199 | 9x |
.default = .data$stat_name |
| 200 |
) |
|
| 201 |
) |> |
|
| 202 | 9x |
cards::as_card() |> |
| 203 | 9x |
cards::tidy_ard_column_order() |
| 204 |
} |
| 1 |
#' ARD Survey Chi-Square Test |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Analysis results data for survey Chi-Square test using [`survey::svychisq()`]. |
|
| 5 |
#' Only two-way comparisons are supported. |
|
| 6 |
#' |
|
| 7 |
#' @param data (`survey.design`)\cr |
|
| 8 |
#' a survey design object often created with the \{survey\} package
|
|
| 9 |
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 10 |
#' column name to compare by. |
|
| 11 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 12 |
#' column names to be compared. Independent tests will be computed for |
|
| 13 |
#' each variable. |
|
| 14 |
#' @param statistic (`character`)\cr |
|
| 15 |
#' statistic used to estimate Chisq p-value. |
|
| 16 |
#' Default is the Rao-Scott second-order correction ("F"). See [`survey::svychisq`]
|
|
| 17 |
#' for available statistics options. |
|
| 18 |
#' @param ... arguments passed to [`survey::svychisq()`]. |
|
| 19 |
#' |
|
| 20 |
#' @return ARD data frame |
|
| 21 |
#' @export |
|
| 22 |
#' |
|
| 23 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("survey", "broom")))
|
|
| 24 |
#' data(api, package = "survey") |
|
| 25 |
#' dclus1 <- survey::svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc) |
|
| 26 |
#' |
|
| 27 |
#' ard_survey_svychisq(dclus1, variables = sch.wide, by = comp.imp, statistic = "F") |
|
| 28 |
ard_survey_svychisq <- function(data, by, variables, statistic = "F", ...) {
|
|
| 29 | 6x |
set_cli_abort_call() |
| 30 | ||
| 31 |
# check installed packages --------------------------------------------------- |
|
| 32 | 6x |
check_pkg_installed(c("survey", "broom"))
|
| 33 | ||
| 34 |
# check/process inputs ------------------------------------------------------- |
|
| 35 | 6x |
check_not_missing(data) |
| 36 | 6x |
check_not_missing(variables) |
| 37 | 6x |
check_not_missing(by) |
| 38 | 6x |
check_class(data, cls = "survey.design") |
| 39 | 6x |
cards::process_selectors(data[["variables"]], by = {{ by }}, variables = {{ variables }})
|
| 40 | 6x |
check_scalar(by) |
| 41 | ||
| 42 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 43 | 6x |
if (is_empty(variables)) {
|
| 44 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 45 |
} |
|
| 46 | ||
| 47 |
# build ARD ------------------------------------------------------------------ |
|
| 48 | 6x |
lapply( |
| 49 | 6x |
variables, |
| 50 | 6x |
function(variable) {
|
| 51 | 8x |
cards::tidy_as_ard( |
| 52 | 8x |
lst_tidy = |
| 53 | 8x |
cards::eval_capture_conditions( |
| 54 | 8x |
survey::svychisq(reformulate2(termlabels = c(variable, by)), design = data, statistic = statistic, ...) |> |
| 55 | 8x |
broom::tidy() |
| 56 |
), |
|
| 57 | 8x |
tidy_result_names = c("statistic", "p.value", "ndf", "ddf", "method"),
|
| 58 | 8x |
passed_args = dots_list(...), |
| 59 | 8x |
lst_ard_columns = list(group1 = by, variable = variable, context = "survey_svychisq") |
| 60 |
) |> |
|
| 61 | 8x |
dplyr::mutate( |
| 62 | 8x |
.after = "stat_name", |
| 63 | 8x |
stat_label = |
| 64 | 8x |
dplyr::case_when( |
| 65 | 8x |
.data$stat_name %in% "statistic" ~ "Statistic", |
| 66 | 8x |
.data$stat_name %in% "p.value" ~ "p-value", |
| 67 | 8x |
.data$stat_name %in% "ndf" ~ "Nominator Degrees of Freedom", |
| 68 | 8x |
.data$stat_name %in% "ddf" ~ "Denominator Degrees of Freedom", |
| 69 | 8x |
TRUE ~ .data$stat_name, |
| 70 |
) |
|
| 71 |
) |
|
| 72 |
} |
|
| 73 |
) |> |
|
| 74 | 6x |
dplyr::bind_rows() |> |
| 75 | 6x |
cards::as_card() |
| 76 |
} |
| 1 |
#' ARD Missing Survey Statistics |
|
| 2 |
#' |
|
| 3 |
#' Compute Analysis Results Data (ARD) for statistics related to data missingness for survey objects |
|
| 4 |
#' |
|
| 5 |
#' @inheritParams ard_tabulate.survey.design |
|
| 6 |
#' |
|
| 7 |
#' @return an ARD data frame of class 'card' |
|
| 8 |
#' @export |
|
| 9 |
#' |
|
| 10 |
#' @examplesIf cardx:::is_pkg_installed("survey")
|
|
| 11 |
#' svy_titanic <- survey::svydesign(~1, data = as.data.frame(Titanic), weights = ~Freq) |
|
| 12 |
#' |
|
| 13 |
#' ard_missing(svy_titanic, variables = c(Class, Age), by = Survived) |
|
| 14 |
ard_missing.survey.design <- function(data, |
|
| 15 |
variables, |
|
| 16 |
by = NULL, |
|
| 17 |
statistic = |
|
| 18 |
everything() ~ c( |
|
| 19 |
"N_obs", "N_miss", "N_nonmiss", "p_miss", "p_nonmiss", |
|
| 20 |
"N_obs_unweighted", "N_miss_unweighted", "N_nonmiss_unweighted", |
|
| 21 |
"p_miss_unweighted", "p_nonmiss_unweighted" |
|
| 22 |
), |
|
| 23 |
fmt_fun = NULL, |
|
| 24 |
stat_label = |
|
| 25 |
everything() ~ list( |
|
| 26 |
N_obs = "Total N", |
|
| 27 |
N_miss = "N Missing", |
|
| 28 |
N_nonmiss = "N not Missing", |
|
| 29 |
p_miss = "% Missing", |
|
| 30 |
p_nonmiss = "% not Missing", |
|
| 31 |
N_obs_unweighted = "Total N (unweighted)", |
|
| 32 |
N_miss_unweighted = "N Missing (unweighted)", |
|
| 33 |
N_nonmiss_unweighted = "N not Missing (unweighted)", |
|
| 34 |
p_miss_unweighted = "% Missing (unweighted)", |
|
| 35 |
p_nonmiss_unweighted = "% not Missing (unweighted)" |
|
| 36 |
), |
|
| 37 |
fmt_fn = deprecated(), |
|
| 38 |
...) {
|
|
| 39 | 6x |
set_cli_abort_call() |
| 40 | 6x |
check_dots_empty() |
| 41 | 6x |
check_pkg_installed(pkg = "survey") |
| 42 | ||
| 43 |
# deprecated args ------------------------------------------------------------ |
|
| 44 | 6x |
if (lifecycle::is_present(fmt_fn)) {
|
| 45 | ! |
lifecycle::deprecate_soft( |
| 46 | ! |
when = "0.2.5", |
| 47 | ! |
what = "ard_missing(fmt_fn)", |
| 48 | ! |
with = "ard_missing(fmt_fun)" |
| 49 |
) |
|
| 50 | ! |
fmt_fun <- fmt_fn |
| 51 |
} |
|
| 52 | ||
| 53 |
# process inputs ------------------------------------------------------------- |
|
| 54 | 6x |
check_not_missing(variables) |
| 55 | 6x |
cards::process_selectors( |
| 56 | 6x |
data = data$variables, |
| 57 | 6x |
variables = {{ variables }},
|
| 58 | 6x |
by = {{ by }}
|
| 59 |
) |
|
| 60 | ||
| 61 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 62 | 6x |
if (is_empty(variables)) {
|
| 63 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 64 |
} |
|
| 65 | ||
| 66 |
# convert all variables to T/F whether it's missing -------------------------- |
|
| 67 | 6x |
data$variables <- data$variables |> |
| 68 | 6x |
dplyr::mutate(across(all_of(variables), Negate(is.na), .names = "lgl_{.col}"))
|
| 69 | ||
| 70 | 6x |
cards::process_formula_selectors( |
| 71 | 6x |
data$variables[variables], |
| 72 | 6x |
statistic = statistic, |
| 73 | 6x |
fmt_fun = fmt_fun, |
| 74 | 6x |
stat_label = stat_label |
| 75 |
) |
|
| 76 | 6x |
cards::fill_formula_selectors( |
| 77 | 6x |
data$variables[variables], |
| 78 | 6x |
statistic = formals(asNamespace("cardx")[["ard_missing.survey.design"]])[["statistic"]] |> eval()
|
| 79 |
) |
|
| 80 | 6x |
cards::fill_formula_selectors( |
| 81 | 6x |
data$variables[variables], |
| 82 | 6x |
stat_label = formals(asNamespace("cardx")[["ard_missing.survey.design"]])[["stat_label"]] |> eval()
|
| 83 |
) |
|
| 84 | ||
| 85 | 6x |
stats_available <- c( |
| 86 | 6x |
"N_obs", "N_miss", "N_nonmiss", "p_miss", "p_nonmiss", |
| 87 | 6x |
"N_obs_unweighted", "N_miss_unweighted", "N_nonmiss_unweighted", |
| 88 | 6x |
"p_miss_unweighted", "p_nonmiss_unweighted" |
| 89 |
) |
|
| 90 | 6x |
cards::check_list_elements( |
| 91 | 6x |
x = statistic, |
| 92 | 6x |
predicate = \(x) is.character(x) && all(x %in% stats_available), |
| 93 | 6x |
error_msg = "Elements passed in the {.arg statistic} argument must be one or more of {.val {stats_available}}"
|
| 94 |
) |
|
| 95 | ||
| 96 |
# calculate results ---------------------------------------------------------- |
|
| 97 | 6x |
result <- |
| 98 | 6x |
ard_tabulate( |
| 99 | 6x |
data = data, |
| 100 | 6x |
variables = all_of(paste0("lgl_", variables)),
|
| 101 | 6x |
by = any_of(by), |
| 102 | 6x |
statistic = everything() ~ c("n", "N", "p", "n_unweighted", "N_unweighted", "p_unweighted")
|
| 103 |
) |> |
|
| 104 | 6x |
dplyr::mutate( |
| 105 | 6x |
variable = str_remove(.data$variable, pattern = "^lgl_") |
| 106 |
) |
|
| 107 | ||
| 108 |
# rename the stats for missingness ------------------------------------------- |
|
| 109 | 6x |
result <- result |> |
| 110 | 6x |
dplyr::mutate( |
| 111 | 6x |
stat_name = |
| 112 | 6x |
dplyr::case_when( |
| 113 | 6x |
.data$stat_name %in% "N" ~ "N_obs", |
| 114 | 6x |
.data$stat_name %in% "n" & .data$variable_level %in% FALSE ~ "N_miss", |
| 115 | 6x |
.data$stat_name %in% "n" & .data$variable_level %in% TRUE ~ "N_nonmiss", |
| 116 | 6x |
.data$stat_name %in% "p" & .data$variable_level %in% FALSE ~ "p_miss", |
| 117 | 6x |
.data$stat_name %in% "p" & .data$variable_level %in% TRUE ~ "p_nonmiss", |
| 118 | 6x |
.data$stat_name %in% "N_unweighted" ~ "N_obs_unweighted", |
| 119 | 6x |
.data$stat_name %in% "n_unweighted" & .data$variable_level %in% FALSE ~ "N_miss_unweighted", |
| 120 | 6x |
.data$stat_name %in% "n_unweighted" & .data$variable_level %in% TRUE ~ "N_nonmiss_unweighted", |
| 121 | 6x |
.data$stat_name %in% "p_unweighted" & .data$variable_level %in% FALSE ~ "p_miss_unweighted", |
| 122 | 6x |
.data$stat_name %in% "p_unweighted" & .data$variable_level %in% TRUE ~ "p_nonmiss_unweighted" |
| 123 |
) |
|
| 124 |
) |> |
|
| 125 | 6x |
dplyr::select(-cards::all_ard_variables("levels"), -"stat_label", -"fmt_fun") |>
|
| 126 | 6x |
dplyr::slice(1L, .by = c(cards::all_ard_groups(), cards::all_ard_variables(), "stat_name")) |
| 127 | ||
| 128 |
# final processing of fmt_fun ------------------------------------------------ |
|
| 129 | 6x |
result <- result |> |
| 130 | 6x |
.process_nested_list_as_df( |
| 131 | 6x |
arg = fmt_fun, |
| 132 | 6x |
new_column = "fmt_fun" |
| 133 |
) |> |
|
| 134 | 6x |
.default_svy_cat_fmt_fun() |
| 135 | ||
| 136 |
# merge in statistic labels -------------------------------------------------- |
|
| 137 | 6x |
result <- result |> |
| 138 | 6x |
.process_nested_list_as_df( |
| 139 | 6x |
arg = stat_label, |
| 140 | 6x |
new_column = "stat_label", |
| 141 | 6x |
unlist = TRUE |
| 142 |
) |> |
|
| 143 | 6x |
dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) |
| 144 | ||
| 145 |
# return final object -------------------------------------------------------- |
|
| 146 | 6x |
result |> |
| 147 | 6x |
dplyr::mutate(context = "missing") |> |
| 148 | 6x |
cards::as_card() |> |
| 149 | 6x |
cards::tidy_ard_column_order() |
| 150 |
} |
| 1 |
#' ARD t-test |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Analysis results data for paired and non-paired t-tests. |
|
| 5 |
#' |
|
| 6 |
#' @param data (`data.frame`)\cr |
|
| 7 |
#' a data frame. See below for details. |
|
| 8 |
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 9 |
#' optional column name to compare by. |
|
| 10 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 11 |
#' column names to be compared. Independent t-tests will be computed for |
|
| 12 |
#' each variable. |
|
| 13 |
#' @param id ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 14 |
#' column name of the subject or participant ID |
|
| 15 |
#' @param conf.level (scalar `numeric`)\cr |
|
| 16 |
#' confidence level for confidence interval. Default is `0.95`. |
|
| 17 |
#' @param ... arguments passed to `t.test()` |
|
| 18 |
#' |
|
| 19 |
#' @return ARD data frame |
|
| 20 |
#' @name ard_stats_t_test |
|
| 21 |
#' |
|
| 22 |
#' @details |
|
| 23 |
#' For the `ard_stats_t_test()` function, the data is expected to be one row per subject. |
|
| 24 |
#' The data is passed as `t.test(data[[variable]] ~ data[[by]], paired = FALSE, ...)`. |
|
| 25 |
#' |
|
| 26 |
#' For the `ard_stats_paired_t_test()` function, the data is expected to be one row |
|
| 27 |
#' per subject per by level. Before the t-test is calculated, the data are |
|
| 28 |
#' reshaped to a wide format to be one row per subject. |
|
| 29 |
#' The data are then passed as |
|
| 30 |
#' `t.test(x = data_wide[[<by level 1>]], y = data_wide[[<by level 2>]], paired = TRUE, ...)`. |
|
| 31 |
#' |
|
| 32 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
|
|
| 33 |
#' cards::ADSL |> |
|
| 34 |
#' dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")) |>
|
|
| 35 |
#' ard_stats_t_test(by = ARM, variables = c(AGE, BMIBL)) |
|
| 36 |
#' |
|
| 37 |
#' # constructing a paired data set, |
|
| 38 |
#' # where patients receive both treatments |
|
| 39 |
#' cards::ADSL[c("ARM", "AGE")] |>
|
|
| 40 |
#' dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")) |>
|
|
| 41 |
#' dplyr::mutate(.by = ARM, USUBJID = dplyr::row_number()) |> |
|
| 42 |
#' dplyr::arrange(USUBJID, ARM) |> |
|
| 43 |
#' ard_stats_paired_t_test(by = ARM, variables = AGE, id = USUBJID) |
|
| 44 |
NULL |
|
| 45 | ||
| 46 |
#' @rdname ard_stats_t_test |
|
| 47 |
#' @export |
|
| 48 |
ard_stats_t_test <- function(data, variables, by = NULL, conf.level = 0.95, ...) {
|
|
| 49 | 6x |
set_cli_abort_call() |
| 50 | ||
| 51 |
# check installed packages --------------------------------------------------- |
|
| 52 | 6x |
check_pkg_installed("broom")
|
| 53 | ||
| 54 |
# check/process inputs ------------------------------------------------------- |
|
| 55 | 6x |
check_not_missing(data) |
| 56 | 6x |
check_not_missing(variables) |
| 57 | 6x |
check_data_frame(data) |
| 58 | 6x |
data <- dplyr::ungroup(data) |
| 59 | 6x |
cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
|
| 60 | 6x |
check_scalar(by, allow_empty = TRUE) |
| 61 | 6x |
check_range(conf.level, range = c(0, 1)) |
| 62 | ||
| 63 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 64 | 6x |
if (is_empty(variables)) {
|
| 65 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 66 |
} |
|
| 67 | ||
| 68 |
# build ARD ------------------------------------------------------------------ |
|
| 69 | 6x |
lapply( |
| 70 | 6x |
variables, |
| 71 | 6x |
function(variable) {
|
| 72 | 7x |
.format_ttest_results( |
| 73 | 7x |
by = by, |
| 74 | 7x |
variable = variable, |
| 75 | 7x |
lst_tidy = |
| 76 |
# styler: off |
|
| 77 | 7x |
cards::eval_capture_conditions( |
| 78 | 7x |
if (!is_empty(by)) stats::t.test(data[[variable]] ~ data[[by]], conf.level = conf.level, ...) |> broom::tidy() |
| 79 | 7x |
else stats::t.test(data[[variable]], ...) |> broom::tidy() |
| 80 |
), |
|
| 81 |
# styler: on |
|
| 82 | 7x |
paired = FALSE, |
| 83 |
... |
|
| 84 |
) |
|
| 85 |
} |
|
| 86 |
) |> |
|
| 87 | 6x |
dplyr::bind_rows() |
| 88 |
} |
|
| 89 | ||
| 90 |
#' @rdname ard_stats_t_test |
|
| 91 |
#' @export |
|
| 92 |
ard_stats_paired_t_test <- function(data, by, variables, id, conf.level = 0.95, ...) {
|
|
| 93 | 2x |
set_cli_abort_call() |
| 94 | ||
| 95 |
# check installed packages --------------------------------------------------- |
|
| 96 | 2x |
check_pkg_installed("broom")
|
| 97 | ||
| 98 |
# check/process inputs ------------------------------------------------------- |
|
| 99 | 2x |
check_not_missing(data) |
| 100 | 2x |
check_not_missing(variables) |
| 101 | 2x |
check_not_missing(by) |
| 102 | 2x |
check_not_missing(id) |
| 103 | 2x |
check_data_frame(data) |
| 104 | 2x |
data <- dplyr::ungroup(data) |
| 105 | 2x |
cards::process_selectors(data, by = {{ by }}, variables = {{ variables }}, id = {{ id }})
|
| 106 | 2x |
check_scalar(by) |
| 107 | 2x |
check_scalar(id) |
| 108 | ||
| 109 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 110 | 2x |
if (is_empty(variables)) {
|
| 111 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 112 |
} |
|
| 113 | ||
| 114 |
# build ARD ------------------------------------------------------------------ |
|
| 115 | 2x |
lapply( |
| 116 | 2x |
variables, |
| 117 | 2x |
function(variable) {
|
| 118 | 2x |
.format_ttest_results( |
| 119 | 2x |
by = by, |
| 120 | 2x |
variable = variable, |
| 121 | 2x |
lst_tidy = |
| 122 | 2x |
cards::eval_capture_conditions({
|
| 123 |
# adding this reshape inside the eval, so if there is an error it's captured in the ARD object |
|
| 124 | 2x |
data_wide <- .paired_data_pivot_wider(data, by = by, variable = variable, id = id) |
| 125 |
# perform paired t-test |
|
| 126 | 1x |
stats::t.test(x = data_wide[["by1"]], y = data_wide[["by2"]], paired = TRUE, conf.level = conf.level, ...) |> |
| 127 | 1x |
broom::tidy() |
| 128 |
}), |
|
| 129 | 2x |
paired = TRUE, |
| 130 |
... |
|
| 131 |
) |
|
| 132 |
} |
|
| 133 |
) |> |
|
| 134 | 2x |
dplyr::bind_rows() |
| 135 |
} |
|
| 136 | ||
| 137 |
#' Convert t-test to ARD |
|
| 138 |
#' |
|
| 139 |
#' @inheritParams cards::tidy_as_ard |
|
| 140 |
#' @inheritParams stats::t.test |
|
| 141 |
#' @param by (`string`)\cr by column name |
|
| 142 |
#' @param variable (`string`)\cr variable column name |
|
| 143 |
#' @param ... passed to `t.test(...)` |
|
| 144 |
#' |
|
| 145 |
#' @return ARD data frame |
|
| 146 |
#' @keywords internal |
|
| 147 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
|
|
| 148 |
#' cardx:::.format_ttest_results( |
|
| 149 |
#' by = "ARM", |
|
| 150 |
#' variable = "AGE", |
|
| 151 |
#' paired = FALSE, |
|
| 152 |
#' lst_tidy = |
|
| 153 |
#' cards::eval_capture_conditions( |
|
| 154 |
#' stats::t.test(ADSL[["AGE"]] ~ ADSL[["ARM"]], paired = FALSE) |> |
|
| 155 |
#' broom::tidy() |
|
| 156 |
#' ) |
|
| 157 |
#' ) |
|
| 158 |
.format_ttest_results <- function(by = NULL, variable, lst_tidy, paired, ...) {
|
|
| 159 |
# build ARD ------------------------------------------------------------------ |
|
| 160 | 9x |
ret <- |
| 161 | 9x |
cards::tidy_as_ard( |
| 162 | 9x |
lst_tidy = lst_tidy, |
| 163 | 9x |
tidy_result_names = |
| 164 | 9x |
c( |
| 165 | 9x |
"estimate", "statistic", |
| 166 | 9x |
"p.value", "parameter", "conf.low", "conf.high", |
| 167 | 9x |
"method", "alternative" |
| 168 |
) |> |
|
| 169 |
# add estimate1 and estimate2 if there is a by variable |
|
| 170 | 9x |
append(values = switch(!is_empty(by), c("estimate1", "estimate2")), after = 1L), # styler: off
|
| 171 | 9x |
fun_args_to_record = c("mu", "paired", "var.equal", "conf.level"),
|
| 172 | 9x |
formals = formals(asNamespace("stats")[["t.test.default"]]),
|
| 173 | 9x |
passed_args = c(list(paired = paired), dots_list(...)), |
| 174 | 9x |
lst_ard_columns = list(variable = variable, context = "stats_t_test") |
| 175 |
) |
|
| 176 | ||
| 177 | 9x |
if (!is_empty(by)) {
|
| 178 | 8x |
ret <- ret |> |
| 179 | 8x |
dplyr::mutate(group1 = by) |
| 180 |
} |
|
| 181 | ||
| 182 |
# add the stat label --------------------------------------------------------- |
|
| 183 | 9x |
ret |> |
| 184 | 9x |
dplyr::left_join( |
| 185 | 9x |
.df_ttest_stat_labels(by = by), |
| 186 | 9x |
by = "stat_name" |
| 187 |
) |> |
|
| 188 | 9x |
dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) |> |
| 189 | 9x |
cards::as_card() |> |
| 190 | 9x |
cards::tidy_ard_column_order() |
| 191 |
} |
|
| 192 | ||
| 193 | ||
| 194 |
#' Convert long paired data to wide |
|
| 195 |
#' |
|
| 196 |
#' |
|
| 197 |
#' @param data (`data.frame`)\cr a data frame that is one line per subject per group |
|
| 198 |
#' @param by (`string`)\cr by column name |
|
| 199 |
#' @param variable (`string`)\cr variable column name |
|
| 200 |
#' @param id (`string`)\cr subject id column name |
|
| 201 |
#' |
|
| 202 |
#' @return a wide data frame |
|
| 203 |
#' @keywords internal |
|
| 204 |
#' @examples |
|
| 205 |
#' cards::ADSL[c("ARM", "AGE")] |>
|
|
| 206 |
#' dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")) |>
|
|
| 207 |
#' dplyr::mutate(.by = ARM, USUBJID = dplyr::row_number()) |> |
|
| 208 |
#' dplyr::arrange(USUBJID, ARM) |> |
|
| 209 |
#' cardx:::.paired_data_pivot_wider(by = "ARM", variable = "AGE", id = "USUBJID") |
|
| 210 |
.paired_data_pivot_wider <- function(data, by, variable, id) {
|
|
| 211 |
# check the number of levels before pivoting data to wider format |
|
| 212 | 11x |
if (dplyr::n_distinct(data[[by]], na.rm = TRUE) != 2L) {
|
| 213 | 4x |
cli::cli_abort("The {.arg by} argument must have two and only two levels.",
|
| 214 | 4x |
call = get_cli_abort_call() |
| 215 |
) |
|
| 216 |
} |
|
| 217 | ||
| 218 | 7x |
data |> |
| 219 |
# arrange data so the first group always appears first |
|
| 220 | 7x |
dplyr::arrange(.data[[by]]) |> |
| 221 | 7x |
tidyr::pivot_wider( |
| 222 | 7x |
id_cols = all_of(id), |
| 223 | 7x |
names_from = all_of(by), |
| 224 | 7x |
values_from = all_of(variable) |
| 225 |
) |> |
|
| 226 | 7x |
stats::setNames(c(id, "by1", "by2")) |
| 227 |
} |
|
| 228 | ||
| 229 |
.df_ttest_stat_labels <- function(by = NULL) {
|
|
| 230 | 24x |
dplyr::tribble( |
| 231 | 24x |
~stat_name, ~stat_label, |
| 232 | 24x |
"estimate1", "Group 1 Mean", |
| 233 | 24x |
"estimate2", "Group 2 Mean", |
| 234 | 24x |
"estimate", ifelse(is_empty(by), "Mean", "Mean Difference"), |
| 235 | 24x |
"p.value", "p-value", |
| 236 | 24x |
"statistic", "t Statistic", |
| 237 | 24x |
"parameter", "Degrees of Freedom", |
| 238 | 24x |
"conf.low", "CI Lower Bound", |
| 239 | 24x |
"conf.high", "CI Upper Bound", |
| 240 | 24x |
"mu", "H0 Mean", |
| 241 | 24x |
"paired", "Paired t-test", |
| 242 | 24x |
"var.equal", "Equal Variances", |
| 243 | 24x |
"conf.level", "CI Confidence Level", |
| 244 |
) |
|
| 245 |
} |
| 1 |
#' ARD Survey rank test |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Analysis results data for survey wilcox test using [`survey::svyranktest()`]. |
|
| 5 |
#' |
|
| 6 |
#' @param data (`survey.design`)\cr |
|
| 7 |
#' a survey design object often created with [`survey::svydesign()`] |
|
| 8 |
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 9 |
#' column name to compare by |
|
| 10 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 11 |
#' column names to be compared. Independent tests will be run for each variable. |
|
| 12 |
#' @param test (`string`)\cr |
|
| 13 |
#' a string to denote which rank test to use: |
|
| 14 |
#' `"wilcoxon"`, `"vanderWaerden"`, `"median"`, `"KruskalWallis"` |
|
| 15 |
#' @param ... arguments passed to [`survey::svyranktest()`] |
|
| 16 |
#' |
|
| 17 |
#' @return ARD data frame |
|
| 18 |
#' @export |
|
| 19 |
#' |
|
| 20 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("survey", "broom")))
|
|
| 21 |
#' data(api, package = "survey") |
|
| 22 |
#' dclus2 <- survey::svydesign(id = ~ dnum + snum, fpc = ~ fpc1 + fpc2, data = apiclus2) |
|
| 23 |
#' |
|
| 24 |
#' ard_survey_svyranktest(dclus2, variables = enroll, by = comp.imp, test = "wilcoxon") |
|
| 25 |
#' ard_survey_svyranktest(dclus2, variables = enroll, by = comp.imp, test = "vanderWaerden") |
|
| 26 |
#' ard_survey_svyranktest(dclus2, variables = enroll, by = comp.imp, test = "median") |
|
| 27 |
#' ard_survey_svyranktest(dclus2, variables = enroll, by = comp.imp, test = "KruskalWallis") |
|
| 28 |
ard_survey_svyranktest <- function(data, by, variables, test, ...) {
|
|
| 29 | 6x |
set_cli_abort_call() |
| 30 | ||
| 31 |
# check installed packages --------------------------------------------------- |
|
| 32 | 6x |
check_pkg_installed(c("survey", "broom"))
|
| 33 | ||
| 34 |
# check/process inputs ------------------------------------------------------- |
|
| 35 | 6x |
check_not_missing(data) |
| 36 | 6x |
check_not_missing(variables) |
| 37 | 6x |
check_not_missing(by) |
| 38 | 6x |
check_class(data, cls = "survey.design") |
| 39 | 6x |
cards::process_selectors(data[["variables"]], by = {{ by }}, variables = {{ variables }})
|
| 40 | 6x |
check_scalar(by) |
| 41 | ||
| 42 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 43 | 6x |
if (is_empty(variables)) {
|
| 44 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 45 |
} |
|
| 46 | ||
| 47 |
# build ARD ------------------------------------------------------------------ |
|
| 48 | 6x |
lapply( |
| 49 | 6x |
variables, |
| 50 | 6x |
function(variable) {
|
| 51 | 6x |
.format_svyranktest_results( |
| 52 | 6x |
by = by, |
| 53 | 6x |
variable = variable, |
| 54 | 6x |
lst_tidy = |
| 55 | 6x |
cards::eval_capture_conditions( |
| 56 | 6x |
survey::svyranktest(reformulate2(termlabels = by, response = variable), design = data, test = test, ...) |> |
| 57 | 6x |
broom::tidy() |
| 58 |
) |
|
| 59 |
) |
|
| 60 |
} |
|
| 61 |
) |> |
|
| 62 | 6x |
dplyr::bind_rows() |
| 63 |
} |
|
| 64 | ||
| 65 |
.format_svyranktest_results <- function(by, variable, lst_tidy, ...) {
|
|
| 66 |
# build ARD ------------------------------------------------------------------ |
|
| 67 | 6x |
ret <- |
| 68 | 6x |
cards::tidy_as_ard( |
| 69 | 6x |
lst_tidy = lst_tidy, |
| 70 | 6x |
tidy_result_names = c( |
| 71 | 6x |
"estimate", "statistic", |
| 72 | 6x |
"p.value", "parameter", |
| 73 | 6x |
"method", "alternative" |
| 74 |
), |
|
| 75 | 6x |
passed_args = dots_list(...), |
| 76 | 6x |
lst_ard_columns = list(group1 = by, variable = variable, context = "survey_svyranktest") |
| 77 |
) |
|
| 78 | ||
| 79 |
# add the stat label --------------------------------------------------------- |
|
| 80 | 6x |
ret |> |
| 81 | 6x |
dplyr::left_join( |
| 82 | 6x |
.df_surveyrank_stat_labels(), |
| 83 | 6x |
by = "stat_name" |
| 84 |
) |> |
|
| 85 | 6x |
dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) |> |
| 86 | 6x |
cards::as_card() |> |
| 87 | 6x |
cards::tidy_ard_column_order() |
| 88 |
} |
|
| 89 | ||
| 90 | ||
| 91 |
.df_surveyrank_stat_labels <- function() {
|
|
| 92 | 6x |
dplyr::tribble( |
| 93 | 6x |
~stat_name, ~stat_label, |
| 94 | 6x |
"statistic", "Statistic", |
| 95 | 6x |
"parameter", "Degrees of Freedom", |
| 96 | 6x |
"estimate", "Median of the Difference", |
| 97 | 6x |
"null.value", "Null Value", |
| 98 | 6x |
"alternative", "Alternative Hypothesis", |
| 99 | 6x |
"data.name", "Data Name", |
| 100 | 6x |
"p.value", "p-value" |
| 101 |
) |
|
| 102 |
} |
| 1 |
#' ARD ANOVA |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Analysis results data for Analysis of Variance. |
|
| 5 |
#' Calculated with `stats::aov()` |
|
| 6 |
#' |
|
| 7 |
#' @inheritParams stats::aov |
|
| 8 |
#' @param ... arguments passed to `stats::aov(...)` |
|
| 9 |
#' |
|
| 10 |
#' @return ARD data frame |
|
| 11 |
#' @export |
|
| 12 |
#' |
|
| 13 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = c("broom.helpers", "parameters")))
|
|
| 14 |
#' ard_stats_aov(AGE ~ ARM, data = cards::ADSL) |
|
| 15 |
ard_stats_aov <- function(formula, data, ...) {
|
|
| 16 | 3x |
set_cli_abort_call() |
| 17 | ||
| 18 |
# check installed packages --------------------------------------------------- |
|
| 19 | 3x |
check_pkg_installed(c("broom.helpers", "parameters"))
|
| 20 | ||
| 21 |
# check/process inputs ------------------------------------------------------- |
|
| 22 | 3x |
check_not_missing(formula) |
| 23 | 3x |
check_not_missing(data) |
| 24 | 3x |
check_data_frame(data) |
| 25 | 3x |
check_class(formula, cls = "formula") |
| 26 | ||
| 27 |
# build ARD ------------------------------------------------------------------ |
|
| 28 | 3x |
aov <- |
| 29 | 3x |
cards::eval_capture_conditions( |
| 30 | 3x |
stats::aov(formula, data, ...) |
| 31 |
) |
|
| 32 | 3x |
aov[["result"]] |> |
| 33 | 3x |
broom.helpers::tidy_parameters() |> # using broom.helpers, because it handle non-syntactic names |
| 34 | 3x |
dplyr::filter(!(dplyr::row_number() == dplyr::n() & .data$term %in% "Residuals")) |> # removing Residual rows |
| 35 | 3x |
dplyr::rename(variable = "term") |> |
| 36 | 3x |
tidyr::pivot_longer( |
| 37 | 3x |
cols = -"variable", |
| 38 | 3x |
names_to = "stat_name", |
| 39 | 3x |
values_to = "stat" |
| 40 |
) |> |
|
| 41 | 3x |
dplyr::mutate( |
| 42 | 3x |
stat = as.list(.data$stat), |
| 43 | 3x |
stat_label = |
| 44 | 3x |
dplyr::case_when( |
| 45 | 3x |
.data$stat_name %in% "statistic" ~ "Statistic", |
| 46 | 3x |
.data$stat_name %in% "df" ~ "Degrees of Freedom", |
| 47 | 3x |
.data$stat_name %in% "p.value" ~ "p-value", |
| 48 | 3x |
.data$stat_name %in% "sumsq" ~ "Sum of Squares", |
| 49 | 3x |
.data$stat_name %in% "meansq" ~ "Mean of Sum of Squares", |
| 50 | 3x |
TRUE ~ .data$stat_name |
| 51 |
), |
|
| 52 | 3x |
context = "stats_aov", |
| 53 | 3x |
fmt_fun = lapply( |
| 54 | 3x |
.data$stat, |
| 55 | 3x |
function(x) {
|
| 56 | 20x |
switch(is.integer(x), |
| 57 | 20x |
0L |
| 58 | 20x |
) %||% switch(is.numeric(x), |
| 59 | 20x |
1L |
| 60 |
) |
|
| 61 |
} |
|
| 62 |
), |
|
| 63 | 3x |
warning = aov["warning"], |
| 64 | 3x |
error = aov["error"] |
| 65 |
) |> |
|
| 66 | 3x |
cards::as_card() |> |
| 67 | 3x |
cards::tidy_ard_column_order() |
| 68 |
} |
| 1 |
#' ARD Cochran-Mantel-Haenszel Chi-Squared Test |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Analysis results data for Cochran-Mantel-Haenszel Chi-Squared Test for count data. |
|
| 5 |
#' Calculated with `mantelhaen.test(x = data[[variables]], y = data[[by]], z = data[[strata]], ...)`. |
|
| 6 |
#' |
|
| 7 |
#' @param data (`data.frame`)\cr |
|
| 8 |
#' a data frame. |
|
| 9 |
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 10 |
#' column name to compare by. |
|
| 11 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 12 |
#' column names to be compared. Independent tests will be computed for each variable. |
|
| 13 |
#' @param strata ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 14 |
#' column name to stratify by. |
|
| 15 |
#' @param ... additional arguments passed to `stats::mantelhaen.test(...)` |
|
| 16 |
#' |
|
| 17 |
#' @return ARD data frame |
|
| 18 |
#' @name ard_stats_mantelhaen_test |
|
| 19 |
#' @export |
|
| 20 |
#' |
|
| 21 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
|
|
| 22 |
#' cards::ADSL |> |
|
| 23 |
#' ard_stats_mantelhaen_test(by = "ARM", variables = "AGEGR1", strata = "SEX") |
|
| 24 |
ard_stats_mantelhaen_test <- function(data, by, variables, strata, ...) {
|
|
| 25 | 3x |
set_cli_abort_call() |
| 26 | ||
| 27 |
# check installed packages --------------------------------------------------- |
|
| 28 | 3x |
check_pkg_installed("broom")
|
| 29 | ||
| 30 |
# check/process inputs ------------------------------------------------------- |
|
| 31 | 3x |
check_not_missing(data) |
| 32 | 3x |
check_not_missing(variables) |
| 33 | 3x |
check_not_missing(by) |
| 34 | 3x |
check_not_missing(strata) |
| 35 | 3x |
cards::process_selectors(data, by = {{ by }}, variables = {{ variables }}, strata = {{ strata }})
|
| 36 | 3x |
check_class(variables, "character") |
| 37 | 3x |
check_scalar(by) |
| 38 | 3x |
check_scalar(strata) |
| 39 | 3x |
check_class(data[[variables]], c("character", "factor"))
|
| 40 | 3x |
check_class(data[[by]], c("character", "factor"))
|
| 41 | 3x |
check_class(data[[strata]], c("character", "factor"))
|
| 42 | ||
| 43 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 44 | 3x |
if (is_empty(variables)) {
|
| 45 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 46 |
} |
|
| 47 | ||
| 48 | 3x |
dots <- dots_list(...) |
| 49 | 3x |
formals_cmh <- formals(asNamespace("stats")[["mantelhaen.test"]])[-c(1:3)]
|
| 50 | 2x |
if (!"alternative" %in% names(dots)) formals_cmh$alternative <- "two.sided" |
| 51 | 3x |
mantelhaen.args <- c(dots, formals_cmh[setdiff(names(formals_cmh), names(dots))]) |
| 52 | ||
| 53 |
# build ARD ------------------------------------------------------------------ |
|
| 54 | 3x |
cards::ard_mvsummary( |
| 55 | 3x |
data = data, |
| 56 | 3x |
variables = all_of(variables), |
| 57 | 3x |
statistic = all_of(variables) ~ list( |
| 58 | 3x |
stats_mantelhaen_test = .calc_mantelhaen_test(data, by, variables, strata, mantelhaen.args) |
| 59 |
) |
|
| 60 |
) |> |
|
| 61 | 3x |
dplyr::select(-"stat_label") |> |
| 62 | 3x |
dplyr::left_join( |
| 63 | 3x |
.df_mantelhaentest_stat_labels(exact = mantelhaen.args$exact), |
| 64 | 3x |
by = "stat_name" |
| 65 |
) |> |
|
| 66 | 3x |
dplyr::mutate( |
| 67 | 3x |
group1 = by, |
| 68 | 3x |
group2 = strata, |
| 69 | 3x |
stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name), |
| 70 | 3x |
context = "stats_mantelhaen_test", |
| 71 |
) |> |
|
| 72 | 3x |
cards::as_card() |> |
| 73 | 3x |
cards::tidy_ard_column_order() |> |
| 74 | 3x |
cards::tidy_ard_row_order() |
| 75 |
} |
|
| 76 | ||
| 77 |
.calc_mantelhaen_test <- function(data, by, variables, strata, mantelhaen.args) {
|
|
| 78 | 3x |
cards::as_cards_fn( |
| 79 | 3x |
\(x, data, variables, ...) {
|
| 80 | 3x |
stats::mantelhaen.test( |
| 81 | 3x |
x = x, |
| 82 | 3x |
y = data[[by]], |
| 83 | 3x |
z = data[[strata]], |
| 84 | 3x |
mantelhaen.args |
| 85 |
) |> |
|
| 86 | 3x |
broom::tidy() |> |
| 87 | 3x |
dplyr::bind_cols(mantelhaen.args) |
| 88 |
}, |
|
| 89 | 3x |
stat_names = c( |
| 90 | 3x |
"estimate", "statistic", "p.value", "parameter", "correct", "exact", "conf.level", "conf.low", "conf.high" |
| 91 |
) |
|
| 92 |
) |
|
| 93 |
} |
|
| 94 | ||
| 95 |
.df_mantelhaentest_stat_labels <- function(exact = FALSE) {
|
|
| 96 | 3x |
dplyr::tribble( |
| 97 | 3x |
~stat_name, ~stat_label, |
| 98 | 3x |
"estimate", ifelse(exact, "Mantel-Haenszel Odds Ratio Estimate", "Conditional Maximum Likelihood Odds Ratio Estimate"), |
| 99 | 3x |
"statistic", ifelse(exact, "Mantel-Haenszel X-squared Statistic", "Generalized Cochran-Mantel-Haenszel Statistic"), |
| 100 | 3x |
"p.value", "p-value", |
| 101 | 3x |
"parameter", "Degrees of Freedom", |
| 102 | 3x |
"correct", "Continuity Correction", |
| 103 | 3x |
"exact", "Exact Conditional Test", |
| 104 | 3x |
"conf.level", "CI Confidence Level", |
| 105 | 3x |
"conf.low", "CI Lower Bound", |
| 106 | 3x |
"conf.high", "CI Upper Bound" |
| 107 |
) |
|
| 108 |
} |
| 1 |
#' ARD Kruskal-Wallis Test |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Analysis results data for Kruskal-Wallis Rank Sum Test. |
|
| 5 |
#' |
|
| 6 |
#' Calculated with `kruskal.test(data[[variable]], data[[by]], ...)` |
|
| 7 |
#' |
|
| 8 |
#' @param data (`data.frame`)\cr |
|
| 9 |
#' a data frame. |
|
| 10 |
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 11 |
#' column name to compare by. |
|
| 12 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 13 |
#' column names to be compared. Independent tests will |
|
| 14 |
#' be computed for each variable. |
|
| 15 |
#' |
|
| 16 |
#' @return ARD data frame |
|
| 17 |
#' @export |
|
| 18 |
#' |
|
| 19 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))
|
|
| 20 |
#' cards::ADSL |> |
|
| 21 |
#' ard_stats_kruskal_test(by = "ARM", variables = "AGE") |
|
| 22 |
ard_stats_kruskal_test <- function(data, by, variables) {
|
|
| 23 | 5x |
set_cli_abort_call() |
| 24 | ||
| 25 |
# check installed packages --------------------------------------------------- |
|
| 26 | 5x |
check_pkg_installed("broom")
|
| 27 | ||
| 28 |
# check/process inputs ------------------------------------------------------- |
|
| 29 | 5x |
check_not_missing(data) |
| 30 | 5x |
check_not_missing(variables) |
| 31 | 5x |
check_not_missing(by) |
| 32 | 5x |
check_data_frame(data) |
| 33 | 5x |
cards::process_selectors(data, by = {{ by }}, variables = {{ variables }})
|
| 34 | 5x |
check_scalar(by) |
| 35 | ||
| 36 |
# return empty ARD if no variables selected ---------------------------------- |
|
| 37 | 5x |
if (is_empty(variables)) {
|
| 38 | ! |
return(dplyr::tibble() |> cards::as_card()) |
| 39 |
} |
|
| 40 | ||
| 41 |
# build ARD ------------------------------------------------------------------ |
|
| 42 | 5x |
lapply( |
| 43 | 5x |
variables, |
| 44 | 5x |
function(variable) {
|
| 45 | 6x |
cards::tidy_as_ard( |
| 46 | 6x |
lst_tidy = |
| 47 | 6x |
cards::eval_capture_conditions( |
| 48 | 6x |
stats::kruskal.test(x = data[[variable]], g = data[[by]]) |> |
| 49 | 6x |
broom::tidy() |
| 50 |
), |
|
| 51 | 6x |
tidy_result_names = c("statistic", "p.value", "parameter", "method"),
|
| 52 | 6x |
lst_ard_columns = list(group1 = by, variable = variable, context = "stats_kruskal_test") |
| 53 |
) |> |
|
| 54 | 6x |
dplyr::mutate( |
| 55 | 6x |
.after = "stat_name", |
| 56 | 6x |
stat_label = |
| 57 | 6x |
dplyr::case_when( |
| 58 | 6x |
.data$stat_name %in% "statistic" ~ "Kruskal-Wallis chi-squared Statistic", |
| 59 | 6x |
.data$stat_name %in% "p.value" ~ "p-value", |
| 60 | 6x |
.data$stat_name %in% "parameter" ~ "Degrees of Freedom", |
| 61 | 6x |
TRUE ~ .data$stat_name, |
| 62 |
) |
|
| 63 |
) |
|
| 64 |
} |
|
| 65 |
) |> |
|
| 66 | 5x |
dplyr::bind_rows() |> |
| 67 | 5x |
cards::as_card() |
| 68 |
} |
| 1 |
#' ARD Abnormality Counts |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' |
|
| 5 |
#' Function counts participants with abnormal analysis range values. |
|
| 6 |
#' |
|
| 7 |
#' For each abnormality specified via the `abnormal` parameter (e.g. Low or High), statistic `n` is |
|
| 8 |
#' calculated as the number of patients with this abnormality recorded, and statistic `N` is calculated as |
|
| 9 |
#' the total number of patients with at least one post-baseline assessment. `p` is calculated as |
|
| 10 |
#' `n / N`. If `excl_baseline_abn=TRUE` then participants with abnormality at baseline are excluded |
|
| 11 |
#' from all statistic calculations. |
|
| 12 |
#' |
|
| 13 |
#' @param data (`data.frame`)\cr |
|
| 14 |
#' a data frame. |
|
| 15 |
#' @param postbaseline ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 16 |
#' column name of post-baseline reference range indicator variable. |
|
| 17 |
#' @param baseline ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 18 |
#' column name of baseline reference range indicator variable. |
|
| 19 |
#' @param id ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 20 |
#' column name used to identify unique participants in `data`. If `NULL`, each row in `data` is assumed to correspond |
|
| 21 |
#' to a unique participants. |
|
| 22 |
#' @param abnormal (`list`)\cr |
|
| 23 |
#' a named list of abnormalities to assess for. Each element should specify all levels of `postbaseline`/`baseline` |
|
| 24 |
#' that should be included when assessing for a given abnormality, with the name specifying the name of the |
|
| 25 |
#' abnormality. Any levels specified but not present in the data are ignored. |
|
| 26 |
#' @param excl_baseline_abn (`logical`)\cr |
|
| 27 |
#' whether participants with baseline abnormality should be excluded from calculations. Defaults to `TRUE`. |
|
| 28 |
#' @param quiet (scalar `logical`)\cr |
|
| 29 |
#' logical indicating whether to suppress additional messaging. Default is `FALSE`. |
|
| 30 |
#' @inheritParams cards::ard_summary |
|
| 31 |
#' |
|
| 32 |
#' @return an ARD data frame of class 'card' |
|
| 33 |
#' @export |
|
| 34 |
#' |
|
| 35 |
#' @examples |
|
| 36 |
#' # Load Data ------------------- |
|
| 37 |
#' set.seed(1) |
|
| 38 |
#' adlb <- cards::ADLB |
|
| 39 |
#' adlb$BNRIND <- ifelse( |
|
| 40 |
#' adlb$BNRIND != "N", |
|
| 41 |
#' sample(c("LOW", "LOW LOW", "HIGH", "HIGH HIGH"), nrow(adlb), replace = TRUE),
|
|
| 42 |
#' "NORMAL" |
|
| 43 |
#' ) |
|
| 44 |
#' |
|
| 45 |
#' # Example 1 ------------------- |
|
| 46 |
#' adlb |> |
|
| 47 |
#' ard_tabulate_abnormal( |
|
| 48 |
#' postbaseline = LBNRIND, baseline = BNRIND, id = USUBJID, by = TRTA, |
|
| 49 |
#' abnormal = list(Low = c("LOW", "LOW LOW"), High = c("HIGH", "HIGH HIGH"))
|
|
| 50 |
#' ) |
|
| 51 |
ard_tabulate_abnormal <- function(data, |
|
| 52 |
postbaseline, |
|
| 53 |
baseline, |
|
| 54 |
id = NULL, |
|
| 55 |
by = NULL, |
|
| 56 |
strata = NULL, |
|
| 57 |
abnormal = list(Low = "LOW", High = "HIGH"), |
|
| 58 |
excl_baseline_abn = TRUE, |
|
| 59 |
quiet = FALSE) {
|
|
| 60 | 6x |
set_cli_abort_call() |
| 61 | ||
| 62 |
# check inputs --------------------------------------------------------------- |
|
| 63 | 6x |
check_data_frame(data) |
| 64 | 6x |
cards::process_selectors( |
| 65 | 6x |
data, |
| 66 | 6x |
postbaseline = {{ postbaseline }}, baseline = {{ baseline }}, id = {{ id }}, by = {{ by }}, strata = {{ strata }}
|
| 67 |
) |
|
| 68 | 6x |
check_not_missing(abnormal) |
| 69 | 6x |
check_scalar_logical(excl_baseline_abn) |
| 70 | 6x |
check_scalar_logical(quiet) |
| 71 | 6x |
check_class(abnormal, "list") |
| 72 | ||
| 73 | 6x |
if (!is_named(abnormal)) {
|
| 74 | 1x |
cli::cli_abort( |
| 75 | 1x |
"{.arg abnormal} must be a named list, where each name corresponds to a different abnormality/direction.",
|
| 76 | 1x |
call = get_cli_abort_call() |
| 77 |
) |
|
| 78 |
} |
|
| 79 | 5x |
if (!all(is.character(unlist(abnormal)))) {
|
| 80 | 1x |
cli::cli_abort( |
| 81 | 1x |
"Each abnormal level of {.var {postbaseline}} specified via {.arg abnormal} must be a {.cls string}.",
|
| 82 | 1x |
call = get_cli_abort_call() |
| 83 |
) |
|
| 84 |
} |
|
| 85 | ||
| 86 |
# print abnormality levels --------------------------------------------------- |
|
| 87 | 4x |
if (!quiet) {
|
| 88 | 3x |
for (i in seq_along(abnormal)) {
|
| 89 | 7x |
vec <- cli::cli_vec(abnormal[[i]], style = list("vec-sep" = ", ", "vec-sep2" = ", ", "vec-last" = ", "))
|
| 90 | 7x |
cli::cli_inform("Abnormality {.val {names(abnormal)[i]}} created {cli::qty(abnormal[[i]])} {?from/by merging} level{?s}: {.val {vec}}")
|
| 91 |
} |
|
| 92 |
} |
|
| 93 | ||
| 94 |
# build ARD ------------------------------------------------------------------ |
|
| 95 | 4x |
data <- data |> |
| 96 | 4x |
dplyr::mutate( |
| 97 | 4x |
dplyr::across( |
| 98 | 4x |
all_of(c(postbaseline, baseline)), |
| 99 | 4x |
\(x) {
|
| 100 |
# combine levels specified for each abnormality |
|
| 101 | 8x |
do.call(fct_collapse, args = c(list(f = x), abnormal)) |> |
| 102 | 8x |
suppressWarnings() |
| 103 |
} |
|
| 104 |
) |
|
| 105 |
) |
|
| 106 | ||
| 107 |
# calculate statistics for each abnormality |
|
| 108 | 4x |
lapply( |
| 109 | 4x |
names(abnormal), |
| 110 | 4x |
function(abn) {
|
| 111 | 9x |
cards::ard_mvsummary( |
| 112 | 9x |
data = data, |
| 113 | 9x |
variables = all_of(postbaseline), |
| 114 | 9x |
by = any_of(by), |
| 115 | 9x |
strata = any_of(strata), |
| 116 | 9x |
statistic = all_of(postbaseline) ~ list( |
| 117 | 9x |
abnormal = |
| 118 | 9x |
.calc_abnormal(data, abn, postbaseline, baseline, id, excl_baseline_abn) |
| 119 |
) |
|
| 120 |
) |> |
|
| 121 | 9x |
dplyr::bind_cols(dplyr::tibble(variable_level = list(abn))) |
| 122 |
} |
|
| 123 |
) |> |
|
| 124 | 4x |
dplyr::bind_rows() |> |
| 125 | 4x |
dplyr::mutate( |
| 126 | 4x |
stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name), |
| 127 | 4x |
context = "categorical_abnormal", |
| 128 |
) |> |
|
| 129 | 4x |
cards::as_card() |> |
| 130 | 4x |
cards::tidy_ard_column_order() |> |
| 131 | 4x |
cards::tidy_ard_row_order() |
| 132 |
} |
|
| 133 | ||
| 134 |
# function to perform calculations ------------------------------------------- |
|
| 135 |
.calc_abnormal <- function(data, abnormality, postbaseline, baseline, id, excl_baseline_abn) {
|
|
| 136 | 9x |
cards::as_cards_fn( |
| 137 | 9x |
\(x, data, ...) {
|
| 138 |
# if `excl_baseline_abn=FALSE` then do not exclude baseline abnormal from numerator/denominator calculations |
|
| 139 | 21x |
baseline_not_abn <- if (excl_baseline_abn) !data[[baseline]] %in% abnormality else TRUE # baseline visit not abnormal |
| 140 | 21x |
postbaseline_abn <- data[[postbaseline]] %in% abnormality # post-baseline visit abnormal |
| 141 | ||
| 142 |
# numerator: unique participants with any abnormal post-baseline visit, baseline visit not abnormal |
|
| 143 | 21x |
n <- data |> |
| 144 | 21x |
dplyr::filter(postbaseline_abn & baseline_not_abn) |> |
| 145 | 21x |
dplyr::select(all_of(id)) |> |
| 146 | 21x |
dplyr::distinct() |> |
| 147 | 21x |
nrow() |
| 148 | ||
| 149 |
# denominator: unique participants with any post-baseline visit, baseline visit not abnormal (if ) |
|
| 150 | 21x |
N <- data |> |
| 151 | 21x |
dplyr::filter(baseline_not_abn) |> |
| 152 | 21x |
dplyr::select(all_of(id)) |> |
| 153 | 21x |
dplyr::distinct() |> |
| 154 | 21x |
nrow() |
| 155 | ||
| 156 | 21x |
dplyr::tibble(n = n, N = N, p = n / N) |
| 157 |
}, |
|
| 158 | 9x |
stat_names = c("n", "N", "p")
|
| 159 |
) |
|
| 160 |
} |
| 1 |
#' ARD Attributes |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Add variable attributes to an ARD data frame. |
|
| 5 |
#' - The `label` attribute will be added for all columns, and when no label |
|
| 6 |
#' is specified and no label has been set for a column using the `label=` argument, |
|
| 7 |
#' the column name will be placed in the label statistic. |
|
| 8 |
#' - The `class` attribute will also be returned for all columns. |
|
| 9 |
#' - Any other attribute returned by `attributes()` will also be added, e.g. factor levels. |
|
| 10 |
#' |
|
| 11 |
#' @rdname ard_attributes |
|
| 12 |
#' @param data (`survey.design`)\cr |
|
| 13 |
#' a design object often created with [`survey::svydesign()`]. |
|
| 14 |
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr |
|
| 15 |
#' variables to include |
|
| 16 |
#' @param label (named `list`)\cr |
|
| 17 |
#' named list of variable labels, e.g. `list(cyl = "No. Cylinders")`. |
|
| 18 |
#' Default is `NULL` |
|
| 19 |
#' @inheritParams rlang::args_dots_empty |
|
| 20 |
#' |
|
| 21 |
#' @return an ARD data frame of class 'card' |
|
| 22 |
#' @export |
|
| 23 |
#' |
|
| 24 |
#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "survey"))
|
|
| 25 |
#' data(api, package = "survey") |
|
| 26 |
#' dclus1 <- survey::svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc) |
|
| 27 |
#' |
|
| 28 |
#' ard_attributes( |
|
| 29 |
#' data = dclus1, |
|
| 30 |
#' variables = c(sname, dname), |
|
| 31 |
#' label = list(sname = "School Name", dname = "District Name") |
|
| 32 |
#' ) |
|
| 33 |
ard_attributes.survey.design <- function(data, variables = everything(), label = NULL, ...) {
|
|
| 34 | 1x |
set_cli_abort_call() |
| 35 | ||
| 36 | 1x |
cards::ard_attributes(data = data[["variables"]], variables = {{ variables }}, label = label, ...)
|
| 37 |
} |