| 1 | #' Cumulative counts of numeric variable by thresholds | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [count_cumulative()] creates a layout element to calculate cumulative counts of values in a | |
| 6 | #' numeric variable that are less than, less or equal to, greater than, or greater or equal to user-specified | |
| 7 | #' threshold values. | |
| 8 | #' | |
| 9 | #' This function analyzes numeric variable `vars` against the threshold values supplied to the `thresholds` | |
| 10 | #' argument as a numeric vector. Whether counts should include the threshold values, and whether to count | |
| 11 | #' values lower or higher than the threshold values can be set via the `include_eq` and `lower_tail` | |
| 12 | #' parameters, respectively. | |
| 13 | #' | |
| 14 | #' @inheritParams h_count_cumulative | |
| 15 | #' @inheritParams argument_convention | |
| 16 | #' @param thresholds (`numeric`)\cr vector of cutoff values for the counts. | |
| 17 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 18 | #' | |
| 19 | #'   Options are: ``r shQuote(get_stats("count_cumulative"), type = "sh")`` | |
| 20 | #' | |
| 21 | #' @seealso Relevant helper function [h_count_cumulative()], and descriptive function [d_count_cumulative()]. | |
| 22 | #' | |
| 23 | #' @name count_cumulative | |
| 24 | #' @order 1 | |
| 25 | NULL | |
| 26 | ||
| 27 | #' Helper function for `s_count_cumulative()` | |
| 28 | #' | |
| 29 | #' @description `r lifecycle::badge("stable")` | |
| 30 | #' | |
| 31 | #' Helper function to calculate count and fraction of `x` values in the lower or upper tail given a threshold. | |
| 32 | #' | |
| 33 | #' @inheritParams argument_convention | |
| 34 | #' @param threshold (`numeric(1)`)\cr a cutoff value as threshold to count values of `x`. | |
| 35 | #' @param lower_tail (`flag`)\cr whether to count lower tail, default is `TRUE`. | |
| 36 | #' @param include_eq (`flag`)\cr whether to include value equal to the `threshold` in | |
| 37 | #' count, default is `TRUE`. | |
| 38 | #' | |
| 39 | #' @return A named vector with items: | |
| 40 | #' * `count`: the count of values less than, less or equal to, greater than, or greater or equal to a threshold | |
| 41 | #' of user specification. | |
| 42 | #' * `fraction`: the fraction of the count. | |
| 43 | #' | |
| 44 | #' @seealso [count_cumulative] | |
| 45 | #' | |
| 46 | #' @examples | |
| 47 | #' set.seed(1, kind = "Mersenne-Twister") | |
| 48 | #' x <- c(sample(1:10, 10), NA) | |
| 49 | #' .N_col <- length(x) | |
| 50 | #' | |
| 51 | #' h_count_cumulative(x, 5, denom = .N_col) | |
| 52 | #' h_count_cumulative(x, 5, lower_tail = FALSE, include_eq = FALSE, na_rm = FALSE, denom = .N_col) | |
| 53 | #' h_count_cumulative(x, 0, lower_tail = FALSE, denom = .N_col) | |
| 54 | #' h_count_cumulative(x, 100, lower_tail = FALSE, denom = .N_col) | |
| 55 | #' | |
| 56 | #' @export | |
| 57 | h_count_cumulative <- function(x, | |
| 58 | threshold, | |
| 59 | lower_tail = TRUE, | |
| 60 | include_eq = TRUE, | |
| 61 | na_rm = TRUE, | |
| 62 |                                denom) { | |
| 63 | 48x | checkmate::assert_numeric(x) | 
| 64 | 48x | checkmate::assert_numeric(threshold) | 
| 65 | 48x | checkmate::assert_numeric(denom) | 
| 66 | 48x | checkmate::assert_flag(lower_tail) | 
| 67 | 48x | checkmate::assert_flag(include_eq) | 
| 68 | 48x | checkmate::assert_flag(na_rm) | 
| 69 | ||
| 70 | 48x | is_keep <- if (na_rm) !is.na(x) else rep(TRUE, length(x)) | 
| 71 | 48x |   count <- if (lower_tail && include_eq) { | 
| 72 | 19x | length(x[is_keep & x <= threshold]) | 
| 73 | 48x |   } else if (lower_tail && !include_eq) { | 
| 74 | ! | length(x[is_keep & x < threshold]) | 
| 75 | 48x |   } else if (!lower_tail && include_eq) { | 
| 76 | 14x | length(x[is_keep & x >= threshold]) | 
| 77 | 48x |   } else if (!lower_tail && !include_eq) { | 
| 78 | 15x | length(x[is_keep & x > threshold]) | 
| 79 | } | |
| 80 | ||
| 81 | 48x | result <- c( | 
| 82 | 48x | count = count, | 
| 83 | 48x | fraction = if (count == 0 && denom == 0) 0 else count / denom | 
| 84 | ) | |
| 85 | 48x | result | 
| 86 | } | |
| 87 | ||
| 88 | #' Description of cumulative count | |
| 89 | #' | |
| 90 | #' @description `r lifecycle::badge("stable")` | |
| 91 | #' | |
| 92 | #' This is a helper function that describes the analysis in [s_count_cumulative()]. | |
| 93 | #' | |
| 94 | #' @inheritParams h_count_cumulative | |
| 95 | #' | |
| 96 | #' @return Labels for [s_count_cumulative()]. | |
| 97 | #' | |
| 98 | #' @export | |
| 99 | d_count_cumulative <- function(threshold, lower_tail = TRUE, include_eq = TRUE) { | |
| 100 | 46x | checkmate::assert_numeric(threshold) | 
| 101 | 46x | lg <- if (lower_tail) "<" else ">" | 
| 102 | 46x | eq <- if (include_eq) "=" else "" | 
| 103 | 46x | paste0(lg, eq, " ", threshold) | 
| 104 | } | |
| 105 | ||
| 106 | #' @describeIn count_cumulative Statistics function that produces a named list given a numeric vector of thresholds. | |
| 107 | #' | |
| 108 | #' @return | |
| 109 | #' * `s_count_cumulative()` returns a named list of `count_fraction`s: a list with each `thresholds` value as a | |
| 110 | #' component, each component containing a vector for the count and fraction. | |
| 111 | #' | |
| 112 | #' @keywords internal | |
| 113 | s_count_cumulative <- function(x, | |
| 114 | thresholds, | |
| 115 | lower_tail = TRUE, | |
| 116 | include_eq = TRUE, | |
| 117 |                                denom = c("N_col", "n", "N_row"), | |
| 118 | .N_col, # nolint | |
| 119 | .N_row, # nolint | |
| 120 | na_rm = TRUE, | |
| 121 |                                ...) { | |
| 122 | 23x | checkmate::assert_numeric(thresholds, min.len = 1, any.missing = FALSE) | 
| 123 | ||
| 124 | 23x | denom <- match.arg(denom) %>% | 
| 125 | 23x | switch( | 
| 126 | 23x | n = length(x), | 
| 127 | 23x | N_row = .N_row, | 
| 128 | 23x | N_col = .N_col | 
| 129 | ) | |
| 130 | ||
| 131 | 23x |   count_fraction_list <- Map(function(thres) { | 
| 132 | 46x | result <- h_count_cumulative(x, thres, lower_tail, include_eq, na_rm = na_rm, denom = denom) | 
| 133 | 46x | label <- d_count_cumulative(thres, lower_tail, include_eq) | 
| 134 | 46x | formatters::with_label(result, label) | 
| 135 | 23x | }, thresholds) | 
| 136 | ||
| 137 | 23x | names(count_fraction_list) <- thresholds | 
| 138 | 23x | list(count_fraction = count_fraction_list) | 
| 139 | } | |
| 140 | ||
| 141 | #' @describeIn count_cumulative Formatted analysis function which is used as `afun` | |
| 142 | #' in `count_cumulative()`. | |
| 143 | #' | |
| 144 | #' @return | |
| 145 | #' * `a_count_cumulative()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 146 | #' | |
| 147 | #' @keywords internal | |
| 148 | a_count_cumulative <- function(x, | |
| 149 | ..., | |
| 150 | .stats = NULL, | |
| 151 | .stat_names = NULL, | |
| 152 | .formats = NULL, | |
| 153 | .labels = NULL, | |
| 154 |                                .indent_mods = NULL) { | |
| 155 | 14x | dots_extra_args <- list(...) | 
| 156 | ||
| 157 | # Check if there are user-defined functions | |
| 158 | 14x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 159 | 14x | .stats <- default_and_custom_stats_list$all_stats | 
| 160 | 14x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 161 | ||
| 162 | # Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params) | |
| 163 | 14x | extra_afun_params <- retrieve_extra_afun_params( | 
| 164 | 14x | names(dots_extra_args$.additional_fun_parameters) | 
| 165 | ) | |
| 166 | 14x | dots_extra_args$.additional_fun_parameters <- NULL # After extraction we do not need them anymore | 
| 167 | ||
| 168 | # Main statistical functions application | |
| 169 | 14x | x_stats <- .apply_stat_functions( | 
| 170 | 14x | default_stat_fnc = s_count_cumulative, | 
| 171 | 14x | custom_stat_fnc_list = custom_stat_functions, | 
| 172 | 14x | args_list = c( | 
| 173 | 14x | x = list(x), | 
| 174 | 14x | extra_afun_params, | 
| 175 | 14x | dots_extra_args | 
| 176 | ) | |
| 177 | ) | |
| 178 | ||
| 179 | # Fill in with stats defaults if needed | |
| 180 | 14x |   .stats <- get_stats("count_cumulative", | 
| 181 | 14x | stats_in = .stats, | 
| 182 | 14x | custom_stats_in = names(custom_stat_functions) | 
| 183 | ) | |
| 184 | ||
| 185 | 14x | x_stats <- x_stats[.stats] | 
| 186 | 14x | levels_per_stats <- lapply(x_stats, names) | 
| 187 | ||
| 188 | # Fill in formats/indents/labels with custom input and defaults | |
| 189 | 14x | .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) | 
| 190 | 14x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) | 
| 191 | 14x | .labels <- get_labels_from_stats( | 
| 192 | 14x | .stats, .labels, levels_per_stats, | 
| 193 | 14x | label_attr_from_stats = sapply(.unlist_keep_nulls(x_stats), attr, "label") | 
| 194 | ) | |
| 195 | ||
| 196 | # Unlist stats | |
| 197 | 14x | x_stats <- x_stats %>% | 
| 198 | 14x | .unlist_keep_nulls() %>% | 
| 199 | 14x | setNames(names(.formats)) | 
| 200 | ||
| 201 | # Auto format handling | |
| 202 | 14x | .formats <- apply_auto_formatting( | 
| 203 | 14x | .formats, | 
| 204 | 14x | x_stats, | 
| 205 | 14x | extra_afun_params$.df_row, | 
| 206 | 14x | extra_afun_params$.var | 
| 207 | ) | |
| 208 | ||
| 209 | # Get and check statistical names from defaults | |
| 210 | 14x | .stat_names <- get_stat_names(x_stats, .stat_names) # note is x_stats | 
| 211 | ||
| 212 | 14x | in_rows( | 
| 213 | 14x | .list = x_stats, | 
| 214 | 14x | .formats = .formats, | 
| 215 | 14x | .names = names(.labels), | 
| 216 | 14x | .stat_names = .stat_names, | 
| 217 | 14x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 218 | 14x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 219 | ) | |
| 220 | } | |
| 221 | ||
| 222 | #' @describeIn count_cumulative Layout-creating function which can take statistics function arguments | |
| 223 | #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 224 | #' | |
| 225 | #' @return | |
| 226 | #' * `count_cumulative()` returns a layout object suitable for passing to further layouting functions, | |
| 227 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 228 | #' the statistics from `s_count_cumulative()` to the table layout. | |
| 229 | #' | |
| 230 | #' @examples | |
| 231 | #' basic_table() %>% | |
| 232 | #'   split_cols_by("ARM") %>% | |
| 233 | #' add_colcounts() %>% | |
| 234 | #' count_cumulative( | |
| 235 | #' vars = "AGE", | |
| 236 | #' thresholds = c(40, 60) | |
| 237 | #' ) %>% | |
| 238 | #' build_table(tern_ex_adsl) | |
| 239 | #' | |
| 240 | #' @export | |
| 241 | #' @order 2 | |
| 242 | count_cumulative <- function(lyt, | |
| 243 | vars, | |
| 244 | thresholds, | |
| 245 | lower_tail = TRUE, | |
| 246 | include_eq = TRUE, | |
| 247 | var_labels = vars, | |
| 248 | show_labels = "visible", | |
| 249 | na_str = default_na_str(), | |
| 250 | nested = TRUE, | |
| 251 | table_names = vars, | |
| 252 | ..., | |
| 253 | na_rm = TRUE, | |
| 254 |                              .stats = c("count_fraction"), | |
| 255 | .stat_names = NULL, | |
| 256 | .formats = NULL, | |
| 257 | .labels = NULL, | |
| 258 |                              .indent_mods = NULL) { | |
| 259 | # Depending on main functions | |
| 260 | 6x | extra_args <- list( | 
| 261 | 6x | "na_rm" = na_rm, | 
| 262 | 6x | "thresholds" = thresholds, | 
| 263 | 6x | "lower_tail" = lower_tail, | 
| 264 | 6x | "include_eq" = include_eq, | 
| 265 | ... | |
| 266 | ) | |
| 267 | ||
| 268 | # Needed defaults | |
| 269 | 6x | if (!is.null(.stats)) extra_args[[".stats"]] <- .stats | 
| 270 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 271 | ! | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 272 | 1x | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 273 | ! | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 274 | ||
| 275 | # Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params) | |
| 276 | 6x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 277 | 6x | formals(a_count_cumulative) <- c( | 
| 278 | 6x | formals(a_count_cumulative), | 
| 279 | 6x | extra_args[[".additional_fun_parameters"]] | 
| 280 | ) | |
| 281 | ||
| 282 |   # Main {rtables} structural call | |
| 283 | 6x | analyze( | 
| 284 | 6x | lyt, | 
| 285 | 6x | vars, | 
| 286 | 6x | afun = a_count_cumulative, | 
| 287 | 6x | na_str = na_str, | 
| 288 | 6x | inclNAs = !na_rm, | 
| 289 | 6x | table_names = table_names, | 
| 290 | 6x | var_labels = var_labels, | 
| 291 | 6x | show_labels = show_labels, | 
| 292 | 6x | nested = nested, | 
| 293 | 6x | extra_args = extra_args | 
| 294 | ) | |
| 295 | } | 
| 1 | #' Univariate formula special term | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The special term `univariate` indicate that the model should be fitted individually for | |
| 6 | #' every variable included in univariate. | |
| 7 | #' | |
| 8 | #' @param x (`character`)\cr a vector of variable names separated by commas. | |
| 9 | #' | |
| 10 | #' @return When used within a model formula, produces univariate models for each variable provided. | |
| 11 | #' | |
| 12 | #' @details | |
| 13 | #' If provided alongside with pairwise specification, the model | |
| 14 | #' `y ~ ARM + univariate(SEX, AGE, RACE)` lead to the study and comparison of the models | |
| 15 | #' + `y ~ ARM` | |
| 16 | #' + `y ~ ARM + SEX` | |
| 17 | #' + `y ~ ARM + AGE` | |
| 18 | #' + `y ~ ARM + RACE` | |
| 19 | #' | |
| 20 | #' @export | |
| 21 | univariate <- function(x) { | |
| 22 | 2x | structure(x, varname = deparse(substitute(x))) | 
| 23 | } | |
| 24 | ||
| 25 | # Get the right-hand-term of a formula | |
| 26 | rht <- function(x) { | |
| 27 | 4x | checkmate::assert_formula(x) | 
| 28 | 4x | y <- as.character(rev(x)[[1]]) | 
| 29 | 4x | return(y) | 
| 30 | } | |
| 31 | ||
| 32 | #' Hazard ratio estimation in interactions | |
| 33 | #' | |
| 34 | #' This function estimates the hazard ratios between arms when an interaction variable is given with | |
| 35 | #' specific values. | |
| 36 | #' | |
| 37 | #' @param variable,given (`character(2)`)\cr names of the two variables in the interaction. We seek the estimation of | |
| 38 | #' the levels of `variable` given the levels of `given`. | |
| 39 | #' @param lvl_var,lvl_given (`character`)\cr corresponding levels given by [levels()]. | |
| 40 | #' @param mmat (named `numeric`) a vector filled with `0`s used as a template to obtain the design matrix. | |
| 41 | #' @param coef (`numeric`)\cr vector of estimated coefficients. | |
| 42 | #' @param vcov (`matrix`)\cr variance-covariance matrix of underlying model. | |
| 43 | #' @param conf_level (`proportion`)\cr confidence level of estimate intervals. | |
| 44 | #' | |
| 45 | #' @details Given the cox regression investigating the effect of Arm (A, B, C; reference A) | |
| 46 | #' and Sex (F, M; reference Female). The model is abbreviated: y ~ Arm + Sex + Arm x Sex. | |
| 47 | #' The cox regression estimates the coefficients along with a variance-covariance matrix for: | |
| 48 | #' | |
| 49 | #' - b1 (arm b), b2 (arm c) | |
| 50 | #' - b3 (sex m) | |
| 51 | #' - b4 (arm b: sex m), b5 (arm c: sex m) | |
| 52 | #' | |
| 53 | #' Given that I want an estimation of the Hazard Ratio for arm C/sex M, the estimation | |
| 54 | #' will be given in reference to arm A/Sex M by exp(b2 + b3 + b5)/ exp(b3) = exp(b2 + b5), | |
| 55 | #' therefore the interaction coefficient is given by b2 + b5 while the standard error is obtained | |
| 56 | #' as $1.96 * sqrt(Var b2 + Var b5 + 2 * covariance (b2,b5))$ for a confidence level of 0.95. | |
| 57 | #' | |
| 58 | #' @return A list of matrices (one per level of variable) with rows corresponding to the combinations of | |
| 59 | #' `variable` and `given`, with columns: | |
| 60 | #' * `coef_hat`: Estimation of the coefficient. | |
| 61 | #' * `coef_se`: Standard error of the estimation. | |
| 62 | #' * `hr`: Hazard ratio. | |
| 63 | #' * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio. | |
| 64 | #' | |
| 65 | #' @seealso [s_cox_multivariate()]. | |
| 66 | #' | |
| 67 | #' @examples | |
| 68 | #' library(dplyr) | |
| 69 | #' library(survival) | |
| 70 | #' | |
| 71 | #' ADSL <- tern_ex_adsl %>% | |
| 72 | #'   filter(SEX %in% c("F", "M")) | |
| 73 | #' | |
| 74 | #' adtte <- tern_ex_adtte %>% filter(PARAMCD == "PFS") | |
| 75 | #' adtte$ARMCD <- droplevels(adtte$ARMCD) | |
| 76 | #' adtte$SEX <- droplevels(adtte$SEX) | |
| 77 | #' | |
| 78 | #' mod <- coxph( | |
| 79 | #' formula = Surv(time = AVAL, event = 1 - CNSR) ~ (SEX + ARMCD)^2, | |
| 80 | #' data = adtte | |
| 81 | #' ) | |
| 82 | #' | |
| 83 | #' mmat <- stats::model.matrix(mod)[1, ] | |
| 84 | #' mmat[!mmat == 0] <- 0 | |
| 85 | #' | |
| 86 | #' @keywords internal | |
| 87 | estimate_coef <- function(variable, given, | |
| 88 | lvl_var, lvl_given, | |
| 89 | coef, | |
| 90 | mmat, | |
| 91 | vcov, | |
| 92 |                           conf_level = 0.95) { | |
| 93 | 8x | var_lvl <- paste0(variable, lvl_var[-1]) # [-1]: reference level | 
| 94 | 8x | giv_lvl <- paste0(given, lvl_given) | 
| 95 | ||
| 96 | 8x | design_mat <- expand.grid(variable = var_lvl, given = giv_lvl) | 
| 97 | 8x | design_mat <- design_mat[order(design_mat$variable, design_mat$given), ] | 
| 98 | 8x | design_mat <- within( | 
| 99 | 8x | data = design_mat, | 
| 100 | 8x |     expr = { | 
| 101 | 8x | inter <- paste0(variable, ":", given) | 
| 102 | 8x | rev_inter <- paste0(given, ":", variable) | 
| 103 | } | |
| 104 | ) | |
| 105 | ||
| 106 | 8x | split_by_variable <- design_mat$variable | 
| 107 | 8x | interaction_names <- paste(design_mat$variable, design_mat$given, sep = "/") | 
| 108 | ||
| 109 | 8x | design_mat <- apply( | 
| 110 | 8x |     X = design_mat, MARGIN = 1, FUN = function(x) { | 
| 111 | 27x | mmat[names(mmat) %in% x[-which(names(x) == "given")]] <- 1 | 
| 112 | 27x | return(mmat) | 
| 113 | } | |
| 114 | ) | |
| 115 | 8x | colnames(design_mat) <- interaction_names | 
| 116 | ||
| 117 | 8x | betas <- as.matrix(coef) | 
| 118 | ||
| 119 | 8x | coef_hat <- t(design_mat) %*% betas | 
| 120 | 8x | dimnames(coef_hat)[2] <- "coef" | 
| 121 | ||
| 122 | 8x |   coef_se <- apply(design_mat, 2, function(x) { | 
| 123 | 27x | vcov_el <- as.logical(x) | 
| 124 | 27x | y <- vcov[vcov_el, vcov_el] | 
| 125 | 27x | y <- sum(y) | 
| 126 | 27x | y <- sqrt(y) | 
| 127 | 27x | return(y) | 
| 128 | }) | |
| 129 | ||
| 130 | 8x | q_norm <- stats::qnorm((1 + conf_level) / 2) | 
| 131 | 8x | y <- cbind(coef_hat, `se(coef)` = coef_se) | 
| 132 | ||
| 133 | 8x |   y <- apply(y, 1, function(x) { | 
| 134 | 27x | x["hr"] <- exp(x["coef"]) | 
| 135 | 27x | x["lcl"] <- exp(x["coef"] - q_norm * x["se(coef)"]) | 
| 136 | 27x | x["ucl"] <- exp(x["coef"] + q_norm * x["se(coef)"]) | 
| 137 | ||
| 138 | 27x | return(x) | 
| 139 | }) | |
| 140 | ||
| 141 | 8x | y <- t(y) | 
| 142 | 8x | y <- by(y, split_by_variable, identity) | 
| 143 | 8x | y <- lapply(y, as.matrix) | 
| 144 | ||
| 145 | 8x | attr(y, "details") <- paste0( | 
| 146 | 8x | "Estimations of ", variable, | 
| 147 | 8x | " hazard ratio given the level of ", given, " compared to ", | 
| 148 | 8x | variable, " level ", lvl_var[1], "." | 
| 149 | ) | |
| 150 | 8x | return(y) | 
| 151 | } | |
| 152 | ||
| 153 | #' `tryCatch` around `car::Anova` | |
| 154 | #' | |
| 155 | #' Captures warnings when executing [car::Anova]. | |
| 156 | #' | |
| 157 | #' @inheritParams car::Anova | |
| 158 | #' | |
| 159 | #' @return A list with item `aov` for the result of the model and `error_text` for the captured warnings. | |
| 160 | #' | |
| 161 | #' @examples | |
| 162 | #' # `car::Anova` on cox regression model including strata and expected | |
| 163 | #' # a likelihood ratio test triggers a warning as only Wald method is | |
| 164 | #' # accepted. | |
| 165 | #' | |
| 166 | #' library(survival) | |
| 167 | #' | |
| 168 | #' mod <- coxph( | |
| 169 | #' formula = Surv(time = futime, event = fustat) ~ factor(rx) + strata(ecog.ps), | |
| 170 | #' data = ovarian | |
| 171 | #' ) | |
| 172 | #' | |
| 173 | #' @keywords internal | |
| 174 | try_car_anova <- function(mod, | |
| 175 |                           test.statistic) { # nolint | |
| 176 | 2x | y <- tryCatch( | 
| 177 | 2x | withCallingHandlers( | 
| 178 | 2x |       expr = { | 
| 179 | 2x | warn_text <- c() | 
| 180 | 2x | list( | 
| 181 | 2x | aov = car::Anova( | 
| 182 | 2x | mod, | 
| 183 | 2x | test.statistic = test.statistic, | 
| 184 | 2x | type = "III" | 
| 185 | ), | |
| 186 | 2x | warn_text = warn_text | 
| 187 | ) | |
| 188 | }, | |
| 189 | 2x |       warning = function(w) { | 
| 190 | # If a warning is detected it is handled as "w". | |
| 191 | ! |         warn_text <<- trimws(paste0("Warning in `try_car_anova`: ", w)) | 
| 192 | ||
| 193 | # A warning is sometimes expected, then, we want to restart | |
| 194 | # the execution while ignoring the warning. | |
| 195 | ! |         invokeRestart("muffleWarning") | 
| 196 | } | |
| 197 | ), | |
| 198 | 2x |     finally = { | 
| 199 | } | |
| 200 | ) | |
| 201 | ||
| 202 | 2x | return(y) | 
| 203 | } | |
| 204 | ||
| 205 | #' Fit a Cox regression model and ANOVA | |
| 206 | #' | |
| 207 | #' The functions derives the effect p-values using [car::Anova()] from [survival::coxph()] results. | |
| 208 | #' | |
| 209 | #' @inheritParams t_coxreg | |
| 210 | #' | |
| 211 | #' @return A list with items `mod` (results of [survival::coxph()]), `msum` (result of `summary`) and | |
| 212 | #' `aov` (result of [car::Anova()]). | |
| 213 | #' | |
| 214 | #' @noRd | |
| 215 | fit_n_aov <- function(formula, | |
| 216 | data = data, | |
| 217 | conf_level = conf_level, | |
| 218 |                       pval_method = c("wald", "likelihood"), | |
| 219 |                       ...) { | |
| 220 | 1x | pval_method <- match.arg(pval_method) | 
| 221 | ||
| 222 | 1x | environment(formula) <- environment() | 
| 223 | 1x |   suppressWarnings({ | 
| 224 | # We expect some warnings due to coxph which fails strict programming. | |
| 225 | 1x | mod <- survival::coxph(formula, data = data, ...) | 
| 226 | 1x | msum <- summary(mod, conf.int = conf_level) | 
| 227 | }) | |
| 228 | ||
| 229 | 1x | aov <- try_car_anova( | 
| 230 | 1x | mod, | 
| 231 | 1x | test.statistic = switch(pval_method, | 
| 232 | 1x | "wald" = "Wald", | 
| 233 | 1x | "likelihood" = "LR" | 
| 234 | ) | |
| 235 | ) | |
| 236 | ||
| 237 | 1x | warn_attr <- aov$warn_text | 
| 238 | ! | if (!is.null(aov$warn_text)) message(warn_attr) | 
| 239 | ||
| 240 | 1x | aov <- aov$aov | 
| 241 | 1x | y <- list(mod = mod, msum = msum, aov = aov) | 
| 242 | 1x | attr(y, "message") <- warn_attr | 
| 243 | ||
| 244 | 1x | return(y) | 
| 245 | } | |
| 246 | ||
| 247 | # argument_checks | |
| 248 | check_formula <- function(formula) { | |
| 249 | 1x |   if (!(inherits(formula, "formula"))) { | 
| 250 | 1x |     stop("Check `formula`. A formula should resemble `Surv(time = AVAL, event = 1 - CNSR) ~ study_arm(ARMCD)`.") | 
| 251 | } | |
| 252 | ||
| 253 | ! | invisible() | 
| 254 | } | |
| 255 | ||
| 256 | check_covariate_formulas <- function(covariates) { | |
| 257 | 1x |   if (!all(vapply(X = covariates, FUN = inherits, what = "formula", FUN.VALUE = TRUE)) || is.null(covariates)) { | 
| 258 | 1x |     stop("Check `covariates`, it should be a list of right-hand-term formulas, e.g. list(Age = ~AGE).") | 
| 259 | } | |
| 260 | ||
| 261 | ! | invisible() | 
| 262 | } | |
| 263 | ||
| 264 | name_covariate_names <- function(covariates) { | |
| 265 | 1x | miss_names <- names(covariates) == "" | 
| 266 | 1x | no_names <- is.null(names(covariates)) | 
| 267 | ! | if (any(miss_names)) names(covariates)[miss_names] <- vapply(covariates[miss_names], FUN = rht, FUN.VALUE = "name") | 
| 268 | ! | if (no_names) names(covariates) <- vapply(covariates, FUN = rht, FUN.VALUE = "name") | 
| 269 | 1x | return(covariates) | 
| 270 | } | |
| 271 | ||
| 272 | check_increments <- function(increments, covariates) { | |
| 273 | 1x |   if (!is.null(increments)) { | 
| 274 | 1x | covariates <- vapply(covariates, FUN = rht, FUN.VALUE = "name") | 
| 275 | 1x | lapply( | 
| 276 | 1x |       X = names(increments), FUN = function(x) { | 
| 277 | 3x |         if (!x %in% covariates) { | 
| 278 | 1x | warning( | 
| 279 | 1x | paste( | 
| 280 | 1x | "Check `increments`, the `increment` for ", x, | 
| 281 | 1x | "doesn't match any names in investigated covariate(s)." | 
| 282 | ) | |
| 283 | ) | |
| 284 | } | |
| 285 | } | |
| 286 | ) | |
| 287 | } | |
| 288 | ||
| 289 | 1x | invisible() | 
| 290 | } | |
| 291 | ||
| 292 | #' Multivariate Cox model - summarized results | |
| 293 | #' | |
| 294 | #' Analyses based on multivariate Cox model are usually not performed for the Controlled Substance Reporting or | |
| 295 | #' regulatory documents but serve exploratory purposes only (e.g., for publication). In practice, the model usually | |
| 296 | #' includes only the main effects (without interaction terms). It produces the hazard ratio estimates for each of the | |
| 297 | #' covariates included in the model. | |
| 298 | #' The analysis follows the same principles (e.g., stratified vs. unstratified analysis and tie handling) as the | |
| 299 | #' usual Cox model analysis. Since there is usually no pre-specified hypothesis testing for such analysis, | |
| 300 | #' the p.values need to be interpreted with caution. (**Statistical Analysis of Clinical Trials Data with R**, | |
| 301 | #' `NEST's bookdown`) | |
| 302 | #' | |
| 303 | #' @param formula (`formula`)\cr a formula corresponding to the investigated [survival::Surv()] survival model | |
| 304 | #' including covariates. | |
| 305 | #' @param data (`data.frame`)\cr a data frame which includes the variable in formula and covariates. | |
| 306 | #' @param conf_level (`proportion`)\cr the confidence level for the hazard ratio interval estimations. Default is 0.95. | |
| 307 | #' @param pval_method (`string`)\cr the method used for the estimation of p-values, should be one of | |
| 308 | #' `"wald"` (default) or `"likelihood"`. | |
| 309 | #' @param ... optional parameters passed to [survival::coxph()]. Can include `ties`, a character string specifying the | |
| 310 | #' method for tie handling, one of `exact` (default), `efron`, `breslow`. | |
| 311 | #' | |
| 312 | #' @return A `list` with elements `mod`, `msum`, `aov`, and `coef_inter`. | |
| 313 | #' | |
| 314 | #' @details The output is limited to single effect terms. Work in ongoing for estimation of interaction terms | |
| 315 | #' but is out of scope as defined by the Global Data Standards Repository | |
| 316 | #' (**`GDS_Standard_TLG_Specs_Tables_2.doc`**). | |
| 317 | #' | |
| 318 | #' @seealso [estimate_coef()]. | |
| 319 | #' | |
| 320 | #' @examples | |
| 321 | #' library(dplyr) | |
| 322 | #' | |
| 323 | #' adtte <- tern_ex_adtte | |
| 324 | #' adtte_f <- subset(adtte, PARAMCD == "OS") # _f: filtered | |
| 325 | #' adtte_f <- filter( | |
| 326 | #' adtte_f, | |
| 327 | #' PARAMCD == "OS" & | |
| 328 | #'     SEX %in% c("F", "M") & | |
| 329 | #'     RACE %in% c("ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE") | |
| 330 | #' ) | |
| 331 | #' adtte_f$SEX <- droplevels(adtte_f$SEX) | |
| 332 | #' adtte_f$RACE <- droplevels(adtte_f$RACE) | |
| 333 | #' | |
| 334 | #' @keywords internal | |
| 335 | s_cox_multivariate <- function(formula, data, | |
| 336 | conf_level = 0.95, | |
| 337 |                                pval_method = c("wald", "likelihood"), | |
| 338 |                                ...) { | |
| 339 | 1x |   tf <- stats::terms(formula, specials = c("strata")) | 
| 340 | 1x | covariates <- rownames(attr(tf, "factors"))[-c(1, unlist(attr(tf, "specials")))] | 
| 341 | 1x | lapply( | 
| 342 | 1x | X = covariates, | 
| 343 | 1x |     FUN = function(x) { | 
| 344 | 3x |       if (is.character(data[[x]])) { | 
| 345 | 1x | data[[x]] <<- as.factor(data[[x]]) | 
| 346 | } | |
| 347 | 3x | invisible() | 
| 348 | } | |
| 349 | ) | |
| 350 | 1x | pval_method <- match.arg(pval_method) | 
| 351 | ||
| 352 | # Results directly exported from environment(fit_n_aov) to environment(s_function_draft) | |
| 353 | 1x | y <- fit_n_aov( | 
| 354 | 1x | formula = formula, | 
| 355 | 1x | data = data, | 
| 356 | 1x | conf_level = conf_level, | 
| 357 | 1x | pval_method = pval_method, | 
| 358 | ... | |
| 359 | ) | |
| 360 | 1x | mod <- y$mod | 
| 361 | 1x | aov <- y$aov | 
| 362 | 1x | msum <- y$msum | 
| 363 | 1x | list2env(as.list(y), environment()) | 
| 364 | ||
| 365 | 1x | all_term_labs <- attr(mod$terms, "term.labels") | 
| 366 | 1x | term_labs <- all_term_labs[which(attr(mod$terms, "order") == 1)] | 
| 367 | 1x | names(term_labs) <- term_labs | 
| 368 | ||
| 369 | 1x | coef_inter <- NULL | 
| 370 | 1x |   if (any(attr(mod$terms, "order") > 1)) { | 
| 371 | 1x | for_inter <- all_term_labs[attr(mod$terms, "order") > 1] | 
| 372 | 1x | names(for_inter) <- for_inter | 
| 373 | 1x | mmat <- stats::model.matrix(mod)[1, ] | 
| 374 | 1x | mmat[!mmat == 0] <- 0 | 
| 375 | 1x | mcoef <- stats::coef(mod) | 
| 376 | 1x | mvcov <- stats::vcov(mod) | 
| 377 | ||
| 378 | 1x |     estimate_coef_local <- function(variable, given) { | 
| 379 | 6x | estimate_coef( | 
| 380 | 6x | variable, given, | 
| 381 | 6x | coef = mcoef, mmat = mmat, vcov = mvcov, conf_level = conf_level, | 
| 382 | 6x | lvl_var = levels(data[[variable]]), lvl_given = levels(data[[given]]) | 
| 383 | ) | |
| 384 | } | |
| 385 | ||
| 386 | 1x | coef_inter <- lapply( | 
| 387 | 1x |       for_inter, function(x) { | 
| 388 | 3x | y <- attr(mod$terms, "factors")[, x] | 
| 389 | 3x | y <- names(y[y > 0]) | 
| 390 | 3x | Map(estimate_coef_local, variable = y, given = rev(y)) | 
| 391 | } | |
| 392 | ) | |
| 393 | } | |
| 394 | ||
| 395 | 1x | list(mod = mod, msum = msum, aov = aov, coef_inter = coef_inter) | 
| 396 | } | 
| 1 | #' Odds ratio estimation | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [estimate_odds_ratio()] creates a layout element to compare bivariate responses between | |
| 6 | #' two groups by estimating an odds ratio and its confidence interval. | |
| 7 | #' | |
| 8 | #' The primary analysis variable specified by `vars` is the group variable. Additional variables can be included in the | |
| 9 | #' analysis via the `variables` argument, which accepts `arm`, an arm variable, and `strata`, a stratification variable. | |
| 10 | #' If more than two arm levels are present, they can be combined into two groups using the `groups_list` argument. | |
| 11 | #' | |
| 12 | #' @inheritParams split_cols_by_groups | |
| 13 | #' @inheritParams argument_convention | |
| 14 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 15 | #' | |
| 16 | #'   Options are: ``r shQuote(get_stats("estimate_odds_ratio"), type = "sh")`` | |
| 17 | #' @param method (`string`)\cr whether to use the correct (`"exact"`) calculation in the conditional likelihood or one | |
| 18 | #' of the approximations. See [survival::clogit()] for details. | |
| 19 | #' | |
| 20 | #' @note | |
| 21 | #' * This function uses logistic regression for unstratified analyses, and conditional logistic regression for | |
| 22 | #' stratified analyses. The Wald confidence interval is calculated with the specified confidence level. | |
| 23 | #' * For stratified analyses, there is currently no implementation for conditional likelihood confidence intervals, | |
| 24 | #' therefore the likelihood confidence interval is not available as an option. | |
| 25 | #' * When `vars` contains only responders or non-responders no odds ratio estimation is possible so the returned | |
| 26 | #' values will be `NA`. | |
| 27 | #' | |
| 28 | #' @seealso Relevant helper function [h_odds_ratio()]. | |
| 29 | #' | |
| 30 | #' @name odds_ratio | |
| 31 | #' @order 1 | |
| 32 | NULL | |
| 33 | ||
| 34 | #' @describeIn odds_ratio Statistics function which estimates the odds ratio | |
| 35 | #' between a treatment and a control. A `variables` list with `arm` and `strata` | |
| 36 | #' variable names must be passed if a stratified analysis is required. | |
| 37 | #' | |
| 38 | #' @return | |
| 39 | #' * `s_odds_ratio()` returns a named list with the statistics `or_ci` | |
| 40 | #' (containing `est`, `lcl`, and `ucl`) and `n_tot`. | |
| 41 | #' | |
| 42 | #' @examples | |
| 43 | #' # Unstratified analysis. | |
| 44 | #' s_odds_ratio( | |
| 45 | #' df = subset(dta, grp == "A"), | |
| 46 | #' .var = "rsp", | |
| 47 | #' .ref_group = subset(dta, grp == "B"), | |
| 48 | #' .in_ref_col = FALSE, | |
| 49 | #' .df_row = dta | |
| 50 | #' ) | |
| 51 | #' | |
| 52 | #' # Stratified analysis. | |
| 53 | #' s_odds_ratio( | |
| 54 | #' df = subset(dta, grp == "A"), | |
| 55 | #' .var = "rsp", | |
| 56 | #' .ref_group = subset(dta, grp == "B"), | |
| 57 | #' .in_ref_col = FALSE, | |
| 58 | #' .df_row = dta, | |
| 59 | #' variables = list(arm = "grp", strata = "strata") | |
| 60 | #' ) | |
| 61 | #' | |
| 62 | #' @export | |
| 63 | s_odds_ratio <- function(df, | |
| 64 | .var, | |
| 65 | .ref_group, | |
| 66 | .in_ref_col, | |
| 67 | .df_row, | |
| 68 | variables = list(arm = NULL, strata = NULL), | |
| 69 | conf_level = 0.95, | |
| 70 | groups_list = NULL, | |
| 71 | method = "exact", | |
| 72 |                          ...) { | |
| 73 | 99x | y <- list(or_ci = numeric(), n_tot = numeric()) | 
| 74 | ||
| 75 | 99x |   if (!.in_ref_col) { | 
| 76 | 94x | assert_proportion_value(conf_level) | 
| 77 | 94x | assert_df_with_variables(df, list(rsp = .var)) | 
| 78 | 94x | assert_df_with_variables(.ref_group, list(rsp = .var)) | 
| 79 | ||
| 80 | 94x |     if (is.null(variables$strata)) { | 
| 81 | 76x | data <- data.frame( | 
| 82 | 76x | rsp = c(.ref_group[[.var]], df[[.var]]), | 
| 83 | 76x | grp = factor( | 
| 84 | 76x |           rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))), | 
| 85 | 76x |           levels = c("ref", "Not-ref") | 
| 86 | ) | |
| 87 | ) | |
| 88 | 76x | y <- or_glm(data, conf_level = conf_level) | 
| 89 |     } else { | |
| 90 | 18x | assert_df_with_variables(.df_row, c(list(rsp = .var), variables)) | 
| 91 | 18x |       checkmate::assert_subset(method, c("exact", "approximate", "efron", "breslow"), empty.ok = FALSE) | 
| 92 | ||
| 93 | # The group variable prepared for clogit must be synchronised with combination groups definition. | |
| 94 | 18x |       if (is.null(groups_list)) { | 
| 95 | 16x | ref_grp <- as.character(unique(.ref_group[[variables$arm]])) | 
| 96 | 16x | trt_grp <- as.character(unique(df[[variables$arm]])) | 
| 97 | 16x | grp <- stats::relevel(factor(.df_row[[variables$arm]]), ref = ref_grp) | 
| 98 |       } else { | |
| 99 | # If more than one level in reference col. | |
| 100 | 2x | reference <- as.character(unique(.ref_group[[variables$arm]])) | 
| 101 | 2x | grp_ref_flag <- vapply( | 
| 102 | 2x | X = groups_list, | 
| 103 | 2x | FUN.VALUE = TRUE, | 
| 104 | 2x | FUN = function(x) all(reference %in% x) | 
| 105 | ) | |
| 106 | 2x | ref_grp <- names(groups_list)[grp_ref_flag] | 
| 107 | ||
| 108 | # If more than one level in treatment col. | |
| 109 | 2x | treatment <- as.character(unique(df[[variables$arm]])) | 
| 110 | 2x | grp_trt_flag <- vapply( | 
| 111 | 2x | X = groups_list, | 
| 112 | 2x | FUN.VALUE = TRUE, | 
| 113 | 2x | FUN = function(x) all(treatment %in% x) | 
| 114 | ) | |
| 115 | 2x | trt_grp <- names(groups_list)[grp_trt_flag] | 
| 116 | ||
| 117 | 2x | grp <- combine_levels(.df_row[[variables$arm]], levels = reference, new_level = ref_grp) | 
| 118 | 2x | grp <- combine_levels(grp, levels = treatment, new_level = trt_grp) | 
| 119 | } | |
| 120 | ||
| 121 | # The reference level in `grp` must be the same as in the `rtables` column split. | |
| 122 | 18x | data <- data.frame( | 
| 123 | 18x | rsp = .df_row[[.var]], | 
| 124 | 18x | grp = grp, | 
| 125 | 18x | strata = interaction(.df_row[variables$strata]) | 
| 126 | ) | |
| 127 | 18x | y_all <- or_clogit(data, conf_level = conf_level, method = method) | 
| 128 | 18x | checkmate::assert_string(trt_grp) | 
| 129 | 18x | checkmate::assert_subset(trt_grp, names(y_all$or_ci)) | 
| 130 | 17x | y$or_ci <- y_all$or_ci[[trt_grp]] | 
| 131 | 17x | y$n_tot <- y_all$n_tot | 
| 132 | } | |
| 133 | } | |
| 134 | ||
| 135 | 98x |   if ("est" %in% names(y$or_ci) && is.na(y$or_ci[["est"]]) && method != "approximate") { | 
| 136 | 1x | warning( | 
| 137 | 1x | "Unable to compute the odds ratio estimate. Please try re-running the function with ", | 
| 138 | 1x | 'parameter `method` set to "approximate".' | 
| 139 | ) | |
| 140 | } | |
| 141 | ||
| 142 | 98x | y$or_ci <- formatters::with_label( | 
| 143 | 98x | x = y$or_ci, | 
| 144 | 98x |     label = paste0("Odds Ratio (", 100 * conf_level, "% CI)") | 
| 145 | ) | |
| 146 | ||
| 147 | 98x | y$n_tot <- formatters::with_label( | 
| 148 | 98x | x = y$n_tot, | 
| 149 | 98x | label = "Total n" | 
| 150 | ) | |
| 151 | ||
| 152 | 98x | y | 
| 153 | } | |
| 154 | ||
| 155 | #' @describeIn odds_ratio Formatted analysis function which is used as `afun` in `estimate_odds_ratio()`. | |
| 156 | #' | |
| 157 | #' @return | |
| 158 | #' * `a_odds_ratio()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 159 | #' | |
| 160 | #' @examples | |
| 161 | #' a_odds_ratio( | |
| 162 | #' df = subset(dta, grp == "A"), | |
| 163 | #' .var = "rsp", | |
| 164 | #' .ref_group = subset(dta, grp == "B"), | |
| 165 | #' .in_ref_col = FALSE, | |
| 166 | #' .df_row = dta | |
| 167 | #' ) | |
| 168 | #' | |
| 169 | #' @export | |
| 170 | a_odds_ratio <- function(df, | |
| 171 | ..., | |
| 172 | .stats = NULL, | |
| 173 | .stat_names = NULL, | |
| 174 | .formats = NULL, | |
| 175 | .labels = NULL, | |
| 176 |                          .indent_mods = NULL) { | |
| 177 | # Check for additional parameters to the statistics function | |
| 178 | 12x | dots_extra_args <- list(...) | 
| 179 | 12x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 180 | 12x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 181 | ||
| 182 | # Check for user-defined functions | |
| 183 | 12x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 184 | 12x | .stats <- default_and_custom_stats_list$all_stats | 
| 185 | 12x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 186 | ||
| 187 | # Apply statistics function | |
| 188 | 12x | x_stats <- .apply_stat_functions( | 
| 189 | 12x | default_stat_fnc = s_odds_ratio, | 
| 190 | 12x | custom_stat_fnc_list = custom_stat_functions, | 
| 191 | 12x | args_list = c( | 
| 192 | 12x | df = list(df), | 
| 193 | 12x | extra_afun_params, | 
| 194 | 12x | dots_extra_args | 
| 195 | ) | |
| 196 | ) | |
| 197 | ||
| 198 | # Fill in formatting defaults | |
| 199 | 12x |   .stats <- get_stats("estimate_odds_ratio", | 
| 200 | 12x | stats_in = .stats, | 
| 201 | 12x | custom_stats_in = names(custom_stat_functions) | 
| 202 | ) | |
| 203 | 12x | x_stats <- x_stats[.stats] | 
| 204 | 12x | .formats <- get_formats_from_stats(.stats, .formats) | 
| 205 | 12x | .labels <- get_labels_from_stats( | 
| 206 | 12x | .stats, .labels, | 
| 207 | 12x | tern_defaults = c(lapply(x_stats, attr, "label"), tern_default_labels) | 
| 208 | ) | |
| 209 | 12x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods) | 
| 210 | ||
| 211 | # Auto format handling | |
| 212 | 12x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 213 | ||
| 214 | # Get and check statistical names | |
| 215 | 12x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 216 | ||
| 217 | 12x | in_rows( | 
| 218 | 12x | .list = x_stats, | 
| 219 | 12x | .formats = .formats, | 
| 220 | 12x | .names = .labels %>% .unlist_keep_nulls(), | 
| 221 | 12x | .stat_names = .stat_names, | 
| 222 | 12x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 223 | 12x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 224 | ) | |
| 225 | } | |
| 226 | ||
| 227 | #' @describeIn odds_ratio Layout-creating function which can take statistics function arguments | |
| 228 | #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 229 | #' | |
| 230 | #' @return | |
| 231 | #' * `estimate_odds_ratio()` returns a layout object suitable for passing to further layouting functions, | |
| 232 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 233 | #' the statistics from `s_odds_ratio()` to the table layout. | |
| 234 | #' | |
| 235 | #' @examples | |
| 236 | #' set.seed(12) | |
| 237 | #' dta <- data.frame( | |
| 238 | #' rsp = sample(c(TRUE, FALSE), 100, TRUE), | |
| 239 | #'   grp = factor(rep(c("A", "B"), each = 50), levels = c("A", "B")), | |
| 240 | #'   strata = factor(sample(c("C", "D"), 100, TRUE)) | |
| 241 | #' ) | |
| 242 | #' | |
| 243 | #' l <- basic_table() %>% | |
| 244 | #' split_cols_by(var = "grp", ref_group = "B") %>% | |
| 245 | #' estimate_odds_ratio(vars = "rsp") | |
| 246 | #' | |
| 247 | #' build_table(l, df = dta) | |
| 248 | #' | |
| 249 | #' @export | |
| 250 | #' @order 2 | |
| 251 | estimate_odds_ratio <- function(lyt, | |
| 252 | vars, | |
| 253 | variables = list(arm = NULL, strata = NULL), | |
| 254 | conf_level = 0.95, | |
| 255 | groups_list = NULL, | |
| 256 | method = "exact", | |
| 257 | na_str = default_na_str(), | |
| 258 | nested = TRUE, | |
| 259 | ..., | |
| 260 | table_names = vars, | |
| 261 | show_labels = "hidden", | |
| 262 | var_labels = vars, | |
| 263 | .stats = "or_ci", | |
| 264 | .stat_names = NULL, | |
| 265 | .formats = NULL, | |
| 266 | .labels = NULL, | |
| 267 |                                 .indent_mods = NULL) { | |
| 268 | # Process standard extra arguments | |
| 269 | 5x |   extra_args <- list(".stats" = .stats) | 
| 270 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 271 | ! | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 272 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 273 | ! | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 274 | ||
| 275 | # Process additional arguments to the statistic function | |
| 276 | 5x | extra_args <- c( | 
| 277 | 5x | extra_args, | 
| 278 | 5x | variables = list(variables), conf_level = list(conf_level), groups_list = list(groups_list), method = list(method), | 
| 279 | ... | |
| 280 | ) | |
| 281 | ||
| 282 | # Append additional info from layout to the analysis function | |
| 283 | 5x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 284 | 5x | formals(a_odds_ratio) <- c(formals(a_odds_ratio), extra_args[[".additional_fun_parameters"]]) | 
| 285 | ||
| 286 | 5x | analyze( | 
| 287 | 5x | lyt = lyt, | 
| 288 | 5x | vars = vars, | 
| 289 | 5x | afun = a_odds_ratio, | 
| 290 | 5x | na_str = na_str, | 
| 291 | 5x | nested = nested, | 
| 292 | 5x | extra_args = extra_args, | 
| 293 | 5x | var_labels = var_labels, | 
| 294 | 5x | show_labels = show_labels, | 
| 295 | 5x | table_names = table_names | 
| 296 | ) | |
| 297 | } | |
| 298 | ||
| 299 | #' Helper functions for odds ratio estimation | |
| 300 | #' | |
| 301 | #' @description `r lifecycle::badge("stable")` | |
| 302 | #' | |
| 303 | #' Functions to calculate odds ratios in [estimate_odds_ratio()]. | |
| 304 | #' | |
| 305 | #' @inheritParams odds_ratio | |
| 306 | #' @inheritParams argument_convention | |
| 307 | #' @param data (`data.frame`)\cr data frame containing at least the variables `rsp` and `grp`, and optionally | |
| 308 | #' `strata` for [or_clogit()]. | |
| 309 | #' | |
| 310 | #' @return A named `list` of elements `or_ci` and `n_tot`. | |
| 311 | #' | |
| 312 | #' @seealso [odds_ratio] | |
| 313 | #' | |
| 314 | #' @name h_odds_ratio | |
| 315 | NULL | |
| 316 | ||
| 317 | #' @describeIn h_odds_ratio Estimates the odds ratio based on [stats::glm()]. Note that there must be | |
| 318 | #' exactly 2 groups in `data` as specified by the `grp` variable. | |
| 319 | #' | |
| 320 | #' @examples | |
| 321 | #' # Data with 2 groups. | |
| 322 | #' data <- data.frame( | |
| 323 | #' rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1)), | |
| 324 | #' grp = letters[c(1, 1, 1, 2, 2, 2, 1, 2)], | |
| 325 | #' strata = letters[c(1, 2, 1, 2, 2, 2, 1, 2)], | |
| 326 | #' stringsAsFactors = TRUE | |
| 327 | #' ) | |
| 328 | #' | |
| 329 | #' # Odds ratio based on glm. | |
| 330 | #' or_glm(data, conf_level = 0.95) | |
| 331 | #' | |
| 332 | #' @export | |
| 333 | or_glm <- function(data, conf_level) { | |
| 334 | 77x | checkmate::assert_logical(data$rsp) | 
| 335 | 77x | assert_proportion_value(conf_level) | 
| 336 | 77x | assert_df_with_variables(data, list(rsp = "rsp", grp = "grp")) | 
| 337 | 77x |   checkmate::assert_multi_class(data$grp, classes = c("factor", "character")) | 
| 338 | ||
| 339 | 77x | data$grp <- as_factor_keep_attributes(data$grp) | 
| 340 | 77x | assert_df_with_factors(data, list(val = "grp"), min.levels = 2, max.levels = 2) | 
| 341 | 77x |   formula <- stats::as.formula("rsp ~ grp") | 
| 342 | 77x | model_fit <- stats::glm( | 
| 343 | 77x | formula = formula, data = data, | 
| 344 | 77x | family = stats::binomial(link = "logit") | 
| 345 | ) | |
| 346 | ||
| 347 | # Note that here we need to discard the intercept. | |
| 348 | 77x | or <- exp(stats::coef(model_fit)[-1]) | 
| 349 | 77x | or_ci <- exp( | 
| 350 | 77x | stats::confint.default(model_fit, level = conf_level)[-1, , drop = FALSE] | 
| 351 | ) | |
| 352 | ||
| 353 | 77x |   values <- stats::setNames(c(or, or_ci), c("est", "lcl", "ucl")) | 
| 354 | 77x | n_tot <- stats::setNames(nrow(model_fit$model), "n_tot") | 
| 355 | ||
| 356 | 77x | list(or_ci = values, n_tot = n_tot) | 
| 357 | } | |
| 358 | ||
| 359 | #' @describeIn h_odds_ratio Estimates the odds ratio based on [survival::clogit()]. This is done for | |
| 360 | #' the whole data set including all groups, since the results are not the same as when doing | |
| 361 | #' pairwise comparisons between the groups. | |
| 362 | #' | |
| 363 | #' @examples | |
| 364 | #' # Data with 3 groups. | |
| 365 | #' data <- data.frame( | |
| 366 | #' rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0)), | |
| 367 | #' grp = letters[c(1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3)], | |
| 368 | #' strata = LETTERS[c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)], | |
| 369 | #' stringsAsFactors = TRUE | |
| 370 | #' ) | |
| 371 | #' | |
| 372 | #' # Odds ratio based on stratified estimation by conditional logistic regression. | |
| 373 | #' or_clogit(data, conf_level = 0.95) | |
| 374 | #' | |
| 375 | #' @export | |
| 376 | or_clogit <- function(data, conf_level, method = "exact") { | |
| 377 | 19x | checkmate::assert_logical(data$rsp) | 
| 378 | 19x | assert_proportion_value(conf_level) | 
| 379 | 19x | assert_df_with_variables(data, list(rsp = "rsp", grp = "grp", strata = "strata")) | 
| 380 | 19x |   checkmate::assert_multi_class(data$grp, classes = c("factor", "character")) | 
| 381 | 19x |   checkmate::assert_multi_class(data$strata, classes = c("factor", "character")) | 
| 382 | 19x |   checkmate::assert_subset(method, c("exact", "approximate", "efron", "breslow"), empty.ok = FALSE) | 
| 383 | ||
| 384 | 19x | data$grp <- as_factor_keep_attributes(data$grp) | 
| 385 | 19x | data$strata <- as_factor_keep_attributes(data$strata) | 
| 386 | ||
| 387 | # Deviation from convention: `survival::strata` must be simply `strata`. | |
| 388 | 19x |   formula <- stats::as.formula("rsp ~ grp + strata(strata)") | 
| 389 | 19x | model_fit <- clogit_with_tryCatch(formula = formula, data = data, method = method) | 
| 390 | ||
| 391 | # Create a list with one set of OR estimates and CI per coefficient, i.e. | |
| 392 | # comparison of one group vs. the reference group. | |
| 393 | 19x | coef_est <- stats::coef(model_fit) | 
| 394 | 19x | ci_est <- stats::confint(model_fit, level = conf_level) | 
| 395 | 19x | or_ci <- list() | 
| 396 | 19x |   for (coef_name in names(coef_est)) { | 
| 397 | 21x |     grp_name <- gsub("^grp", "", x = coef_name) | 
| 398 | 21x | or_ci[[grp_name]] <- stats::setNames( | 
| 399 | 21x | object = exp(c(coef_est[coef_name], ci_est[coef_name, , drop = TRUE])), | 
| 400 | 21x |       nm = c("est", "lcl", "ucl") | 
| 401 | ) | |
| 402 | } | |
| 403 | 19x | list(or_ci = or_ci, n_tot = c(n_tot = model_fit$n)) | 
| 404 | } | 
| 1 | # summarize_glm_count ---------------------------------------------------------- | |
| 2 | #' Summarize Poisson negative binomial regression | |
| 3 | #' | |
| 4 | #' @description `r lifecycle::badge("experimental")` | |
| 5 | #' | |
| 6 | #' Summarize results of a Poisson negative binomial regression. | |
| 7 | #' This can be used to analyze count and/or frequency data using a linear model. | |
| 8 | #' It is specifically useful for analyzing count data (using the Poisson or Negative | |
| 9 | #' Binomial distribution) that is result of a generalized linear model of one (e.g. arm) or more | |
| 10 | #' covariates. | |
| 11 | #' | |
| 12 | #' @inheritParams h_glm_count | |
| 13 | #' @inheritParams argument_convention | |
| 14 | #' @param rate_mean_method (`character(1)`)\cr method used to estimate the mean odds ratio. Defaults to `emmeans`. | |
| 15 | #' see details for more information. | |
| 16 | #' @param scale (`numeric(1)`)\cr linear scaling factor for rate and confidence intervals. Defaults to `1`. | |
| 17 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 18 | #' | |
| 19 | #'   Options are: ``r shQuote(get_stats("summarize_glm_count"), type = "sh")`` | |
| 20 | #' | |
| 21 | #' @details | |
| 22 | #' `summarize_glm_count()` uses `s_glm_count()` to calculate the statistics for the table. This | |
| 23 | #' analysis function uses [h_glm_count()] to estimate the GLM with [stats::glm()] for Poisson and Quasi-Poisson | |
| 24 | #' distributions or [MASS::glm.nb()] for Negative Binomial distribution. All methods assume a | |
| 25 | #' logarithmic link function. | |
| 26 | #' | |
| 27 | #' At this point, rates and confidence intervals are estimated from the model using | |
| 28 | #' either [emmeans::emmeans()] when `rate_mean_method = "emmeans"` or [h_ppmeans()] | |
| 29 | #' when `rate_mean_method = "ppmeans"`. | |
| 30 | #' | |
| 31 | #' If a reference group is specified while building the table with `split_cols_by(ref_group)`, | |
| 32 | #' no rate ratio or `p-value` are calculated. Otherwise, we use [emmeans::contrast()] to | |
| 33 | #' calculate the rate ratio and `p-value` for the reference group. Values are always estimated | |
| 34 | #' with `method = "trt.vs.ctrl"` and `ref` equal to the first `arm` value. | |
| 35 | #' | |
| 36 | #' @name summarize_glm_count | |
| 37 | NULL | |
| 38 | ||
| 39 | #' @describeIn summarize_glm_count Layout-creating function which can take statistics function arguments | |
| 40 | #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 41 | #' | |
| 42 | #' @return | |
| 43 | #' * `summarize_glm_count()` returns a layout object suitable for passing to further layouting functions, | |
| 44 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 45 | #' the statistics from `s_glm_count()` to the table layout. | |
| 46 | #' | |
| 47 | #' @examples | |
| 48 | #' library(dplyr) | |
| 49 | #' | |
| 50 | #' anl <- tern_ex_adtte %>% filter(PARAMCD == "TNE") | |
| 51 | #' anl$AVAL_f <- as.factor(anl$AVAL) | |
| 52 | #' | |
| 53 | #' lyt <- basic_table() %>% | |
| 54 | #'   split_cols_by("ARM", ref_group = "B: Placebo") %>% | |
| 55 | #' add_colcounts() %>% | |
| 56 | #' analyze_vars( | |
| 57 | #' "AVAL_f", | |
| 58 | #' var_labels = "Number of exacerbations per patient", | |
| 59 | #'     .stats = c("count_fraction"), | |
| 60 | #'     .formats = c("count_fraction" = "xx (xx.xx%)"), | |
| 61 | #'     .labels = c("Number of exacerbations per patient") | |
| 62 | #' ) %>% | |
| 63 | #' summarize_glm_count( | |
| 64 | #' vars = "AVAL", | |
| 65 | #' variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = NULL), | |
| 66 | #' conf_level = 0.95, | |
| 67 | #' distribution = "poisson", | |
| 68 | #' rate_mean_method = "emmeans", | |
| 69 | #' var_labels = "Adjusted (P) exacerbation rate (per year)", | |
| 70 | #' table_names = "adjP", | |
| 71 | #'     .stats = c("rate"), | |
| 72 | #' .labels = c(rate = "Rate") | |
| 73 | #' ) %>% | |
| 74 | #' summarize_glm_count( | |
| 75 | #' vars = "AVAL", | |
| 76 | #'     variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = c("REGION1")), | |
| 77 | #' conf_level = 0.95, | |
| 78 | #' distribution = "quasipoisson", | |
| 79 | #' rate_mean_method = "ppmeans", | |
| 80 | #' var_labels = "Adjusted (QP) exacerbation rate (per year)", | |
| 81 | #' table_names = "adjQP", | |
| 82 | #'     .stats = c("rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"), | |
| 83 | #' .labels = c( | |
| 84 | #' rate = "Rate", rate_ci = "Rate CI", rate_ratio = "Rate Ratio", | |
| 85 | #' rate_ratio_ci = "Rate Ratio CI", pval = "p value" | |
| 86 | #' ) | |
| 87 | #' ) %>% | |
| 88 | #' summarize_glm_count( | |
| 89 | #' vars = "AVAL", | |
| 90 | #'     variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = c("REGION1")), | |
| 91 | #' conf_level = 0.95, | |
| 92 | #' distribution = "negbin", | |
| 93 | #' rate_mean_method = "emmeans", | |
| 94 | #' var_labels = "Adjusted (NB) exacerbation rate (per year)", | |
| 95 | #' table_names = "adjNB", | |
| 96 | #'     .stats = c("rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"), | |
| 97 | #' .labels = c( | |
| 98 | #' rate = "Rate", rate_ci = "Rate CI", rate_ratio = "Rate Ratio", | |
| 99 | #' rate_ratio_ci = "Rate Ratio CI", pval = "p value" | |
| 100 | #' ) | |
| 101 | #' ) | |
| 102 | #' | |
| 103 | #' build_table(lyt = lyt, df = anl) | |
| 104 | #' | |
| 105 | #' @export | |
| 106 | summarize_glm_count <- function(lyt, | |
| 107 | vars, | |
| 108 | variables, | |
| 109 | distribution, | |
| 110 | conf_level, | |
| 111 |                                 rate_mean_method = c("emmeans", "ppmeans")[1], | |
| 112 | weights = stats::weights, | |
| 113 | scale = 1, | |
| 114 | var_labels, | |
| 115 | na_str = default_na_str(), | |
| 116 | nested = TRUE, | |
| 117 | ..., | |
| 118 | show_labels = "visible", | |
| 119 | table_names = vars, | |
| 120 |                                 .stats = c("n", "rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"), | |
| 121 | .stat_names = NULL, | |
| 122 | .formats = NULL, | |
| 123 | .labels = NULL, | |
| 124 |                                 .indent_mods = list("rate_ci" = 1L, "rate_ratio_ci" = 1L, "pval" = 1L)) { | |
| 125 | 3x |   checkmate::assert_choice(rate_mean_method, c("emmeans", "ppmeans")) | 
| 126 | ||
| 127 | # Process standard extra arguments | |
| 128 | 3x |   extra_args <- list(".stats" = .stats) | 
| 129 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 130 | ! | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 131 | 3x | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 132 | 3x | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 133 | ||
| 134 | # Process additional arguments to the statistic function | |
| 135 | 3x | extra_args <- c( | 
| 136 | 3x | extra_args, | 
| 137 | 3x | variables = list(variables), distribution = list(distribution), conf_level = list(conf_level), | 
| 138 | 3x | rate_mean_method = list(rate_mean_method), weights = list(weights), scale = list(scale), | 
| 139 | ... | |
| 140 | ) | |
| 141 | ||
| 142 | # Append additional info from layout to the analysis function | |
| 143 | 3x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 144 | 3x | formals(a_glm_count) <- c(formals(a_glm_count), extra_args[[".additional_fun_parameters"]]) | 
| 145 | ||
| 146 | 3x | analyze( | 
| 147 | 3x | lyt = lyt, | 
| 148 | 3x | vars = vars, | 
| 149 | 3x | afun = a_glm_count, | 
| 150 | 3x | na_str = na_str, | 
| 151 | 3x | nested = nested, | 
| 152 | 3x | extra_args = extra_args, | 
| 153 | 3x | var_labels = var_labels, | 
| 154 | 3x | show_labels = show_labels, | 
| 155 | 3x | table_names = table_names | 
| 156 | ) | |
| 157 | } | |
| 158 | ||
| 159 | #' @describeIn summarize_glm_count Statistics function that produces a named list of results | |
| 160 | #' of the investigated Poisson model. | |
| 161 | #' | |
| 162 | #' @return | |
| 163 | #' * `s_glm_count()` returns a named `list` of 5 statistics: | |
| 164 | #' * `n`: Count of complete sample size for the group. | |
| 165 | #' * `rate`: Estimated event rate per follow-up time. | |
| 166 | #' * `rate_ci`: Confidence level for estimated rate per follow-up time. | |
| 167 | #' * `rate_ratio`: Ratio of event rates in each treatment arm to the reference arm. | |
| 168 | #' * `rate_ratio_ci`: Confidence level for the rate ratio. | |
| 169 | #' * `pval`: p-value. | |
| 170 | #' | |
| 171 | #' @keywords internal | |
| 172 | s_glm_count <- function(df, | |
| 173 | .var, | |
| 174 | .df_row, | |
| 175 | .ref_group, | |
| 176 | .in_ref_col, | |
| 177 | variables, | |
| 178 | distribution, | |
| 179 | conf_level, | |
| 180 | rate_mean_method, | |
| 181 | weights, | |
| 182 | scale = 1, | |
| 183 |                         ...) { | |
| 184 | 14x | arm <- variables$arm | 
| 185 | ||
| 186 | 14x | y <- df[[.var]] | 
| 187 | 13x | smry_level <- as.character(unique(df[[arm]])) | 
| 188 | ||
| 189 | # ensure there is only 1 value | |
| 190 | 13x | checkmate::assert_scalar(smry_level) | 
| 191 | ||
| 192 | 13x | results <- h_glm_count( | 
| 193 | 13x | .var = .var, | 
| 194 | 13x | .df_row = .df_row, | 
| 195 | 13x | variables = variables, | 
| 196 | 13x | distribution = distribution, | 
| 197 | 13x | weights | 
| 198 | ) | |
| 199 | ||
| 200 | 13x |   if (rate_mean_method == "emmeans") { | 
| 201 | 13x | emmeans_smry <- summary(results$emmeans_fit, level = conf_level) | 
| 202 | ! |   } else if (rate_mean_method == "ppmeans") { | 
| 203 | ! | emmeans_smry <- h_ppmeans(results$glm_fit, .df_row, arm, conf_level) | 
| 204 | } | |
| 205 | ||
| 206 | 13x | emmeans_smry_level <- emmeans_smry[emmeans_smry[[arm]] == smry_level, ] | 
| 207 | ||
| 208 | # This happens if there is a reference col. No Ratio is calculated? | |
| 209 | 13x |   if (.in_ref_col) { | 
| 210 | 5x | list( | 
| 211 | 5x | n = length(y[!is.na(y)]), | 
| 212 | 5x | rate = formatters::with_label( | 
| 213 | 5x | ifelse(distribution == "negbin", emmeans_smry_level$response * scale, emmeans_smry_level$rate * scale), | 
| 214 | 5x | "Adjusted Rate" | 
| 215 | ), | |
| 216 | 5x | rate_ci = formatters::with_label( | 
| 217 | 5x | c(emmeans_smry_level$asymp.LCL * scale, emmeans_smry_level$asymp.UCL * scale), | 
| 218 | 5x | f_conf_level(conf_level) | 
| 219 | ), | |
| 220 | 5x | rate_ratio = formatters::with_label(numeric(), "Adjusted Rate Ratio"), | 
| 221 | 5x | rate_ratio_ci = formatters::with_label(numeric(), f_conf_level(conf_level)), | 
| 222 | 5x | pval = formatters::with_label(numeric(), "p-value") | 
| 223 | ) | |
| 224 |   } else { | |
| 225 | 8x | emmeans_contrasts <- emmeans::contrast( | 
| 226 | 8x | results$emmeans_fit, | 
| 227 | 8x | method = "trt.vs.ctrl", | 
| 228 | 8x | ref = grep( | 
| 229 | 8x | as.character(unique(.ref_group[[arm]])), | 
| 230 | 8x | as.data.frame(results$emmeans_fit)[[arm]] | 
| 231 | ) | |
| 232 | ) | |
| 233 | ||
| 234 | 8x | contrasts_smry <- summary( | 
| 235 | 8x | emmeans_contrasts, | 
| 236 | 8x | infer = TRUE, | 
| 237 | 8x | adjust = "none" | 
| 238 | ) | |
| 239 | ||
| 240 | 8x | smry_contrasts_level <- contrasts_smry[grepl(smry_level, contrasts_smry$contrast), ] | 
| 241 | ||
| 242 | 8x | list( | 
| 243 | 8x | n = length(y[!is.na(y)]), | 
| 244 | 8x | rate = formatters::with_label( | 
| 245 | 8x | ifelse(distribution == "negbin", | 
| 246 | 8x | emmeans_smry_level$response * scale, | 
| 247 | 8x | emmeans_smry_level$rate * scale | 
| 248 | ), | |
| 249 | 8x | "Adjusted Rate" | 
| 250 | ), | |
| 251 | 8x | rate_ci = formatters::with_label( | 
| 252 | 8x | c(emmeans_smry_level$asymp.LCL * scale, emmeans_smry_level$asymp.UCL * scale), | 
| 253 | 8x | f_conf_level(conf_level) | 
| 254 | ), | |
| 255 | 8x | rate_ratio = formatters::with_label( | 
| 256 | 8x | smry_contrasts_level$ratio, | 
| 257 | 8x | "Adjusted Rate Ratio" | 
| 258 | ), | |
| 259 | 8x | rate_ratio_ci = formatters::with_label( | 
| 260 | 8x | c(smry_contrasts_level$asymp.LCL, smry_contrasts_level$asymp.UCL), | 
| 261 | 8x | f_conf_level(conf_level) | 
| 262 | ), | |
| 263 | 8x | pval = formatters::with_label( | 
| 264 | 8x | smry_contrasts_level$p.value, | 
| 265 | 8x | "p-value" | 
| 266 | ) | |
| 267 | ) | |
| 268 | } | |
| 269 | } | |
| 270 | ||
| 271 | #' @describeIn summarize_glm_count Formatted analysis function which is used as `afun` in `summarize_glm_count()`. | |
| 272 | #' | |
| 273 | #' @return | |
| 274 | #' * `a_glm_count()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 275 | #' | |
| 276 | #' @keywords internal | |
| 277 | a_glm_count <- function(df, | |
| 278 | ..., | |
| 279 | .stats = NULL, | |
| 280 | .stat_names = NULL, | |
| 281 | .formats = NULL, | |
| 282 | .labels = NULL, | |
| 283 |                         .indent_mods = NULL) { | |
| 284 | # Check for additional parameters to the statistics function | |
| 285 | 9x | dots_extra_args <- list(...) | 
| 286 | 9x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 287 | 9x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 288 | ||
| 289 | # Check for user-defined functions | |
| 290 | 9x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 291 | 9x | .stats <- default_and_custom_stats_list$all_stats | 
| 292 | 9x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 293 | ||
| 294 | # Apply statistics function | |
| 295 | 9x | x_stats <- .apply_stat_functions( | 
| 296 | 9x | default_stat_fnc = s_glm_count, | 
| 297 | 9x | custom_stat_fnc_list = custom_stat_functions, | 
| 298 | 9x | args_list = c( | 
| 299 | 9x | df = list(df), | 
| 300 | 9x | extra_afun_params, | 
| 301 | 9x | dots_extra_args | 
| 302 | ) | |
| 303 | ) | |
| 304 | ||
| 305 | # Fill in formatting defaults | |
| 306 | 9x |   .stats <- get_stats("summarize_glm_count", | 
| 307 | 9x | stats_in = .stats, | 
| 308 | 9x | custom_stats_in = names(custom_stat_functions) | 
| 309 | ) | |
| 310 | 9x | .formats <- get_formats_from_stats(.stats, .formats) | 
| 311 | 9x | .labels <- get_labels_from_stats(.stats, .labels) | 
| 312 | 9x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods) | 
| 313 | ||
| 314 | 9x | x_stats <- x_stats[.stats] | 
| 315 | ||
| 316 | # Auto format handling | |
| 317 | 9x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 318 | ||
| 319 | # Get and check statistical names | |
| 320 | 9x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 321 | ||
| 322 | 9x | in_rows( | 
| 323 | 9x | .list = x_stats, | 
| 324 | 9x | .formats = .formats, | 
| 325 | 9x | .names = .labels %>% .unlist_keep_nulls(), | 
| 326 | 9x | .stat_names = .stat_names, | 
| 327 | 9x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 328 | 9x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 329 | ) | |
| 330 | } | |
| 331 | ||
| 332 | # h_glm_count ------------------------------------------------------------------ | |
| 333 | ||
| 334 | #' Helper functions for Poisson models | |
| 335 | #' | |
| 336 | #' @description `r lifecycle::badge("experimental")` | |
| 337 | #' | |
| 338 | #' Helper functions that returns the results of [stats::glm()] when Poisson or Quasi-Poisson | |
| 339 | #' distributions are needed (see `family` parameter), or [MASS::glm.nb()] for Negative Binomial | |
| 340 | #' distributions. Link function for the GLM is `log`. | |
| 341 | #' | |
| 342 | #' @inheritParams argument_convention | |
| 343 | #' | |
| 344 | #' @seealso [summarize_glm_count] | |
| 345 | #' | |
| 346 | #' @name h_glm_count | |
| 347 | NULL | |
| 348 | ||
| 349 | #' @describeIn h_glm_count Helper function to return the results of the | |
| 350 | #' selected model (Poisson, Quasi-Poisson, negative binomial). | |
| 351 | #' | |
| 352 | #' @param .df_row (`data.frame`)\cr dataset that includes all the variables that are called | |
| 353 | #' in `.var` and `variables`. | |
| 354 | #' @param variables (named `list` of `string`)\cr list of additional analysis variables, with | |
| 355 | #' expected elements: | |
| 356 | #' * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple | |
| 357 | #' groups will be summarized. Specifically, the first level of `arm` variable is taken as the | |
| 358 | #' reference group. | |
| 359 | #' * `covariates` (`character`)\cr a vector that can contain single variable names (such as | |
| 360 | #' `"X1"`), and/or interaction terms indicated by `"X1 * X2"`. | |
| 361 | #' * `offset` (`numeric`)\cr a numeric vector or scalar adding an offset. | |
| 362 | #' @param distribution (`character`)\cr a character value specifying the distribution | |
| 363 | #' used in the regression (Poisson, Quasi-Poisson, negative binomial). | |
| 364 | #' @param weights (`character`)\cr a character vector specifying weights used | |
| 365 | #' in averaging predictions. Number of weights must equal the number of levels included in the covariates. | |
| 366 | #' Weights option passed to [emmeans::emmeans()]. | |
| 367 | #' | |
| 368 | #' @return | |
| 369 | #' * `h_glm_count()` returns the results of the selected model. | |
| 370 | #' | |
| 371 | #' @keywords internal | |
| 372 | h_glm_count <- function(.var, | |
| 373 | .df_row, | |
| 374 | variables, | |
| 375 | distribution, | |
| 376 |                         weights) { | |
| 377 | 21x |   checkmate::assert_subset(distribution, c("poisson", "quasipoisson", "negbin"), empty.ok = FALSE) | 
| 378 | 19x | switch(distribution, | 
| 379 | 13x | poisson = h_glm_poisson(.var, .df_row, variables, weights), | 
| 380 | 1x | quasipoisson = h_glm_quasipoisson(.var, .df_row, variables, weights), | 
| 381 | 5x | negbin = h_glm_negbin(.var, .df_row, variables, weights) | 
| 382 | ) | |
| 383 | } | |
| 384 | ||
| 385 | #' @describeIn h_glm_count Helper function to return results of a Poisson model. | |
| 386 | #' | |
| 387 | #' @return | |
| 388 | #' * `h_glm_poisson()` returns the results of a Poisson model. | |
| 389 | #' | |
| 390 | #' @keywords internal | |
| 391 | h_glm_poisson <- function(.var, | |
| 392 | .df_row, | |
| 393 | variables, | |
| 394 |                           weights) { | |
| 395 | 17x | arm <- variables$arm | 
| 396 | 17x | covariates <- variables$covariates | 
| 397 | ||
| 398 | 17x | formula <- stats::as.formula(paste0( | 
| 399 | 17x | .var, " ~ ", | 
| 400 | " + ", | |
| 401 | 17x | paste(covariates, collapse = " + "), | 
| 402 | " + ", | |
| 403 | 17x | arm | 
| 404 | )) | |
| 405 | ||
| 406 | 17x |   if (is.null(variables$offset)) { | 
| 407 | 1x | glm_fit <- stats::glm( | 
| 408 | 1x | formula = formula, | 
| 409 | 1x | data = .df_row, | 
| 410 | 1x | family = stats::poisson(link = "log") | 
| 411 | ) | |
| 412 |   } else { | |
| 413 | 16x | offset <- .df_row[[variables$offset]] | 
| 414 | 14x | glm_fit <- stats::glm( | 
| 415 | 14x | formula = formula, | 
| 416 | 14x | offset = offset, | 
| 417 | 14x | data = .df_row, | 
| 418 | 14x | family = stats::poisson(link = "log") | 
| 419 | ) | |
| 420 | } | |
| 421 | ||
| 422 | 15x | emmeans_fit <- emmeans::emmeans( | 
| 423 | 15x | glm_fit, | 
| 424 | 15x | specs = arm, | 
| 425 | 15x | data = .df_row, | 
| 426 | 15x | type = "response", | 
| 427 | 15x | offset = 0, | 
| 428 | 15x | weights = weights | 
| 429 | ) | |
| 430 | ||
| 431 | 15x | list( | 
| 432 | 15x | glm_fit = glm_fit, | 
| 433 | 15x | emmeans_fit = emmeans_fit | 
| 434 | ) | |
| 435 | } | |
| 436 | ||
| 437 | #' @describeIn h_glm_count Helper function to return results of a Quasi-Poisson model. | |
| 438 | #' | |
| 439 | #' @return | |
| 440 | #' * `h_glm_quasipoisson()` returns the results of a Quasi-Poisson model. | |
| 441 | #' | |
| 442 | #' @keywords internal | |
| 443 | h_glm_quasipoisson <- function(.var, | |
| 444 | .df_row, | |
| 445 | variables, | |
| 446 |                                weights) { | |
| 447 | 5x | arm <- variables$arm | 
| 448 | 5x | covariates <- variables$covariates | 
| 449 | ||
| 450 | 5x | formula <- stats::as.formula(paste0( | 
| 451 | 5x | .var, " ~ ", | 
| 452 | " + ", | |
| 453 | 5x | paste(covariates, collapse = " + "), | 
| 454 | " + ", | |
| 455 | 5x | arm | 
| 456 | )) | |
| 457 | ||
| 458 | 5x |   if (is.null(variables$offset)) { | 
| 459 | ! | glm_fit <- stats::glm( | 
| 460 | ! | formula = formula, | 
| 461 | ! | data = .df_row, | 
| 462 | ! | family = stats::quasipoisson(link = "log") | 
| 463 | ) | |
| 464 |   } else { | |
| 465 | 5x | offset <- .df_row[[variables$offset]] | 
| 466 | 3x | glm_fit <- stats::glm( | 
| 467 | 3x | formula = formula, | 
| 468 | 3x | offset = offset, | 
| 469 | 3x | data = .df_row, | 
| 470 | 3x | family = stats::quasipoisson(link = "log") | 
| 471 | ) | |
| 472 | } | |
| 473 | 3x | emmeans_fit <- emmeans::emmeans( | 
| 474 | 3x | glm_fit, | 
| 475 | 3x | specs = arm, | 
| 476 | 3x | data = .df_row, | 
| 477 | 3x | type = "response", | 
| 478 | 3x | offset = 0, | 
| 479 | 3x | weights = weights | 
| 480 | ) | |
| 481 | ||
| 482 | 3x | list( | 
| 483 | 3x | glm_fit = glm_fit, | 
| 484 | 3x | emmeans_fit = emmeans_fit | 
| 485 | ) | |
| 486 | } | |
| 487 | ||
| 488 | #' @describeIn h_glm_count Helper function to return results of a negative binomial model. | |
| 489 | #' | |
| 490 | #' @return | |
| 491 | #' * `h_glm_negbin()` returns the results of a negative binomial model. | |
| 492 | #' | |
| 493 | #' @keywords internal | |
| 494 | h_glm_negbin <- function(.var, | |
| 495 | .df_row, | |
| 496 | variables, | |
| 497 |                          weights) { | |
| 498 | 9x | arm <- variables$arm | 
| 499 | 9x | covariates <- variables$covariates | 
| 500 | 9x | formula <- stats::as.formula(paste0( | 
| 501 | 9x | .var, " ~ ", | 
| 502 | " + ", | |
| 503 | 9x | paste(covariates, collapse = " + "), | 
| 504 | " + ", | |
| 505 | 9x | arm | 
| 506 | )) | |
| 507 | ||
| 508 | 9x |   if (is.null(variables$offset)) { | 
| 509 | 1x | formula <- stats::as.formula(paste0( | 
| 510 | 1x | .var, " ~ ", | 
| 511 | " + ", | |
| 512 | 1x | paste(covariates, collapse = " + "), | 
| 513 | " + ", | |
| 514 | 1x | arm | 
| 515 | )) | |
| 516 |   } else { | |
| 517 | 8x | offset <- variables$offset | 
| 518 | 8x | formula_txt <- sprintf( | 
| 519 | 8x | "%s ~ %s + %s + offset(%s)", | 
| 520 | 8x | .var, | 
| 521 | 8x | arm, paste0(covariates, collapse = " + "), offset | 
| 522 | ) | |
| 523 | 8x | formula <- stats::as.formula( | 
| 524 | 8x | formula_txt | 
| 525 | ) | |
| 526 | } | |
| 527 | ||
| 528 | 9x | glm_fit <- MASS::glm.nb( | 
| 529 | 9x | formula = formula, | 
| 530 | 9x | data = .df_row, | 
| 531 | 9x | link = "log" | 
| 532 | ) | |
| 533 | ||
| 534 | 7x | emmeans_fit <- emmeans::emmeans( | 
| 535 | 7x | glm_fit, | 
| 536 | 7x | specs = arm, | 
| 537 | 7x | data = .df_row, | 
| 538 | 7x | type = "response", | 
| 539 | 7x | offset = 0, | 
| 540 | 7x | weights = weights | 
| 541 | ) | |
| 542 | ||
| 543 | 7x | list( | 
| 544 | 7x | glm_fit = glm_fit, | 
| 545 | 7x | emmeans_fit = emmeans_fit | 
| 546 | ) | |
| 547 | } | |
| 548 | ||
| 549 | # h_ppmeans -------------------------------------------------------------------- | |
| 550 | #' Function to return the estimated means using predicted probabilities | |
| 551 | #' | |
| 552 | #' @description | |
| 553 | #' For each arm level, the predicted mean rate is calculated using the fitted model object, with `newdata` | |
| 554 | #' set to the result of `stats::model.frame`, a reconstructed data or the original data, depending on the | |
| 555 | #' object formula (coming from the fit). The confidence interval is derived using the `conf_level` parameter. | |
| 556 | #' | |
| 557 | #' @param obj (`glm.fit`)\cr fitted model object used to derive the mean rate estimates in each treatment arm. | |
| 558 | #' @param .df_row (`data.frame`)\cr dataset that includes all the variables that are called in `.var` and `variables`. | |
| 559 | #' @param arm (`string`)\cr group variable, for which the covariate adjusted means of multiple groups will be | |
| 560 | #' summarized. Specifically, the first level of `arm` variable is taken as the reference group. | |
| 561 | #' @param conf_level (`proportion`)\cr value used to derive the confidence interval for the rate. | |
| 562 | #' | |
| 563 | #' @return | |
| 564 | #' * `h_ppmeans()` returns the estimated means. | |
| 565 | #' | |
| 566 | #' @seealso [summarize_glm_count()]. | |
| 567 | #' | |
| 568 | #' @export | |
| 569 | h_ppmeans <- function(obj, .df_row, arm, conf_level) { | |
| 570 | 1x | alpha <- 1 - conf_level | 
| 571 | 1x | p <- 1 - alpha / 2 | 
| 572 | ||
| 573 | 1x | arm_levels <- levels(.df_row[[arm]]) | 
| 574 | ||
| 575 | 1x |   out <- lapply(arm_levels, function(lev) { | 
| 576 | 3x | temp <- .df_row | 
| 577 | 3x | temp[[arm]] <- factor(lev, levels = arm_levels) | 
| 578 | ||
| 579 | 3x | mf <- stats::model.frame(obj$formula, data = temp) | 
| 580 | 3x | X <- stats::model.matrix(obj$formula, data = mf) # nolint | 
| 581 | ||
| 582 | 3x | rate <- stats::predict(obj, newdata = mf, type = "response") | 
| 583 | 3x | rate_hat <- mean(rate) | 
| 584 | ||
| 585 | 3x | zz <- colMeans(rate * X) | 
| 586 | 3x | se <- sqrt(as.numeric(t(zz) %*% stats::vcov(obj) %*% zz)) | 
| 587 | 3x | rate_lwr <- rate_hat * exp(-stats::qnorm(p) * se / rate_hat) | 
| 588 | 3x | rate_upr <- rate_hat * exp(stats::qnorm(p) * se / rate_hat) | 
| 589 | ||
| 590 | 3x | c(rate_hat, rate_lwr, rate_upr) | 
| 591 | }) | |
| 592 | ||
| 593 | 1x | names(out) <- arm_levels | 
| 594 | 1x | out <- do.call(rbind, out) | 
| 595 | 1x |   if ("negbin" %in% class(obj)) { | 
| 596 | ! |     colnames(out) <- c("response", "asymp.LCL", "asymp.UCL") | 
| 597 |   } else { | |
| 598 | 1x |     colnames(out) <- c("rate", "asymp.LCL", "asymp.UCL") | 
| 599 | } | |
| 600 | 1x | out <- as.data.frame(out) | 
| 601 | 1x | out[[arm]] <- rownames(out) | 
| 602 | 1x | out | 
| 603 | } | 
| 1 | #' Control functions for Kaplan-Meier plot annotation tables | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Auxiliary functions for controlling arguments for formatting the annotation tables that can be added to plots | |
| 6 | #' generated via [g_km()]. | |
| 7 | #' | |
| 8 | #' @param x (`proportion`)\cr x-coordinate for center of annotation table. | |
| 9 | #' @param y (`proportion`)\cr y-coordinate for center of annotation table. | |
| 10 | #' @param w (`proportion`)\cr relative width of the annotation table. | |
| 11 | #' @param h (`proportion`)\cr relative height of the annotation table. | |
| 12 | #' @param fill (`flag` or `character`)\cr whether the annotation table should have a background fill color. | |
| 13 | #' Can also be a color code to use as the background fill color. If `TRUE`, color code defaults to `"#00000020"`. | |
| 14 | #' | |
| 15 | #' @return A list of components with the same names as the arguments. | |
| 16 | #' | |
| 17 | #' @seealso [g_km()] | |
| 18 | #' | |
| 19 | #' @name control_annot | |
| 20 | NULL | |
| 21 | ||
| 22 | #' @describeIn control_annot Control function for formatting the median survival time annotation table. This annotation | |
| 23 | #' table can be added in [g_km()] by setting `annot_surv_med=TRUE`, and can be configured using the | |
| 24 | #' `control_surv_med_annot()` function by setting it as the `control_annot_surv_med` argument. | |
| 25 | #' | |
| 26 | #' @examples | |
| 27 | #' control_surv_med_annot() | |
| 28 | #' | |
| 29 | #' @export | |
| 30 | control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) { | |
| 31 | 22x | assert_proportion_value(x) | 
| 32 | 22x | assert_proportion_value(y) | 
| 33 | 22x | assert_proportion_value(w) | 
| 34 | 22x | assert_proportion_value(h) | 
| 35 | ||
| 36 | 22x | list(x = x, y = y, w = w, h = h, fill = fill) | 
| 37 | } | |
| 38 | ||
| 39 | #' @describeIn control_annot Control function for formatting the Cox-PH annotation table. This annotation table can be | |
| 40 | #' added in [g_km()] by setting `annot_coxph=TRUE`, and can be configured using the `control_coxph_annot()` function | |
| 41 | #' by setting it as the `control_annot_coxph` argument. | |
| 42 | #' | |
| 43 | #' @param ref_lbls (`flag`)\cr whether the reference group should be explicitly printed in labels for the | |
| 44 | #' annotation table. If `FALSE` (default), only comparison groups will be printed in the table labels. | |
| 45 | #' | |
| 46 | #' @examples | |
| 47 | #' control_coxph_annot() | |
| 48 | #' | |
| 49 | #' @export | |
| 50 | control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = TRUE, ref_lbls = FALSE) { | |
| 51 | 11x | checkmate::assert_logical(ref_lbls, any.missing = FALSE) | 
| 52 | ||
| 53 | 11x | res <- c(control_surv_med_annot(x = x, y = y, w = w, h = h), list(ref_lbls = ref_lbls)) | 
| 54 | 11x | res | 
| 55 | } | |
| 56 | ||
| 57 | #' Helper function to calculate x-tick positions | |
| 58 | #' | |
| 59 | #' @description `r lifecycle::badge("stable")` | |
| 60 | #' | |
| 61 | #' Calculate the positions of ticks on the x-axis. However, if `xticks` already | |
| 62 | #' exists it is kept as is. It is based on the same function `ggplot2` relies on, | |
| 63 | #' and is required in the graphic and the patient-at-risk annotation table. | |
| 64 | #' | |
| 65 | #' @inheritParams g_km | |
| 66 | #' @inheritParams h_ggkm | |
| 67 | #' | |
| 68 | #' @return A vector of positions to use for x-axis ticks on a `ggplot` object. | |
| 69 | #' | |
| 70 | #' @examples | |
| 71 | #' library(dplyr) | |
| 72 | #' library(survival) | |
| 73 | #' | |
| 74 | #' data <- tern_ex_adtte %>% | |
| 75 | #' filter(PARAMCD == "OS") %>% | |
| 76 | #' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>% | |
| 77 | #' h_data_plot() | |
| 78 | #' | |
| 79 | #' h_xticks(data) | |
| 80 | #' h_xticks(data, xticks = seq(0, 3000, 500)) | |
| 81 | #' h_xticks(data, xticks = 500) | |
| 82 | #' h_xticks(data, xticks = 500, max_time = 6000) | |
| 83 | #' h_xticks(data, xticks = c(0, 500), max_time = 300) | |
| 84 | #' h_xticks(data, xticks = 500, max_time = 300) | |
| 85 | #' | |
| 86 | #' @export | |
| 87 | h_xticks <- function(data, xticks = NULL, max_time = NULL) { | |
| 88 | 18x |   if (is.null(xticks)) { | 
| 89 | 13x |     if (is.null(max_time)) { | 
| 90 | 11x | labeling::extended(range(data$time)[1], range(data$time)[2], m = 5) | 
| 91 |     } else { | |
| 92 | 2x | labeling::extended(range(data$time)[1], max(range(data$time)[2], max_time), m = 5) | 
| 93 | } | |
| 94 | 5x |   } else if (checkmate::test_number(xticks)) { | 
| 95 | 2x |     if (is.null(max_time)) { | 
| 96 | 1x | seq(0, max(data$time), xticks) | 
| 97 |     } else { | |
| 98 | 1x | seq(0, max(data$time, max_time), xticks) | 
| 99 | } | |
| 100 | 3x |   } else if (is.numeric(xticks)) { | 
| 101 | 2x | xticks | 
| 102 |   } else { | |
| 103 | 1x | stop( | 
| 104 | 1x | paste( | 
| 105 | 1x | "xticks should be either `NULL`", | 
| 106 | 1x | "or a single number (interval between x ticks)", | 
| 107 | 1x | "or a numeric vector (position of ticks on the x axis)" | 
| 108 | ) | |
| 109 | ) | |
| 110 | } | |
| 111 | } | |
| 112 | ||
| 113 | #' Helper function for survival estimations | |
| 114 | #' | |
| 115 | #' @description `r lifecycle::badge("stable")` | |
| 116 | #' | |
| 117 | #' Transform a survival fit to a table with groups in rows characterized by N, median and confidence interval. | |
| 118 | #' | |
| 119 | #' @inheritParams h_data_plot | |
| 120 | #' | |
| 121 | #' @return A summary table with statistics `N`, `Median`, and `XX% CI` (`XX` taken from `fit_km`). | |
| 122 | #' | |
| 123 | #' @examples | |
| 124 | #' library(dplyr) | |
| 125 | #' library(survival) | |
| 126 | #' | |
| 127 | #' adtte <- tern_ex_adtte %>% filter(PARAMCD == "OS") | |
| 128 | #' fit <- survfit( | |
| 129 | #' formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, | |
| 130 | #' data = adtte | |
| 131 | #' ) | |
| 132 | #' h_tbl_median_surv(fit_km = fit) | |
| 133 | #' | |
| 134 | #' @export | |
| 135 | h_tbl_median_surv <- function(fit_km, armval = "All") { | |
| 136 | 10x |   y <- if (is.null(fit_km$strata)) { | 
| 137 | ! | as.data.frame(t(summary(fit_km)$table), row.names = armval) | 
| 138 |   } else { | |
| 139 | 10x | tbl <- summary(fit_km)$table | 
| 140 | 10x |     rownames_lst <- strsplit(sub("=", "equals", rownames(tbl)), "equals") | 
| 141 | 10x | rownames(tbl) <- matrix(unlist(rownames_lst), ncol = 2, byrow = TRUE)[, 2] | 
| 142 | 10x | as.data.frame(tbl) | 
| 143 | } | |
| 144 | 10x | conf.int <- summary(fit_km)$conf.int # nolint | 
| 145 | 10x | y$records <- round(y$records) | 
| 146 | 10x | y$median <- signif(y$median, 4) | 
| 147 | 10x | y$`CI` <- paste0( | 
| 148 | 10x |     "(", signif(y[[paste0(conf.int, "LCL")]], 4), ", ", signif(y[[paste0(conf.int, "UCL")]], 4), ")" | 
| 149 | ) | |
| 150 | 10x | stats::setNames( | 
| 151 | 10x |     y[c("records", "median", "CI")], | 
| 152 | 10x |     c("N", "Median", f_conf_level(conf.int)) | 
| 153 | ) | |
| 154 | } | |
| 155 | ||
| 156 | #' Helper function for generating a pairwise Cox-PH table | |
| 157 | #' | |
| 158 | #' @description `r lifecycle::badge("stable")` | |
| 159 | #' | |
| 160 | #' Create a `data.frame` of pairwise stratified or unstratified Cox-PH analysis results. | |
| 161 | #' | |
| 162 | #' @inheritParams g_km | |
| 163 | #' @param annot_coxph_ref_lbls (`flag`)\cr whether the reference group should be explicitly printed in labels for the | |
| 164 | #' `annot_coxph` table. If `FALSE` (default), only comparison groups will be printed in `annot_coxph` table labels. | |
| 165 | #' | |
| 166 | #' @return A `data.frame` containing statistics `HR`, `XX% CI` (`XX` taken from `control_coxph_pw`), | |
| 167 | #' and `p-value (log-rank)`. | |
| 168 | #' | |
| 169 | #' @examples | |
| 170 | #' library(dplyr) | |
| 171 | #' | |
| 172 | #' adtte <- tern_ex_adtte %>% | |
| 173 | #' filter(PARAMCD == "OS") %>% | |
| 174 | #' mutate(is_event = CNSR == 0) | |
| 175 | #' | |
| 176 | #' h_tbl_coxph_pairwise( | |
| 177 | #' df = adtte, | |
| 178 | #' variables = list(tte = "AVAL", is_event = "is_event", arm = "ARM"), | |
| 179 | #' control_coxph_pw = control_coxph(conf_level = 0.9) | |
| 180 | #' ) | |
| 181 | #' | |
| 182 | #' @export | |
| 183 | h_tbl_coxph_pairwise <- function(df, | |
| 184 | variables, | |
| 185 | ref_group_coxph = NULL, | |
| 186 | control_coxph_pw = control_coxph(), | |
| 187 |                                  annot_coxph_ref_lbls = FALSE) { | |
| 188 | 4x |   if ("strat" %in% names(variables)) { | 
| 189 | ! | warning( | 
| 190 | ! | "Warning: the `strat` element name of the `variables` list argument to `h_tbl_coxph_pairwise() ", | 
| 191 | ! | "was deprecated in tern 0.9.4.\n ", | 
| 192 | ! | "Please use the name `strata` instead of `strat` in the `variables` argument." | 
| 193 | ) | |
| 194 | ! | variables[["strata"]] <- variables[["strat"]] | 
| 195 | } | |
| 196 | ||
| 197 | 4x | assert_df_with_variables(df, variables) | 
| 198 | 4x | checkmate::assert_choice(ref_group_coxph, levels(df[[variables$arm]]), null.ok = TRUE) | 
| 199 | 4x | checkmate::assert_flag(annot_coxph_ref_lbls) | 
| 200 | ||
| 201 | 4x | arm <- variables$arm | 
| 202 | 4x | df[[arm]] <- factor(df[[arm]]) | 
| 203 | ||
| 204 | 4x | ref_group <- if (!is.null(ref_group_coxph)) ref_group_coxph else levels(df[[variables$arm]])[1] | 
| 205 | 4x | comp_group <- setdiff(levels(df[[arm]]), ref_group) | 
| 206 | ||
| 207 | 4x |   results <- Map(function(comp) { | 
| 208 | 8x | res <- s_coxph_pairwise( | 
| 209 | 8x | df = df[df[[arm]] == comp, , drop = FALSE], | 
| 210 | 8x | .ref_group = df[df[[arm]] == ref_group, , drop = FALSE], | 
| 211 | 8x | .in_ref_col = FALSE, | 
| 212 | 8x | .var = variables$tte, | 
| 213 | 8x | is_event = variables$is_event, | 
| 214 | 8x | strata = variables$strata, | 
| 215 | 8x | control = control_coxph_pw | 
| 216 | ) | |
| 217 | 8x | res_df <- data.frame( | 
| 218 | 8x | hr = format(round(res$hr, 2), nsmall = 2), | 
| 219 | 8x | hr_ci = paste0( | 
| 220 | 8x |         "(", format(round(res$hr_ci[1], 2), nsmall = 2), ", ", | 
| 221 | 8x | format(round(res$hr_ci[2], 2), nsmall = 2), ")" | 
| 222 | ), | |
| 223 | 8x | pvalue = if (res$pvalue < 0.0001) "<0.0001" else format(round(res$pvalue, 4), 4), | 
| 224 | 8x | stringsAsFactors = FALSE | 
| 225 | ) | |
| 226 | 8x |     colnames(res_df) <- c("HR", vapply(res[c("hr_ci", "pvalue")], obj_label, FUN.VALUE = "character")) | 
| 227 | 8x | row.names(res_df) <- comp | 
| 228 | 8x | res_df | 
| 229 | 4x | }, comp_group) | 
| 230 | 1x | if (annot_coxph_ref_lbls) names(results) <- paste(comp_group, "vs.", ref_group) | 
| 231 | ||
| 232 | 4x | do.call(rbind, results) | 
| 233 | } | |
| 234 | ||
| 235 | #' Helper function to tidy survival fit data | |
| 236 | #' | |
| 237 | #' @description `r lifecycle::badge("stable")` | |
| 238 | #' | |
| 239 | #' Convert the survival fit data into a data frame designed for plotting | |
| 240 | #' within `g_km`. | |
| 241 | #' | |
| 242 | #' This starts from the [broom::tidy()] result, and then: | |
| 243 | #' * Post-processes the `strata` column into a factor. | |
| 244 | #' * Extends each stratum by an additional first row with time 0 and probability 1 so that | |
| 245 | #' downstream plot lines start at those coordinates. | |
| 246 | #' * Adds a `censor` column. | |
| 247 | #' * Filters the rows before `max_time`. | |
| 248 | #' | |
| 249 | #' @inheritParams g_km | |
| 250 | #' @param fit_km (`survfit`)\cr result of [survival::survfit()]. | |
| 251 | #' @param armval (`string`)\cr used as strata name when treatment arm variable only has one level. Default is `"All"`. | |
| 252 | #' | |
| 253 | #' @return A `tibble` with columns `time`, `n.risk`, `n.event`, `n.censor`, `estimate`, `std.error`, `conf.high`, | |
| 254 | #' `conf.low`, `strata`, and `censor`. | |
| 255 | #' | |
| 256 | #' @examples | |
| 257 | #' library(dplyr) | |
| 258 | #' library(survival) | |
| 259 | #' | |
| 260 | #' # Test with multiple arms | |
| 261 | #' tern_ex_adtte %>% | |
| 262 | #' filter(PARAMCD == "OS") %>% | |
| 263 | #' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>% | |
| 264 | #' h_data_plot() | |
| 265 | #' | |
| 266 | #' # Test with single arm | |
| 267 | #' tern_ex_adtte %>% | |
| 268 | #' filter(PARAMCD == "OS", ARMCD == "ARM B") %>% | |
| 269 | #' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>% | |
| 270 | #' h_data_plot(armval = "ARM B") | |
| 271 | #' | |
| 272 | #' @export | |
| 273 | h_data_plot <- function(fit_km, | |
| 274 | armval = "All", | |
| 275 |                         max_time = NULL) { | |
| 276 | 18x | y <- broom::tidy(fit_km) | 
| 277 | ||
| 278 | 18x |   if (!is.null(fit_km$strata)) { | 
| 279 | 18x |     fit_km_var_level <- strsplit(sub("=", "equals", names(fit_km$strata)), "equals") | 
| 280 | 18x | strata_levels <- vapply(fit_km_var_level, FUN = "[", FUN.VALUE = "a", i = 2) | 
| 281 | 18x |     strata_var_level <- strsplit(sub("=", "equals", y$strata), "equals") | 
| 282 | 18x | y$strata <- factor( | 
| 283 | 18x | vapply(strata_var_level, FUN = "[", FUN.VALUE = "a", i = 2), | 
| 284 | 18x | levels = strata_levels | 
| 285 | ) | |
| 286 |   } else { | |
| 287 | ! | y$strata <- armval | 
| 288 | } | |
| 289 | ||
| 290 | 18x | y_by_strata <- split(y, y$strata) | 
| 291 | 18x | y_by_strata_extended <- lapply( | 
| 292 | 18x | y_by_strata, | 
| 293 | 18x |     FUN = function(tbl) { | 
| 294 | 53x | first_row <- tbl[1L, ] | 
| 295 | 53x | first_row$time <- 0 | 
| 296 | 53x |       first_row$n.risk <- sum(first_row[, c("n.risk", "n.event", "n.censor")]) | 
| 297 | 53x | first_row$n.event <- first_row$n.censor <- 0 | 
| 298 | 53x | first_row$estimate <- first_row$conf.high <- first_row$conf.low <- 1 | 
| 299 | 53x | first_row$std.error <- 0 | 
| 300 | 53x | rbind( | 
| 301 | 53x | first_row, | 
| 302 | 53x | tbl | 
| 303 | ) | |
| 304 | } | |
| 305 | ) | |
| 306 | 18x | y <- do.call(rbind, y_by_strata_extended) | 
| 307 | ||
| 308 | 18x | y$censor <- ifelse(y$n.censor > 0, y$estimate, NA) | 
| 309 | 18x |   if (!is.null(max_time)) { | 
| 310 | 1x | y <- y[y$time <= max(max_time), ] | 
| 311 | } | |
| 312 | 18x | y | 
| 313 | } | |
| 314 | ||
| 315 | ## Deprecated Functions ---- | |
| 316 | ||
| 317 | #' Helper function to create a KM plot | |
| 318 | #' | |
| 319 | #' @description `r lifecycle::badge("deprecated")` | |
| 320 | #' | |
| 321 | #' Draw the Kaplan-Meier plot using `ggplot2`. | |
| 322 | #' | |
| 323 | #' @inheritParams g_km | |
| 324 | #' @param data (`data.frame`)\cr survival data as pre-processed by `h_data_plot`. | |
| 325 | #' | |
| 326 | #' @return A `ggplot` object. | |
| 327 | #' | |
| 328 | #' @examples | |
| 329 | #' \donttest{ | |
| 330 | #' library(dplyr) | |
| 331 | #' library(survival) | |
| 332 | #' | |
| 333 | #' fit_km <- tern_ex_adtte %>% | |
| 334 | #' filter(PARAMCD == "OS") %>% | |
| 335 | #' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) | |
| 336 | #' data_plot <- h_data_plot(fit_km = fit_km) | |
| 337 | #' xticks <- h_xticks(data = data_plot) | |
| 338 | #' gg <- h_ggkm( | |
| 339 | #' data = data_plot, | |
| 340 | #' censor_show = TRUE, | |
| 341 | #' xticks = xticks, | |
| 342 | #' xlab = "Days", | |
| 343 | #' yval = "Survival", | |
| 344 | #' ylab = "Survival Probability", | |
| 345 | #' title = "Survival" | |
| 346 | #' ) | |
| 347 | #' gg | |
| 348 | #' } | |
| 349 | #' | |
| 350 | #' @export | |
| 351 | h_ggkm <- function(data, | |
| 352 | xticks = NULL, | |
| 353 | yval = "Survival", | |
| 354 | censor_show, | |
| 355 | xlab, | |
| 356 | ylab, | |
| 357 | ylim = NULL, | |
| 358 | title, | |
| 359 | footnotes = NULL, | |
| 360 | max_time = NULL, | |
| 361 | lwd = 1, | |
| 362 | lty = NULL, | |
| 363 | pch = 3, | |
| 364 | size = 2, | |
| 365 | col = NULL, | |
| 366 | ci_ribbon = FALSE, | |
| 367 |                    ggtheme = nestcolor::theme_nest()) { | |
| 368 | 1x | lifecycle::deprecate_warn( | 
| 369 | 1x | "0.9.4", | 
| 370 | 1x | "h_ggkm()", | 
| 371 | 1x | details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." | 
| 372 | ) | |
| 373 | 1x | checkmate::assert_numeric(lty, null.ok = TRUE) | 
| 374 | 1x | checkmate::assert_character(col, null.ok = TRUE) | 
| 375 | ||
| 376 | 1x |   if (is.null(ylim)) { | 
| 377 | 1x | data_lims <- data | 
| 378 | ! | if (yval == "Failure") data_lims[["estimate"]] <- 1 - data_lims[["estimate"]] | 
| 379 | 1x |     if (!is.null(max_time)) { | 
| 380 | ! | y_lwr <- min(data_lims[data_lims$time < max_time, ][["estimate"]]) | 
| 381 | ! | y_upr <- max(data_lims[data_lims$time < max_time, ][["estimate"]]) | 
| 382 |     } else { | |
| 383 | 1x | y_lwr <- min(data_lims[["estimate"]]) | 
| 384 | 1x | y_upr <- max(data_lims[["estimate"]]) | 
| 385 | } | |
| 386 | 1x | ylim <- c(y_lwr, y_upr) | 
| 387 | } | |
| 388 | 1x | checkmate::assert_numeric(ylim, finite = TRUE, any.missing = FALSE, len = 2, sorted = TRUE) | 
| 389 | ||
| 390 | # change estimates of survival to estimates of failure (1 - survival) | |
| 391 | 1x |   if (yval == "Failure") { | 
| 392 | ! | data$estimate <- 1 - data$estimate | 
| 393 | ! |     data[c("conf.high", "conf.low")] <- list(1 - data$conf.low, 1 - data$conf.high) | 
| 394 | ! | data$censor <- 1 - data$censor | 
| 395 | } | |
| 396 | ||
| 397 | 1x |   gg <- { | 
| 398 | 1x | ggplot2::ggplot( | 
| 399 | 1x | data = data, | 
| 400 | 1x | mapping = ggplot2::aes( | 
| 401 | 1x | x = .data[["time"]], | 
| 402 | 1x | y = .data[["estimate"]], | 
| 403 | 1x | ymin = .data[["conf.low"]], | 
| 404 | 1x | ymax = .data[["conf.high"]], | 
| 405 | 1x | color = .data[["strata"]], | 
| 406 | 1x | fill = .data[["strata"]] | 
| 407 | ) | |
| 408 | ) + | |
| 409 | 1x | ggplot2::geom_hline(yintercept = 0) | 
| 410 | } | |
| 411 | ||
| 412 | 1x |   if (ci_ribbon) { | 
| 413 | ! | gg <- gg + ggplot2::geom_ribbon(alpha = .3, lty = 0) | 
| 414 | } | |
| 415 | ||
| 416 | 1x |   gg <- if (is.null(lty)) { | 
| 417 | 1x | gg + | 
| 418 | 1x | ggplot2::geom_step(linewidth = lwd) | 
| 419 | 1x |   } else if (checkmate::test_number(lty)) { | 
| 420 | ! | gg + | 
| 421 | ! | ggplot2::geom_step(linewidth = lwd, lty = lty) | 
| 422 | 1x |   } else if (is.numeric(lty)) { | 
| 423 | ! | gg + | 
| 424 | ! | ggplot2::geom_step(mapping = ggplot2::aes(linetype = .data[["strata"]]), linewidth = lwd) + | 
| 425 | ! | ggplot2::scale_linetype_manual(values = lty) | 
| 426 | } | |
| 427 | ||
| 428 | 1x | gg <- gg + | 
| 429 | 1x | ggplot2::coord_cartesian(ylim = ylim) + | 
| 430 | 1x | ggplot2::labs(x = xlab, y = ylab, title = title, caption = footnotes) | 
| 431 | ||
| 432 | 1x |   if (!is.null(col)) { | 
| 433 | ! | gg <- gg + | 
| 434 | ! | ggplot2::scale_color_manual(values = col) + | 
| 435 | ! | ggplot2::scale_fill_manual(values = col) | 
| 436 | } | |
| 437 | 1x |   if (censor_show) { | 
| 438 | 1x | dt <- data[data$n.censor != 0, ] | 
| 439 | 1x |     dt$censor_lbl <- factor("Censored") | 
| 440 | ||
| 441 | 1x | gg <- gg + ggplot2::geom_point( | 
| 442 | 1x | data = dt, | 
| 443 | 1x | ggplot2::aes( | 
| 444 | 1x | x = .data[["time"]], | 
| 445 | 1x | y = .data[["censor"]], | 
| 446 | 1x | shape = .data[["censor_lbl"]] | 
| 447 | ), | |
| 448 | 1x | size = size, | 
| 449 | 1x | show.legend = TRUE, | 
| 450 | 1x | inherit.aes = TRUE | 
| 451 | ) + | |
| 452 | 1x | ggplot2::scale_shape_manual(name = NULL, values = pch) + | 
| 453 | 1x | ggplot2::guides( | 
| 454 | 1x | shape = ggplot2::guide_legend(override.aes = list(linetype = NA)), | 
| 455 | 1x | fill = ggplot2::guide_legend(override.aes = list(shape = NA)) | 
| 456 | ) | |
| 457 | } | |
| 458 | ||
| 459 | 1x |   if (!is.null(max_time) && !is.null(xticks)) { | 
| 460 | ! | gg <- gg + ggplot2::scale_x_continuous(breaks = xticks, limits = c(min(0, xticks), max(c(xticks, max_time)))) | 
| 461 | 1x |   } else if (!is.null(xticks)) { | 
| 462 | 1x |     if (max(data$time) <= max(xticks)) { | 
| 463 | 1x | gg <- gg + ggplot2::scale_x_continuous(breaks = xticks, limits = c(min(0, min(xticks)), max(xticks))) | 
| 464 |     } else { | |
| 465 | ! | gg <- gg + ggplot2::scale_x_continuous(breaks = xticks) | 
| 466 | } | |
| 467 | ! |   } else if (!is.null(max_time)) { | 
| 468 | ! | gg <- gg + ggplot2::scale_x_continuous(limits = c(0, max_time)) | 
| 469 | } | |
| 470 | ||
| 471 | 1x |   if (!is.null(ggtheme)) { | 
| 472 | 1x | gg <- gg + ggtheme | 
| 473 | } | |
| 474 | ||
| 475 | 1x | gg + ggplot2::theme( | 
| 476 | 1x | legend.position = "bottom", | 
| 477 | 1x | legend.title = ggplot2::element_blank(), | 
| 478 | 1x | legend.key.height = unit(0.02, "npc"), | 
| 479 | 1x | panel.grid.major.x = ggplot2::element_line(linewidth = 2) | 
| 480 | ) | |
| 481 | } | |
| 482 | ||
| 483 | #' `ggplot` decomposition | |
| 484 | #' | |
| 485 | #' @description `r lifecycle::badge("deprecated")` | |
| 486 | #' | |
| 487 | #' The elements composing the `ggplot` are extracted and organized in a `list`. | |
| 488 | #' | |
| 489 | #' @param gg (`ggplot`)\cr a graphic to decompose. | |
| 490 | #' | |
| 491 | #' @return A named `list` with elements: | |
| 492 | #' * `panel`: The panel. | |
| 493 | #' * `yaxis`: The y-axis. | |
| 494 | #' * `xaxis`: The x-axis. | |
| 495 | #' * `xlab`: The x-axis label. | |
| 496 | #' * `ylab`: The y-axis label. | |
| 497 | #' * `guide`: The legend. | |
| 498 | #' | |
| 499 | #' @examples | |
| 500 | #' \donttest{ | |
| 501 | #' library(dplyr) | |
| 502 | #' library(survival) | |
| 503 | #' library(grid) | |
| 504 | #' | |
| 505 | #' fit_km <- tern_ex_adtte %>% | |
| 506 | #' filter(PARAMCD == "OS") %>% | |
| 507 | #' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) | |
| 508 | #' data_plot <- h_data_plot(fit_km = fit_km) | |
| 509 | #' xticks <- h_xticks(data = data_plot) | |
| 510 | #' gg <- h_ggkm( | |
| 511 | #' data = data_plot, | |
| 512 | #' yval = "Survival", | |
| 513 | #' censor_show = TRUE, | |
| 514 | #' xticks = xticks, xlab = "Days", ylab = "Survival Probability", | |
| 515 | #' title = "tt", | |
| 516 | #' footnotes = "ff" | |
| 517 | #' ) | |
| 518 | #' | |
| 519 | #' g_el <- h_decompose_gg(gg) | |
| 520 | #' grid::grid.newpage() | |
| 521 | #' grid.rect(gp = grid::gpar(lty = 1, col = "red", fill = "gray85", lwd = 5)) | |
| 522 | #' grid::grid.draw(g_el$panel) | |
| 523 | #' | |
| 524 | #' grid::grid.newpage() | |
| 525 | #' grid.rect(gp = grid::gpar(lty = 1, col = "royalblue", fill = "gray85", lwd = 5)) | |
| 526 | #' grid::grid.draw(with(g_el, cbind(ylab, yaxis))) | |
| 527 | #' } | |
| 528 | #' | |
| 529 | #' @export | |
| 530 | h_decompose_gg <- function(gg) { | |
| 531 | 1x | lifecycle::deprecate_warn( | 
| 532 | 1x | "0.9.4", | 
| 533 | 1x | "h_decompose_gg()", | 
| 534 | 1x | details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." | 
| 535 | ) | |
| 536 | 1x | g_el <- ggplot2::ggplotGrob(gg) | 
| 537 | 1x | y <- c( | 
| 538 | 1x | panel = "panel", | 
| 539 | 1x | yaxis = "axis-l", | 
| 540 | 1x | xaxis = "axis-b", | 
| 541 | 1x | xlab = "xlab-b", | 
| 542 | 1x | ylab = "ylab-l", | 
| 543 | 1x | guide = "guide" | 
| 544 | ) | |
| 545 | 1x | lapply(X = y, function(x) gtable::gtable_filter(g_el, x)) | 
| 546 | } | |
| 547 | ||
| 548 | #' Helper function to prepare a KM layout | |
| 549 | #' | |
| 550 | #' @description `r lifecycle::badge("deprecated")` | |
| 551 | #' | |
| 552 | #' Prepares a (5 rows) x (2 cols) layout for the Kaplan-Meier curve. | |
| 553 | #' | |
| 554 | #' @inheritParams g_km | |
| 555 | #' @inheritParams h_ggkm | |
| 556 | #' @param g_el (`list` of `gtable`)\cr list as obtained by `h_decompose_gg()`. | |
| 557 | #' @param annot_at_risk (`flag`)\cr compute and add the annotation table reporting the number of | |
| 558 | #' patient at risk matching the main grid of the Kaplan-Meier curve. | |
| 559 | #' | |
| 560 | #' @return A grid layout. | |
| 561 | #' | |
| 562 | #' @details The layout corresponds to a grid of two columns and five rows of unequal dimensions. Most of the | |
| 563 | #' dimension are fixed, only the curve is flexible and will accommodate with the remaining free space. | |
| 564 | #' * The left column gets the annotation of the `ggplot` (y-axis) and the names of the strata for the patient | |
| 565 | #' at risk tabulation. The main constraint is about the width of the columns which must allow the writing of | |
| 566 | #' the strata name. | |
| 567 | #' * The right column receive the `ggplot`, the legend, the x-axis and the patient at risk table. | |
| 568 | #' | |
| 569 | #' @examples | |
| 570 | #' \donttest{ | |
| 571 | #' library(dplyr) | |
| 572 | #' library(survival) | |
| 573 | #' library(grid) | |
| 574 | #' | |
| 575 | #' fit_km <- tern_ex_adtte %>% | |
| 576 | #' filter(PARAMCD == "OS") %>% | |
| 577 | #' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) | |
| 578 | #' data_plot <- h_data_plot(fit_km = fit_km) | |
| 579 | #' xticks <- h_xticks(data = data_plot) | |
| 580 | #' gg <- h_ggkm( | |
| 581 | #' data = data_plot, | |
| 582 | #' censor_show = TRUE, | |
| 583 | #' xticks = xticks, xlab = "Days", ylab = "Survival Probability", | |
| 584 | #' title = "tt", footnotes = "ff", yval = "Survival" | |
| 585 | #' ) | |
| 586 | #' g_el <- h_decompose_gg(gg) | |
| 587 | #' lyt <- h_km_layout(data = data_plot, g_el = g_el, title = "t", footnotes = "f") | |
| 588 | #' grid.show.layout(lyt) | |
| 589 | #' } | |
| 590 | #' | |
| 591 | #' @export | |
| 592 | h_km_layout <- function(data, g_el, title, footnotes, annot_at_risk = TRUE, annot_at_risk_title = TRUE) { | |
| 593 | 1x | lifecycle::deprecate_warn( | 
| 594 | 1x | "0.9.4", | 
| 595 | 1x | "h_km_layout()", | 
| 596 | 1x | details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." | 
| 597 | ) | |
| 598 | 1x | txtlines <- levels(as.factor(data$strata)) | 
| 599 | 1x | nlines <- nlevels(as.factor(data$strata)) | 
| 600 | 1x | col_annot_width <- max( | 
| 601 | 1x | c( | 
| 602 | 1x | as.numeric(grid::convertX(g_el$yaxis$widths + g_el$ylab$widths, "pt")), | 
| 603 | 1x | as.numeric( | 
| 604 | 1x | grid::convertX( | 
| 605 | 1x | grid::stringWidth(txtlines) + grid::unit(7, "pt"), "pt" | 
| 606 | ) | |
| 607 | ) | |
| 608 | ) | |
| 609 | ) | |
| 610 | ||
| 611 | 1x | ttl_row <- as.numeric(!is.null(title)) | 
| 612 | 1x | foot_row <- as.numeric(!is.null(footnotes)) | 
| 613 | 1x | no_tbl_ind <- c() | 
| 614 | 1x | ht_x <- c() | 
| 615 | 1x | ht_units <- c() | 
| 616 | ||
| 617 | 1x |   if (ttl_row == 1) { | 
| 618 | 1x | no_tbl_ind <- c(no_tbl_ind, TRUE) | 
| 619 | 1x | ht_x <- c(ht_x, 2) | 
| 620 | 1x | ht_units <- c(ht_units, "lines") | 
| 621 | } | |
| 622 | ||
| 623 | 1x | no_tbl_ind <- c(no_tbl_ind, rep(TRUE, 3), rep(FALSE, 2)) | 
| 624 | 1x | ht_x <- c( | 
| 625 | 1x | ht_x, | 
| 626 | 1x | 1, | 
| 627 | 1x | grid::convertX(with(g_el, xaxis$heights + ylab$widths), "pt") + grid::unit(5, "pt"), | 
| 628 | 1x | grid::convertX(g_el$guide$heights, "pt") + grid::unit(2, "pt"), | 
| 629 | 1x | 1, | 
| 630 | 1x | nlines + 0.5, | 
| 631 | 1x | grid::convertX(with(g_el, xaxis$heights + ylab$widths), "pt") | 
| 632 | ) | |
| 633 | 1x | ht_units <- c( | 
| 634 | 1x | ht_units, | 
| 635 | 1x | "null", | 
| 636 | 1x | "pt", | 
| 637 | 1x | "pt", | 
| 638 | 1x | "lines", | 
| 639 | 1x | "lines", | 
| 640 | 1x | "pt" | 
| 641 | ) | |
| 642 | ||
| 643 | 1x |   if (foot_row == 1) { | 
| 644 | 1x | no_tbl_ind <- c(no_tbl_ind, TRUE) | 
| 645 | 1x | ht_x <- c(ht_x, 1) | 
| 646 | 1x | ht_units <- c(ht_units, "lines") | 
| 647 | } | |
| 648 | 1x |   if (annot_at_risk) { | 
| 649 | 1x | no_at_risk_tbl <- rep(TRUE, 6 + ttl_row + foot_row) | 
| 650 | 1x |     if (!annot_at_risk_title) { | 
| 651 | ! | no_at_risk_tbl[length(no_at_risk_tbl) - 2 - foot_row] <- FALSE | 
| 652 | } | |
| 653 |   } else { | |
| 654 | ! | no_at_risk_tbl <- no_tbl_ind | 
| 655 | } | |
| 656 | ||
| 657 | 1x | grid::grid.layout( | 
| 658 | 1x | nrow = sum(no_at_risk_tbl), ncol = 2, | 
| 659 | 1x |     widths = grid::unit(c(col_annot_width, 1), c("pt", "null")), | 
| 660 | 1x | heights = grid::unit( | 
| 661 | 1x | x = ht_x[no_at_risk_tbl], | 
| 662 | 1x | units = ht_units[no_at_risk_tbl] | 
| 663 | ) | |
| 664 | ) | |
| 665 | } | |
| 666 | ||
| 667 | #' Helper function to create patient-at-risk grobs | |
| 668 | #' | |
| 669 | #' @description `r lifecycle::badge("deprecated")` | |
| 670 | #' | |
| 671 | #' Two graphical objects are obtained, one corresponding to row labeling and the second to the table of | |
| 672 | #' numbers of patients at risk. If `title = TRUE`, a third object corresponding to the table title is | |
| 673 | #' also obtained. | |
| 674 | #' | |
| 675 | #' @inheritParams g_km | |
| 676 | #' @inheritParams h_ggkm | |
| 677 | #' @param annot_tbl (`data.frame`)\cr annotation as prepared by [survival::summary.survfit()] which | |
| 678 | #' includes the number of patients at risk at given time points. | |
| 679 | #' @param xlim (`numeric(1)`)\cr the maximum value on the x-axis (used to ensure the at risk table aligns with the KM | |
| 680 | #' graph). | |
| 681 | #' @param title (`flag`)\cr whether the "Patients at Risk" title should be added above the `annot_at_risk` | |
| 682 | #' table. Has no effect if `annot_at_risk` is `FALSE`. Defaults to `TRUE`. | |
| 683 | #' | |
| 684 | #' @return A named `list` of two `gTree` objects if `title = FALSE`: `at_risk` and `label`, or three | |
| 685 | #' `gTree` objects if `title = TRUE`: `at_risk`, `label`, and `title`. | |
| 686 | #' | |
| 687 | #' @examples | |
| 688 | #' \donttest{ | |
| 689 | #' library(dplyr) | |
| 690 | #' library(survival) | |
| 691 | #' library(grid) | |
| 692 | #' | |
| 693 | #' fit_km <- tern_ex_adtte %>% | |
| 694 | #' filter(PARAMCD == "OS") %>% | |
| 695 | #' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) | |
| 696 | #' | |
| 697 | #' data_plot <- h_data_plot(fit_km = fit_km) | |
| 698 | #' | |
| 699 | #' xticks <- h_xticks(data = data_plot) | |
| 700 | #' | |
| 701 | #' gg <- h_ggkm( | |
| 702 | #' data = data_plot, | |
| 703 | #' censor_show = TRUE, | |
| 704 | #' xticks = xticks, xlab = "Days", ylab = "Survival Probability", | |
| 705 | #' title = "tt", footnotes = "ff", yval = "Survival" | |
| 706 | #' ) | |
| 707 | #' | |
| 708 | #' # The annotation table reports the patient at risk for a given strata and | |
| 709 | #' # times (`xticks`). | |
| 710 | #' annot_tbl <- summary(fit_km, times = xticks) | |
| 711 | #' if (is.null(fit_km$strata)) { | |
| 712 | #' annot_tbl <- with(annot_tbl, data.frame(n.risk = n.risk, time = time, strata = "All")) | |
| 713 | #' } else { | |
| 714 | #'   strata_lst <- strsplit(sub("=", "equals", levels(annot_tbl$strata)), "equals") | |
| 715 | #' levels(annot_tbl$strata) <- matrix(unlist(strata_lst), ncol = 2, byrow = TRUE)[, 2] | |
| 716 | #' annot_tbl <- data.frame( | |
| 717 | #' n.risk = annot_tbl$n.risk, | |
| 718 | #' time = annot_tbl$time, | |
| 719 | #' strata = annot_tbl$strata | |
| 720 | #' ) | |
| 721 | #' } | |
| 722 | #' | |
| 723 | #' # The annotation table is transformed into a grob. | |
| 724 | #' tbl <- h_grob_tbl_at_risk(data = data_plot, annot_tbl = annot_tbl, xlim = max(xticks)) | |
| 725 | #' | |
| 726 | #' # For the representation, the layout is estimated for which the decomposition | |
| 727 | #' # of the graphic element is necessary. | |
| 728 | #' g_el <- h_decompose_gg(gg) | |
| 729 | #' lyt <- h_km_layout(data = data_plot, g_el = g_el, title = "t", footnotes = "f") | |
| 730 | #' | |
| 731 | #' grid::grid.newpage() | |
| 732 | #' pushViewport(viewport(layout = lyt, height = .95, width = .95)) | |
| 733 | #' grid.rect(gp = grid::gpar(lty = 1, col = "purple", fill = "gray85", lwd = 1)) | |
| 734 | #' pushViewport(viewport(layout.pos.row = 3:4, layout.pos.col = 2)) | |
| 735 | #' grid.rect(gp = grid::gpar(lty = 1, col = "orange", fill = "gray85", lwd = 1)) | |
| 736 | #' grid::grid.draw(tbl$at_risk) | |
| 737 | #' popViewport() | |
| 738 | #' pushViewport(viewport(layout.pos.row = 3:4, layout.pos.col = 1)) | |
| 739 | #' grid.rect(gp = grid::gpar(lty = 1, col = "green3", fill = "gray85", lwd = 1)) | |
| 740 | #' grid::grid.draw(tbl$label) | |
| 741 | #' } | |
| 742 | #' | |
| 743 | #' @export | |
| 744 | h_grob_tbl_at_risk <- function(data, annot_tbl, xlim, title = TRUE) { | |
| 745 | 1x | lifecycle::deprecate_warn( | 
| 746 | 1x | "0.9.4", | 
| 747 | 1x | "h_grob_tbl_at_risk()", | 
| 748 | 1x | details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." | 
| 749 | ) | |
| 750 | 1x | txtlines <- levels(as.factor(data$strata)) | 
| 751 | 1x | nlines <- nlevels(as.factor(data$strata)) | 
| 752 | 1x | y_int <- annot_tbl$time[2] - annot_tbl$time[1] | 
| 753 | 1x | annot_tbl <- expand.grid( | 
| 754 | 1x | time = seq(0, xlim, y_int), | 
| 755 | 1x | strata = unique(annot_tbl$strata) | 
| 756 | 1x |   ) %>% dplyr::left_join(annot_tbl, by = c("time", "strata")) | 
| 757 | 1x | annot_tbl[is.na(annot_tbl)] <- 0 | 
| 758 | 1x | y_str_unit <- as.numeric(annot_tbl$strata) | 
| 759 | 1x | vp_table <- grid::plotViewport(margins = grid::unit(c(0, 0, 0, 0), "lines")) | 
| 760 | 1x |   if (title) { | 
| 761 | 1x | gb_table_title <- grid::gList( | 
| 762 | 1x | grid::textGrob( | 
| 763 | 1x | label = "Patients at Risk:", | 
| 764 | 1x | x = 1, | 
| 765 | 1x | y = grid::unit(0.2, "native"), | 
| 766 | 1x | gp = grid::gpar(fontface = "bold", fontsize = 10) | 
| 767 | ) | |
| 768 | ) | |
| 769 | } | |
| 770 | 1x | gb_table_left_annot <- grid::gList( | 
| 771 | 1x | grid::rectGrob( | 
| 772 | 1x | x = 0, y = grid::unit(c(1:nlines) - 1, "lines"), | 
| 773 | 1x |       gp = grid::gpar(fill = c("gray95", "gray90"), alpha = 1, col = "white"), | 
| 774 | 1x | height = grid::unit(1, "lines"), just = "bottom", hjust = 0 | 
| 775 | ), | |
| 776 | 1x | grid::textGrob( | 
| 777 | 1x | label = unique(annot_tbl$strata), | 
| 778 | 1x | x = 0.5, | 
| 779 | 1x | y = grid::unit( | 
| 780 | 1x | (max(unique(y_str_unit)) - unique(y_str_unit)) + 0.75, | 
| 781 | 1x | "native" | 
| 782 | ), | |
| 783 | 1x | gp = grid::gpar(fontface = "italic", fontsize = 10) | 
| 784 | ) | |
| 785 | ) | |
| 786 | 1x | gb_patient_at_risk <- grid::gList( | 
| 787 | 1x | grid::rectGrob( | 
| 788 | 1x | x = 0, y = grid::unit(c(1:nlines) - 1, "lines"), | 
| 789 | 1x |       gp = grid::gpar(fill = c("gray95", "gray90"), alpha = 1, col = "white"), | 
| 790 | 1x | height = grid::unit(1, "lines"), just = "bottom", hjust = 0 | 
| 791 | ), | |
| 792 | 1x | grid::textGrob( | 
| 793 | 1x | label = annot_tbl$n.risk, | 
| 794 | 1x | x = grid::unit(annot_tbl$time, "native"), | 
| 795 | 1x | y = grid::unit( | 
| 796 | 1x | (max(y_str_unit) - y_str_unit) + .5, | 
| 797 | 1x | "line" | 
| 798 | 1x | ) # maybe native | 
| 799 | ) | |
| 800 | ) | |
| 801 | ||
| 802 | 1x | ret <- list( | 
| 803 | 1x | at_risk = grid::gList( | 
| 804 | 1x | grid::gTree( | 
| 805 | 1x | vp = vp_table, | 
| 806 | 1x | children = grid::gList( | 
| 807 | 1x | grid::gTree( | 
| 808 | 1x | vp = grid::dataViewport( | 
| 809 | 1x | xscale = c(0, xlim) + c(-0.05, 0.05) * xlim, | 
| 810 | 1x | yscale = c(0, nlines + 1), | 
| 811 | 1x | extension = c(0.05, 0) | 
| 812 | ), | |
| 813 | 1x | children = grid::gList(gb_patient_at_risk) | 
| 814 | ) | |
| 815 | ) | |
| 816 | ) | |
| 817 | ), | |
| 818 | 1x | label = grid::gList( | 
| 819 | 1x | grid::gTree( | 
| 820 | 1x | vp = grid::viewport(width = max(grid::stringWidth(txtlines))), | 
| 821 | 1x | children = grid::gList( | 
| 822 | 1x | grid::gTree( | 
| 823 | 1x | vp = grid::dataViewport( | 
| 824 | 1x | xscale = 0:1, | 
| 825 | 1x | yscale = c(0, nlines + 1), | 
| 826 | 1x | extension = c(0.0, 0) | 
| 827 | ), | |
| 828 | 1x | children = grid::gList(gb_table_left_annot) | 
| 829 | ) | |
| 830 | ) | |
| 831 | ) | |
| 832 | ) | |
| 833 | ) | |
| 834 | ||
| 835 | 1x |   if (title) { | 
| 836 | 1x | ret[["title"]] <- grid::gList( | 
| 837 | 1x | grid::gTree( | 
| 838 | 1x | vp = grid::viewport(width = max(grid::stringWidth(txtlines))), | 
| 839 | 1x | children = grid::gList( | 
| 840 | 1x | grid::gTree( | 
| 841 | 1x | vp = grid::dataViewport( | 
| 842 | 1x | xscale = 0:1, | 
| 843 | 1x | yscale = c(0, 1), | 
| 844 | 1x | extension = c(0, 0) | 
| 845 | ), | |
| 846 | 1x | children = grid::gList(gb_table_title) | 
| 847 | ) | |
| 848 | ) | |
| 849 | ) | |
| 850 | ) | |
| 851 | } | |
| 852 | ||
| 853 | 1x | ret | 
| 854 | } | |
| 855 | ||
| 856 | #' Helper function to create survival estimation grobs | |
| 857 | #' | |
| 858 | #' @description `r lifecycle::badge("deprecated")` | |
| 859 | #' | |
| 860 | #' The survival fit is transformed in a grob containing a table with groups in | |
| 861 | #' rows characterized by N, median and 95% confidence interval. | |
| 862 | #' | |
| 863 | #' @inheritParams g_km | |
| 864 | #' @inheritParams h_data_plot | |
| 865 | #' @param ttheme (`list`)\cr see [gridExtra::ttheme_default()]. | |
| 866 | #' @param x (`proportion`)\cr a value between 0 and 1 specifying x-location. | |
| 867 | #' @param y (`proportion`)\cr a value between 0 and 1 specifying y-location. | |
| 868 | #' @param width (`grid::unit`)\cr width (as a unit) to use when printing the grob. | |
| 869 | #' | |
| 870 | #' @return A `grob` of a table containing statistics `N`, `Median`, and `XX% CI` (`XX` taken from `fit_km`). | |
| 871 | #' | |
| 872 | #' @examples | |
| 873 | #' \donttest{ | |
| 874 | #' library(dplyr) | |
| 875 | #' library(survival) | |
| 876 | #' library(grid) | |
| 877 | #' | |
| 878 | #' grid::grid.newpage() | |
| 879 | #' grid.rect(gp = grid::gpar(lty = 1, col = "pink", fill = "gray85", lwd = 1)) | |
| 880 | #' tern_ex_adtte %>% | |
| 881 | #' filter(PARAMCD == "OS") %>% | |
| 882 | #' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>% | |
| 883 | #' h_grob_median_surv() %>% | |
| 884 | #' grid::grid.draw() | |
| 885 | #' } | |
| 886 | #' | |
| 887 | #' @export | |
| 888 | h_grob_median_surv <- function(fit_km, | |
| 889 | armval = "All", | |
| 890 | x = 0.9, | |
| 891 | y = 0.9, | |
| 892 | width = grid::unit(0.3, "npc"), | |
| 893 |                                ttheme = gridExtra::ttheme_default()) { | |
| 894 | 1x | lifecycle::deprecate_warn( | 
| 895 | 1x | "0.9.4", | 
| 896 | 1x | "h_grob_median_surv()", | 
| 897 | 1x | details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." | 
| 898 | ) | |
| 899 | 1x | data <- h_tbl_median_surv(fit_km, armval = armval) | 
| 900 | ||
| 901 | 1x | width <- grid::convertUnit(grid::unit(as.numeric(width), grid::unitType(width)), "in") | 
| 902 | 1x | height <- width * (nrow(data) + 1) / 12 | 
| 903 | ||
| 904 | 1x |   w <- paste(" ", c( | 
| 905 | 1x | rownames(data)[which.max(nchar(rownames(data)))], | 
| 906 | 1x | sapply(names(data), function(x) c(x, data[[x]])[which.max(nchar(c(x, data[[x]])))]) | 
| 907 | )) | |
| 908 | 1x | w_unit <- grid::convertWidth(grid::stringWidth(w), "in", valueOnly = TRUE) | 
| 909 | ||
| 910 | 1x |   w_txt <- sapply(1:64, function(x) { | 
| 911 | 64x | graphics::par(ps = x) | 
| 912 | 64x | graphics::strwidth(w[4], units = "in") | 
| 913 | }) | |
| 914 | 1x | f_size_w <- which.max(w_txt[w_txt < as.numeric((w_unit / sum(w_unit)) * width)[4]]) | 
| 915 | ||
| 916 | 1x |   h_txt <- sapply(1:64, function(x) { | 
| 917 | 64x | graphics::par(ps = x) | 
| 918 | 64x |     graphics::strheight(grid::stringHeight("X"), units = "in") | 
| 919 | }) | |
| 920 | 1x | f_size_h <- which.max(h_txt[h_txt < as.numeric(grid::unit(as.numeric(height) / 4, grid::unitType(height)))]) | 
| 921 | ||
| 922 | 1x |   if (ttheme$core$fg_params$fontsize == 12) { | 
| 923 | 1x | ttheme$core$fg_params$fontsize <- min(f_size_w, f_size_h) | 
| 924 | 1x | ttheme$colhead$fg_params$fontsize <- min(f_size_w, f_size_h) | 
| 925 | 1x | ttheme$rowhead$fg_params$fontsize <- min(f_size_w, f_size_h) | 
| 926 | } | |
| 927 | ||
| 928 | 1x | gt <- gridExtra::tableGrob( | 
| 929 | 1x | d = data, | 
| 930 | 1x | theme = ttheme | 
| 931 | ) | |
| 932 | 1x | gt$widths <- ((w_unit / sum(w_unit)) * width) | 
| 933 | 1x | gt$heights <- rep(grid::unit(as.numeric(height) / 4, grid::unitType(height)), nrow(gt)) | 
| 934 | ||
| 935 | 1x | vp <- grid::viewport( | 
| 936 | 1x | x = grid::unit(x, "npc") + grid::unit(1, "lines"), | 
| 937 | 1x | y = grid::unit(y, "npc") + grid::unit(1.5, "lines"), | 
| 938 | 1x | height = height, | 
| 939 | 1x | width = width, | 
| 940 | 1x |     just = c("right", "top") | 
| 941 | ) | |
| 942 | ||
| 943 | 1x | grid::gList( | 
| 944 | 1x | grid::gTree( | 
| 945 | 1x | vp = vp, | 
| 946 | 1x | children = grid::gList(gt) | 
| 947 | ) | |
| 948 | ) | |
| 949 | } | |
| 950 | ||
| 951 | #' Helper function to create grid object with y-axis annotation | |
| 952 | #' | |
| 953 | #' @description `r lifecycle::badge("deprecated")` | |
| 954 | #' | |
| 955 | #' Build the y-axis annotation from a decomposed `ggplot`. | |
| 956 | #' | |
| 957 | #' @param ylab (`gtable`)\cr the y-lab as a graphical object derived from a `ggplot`. | |
| 958 | #' @param yaxis (`gtable`)\cr the y-axis as a graphical object derived from a `ggplot`. | |
| 959 | #' | |
| 960 | #' @return A `gTree` object containing the y-axis annotation from a `ggplot`. | |
| 961 | #' | |
| 962 | #' @examples | |
| 963 | #' \donttest{ | |
| 964 | #' library(dplyr) | |
| 965 | #' library(survival) | |
| 966 | #' library(grid) | |
| 967 | #' | |
| 968 | #' fit_km <- tern_ex_adtte %>% | |
| 969 | #' filter(PARAMCD == "OS") %>% | |
| 970 | #' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) | |
| 971 | #' data_plot <- h_data_plot(fit_km = fit_km) | |
| 972 | #' xticks <- h_xticks(data = data_plot) | |
| 973 | #' gg <- h_ggkm( | |
| 974 | #' data = data_plot, | |
| 975 | #' censor_show = TRUE, | |
| 976 | #' xticks = xticks, xlab = "Days", ylab = "Survival Probability", | |
| 977 | #' title = "title", footnotes = "footnotes", yval = "Survival" | |
| 978 | #' ) | |
| 979 | #' | |
| 980 | #' g_el <- h_decompose_gg(gg) | |
| 981 | #' | |
| 982 | #' grid::grid.newpage() | |
| 983 | #' pvp <- grid::plotViewport(margins = c(5, 4, 2, 20)) | |
| 984 | #' pushViewport(pvp) | |
| 985 | #' grid::grid.draw(h_grob_y_annot(ylab = g_el$ylab, yaxis = g_el$yaxis)) | |
| 986 | #' grid.rect(gp = grid::gpar(lty = 1, col = "gray35", fill = NA)) | |
| 987 | #' } | |
| 988 | #' | |
| 989 | #' @export | |
| 990 | h_grob_y_annot <- function(ylab, yaxis) { | |
| 991 | 1x | lifecycle::deprecate_warn( | 
| 992 | 1x | "0.9.4", | 
| 993 | 1x | "h_grob_y_annot()", | 
| 994 | 1x | details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." | 
| 995 | ) | |
| 996 | 1x | grid::gList( | 
| 997 | 1x | grid::gTree( | 
| 998 | 1x | vp = grid::viewport( | 
| 999 | 1x | width = grid::convertX(yaxis$widths + ylab$widths, "pt"), | 
| 1000 | 1x | x = grid::unit(1, "npc"), | 
| 1001 | 1x | just = "right" | 
| 1002 | ), | |
| 1003 | 1x | children = grid::gList(cbind(ylab, yaxis)) | 
| 1004 | ) | |
| 1005 | ) | |
| 1006 | } | |
| 1007 | ||
| 1008 | #' Helper function to create Cox-PH grobs | |
| 1009 | #' | |
| 1010 | #' @description `r lifecycle::badge("deprecated")` | |
| 1011 | #' | |
| 1012 | #' Grob of `rtable` output from [h_tbl_coxph_pairwise()] | |
| 1013 | #' | |
| 1014 | #' @inheritParams h_grob_median_surv | |
| 1015 | #' @param ... arguments to pass to [h_tbl_coxph_pairwise()]. | |
| 1016 | #' @param x (`proportion`)\cr a value between 0 and 1 specifying x-location. | |
| 1017 | #' @param y (`proportion`)\cr a value between 0 and 1 specifying y-location. | |
| 1018 | #' @param width (`grid::unit`)\cr width (as a unit) to use when printing the grob. | |
| 1019 | #' | |
| 1020 | #' @return A `grob` of a table containing statistics `HR`, `XX% CI` (`XX` taken from `control_coxph_pw`), | |
| 1021 | #' and `p-value (log-rank)`. | |
| 1022 | #' | |
| 1023 | #' @examples | |
| 1024 | #' \donttest{ | |
| 1025 | #' library(dplyr) | |
| 1026 | #' library(survival) | |
| 1027 | #' library(grid) | |
| 1028 | #' | |
| 1029 | #' grid::grid.newpage() | |
| 1030 | #' grid.rect(gp = grid::gpar(lty = 1, col = "pink", fill = "gray85", lwd = 1)) | |
| 1031 | #' data <- tern_ex_adtte %>% | |
| 1032 | #' filter(PARAMCD == "OS") %>% | |
| 1033 | #' mutate(is_event = CNSR == 0) | |
| 1034 | #' tbl_grob <- h_grob_coxph( | |
| 1035 | #' df = data, | |
| 1036 | #' variables = list(tte = "AVAL", is_event = "is_event", arm = "ARMCD"), | |
| 1037 | #' control_coxph_pw = control_coxph(conf_level = 0.9), x = 0.5, y = 0.5 | |
| 1038 | #' ) | |
| 1039 | #' grid::grid.draw(tbl_grob) | |
| 1040 | #' } | |
| 1041 | #' | |
| 1042 | #' @export | |
| 1043 | h_grob_coxph <- function(..., | |
| 1044 | x = 0, | |
| 1045 | y = 0, | |
| 1046 | width = grid::unit(0.4, "npc"), | |
| 1047 | ttheme = gridExtra::ttheme_default( | |
| 1048 | padding = grid::unit(c(1, .5), "lines"), | |
| 1049 |                            core = list(bg_params = list(fill = c("grey95", "grey90"), alpha = .5)) | |
| 1050 |                          )) { | |
| 1051 | 1x | lifecycle::deprecate_warn( | 
| 1052 | 1x | "0.9.4", | 
| 1053 | 1x | "h_grob_coxph()", | 
| 1054 | 1x | details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." | 
| 1055 | ) | |
| 1056 | 1x | data <- h_tbl_coxph_pairwise(...) | 
| 1057 | ||
| 1058 | 1x | width <- grid::convertUnit(grid::unit(as.numeric(width), grid::unitType(width)), "in") | 
| 1059 | 1x | height <- width * (nrow(data) + 1) / 12 | 
| 1060 | ||
| 1061 | 1x |   w <- paste("    ", c( | 
| 1062 | 1x | rownames(data)[which.max(nchar(rownames(data)))], | 
| 1063 | 1x | sapply(names(data), function(x) c(x, data[[x]])[which.max(nchar(c(x, data[[x]])))]) | 
| 1064 | )) | |
| 1065 | 1x | w_unit <- grid::convertWidth(grid::stringWidth(w), "in", valueOnly = TRUE) | 
| 1066 | ||
| 1067 | 1x |   w_txt <- sapply(1:64, function(x) { | 
| 1068 | 64x | graphics::par(ps = x) | 
| 1069 | 64x | graphics::strwidth(w[4], units = "in") | 
| 1070 | }) | |
| 1071 | 1x | f_size_w <- which.max(w_txt[w_txt < as.numeric((w_unit / sum(w_unit)) * width)[4]]) | 
| 1072 | ||
| 1073 | 1x |   h_txt <- sapply(1:64, function(x) { | 
| 1074 | 64x | graphics::par(ps = x) | 
| 1075 | 64x |     graphics::strheight(grid::stringHeight("X"), units = "in") | 
| 1076 | }) | |
| 1077 | 1x | f_size_h <- which.max(h_txt[h_txt < as.numeric(grid::unit(as.numeric(height) / 4, grid::unitType(height)))]) | 
| 1078 | ||
| 1079 | 1x |   if (ttheme$core$fg_params$fontsize == 12) { | 
| 1080 | 1x | ttheme$core$fg_params$fontsize <- min(f_size_w, f_size_h) | 
| 1081 | 1x | ttheme$colhead$fg_params$fontsize <- min(f_size_w, f_size_h) | 
| 1082 | 1x | ttheme$rowhead$fg_params$fontsize <- min(f_size_w, f_size_h) | 
| 1083 | } | |
| 1084 | ||
| 1085 | 1x | tryCatch( | 
| 1086 | 1x |     expr = { | 
| 1087 | 1x | gt <- gridExtra::tableGrob( | 
| 1088 | 1x | d = data, | 
| 1089 | 1x | theme = ttheme | 
| 1090 | 1x | ) # ERROR 'data' must be of a vector type, was 'NULL' | 
| 1091 | 1x | gt$widths <- ((w_unit / sum(w_unit)) * width) | 
| 1092 | 1x | gt$heights <- rep(grid::unit(as.numeric(height) / 4, grid::unitType(height)), nrow(gt)) | 
| 1093 | 1x | vp <- grid::viewport( | 
| 1094 | 1x | x = grid::unit(x, "npc") + grid::unit(1, "lines"), | 
| 1095 | 1x | y = grid::unit(y, "npc") + grid::unit(1.5, "lines"), | 
| 1096 | 1x | height = height, | 
| 1097 | 1x | width = width, | 
| 1098 | 1x |         just = c("left", "bottom") | 
| 1099 | ) | |
| 1100 | 1x | grid::gList( | 
| 1101 | 1x | grid::gTree( | 
| 1102 | 1x | vp = vp, | 
| 1103 | 1x | children = grid::gList(gt) | 
| 1104 | ) | |
| 1105 | ) | |
| 1106 | }, | |
| 1107 | 1x |     error = function(w) { | 
| 1108 | ! | message(paste( | 
| 1109 | ! | "Warning: Cox table will not be displayed as there is", | 
| 1110 | ! | "not any level to be compared in the arm variable." | 
| 1111 | )) | |
| 1112 | ! | return( | 
| 1113 | ! | grid::gList( | 
| 1114 | ! | grid::gTree( | 
| 1115 | ! | vp = NULL, | 
| 1116 | ! | children = NULL | 
| 1117 | ) | |
| 1118 | ) | |
| 1119 | ) | |
| 1120 | } | |
| 1121 | ) | |
| 1122 | } | 
| 1 | #' Split function to configure risk difference column | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Wrapper function for [rtables::add_combo_levels()] which configures settings for the risk difference | |
| 6 | #' column to be added to an `rtables` object. To add a risk difference column to a table, this function | |
| 7 | #' should be used as `split_fun` in calls to [rtables::split_cols_by()], followed by setting argument | |
| 8 | #' `riskdiff` to `TRUE` in all following analyze function calls. | |
| 9 | #' | |
| 10 | #' @param arm_x (`string`)\cr name of reference arm to use in risk difference calculations. | |
| 11 | #' @param arm_y (`character`)\cr names of one or more arms to compare to reference arm in risk difference | |
| 12 | #' calculations. A new column will be added for each value of `arm_y`. | |
| 13 | #' @param col_label (`character`)\cr labels to use when rendering the risk difference column within the table. | |
| 14 | #' If more than one comparison arm is specified in `arm_y`, default labels will specify which two arms are | |
| 15 | #' being compared (reference arm vs. comparison arm). | |
| 16 | #' @param pct (`flag`)\cr whether output should be returned as percentages. Defaults to `TRUE`. | |
| 17 | #' | |
| 18 | #' @return A closure suitable for use as a split function (`split_fun`) within [rtables::split_cols_by()] | |
| 19 | #' when creating a table layout. | |
| 20 | #' | |
| 21 | #' @seealso [stat_propdiff_ci()] for details on risk difference calculation. | |
| 22 | #' | |
| 23 | #' @examples | |
| 24 | #' adae <- tern_ex_adae | |
| 25 | #' adae$AESEV <- factor(adae$AESEV) | |
| 26 | #' | |
| 27 | #' lyt <- basic_table() %>% | |
| 28 | #'   split_cols_by("ARMCD", split_fun = add_riskdiff(arm_x = "ARM A", arm_y = c("ARM B", "ARM C"))) %>% | |
| 29 | #' count_occurrences_by_grade( | |
| 30 | #' var = "AESEV", | |
| 31 | #' riskdiff = TRUE | |
| 32 | #' ) | |
| 33 | #' | |
| 34 | #' tbl <- build_table(lyt, df = adae) | |
| 35 | #' tbl | |
| 36 | #' | |
| 37 | #' @export | |
| 38 | add_riskdiff <- function(arm_x, | |
| 39 | arm_y, | |
| 40 | col_label = paste0( | |
| 41 |                            "Risk Difference (%) (95% CI)", if (length(arm_y) > 1) paste0("\n", arm_x, " vs. ", arm_y) | |
| 42 | ), | |
| 43 |                          pct = TRUE) { | |
| 44 | 19x | checkmate::assert_character(arm_x, len = 1) | 
| 45 | 19x | checkmate::assert_character(arm_y, min.len = 1) | 
| 46 | 19x | checkmate::assert_character(col_label, len = length(arm_y)) | 
| 47 | ||
| 48 | 19x | combodf <- tibble::tribble(~valname, ~label, ~levelcombo, ~exargs) | 
| 49 | 19x |   for (i in seq_len(length(arm_y))) { | 
| 50 | 20x | combodf <- rbind( | 
| 51 | 20x | combodf, | 
| 52 | 20x | tibble::tribble( | 
| 53 | 20x | ~valname, ~label, ~levelcombo, ~exargs, | 
| 54 | 20x |         paste("riskdiff", arm_x, arm_y[i], sep = "_"), col_label[i], c(arm_x, arm_y[i]), list() | 
| 55 | ) | |
| 56 | ) | |
| 57 | } | |
| 58 | 19x | if (pct) combodf$valname <- paste0(combodf$valname, "_pct") | 
| 59 | 19x | add_combo_levels(combodf) | 
| 60 | } | |
| 61 | ||
| 62 | #' Analysis function to calculate risk difference column values | |
| 63 | #' | |
| 64 | #' In the risk difference column, this function uses the statistics function associated with `afun` to | |
| 65 | #' calculates risk difference values from arm X (reference group) and arm Y. These arms are specified | |
| 66 | #' when configuring the risk difference column which is done using the [add_riskdiff()] split function in | |
| 67 | #' the previous call to [rtables::split_cols_by()]. For all other columns, applies `afun` as usual. This | |
| 68 | #' function utilizes the [stat_propdiff_ci()] function to perform risk difference calculations. | |
| 69 | #' | |
| 70 | #' @inheritParams argument_convention | |
| 71 | #' @param afun (named `list`)\cr a named list containing one name-value pair where the name corresponds to | |
| 72 | #' the name of the statistics function that should be used in calculations and the value is the corresponding | |
| 73 | #' analysis function. | |
| 74 | #' | |
| 75 | #' @return A list of formatted [rtables::CellValue()]. | |
| 76 | #' | |
| 77 | #' @seealso | |
| 78 | #' * [stat_propdiff_ci()] for details on risk difference calculation. | |
| 79 | #' * Split function [add_riskdiff()] which, when used as `split_fun` within [rtables::split_cols_by()] with | |
| 80 | #' `riskdiff` argument set to `TRUE` in subsequent analyze functions calls, adds a risk difference column | |
| 81 | #' to a table layout. | |
| 82 | #' | |
| 83 | #' @keywords internal | |
| 84 | afun_riskdiff <- function(df, | |
| 85 | labelstr = "", | |
| 86 | afun, | |
| 87 | ..., | |
| 88 | .stats = NULL, | |
| 89 | .stat_names = NULL, | |
| 90 | .formats = NULL, | |
| 91 | .labels = NULL, | |
| 92 |                           .indent_mods = NULL) { | |
| 93 | 146x |   if (!any(grepl("riskdiff", names(.spl_context)))) { | 
| 94 | ! | stop( | 
| 95 | ! | "Please set up levels to use in risk difference calculations using the `add_riskdiff` ", | 
| 96 | ! | "split function within `split_cols_by`. See ?add_riskdiff for details." | 
| 97 | ) | |
| 98 | } | |
| 99 | 146x | checkmate::assert_list(afun, len = 1, types = "function") | 
| 100 | 146x | checkmate::assert_named(afun) | 
| 101 | ||
| 102 | 146x | sfun <- names(afun) | 
| 103 | 146x | dots_extra_args <- list(...)[intersect(names(list(...)), names(formals(sfun)))] | 
| 104 | 146x | extra_args <- list( | 
| 105 | 146x | .var = .var, .df_row = .df_row, .N_col = .N_col, .N_row = .N_row, .stats = .stats, .formats = .formats, | 
| 106 | 146x | .labels = .labels, .indent_mods = .indent_mods | 
| 107 | ) | |
| 108 | 146x | cur_split <- tail(.spl_context$cur_col_split_val[[1]], 1) | 
| 109 | ||
| 110 | 146x |   if (!grepl("^riskdiff", cur_split)) { | 
| 111 | # Apply basic afun (no risk difference) in all other columns | |
| 112 | 108x | do.call(afun[[1]], args = c(list(df = df, labelstr = labelstr), extra_args, dots_extra_args)) | 
| 113 |   } else { | |
| 114 | 38x | arm_x <- strsplit(cur_split, "_")[[1]][2] | 
| 115 | 38x | arm_y <- strsplit(cur_split, "_")[[1]][3] | 
| 116 | 38x |     if (length(.spl_context$cur_col_split[[1]]) > 1) { # Different split name for nested column splits | 
| 117 | 8x |       arm_spl_x <- gsub("riskdiff", "", paste0(strsplit(.spl_context$cur_col_id[1], "_")[[1]][c(1, 2)], collapse = "")) | 
| 118 | 8x |       arm_spl_y <- gsub("riskdiff", "", paste0(strsplit(.spl_context$cur_col_id[1], "_")[[1]][c(1, 3)], collapse = "")) | 
| 119 |     } else { | |
| 120 | 30x | arm_spl_x <- arm_x | 
| 121 | 30x | arm_spl_y <- arm_y | 
| 122 | } | |
| 123 | 38x | N_col_x <- .all_col_counts[[arm_spl_x]] # nolint | 
| 124 | 38x | N_col_y <- .all_col_counts[[arm_spl_y]] # nolint | 
| 125 | 38x | cur_var <- tail(.spl_context$cur_col_split[[1]], 1) | 
| 126 | ||
| 127 | # Apply statistics function to arm X and arm Y data | |
| 128 | 38x | s_args <- c(dots_extra_args, extra_args[intersect(setdiff(names(extra_args), ".N_col"), names(formals(sfun)))]) | 
| 129 | 38x | s_x <- do.call(sfun, args = c(list(df = df[df[[cur_var]] == arm_x, ], .N_col = N_col_x), s_args)) | 
| 130 | 38x | s_y <- do.call(sfun, args = c(list(df = df[df[[cur_var]] == arm_y, ], .N_col = N_col_y), s_args)) | 
| 131 | ||
| 132 | # Get statistic name and row names | |
| 133 | 38x |     stat <- ifelse("count_fraction" %in% names(s_x), "count_fraction", "unique") | 
| 134 | 38x |     if ("flag_variables" %in% names(s_args)) { | 
| 135 | 2x | var_nms <- s_args$flag_variables | 
| 136 | 36x |     } else if (is.list(s_x[[stat]]) && !is.null(names(s_x[[stat]]))) { | 
| 137 | 24x | var_nms <- names(s_x[[stat]]) | 
| 138 |     } else { | |
| 139 | 12x | var_nms <- "" | 
| 140 | 12x | s_x[[stat]] <- list(s_x[[stat]]) | 
| 141 | 12x | s_y[[stat]] <- list(s_y[[stat]]) | 
| 142 | } | |
| 143 | ||
| 144 | # Calculate risk difference for each row, repeated if multiple statistics in table | |
| 145 | 38x | pct <- tail(strsplit(cur_split, "_")[[1]], 1) == "pct" | 
| 146 | 38x | rd_ci <- rep(stat_propdiff_ci( | 
| 147 | 38x | lapply(s_x[[stat]], `[`, 1), lapply(s_y[[stat]], `[`, 1), | 
| 148 | 38x | N_col_x, N_col_y, | 
| 149 | 38x | list_names = var_nms, | 
| 150 | 38x | pct = pct | 
| 151 | 38x | ), max(1, length(.stats))) | 
| 152 | ||
| 153 | 38x | in_rows(.list = rd_ci, .formats = "xx.x (xx.x - xx.x)", .indent_mods = .indent_mods) | 
| 154 | } | |
| 155 | } | |
| 156 | ||
| 157 | #' Control function for risk difference column | |
| 158 | #' | |
| 159 | #' @description `r lifecycle::badge("stable")` | |
| 160 | #' | |
| 161 | #' Sets a list of parameters to use when generating a risk (proportion) difference column. Used as input to the | |
| 162 | #' `riskdiff` parameter of [tabulate_rsp_subgroups()] and [tabulate_survival_subgroups()]. | |
| 163 | #' | |
| 164 | #' @inheritParams add_riskdiff | |
| 165 | #' @param format (`string` or `function`)\cr the format label (string) or formatting function to apply to the risk | |
| 166 | #' difference statistic. See the `3d` string options in [formatters::list_valid_format_labels()] for possible format | |
| 167 | #' strings. Defaults to `"xx.x (xx.x - xx.x)"`. | |
| 168 | #' | |
| 169 | #' @return A `list` of items with names corresponding to the arguments. | |
| 170 | #' | |
| 171 | #' @seealso [add_riskdiff()], [tabulate_rsp_subgroups()], and [tabulate_survival_subgroups()]. | |
| 172 | #' | |
| 173 | #' @examples | |
| 174 | #' control_riskdiff() | |
| 175 | #' control_riskdiff(arm_x = "ARM A", arm_y = "ARM B") | |
| 176 | #' | |
| 177 | #' @export | |
| 178 | control_riskdiff <- function(arm_x = NULL, | |
| 179 | arm_y = NULL, | |
| 180 | format = "xx.x (xx.x - xx.x)", | |
| 181 | col_label = "Risk Difference (%) (95% CI)", | |
| 182 |                              pct = TRUE) { | |
| 183 | 4x | checkmate::assert_character(arm_x, len = 1, null.ok = TRUE) | 
| 184 | 4x | checkmate::assert_character(arm_y, min.len = 1, null.ok = TRUE) | 
| 185 | 4x | checkmate::assert_character(format, len = 1) | 
| 186 | 4x | checkmate::assert_character(col_label) | 
| 187 | 4x | checkmate::assert_flag(pct) | 
| 188 | ||
| 189 | 4x | list(arm_x = arm_x, arm_y = arm_y, format = format, col_label = col_label, pct = pct) | 
| 190 | } | 
| 1 | #' Cox regression helper function for interactions | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Test and estimate the effect of a treatment in interaction with a covariate. | |
| 6 | #' The effect is estimated as the HR of the tested treatment for a given level | |
| 7 | #' of the covariate, in comparison to the treatment control. | |
| 8 | #' | |
| 9 | #' @inheritParams argument_convention | |
| 10 | #' @param x (`numeric` or `factor`)\cr the values of the covariate to be tested. | |
| 11 | #' @param effect (`string`)\cr the name of the effect to be tested and estimated. | |
| 12 | #' @param covar (`string`)\cr the name of the covariate in the model. | |
| 13 | #' @param mod (`coxph`)\cr the Cox regression model. | |
| 14 | #' @param label (`string`)\cr the label to be returned as `term_label`. | |
| 15 | #' @param control (`list`)\cr a list of controls as returned by [control_coxreg()]. | |
| 16 | #' @param ... see methods. | |
| 17 | #' | |
| 18 | #' @examples | |
| 19 | #' library(survival) | |
| 20 | #' | |
| 21 | #' set.seed(1, kind = "Mersenne-Twister") | |
| 22 | #' | |
| 23 | #' # Testing dataset [survival::bladder]. | |
| 24 | #' dta_bladder <- with( | |
| 25 | #' data = bladder[bladder$enum < 5, ], | |
| 26 | #' data.frame( | |
| 27 | #' time = stop, | |
| 28 | #' status = event, | |
| 29 | #' armcd = as.factor(rx), | |
| 30 | #' covar1 = as.factor(enum), | |
| 31 | #' covar2 = factor( | |
| 32 | #' sample(as.factor(enum)), | |
| 33 | #' levels = 1:4, | |
| 34 | #'       labels = c("F", "F", "M", "M") | |
| 35 | #' ) | |
| 36 | #' ) | |
| 37 | #' ) | |
| 38 | #' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)") | |
| 39 | #' formatters::var_labels(dta_bladder)[names(labels)] <- labels | |
| 40 | #' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE) | |
| 41 | #' | |
| 42 | #' plot( | |
| 43 | #' survfit(Surv(time, status) ~ armcd + covar1, data = dta_bladder), | |
| 44 | #' lty = 2:4, | |
| 45 | #' xlab = "Months", | |
| 46 | #'   col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4") | |
| 47 | #' ) | |
| 48 | #' | |
| 49 | #' @name cox_regression_inter | |
| 50 | NULL | |
| 51 | ||
| 52 | #' @describeIn cox_regression_inter S3 generic helper function to determine interaction effect. | |
| 53 | #' | |
| 54 | #' @return | |
| 55 | #' * `h_coxreg_inter_effect()` returns a `data.frame` of covariate interaction effects consisting of the following | |
| 56 | #' variables: `effect`, `term`, `term_label`, `level`, `n`, `hr`, `lcl`, `ucl`, `pval`, and `pval_inter`. | |
| 57 | #' | |
| 58 | #' @export | |
| 59 | h_coxreg_inter_effect <- function(x, | |
| 60 | effect, | |
| 61 | covar, | |
| 62 | mod, | |
| 63 | label, | |
| 64 | control, | |
| 65 |                                   ...) { | |
| 66 | 29x |   UseMethod("h_coxreg_inter_effect", x) | 
| 67 | } | |
| 68 | ||
| 69 | #' @describeIn cox_regression_inter Method for `numeric` class. Estimates the interaction with a `numeric` covariate. | |
| 70 | #' | |
| 71 | #' @method h_coxreg_inter_effect numeric | |
| 72 | #' | |
| 73 | #' @param at (`list`)\cr a list with items named after the covariate, every | |
| 74 | #' item is a vector of levels at which the interaction should be estimated. | |
| 75 | #' | |
| 76 | #' @export | |
| 77 | h_coxreg_inter_effect.numeric <- function(x, | |
| 78 | effect, | |
| 79 | covar, | |
| 80 | mod, | |
| 81 | label, | |
| 82 | control, | |
| 83 | at, | |
| 84 |                                           ...) { | |
| 85 | 7x | betas <- stats::coef(mod) | 
| 86 | 7x | attrs <- attr(stats::terms(mod), "term.labels") | 
| 87 | 7x | term_indices <- grep( | 
| 88 | 7x | pattern = effect, | 
| 89 | 7x |     x = attrs[!grepl("strata\\(", attrs)] | 
| 90 | ) | |
| 91 | 7x | checkmate::assert_vector(term_indices, len = 2) | 
| 92 | 7x | betas <- betas[term_indices] | 
| 93 | 7x | betas_var <- diag(stats::vcov(mod))[term_indices] | 
| 94 | 7x | betas_cov <- stats::vcov(mod)[term_indices[1], term_indices[2]] | 
| 95 | 7x |   xval <- if (is.null(at[[covar]])) { | 
| 96 | 6x | stats::median(x) | 
| 97 |   } else { | |
| 98 | 1x | at[[covar]] | 
| 99 | } | |
| 100 | 7x | effect_index <- !grepl(covar, names(betas)) | 
| 101 | 7x | coef_hat <- betas[effect_index] + xval * betas[!effect_index] | 
| 102 | 7x | coef_se <- sqrt( | 
| 103 | 7x | betas_var[effect_index] + | 
| 104 | 7x | xval ^ 2 * betas_var[!effect_index] + # styler: off | 
| 105 | 7x | 2 * xval * betas_cov | 
| 106 | ) | |
| 107 | 7x | q_norm <- stats::qnorm((1 + control$conf_level) / 2) | 
| 108 | 7x | data.frame( | 
| 109 | 7x | effect = "Covariate:", | 
| 110 | 7x | term = rep(covar, length(xval)), | 
| 111 | 7x |     term_label = paste0("  ", xval), | 
| 112 | 7x | level = as.character(xval), | 
| 113 | 7x | n = NA, | 
| 114 | 7x | hr = exp(coef_hat), | 
| 115 | 7x | lcl = exp(coef_hat - q_norm * coef_se), | 
| 116 | 7x | ucl = exp(coef_hat + q_norm * coef_se), | 
| 117 | 7x | pval = NA, | 
| 118 | 7x | pval_inter = NA, | 
| 119 | 7x | stringsAsFactors = FALSE | 
| 120 | ) | |
| 121 | } | |
| 122 | ||
| 123 | #' @describeIn cox_regression_inter Method for `factor` class. Estimate the interaction with a `factor` covariate. | |
| 124 | #' | |
| 125 | #' @method h_coxreg_inter_effect factor | |
| 126 | #' | |
| 127 | #' @param data (`data.frame`)\cr the data frame on which the model was fit. | |
| 128 | #' | |
| 129 | #' @export | |
| 130 | h_coxreg_inter_effect.factor <- function(x, | |
| 131 | effect, | |
| 132 | covar, | |
| 133 | mod, | |
| 134 | label, | |
| 135 | control, | |
| 136 | data, | |
| 137 |                                          ...) { | |
| 138 | 17x | lvl_given <- levels(x) | 
| 139 | 17x | y <- h_coxreg_inter_estimations( | 
| 140 | 17x | variable = effect, given = covar, | 
| 141 | 17x | lvl_var = levels(data[[effect]]), | 
| 142 | 17x | lvl_given = lvl_given, | 
| 143 | 17x | mod = mod, | 
| 144 | 17x | conf_level = 0.95 | 
| 145 | 17x | )[[1]] | 
| 146 | ||
| 147 | 17x | data.frame( | 
| 148 | 17x | effect = "Covariate:", | 
| 149 | 17x | term = rep(covar, nrow(y)), | 
| 150 | 17x |     term_label = paste0("  ", lvl_given), | 
| 151 | 17x | level = lvl_given, | 
| 152 | 17x | n = NA, | 
| 153 | 17x | hr = y[, "hr"], | 
| 154 | 17x | lcl = y[, "lcl"], | 
| 155 | 17x | ucl = y[, "ucl"], | 
| 156 | 17x | pval = NA, | 
| 157 | 17x | pval_inter = NA, | 
| 158 | 17x | stringsAsFactors = FALSE | 
| 159 | ) | |
| 160 | } | |
| 161 | ||
| 162 | #' @describeIn cox_regression_inter Method for `character` class. Estimate the interaction with a `character` covariate. | |
| 163 | #' This makes an automatic conversion to `factor` and then forwards to the method for factors. | |
| 164 | #' | |
| 165 | #' @method h_coxreg_inter_effect character | |
| 166 | #' | |
| 167 | #' @note | |
| 168 | #' * Automatic conversion of character to factor does not guarantee results can be generated correctly. It is | |
| 169 | #' therefore better to always pre-process the dataset such that factors are manually created from character | |
| 170 | #' variables before passing the dataset to [rtables::build_table()]. | |
| 171 | #' | |
| 172 | #' @export | |
| 173 | h_coxreg_inter_effect.character <- function(x, | |
| 174 | effect, | |
| 175 | covar, | |
| 176 | mod, | |
| 177 | label, | |
| 178 | control, | |
| 179 | data, | |
| 180 |                                             ...) { | |
| 181 | 5x | y <- as.factor(x) | 
| 182 | ||
| 183 | 5x | h_coxreg_inter_effect( | 
| 184 | 5x | x = y, | 
| 185 | 5x | effect = effect, | 
| 186 | 5x | covar = covar, | 
| 187 | 5x | mod = mod, | 
| 188 | 5x | label = label, | 
| 189 | 5x | control = control, | 
| 190 | 5x | data = data, | 
| 191 | ... | |
| 192 | ) | |
| 193 | } | |
| 194 | ||
| 195 | #' @describeIn cox_regression_inter A higher level function to get | |
| 196 | #' the results of the interaction test and the estimated values. | |
| 197 | #' | |
| 198 | #' @return | |
| 199 | #' * `h_coxreg_extract_interaction()` returns the result of an interaction test and the estimated values. If | |
| 200 | #' no interaction, [h_coxreg_univar_extract()] is applied instead. | |
| 201 | #' | |
| 202 | #' @examples | |
| 203 | #' mod <- coxph(Surv(time, status) ~ armcd * covar1, data = dta_bladder) | |
| 204 | #' h_coxreg_extract_interaction( | |
| 205 | #' mod = mod, effect = "armcd", covar = "covar1", data = dta_bladder, | |
| 206 | #' control = control_coxreg() | |
| 207 | #' ) | |
| 208 | #' | |
| 209 | #' @export | |
| 210 | h_coxreg_extract_interaction <- function(effect, | |
| 211 | covar, | |
| 212 | mod, | |
| 213 | data, | |
| 214 | at, | |
| 215 |                                          control) { | |
| 216 | 31x |   if (!any(attr(stats::terms(mod), "order") == 2)) { | 
| 217 | 12x | y <- h_coxreg_univar_extract( | 
| 218 | 12x | effect = effect, covar = covar, mod = mod, data = data, control = control | 
| 219 | ) | |
| 220 | 12x | y$pval_inter <- NA | 
| 221 | 12x | y | 
| 222 |   } else { | |
| 223 | 19x | test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method] | 
| 224 | ||
| 225 | # Test the main treatment effect. | |
| 226 | 19x | mod_aov <- muffled_car_anova(mod, test_statistic) | 
| 227 | 19x | sum_anova <- broom::tidy(mod_aov) | 
| 228 | 19x | pval <- sum_anova[sum_anova$term == effect, ][["p.value"]] | 
| 229 | ||
| 230 | # Test the interaction effect. | |
| 231 | 19x |     pval_inter <- sum_anova[grep(":", sum_anova$term), ][["p.value"]] | 
| 232 | 19x | covar_test <- data.frame( | 
| 233 | 19x | effect = "Covariate:", | 
| 234 | 19x | term = covar, | 
| 235 | 19x | term_label = unname(labels_or_names(data[covar])), | 
| 236 | 19x | level = "", | 
| 237 | 19x | n = mod$n, hr = NA, lcl = NA, ucl = NA, pval = pval, | 
| 238 | 19x | pval_inter = pval_inter, | 
| 239 | 19x | stringsAsFactors = FALSE | 
| 240 | ) | |
| 241 | # Estimate the interaction. | |
| 242 | 19x | y <- h_coxreg_inter_effect( | 
| 243 | 19x | data[[covar]], | 
| 244 | 19x | covar = covar, | 
| 245 | 19x | effect = effect, | 
| 246 | 19x | mod = mod, | 
| 247 | 19x | label = unname(labels_or_names(data[covar])), | 
| 248 | 19x | at = at, | 
| 249 | 19x | control = control, | 
| 250 | 19x | data = data | 
| 251 | ) | |
| 252 | 19x | rbind(covar_test, y) | 
| 253 | } | |
| 254 | } | |
| 255 | ||
| 256 | #' @describeIn cox_regression_inter Hazard ratio estimation in interactions. | |
| 257 | #' | |
| 258 | #' @param variable,given (`string`)\cr the name of variables in interaction. We seek the estimation | |
| 259 | #' of the levels of `variable` given the levels of `given`. | |
| 260 | #' @param lvl_var,lvl_given (`character`)\cr corresponding levels as given by [levels()]. | |
| 261 | #' @param mod (`coxph`)\cr a fitted Cox regression model (see [survival::coxph()]). | |
| 262 | #' | |
| 263 | #' @details Given the cox regression investigating the effect of Arm (A, B, C; reference A) | |
| 264 | #' and Sex (F, M; reference Female) and the model being abbreviated: y ~ Arm + Sex + Arm:Sex. | |
| 265 | #' The cox regression estimates the coefficients along with a variance-covariance matrix for: | |
| 266 | #' | |
| 267 | #' - b1 (arm b), b2 (arm c) | |
| 268 | #' - b3 (sex m) | |
| 269 | #' - b4 (arm b: sex m), b5 (arm c: sex m) | |
| 270 | #' | |
| 271 | #' The estimation of the Hazard Ratio for arm C/sex M is given in reference | |
| 272 | #' to arm A/Sex M by exp(b2 + b3 + b5)/ exp(b3) = exp(b2 + b5). | |
| 273 | #' The interaction coefficient is deduced by b2 + b5 while the standard error | |
| 274 | #' is obtained as $sqrt(Var b2 + Var b5 + 2 * covariance (b2,b5))$. | |
| 275 | #' | |
| 276 | #' @return | |
| 277 | #' * `h_coxreg_inter_estimations()` returns a list of matrices (one per level of variable) with rows corresponding | |
| 278 | #' to the combinations of `variable` and `given`, with columns: | |
| 279 | #' * `coef_hat`: Estimation of the coefficient. | |
| 280 | #' * `coef_se`: Standard error of the estimation. | |
| 281 | #' * `hr`: Hazard ratio. | |
| 282 | #' * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio. | |
| 283 | #' | |
| 284 | #' @examples | |
| 285 | #' mod <- coxph(Surv(time, status) ~ armcd * covar1, data = dta_bladder) | |
| 286 | #' result <- h_coxreg_inter_estimations( | |
| 287 | #' variable = "armcd", given = "covar1", | |
| 288 | #' lvl_var = levels(dta_bladder$armcd), | |
| 289 | #' lvl_given = levels(dta_bladder$covar1), | |
| 290 | #' mod = mod, conf_level = .95 | |
| 291 | #' ) | |
| 292 | #' result | |
| 293 | #' | |
| 294 | #' @export | |
| 295 | h_coxreg_inter_estimations <- function(variable, | |
| 296 | given, | |
| 297 | lvl_var, | |
| 298 | lvl_given, | |
| 299 | mod, | |
| 300 |                                        conf_level = 0.95) { | |
| 301 | 18x | var_lvl <- paste0(variable, lvl_var[-1]) # [-1]: reference level | 
| 302 | 18x | giv_lvl <- paste0(given, lvl_given) | 
| 303 | 18x | design_mat <- expand.grid(variable = var_lvl, given = giv_lvl) | 
| 304 | 18x | design_mat <- design_mat[order(design_mat$variable, design_mat$given), ] | 
| 305 | 18x | design_mat <- within( | 
| 306 | 18x | data = design_mat, | 
| 307 | 18x |     expr = { | 
| 308 | 18x | inter <- paste0(variable, ":", given) | 
| 309 | 18x | rev_inter <- paste0(given, ":", variable) | 
| 310 | } | |
| 311 | ) | |
| 312 | 18x | split_by_variable <- design_mat$variable | 
| 313 | 18x | interaction_names <- paste(design_mat$variable, design_mat$given, sep = "/") | 
| 314 | ||
| 315 | 18x | mmat <- stats::model.matrix(mod)[1, ] | 
| 316 | 18x | mmat[!mmat == 0] <- 0 | 
| 317 | ||
| 318 | 18x | design_mat <- apply( | 
| 319 | 18x |     X = design_mat, MARGIN = 1, FUN = function(x) { | 
| 320 | 52x | mmat[names(mmat) %in% x[-which(names(x) == "given")]] <- 1 | 
| 321 | 52x | mmat | 
| 322 | } | |
| 323 | ) | |
| 324 | 18x | colnames(design_mat) <- interaction_names | 
| 325 | ||
| 326 | 18x | coef <- stats::coef(mod) | 
| 327 | 18x | vcov <- stats::vcov(mod) | 
| 328 | 18x | betas <- as.matrix(coef) | 
| 329 | 18x | coef_hat <- t(design_mat) %*% betas | 
| 330 | 18x | dimnames(coef_hat)[2] <- "coef" | 
| 331 | 18x | coef_se <- apply( | 
| 332 | 18x | design_mat, 2, | 
| 333 | 18x |     function(x) { | 
| 334 | 52x | vcov_el <- as.logical(x) | 
| 335 | 52x | y <- vcov[vcov_el, vcov_el] | 
| 336 | 52x | y <- sum(y) | 
| 337 | 52x | y <- sqrt(y) | 
| 338 | 52x | return(y) | 
| 339 | } | |
| 340 | ) | |
| 341 | 18x | q_norm <- stats::qnorm((1 + conf_level) / 2) | 
| 342 | 18x | y <- cbind(coef_hat, `se(coef)` = coef_se) | 
| 343 | 18x |   y <- apply(y, 1, function(x) { | 
| 344 | 52x | x["hr"] <- exp(x["coef"]) | 
| 345 | 52x | x["lcl"] <- exp(x["coef"] - q_norm * x["se(coef)"]) | 
| 346 | 52x | x["ucl"] <- exp(x["coef"] + q_norm * x["se(coef)"]) | 
| 347 | 52x | x | 
| 348 | }) | |
| 349 | 18x | y <- t(y) | 
| 350 | 18x | y <- by(y, split_by_variable, identity) | 
| 351 | 18x | y <- lapply(y, as.matrix) | 
| 352 | 18x | attr(y, "details") <- paste0( | 
| 353 | 18x | "Estimations of ", variable, | 
| 354 | 18x | " hazard ratio given the level of ", given, " compared to ", | 
| 355 | 18x | variable, " level ", lvl_var[1], "." | 
| 356 | ) | |
| 357 | 18x | y | 
| 358 | } | 
| 1 | #' Count patients by most extreme post-baseline toxicity grade per direction of abnormality | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [count_abnormal_by_worst_grade()] creates a layout element to count patients by highest (worst) | |
| 6 | #' analysis toxicity grade post-baseline for each direction, categorized by parameter value. | |
| 7 | #' | |
| 8 | #' This function analyzes primary analysis variable `var` which indicates toxicity grades. Additional | |
| 9 | #' analysis variables that can be supplied as a list via the `variables` parameter are `id` (defaults to | |
| 10 | #' `USUBJID`), a variable to indicate unique subject identifiers, `param` (defaults to `PARAM`), a variable | |
| 11 | #' to indicate parameter values, and `grade_dir` (defaults to `GRADE_DIR`), a variable to indicate directions | |
| 12 | #' (e.g. High or Low) for each toxicity grade supplied in `var`. | |
| 13 | #' | |
| 14 | #' For each combination of `param` and `grade_dir` levels, patient counts by worst | |
| 15 | #' grade are calculated as follows: | |
| 16 | #' * `1` to `4`: The number of patients with worst grades 1-4, respectively. | |
| 17 | #' * `Any`: The number of patients with at least one abnormality (i.e. grade is not 0). | |
| 18 | #' | |
| 19 | #' Fractions are calculated by dividing the above counts by the number of patients with at least one | |
| 20 | #' valid measurement recorded during treatment. | |
| 21 | #' | |
| 22 | #' Pre-processing is crucial when using this function and can be done automatically using the | |
| 23 | #' [h_adlb_abnormal_by_worst_grade()] helper function. See the description of this function for details on the | |
| 24 | #' necessary pre-processing steps. | |
| 25 | #' | |
| 26 | #' Prior to using this function in your table layout you must use [rtables::split_rows_by()] to create two row | |
| 27 | #' splits, one on variable `param` and one on variable `grade_dir`. | |
| 28 | #' | |
| 29 | #' @inheritParams argument_convention | |
| 30 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 31 | #' | |
| 32 | #'   Options are: ``r shQuote(get_stats("abnormal_by_worst_grade"), type = "sh")`` | |
| 33 | #' | |
| 34 | #' @seealso [h_adlb_abnormal_by_worst_grade()] which pre-processes ADLB data frames to be used in | |
| 35 | #' [count_abnormal_by_worst_grade()]. | |
| 36 | #' | |
| 37 | #' @name abnormal_by_worst_grade | |
| 38 | #' @order 1 | |
| 39 | NULL | |
| 40 | ||
| 41 | #' @describeIn abnormal_by_worst_grade Statistics function which counts patients by worst grade. | |
| 42 | #' | |
| 43 | #' @return | |
| 44 | #' * `s_count_abnormal_by_worst_grade()` returns the single statistic `count_fraction` with grades 1 to 4 and | |
| 45 | #' "Any" results. | |
| 46 | #' | |
| 47 | #' @keywords internal | |
| 48 | s_count_abnormal_by_worst_grade <- function(df, | |
| 49 | .var = "GRADE_ANL", | |
| 50 | .spl_context, | |
| 51 | variables = list( | |
| 52 | id = "USUBJID", | |
| 53 | param = "PARAM", | |
| 54 | grade_dir = "GRADE_DIR" | |
| 55 | ), | |
| 56 |                                             ...) { | |
| 57 | 5x | checkmate::assert_string(.var) | 
| 58 | 5x | assert_valid_factor(df[[.var]]) | 
| 59 | 5x | assert_valid_factor(df[[variables$param]]) | 
| 60 | 4x | assert_valid_factor(df[[variables$grade_dir]]) | 
| 61 | 4x | assert_df_with_variables(df, c(a = .var, variables)) | 
| 62 | 4x |   checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character")) | 
| 63 | ||
| 64 | # To verify that the `split_rows_by` are performed with correct variables. | |
| 65 | 4x | checkmate::assert_subset(c(variables[["param"]], variables[["grade_dir"]]), .spl_context$split) | 
| 66 | 4x | first_row <- .spl_context[.spl_context$split == variables[["param"]], ] | 
| 67 | 4x | x_lvls <- c(setdiff(levels(df[[.var]]), "0"), "Any") | 
| 68 | 4x | result <- split(numeric(0), factor(x_lvls)) | 
| 69 | ||
| 70 | 4x | subj <- first_row$full_parent_df[[1]][[variables[["id"]]]] | 
| 71 | 4x | subj_cur_col <- subj[first_row$cur_col_subset[[1]]] | 
| 72 | # Some subjects may have a record for high and low directions but | |
| 73 | # should be counted only once. | |
| 74 | 4x | denom <- length(unique(subj_cur_col)) | 
| 75 | ||
| 76 | 4x |   for (lvl in x_lvls) { | 
| 77 | 20x |     if (lvl != "Any") { | 
| 78 | 16x | df_lvl <- df[df[[.var]] == lvl, ] | 
| 79 |     } else { | |
| 80 | 4x | df_lvl <- df[df[[.var]] != 0, ] | 
| 81 | } | |
| 82 | 20x | num <- length(unique(df_lvl[[variables[["id"]]]])) | 
| 83 | 20x | fraction <- ifelse(denom == 0, 0, num / denom) | 
| 84 | 20x | result[[lvl]] <- formatters::with_label(c(count = num, fraction = fraction), lvl) | 
| 85 | } | |
| 86 | ||
| 87 | 4x | result <- list(count_fraction = result) | 
| 88 | 4x | result | 
| 89 | } | |
| 90 | ||
| 91 | #' @describeIn abnormal_by_worst_grade Formatted analysis function which is used as `afun` | |
| 92 | #' in `count_abnormal_by_worst_grade()`. | |
| 93 | #' | |
| 94 | #' @return | |
| 95 | #' * `a_count_abnormal_by_worst_grade()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 96 | #' | |
| 97 | #' @keywords internal | |
| 98 | a_count_abnormal_by_worst_grade <- function(df, | |
| 99 | ..., | |
| 100 | .stats = NULL, | |
| 101 | .stat_names = NULL, | |
| 102 | .formats = NULL, | |
| 103 | .labels = NULL, | |
| 104 |                                             .indent_mods = NULL) { | |
| 105 | # Check for additional parameters to the statistics function | |
| 106 | 4x | dots_extra_args <- list(...) | 
| 107 | 4x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 108 | 4x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 109 | ||
| 110 | # Check for user-defined functions | |
| 111 | 4x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 112 | 4x | .stats <- default_and_custom_stats_list$all_stats | 
| 113 | 4x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 114 | ||
| 115 | # Apply statistics function | |
| 116 | 4x | x_stats <- .apply_stat_functions( | 
| 117 | 4x | default_stat_fnc = s_count_abnormal_by_worst_grade, | 
| 118 | 4x | custom_stat_fnc_list = custom_stat_functions, | 
| 119 | 4x | args_list = c( | 
| 120 | 4x | df = list(df), | 
| 121 | 4x | extra_afun_params, | 
| 122 | 4x | dots_extra_args | 
| 123 | ) | |
| 124 | ) | |
| 125 | ||
| 126 | # Fill in formatting defaults | |
| 127 | 3x |   .stats <- get_stats("abnormal_by_worst_grade", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) | 
| 128 | 3x | levels_per_stats <- lapply(x_stats, names) | 
| 129 | 3x | .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) | 
| 130 | 3x | .labels <- get_labels_from_stats(.stats, .labels, levels_per_stats) | 
| 131 | 3x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) | 
| 132 | ||
| 133 | 3x | x_stats <- x_stats[.stats] %>% | 
| 134 | 3x | .unlist_keep_nulls() %>% | 
| 135 | 3x | setNames(names(.formats)) | 
| 136 | ||
| 137 | # Auto format handling | |
| 138 | 3x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 139 | ||
| 140 | # Get and check statistical names | |
| 141 | 3x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 142 | ||
| 143 | 3x | in_rows( | 
| 144 | 3x | .list = x_stats, | 
| 145 | 3x | .formats = .formats, | 
| 146 | 3x | .names = .labels %>% .unlist_keep_nulls(), | 
| 147 | 3x | .stat_names = .stat_names, | 
| 148 | 3x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 149 | 3x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 150 | ) | |
| 151 | } | |
| 152 | ||
| 153 | #' @describeIn abnormal_by_worst_grade Layout-creating function which can take statistics function arguments | |
| 154 | #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 155 | #' | |
| 156 | #' @return | |
| 157 | #' * `count_abnormal_by_worst_grade()` returns a layout object suitable for passing to further layouting functions, | |
| 158 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 159 | #' the statistics from `s_count_abnormal_by_worst_grade()` to the table layout. | |
| 160 | #' | |
| 161 | #' @examples | |
| 162 | #' library(dplyr) | |
| 163 | #' library(forcats) | |
| 164 | #' adlb <- tern_ex_adlb | |
| 165 | #' | |
| 166 | #' # Data is modified in order to have some parameters with grades only in one direction | |
| 167 | #' # and simulate the real data. | |
| 168 | #' adlb$ATOXGR[adlb$PARAMCD == "ALT" & adlb$ATOXGR %in% c("1", "2", "3", "4")] <- "-1" | |
| 169 | #' adlb$ANRIND[adlb$PARAMCD == "ALT" & adlb$ANRIND == "HIGH"] <- "LOW" | |
| 170 | #' adlb$WGRHIFL[adlb$PARAMCD == "ALT"] <- "" | |
| 171 | #' | |
| 172 | #' adlb$ATOXGR[adlb$PARAMCD == "IGA" & adlb$ATOXGR %in% c("-1", "-2", "-3", "-4")] <- "1" | |
| 173 | #' adlb$ANRIND[adlb$PARAMCD == "IGA" & adlb$ANRIND == "LOW"] <- "HIGH" | |
| 174 | #' adlb$WGRLOFL[adlb$PARAMCD == "IGA"] <- "" | |
| 175 | #' | |
| 176 | #' # Pre-processing | |
| 177 | #' adlb_f <- adlb %>% h_adlb_abnormal_by_worst_grade() | |
| 178 | #' | |
| 179 | #' # Map excludes records without abnormal grade since they should not be displayed | |
| 180 | #' # in the table. | |
| 181 | #' map <- unique(adlb_f[adlb_f$GRADE_DIR != "ZERO", c("PARAM", "GRADE_DIR", "GRADE_ANL")]) %>% | |
| 182 | #' lapply(as.character) %>% | |
| 183 | #' as.data.frame() %>% | |
| 184 | #' arrange(PARAM, desc(GRADE_DIR), GRADE_ANL) | |
| 185 | #' | |
| 186 | #' basic_table() %>% | |
| 187 | #'   split_cols_by("ARMCD") %>% | |
| 188 | #'   split_rows_by("PARAM") %>% | |
| 189 | #'   split_rows_by("GRADE_DIR", split_fun = trim_levels_to_map(map)) %>% | |
| 190 | #' count_abnormal_by_worst_grade( | |
| 191 | #' var = "GRADE_ANL", | |
| 192 | #' variables = list(id = "USUBJID", param = "PARAM", grade_dir = "GRADE_DIR") | |
| 193 | #' ) %>% | |
| 194 | #' build_table(df = adlb_f) | |
| 195 | #' | |
| 196 | #' @export | |
| 197 | #' @order 2 | |
| 198 | count_abnormal_by_worst_grade <- function(lyt, | |
| 199 | var, | |
| 200 | variables = list( | |
| 201 | id = "USUBJID", | |
| 202 | param = "PARAM", | |
| 203 | grade_dir = "GRADE_DIR" | |
| 204 | ), | |
| 205 | na_str = default_na_str(), | |
| 206 | nested = TRUE, | |
| 207 | ..., | |
| 208 | .stats = "count_fraction", | |
| 209 | .stat_names = NULL, | |
| 210 | .formats = list(count_fraction = format_count_fraction), | |
| 211 | .labels = NULL, | |
| 212 |                                           .indent_mods = NULL) { | |
| 213 | # Process standard extra arguments | |
| 214 | 2x |   extra_args <- list(".stats" = .stats) | 
| 215 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 216 | 2x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 217 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 218 | ! | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 219 | ||
| 220 | # Process additional arguments to the statistic function | |
| 221 | 2x | extra_args <- c(extra_args, "variables" = list(variables), ...) | 
| 222 | ||
| 223 | # Append additional info from layout to the analysis function | |
| 224 | 2x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 225 | 2x | formals(a_count_abnormal_by_worst_grade) <- c( | 
| 226 | 2x | formals(a_count_abnormal_by_worst_grade), extra_args[[".additional_fun_parameters"]] | 
| 227 | ) | |
| 228 | ||
| 229 | 2x | analyze( | 
| 230 | 2x | lyt = lyt, | 
| 231 | 2x | vars = var, | 
| 232 | 2x | afun = a_count_abnormal_by_worst_grade, | 
| 233 | 2x | na_str = na_str, | 
| 234 | 2x | nested = nested, | 
| 235 | 2x | extra_args = extra_args, | 
| 236 | 2x | show_labels = "hidden" | 
| 237 | ) | |
| 238 | } | |
| 239 | ||
| 240 | #' Helper function to prepare ADLB for `count_abnormal_by_worst_grade()` | |
| 241 | #' | |
| 242 | #' @description `r lifecycle::badge("stable")` | |
| 243 | #' | |
| 244 | #' Helper function to prepare an ADLB data frame to be used as input in | |
| 245 | #' [count_abnormal_by_worst_grade()]. The following pre-processing steps are applied: | |
| 246 | #' | |
| 247 | #' 1. `adlb` is filtered on variable `avisit` to only include post-baseline visits. | |
| 248 | #' 2. `adlb` is filtered on variables `worst_flag_low` and `worst_flag_high` so that only | |
| 249 | #' worst grades (in either direction) are included. | |
| 250 | #' 3. From the standard lab grade variable `atoxgr`, the following two variables are derived | |
| 251 | #' and added to `adlb`: | |
| 252 | #' * A grade direction variable (e.g. `GRADE_DIR`). The variable takes value `"HIGH"` when | |
| 253 | #' `atoxgr > 0`, `"LOW"` when `atoxgr < 0`, and `"ZERO"` otherwise. | |
| 254 | #' * A toxicity grade variable (e.g. `GRADE_ANL`) where all negative values from `atoxgr` are | |
| 255 | #' replaced by their absolute values. | |
| 256 | #' 4. Unused factor levels are dropped from `adlb` via [droplevels()]. | |
| 257 | #' | |
| 258 | #' @param adlb (`data.frame`)\cr ADLB data frame. | |
| 259 | #' @param atoxgr (`string`)\cr name of the analysis toxicity grade variable. This must be a `factor` | |
| 260 | #' variable. | |
| 261 | #' @param avisit (`string`)\cr name of the analysis visit variable. | |
| 262 | #' @param worst_flag_low (`string`)\cr name of the worst low lab grade flag variable. This variable is | |
| 263 | #' set to `"Y"` when indicating records of worst low lab grades. | |
| 264 | #' @param worst_flag_high (`string`)\cr name of the worst high lab grade flag variable. This variable is | |
| 265 | #' set to `"Y"` when indicating records of worst high lab grades. | |
| 266 | #' | |
| 267 | #' @return `h_adlb_abnormal_by_worst_grade()` returns the `adlb` data frame with two new | |
| 268 | #' variables: `GRADE_DIR` and `GRADE_ANL`. | |
| 269 | #' | |
| 270 | #' @seealso [abnormal_by_worst_grade] | |
| 271 | #' | |
| 272 | #' @examples | |
| 273 | #' h_adlb_abnormal_by_worst_grade(tern_ex_adlb) %>% | |
| 274 | #' dplyr::select(ATOXGR, GRADE_DIR, GRADE_ANL) %>% | |
| 275 | #' head(10) | |
| 276 | #' | |
| 277 | #' @export | |
| 278 | h_adlb_abnormal_by_worst_grade <- function(adlb, | |
| 279 | atoxgr = "ATOXGR", | |
| 280 | avisit = "AVISIT", | |
| 281 | worst_flag_low = "WGRLOFL", | |
| 282 |                                            worst_flag_high = "WGRHIFL") { | |
| 283 | 1x | adlb %>% | 
| 284 | 1x | dplyr::filter( | 
| 285 | 1x |       !.data[[avisit]] %in% c("SCREENING", "BASELINE"), | 
| 286 | 1x | .data[[worst_flag_low]] == "Y" | .data[[worst_flag_high]] == "Y" | 
| 287 | ) %>% | |
| 288 | 1x | dplyr::mutate( | 
| 289 | 1x | GRADE_DIR = factor( | 
| 290 | 1x | dplyr::case_when( | 
| 291 | 1x |           .data[[atoxgr]] %in% c("-1", "-2", "-3", "-4") ~ "LOW", | 
| 292 | 1x | .data[[atoxgr]] == "0" ~ "ZERO", | 
| 293 | 1x |           .data[[atoxgr]] %in% c("1", "2", "3", "4") ~ "HIGH" | 
| 294 | ), | |
| 295 | 1x |         levels = c("LOW", "ZERO", "HIGH") | 
| 296 | ), | |
| 297 | 1x | GRADE_ANL = forcats::fct_relevel( | 
| 298 | 1x | forcats::fct_recode(.data[[atoxgr]], `1` = "-1", `2` = "-2", `3` = "-3", `4` = "-4"), | 
| 299 | 1x |         c("0", "1", "2", "3", "4") | 
| 300 | ) | |
| 301 | ) %>% | |
| 302 | 1x | droplevels() | 
| 303 | } | 
| 1 | #' Line plot with optional table | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Line plot with optional table. | |
| 6 | #' | |
| 7 | #' @inheritParams argument_convention | |
| 8 | #' @param alt_counts_df (`data.frame` or `NULL`)\cr data set that will be used (only) | |
| 9 | #' to counts objects in groups for stratification. | |
| 10 | #' @param variables (named `character`) vector of variable names in `df` which should include: | |
| 11 | #' * `x` (`string`)\cr name of x-axis variable. | |
| 12 | #' * `y` (`string`)\cr name of y-axis variable. | |
| 13 | #' * `group_var` (`string` or `NULL`)\cr name of grouping variable (or strata), i.e. treatment arm. | |
| 14 | #' Can be `NA` to indicate lack of groups. | |
| 15 | #' * `subject_var` (`string` or `NULL`)\cr name of subject variable. Only applies if `group_var` is | |
| 16 | #' not NULL. | |
| 17 | #' * `paramcd` (`string` or `NA`)\cr name of the variable for parameter's code. Used for y-axis label and plot's | |
| 18 | #' subtitle. Can be `NA` if `paramcd` is not to be added to the y-axis label or subtitle. | |
| 19 | #' * `y_unit` (`string` or `NA`)\cr name of variable with units of `y`. Used for y-axis label and plot's subtitle. | |
| 20 | #' Can be `NA` if y unit is not to be added to the y-axis label or subtitle. | |
| 21 | #' * `facet_var` (`string` or `NA`)\cr name of the secondary grouping variable used for plot faceting, i.e. treatment | |
| 22 | #' arm. Can be `NA` to indicate lack of groups. | |
| 23 | #' @param mid (`character` or `NULL`)\cr names of the statistics that will be plotted as midpoints. | |
| 24 | #' All the statistics indicated in `mid` variable must be present in the object returned by `sfun`, | |
| 25 | #' and be of a `double` or `numeric` type vector of length one. | |
| 26 | #' @param interval (`character` or `NULL`)\cr names of the statistics that will be plotted as intervals. | |
| 27 | #' All the statistics indicated in `interval` variable must be present in the object returned by `sfun`, | |
| 28 | #' and be of a `double` or `numeric` type vector of length two. Set `interval = NULL` if intervals should not be | |
| 29 | #' added to the plot. | |
| 30 | #' @param whiskers (`character`)\cr names of the interval whiskers that will be plotted. Names must match names | |
| 31 | #' of the list element `interval` that will be returned by `sfun` (e.g. `mean_ci_lwr` element of | |
| 32 | #' `sfun(x)[["mean_ci"]]`). It is possible to specify one whisker only, or to suppress all whiskers by setting | |
| 33 | #' `interval = NULL`. | |
| 34 | #' @param table (`character` or `NULL`)\cr names of the statistics that will be displayed in the table below the plot. | |
| 35 | #' All the statistics indicated in `table` variable must be present in the object returned by `sfun`. | |
| 36 | #' @param sfun (`function`)\cr the function to compute the values of required statistics. It must return a named `list` | |
| 37 | #' with atomic vectors. The names of the `list` elements refer to the names of the statistics and are used by `mid`, | |
| 38 | #' `interval`, `table`. It must be able to accept as input a vector with data for which statistics are computed. | |
| 39 | #' @param ... optional arguments to `sfun`. | |
| 40 | #' @param mid_type (`string`)\cr controls the type of the `mid` plot, it can be point (`"p"`), line (`"l"`), | |
| 41 | #' or point and line (`"pl"`). | |
| 42 | #' @param mid_point_size (`numeric(1)`)\cr font size of the `mid` plot points. | |
| 43 | #' @param position (`character` or `call`)\cr geom element position adjustment, either as a string, or the result of | |
| 44 | #' a call to a position adjustment function. | |
| 45 | #' @param legend_title (`string`)\cr legend title. | |
| 46 | #' @param legend_position (`string`)\cr the position of the plot legend (`"none"`, `"left"`, `"right"`, `"bottom"`, | |
| 47 | #' `"top"`, or a two-element numeric vector). | |
| 48 | #' @param ggtheme (`theme`)\cr a graphical theme as provided by `ggplot2` to control styling of the plot. | |
| 49 | #' @param xticks (`numeric` or `NULL`)\cr numeric vector of tick positions or a single number with spacing | |
| 50 | #' between ticks on the x-axis, for use when `variables$x` is numeric. If `NULL` (default), [labeling::extended()] is | |
| 51 | #' used to determine optimal tick positions on the x-axis. If `variables$x` is not numeric, this argument is ignored. | |
| 52 | #' @param x_lab (`string` or `NULL`)\cr x-axis label. If `NULL` then no label will be added. | |
| 53 | #' @param y_lab (`string` or `NULL`)\cr y-axis label. If `NULL` then no label will be added. | |
| 54 | #' @param y_lab_add_paramcd (`flag`)\cr whether `paramcd`, i.e. `unique(df[[variables["paramcd"]]])` should be added | |
| 55 | #' to the y-axis label (`y_lab`). | |
| 56 | #' @param y_lab_add_unit (`flag`)\cr whether y-axis unit, i.e. `unique(df[[variables["y_unit"]]])` should be added | |
| 57 | #' to the y-axis label (`y_lab`). | |
| 58 | #' @param title (`string`)\cr plot title. | |
| 59 | #' @param subtitle (`string`)\cr plot subtitle. | |
| 60 | #' @param subtitle_add_paramcd (`flag`)\cr whether `paramcd`, i.e. `unique(df[[variables["paramcd"]]])` should be | |
| 61 | #' added to the plot's subtitle (`subtitle`). | |
| 62 | #' @param subtitle_add_unit (`flag`)\cr whether the y-axis unit, i.e. `unique(df[[variables["y_unit"]]])` should be | |
| 63 | #' added to the plot's subtitle (`subtitle`). | |
| 64 | #' @param caption (`string`)\cr optional caption below the plot. | |
| 65 | #' @param table_format (named `vector` or `NULL`)\cr custom formats for descriptive statistics used instead of defaults | |
| 66 | #' in the (optional) table appended to the plot. It is passed directly to the `h_format_row` function through | |
| 67 | #' the `format` parameter. Names of `table_format` must match the names of statistics returned by `sfun` function. | |
| 68 | #' Can be a character vector with values from [formatters::list_valid_format_labels()] or custom format functions. | |
| 69 | #' @param table_labels (named `character` or `NULL`)\cr labels for descriptive statistics used in the (optional) table | |
| 70 | #' appended to the plot. Names of `table_labels` must match the names of statistics returned by `sfun` function. | |
| 71 | #' @param table_font_size (`numeric(1)`)\cr font size of the text in the table. | |
| 72 | #' @param newpage `r lifecycle::badge("deprecated")` not used. | |
| 73 | #' @param col (`character`)\cr color(s). See `?ggplot2::aes_colour_fill_alpha` for example values. | |
| 74 | #' @param linetype (`character`)\cr line type(s). See `?ggplot2::aes_linetype_size_shape` for example values. | |
| 75 | #' @param errorbar_width (`numeric(1)`)\cr width of the error bars. | |
| 76 | #' @param rel_height_plot (`proportion`)\cr proportion of total figure height to allocate to the line plot. | |
| 77 | #' Relative height of annotation table is then `1 - rel_height_plot`. If `table = NULL`, this parameter is ignored. | |
| 78 | #' @param as_list (`flag`)\cr whether the two `ggplot` objects should be returned as a list when `table` is not `NULL`. | |
| 79 | #' If `TRUE`, a named list with two elements, `plot` and `table`, will be returned. If `FALSE` (default) the | |
| 80 | #' annotation table is printed below the plot via [cowplot::plot_grid()]. | |
| 81 | #' | |
| 82 | #' @return A `ggplot` line plot (and statistics table if applicable). | |
| 83 | #' | |
| 84 | #' @examples | |
| 85 | #' | |
| 86 | #' adsl <- tern_ex_adsl | |
| 87 | #' adlb <- tern_ex_adlb %>% dplyr::filter(ANL01FL == "Y", PARAMCD == "ALT", AVISIT != "SCREENING") | |
| 88 | #' adlb$AVISIT <- droplevels(adlb$AVISIT) | |
| 89 | #' adlb <- dplyr::mutate(adlb, AVISIT = forcats::fct_reorder(AVISIT, AVISITN, min)) | |
| 90 | #' | |
| 91 | #' # Mean with CI | |
| 92 | #' g_lineplot(adlb, adsl, subtitle = "Laboratory Test:") | |
| 93 | #' | |
| 94 | #' # Mean with CI, no stratification with group_var | |
| 95 | #' g_lineplot(adlb, variables = control_lineplot_vars(group_var = NA)) | |
| 96 | #' | |
| 97 | #' # Mean, upper whisker of CI, no group_var(strata) counts N | |
| 98 | #' g_lineplot( | |
| 99 | #' adlb, | |
| 100 | #' whiskers = "mean_ci_upr", | |
| 101 | #' title = "Plot of Mean and Upper 95% Confidence Limit by Visit" | |
| 102 | #' ) | |
| 103 | #' | |
| 104 | #' # Median with CI | |
| 105 | #' g_lineplot( | |
| 106 | #' adlb, | |
| 107 | #' adsl, | |
| 108 | #' mid = "median", | |
| 109 | #' interval = "median_ci", | |
| 110 | #'   whiskers = c("median_ci_lwr", "median_ci_upr"), | |
| 111 | #' title = "Plot of Median and 95% Confidence Limits by Visit" | |
| 112 | #' ) | |
| 113 | #' | |
| 114 | #' # Mean, +/- SD | |
| 115 | #' g_lineplot(adlb, adsl, | |
| 116 | #' interval = "mean_sdi", | |
| 117 | #'   whiskers = c("mean_sdi_lwr", "mean_sdi_upr"), | |
| 118 | #' title = "Plot of Median +/- SD by Visit" | |
| 119 | #' ) | |
| 120 | #' | |
| 121 | #' # Mean with CI plot with stats table | |
| 122 | #' g_lineplot(adlb, adsl, table = c("n", "mean", "mean_ci")) | |
| 123 | #' | |
| 124 | #' # Mean with CI, table and customized confidence level | |
| 125 | #' g_lineplot( | |
| 126 | #' adlb, | |
| 127 | #' adsl, | |
| 128 | #'   table = c("n", "mean", "mean_ci"), | |
| 129 | #' control = control_analyze_vars(conf_level = 0.80), | |
| 130 | #' title = "Plot of Mean and 80% Confidence Limits by Visit" | |
| 131 | #' ) | |
| 132 | #' | |
| 133 | #' # Mean with CI, table with customized formats/labels | |
| 134 | #' g_lineplot( | |
| 135 | #' adlb, | |
| 136 | #' adsl, | |
| 137 | #'   table = c("n", "mean", "mean_ci"), | |
| 138 | #' table_format = list( | |
| 139 | #'     mean = function(x, ...) { | |
| 140 | #' ifelse(x < 20, round_fmt(x, digits = 3), round_fmt(x, digits = 2)) | |
| 141 | #' }, | |
| 142 | #' mean_ci = "(xx.xxx, xx.xxx)" | |
| 143 | #' ), | |
| 144 | #' table_labels = list( | |
| 145 | #' mean = "mean", | |
| 146 | #' mean_ci = "95% CI" | |
| 147 | #' ) | |
| 148 | #' ) | |
| 149 | #' | |
| 150 | #' # Mean with CI, table, filtered data | |
| 151 | #' adlb_f <- dplyr::filter(adlb, ARMCD != "ARM A" | AVISIT == "BASELINE") | |
| 152 | #' g_lineplot(adlb_f, table = c("n", "mean")) | |
| 153 | #' | |
| 154 | #' @export | |
| 155 | g_lineplot <- function(df, | |
| 156 | alt_counts_df = NULL, | |
| 157 | variables = control_lineplot_vars(), | |
| 158 | mid = "mean", | |
| 159 | interval = "mean_ci", | |
| 160 |                        whiskers = c("mean_ci_lwr", "mean_ci_upr"), | |
| 161 | table = NULL, | |
| 162 | sfun = s_summary, | |
| 163 | ..., | |
| 164 | mid_type = "pl", | |
| 165 | mid_point_size = 2, | |
| 166 | position = ggplot2::position_dodge(width = 0.4), | |
| 167 | legend_title = NULL, | |
| 168 | legend_position = "bottom", | |
| 169 | ggtheme = nestcolor::theme_nest(), | |
| 170 | xticks = NULL, | |
| 171 | xlim = NULL, | |
| 172 | ylim = NULL, | |
| 173 | x_lab = obj_label(df[[variables[["x"]]]]), | |
| 174 | y_lab = NULL, | |
| 175 | y_lab_add_paramcd = TRUE, | |
| 176 | y_lab_add_unit = TRUE, | |
| 177 | title = "Plot of Mean and 95% Confidence Limits by Visit", | |
| 178 | subtitle = "", | |
| 179 | subtitle_add_paramcd = TRUE, | |
| 180 | subtitle_add_unit = TRUE, | |
| 181 | caption = NULL, | |
| 182 | table_format = NULL, | |
| 183 | table_labels = NULL, | |
| 184 | table_font_size = 3, | |
| 185 | errorbar_width = 0.45, | |
| 186 | newpage = lifecycle::deprecated(), | |
| 187 | col = NULL, | |
| 188 | linetype = NULL, | |
| 189 | rel_height_plot = 0.5, | |
| 190 |                        as_list = FALSE) { | |
| 191 | 14x | checkmate::assert_character(variables, any.missing = TRUE) | 
| 192 | 14x | checkmate::assert_character(mid, null.ok = TRUE) | 
| 193 | 14x | checkmate::assert_character(interval, null.ok = TRUE) | 
| 194 | 14x | checkmate::assert_character(col, null.ok = TRUE) | 
| 195 | 14x | checkmate::assert_character(linetype, null.ok = TRUE) | 
| 196 | 14x | checkmate::assert_numeric(xticks, null.ok = TRUE) | 
| 197 | 14x | checkmate::assert_numeric(xlim, finite = TRUE, any.missing = FALSE, len = 2, sorted = TRUE, null.ok = TRUE) | 
| 198 | 14x | checkmate::assert_numeric(ylim, finite = TRUE, any.missing = FALSE, len = 2, sorted = TRUE, null.ok = TRUE) | 
| 199 | 14x | checkmate::assert_number(errorbar_width, lower = 0) | 
| 200 | 14x | checkmate::assert_string(title, null.ok = TRUE) | 
| 201 | 14x | checkmate::assert_string(subtitle, null.ok = TRUE) | 
| 202 | 14x | assert_proportion_value(rel_height_plot) | 
| 203 | 14x | checkmate::assert_logical(as_list) | 
| 204 | ||
| 205 | 14x |   if (!is.null(table)) { | 
| 206 | 6x | table_format <- get_formats_from_stats(table, formats_in = table_format) | 
| 207 | 6x | table_labels <- get_labels_from_stats(table, labels_in = table_labels) %>% .unlist_keep_nulls() | 
| 208 | } | |
| 209 | ||
| 210 | 14x | extra_args <- list(...) | 
| 211 | 14x |   if ("control" %in% names(extra_args)) { | 
| 212 | 4x |     if (!is.null(table) && all(table_labels == .unlist_keep_nulls(get_labels_from_stats(table)))) { | 
| 213 | 3x | table_labels <- table_labels %>% labels_use_control(extra_args[["control"]]) | 
| 214 | } | |
| 215 | } | |
| 216 | ||
| 217 | 14x |   if (is.character(interval)) { | 
| 218 | 14x | checkmate::assert_vector(whiskers, min.len = 0, max.len = 2) | 
| 219 | } | |
| 220 | ||
| 221 | 14x |   if (length(whiskers) == 1) { | 
| 222 | ! | checkmate::assert_character(mid) | 
| 223 | } | |
| 224 | ||
| 225 | 14x |   if (is.character(mid)) { | 
| 226 | 14x | checkmate::assert_scalar(mid_type) | 
| 227 | 14x |     checkmate::assert_subset(mid_type, c("pl", "p", "l")) | 
| 228 | } | |
| 229 | ||
| 230 | 14x | x <- variables[["x"]] | 
| 231 | 14x | y <- variables[["y"]] | 
| 232 | 14x | paramcd <- variables["paramcd"] # NA if paramcd == NA or it is not in variables | 
| 233 | 14x | y_unit <- variables["y_unit"] # NA if y_unit == NA or it is not in variables | 
| 234 | 14x |   if (is.na(variables["group_var"])) { | 
| 235 | 1x | group_var <- NULL # NULL if group_var == NA or it is not in variables | 
| 236 |   } else { | |
| 237 | 13x | group_var <- variables[["group_var"]] | 
| 238 | 13x | subject_var <- variables[["subject_var"]] | 
| 239 | } | |
| 240 | 14x |   if (is.na(variables["facet_var"])) { | 
| 241 | 13x | facet_var <- NULL # NULL if facet_var == NA or it is not in variables | 
| 242 |   } else { | |
| 243 | 1x | facet_var <- variables[["facet_var"]] | 
| 244 | } | |
| 245 | 14x | checkmate::assert_flag(y_lab_add_paramcd, null.ok = TRUE) | 
| 246 | 14x | checkmate::assert_flag(subtitle_add_paramcd, null.ok = TRUE) | 
| 247 | 14x |   if ((!is.null(y_lab) && y_lab_add_paramcd) || (!is.null(subtitle) && subtitle_add_paramcd)) { | 
| 248 | 14x | checkmate::assert_false(is.na(paramcd)) | 
| 249 | 14x | checkmate::assert_scalar(unique(df[[paramcd]])) | 
| 250 | } | |
| 251 | ||
| 252 | 14x | checkmate::assert_flag(y_lab_add_unit, null.ok = TRUE) | 
| 253 | 14x | checkmate::assert_flag(subtitle_add_unit, null.ok = TRUE) | 
| 254 | 14x |   if ((!is.null(y_lab) && y_lab_add_unit) || (!is.null(subtitle) && subtitle_add_unit)) { | 
| 255 | 14x | checkmate::assert_false(is.na(y_unit)) | 
| 256 | 14x | checkmate::assert_scalar(unique(df[[y_unit]])) | 
| 257 | } | |
| 258 | ||
| 259 | 14x |   if (!is.null(group_var) && !is.null(alt_counts_df)) { | 
| 260 | 9x | checkmate::assert_set_equal(unique(alt_counts_df[[group_var]]), unique(df[[group_var]])) | 
| 261 | } | |
| 262 | ||
| 263 | ####################################### | | |
| 264 | # ---- Compute required statistics ---- | |
| 265 | ####################################### | | |
| 266 | # Remove unused levels for x-axis | |
| 267 | 14x |   if (is.factor(df[[x]])) { | 
| 268 | 13x | df[[x]] <- droplevels(df[[x]]) | 
| 269 | } | |
| 270 | ||
| 271 | 14x |   if (!is.null(facet_var) && !is.null(group_var)) { | 
| 272 | 1x | df_grp <- tidyr::expand(df, .data[[facet_var]], .data[[group_var]], .data[[x]]) # expand based on levels of factors | 
| 273 | 13x |   } else if (!is.null(group_var)) { | 
| 274 | 12x | df_grp <- tidyr::expand(df, .data[[group_var]], .data[[x]]) # expand based on levels of factors | 
| 275 |   } else { | |
| 276 | 1x | df_grp <- tidyr::expand(df, NULL, .data[[x]]) | 
| 277 | } | |
| 278 | ||
| 279 | 14x | df_grp <- df_grp %>% | 
| 280 | 14x | dplyr::full_join(y = df[, c(facet_var, group_var, x, y)], by = c(facet_var, group_var, x), multiple = "all") %>% | 
| 281 | 14x | dplyr::group_by_at(c(facet_var, group_var, x)) | 
| 282 | ||
| 283 | 14x | df_stats <- df_grp %>% | 
| 284 | 14x | dplyr::summarise( | 
| 285 | 14x | data.frame(t(do.call(c, unname(sfun(.data[[y]])[c(mid, interval)])))), | 
| 286 | 14x | .groups = "drop" | 
| 287 | ) | |
| 288 | ||
| 289 | 14x | df_stats <- df_stats[!is.na(df_stats[[mid]]), ] | 
| 290 | ||
| 291 | # add number of objects N in group_var (strata) | |
| 292 | 14x |   if (!is.null(group_var) && !is.null(alt_counts_df)) { | 
| 293 | 9x | strata_N <- paste0(group_var, "_N") # nolint | 
| 294 | ||
| 295 | 9x | df_N <- stats::aggregate(eval(parse(text = subject_var)) ~ eval(parse(text = group_var)), data = alt_counts_df, FUN = function(x) length(unique(x))) # nolint | 
| 296 | 9x | colnames(df_N) <- c(group_var, "N") # nolint | 
| 297 | 9x | df_N[[strata_N]] <- paste0(df_N[[group_var]], " (N = ", df_N$N, ")") # nolint | 
| 298 | ||
| 299 | # keep strata factor levels | |
| 300 | 9x |     matches <- sapply(unique(df_N[[group_var]]), function(x) { | 
| 301 | 25x |       regex_pattern <- gsub("([][(){}^$.|*+?\\\\])", "\\\\\\1", x) | 
| 302 | 25x | unique(df_N[[paste0(group_var, "_N")]])[grepl( | 
| 303 | 25x |         paste0("^", regex_pattern), | 
| 304 | 25x | unique(df_N[[paste0(group_var, "_N")]]) | 
| 305 | )] | |
| 306 | }) | |
| 307 | 9x | df_N[[paste0(group_var, "_N")]] <- factor(df_N[[group_var]]) # nolint | 
| 308 | 9x | levels(df_N[[paste0(group_var, "_N")]]) <- unlist(matches) # nolint | 
| 309 | ||
| 310 | # strata_N should not be in colnames(df_stats) | |
| 311 | 9x | checkmate::assert_disjunct(strata_N, colnames(df_stats)) | 
| 312 | ||
| 313 | 9x | df_stats <- merge(x = df_stats, y = df_N[, c(group_var, strata_N)], by = group_var) | 
| 314 | 5x |   } else if (!is.null(group_var)) { | 
| 315 | 4x | strata_N <- group_var # nolint | 
| 316 |   } else { | |
| 317 | 1x | strata_N <- NULL # nolint | 
| 318 | } | |
| 319 | ||
| 320 | ############################################### | | |
| 321 | # ---- Prepare certain plot's properties. ---- | |
| 322 | ############################################### | | |
| 323 | # legend title | |
| 324 | 14x |   if (is.null(legend_title) && !is.null(group_var) && legend_position != "none") { | 
| 325 | 13x | legend_title <- attr(df[[group_var]], "label") | 
| 326 | } | |
| 327 | ||
| 328 | # y label | |
| 329 | 14x |   if (!is.null(y_lab)) { | 
| 330 | 4x |     if (y_lab_add_paramcd) { | 
| 331 | 4x | y_lab <- paste(y_lab, unique(df[[paramcd]])) | 
| 332 | } | |
| 333 | ||
| 334 | 4x |     if (y_lab_add_unit) { | 
| 335 | 4x |       y_lab <- paste0(y_lab, " (", unique(df[[y_unit]]), ")") | 
| 336 | } | |
| 337 | ||
| 338 | 4x | y_lab <- trimws(y_lab) | 
| 339 | } | |
| 340 | ||
| 341 | # subtitle | |
| 342 | 14x |   if (!is.null(subtitle)) { | 
| 343 | 14x |     if (subtitle_add_paramcd) { | 
| 344 | 14x | subtitle <- paste(subtitle, unique(df[[paramcd]])) | 
| 345 | } | |
| 346 | ||
| 347 | 14x |     if (subtitle_add_unit) { | 
| 348 | 14x |       subtitle <- paste0(subtitle, " (", unique(df[[y_unit]]), ")") | 
| 349 | } | |
| 350 | ||
| 351 | 14x | subtitle <- trimws(subtitle) | 
| 352 | } | |
| 353 | ||
| 354 | ############################### | | |
| 355 | # ---- Build plot object. ---- | |
| 356 | ############################### | | |
| 357 | 14x | p <- ggplot2::ggplot( | 
| 358 | 14x | data = df_stats, | 
| 359 | 14x | mapping = ggplot2::aes( | 
| 360 | 14x | x = .data[[x]], y = .data[[mid]], | 
| 361 | 14x | color = if (is.null(strata_N)) NULL else .data[[strata_N]], | 
| 362 | 14x | shape = if (is.null(strata_N)) NULL else .data[[strata_N]], | 
| 363 | 14x | lty = if (is.null(strata_N)) NULL else .data[[strata_N]], | 
| 364 | 14x | group = if (is.null(strata_N)) NULL else .data[[strata_N]] | 
| 365 | ) | |
| 366 | ) | |
| 367 | ||
| 368 | 14x |   if (!is.null(group_var) && nlevels(df_stats[[strata_N]]) > 6) { | 
| 369 | 1x | p <- p + | 
| 370 | 1x | scale_shape_manual(values = seq(15, 15 + nlevels(df_stats[[strata_N]]))) | 
| 371 | } | |
| 372 | ||
| 373 | 14x |   if (!is.null(mid)) { | 
| 374 | # points | |
| 375 | 14x |     if (grepl("p", mid_type, fixed = TRUE)) { | 
| 376 | 14x | p <- p + ggplot2::geom_point(position = position, size = mid_point_size, na.rm = TRUE) | 
| 377 | } | |
| 378 | ||
| 379 | # lines - plotted only if there is a strata grouping (group_var) | |
| 380 | 14x |     if (grepl("l", mid_type, fixed = TRUE) && !is.null(strata_N)) { | 
| 381 | 13x | p <- p + ggplot2::geom_line(position = position, na.rm = TRUE) | 
| 382 | } | |
| 383 | } | |
| 384 | ||
| 385 | # interval | |
| 386 | 14x |   if (!is.null(interval)) { | 
| 387 | 14x | p <- p + | 
| 388 | 14x | ggplot2::geom_errorbar( | 
| 389 | 14x | ggplot2::aes(ymin = .data[[whiskers[1]]], ymax = .data[[whiskers[max(1, length(whiskers))]]]), | 
| 390 | 14x | width = errorbar_width, | 
| 391 | 14x | position = position | 
| 392 | ) | |
| 393 | ||
| 394 | 14x |     if (length(whiskers) == 1) { # lwr or upr only; mid is then required | 
| 395 | # workaround as geom_errorbar does not provide single-direction whiskers | |
| 396 | ! | p <- p + | 
| 397 | ! | ggplot2::geom_linerange( | 
| 398 | ! | data = df_stats[!is.na(df_stats[[whiskers]]), ], # as na.rm =TRUE does not suppress warnings | 
| 399 | ! | ggplot2::aes(ymin = .data[[mid]], ymax = .data[[whiskers]]), | 
| 400 | ! | position = position, | 
| 401 | ! | na.rm = TRUE, | 
| 402 | ! | show.legend = FALSE | 
| 403 | ) | |
| 404 | } | |
| 405 | } | |
| 406 | ||
| 407 | 14x |   if (is.numeric(df_stats[[x]])) { | 
| 408 | 1x | if (length(xticks) == 1) xticks <- seq(from = min(df_stats[[x]]), to = max(df_stats[[x]]), by = xticks) | 
| 409 | 1x | p <- p + ggplot2::scale_x_continuous(breaks = if (!is.null(xticks)) xticks else waiver(), limits = xlim) | 
| 410 | } | |
| 411 | ||
| 412 | 14x | p <- p + | 
| 413 | 14x | ggplot2::scale_y_continuous(labels = scales::comma, limits = ylim) + | 
| 414 | 14x | ggplot2::labs( | 
| 415 | 14x | title = title, | 
| 416 | 14x | subtitle = subtitle, | 
| 417 | 14x | caption = caption, | 
| 418 | 14x | color = legend_title, | 
| 419 | 14x | lty = legend_title, | 
| 420 | 14x | shape = legend_title, | 
| 421 | 14x | x = x_lab, | 
| 422 | 14x | y = y_lab | 
| 423 | ) | |
| 424 | ||
| 425 | 14x |   if (!is.null(col)) { | 
| 426 | 1x | p <- p + | 
| 427 | 1x | ggplot2::scale_color_manual(values = col) | 
| 428 | } | |
| 429 | 14x |   if (!is.null(linetype)) { | 
| 430 | 1x | p <- p + | 
| 431 | 1x | ggplot2::scale_linetype_manual(values = linetype) | 
| 432 | } | |
| 433 | ||
| 434 | 14x |   if (!is.null(facet_var)) { | 
| 435 | 1x | p <- p + | 
| 436 | 1x | facet_grid(cols = vars(df_stats[[facet_var]])) | 
| 437 | } | |
| 438 | ||
| 439 | 14x |   if (!is.null(ggtheme)) { | 
| 440 | 14x | p <- p + ggtheme | 
| 441 |   } else { | |
| 442 | ! | p <- p + | 
| 443 | ! | ggplot2::theme_bw() + | 
| 444 | ! | ggplot2::theme( | 
| 445 | ! | legend.key.width = grid::unit(1, "cm"), | 
| 446 | ! | legend.position = legend_position, | 
| 447 | ! | legend.direction = ifelse( | 
| 448 | ! |           legend_position %in% c("top", "bottom"), | 
| 449 | ! | "horizontal", | 
| 450 | ! | "vertical" | 
| 451 | ) | |
| 452 | ) | |
| 453 | } | |
| 454 | ||
| 455 | ############################################################# | | |
| 456 | # ---- Optionally, add table to the bottom of the plot. ---- | |
| 457 | ############################################################# | | |
| 458 | 14x |   if (!is.null(table)) { | 
| 459 | 6x | df_stats_table <- df_grp %>% | 
| 460 | 6x | dplyr::summarise( | 
| 461 | 6x | h_format_row( | 
| 462 | 6x | x = sfun(.data[[y]], ...)[table], | 
| 463 | 6x | format = table_format, | 
| 464 | 6x | labels = table_labels | 
| 465 | ), | |
| 466 | 6x | .groups = "drop" | 
| 467 | ) | |
| 468 | ||
| 469 | 6x | stats_lev <- rev(setdiff(colnames(df_stats_table), c(group_var, x))) | 
| 470 | ||
| 471 | 6x | df_stats_table <- df_stats_table %>% | 
| 472 | 6x | tidyr::pivot_longer( | 
| 473 | 6x | cols = -dplyr::all_of(c(group_var, x)), | 
| 474 | 6x | names_to = "stat", | 
| 475 | 6x | values_to = "value", | 
| 476 | 6x | names_ptypes = list(stat = factor(levels = stats_lev)) | 
| 477 | ) | |
| 478 | ||
| 479 | 6x | tbl <- ggplot2::ggplot( | 
| 480 | 6x | df_stats_table, | 
| 481 | 6x | ggplot2::aes(x = .data[[x]], y = .data[["stat"]], label = .data[["value"]]) | 
| 482 | ) + | |
| 483 | 6x | ggplot2::geom_text(size = table_font_size) + | 
| 484 | 6x | ggplot2::theme_bw() + | 
| 485 | 6x | ggplot2::theme( | 
| 486 | 6x | panel.border = ggplot2::element_blank(), | 
| 487 | 6x | panel.grid.major = ggplot2::element_blank(), | 
| 488 | 6x | panel.grid.minor = ggplot2::element_blank(), | 
| 489 | 6x | axis.ticks = ggplot2::element_blank(), | 
| 490 | 6x | axis.title = ggplot2::element_blank(), | 
| 491 | 6x | axis.text.x = ggplot2::element_blank(), | 
| 492 | 6x | axis.text.y = ggplot2::element_text( | 
| 493 | 6x | size = table_font_size * ggplot2::.pt, | 
| 494 | 6x | margin = ggplot2::margin(t = 0, r = 0, b = 0, l = 5) | 
| 495 | ), | |
| 496 | 6x | strip.text = ggplot2::element_text(hjust = 0), | 
| 497 | 6x | strip.text.x = ggplot2::element_text( | 
| 498 | 6x | size = table_font_size * ggplot2::.pt, | 
| 499 | 6x | margin = ggplot2::margin(1.5, 0, 1.5, 0, "pt") | 
| 500 | ), | |
| 501 | 6x | strip.background = ggplot2::element_rect(fill = "grey95", color = NA), | 
| 502 | 6x | legend.position = "none" | 
| 503 | ) | |
| 504 | ||
| 505 | 6x |     if (!is.null(group_var)) { | 
| 506 | 6x | tbl <- tbl + ggplot2::facet_wrap(facets = group_var, ncol = 1) | 
| 507 | } | |
| 508 | ||
| 509 | 6x |     if (!as_list) { | 
| 510 | # align plot and table | |
| 511 | 5x | cowplot::plot_grid( | 
| 512 | 5x | p, | 
| 513 | 5x | tbl, | 
| 514 | 5x | ncol = 1, | 
| 515 | 5x | align = "v", | 
| 516 | 5x | axis = "tblr", | 
| 517 | 5x | rel_heights = c(rel_height_plot, 1 - rel_height_plot) | 
| 518 | ) | |
| 519 |     } else { | |
| 520 | 1x | list(plot = p, table = tbl) | 
| 521 | } | |
| 522 |   } else { | |
| 523 | 8x | p | 
| 524 | } | |
| 525 | } | |
| 526 | ||
| 527 | #' Helper function to format the optional `g_lineplot` table | |
| 528 | #' | |
| 529 | #' @description `r lifecycle::badge("stable")` | |
| 530 | #' | |
| 531 | #' @param x (named `list`)\cr list of numerical values to be formatted and optionally labeled. | |
| 532 | #' Elements of `x` must be `numeric` vectors. | |
| 533 | #' @param format (named `character` or `NULL`)\cr format patterns for `x`. Names of the `format` must | |
| 534 | #' match the names of `x`. This parameter is passed directly to the `rtables::format_rcell` | |
| 535 | #' function through the `format` parameter. | |
| 536 | #' @param labels (named `character` or `NULL`)\cr optional labels for `x`. Names of the `labels` must | |
| 537 | #' match the names of `x`. When a label is not specified for an element of `x`, | |
| 538 | #' then this function tries to use `label` or `names` (in this order) attribute of that element | |
| 539 | #' (depending on which one exists and it is not `NULL` or `NA` or `NaN`). If none of these attributes | |
| 540 | #' are attached to a given element of `x`, then the label is automatically generated. | |
| 541 | #' | |
| 542 | #' @return A single row `data.frame` object. | |
| 543 | #' | |
| 544 | #' @examples | |
| 545 | #' mean_ci <- c(48, 51) | |
| 546 | #' x <- list(mean = 50, mean_ci = mean_ci) | |
| 547 | #' format <- c(mean = "xx.x", mean_ci = "(xx.xx, xx.xx)") | |
| 548 | #' labels <- c(mean = "My Mean") | |
| 549 | #' h_format_row(x, format, labels) | |
| 550 | #' | |
| 551 | #' attr(mean_ci, "label") <- "Mean 95% CI" | |
| 552 | #' x <- list(mean = 50, mean_ci = mean_ci) | |
| 553 | #' h_format_row(x, format, labels) | |
| 554 | #' | |
| 555 | #' @export | |
| 556 | h_format_row <- function(x, format, labels = NULL) { | |
| 557 | # cell: one row, one column data.frame | |
| 558 | 110x |   format_cell <- function(x, format, label = NULL) { | 
| 559 | 292x | fc <- format_rcell(x = x, format = format) | 
| 560 | 292x |     if (is.na(fc)) { | 
| 561 | ! | fc <- "NA" | 
| 562 | } | |
| 563 | 292x | x_label <- attr(x, "label") | 
| 564 | 292x |     if (!is.null(label) && !is.na(label)) { | 
| 565 | 290x | names(fc) <- label | 
| 566 | 2x |     } else if (!is.null(x_label) && !is.na(x_label)) { | 
| 567 | 1x | names(fc) <- x_label | 
| 568 | 1x |     } else if (length(x) == length(fc)) { | 
| 569 | ! | names(fc) <- names(x) | 
| 570 | } | |
| 571 | 292x | as.data.frame(t(fc)) | 
| 572 | } | |
| 573 | ||
| 574 | 110x | row <- do.call( | 
| 575 | 110x | cbind, | 
| 576 | 110x | lapply( | 
| 577 | 110x | names(x), function(xn) format_cell(x[[xn]], format = format[[xn]], label = labels[xn]) | 
| 578 | ) | |
| 579 | ) | |
| 580 | ||
| 581 | 110x | row | 
| 582 | } | |
| 583 | ||
| 584 | #' Control function for `g_lineplot()` | |
| 585 | #' | |
| 586 | #' @description `r lifecycle::badge("stable")` | |
| 587 | #' | |
| 588 | #' Default values for `variables` parameter in `g_lineplot` function. | |
| 589 | #' A variable's default value can be overwritten for any variable. | |
| 590 | #' | |
| 591 | #' @param x (`string`)\cr x-variable name. | |
| 592 | #' @param y (`string`)\cr y-variable name. | |
| 593 | #' @param group_var (`string` or `NA`)\cr group variable name. | |
| 594 | #' @param subject_var (`string` or `NA`)\cr subject variable name. | |
| 595 | #' @param facet_var (`string` or `NA`)\cr faceting variable name. | |
| 596 | #' @param paramcd (`string` or `NA`)\cr parameter code variable name. | |
| 597 | #' @param y_unit (`string` or `NA`)\cr y-axis unit variable name. | |
| 598 | #' | |
| 599 | #' @return A named character vector of variable names. | |
| 600 | #' | |
| 601 | #' @examples | |
| 602 | #' control_lineplot_vars() | |
| 603 | #' control_lineplot_vars(group_var = NA) | |
| 604 | #' | |
| 605 | #' @export | |
| 606 | control_lineplot_vars <- function(x = "AVISIT", | |
| 607 | y = "AVAL", | |
| 608 | group_var = "ARM", | |
| 609 | facet_var = NA, | |
| 610 | paramcd = "PARAMCD", | |
| 611 | y_unit = "AVALU", | |
| 612 |                                   subject_var = "USUBJID") { | |
| 613 | 17x | checkmate::assert_string(x) | 
| 614 | 17x | checkmate::assert_string(y) | 
| 615 | 17x | checkmate::assert_string(group_var, na.ok = TRUE, null.ok = TRUE) | 
| 616 | 17x | checkmate::assert_string(facet_var, na.ok = TRUE, null.ok = TRUE) | 
| 617 | 17x | checkmate::assert_string(subject_var, na.ok = TRUE, null.ok = TRUE) | 
| 618 | 17x | checkmate::assert_string(paramcd, na.ok = TRUE, null.ok = TRUE) | 
| 619 | 17x | checkmate::assert_string(y_unit, na.ok = TRUE, null.ok = TRUE) | 
| 620 | ||
| 621 | 17x | variables <- c( | 
| 622 | 17x | x = x, y = y, group_var = group_var, paramcd = paramcd, | 
| 623 | 17x | y_unit = y_unit, subject_var = subject_var, facet_var = facet_var | 
| 624 | ) | |
| 625 | 17x | return(variables) | 
| 626 | } | 
| 1 | #' Cox proportional hazards regression | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Fits a Cox regression model and estimates hazard ratio to describe the effect size in a survival analysis. | |
| 6 | #' | |
| 7 | #' @inheritParams argument_convention | |
| 8 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 9 | #' | |
| 10 | #'   Options are: ``r shQuote(get_stats("summarize_coxreg"), type = "sh")`` | |
| 11 | #' | |
| 12 | #' @details Cox models are the most commonly used methods to estimate the magnitude of | |
| 13 | #' the effect in survival analysis. It assumes proportional hazards: the ratio | |
| 14 | #' of the hazards between groups (e.g., two arms) is constant over time. | |
| 15 | #' This ratio is referred to as the "hazard ratio" (HR) and is one of the | |
| 16 | #' most commonly reported metrics to describe the effect size in survival | |
| 17 | #' analysis (NEST Team, 2020). | |
| 18 | #' | |
| 19 | #' @seealso [fit_coxreg] for relevant fitting functions, [h_cox_regression] for relevant | |
| 20 | #' helper functions, and [tidy_coxreg] for custom tidy methods. | |
| 21 | #' | |
| 22 | #' @examples | |
| 23 | #' library(survival) | |
| 24 | #' | |
| 25 | #' # Testing dataset [survival::bladder]. | |
| 26 | #' set.seed(1, kind = "Mersenne-Twister") | |
| 27 | #' dta_bladder <- with( | |
| 28 | #' data = bladder[bladder$enum < 5, ], | |
| 29 | #' tibble::tibble( | |
| 30 | #' TIME = stop, | |
| 31 | #' STATUS = event, | |
| 32 | #' ARM = as.factor(rx), | |
| 33 | #'     COVAR1 = as.factor(enum) %>% formatters::with_label("A Covariate Label"), | |
| 34 | #' COVAR2 = factor( | |
| 35 | #' sample(as.factor(enum)), | |
| 36 | #'       levels = 1:4, labels = c("F", "F", "M", "M") | |
| 37 | #'     ) %>% formatters::with_label("Sex (F/M)") | |
| 38 | #' ) | |
| 39 | #' ) | |
| 40 | #' dta_bladder$AGE <- sample(20:60, size = nrow(dta_bladder), replace = TRUE) | |
| 41 | #' dta_bladder$STUDYID <- factor("X") | |
| 42 | #' | |
| 43 | #' u1_variables <- list( | |
| 44 | #'   time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2") | |
| 45 | #' ) | |
| 46 | #' | |
| 47 | #' u2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2")) | |
| 48 | #' | |
| 49 | #' m1_variables <- list( | |
| 50 | #'   time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2") | |
| 51 | #' ) | |
| 52 | #' | |
| 53 | #' m2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2")) | |
| 54 | #' | |
| 55 | #' @name cox_regression | |
| 56 | #' @order 1 | |
| 57 | NULL | |
| 58 | ||
| 59 | #' @describeIn cox_regression Statistics function that transforms results tabulated | |
| 60 | #' from [fit_coxreg_univar()] or [fit_coxreg_multivar()] into a list. | |
| 61 | #' | |
| 62 | #' @param model_df (`data.frame`)\cr contains the resulting model fit from a [fit_coxreg] | |
| 63 | #' function with tidying applied via [broom::tidy()]. | |
| 64 | #' @param .stats (`character`)\cr the names of statistics to be reported among: | |
| 65 | #' * `n`: number of observations (univariate only) | |
| 66 | #' * `hr`: hazard ratio | |
| 67 | #' * `ci`: confidence interval | |
| 68 | #' * `pval`: p-value of the treatment effect | |
| 69 | #' * `pval_inter`: p-value of the interaction effect between the treatment and the covariate (univariate only) | |
| 70 | #' @param .which_vars (`character`)\cr which rows should statistics be returned for from the given model. | |
| 71 | #' Defaults to `"all"`. Other options include `"var_main"` for main effects, `"inter"` for interaction effects, | |
| 72 | #' and `"multi_lvl"` for multivariate model covariate level rows. When `.which_vars` is `"all"`, specific | |
| 73 | #' variables can be selected by specifying `.var_nms`. | |
| 74 | #' @param .var_nms (`character`)\cr the `term` value of rows in `df` for which `.stats` should be returned. Typically | |
| 75 | #' this is the name of a variable. If using variable labels, `var` should be a vector of both the desired | |
| 76 | #' variable name and the variable label in that order to see all `.stats` related to that variable. When `.which_vars` | |
| 77 | #' is `"var_main"`, `.var_nms` should be only the variable name. | |
| 78 | #' | |
| 79 | #' @return | |
| 80 | #' * `s_coxreg()` returns the selected statistic for from the Cox regression model for the selected variable(s). | |
| 81 | #' | |
| 82 | #' @examples | |
| 83 | #' # s_coxreg | |
| 84 | #' | |
| 85 | #' # Univariate | |
| 86 | #' univar_model <- fit_coxreg_univar(variables = u1_variables, data = dta_bladder) | |
| 87 | #' df1 <- broom::tidy(univar_model) | |
| 88 | #' | |
| 89 | #' s_coxreg(model_df = df1, .stats = "hr") | |
| 90 | #' | |
| 91 | #' # Univariate with interactions | |
| 92 | #' univar_model_inter <- fit_coxreg_univar( | |
| 93 | #' variables = u1_variables, control = control_coxreg(interaction = TRUE), data = dta_bladder | |
| 94 | #' ) | |
| 95 | #' df1_inter <- broom::tidy(univar_model_inter) | |
| 96 | #' | |
| 97 | #' s_coxreg(model_df = df1_inter, .stats = "hr", .which_vars = "inter", .var_nms = "COVAR1") | |
| 98 | #' | |
| 99 | #' # Univariate without treatment arm - only "COVAR2" covariate effects | |
| 100 | #' univar_covs_model <- fit_coxreg_univar(variables = u2_variables, data = dta_bladder) | |
| 101 | #' df1_covs <- broom::tidy(univar_covs_model) | |
| 102 | #' | |
| 103 | #' s_coxreg(model_df = df1_covs, .stats = "hr", .var_nms = c("COVAR2", "Sex (F/M)")) | |
| 104 | #' | |
| 105 | #' # Multivariate. | |
| 106 | #' multivar_model <- fit_coxreg_multivar(variables = m1_variables, data = dta_bladder) | |
| 107 | #' df2 <- broom::tidy(multivar_model) | |
| 108 | #' | |
| 109 | #' s_coxreg(model_df = df2, .stats = "pval", .which_vars = "var_main", .var_nms = "COVAR1") | |
| 110 | #' s_coxreg( | |
| 111 | #' model_df = df2, .stats = "pval", .which_vars = "multi_lvl", | |
| 112 | #'   .var_nms = c("COVAR1", "A Covariate Label") | |
| 113 | #' ) | |
| 114 | #' | |
| 115 | #' # Multivariate without treatment arm - only "COVAR1" main effect | |
| 116 | #' multivar_covs_model <- fit_coxreg_multivar(variables = m2_variables, data = dta_bladder) | |
| 117 | #' df2_covs <- broom::tidy(multivar_covs_model) | |
| 118 | #' | |
| 119 | #' s_coxreg(model_df = df2_covs, .stats = "hr") | |
| 120 | #' | |
| 121 | #' @export | |
| 122 | s_coxreg <- function(model_df, .stats, .which_vars = "all", .var_nms = NULL) { | |
| 123 | 291x | assert_df_with_variables(model_df, list(term = "term", stat = .stats)) | 
| 124 | 291x |   checkmate::assert_multi_class(model_df$term, classes = c("factor", "character")) | 
| 125 | 291x | model_df$term <- as.character(model_df$term) | 
| 126 | 291x | .var_nms <- .var_nms[!is.na(.var_nms)] | 
| 127 | ||
| 128 | 289x | if (length(.var_nms) > 0) model_df <- model_df[model_df$term %in% .var_nms, ] | 
| 129 | 69x | if (.which_vars == "multi_lvl") model_df$term <- tail(.var_nms, 1) | 
| 130 | ||
| 131 | # We need a list with names corresponding to the stats to display of equal length to the list of stats. | |
| 132 | 291x | y <- split(model_df, f = model_df$term, drop = FALSE) | 
| 133 | 291x | y <- stats::setNames(y, nm = rep(.stats, length(y))) | 
| 134 | ||
| 135 | 291x |   if (.which_vars == "var_main") { | 
| 136 | 128x | y <- lapply(y, function(x) x[1, ]) # only main effect | 
| 137 | 163x |   } else if (.which_vars %in% c("inter", "multi_lvl")) { | 
| 138 | 120x | y <- lapply(y, function(x) if (nrow(y[[1]]) > 1) x[-1, ] else x) # exclude main effect | 
| 139 | } | |
| 140 | ||
| 141 | 291x | lapply( | 
| 142 | 291x | X = y, | 
| 143 | 291x |     FUN = function(x) { | 
| 144 | 295x | z <- as.list(x[[.stats]]) | 
| 145 | 295x | stats::setNames(z, nm = x$term_label) | 
| 146 | } | |
| 147 | ) | |
| 148 | } | |
| 149 | ||
| 150 | #' @describeIn cox_regression Analysis function which is used as `afun` in [rtables::analyze()] | |
| 151 | #' and `cfun` in [rtables::summarize_row_groups()] within `summarize_coxreg()`. | |
| 152 | #' | |
| 153 | #' @param eff (`flag`)\cr whether treatment effect should be calculated. Defaults to `FALSE`. | |
| 154 | #' @param var_main (`flag`)\cr whether main effects should be calculated. Defaults to `FALSE`. | |
| 155 | #' @param na_str (`string`)\cr custom string to replace all `NA` values with. Defaults to `""`. | |
| 156 | #' @param cache_env (`environment`)\cr an environment object used to cache the regression model in order to | |
| 157 | #' avoid repeatedly fitting the same model for every row in the table. Defaults to `NULL` (no caching). | |
| 158 | #' @param varlabels (`list`)\cr a named list corresponds to the names of variables found in data, passed | |
| 159 | #' as a named list and corresponding to time, event, arm, strata, and covariates terms. If arm is missing | |
| 160 | #' from variables, then only Cox model(s) including the covariates will be fitted and the corresponding | |
| 161 | #' effect estimates will be tabulated later. | |
| 162 | #' | |
| 163 | #' @return | |
| 164 | #' * `a_coxreg()` returns formatted [rtables::CellValue()]. | |
| 165 | #' | |
| 166 | #' @examples | |
| 167 | #' a_coxreg( | |
| 168 | #' df = dta_bladder, | |
| 169 | #' labelstr = "Label 1", | |
| 170 | #' variables = u1_variables, | |
| 171 | #' .spl_context = list(value = "COVAR1"), | |
| 172 | #' .stats = "n", | |
| 173 | #' .formats = "xx" | |
| 174 | #' ) | |
| 175 | #' | |
| 176 | #' a_coxreg( | |
| 177 | #' df = dta_bladder, | |
| 178 | #' labelstr = "", | |
| 179 | #' variables = u1_variables, | |
| 180 | #' .spl_context = list(value = "COVAR2"), | |
| 181 | #' .stats = "pval", | |
| 182 | #' .formats = "xx.xxxx" | |
| 183 | #' ) | |
| 184 | #' | |
| 185 | #' @export | |
| 186 | a_coxreg <- function(df, | |
| 187 | labelstr, | |
| 188 | eff = FALSE, | |
| 189 | var_main = FALSE, | |
| 190 | multivar = FALSE, | |
| 191 | variables, | |
| 192 | at = list(), | |
| 193 | control = control_coxreg(), | |
| 194 | .spl_context, | |
| 195 | .stats, | |
| 196 | .formats, | |
| 197 | .indent_mods = NULL, | |
| 198 | na_str = "", | |
| 199 |                      cache_env = NULL) { | |
| 200 | 288x | cov_no_arm <- !multivar && !"arm" %in% names(variables) && control$interaction # special case: univar no arm | 
| 201 | 288x | cov <- tail(.spl_context$value, 1) # current variable/covariate | 
| 202 | 288x | var_lbl <- formatters::var_labels(df)[cov] # check for df labels | 
| 203 | 288x |   if (length(labelstr) > 1) { | 
| 204 | 8x | labelstr <- if (cov %in% names(labelstr)) labelstr[[cov]] else var_lbl # use df labels if none | 
| 205 | 280x |   } else if (!is.na(var_lbl) && labelstr == cov && cov %in% variables$covariates) { | 
| 206 | 67x | labelstr <- var_lbl | 
| 207 | } | |
| 208 | 288x |   if (eff || multivar || cov_no_arm) { | 
| 209 | 143x | control$interaction <- FALSE | 
| 210 |   } else { | |
| 211 | 145x | variables$covariates <- cov | 
| 212 | 50x | if (var_main) control$interaction <- TRUE | 
| 213 | } | |
| 214 | ||
| 215 | 288x |   if (is.null(cache_env[[cov]])) { | 
| 216 | 47x |     if (!multivar) { | 
| 217 | 32x | model <- fit_coxreg_univar(variables = variables, data = df, at = at, control = control) %>% broom::tidy() | 
| 218 |     } else { | |
| 219 | 15x | model <- fit_coxreg_multivar(variables = variables, data = df, control = control) %>% broom::tidy() | 
| 220 | } | |
| 221 | 47x | cache_env[[cov]] <- model | 
| 222 |   } else { | |
| 223 | 241x | model <- cache_env[[cov]] | 
| 224 | } | |
| 225 | 148x | if (!multivar && !var_main) model[, "pval_inter"] <- NA_real_ | 
| 226 | ||
| 227 | 288x |   if (cov_no_arm || (!cov_no_arm && !"arm" %in% names(variables) && is.numeric(df[[cov]]))) { | 
| 228 | 15x | multivar <- TRUE | 
| 229 | 3x | if (!cov_no_arm) var_main <- TRUE | 
| 230 | } | |
| 231 | ||
| 232 | 288x | vars_coxreg <- list(which_vars = "all", var_nms = NULL) | 
| 233 | 288x |   if (eff) { | 
| 234 | 65x |     if (multivar && !var_main) { # multivar treatment level | 
| 235 | 12x | var_lbl_arm <- formatters::var_labels(df)[[variables$arm]] | 
| 236 | 12x |       vars_coxreg[c("var_nms", "which_vars")] <- list(c(variables$arm, var_lbl_arm), "multi_lvl") | 
| 237 |     } else { # treatment effect | |
| 238 | 53x | vars_coxreg["var_nms"] <- variables$arm | 
| 239 | 12x | if (var_main) vars_coxreg["which_vars"] <- "var_main" | 
| 240 | } | |
| 241 |   } else { | |
| 242 | 223x |     if (!multivar || (multivar && var_main && !is.numeric(df[[cov]]))) { # covariate effect/level | 
| 243 | 166x |       vars_coxreg[c("var_nms", "which_vars")] <- list(cov, "var_main") | 
| 244 | 57x |     } else if (multivar) { # multivar covariate level | 
| 245 | 57x |       vars_coxreg[c("var_nms", "which_vars")] <- list(c(cov, var_lbl), "multi_lvl") | 
| 246 | 12x | if (var_main) model[cov, .stats] <- NA_real_ | 
| 247 | } | |
| 248 | 50x | if (!multivar && !var_main && control$interaction) vars_coxreg["which_vars"] <- "inter" # interaction effect | 
| 249 | } | |
| 250 | 288x | var_vals <- s_coxreg(model, .stats, .which_vars = vars_coxreg$which_vars, .var_nms = vars_coxreg$var_nms)[[1]] | 
| 251 | 288x |   var_names <- if (all(grepl("\\(reference = ", names(var_vals))) && labelstr != tail(.spl_context$value, 1)) { | 
| 252 | 27x | paste(c(labelstr, tail(strsplit(names(var_vals), " ")[[1]], 3)), collapse = " ") # "reference" main effect labels | 
| 253 | 288x | } else if ((!multivar && !eff && !(!var_main && control$interaction) && nchar(labelstr) > 0) || | 
| 254 | 288x |     (multivar && var_main && is.numeric(df[[cov]]))) { # nolint | 
| 255 | 71x | labelstr # other main effect labels | 
| 256 | 288x |   } else if (multivar && !eff && !var_main && is.numeric(df[[cov]])) { | 
| 257 | 12x | "All" # multivar numeric covariate | 
| 258 |   } else { | |
| 259 | 178x | names(var_vals) | 
| 260 | } | |
| 261 | 288x | in_rows( | 
| 262 | 288x | .list = var_vals, .names = var_names, .labels = var_names, .indent_mods = .indent_mods, | 
| 263 | 288x | .formats = stats::setNames(rep(.formats, length(var_names)), var_names), | 
| 264 | 288x | .format_na_strs = stats::setNames(rep(na_str, length(var_names)), var_names) | 
| 265 | ) | |
| 266 | } | |
| 267 | ||
| 268 | #' @describeIn cox_regression Layout-creating function which creates a Cox regression summary table | |
| 269 | #' layout. This function is a wrapper for several `rtables` layouting functions. This function | |
| 270 | #' is a wrapper for [rtables::analyze_colvars()] and [rtables::summarize_row_groups()]. | |
| 271 | #' | |
| 272 | #' @inheritParams fit_coxreg_univar | |
| 273 | #' @param multivar (`flag`)\cr whether multivariate Cox regression should run (defaults to `FALSE`), otherwise | |
| 274 | #' univariate Cox regression will run. | |
| 275 | #' @param common_var (`string`)\cr the name of a factor variable in the dataset which takes the same value | |
| 276 | #' for all rows. This should be created during pre-processing if no such variable currently exists. | |
| 277 | #' @param .section_div (`string` or `NA`)\cr string which should be repeated as a section divider between sections. | |
| 278 | #' Defaults to `NA` for no section divider. If a vector of two strings are given, the first will be used between | |
| 279 | #' treatment and covariate sections and the second between different covariates. | |
| 280 | #' | |
| 281 | #' @return | |
| 282 | #' * `summarize_coxreg()` returns a layout object suitable for passing to further layouting functions, | |
| 283 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add a Cox regression table | |
| 284 | #' containing the chosen statistics to the table layout. | |
| 285 | #' | |
| 286 | #' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()] which also take the `variables`, `data`, | |
| 287 | #' `at` (univariate only), and `control` arguments but return unformatted univariate and multivariate | |
| 288 | #' Cox regression models, respectively. | |
| 289 | #' | |
| 290 | #' @examples | |
| 291 | #' # summarize_coxreg | |
| 292 | #' | |
| 293 | #' result_univar <- basic_table() %>% | |
| 294 | #' summarize_coxreg(variables = u1_variables) %>% | |
| 295 | #' build_table(dta_bladder) | |
| 296 | #' result_univar | |
| 297 | #' | |
| 298 | #' result_univar_covs <- basic_table() %>% | |
| 299 | #' summarize_coxreg( | |
| 300 | #' variables = u2_variables, | |
| 301 | #' ) %>% | |
| 302 | #' build_table(dta_bladder) | |
| 303 | #' result_univar_covs | |
| 304 | #' | |
| 305 | #' result_multivar <- basic_table() %>% | |
| 306 | #' summarize_coxreg( | |
| 307 | #' variables = m1_variables, | |
| 308 | #' multivar = TRUE, | |
| 309 | #' ) %>% | |
| 310 | #' build_table(dta_bladder) | |
| 311 | #' result_multivar | |
| 312 | #' | |
| 313 | #' result_multivar_covs <- basic_table() %>% | |
| 314 | #' summarize_coxreg( | |
| 315 | #' variables = m2_variables, | |
| 316 | #' multivar = TRUE, | |
| 317 | #'     varlabels = c("Covariate 1", "Covariate 2") # custom labels | |
| 318 | #' ) %>% | |
| 319 | #' build_table(dta_bladder) | |
| 320 | #' result_multivar_covs | |
| 321 | #' | |
| 322 | #' @export | |
| 323 | #' @order 2 | |
| 324 | summarize_coxreg <- function(lyt, | |
| 325 | variables, | |
| 326 | control = control_coxreg(), | |
| 327 | at = list(), | |
| 328 | multivar = FALSE, | |
| 329 | common_var = "STUDYID", | |
| 330 |                              .stats = c("n", "hr", "ci", "pval", "pval_inter"), | |
| 331 | .formats = c( | |
| 332 | n = "xx", hr = "xx.xx", ci = "(xx.xx, xx.xx)", | |
| 333 | pval = "x.xxxx | (<0.0001)", pval_inter = "x.xxxx | (<0.0001)" | |
| 334 | ), | |
| 335 | varlabels = NULL, | |
| 336 | .indent_mods = NULL, | |
| 337 | na_str = "", | |
| 338 |                              .section_div = NA_character_) { | |
| 339 | 16x |   if (multivar && control$interaction) { | 
| 340 | 1x | warning(paste( | 
| 341 | 1x | "Interactions are not available for multivariate cox regression using summarize_coxreg.", | 
| 342 | 1x | "The model will be calculated without interaction effects." | 
| 343 | )) | |
| 344 | } | |
| 345 | 16x |   if (control$interaction && !"arm" %in% names(variables)) { | 
| 346 | 1x |     stop("To include interactions please specify 'arm' in variables.") | 
| 347 | } | |
| 348 | ||
| 349 | 15x |   .stats <- if (!"arm" %in% names(variables) || multivar) { # only valid statistics | 
| 350 | 6x |     intersect(c("hr", "ci", "pval"), .stats) | 
| 351 | 15x |   } else if (control$interaction) { | 
| 352 | 5x |     intersect(c("n", "hr", "ci", "pval", "pval_inter"), .stats) | 
| 353 |   } else { | |
| 354 | 4x |     intersect(c("n", "hr", "ci", "pval"), .stats) | 
| 355 | } | |
| 356 | 15x | stat_labels <- c( | 
| 357 | 15x | n = "n", hr = "Hazard Ratio", ci = paste0(control$conf_level * 100, "% CI"), | 
| 358 | 15x | pval = "p-value", pval_inter = "Interaction p-value" | 
| 359 | ) | |
| 360 | 15x | stat_labels <- stat_labels[names(stat_labels) %in% .stats] | 
| 361 | 15x | .formats <- .formats[names(.formats) %in% .stats] | 
| 362 | 15x | env <- new.env() # create caching environment | 
| 363 | ||
| 364 | 15x | lyt <- lyt %>% | 
| 365 | 15x | split_cols_by_multivar( | 
| 366 | 15x | vars = rep(common_var, length(.stats)), | 
| 367 | 15x | varlabels = stat_labels, | 
| 368 | 15x | extra_args = list( | 
| 369 | 15x | .stats = .stats, .formats = .formats, .indent_mods = .indent_mods, na_str = rep(na_str, length(.stats)), | 
| 370 | 15x | cache_env = replicate(length(.stats), list(env)) | 
| 371 | ) | |
| 372 | ) | |
| 373 | ||
| 374 | 15x |   if ("arm" %in% names(variables)) { # treatment effect | 
| 375 | 13x | lyt <- lyt %>% | 
| 376 | 13x | split_rows_by( | 
| 377 | 13x | common_var, | 
| 378 | 13x | split_label = "Treatment:", | 
| 379 | 13x | label_pos = "visible", | 
| 380 | 13x | child_labels = "hidden", | 
| 381 | 13x | section_div = head(.section_div, 1) | 
| 382 | ) | |
| 383 | 13x |     if (!multivar) { | 
| 384 | 9x | lyt <- lyt %>% | 
| 385 | 9x | analyze_colvars( | 
| 386 | 9x | afun = a_coxreg, | 
| 387 | 9x | na_str = na_str, | 
| 388 | 9x | extra_args = list( | 
| 389 | 9x | variables = variables, control = control, multivar = multivar, eff = TRUE, var_main = multivar, | 
| 390 | 9x | labelstr = "" | 
| 391 | ) | |
| 392 | ) | |
| 393 |     } else { # treatment level effects | |
| 394 | 4x | lyt <- lyt %>% | 
| 395 | 4x | summarize_row_groups( | 
| 396 | 4x | cfun = a_coxreg, | 
| 397 | 4x | na_str = na_str, | 
| 398 | 4x | extra_args = list( | 
| 399 | 4x | variables = variables, control = control, multivar = multivar, eff = TRUE, var_main = multivar | 
| 400 | ) | |
| 401 | ) %>% | |
| 402 | 4x | analyze_colvars( | 
| 403 | 4x | afun = a_coxreg, | 
| 404 | 4x | na_str = na_str, | 
| 405 | 4x | extra_args = list(eff = TRUE, control = control, variables = variables, multivar = multivar, labelstr = "") | 
| 406 | ) | |
| 407 | } | |
| 408 | } | |
| 409 | ||
| 410 | 15x |   if ("covariates" %in% names(variables)) { # covariate main effects | 
| 411 | 15x | lyt <- lyt %>% | 
| 412 | 15x | split_rows_by_multivar( | 
| 413 | 15x | vars = variables$covariates, | 
| 414 | 15x | varlabels = varlabels, | 
| 415 | 15x | split_label = "Covariate:", | 
| 416 | 15x | nested = FALSE, | 
| 417 | 15x | child_labels = if (multivar || control$interaction || !"arm" %in% names(variables)) "default" else "hidden", | 
| 418 | 15x | section_div = tail(.section_div, 1) | 
| 419 | ) | |
| 420 | 15x |     if (multivar || control$interaction || !"arm" %in% names(variables)) { | 
| 421 | 11x | lyt <- lyt %>% | 
| 422 | 11x | summarize_row_groups( | 
| 423 | 11x | cfun = a_coxreg, | 
| 424 | 11x | na_str = na_str, | 
| 425 | 11x | extra_args = list( | 
| 426 | 11x | variables = variables, at = at, control = control, multivar = multivar, | 
| 427 | 11x | var_main = if (multivar) multivar else control$interaction | 
| 428 | ) | |
| 429 | ) | |
| 430 |     } else { | |
| 431 | 1x | if (!is.null(varlabels)) names(varlabels) <- variables$covariates | 
| 432 | 4x | lyt <- lyt %>% | 
| 433 | 4x | analyze_colvars( | 
| 434 | 4x | afun = a_coxreg, | 
| 435 | 4x | na_str = na_str, | 
| 436 | 4x | extra_args = list( | 
| 437 | 4x | variables = variables, at = at, control = control, multivar = multivar, | 
| 438 | 4x | var_main = if (multivar) multivar else control$interaction, | 
| 439 | 4x | labelstr = if (is.null(varlabels)) "" else varlabels | 
| 440 | ) | |
| 441 | ) | |
| 442 | } | |
| 443 | ||
| 444 | 2x | if (!"arm" %in% names(variables)) control$interaction <- TRUE # special case: univar no arm | 
| 445 | 15x |     if (multivar || control$interaction) { # covariate level effects | 
| 446 | 11x | lyt <- lyt %>% | 
| 447 | 11x | analyze_colvars( | 
| 448 | 11x | afun = a_coxreg, | 
| 449 | 11x | na_str = na_str, | 
| 450 | 11x | extra_args = list(variables = variables, at = at, control = control, multivar = multivar, labelstr = ""), | 
| 451 | 11x | indent_mod = if (!"arm" %in% names(variables) || multivar) 0L else -1L | 
| 452 | ) | |
| 453 | } | |
| 454 | } | |
| 455 | ||
| 456 | 15x | lyt | 
| 457 | } | 
| 1 | #' Count patients with abnormal analysis range values by baseline status | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [count_abnormal_by_baseline()] creates a layout element to count patients with abnormal | |
| 6 | #' analysis range values, categorized by baseline status. | |
| 7 | #' | |
| 8 | #' This function analyzes primary analysis variable `var` which indicates abnormal range results. Additional | |
| 9 | #' analysis variables that can be supplied as a list via the `variables` parameter are `id` (defaults to | |
| 10 | #' `USUBJID`), a variable to indicate unique subject identifiers, and `baseline` (defaults to `BNRIND`), a | |
| 11 | #' variable to indicate baseline reference ranges. | |
| 12 | #' | |
| 13 | #' For each direction specified via the `abnormal` parameter (e.g. High or Low), we condition on baseline | |
| 14 | #' range result and count patients in the numerator and denominator as follows for each of the following | |
| 15 | #' categories: | |
| 16 | #' * `Not <abnormality>` | |
| 17 | #' * `num`: The number of patients without abnormality at baseline (excluding those with missing baseline) | |
| 18 | #' and with at least one abnormality post-baseline. | |
| 19 | #' * `denom`: The number of patients without abnormality at baseline (excluding those with missing baseline). | |
| 20 | #' * `<Abnormality>` | |
| 21 | #' * `num`: The number of patients with abnormality as baseline and at least one abnormality post-baseline. | |
| 22 | #' * `denom`: The number of patients with abnormality at baseline. | |
| 23 | #' * `Total` | |
| 24 | #' * `num`: The number of patients with at least one post-baseline record and at least one abnormality | |
| 25 | #' post-baseline. | |
| 26 | #' * `denom`: The number of patients with at least one post-baseline record. | |
| 27 | #' | |
| 28 | #' This function assumes that `df` has been filtered to only include post-baseline records. | |
| 29 | #' | |
| 30 | #' @inheritParams argument_convention | |
| 31 | #' @param abnormal (`character`)\cr values identifying the abnormal range level(s) in `.var`. | |
| 32 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 33 | #' | |
| 34 | #'   Options are: ``r shQuote(get_stats("abnormal_by_baseline"), type = "sh")`` | |
| 35 | #' | |
| 36 | #' @note | |
| 37 | #' * `df` should be filtered to include only post-baseline records. | |
| 38 | #' * If the baseline variable or analysis variable contains `NA` records, it is expected that `df` has been | |
| 39 | #' pre-processed using [df_explicit_na()] or [explicit_na()]. | |
| 40 | #' | |
| 41 | #' @seealso Relevant description function [d_count_abnormal_by_baseline()]. | |
| 42 | #' | |
| 43 | #' @name abnormal_by_baseline | |
| 44 | #' @order 1 | |
| 45 | NULL | |
| 46 | ||
| 47 | #' @describeIn abnormal_by_baseline Statistics function for a single `abnormal` level. | |
| 48 | #' | |
| 49 | #' @param na_str (`string`)\cr the explicit `na_level` argument you used in the pre-processing steps (maybe with | |
| 50 | #' [df_explicit_na()]). The default is `"<Missing>"`. | |
| 51 | #' | |
| 52 | #' @return | |
| 53 | #' * `s_count_abnormal_by_baseline()` returns statistic `fraction` which is a named list with 3 labeled elements: | |
| 54 | #' `not_abnormal`, `abnormal`, and `total`. Each element contains a vector with `num` and `denom` patient counts. | |
| 55 | #' | |
| 56 | #' @keywords internal | |
| 57 | s_count_abnormal_by_baseline <- function(df, | |
| 58 | .var, | |
| 59 | abnormal, | |
| 60 | na_str = "<Missing>", | |
| 61 | variables = list(id = "USUBJID", baseline = "BNRIND"), | |
| 62 |                                          ...) { | |
| 63 | 11x | checkmate::assert_string(.var) | 
| 64 | 11x | checkmate::assert_string(abnormal) | 
| 65 | 11x | checkmate::assert_string(na_str) | 
| 66 | 11x | assert_df_with_variables(df, c(range = .var, variables)) | 
| 67 | 11x |   checkmate::assert_subset(names(variables), c("id", "baseline")) | 
| 68 | 11x |   checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character")) | 
| 69 | 11x |   checkmate::assert_multi_class(df[[variables$baseline]], classes = c("factor", "character")) | 
| 70 | 11x |   checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character")) | 
| 71 | ||
| 72 | # If input is passed as character, changed to factor | |
| 73 | 11x | df[[.var]] <- as_factor_keep_attributes(df[[.var]], na_level = na_str) | 
| 74 | 11x | df[[variables$baseline]] <- as_factor_keep_attributes(df[[variables$baseline]], na_level = na_str) | 
| 75 | ||
| 76 | 11x | assert_valid_factor(df[[.var]], any.missing = FALSE) | 
| 77 | 10x | assert_valid_factor(df[[variables$baseline]], any.missing = FALSE) | 
| 78 | ||
| 79 | # Keep only records with valid analysis value. | |
| 80 | 9x | df <- df[df[[.var]] != na_str, ] | 
| 81 | ||
| 82 | 9x | anl <- data.frame( | 
| 83 | 9x | id = df[[variables$id]], | 
| 84 | 9x | var = df[[.var]], | 
| 85 | 9x | baseline = df[[variables$baseline]], | 
| 86 | 9x | stringsAsFactors = FALSE | 
| 87 | ) | |
| 88 | ||
| 89 | # Total: | |
| 90 | # - Patients in denominator: have at least one valid measurement post-baseline. | |
| 91 | # - Patients in numerator: have at least one abnormality. | |
| 92 | 9x | total_denom <- length(unique(anl$id)) | 
| 93 | 9x | total_num <- length(unique(anl$id[anl$var == abnormal])) | 
| 94 | ||
| 95 | # Baseline NA records are counted only in total rows. | |
| 96 | 9x | anl <- anl[anl$baseline != na_str, ] | 
| 97 | ||
| 98 | # Abnormal: | |
| 99 | # - Patients in denominator: have abnormality at baseline. | |
| 100 | # - Patients in numerator: have abnormality at baseline AND | |
| 101 | # have at least one abnormality post-baseline. | |
| 102 | 9x | abn_denom <- length(unique(anl$id[anl$baseline == abnormal])) | 
| 103 | 9x | abn_num <- length(unique(anl$id[anl$baseline == abnormal & anl$var == abnormal])) | 
| 104 | ||
| 105 | # Not abnormal: | |
| 106 | # - Patients in denominator: do not have abnormality at baseline. | |
| 107 | # - Patients in numerator: do not have abnormality at baseline AND | |
| 108 | # have at least one abnormality post-baseline. | |
| 109 | 9x | not_abn_denom <- length(unique(anl$id[anl$baseline != abnormal])) | 
| 110 | 9x | not_abn_num <- length(unique(anl$id[anl$baseline != abnormal & anl$var == abnormal])) | 
| 111 | ||
| 112 | 9x | labels <- d_count_abnormal_by_baseline(abnormal) | 
| 113 | 9x | list(fraction = list( | 
| 114 | 9x | not_abnormal = formatters::with_label(c(num = not_abn_num, denom = not_abn_denom), labels$not_abnormal), | 
| 115 | 9x | abnormal = formatters::with_label(c(num = abn_num, denom = abn_denom), labels$abnormal), | 
| 116 | 9x | total = formatters::with_label(c(num = total_num, denom = total_denom), labels$total) | 
| 117 | )) | |
| 118 | } | |
| 119 | ||
| 120 | #' @describeIn abnormal_by_baseline Formatted analysis function which is used as `afun` | |
| 121 | #' in `count_abnormal_by_baseline()`. | |
| 122 | #' | |
| 123 | #' @return | |
| 124 | #' * `a_count_abnormal_by_baseline()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 125 | #' | |
| 126 | #' @keywords internal | |
| 127 | a_count_abnormal_by_baseline <- function(df, | |
| 128 | ..., | |
| 129 | .stats = NULL, | |
| 130 | .stat_names = NULL, | |
| 131 | .formats = NULL, | |
| 132 | .labels = NULL, | |
| 133 |                                          .indent_mods = NULL) { | |
| 134 | # Check for additional parameters to the statistics function | |
| 135 | 4x | dots_extra_args <- list(...) | 
| 136 | 4x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 137 | 4x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 138 | ||
| 139 | # Check for user-defined functions | |
| 140 | 4x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 141 | 4x | .stats <- default_and_custom_stats_list$all_stats | 
| 142 | 4x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 143 | ||
| 144 | # Apply statistics function | |
| 145 | 4x | x_stats <- .apply_stat_functions( | 
| 146 | 4x | default_stat_fnc = s_count_abnormal_by_baseline, | 
| 147 | 4x | custom_stat_fnc_list = custom_stat_functions, | 
| 148 | 4x | args_list = c( | 
| 149 | 4x | df = list(df), | 
| 150 | 4x | extra_afun_params, | 
| 151 | 4x | dots_extra_args | 
| 152 | ) | |
| 153 | ) | |
| 154 | ||
| 155 | # Fill in formatting defaults | |
| 156 | 4x |   .stats <- get_stats("abnormal_by_baseline", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) | 
| 157 | 4x | levels_per_stats <- lapply(x_stats, names) | 
| 158 | 4x | .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) | 
| 159 | 4x | .labels <- get_labels_from_stats( | 
| 160 | 4x | .stats, .labels, levels_per_stats, d_count_abnormal_by_baseline(dots_extra_args$abnormal) | 
| 161 | ) | |
| 162 | 4x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) | 
| 163 | ||
| 164 | 4x | x_stats <- x_stats[.stats] %>% | 
| 165 | 4x | .unlist_keep_nulls() %>% | 
| 166 | 4x | setNames(names(.formats)) | 
| 167 | ||
| 168 | # Auto format handling | |
| 169 | 4x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 170 | ||
| 171 | # Get and check statistical names | |
| 172 | 4x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 173 | ||
| 174 | 4x | in_rows( | 
| 175 | 4x | .list = x_stats, | 
| 176 | 4x | .formats = .formats, | 
| 177 | 4x | .names = .labels %>% .unlist_keep_nulls(), | 
| 178 | 4x | .stat_names = .stat_names, | 
| 179 | 4x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 180 | 4x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 181 | ) | |
| 182 | } | |
| 183 | ||
| 184 | #' @describeIn abnormal_by_baseline Layout-creating function which can take statistics function arguments | |
| 185 | #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 186 | #' | |
| 187 | #' @return | |
| 188 | #' * `count_abnormal_by_baseline()` returns a layout object suitable for passing to further layouting functions, | |
| 189 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 190 | #' the statistics from `s_count_abnormal_by_baseline()` to the table layout. | |
| 191 | #' | |
| 192 | #' @examples | |
| 193 | #' df <- data.frame( | |
| 194 | #' USUBJID = as.character(c(1:6)), | |
| 195 | #'   ANRIND = factor(c(rep("LOW", 4), "NORMAL", "HIGH")), | |
| 196 | #'   BNRIND = factor(c("LOW", "NORMAL", "HIGH", NA, "LOW", "NORMAL")) | |
| 197 | #' ) | |
| 198 | #' df <- df_explicit_na(df) | |
| 199 | #' | |
| 200 | #' # Layout creating function. | |
| 201 | #' basic_table() %>% | |
| 202 | #' count_abnormal_by_baseline(var = "ANRIND", abnormal = c(High = "HIGH")) %>% | |
| 203 | #' build_table(df) | |
| 204 | #' | |
| 205 | #' # Passing of statistics function and formatting arguments. | |
| 206 | #' df2 <- data.frame( | |
| 207 | #' ID = as.character(c(1, 2, 3, 4)), | |
| 208 | #'   RANGE = factor(c("NORMAL", "LOW", "HIGH", "HIGH")), | |
| 209 | #'   BLRANGE = factor(c("LOW", "HIGH", "HIGH", "NORMAL")) | |
| 210 | #' ) | |
| 211 | #' | |
| 212 | #' basic_table() %>% | |
| 213 | #' count_abnormal_by_baseline( | |
| 214 | #' var = "RANGE", | |
| 215 | #' abnormal = c(Low = "LOW"), | |
| 216 | #' variables = list(id = "ID", baseline = "BLRANGE"), | |
| 217 | #' .formats = c(fraction = "xx / xx"), | |
| 218 | #' .indent_mods = c(fraction = 2L) | |
| 219 | #' ) %>% | |
| 220 | #' build_table(df2) | |
| 221 | #' | |
| 222 | #' @export | |
| 223 | #' @order 2 | |
| 224 | count_abnormal_by_baseline <- function(lyt, | |
| 225 | var, | |
| 226 | abnormal, | |
| 227 | variables = list(id = "USUBJID", baseline = "BNRIND"), | |
| 228 | na_str = "<Missing>", | |
| 229 | nested = TRUE, | |
| 230 | ..., | |
| 231 | table_names = abnormal, | |
| 232 | .stats = "fraction", | |
| 233 | .stat_names = NULL, | |
| 234 | .formats = list(fraction = format_fraction), | |
| 235 | .labels = NULL, | |
| 236 |                                        .indent_mods = NULL) { | |
| 237 | 2x | checkmate::assert_character(abnormal, len = length(table_names), names = "named") | 
| 238 | 2x | checkmate::assert_string(var) | 
| 239 | ||
| 240 | # Process standard extra arguments | |
| 241 | 2x |   extra_args <- list(".stats" = .stats) | 
| 242 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 243 | 2x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 244 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 245 | ! | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 246 | ||
| 247 | # Process additional arguments to the statistic function | |
| 248 | 2x | extra_args <- c(extra_args, "variables" = list(variables), ...) | 
| 249 | ||
| 250 | # Append additional info from layout to the analysis function | |
| 251 | 2x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 252 | 2x | formals(a_count_abnormal_by_baseline) <- c( | 
| 253 | 2x | formals(a_count_abnormal_by_baseline), extra_args[[".additional_fun_parameters"]] | 
| 254 | ) | |
| 255 | ||
| 256 | # Add a new table section with label for each value in abnormal | |
| 257 | 2x |   for (i in seq_along(abnormal)) { | 
| 258 | 4x | extra_args[["abnormal"]] <- abnormal[i] | 
| 259 | ||
| 260 | 4x | lyt <- analyze( | 
| 261 | 4x | lyt = lyt, | 
| 262 | 4x | vars = var, | 
| 263 | 4x | afun = a_count_abnormal_by_baseline, | 
| 264 | 4x | var_labels = names(abnormal)[i], | 
| 265 | 4x | na_str = na_str, | 
| 266 | 4x | nested = nested, | 
| 267 | 4x | extra_args = extra_args, | 
| 268 | 4x | show_labels = "visible", | 
| 269 | 4x | table_names = table_names[i] | 
| 270 | ) | |
| 271 | } | |
| 272 | ||
| 273 | 2x | lyt | 
| 274 | } | |
| 275 | ||
| 276 | #' Description function for `s_count_abnormal_by_baseline()` | |
| 277 | #' | |
| 278 | #' @description `r lifecycle::badge("stable")` | |
| 279 | #' | |
| 280 | #' Description function that produces the labels for [s_count_abnormal_by_baseline()]. | |
| 281 | #' | |
| 282 | #' @inheritParams abnormal_by_baseline | |
| 283 | #' | |
| 284 | #' @return Abnormal category labels for [s_count_abnormal_by_baseline()]. | |
| 285 | #' | |
| 286 | #' @examples | |
| 287 | #' d_count_abnormal_by_baseline("LOW") | |
| 288 | #' | |
| 289 | #' @export | |
| 290 | d_count_abnormal_by_baseline <- function(abnormal) { | |
| 291 | 13x |   not_abn_name <- paste("Not", tolower(abnormal)) | 
| 292 | 13x | abn_name <- paste0(toupper(substr(abnormal, 1, 1)), tolower(substring(abnormal, 2))) | 
| 293 | 13x | total_name <- "Total" | 
| 294 | ||
| 295 | 13x | list( | 
| 296 | 13x | not_abnormal = not_abn_name, | 
| 297 | 13x | abnormal = abn_name, | 
| 298 | 13x | total = total_name | 
| 299 | ) | |
| 300 | } | 
| 1 | #' Helper functions for multivariate logistic regression | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Helper functions used in calculations for logistic regression. | |
| 6 | #' | |
| 7 | #' @inheritParams argument_convention | |
| 8 | #' @param fit_glm (`glm`)\cr logistic regression model fitted by [stats::glm()] with "binomial" family. | |
| 9 | #' Limited functionality is also available for conditional logistic regression models fitted by | |
| 10 | #' [survival::clogit()], currently this is used only by [extract_rsp_biomarkers()]. | |
| 11 | #' @param x (`character`)\cr a variable or interaction term in `fit_glm` (depending on the helper function used). | |
| 12 | #' | |
| 13 | #' @examples | |
| 14 | #' library(dplyr) | |
| 15 | #' library(broom) | |
| 16 | #' | |
| 17 | #' adrs_f <- tern_ex_adrs %>% | |
| 18 | #' filter(PARAMCD == "BESRSPI") %>% | |
| 19 | #'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>% | |
| 20 | #' mutate( | |
| 21 | #'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0), | |
| 22 | #' RACE = factor(RACE), | |
| 23 | #' SEX = factor(SEX) | |
| 24 | #' ) | |
| 25 | #' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response") | |
| 26 | #' mod1 <- fit_logistic( | |
| 27 | #' data = adrs_f, | |
| 28 | #' variables = list( | |
| 29 | #' response = "Response", | |
| 30 | #' arm = "ARMCD", | |
| 31 | #'     covariates = c("AGE", "RACE") | |
| 32 | #' ) | |
| 33 | #' ) | |
| 34 | #' mod2 <- fit_logistic( | |
| 35 | #' data = adrs_f, | |
| 36 | #' variables = list( | |
| 37 | #' response = "Response", | |
| 38 | #' arm = "ARMCD", | |
| 39 | #'     covariates = c("AGE", "RACE"), | |
| 40 | #' interaction = "AGE" | |
| 41 | #' ) | |
| 42 | #' ) | |
| 43 | #' | |
| 44 | #' @name h_logistic_regression | |
| 45 | NULL | |
| 46 | ||
| 47 | #' @describeIn h_logistic_regression Helper function to extract interaction variable names from a fitted | |
| 48 | #' model assuming only one interaction term. | |
| 49 | #' | |
| 50 | #' @return Vector of names of interaction variables. | |
| 51 | #' | |
| 52 | #' @export | |
| 53 | h_get_interaction_vars <- function(fit_glm) { | |
| 54 | 34x | checkmate::assert_class(fit_glm, "glm") | 
| 55 | 34x | terms_name <- attr(stats::terms(fit_glm), "term.labels") | 
| 56 | 34x | terms_order <- attr(stats::terms(fit_glm), "order") | 
| 57 | 34x | interaction_term <- terms_name[terms_order == 2] | 
| 58 | 34x | checkmate::assert_string(interaction_term) | 
| 59 | 34x | strsplit(interaction_term, split = ":")[[1]] | 
| 60 | } | |
| 61 | ||
| 62 | #' @describeIn h_logistic_regression Helper function to get the right coefficient name from the | |
| 63 | #' interaction variable names and the given levels. The main value here is that the order | |
| 64 | #' of first and second variable is checked in the `interaction_vars` input. | |
| 65 | #' | |
| 66 | #' @param interaction_vars (`character(2)`)\cr interaction variable names. | |
| 67 | #' @param first_var_with_level (`character(2)`)\cr the first variable name with the interaction level. | |
| 68 | #' @param second_var_with_level (`character(2)`)\cr the second variable name with the interaction level. | |
| 69 | #' | |
| 70 | #' @return Name of coefficient. | |
| 71 | #' | |
| 72 | #' @export | |
| 73 | h_interaction_coef_name <- function(interaction_vars, | |
| 74 | first_var_with_level, | |
| 75 |                                     second_var_with_level) { | |
| 76 | 55x | checkmate::assert_character(interaction_vars, len = 2, any.missing = FALSE) | 
| 77 | 55x | checkmate::assert_character(first_var_with_level, len = 2, any.missing = FALSE) | 
| 78 | 55x | checkmate::assert_character(second_var_with_level, len = 2, any.missing = FALSE) | 
| 79 | 55x | checkmate::assert_subset(c(first_var_with_level[1], second_var_with_level[1]), interaction_vars) | 
| 80 | ||
| 81 | 55x | first_name <- paste(first_var_with_level, collapse = "") | 
| 82 | 55x | second_name <- paste(second_var_with_level, collapse = "") | 
| 83 | 55x |   if (first_var_with_level[1] == interaction_vars[1]) { | 
| 84 | 36x | paste(first_name, second_name, sep = ":") | 
| 85 | 19x |   } else if (second_var_with_level[1] == interaction_vars[1]) { | 
| 86 | 19x | paste(second_name, first_name, sep = ":") | 
| 87 | } | |
| 88 | } | |
| 89 | ||
| 90 | #' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates | |
| 91 | #' for the case when both the odds ratio and the interaction variable are categorical. | |
| 92 | #' | |
| 93 | #' @param odds_ratio_var (`string`)\cr the odds ratio variable. | |
| 94 | #' @param interaction_var (`string`)\cr the interaction variable. | |
| 95 | #' | |
| 96 | #' @return Odds ratio. | |
| 97 | #' | |
| 98 | #' @export | |
| 99 | h_or_cat_interaction <- function(odds_ratio_var, | |
| 100 | interaction_var, | |
| 101 | fit_glm, | |
| 102 |                                  conf_level = 0.95) { | |
| 103 | 8x | interaction_vars <- h_get_interaction_vars(fit_glm) | 
| 104 | 8x | checkmate::assert_string(odds_ratio_var) | 
| 105 | 8x | checkmate::assert_string(interaction_var) | 
| 106 | 8x | checkmate::assert_subset(c(odds_ratio_var, interaction_var), interaction_vars) | 
| 107 | 8x | checkmate::assert_vector(interaction_vars, len = 2) | 
| 108 | ||
| 109 | 8x | xs_level <- fit_glm$xlevels | 
| 110 | 8x | xs_coef <- stats::coef(fit_glm) | 
| 111 | 8x | xs_vcov <- stats::vcov(fit_glm) | 
| 112 | 8x | y <- list() | 
| 113 | 8x |   for (var_level in xs_level[[odds_ratio_var]][-1]) { | 
| 114 | 14x | x <- list() | 
| 115 | 14x |     for (ref_level in xs_level[[interaction_var]]) { | 
| 116 | 38x | coef_names <- paste0(odds_ratio_var, var_level) | 
| 117 | 38x |       if (ref_level != xs_level[[interaction_var]][1]) { | 
| 118 | 24x | interaction_coef_name <- h_interaction_coef_name( | 
| 119 | 24x | interaction_vars, | 
| 120 | 24x | c(odds_ratio_var, var_level), | 
| 121 | 24x | c(interaction_var, ref_level) | 
| 122 | ) | |
| 123 | 24x | coef_names <- c( | 
| 124 | 24x | coef_names, | 
| 125 | 24x | interaction_coef_name | 
| 126 | ) | |
| 127 | } | |
| 128 | 38x |       if (length(coef_names) > 1) { | 
| 129 | 24x | ones <- t(c(1, 1)) | 
| 130 | 24x | est <- as.numeric(ones %*% xs_coef[coef_names]) | 
| 131 | 24x | se <- sqrt(as.numeric(ones %*% xs_vcov[coef_names, coef_names] %*% t(ones))) | 
| 132 |       } else { | |
| 133 | 14x | est <- xs_coef[coef_names] | 
| 134 | 14x | se <- sqrt(as.numeric(xs_vcov[coef_names, coef_names])) | 
| 135 | } | |
| 136 | 38x | or <- exp(est) | 
| 137 | 38x | ci <- exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se) | 
| 138 | 38x | x[[ref_level]] <- list(or = or, ci = ci) | 
| 139 | } | |
| 140 | 14x | y[[var_level]] <- x | 
| 141 | } | |
| 142 | 8x | y | 
| 143 | } | |
| 144 | ||
| 145 | #' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates | |
| 146 | #' for the case when either the odds ratio or the interaction variable is continuous. | |
| 147 | #' | |
| 148 | #' @param at (`numeric` or `NULL`)\cr optional values for the interaction variable. Otherwise | |
| 149 | #' the median is used. | |
| 150 | #' | |
| 151 | #' @return Odds ratio. | |
| 152 | #' | |
| 153 | #' @note We don't provide a function for the case when both variables are continuous because | |
| 154 | #' this does not arise in this table, as the treatment arm variable will always be involved | |
| 155 | #' and categorical. | |
| 156 | #' | |
| 157 | #' @export | |
| 158 | h_or_cont_interaction <- function(odds_ratio_var, | |
| 159 | interaction_var, | |
| 160 | fit_glm, | |
| 161 | at = NULL, | |
| 162 |                                   conf_level = 0.95) { | |
| 163 | 13x | interaction_vars <- h_get_interaction_vars(fit_glm) | 
| 164 | 13x | checkmate::assert_string(odds_ratio_var) | 
| 165 | 13x | checkmate::assert_string(interaction_var) | 
| 166 | 13x | checkmate::assert_subset(c(odds_ratio_var, interaction_var), interaction_vars) | 
| 167 | 13x | checkmate::assert_vector(interaction_vars, len = 2) | 
| 168 | 13x | checkmate::assert_numeric(at, min.len = 1, null.ok = TRUE, any.missing = FALSE) | 
| 169 | 13x | xs_level <- fit_glm$xlevels | 
| 170 | 13x | xs_coef <- stats::coef(fit_glm) | 
| 171 | 13x | xs_vcov <- stats::vcov(fit_glm) | 
| 172 | 13x | xs_class <- attr(fit_glm$terms, "dataClasses") | 
| 173 | 13x | model_data <- fit_glm$model | 
| 174 | 13x |   if (!is.null(at)) { | 
| 175 | 3x | checkmate::assert_set_equal(xs_class[interaction_var], "numeric") | 
| 176 | } | |
| 177 | 12x | y <- list() | 
| 178 | 12x |   if (xs_class[interaction_var] == "numeric") { | 
| 179 | 7x |     if (is.null(at)) { | 
| 180 | 5x | at <- ceiling(stats::median(model_data[[interaction_var]])) | 
| 181 | } | |
| 182 | ||
| 183 | 7x |     for (var_level in xs_level[[odds_ratio_var]][-1]) { | 
| 184 | 14x | x <- list() | 
| 185 | 14x |       for (increment in at) { | 
| 186 | 20x | coef_names <- paste0(odds_ratio_var, var_level) | 
| 187 | 20x |         if (increment != 0) { | 
| 188 | 20x | interaction_coef_name <- h_interaction_coef_name( | 
| 189 | 20x | interaction_vars, | 
| 190 | 20x | c(odds_ratio_var, var_level), | 
| 191 | 20x | c(interaction_var, "") | 
| 192 | ) | |
| 193 | 20x | coef_names <- c( | 
| 194 | 20x | coef_names, | 
| 195 | 20x | interaction_coef_name | 
| 196 | ) | |
| 197 | } | |
| 198 | 20x |         if (length(coef_names) > 1) { | 
| 199 | 20x | xvec <- t(c(1, increment)) | 
| 200 | 20x | est <- as.numeric(xvec %*% xs_coef[coef_names]) | 
| 201 | 20x | se <- sqrt(as.numeric(xvec %*% xs_vcov[coef_names, coef_names] %*% t(xvec))) | 
| 202 |         } else { | |
| 203 | ! | est <- xs_coef[coef_names] | 
| 204 | ! | se <- sqrt(as.numeric(xs_vcov[coef_names, coef_names])) | 
| 205 | } | |
| 206 | 20x | or <- exp(est) | 
| 207 | 20x | ci <- exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se) | 
| 208 | 20x | x[[as.character(increment)]] <- list(or = or, ci = ci) | 
| 209 | } | |
| 210 | 14x | y[[var_level]] <- x | 
| 211 | } | |
| 212 |   } else { | |
| 213 | 5x | checkmate::assert_set_equal(xs_class[odds_ratio_var], "numeric") | 
| 214 | 5x | checkmate::assert_set_equal(xs_class[interaction_var], "factor") | 
| 215 | 5x |     for (var_level in xs_level[[interaction_var]]) { | 
| 216 | 15x | coef_names <- odds_ratio_var | 
| 217 | 15x |       if (var_level != xs_level[[interaction_var]][1]) { | 
| 218 | 10x | interaction_coef_name <- h_interaction_coef_name( | 
| 219 | 10x | interaction_vars, | 
| 220 | 10x | c(odds_ratio_var, ""), | 
| 221 | 10x | c(interaction_var, var_level) | 
| 222 | ) | |
| 223 | 10x | coef_names <- c( | 
| 224 | 10x | coef_names, | 
| 225 | 10x | interaction_coef_name | 
| 226 | ) | |
| 227 | } | |
| 228 | 15x |       if (length(coef_names) > 1) { | 
| 229 | 10x | xvec <- t(c(1, 1)) | 
| 230 | 10x | est <- as.numeric(xvec %*% xs_coef[coef_names]) | 
| 231 | 10x | se <- sqrt(as.numeric(xvec %*% xs_vcov[coef_names, coef_names] %*% t(xvec))) | 
| 232 |       } else { | |
| 233 | 5x | est <- xs_coef[coef_names] | 
| 234 | 5x | se <- sqrt(as.numeric(xs_vcov[coef_names, coef_names])) | 
| 235 | } | |
| 236 | 15x | or <- exp(est) | 
| 237 | 15x | ci <- exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se) | 
| 238 | 15x | y[[var_level]] <- list(or = or, ci = ci) | 
| 239 | } | |
| 240 | } | |
| 241 | 12x | y | 
| 242 | } | |
| 243 | ||
| 244 | #' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates | |
| 245 | #' in case of an interaction. This is a wrapper for [h_or_cont_interaction()] and | |
| 246 | #' [h_or_cat_interaction()]. | |
| 247 | #' | |
| 248 | #' @return Odds ratio. | |
| 249 | #' | |
| 250 | #' @export | |
| 251 | h_or_interaction <- function(odds_ratio_var, | |
| 252 | interaction_var, | |
| 253 | fit_glm, | |
| 254 | at = NULL, | |
| 255 |                              conf_level = 0.95) { | |
| 256 | 15x | xs_class <- attr(fit_glm$terms, "dataClasses") | 
| 257 | 15x |   if (any(xs_class[c(odds_ratio_var, interaction_var)] == "numeric")) { | 
| 258 | 9x | h_or_cont_interaction( | 
| 259 | 9x | odds_ratio_var, | 
| 260 | 9x | interaction_var, | 
| 261 | 9x | fit_glm, | 
| 262 | 9x | at = at, | 
| 263 | 9x | conf_level = conf_level | 
| 264 | ) | |
| 265 | 6x |   } else if (all(xs_class[c(odds_ratio_var, interaction_var)] == "factor")) { | 
| 266 | 6x | h_or_cat_interaction( | 
| 267 | 6x | odds_ratio_var, | 
| 268 | 6x | interaction_var, | 
| 269 | 6x | fit_glm, | 
| 270 | 6x | conf_level = conf_level | 
| 271 | ) | |
| 272 |   } else { | |
| 273 | ! |     stop("wrong interaction variable class, the interaction variable is not a numeric nor a factor") | 
| 274 | } | |
| 275 | } | |
| 276 | ||
| 277 | #' @describeIn h_logistic_regression Helper function to construct term labels from simple terms and the table | |
| 278 | #' of numbers of patients. | |
| 279 | #' | |
| 280 | #' @param terms (`character`)\cr simple terms. | |
| 281 | #' @param table (`table`)\cr table containing numbers for terms. | |
| 282 | #' | |
| 283 | #' @return Term labels containing numbers of patients. | |
| 284 | #' | |
| 285 | #' @export | |
| 286 | h_simple_term_labels <- function(terms, | |
| 287 |                                  table) { | |
| 288 | 54x | checkmate::assert_true(is.table(table)) | 
| 289 | 54x |   checkmate::assert_multi_class(terms, classes = c("factor", "character")) | 
| 290 | 54x | terms <- as.character(terms) | 
| 291 | 54x | term_n <- table[terms] | 
| 292 | 54x | paste0(terms, ", n = ", term_n) | 
| 293 | } | |
| 294 | ||
| 295 | #' @describeIn h_logistic_regression Helper function to construct term labels from interaction terms and the table | |
| 296 | #' of numbers of patients. | |
| 297 | #' | |
| 298 | #' @param terms1 (`character`)\cr terms for first dimension (rows). | |
| 299 | #' @param terms2 (`character`)\cr terms for second dimension (rows). | |
| 300 | #' @param any (`flag`)\cr whether any of `term1` and `term2` can be fulfilled to count the | |
| 301 | #' number of patients. In that case they can only be scalar (strings). | |
| 302 | #' | |
| 303 | #' @return Term labels containing numbers of patients. | |
| 304 | #' | |
| 305 | #' @export | |
| 306 | h_interaction_term_labels <- function(terms1, | |
| 307 | terms2, | |
| 308 | table, | |
| 309 |                                       any = FALSE) { | |
| 310 | 8x | checkmate::assert_true(is.table(table)) | 
| 311 | 8x | checkmate::assert_flag(any) | 
| 312 | 8x |   checkmate::assert_multi_class(terms1, classes = c("factor", "character")) | 
| 313 | 8x |   checkmate::assert_multi_class(terms2, classes = c("factor", "character")) | 
| 314 | 8x | terms1 <- as.character(terms1) | 
| 315 | 8x | terms2 <- as.character(terms2) | 
| 316 | 8x |   if (any) { | 
| 317 | 4x | checkmate::assert_scalar(terms1) | 
| 318 | 4x | checkmate::assert_scalar(terms2) | 
| 319 | 4x | paste0( | 
| 320 | 4x | terms1, " or ", terms2, ", n = ", | 
| 321 | # Note that we double count in the initial sum the cell [terms1, terms2], therefore subtract. | |
| 322 | 4x | sum(c(table[terms1, ], table[, terms2])) - table[terms1, terms2] | 
| 323 | ) | |
| 324 |   } else { | |
| 325 | 4x | term_n <- table[cbind(terms1, terms2)] | 
| 326 | 4x | paste0(terms1, " * ", terms2, ", n = ", term_n) | 
| 327 | } | |
| 328 | } | |
| 329 | ||
| 330 | #' @describeIn h_logistic_regression Helper function to tabulate the main effect | |
| 331 | #' results of a (conditional) logistic regression model. | |
| 332 | #' | |
| 333 | #' @return Tabulated main effect results from a logistic regression model. | |
| 334 | #' | |
| 335 | #' @examples | |
| 336 | #' h_glm_simple_term_extract("AGE", mod1) | |
| 337 | #' h_glm_simple_term_extract("ARMCD", mod1) | |
| 338 | #' | |
| 339 | #' @export | |
| 340 | h_glm_simple_term_extract <- function(x, fit_glm) { | |
| 341 | 78x |   checkmate::assert_multi_class(fit_glm, c("glm", "clogit")) | 
| 342 | 78x | checkmate::assert_string(x) | 
| 343 | ||
| 344 | 78x | xs_class <- attr(fit_glm$terms, "dataClasses") | 
| 345 | 78x | xs_level <- fit_glm$xlevels | 
| 346 | 78x | xs_coef <- summary(fit_glm)$coefficients | 
| 347 | 78x |   stats <- if (inherits(fit_glm, "glm")) { | 
| 348 | 66x |     c("estimate" = "Estimate", "std_error" = "Std. Error", "pvalue" = "Pr(>|z|)") | 
| 349 |   } else { | |
| 350 | 12x |     c("estimate" = "coef", "std_error" = "se(coef)", "pvalue" = "Pr(>|z|)") | 
| 351 | } | |
| 352 | # Make sure x is not an interaction term. | |
| 353 | 78x | checkmate::assert_subset(x, names(xs_class)) | 
| 354 | 78x | x_sel <- if (xs_class[x] == "numeric") x else paste0(x, xs_level[[x]][-1]) | 
| 355 | 78x | x_stats <- as.data.frame(xs_coef[x_sel, stats, drop = FALSE], stringsAsFactors = FALSE) | 
| 356 | 78x | colnames(x_stats) <- names(stats) | 
| 357 | 78x | x_stats$estimate <- as.list(x_stats$estimate) | 
| 358 | 78x | x_stats$std_error <- as.list(x_stats$std_error) | 
| 359 | 78x | x_stats$pvalue <- as.list(x_stats$pvalue) | 
| 360 | 78x | x_stats$df <- as.list(1) | 
| 361 | 78x |   if (xs_class[x] == "numeric") { | 
| 362 | 60x | x_stats$term <- x | 
| 363 | 60x |     x_stats$term_label <- if (inherits(fit_glm, "glm")) { | 
| 364 | 48x | formatters::var_labels(fit_glm$data[x], fill = TRUE) | 
| 365 |     } else { | |
| 366 | # We just fill in here with the `term` itself as we don't have the data available. | |
| 367 | 12x | x | 
| 368 | } | |
| 369 | 60x | x_stats$is_variable_summary <- FALSE | 
| 370 | 60x | x_stats$is_term_summary <- TRUE | 
| 371 |   } else { | |
| 372 | 18x | checkmate::assert_class(fit_glm, "glm") | 
| 373 | # The reason is that we don't have the original data set in the `clogit` object | |
| 374 | # and therefore cannot determine the `x_numbers` here. | |
| 375 | 18x | x_numbers <- table(fit_glm$data[[x]]) | 
| 376 | 18x | x_stats$term <- xs_level[[x]][-1] | 
| 377 | 18x | x_stats$term_label <- h_simple_term_labels(x_stats$term, x_numbers) | 
| 378 | 18x | x_stats$is_variable_summary <- FALSE | 
| 379 | 18x | x_stats$is_term_summary <- TRUE | 
| 380 | 18x | main_effects <- car::Anova(fit_glm, type = 3, test.statistic = "Wald") | 
| 381 | 18x | x_main <- data.frame( | 
| 382 | 18x | pvalue = main_effects[x, "Pr(>Chisq)", drop = TRUE], | 
| 383 | 18x | term = xs_level[[x]][1], | 
| 384 | 18x |       term_label = paste("Reference", h_simple_term_labels(xs_level[[x]][1], x_numbers)), | 
| 385 | 18x | df = main_effects[x, "Df", drop = TRUE], | 
| 386 | 18x | stringsAsFactors = FALSE | 
| 387 | ) | |
| 388 | 18x | x_main$pvalue <- as.list(x_main$pvalue) | 
| 389 | 18x | x_main$df <- as.list(x_main$df) | 
| 390 | 18x | x_main$estimate <- list(numeric(0)) | 
| 391 | 18x | x_main$std_error <- list(numeric(0)) | 
| 392 | 18x |     if (length(xs_level[[x]][-1]) == 1) { | 
| 393 | 8x | x_main$pvalue <- list(numeric(0)) | 
| 394 | 8x | x_main$df <- list(numeric(0)) | 
| 395 | } | |
| 396 | 18x | x_main$is_variable_summary <- TRUE | 
| 397 | 18x | x_main$is_term_summary <- FALSE | 
| 398 | 18x | x_stats <- rbind(x_main, x_stats) | 
| 399 | } | |
| 400 | 78x | x_stats$variable <- x | 
| 401 | 78x |   x_stats$variable_label <- if (inherits(fit_glm, "glm")) { | 
| 402 | 66x | formatters::var_labels(fit_glm$data[x], fill = TRUE) | 
| 403 |   } else { | |
| 404 | 12x | x | 
| 405 | } | |
| 406 | 78x | x_stats$interaction <- "" | 
| 407 | 78x | x_stats$interaction_label <- "" | 
| 408 | 78x | x_stats$reference <- "" | 
| 409 | 78x | x_stats$reference_label <- "" | 
| 410 | 78x | rownames(x_stats) <- NULL | 
| 411 | 78x | x_stats[c( | 
| 412 | 78x | "variable", | 
| 413 | 78x | "variable_label", | 
| 414 | 78x | "term", | 
| 415 | 78x | "term_label", | 
| 416 | 78x | "interaction", | 
| 417 | 78x | "interaction_label", | 
| 418 | 78x | "reference", | 
| 419 | 78x | "reference_label", | 
| 420 | 78x | "estimate", | 
| 421 | 78x | "std_error", | 
| 422 | 78x | "df", | 
| 423 | 78x | "pvalue", | 
| 424 | 78x | "is_variable_summary", | 
| 425 | 78x | "is_term_summary" | 
| 426 | )] | |
| 427 | } | |
| 428 | ||
| 429 | #' @describeIn h_logistic_regression Helper function to tabulate the interaction term | |
| 430 | #' results of a logistic regression model. | |
| 431 | #' | |
| 432 | #' @return Tabulated interaction term results from a logistic regression model. | |
| 433 | #' | |
| 434 | #' @examples | |
| 435 | #' h_glm_interaction_extract("ARMCD:AGE", mod2) | |
| 436 | #' | |
| 437 | #' @export | |
| 438 | h_glm_interaction_extract <- function(x, fit_glm) { | |
| 439 | 7x | vars <- h_get_interaction_vars(fit_glm) | 
| 440 | 7x | xs_class <- attr(fit_glm$terms, "dataClasses") | 
| 441 | ||
| 442 | 7x | checkmate::assert_string(x) | 
| 443 | ||
| 444 | # Only take two-way interaction | |
| 445 | 7x | checkmate::assert_vector(vars, len = 2) | 
| 446 | ||
| 447 | # Only consider simple case: first variable in interaction is arm, a categorical variable | |
| 448 | 7x | checkmate::assert_disjunct(xs_class[vars[1]], "numeric") | 
| 449 | ||
| 450 | 7x | xs_level <- fit_glm$xlevels | 
| 451 | 7x | xs_coef <- summary(fit_glm)$coefficients | 
| 452 | 7x | main_effects <- car::Anova(fit_glm, type = 3, test.statistic = "Wald") | 
| 453 | 7x |   stats <- c("estimate" = "Estimate", "std_error" = "Std. Error", "pvalue" = "Pr(>|z|)") | 
| 454 | 7x | v1_comp <- xs_level[[vars[1]]][-1] | 
| 455 | 7x |   if (xs_class[vars[2]] == "numeric") { | 
| 456 | 4x | x_stats <- as.data.frame( | 
| 457 | 4x | xs_coef[paste0(vars[1], v1_comp, ":", vars[2]), stats, drop = FALSE], | 
| 458 | 4x | stringsAsFactors = FALSE | 
| 459 | ) | |
| 460 | 4x | colnames(x_stats) <- names(stats) | 
| 461 | 4x | x_stats$term <- v1_comp | 
| 462 | 4x | x_numbers <- table(fit_glm$data[[vars[1]]]) | 
| 463 | 4x | x_stats$term_label <- h_simple_term_labels(v1_comp, x_numbers) | 
| 464 | 4x | v1_ref <- xs_level[[vars[1]]][1] | 
| 465 | 4x | term_main <- v1_ref | 
| 466 | 4x | ref_label <- h_simple_term_labels(v1_ref, x_numbers) | 
| 467 | 3x |   } else if (xs_class[vars[2]] != "numeric") { | 
| 468 | 3x | v2_comp <- xs_level[[vars[2]]][-1] | 
| 469 | 3x | v1_v2_grid <- expand.grid(v1 = v1_comp, v2 = v2_comp) | 
| 470 | 3x | x_sel <- paste( | 
| 471 | 3x | paste0(vars[1], v1_v2_grid$v1), | 
| 472 | 3x | paste0(vars[2], v1_v2_grid$v2), | 
| 473 | 3x | sep = ":" | 
| 474 | ) | |
| 475 | 3x | x_stats <- as.data.frame(xs_coef[x_sel, stats, drop = FALSE], stringsAsFactors = FALSE) | 
| 476 | 3x | colnames(x_stats) <- names(stats) | 
| 477 | 3x | x_stats$term <- paste(v1_v2_grid$v1, "*", v1_v2_grid$v2) | 
| 478 | 3x | x_numbers <- table(fit_glm$data[[vars[1]]], fit_glm$data[[vars[2]]]) | 
| 479 | 3x | x_stats$term_label <- h_interaction_term_labels(v1_v2_grid$v1, v1_v2_grid$v2, x_numbers) | 
| 480 | 3x | v1_ref <- xs_level[[vars[1]]][1] | 
| 481 | 3x | v2_ref <- xs_level[[vars[2]]][1] | 
| 482 | 3x | term_main <- paste(vars[1], vars[2], sep = " * ") | 
| 483 | 3x | ref_label <- h_interaction_term_labels(v1_ref, v2_ref, x_numbers, any = TRUE) | 
| 484 | } | |
| 485 | 7x | x_stats$df <- as.list(1) | 
| 486 | 7x | x_stats$pvalue <- as.list(x_stats$pvalue) | 
| 487 | 7x | x_stats$is_variable_summary <- FALSE | 
| 488 | 7x | x_stats$is_term_summary <- TRUE | 
| 489 | 7x | x_main <- data.frame( | 
| 490 | 7x | pvalue = main_effects[x, "Pr(>Chisq)", drop = TRUE], | 
| 491 | 7x | term = term_main, | 
| 492 | 7x |     term_label = paste("Reference", ref_label), | 
| 493 | 7x | df = main_effects[x, "Df", drop = TRUE], | 
| 494 | 7x | stringsAsFactors = FALSE | 
| 495 | ) | |
| 496 | 7x | x_main$pvalue <- as.list(x_main$pvalue) | 
| 497 | 7x | x_main$df <- as.list(x_main$df) | 
| 498 | 7x | x_main$estimate <- list(numeric(0)) | 
| 499 | 7x | x_main$std_error <- list(numeric(0)) | 
| 500 | 7x | x_main$is_variable_summary <- TRUE | 
| 501 | 7x | x_main$is_term_summary <- FALSE | 
| 502 | ||
| 503 | 7x | x_stats <- rbind(x_main, x_stats) | 
| 504 | 7x | x_stats$variable <- x | 
| 505 | 7x | x_stats$variable_label <- paste( | 
| 506 | 7x | "Interaction of", | 
| 507 | 7x | formatters::var_labels(fit_glm$data[vars[1]], fill = TRUE), | 
| 508 | "*", | |
| 509 | 7x | formatters::var_labels(fit_glm$data[vars[2]], fill = TRUE) | 
| 510 | ) | |
| 511 | 7x | x_stats$interaction <- "" | 
| 512 | 7x | x_stats$interaction_label <- "" | 
| 513 | 7x | x_stats$reference <- "" | 
| 514 | 7x | x_stats$reference_label <- "" | 
| 515 | 7x | rownames(x_stats) <- NULL | 
| 516 | 7x | x_stats[c( | 
| 517 | 7x | "variable", | 
| 518 | 7x | "variable_label", | 
| 519 | 7x | "term", | 
| 520 | 7x | "term_label", | 
| 521 | 7x | "interaction", | 
| 522 | 7x | "interaction_label", | 
| 523 | 7x | "reference", | 
| 524 | 7x | "reference_label", | 
| 525 | 7x | "estimate", | 
| 526 | 7x | "std_error", | 
| 527 | 7x | "df", | 
| 528 | 7x | "pvalue", | 
| 529 | 7x | "is_variable_summary", | 
| 530 | 7x | "is_term_summary" | 
| 531 | )] | |
| 532 | } | |
| 533 | ||
| 534 | #' @describeIn h_logistic_regression Helper function to tabulate the interaction | |
| 535 | #' results of a logistic regression model. This basically is a wrapper for | |
| 536 | #' [h_or_interaction()] and [h_glm_simple_term_extract()] which puts the results | |
| 537 | #' in the right data frame format. | |
| 538 | #' | |
| 539 | #' @return A `data.frame` of tabulated interaction term results from a logistic regression model. | |
| 540 | #' | |
| 541 | #' @examples | |
| 542 | #' h_glm_inter_term_extract("AGE", "ARMCD", mod2) | |
| 543 | #' | |
| 544 | #' @export | |
| 545 | h_glm_inter_term_extract <- function(odds_ratio_var, | |
| 546 | interaction_var, | |
| 547 | fit_glm, | |
| 548 |                                      ...) { | |
| 549 | # First obtain the main effects. | |
| 550 | 13x | main_stats <- h_glm_simple_term_extract(odds_ratio_var, fit_glm) | 
| 551 | 13x | main_stats$is_reference_summary <- FALSE | 
| 552 | 13x | main_stats$odds_ratio <- NA | 
| 553 | 13x | main_stats$lcl <- NA | 
| 554 | 13x | main_stats$ucl <- NA | 
| 555 | ||
| 556 | # Then we get the odds ratio estimates and put into df form. | |
| 557 | 13x | or_numbers <- h_or_interaction(odds_ratio_var, interaction_var, fit_glm, ...) | 
| 558 | 13x | is_num_or_var <- attr(fit_glm$terms, "dataClasses")[odds_ratio_var] == "numeric" | 
| 559 | ||
| 560 | 13x |   if (is_num_or_var) { | 
| 561 | # Numeric OR variable case. | |
| 562 | 4x | references <- names(or_numbers) | 
| 563 | 4x | n_ref <- length(references) | 
| 564 | ||
| 565 | 4x |     extract_from_list <- function(l, name, pos = 1) { | 
| 566 | 12x | unname(unlist( | 
| 567 | 12x |         lapply(or_numbers, function(x) { | 
| 568 | 36x | x[[name]][pos] | 
| 569 | }) | |
| 570 | )) | |
| 571 | } | |
| 572 | 4x | or_stats <- data.frame( | 
| 573 | 4x | variable = odds_ratio_var, | 
| 574 | 4x | variable_label = unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE)), | 
| 575 | 4x | term = odds_ratio_var, | 
| 576 | 4x | term_label = unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE)), | 
| 577 | 4x | interaction = interaction_var, | 
| 578 | 4x | interaction_label = unname(formatters::var_labels(fit_glm$data[interaction_var], fill = TRUE)), | 
| 579 | 4x | reference = references, | 
| 580 | 4x | reference_label = references, | 
| 581 | 4x | estimate = NA, | 
| 582 | 4x | std_error = NA, | 
| 583 | 4x | odds_ratio = extract_from_list(or_numbers, "or"), | 
| 584 | 4x | lcl = extract_from_list(or_numbers, "ci", pos = "lcl"), | 
| 585 | 4x | ucl = extract_from_list(or_numbers, "ci", pos = "ucl"), | 
| 586 | 4x | df = NA, | 
| 587 | 4x | pvalue = NA, | 
| 588 | 4x | is_variable_summary = FALSE, | 
| 589 | 4x | is_term_summary = FALSE, | 
| 590 | 4x | is_reference_summary = TRUE | 
| 591 | ) | |
| 592 |   } else { | |
| 593 | # Categorical OR variable case. | |
| 594 | 9x | references <- names(or_numbers[[1]]) | 
| 595 | 9x | n_ref <- length(references) | 
| 596 | ||
| 597 | 9x |     extract_from_list <- function(l, name, pos = 1) { | 
| 598 | 27x | unname(unlist( | 
| 599 | 27x |         lapply(or_numbers, function(x) { | 
| 600 | 48x | lapply(x, function(y) y[[name]][pos]) | 
| 601 | }) | |
| 602 | )) | |
| 603 | } | |
| 604 | 9x | or_stats <- data.frame( | 
| 605 | 9x | variable = odds_ratio_var, | 
| 606 | 9x | variable_label = unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE)), | 
| 607 | 9x | term = rep(names(or_numbers), each = n_ref), | 
| 608 | 9x | term_label = h_simple_term_labels(rep(names(or_numbers), each = n_ref), table(fit_glm$data[[odds_ratio_var]])), | 
| 609 | 9x | interaction = interaction_var, | 
| 610 | 9x | interaction_label = unname(formatters::var_labels(fit_glm$data[interaction_var], fill = TRUE)), | 
| 611 | 9x | reference = unlist(lapply(or_numbers, names)), | 
| 612 | 9x | reference_label = unlist(lapply(or_numbers, names)), | 
| 613 | 9x | estimate = NA, | 
| 614 | 9x | std_error = NA, | 
| 615 | 9x | odds_ratio = extract_from_list(or_numbers, "or"), | 
| 616 | 9x | lcl = extract_from_list(or_numbers, "ci", pos = "lcl"), | 
| 617 | 9x | ucl = extract_from_list(or_numbers, "ci", pos = "ucl"), | 
| 618 | 9x | df = NA, | 
| 619 | 9x | pvalue = NA, | 
| 620 | 9x | is_variable_summary = FALSE, | 
| 621 | 9x | is_term_summary = FALSE, | 
| 622 | 9x | is_reference_summary = TRUE | 
| 623 | ) | |
| 624 | } | |
| 625 | ||
| 626 | 13x | df <- rbind( | 
| 627 | 13x | main_stats[, names(or_stats)], | 
| 628 | 13x | or_stats | 
| 629 | ) | |
| 630 | 13x | df[order(-df$is_variable_summary, df$term, -df$is_term_summary, df$reference), ] | 
| 631 | } | |
| 632 | ||
| 633 | #' @describeIn h_logistic_regression Helper function to tabulate the results including | |
| 634 | #' odds ratios and confidence intervals of simple terms. | |
| 635 | #' | |
| 636 | #' @return Tabulated statistics for the given variable(s) from the logistic regression model. | |
| 637 | #' | |
| 638 | #' @examples | |
| 639 | #' h_logistic_simple_terms("AGE", mod1) | |
| 640 | #' | |
| 641 | #' @export | |
| 642 | h_logistic_simple_terms <- function(x, fit_glm, conf_level = 0.95) { | |
| 643 | 53x |   checkmate::assert_multi_class(fit_glm, c("glm", "clogit")) | 
| 644 | 53x |   if (inherits(fit_glm, "glm")) { | 
| 645 | 42x | checkmate::assert_set_equal(fit_glm$family$family, "binomial") | 
| 646 | } | |
| 647 | 53x | terms_name <- attr(stats::terms(fit_glm), "term.labels") | 
| 648 | 53x | xs_class <- attr(fit_glm$terms, "dataClasses") | 
| 649 | 53x | interaction <- terms_name[which(!terms_name %in% names(xs_class))] | 
| 650 | 53x | checkmate::assert_subset(x, terms_name) | 
| 651 | 53x |   if (length(interaction) != 0) { | 
| 652 | # Make sure any item in x is not part of interaction term | |
| 653 | 2x | checkmate::assert_disjunct(x, unlist(strsplit(interaction, ":"))) | 
| 654 | } | |
| 655 | 53x | x_stats <- lapply(x, h_glm_simple_term_extract, fit_glm) | 
| 656 | 53x | x_stats <- do.call(rbind, x_stats) | 
| 657 | 53x | q_norm <- stats::qnorm((1 + conf_level) / 2) | 
| 658 | 53x | x_stats$odds_ratio <- lapply(x_stats$estimate, exp) | 
| 659 | 53x | x_stats$lcl <- Map(function(or, se) exp(log(or) - q_norm * se), x_stats$odds_ratio, x_stats$std_error) | 
| 660 | 53x | x_stats$ucl <- Map(function(or, se) exp(log(or) + q_norm * se), x_stats$odds_ratio, x_stats$std_error) | 
| 661 | 53x | x_stats$ci <- Map(function(lcl, ucl) c(lcl, ucl), lcl = x_stats$lcl, ucl = x_stats$ucl) | 
| 662 | 53x | x_stats | 
| 663 | } | |
| 664 | ||
| 665 | #' @describeIn h_logistic_regression Helper function to tabulate the results including | |
| 666 | #' odds ratios and confidence intervals of interaction terms. | |
| 667 | #' | |
| 668 | #' @return Tabulated statistics for the given variable(s) from the logistic regression model. | |
| 669 | #' | |
| 670 | #' @examples | |
| 671 | #' h_logistic_inter_terms(c("RACE", "AGE", "ARMCD", "AGE:ARMCD"), mod2) | |
| 672 | #' | |
| 673 | #' @export | |
| 674 | h_logistic_inter_terms <- function(x, | |
| 675 | fit_glm, | |
| 676 | conf_level = 0.95, | |
| 677 |                                    at = NULL) { | |
| 678 | # Find out the interaction variables and interaction term. | |
| 679 | 5x | inter_vars <- h_get_interaction_vars(fit_glm) | 
| 680 | 5x | checkmate::assert_vector(inter_vars, len = 2) | 
| 681 | ||
| 682 | ||
| 683 | 5x | inter_term_index <- intersect(grep(inter_vars[1], x), grep(inter_vars[2], x)) | 
| 684 | 5x | inter_term <- x[inter_term_index] | 
| 685 | ||
| 686 | # For the non-interaction vars we need the standard stuff. | |
| 687 | 5x | normal_terms <- setdiff(x, union(inter_vars, inter_term)) | 
| 688 | ||
| 689 | 5x | x_stats <- lapply(normal_terms, h_glm_simple_term_extract, fit_glm) | 
| 690 | 5x | x_stats <- do.call(rbind, x_stats) | 
| 691 | 5x | q_norm <- stats::qnorm((1 + conf_level) / 2) | 
| 692 | 5x | x_stats$odds_ratio <- lapply(x_stats$estimate, exp) | 
| 693 | 5x | x_stats$lcl <- Map(function(or, se) exp(log(or) - q_norm * se), x_stats$odds_ratio, x_stats$std_error) | 
| 694 | 5x | x_stats$ucl <- Map(function(or, se) exp(log(or) + q_norm * se), x_stats$odds_ratio, x_stats$std_error) | 
| 695 | 5x | normal_stats <- x_stats | 
| 696 | 5x | normal_stats$is_reference_summary <- FALSE | 
| 697 | ||
| 698 | # Now the interaction term itself. | |
| 699 | 5x | inter_term_stats <- h_glm_interaction_extract(inter_term, fit_glm) | 
| 700 | 5x | inter_term_stats$odds_ratio <- NA | 
| 701 | 5x | inter_term_stats$lcl <- NA | 
| 702 | 5x | inter_term_stats$ucl <- NA | 
| 703 | 5x | inter_term_stats$is_reference_summary <- FALSE | 
| 704 | ||
| 705 | 5x | is_intervar1_numeric <- attr(fit_glm$terms, "dataClasses")[inter_vars[1]] == "numeric" | 
| 706 | ||
| 707 | # Interaction stuff. | |
| 708 | 5x | inter_stats_one <- h_glm_inter_term_extract( | 
| 709 | 5x | inter_vars[1], | 
| 710 | 5x | inter_vars[2], | 
| 711 | 5x | fit_glm, | 
| 712 | 5x | conf_level = conf_level, | 
| 713 | 5x | at = `if`(is_intervar1_numeric, NULL, at) | 
| 714 | ) | |
| 715 | 5x | inter_stats_two <- h_glm_inter_term_extract( | 
| 716 | 5x | inter_vars[2], | 
| 717 | 5x | inter_vars[1], | 
| 718 | 5x | fit_glm, | 
| 719 | 5x | conf_level = conf_level, | 
| 720 | 5x | at = `if`(is_intervar1_numeric, at, NULL) | 
| 721 | ) | |
| 722 | ||
| 723 | # Now just combine everything in one data frame. | |
| 724 | 5x | col_names <- c( | 
| 725 | 5x | "variable", | 
| 726 | 5x | "variable_label", | 
| 727 | 5x | "term", | 
| 728 | 5x | "term_label", | 
| 729 | 5x | "interaction", | 
| 730 | 5x | "interaction_label", | 
| 731 | 5x | "reference", | 
| 732 | 5x | "reference_label", | 
| 733 | 5x | "estimate", | 
| 734 | 5x | "std_error", | 
| 735 | 5x | "df", | 
| 736 | 5x | "pvalue", | 
| 737 | 5x | "odds_ratio", | 
| 738 | 5x | "lcl", | 
| 739 | 5x | "ucl", | 
| 740 | 5x | "is_variable_summary", | 
| 741 | 5x | "is_term_summary", | 
| 742 | 5x | "is_reference_summary" | 
| 743 | ) | |
| 744 | 5x | df <- rbind( | 
| 745 | 5x | inter_stats_one[, col_names], | 
| 746 | 5x | inter_stats_two[, col_names], | 
| 747 | 5x | inter_term_stats[, col_names] | 
| 748 | ) | |
| 749 | 5x |   if (length(normal_terms) > 0) { | 
| 750 | 5x | df <- rbind( | 
| 751 | 5x | normal_stats[, col_names], | 
| 752 | 5x | df | 
| 753 | ) | |
| 754 | } | |
| 755 | 5x | df$ci <- combine_vectors(df$lcl, df$ucl) | 
| 756 | 5x | df | 
| 757 | } | 
| 1 | #' Convert `rtable` objects to `ggplot` objects | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("experimental")` | |
| 4 | #' | |
| 5 | #' Given a [rtables::rtable()] object, performs basic conversion to a [ggplot2::ggplot()] object built using | |
| 6 | #' functions from the `ggplot2` package. Any table titles and/or footnotes are ignored. | |
| 7 | #' | |
| 8 | #' @param tbl (`VTableTree`)\cr `rtables` table object. | |
| 9 | #' @param fontsize (`numeric(1)`)\cr font size. | |
| 10 | #' @param colwidths (`numeric` or `NULL`)\cr a vector of column widths. Each element's position in | |
| 11 | #' `colwidths` corresponds to the column of `tbl` in the same position. If `NULL`, column widths | |
| 12 | #' are calculated according to maximum number of characters per column. | |
| 13 | #' @param lbl_col_padding (`numeric`)\cr additional padding to use when calculating spacing between | |
| 14 | #' the first (label) column and the second column of `tbl`. If `colwidths` is specified, | |
| 15 | #' the width of the first column becomes `colwidths[1] + lbl_col_padding`. Defaults to 0. | |
| 16 | #' | |
| 17 | #' @return A `ggplot` object. | |
| 18 | #' | |
| 19 | #' @examples | |
| 20 | #' dta <- data.frame( | |
| 21 | #' ARM = rep(LETTERS[1:3], rep(6, 3)), | |
| 22 | #'   AVISIT  = rep(paste0("V", 1:3), 6), | |
| 23 | #' AVAL = c(9:1, rep(NA, 9)) | |
| 24 | #' ) | |
| 25 | #' | |
| 26 | #' lyt <- basic_table() %>% | |
| 27 | #' split_cols_by(var = "ARM") %>% | |
| 28 | #' split_rows_by(var = "AVISIT") %>% | |
| 29 | #' analyze_vars(vars = "AVAL") | |
| 30 | #' | |
| 31 | #' tbl <- build_table(lyt, df = dta) | |
| 32 | #' | |
| 33 | #' rtable2gg(tbl) | |
| 34 | #' | |
| 35 | #' rtable2gg(tbl, fontsize = 15, colwidths = c(2, 1, 1, 1)) | |
| 36 | #' | |
| 37 | #' @export | |
| 38 | rtable2gg <- function(tbl, fontsize = 12, colwidths = NULL, lbl_col_padding = 0) { | |
| 39 | 6x | mat <- rtables::matrix_form(tbl, indent_rownames = TRUE) | 
| 40 | 6x | mat_strings <- formatters::mf_strings(mat) | 
| 41 | 6x | mat_aligns <- formatters::mf_aligns(mat) | 
| 42 | 6x | mat_indent <- formatters::mf_rinfo(mat)$indent | 
| 43 | 6x | mat_display <- formatters::mf_display(mat) | 
| 44 | 6x | nlines_hdr <- formatters::mf_nlheader(mat) | 
| 45 | 6x | shared_hdr_rows <- which(apply(mat_display, 1, function(x) (any(!x)))) | 
| 46 | ||
| 47 | 6x | tbl_df <- data.frame(mat_strings) | 
| 48 | 6x | body_rows <- seq(nlines_hdr + 1, nrow(tbl_df)) | 
| 49 | 6x | mat_aligns <- apply(mat_aligns, 1:2, function(x) if (x == "left") 0 else if (x == "right") 1 else 0.5) | 
| 50 | ||
| 51 | # Apply indentation in first column | |
| 52 | 6x |   tbl_df[body_rows, 1] <- sapply(body_rows, function(i) { | 
| 53 | 42x | ind_i <- mat_indent[i - nlines_hdr] * 4 | 
| 54 | 18x |     if (ind_i > 0) paste0(paste(rep(" ", ind_i), collapse = ""), tbl_df[i, 1]) else tbl_df[i, 1] | 
| 55 | }) | |
| 56 | ||
| 57 | # Get column widths | |
| 58 | 6x |   if (is.null(colwidths)) { | 
| 59 | 6x | colwidths <- apply(tbl_df, 2, function(x) max(nchar(x))) + 1 | 
| 60 | } | |
| 61 | 6x | tot_width <- sum(colwidths) + lbl_col_padding | 
| 62 | ||
| 63 | 6x |   if (length(shared_hdr_rows) > 0) { | 
| 64 | 5x | tbl_df <- tbl_df[-shared_hdr_rows, ] | 
| 65 | 5x | mat_aligns <- mat_aligns[-shared_hdr_rows, ] | 
| 66 | } | |
| 67 | ||
| 68 | 6x | res <- ggplot(data = tbl_df) + | 
| 69 | 6x | theme_void() + | 
| 70 | 6x | scale_x_continuous(limits = c(0, tot_width)) + | 
| 71 | 6x | scale_y_continuous(limits = c(0, nrow(mat_strings))) + | 
| 72 | 6x | annotate( | 
| 73 | 6x | "segment", | 
| 74 | 6x | x = 0, xend = tot_width, | 
| 75 | 6x | y = nrow(mat_strings) - nlines_hdr + 0.5, yend = nrow(mat_strings) - nlines_hdr + 0.5 | 
| 76 | ) | |
| 77 | ||
| 78 | # If header content spans multiple columns, center over these columns | |
| 79 | 6x |   if (length(shared_hdr_rows) > 0) { | 
| 80 | 5x | mat_strings[shared_hdr_rows, ] <- trimws(mat_strings[shared_hdr_rows, ]) | 
| 81 | 5x |     for (hr in shared_hdr_rows) { | 
| 82 | 6x | hdr_lbls <- mat_strings[1:hr, mat_display[hr, -1]] | 
| 83 | 6x | hdr_lbls <- matrix(hdr_lbls[nzchar(hdr_lbls)], nrow = hr) | 
| 84 | 6x |       for (idx_hl in seq_len(ncol(hdr_lbls))) { | 
| 85 | 13x | cur_lbl <- tail(hdr_lbls[, idx_hl], 1) | 
| 86 | 13x |         which_cols <- if (hr == 1) { | 
| 87 | 9x | which(mat_strings[hr, ] == hdr_lbls[idx_hl]) | 
| 88 | 13x |         } else { # for >2 col splits, only print labels for each unique combo of nested columns | 
| 89 | 4x | which( | 
| 90 | 4x | apply(mat_strings[1:hr, ], 2, function(x) all(x == hdr_lbls[1:hr, idx_hl])) | 
| 91 | ) | |
| 92 | } | |
| 93 | 13x | line_pos <- c( | 
| 94 | 13x | sum(colwidths[1:(which_cols[1] - 1)]) + 1 + lbl_col_padding, | 
| 95 | 13x | sum(colwidths[1:max(which_cols)]) - 1 + lbl_col_padding | 
| 96 | ) | |
| 97 | ||
| 98 | 13x | res <- res + | 
| 99 | 13x | annotate( | 
| 100 | 13x | "text", | 
| 101 | 13x | x = mean(line_pos), | 
| 102 | 13x | y = nrow(mat_strings) + 1 - hr, | 
| 103 | 13x | label = cur_lbl, | 
| 104 | 13x | size = fontsize / .pt | 
| 105 | ) + | |
| 106 | 13x | annotate( | 
| 107 | 13x | "segment", | 
| 108 | 13x | x = line_pos[1], | 
| 109 | 13x | xend = line_pos[2], | 
| 110 | 13x | y = nrow(mat_strings) - hr + 0.5, | 
| 111 | 13x | yend = nrow(mat_strings) - hr + 0.5 | 
| 112 | ) | |
| 113 | } | |
| 114 | } | |
| 115 | } | |
| 116 | ||
| 117 | # Add table columns | |
| 118 | 6x |   for (i in seq_len(ncol(tbl_df))) { | 
| 119 | 40x | res <- res + annotate( | 
| 120 | 40x | "text", | 
| 121 | 40x | x = if (i == 1) 0 else sum(colwidths[1:i]) - 0.5 * colwidths[i] + lbl_col_padding, | 
| 122 | 40x | y = rev(seq_len(nrow(tbl_df))), | 
| 123 | 40x | label = tbl_df[, i], | 
| 124 | 40x | hjust = mat_aligns[, i], | 
| 125 | 40x | size = fontsize / .pt | 
| 126 | ) | |
| 127 | } | |
| 128 | ||
| 129 | 6x | res | 
| 130 | } | |
| 131 | ||
| 132 | #' Convert `data.frame` object to `ggplot` object | |
| 133 | #' | |
| 134 | #' @description `r lifecycle::badge("experimental")` | |
| 135 | #' | |
| 136 | #' Given a `data.frame` object, performs basic conversion to a [ggplot2::ggplot()] object built using | |
| 137 | #' functions from the `ggplot2` package. | |
| 138 | #' | |
| 139 | #' @param df (`data.frame`)\cr a data frame. | |
| 140 | #' @param colwidths (`numeric` or `NULL`)\cr a vector of column widths. Each element's position in | |
| 141 | #' `colwidths` corresponds to the column of `df` in the same position. If `NULL`, column widths | |
| 142 | #' are calculated according to maximum number of characters per column. | |
| 143 | #' @param font_size (`numeric(1)`)\cr font size. | |
| 144 | #' @param col_labels (`flag`)\cr whether the column names (labels) of `df` should be used as the first row | |
| 145 | #' of the output table. | |
| 146 | #' @param col_lab_fontface (`string`)\cr font face to apply to the first row (of column labels | |
| 147 | #' if `col_labels = TRUE`). Defaults to `"bold"`. | |
| 148 | #' @param hline (`flag`)\cr whether a horizontal line should be printed below the first row of the table. | |
| 149 | #' @param bg_fill (`string`)\cr table background fill color. | |
| 150 | #' | |
| 151 | #' @return A `ggplot` object. | |
| 152 | #' | |
| 153 | #' @examples | |
| 154 | #' \dontrun{ | |
| 155 | #' df2gg(head(iris, 5)) | |
| 156 | #' | |
| 157 | #' df2gg(head(iris, 5), font_size = 15, colwidths = c(1, 1, 1, 1, 1)) | |
| 158 | #' } | |
| 159 | #' @keywords internal | |
| 160 | df2gg <- function(df, | |
| 161 | colwidths = NULL, | |
| 162 | font_size = 10, | |
| 163 | col_labels = TRUE, | |
| 164 | col_lab_fontface = "bold", | |
| 165 | hline = TRUE, | |
| 166 |                   bg_fill = NULL) { | |
| 167 | # convert to text | |
| 168 | 19x | df <- as.data.frame(apply(df, 1:2, function(x) if (is.na(x)) "NA" else as.character(x))) | 
| 169 | ||
| 170 | 19x |   if (col_labels) { | 
| 171 | 10x | df <- as.matrix(df) | 
| 172 | 10x | df <- rbind(colnames(df), df) | 
| 173 | } | |
| 174 | ||
| 175 | # Get column widths | |
| 176 | 19x |   if (is.null(colwidths)) { | 
| 177 | 1x | colwidths <- apply(df, 2, function(x) max(nchar(x), na.rm = TRUE)) | 
| 178 | } | |
| 179 | 19x | tot_width <- sum(colwidths) | 
| 180 | ||
| 181 | 19x | res <- ggplot(data = df) + | 
| 182 | 19x | theme_void() + | 
| 183 | 19x | scale_x_continuous(limits = c(0, tot_width)) + | 
| 184 | 19x | scale_y_continuous(limits = c(1, nrow(df))) | 
| 185 | ||
| 186 | 9x | if (!is.null(bg_fill)) res <- res + theme(plot.background = element_rect(fill = bg_fill)) | 
| 187 | ||
| 188 | 19x |   if (hline) { | 
| 189 | 10x | res <- res + | 
| 190 | 10x | annotate( | 
| 191 | 10x | "segment", | 
| 192 | 10x | x = 0 + 0.2 * colwidths[2], xend = tot_width - 0.1 * tail(colwidths, 1), | 
| 193 | 10x | y = nrow(df) - 0.5, yend = nrow(df) - 0.5 | 
| 194 | ) | |
| 195 | } | |
| 196 | ||
| 197 | 19x |   for (i in seq_len(ncol(df))) { | 
| 198 | 86x | line_pos <- c( | 
| 199 | 86x | if (i == 1) 0 else sum(colwidths[1:(i - 1)]), | 
| 200 | 86x | sum(colwidths[1:i]) | 
| 201 | ) | |
| 202 | 86x | res <- res + | 
| 203 | 86x | annotate( | 
| 204 | 86x | "text", | 
| 205 | 86x | x = mean(line_pos), | 
| 206 | 86x | y = rev(seq_len(nrow(df))), | 
| 207 | 86x | label = df[, i], | 
| 208 | 86x | size = font_size / .pt, | 
| 209 | 86x |         fontface = if (col_labels) { | 
| 210 | 32x |           c(col_lab_fontface, rep("plain", nrow(df) - 1)) | 
| 211 |         } else { | |
| 212 | 54x |           rep("plain", nrow(df)) | 
| 213 | } | |
| 214 | ) | |
| 215 | } | |
| 216 | ||
| 217 | 19x | res | 
| 218 | } | 
| 1 | #' Tabulate binary response by subgroup | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The [tabulate_rsp_subgroups()] function creates a layout element to tabulate binary response by subgroup, returning | |
| 6 | #' statistics including response rate and odds ratio for each population subgroup. The table is created from `df`, a | |
| 7 | #' list of data frames returned by [extract_rsp_subgroups()], with the statistics to include specified via the `vars` | |
| 8 | #' parameter. | |
| 9 | #' | |
| 10 | #' A forest plot can be created from the resulting table using the [g_forest()] function. | |
| 11 | #' | |
| 12 | #' @inheritParams extract_rsp_subgroups | |
| 13 | #' @inheritParams argument_convention | |
| 14 | #' | |
| 15 | #' @details These functions create a layout starting from a data frame which contains | |
| 16 | #' the required statistics. Tables typically used as part of forest plot. | |
| 17 | #' | |
| 18 | #' @seealso [extract_rsp_subgroups()] | |
| 19 | #' | |
| 20 | #' @examples | |
| 21 | #' library(dplyr) | |
| 22 | #' library(forcats) | |
| 23 | #' | |
| 24 | #' adrs <- tern_ex_adrs | |
| 25 | #' adrs_labels <- formatters::var_labels(adrs) | |
| 26 | #' | |
| 27 | #' adrs_f <- adrs %>% | |
| 28 | #' filter(PARAMCD == "BESRSPI") %>% | |
| 29 | #'   filter(ARM %in% c("A: Drug X", "B: Placebo")) %>% | |
| 30 | #' droplevels() %>% | |
| 31 | #' mutate( | |
| 32 | #' # Reorder levels of factor to make the placebo group the reference arm. | |
| 33 | #' ARM = fct_relevel(ARM, "B: Placebo"), | |
| 34 | #' rsp = AVALC == "CR" | |
| 35 | #' ) | |
| 36 | #' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response") | |
| 37 | #' | |
| 38 | #' # Unstratified analysis. | |
| 39 | #' df <- extract_rsp_subgroups( | |
| 40 | #'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")), | |
| 41 | #' data = adrs_f | |
| 42 | #' ) | |
| 43 | #' df | |
| 44 | #' | |
| 45 | #' # Stratified analysis. | |
| 46 | #' df_strat <- extract_rsp_subgroups( | |
| 47 | #'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2"), strata = "STRATA1"), | |
| 48 | #' data = adrs_f | |
| 49 | #' ) | |
| 50 | #' df_strat | |
| 51 | #' | |
| 52 | #' # Grouping of the BMRKR2 levels. | |
| 53 | #' df_grouped <- extract_rsp_subgroups( | |
| 54 | #'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")), | |
| 55 | #' data = adrs_f, | |
| 56 | #' groups_lists = list( | |
| 57 | #' BMRKR2 = list( | |
| 58 | #' "low" = "LOW", | |
| 59 | #'       "low/medium" = c("LOW", "MEDIUM"), | |
| 60 | #'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH") | |
| 61 | #' ) | |
| 62 | #' ) | |
| 63 | #' ) | |
| 64 | #' df_grouped | |
| 65 | #' | |
| 66 | #' @name response_subgroups | |
| 67 | #' @order 1 | |
| 68 | NULL | |
| 69 | ||
| 70 | #' Prepare response data for population subgroups in data frames | |
| 71 | #' | |
| 72 | #' @description `r lifecycle::badge("stable")` | |
| 73 | #' | |
| 74 | #' Prepares response rates and odds ratios for population subgroups in data frames. Simple wrapper | |
| 75 | #' for [h_odds_ratio_subgroups_df()] and [h_proportion_subgroups_df()]. Result is a list of two | |
| 76 | #' `data.frames`: `prop` and `or`. `variables` corresponds to the names of variables found in `data`, | |
| 77 | #' passed as a named `list` and requires elements `rsp`, `arm` and optionally `subgroups` and `strata`. | |
| 78 | #' `groups_lists` optionally specifies groupings for `subgroups` variables. | |
| 79 | #' | |
| 80 | #' @inheritParams argument_convention | |
| 81 | #' @inheritParams response_subgroups | |
| 82 | #' @param label_all (`string`)\cr label for the total population analysis. | |
| 83 | #' | |
| 84 | #' @return A named list of two elements: | |
| 85 | #' * `prop`: A `data.frame` containing columns `arm`, `n`, `n_rsp`, `prop`, `subgroup`, `var`, | |
| 86 | #' `var_label`, and `row_type`. | |
| 87 | #' * `or`: A `data.frame` containing columns `arm`, `n_tot`, `or`, `lcl`, `ucl`, `conf_level`, | |
| 88 | #' `subgroup`, `var`, `var_label`, and `row_type`. | |
| 89 | #' | |
| 90 | #' @seealso [response_subgroups] | |
| 91 | #' | |
| 92 | #' @export | |
| 93 | extract_rsp_subgroups <- function(variables, | |
| 94 | data, | |
| 95 | groups_lists = list(), | |
| 96 | conf_level = 0.95, | |
| 97 | method = NULL, | |
| 98 |                                   label_all = "All Patients") { | |
| 99 | 14x |   if ("strat" %in% names(variables)) { | 
| 100 | ! | warning( | 
| 101 | ! | "Warning: the `strat` element name of the `variables` list argument to `extract_rsp_subgroups() ", | 
| 102 | ! | "was deprecated in tern 0.9.4.\n ", | 
| 103 | ! | "Please use the name `strata` instead of `strat` in the `variables` argument." | 
| 104 | ) | |
| 105 | ! | variables[["strata"]] <- variables[["strat"]] | 
| 106 | } | |
| 107 | ||
| 108 | 14x | df_prop <- h_proportion_subgroups_df( | 
| 109 | 14x | variables, | 
| 110 | 14x | data, | 
| 111 | 14x | groups_lists = groups_lists, | 
| 112 | 14x | label_all = label_all | 
| 113 | ) | |
| 114 | 14x | df_or <- h_odds_ratio_subgroups_df( | 
| 115 | 14x | variables, | 
| 116 | 14x | data, | 
| 117 | 14x | groups_lists = groups_lists, | 
| 118 | 14x | conf_level = conf_level, | 
| 119 | 14x | method = method, | 
| 120 | 14x | label_all = label_all | 
| 121 | ) | |
| 122 | ||
| 123 | 14x | list(prop = df_prop, or = df_or) | 
| 124 | } | |
| 125 | ||
| 126 | #' @describeIn response_subgroups Formatted analysis function which is used as `afun` in `tabulate_rsp_subgroups()`. | |
| 127 | #' | |
| 128 | #' @return | |
| 129 | #' * `a_response_subgroups()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 130 | #' | |
| 131 | #' @keywords internal | |
| 132 | a_response_subgroups <- function(df, | |
| 133 | labelstr = "", | |
| 134 | ..., | |
| 135 | .stats = NULL, | |
| 136 | .stat_names = NULL, | |
| 137 | .formats = NULL, | |
| 138 | .labels = NULL, | |
| 139 |                                  .indent_mods = NULL) { | |
| 140 | # Check for additional parameters to the statistics function | |
| 141 | 375x | dots_extra_args <- list(...) | 
| 142 | 375x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 143 | 375x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 144 | 375x | cur_col_stat <- extra_afun_params$.var %||% .stats | 
| 145 | ||
| 146 | # Uniquely name & label rows | |
| 147 | 375x |   var_lvls <- if ("biomarker" %in% names(dots_extra_args) && "biomarker" %in% names(df)) { | 
| 148 | 90x |     if ("overall" %in% names(dots_extra_args)) { # label rows for (nested) biomarker tables - e.g. "AGE", "BMRKR1" | 
| 149 | 42x | as.character(df$biomarker) | 
| 150 | 375x |     } else { # data rows for (nested) biomarker tables - e.g. "AGE.LOW", "BMRKR1.Total Patients" | 
| 151 | 48x | paste(as.character(df$biomarker), as.character(df$subgroup), sep = ".") | 
| 152 | } | |
| 153 | 375x |   } else { # data rows for non-biomarker tables - e.g. "Total Patients", "F", "M" | 
| 154 | 285x | make.unique(as.character(df$subgroup)) | 
| 155 | } | |
| 156 | ||
| 157 | # if empty, return NA | |
| 158 | 375x |   if (nrow(df) == 0) { | 
| 159 | 1x | return(in_rows(.list = list(NA) %>% stats::setNames(cur_col_stat))) | 
| 160 | } | |
| 161 | ||
| 162 | # Main statistics taken from df | |
| 163 | 374x | x_stats <- as.list(df) | 
| 164 | ||
| 165 | # Fill in formatting defaults | |
| 166 | 374x |   .stats <- get_stats("tabulate_rsp_subgroups", stats_in = cur_col_stat) | 
| 167 | 374x | levels_per_stats <- rep(list(var_lvls), length(.stats)) %>% setNames(.stats) | 
| 168 | 374x | .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) | 
| 169 | 374x | .labels <- get_labels_from_stats( | 
| 170 | 374x | .stats, .labels, levels_per_stats, | 
| 171 | # default labels are pre-determined in extract_*() function | |
| 172 | 374x | tern_defaults = as.list(as.character(df$subgroup)) %>% setNames(var_lvls) | 
| 173 | ) | |
| 174 | 374x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) | 
| 175 | ||
| 176 | 374x | x_stats <- lapply( | 
| 177 | 374x | .stats, | 
| 178 | 374x | function(x) x_stats[[x]] %>% stats::setNames(var_lvls) | 
| 179 | ) %>% | |
| 180 | 374x | stats::setNames(.stats) %>% | 
| 181 | 374x | .unlist_keep_nulls() | 
| 182 | ||
| 183 | 374x |   .nms <- if ("biomarker" %in% names(dots_extra_args)) var_lvls else names(.labels) | 
| 184 | ||
| 185 | # Auto format handling | |
| 186 | 374x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 187 | ||
| 188 | # Get and check statistical names | |
| 189 | 374x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 190 | ||
| 191 | 374x | in_rows( | 
| 192 | 374x | .list = x_stats, | 
| 193 | 374x | .formats = .formats, | 
| 194 | 374x | .names = .nms, | 
| 195 | 374x | .stat_names = .stat_names, | 
| 196 | 374x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 197 | 374x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 198 | ) | |
| 199 | } | |
| 200 | ||
| 201 | #' @describeIn response_subgroups Table-creating function which creates a table | |
| 202 | #' summarizing binary response by subgroup. This function is a wrapper for [rtables::analyze_colvars()] | |
| 203 | #' and [rtables::summarize_row_groups()]. | |
| 204 | #' | |
| 205 | #' @param df (`list`)\cr a list of data frames containing all analysis variables. List should be | |
| 206 | #' created using [extract_rsp_subgroups()]. | |
| 207 | #' @param vars (`character`)\cr the names of statistics to be reported among: | |
| 208 | #' * `n`: Total number of observations per group. | |
| 209 | #' * `n_rsp`: Number of responders per group. | |
| 210 | #' * `prop`: Proportion of responders. | |
| 211 | #' * `n_tot`: Total number of observations. | |
| 212 | #' * `or`: Odds ratio. | |
| 213 | #' * `ci` : Confidence interval of odds ratio. | |
| 214 | #' * `pval`: p-value of the effect. | |
| 215 | #' Note, the statistics `n_tot`, `or`, and `ci` are required. | |
| 216 | #' @param riskdiff (`list`)\cr if a risk (proportion) difference column should be added, a list of settings to apply | |
| 217 | #' within the column. See [control_riskdiff()] for details. If `NULL`, no risk difference column will be added. If | |
| 218 | #' `riskdiff$arm_x` and `riskdiff$arm_y` are `NULL`, the first level of `df$prop$arm` will be used as `arm_x` and | |
| 219 | #' the second level as `arm_y`. | |
| 220 | #' | |
| 221 | #' @return An `rtables` table summarizing binary response by subgroup. | |
| 222 | #' | |
| 223 | #' @examples | |
| 224 | #' # Table with default columns | |
| 225 | #' basic_table() %>% | |
| 226 | #' tabulate_rsp_subgroups(df) | |
| 227 | #' | |
| 228 | #' # Table with selected columns | |
| 229 | #' basic_table() %>% | |
| 230 | #' tabulate_rsp_subgroups( | |
| 231 | #' df = df, | |
| 232 | #'     vars = c("n_tot", "n", "n_rsp", "prop", "or", "ci") | |
| 233 | #' ) | |
| 234 | #' | |
| 235 | #' # Table with risk difference column added | |
| 236 | #' basic_table() %>% | |
| 237 | #' tabulate_rsp_subgroups( | |
| 238 | #' df, | |
| 239 | #' riskdiff = control_riskdiff( | |
| 240 | #' arm_x = levels(df$prop$arm)[1], | |
| 241 | #' arm_y = levels(df$prop$arm)[2] | |
| 242 | #' ) | |
| 243 | #' ) | |
| 244 | #' | |
| 245 | #' @export | |
| 246 | #' @order 2 | |
| 247 | tabulate_rsp_subgroups <- function(lyt, | |
| 248 | df, | |
| 249 |                                    vars = c("n_tot", "n", "prop", "or", "ci"), | |
| 250 | groups_lists = list(), | |
| 251 | label_all = lifecycle::deprecated(), | |
| 252 | riskdiff = NULL, | |
| 253 | na_str = default_na_str(), | |
| 254 | ..., | |
| 255 | .stat_names = NULL, | |
| 256 | .formats = NULL, | |
| 257 | .labels = NULL, | |
| 258 |                                    .indent_mods = NULL) { | |
| 259 | 14x | checkmate::assert_list(riskdiff, null.ok = TRUE) | 
| 260 | 14x |   checkmate::assert_true(all(c("n_tot", "or", "ci") %in% vars)) | 
| 261 | 14x |   if ("pval" %in% vars && !"pval" %in% names(df$or)) { | 
| 262 | 1x | warning( | 
| 263 | 1x | 'The "pval" statistic has been selected but is not present in "df" so it will not be included in the output ', | 
| 264 | 1x | 'table. To include the "pval" statistic, please specify a p-value test when generating "df" via ', | 
| 265 | 1x | 'the "method" argument to `extract_rsp_subgroups()`. If method = "cmh", strata must also be specified via the ', | 
| 266 | 1x | '"variables" argument to `extract_rsp_subgroups()`.' | 
| 267 | ) | |
| 268 | } | |
| 269 | ||
| 270 | 14x |   if (lifecycle::is_present(label_all)) { | 
| 271 | ! | lifecycle::deprecate_warn( | 
| 272 | ! | "0.9.8", "tabulate_rsp_subgroups(label_all)", | 
| 273 | ! | details = | 
| 274 | ! | "Please assign the `label_all` parameter within the `extract_rsp_subgroups()` function when creating `df`." | 
| 275 | ) | |
| 276 | } | |
| 277 | ||
| 278 | # Process standard extra arguments | |
| 279 | 14x |   extra_args <- list(".stats" = vars) | 
| 280 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 281 | 1x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 282 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 283 | ! | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 284 | ||
| 285 | # Create "ci" column from "lcl" and "ucl" | |
| 286 | 14x | df$or$ci <- combine_vectors(df$or$lcl, df$or$ucl) | 
| 287 | ||
| 288 | # Extract additional parameters from df | |
| 289 | 14x | conf_level <- df$or$conf_level[1] | 
| 290 | 14x |   method <- if ("pval_label" %in% names(df$or)) df$or$pval_label[1] else NULL | 
| 291 | 14x | colvars <- d_rsp_subgroups_colvars(vars, conf_level = conf_level, method = method) | 
| 292 | 14x |   prop_vars <- intersect(colvars$vars, c("n", "prop", "n_rsp")) | 
| 293 | 14x |   or_vars <- intersect(names(colvars$labels), c("n_tot", "or", "ci", "pval")) | 
| 294 | 14x | colvars_prop <- list(vars = prop_vars, labels = colvars$labels[prop_vars]) | 
| 295 | 14x | colvars_or <- list(vars = or_vars, labels = colvars$labels[or_vars]) | 
| 296 | ||
| 297 | # Process additional arguments to the statistic function | |
| 298 | 14x | extra_args <- c( | 
| 299 | 14x | extra_args, | 
| 300 | 14x | groups_lists = list(groups_lists), conf_level = conf_level, method = method, | 
| 301 | ... | |
| 302 | ) | |
| 303 | ||
| 304 | # Adding additional info from layout to analysis function | |
| 305 | 14x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 306 | 14x | formals(a_response_subgroups) <- c(formals(a_response_subgroups), extra_args[[".additional_fun_parameters"]]) | 
| 307 | ||
| 308 | # Add risk difference column | |
| 309 | 14x |   if (!is.null(riskdiff)) { | 
| 310 | ! | if (is.null(riskdiff$arm_x)) riskdiff$arm_x <- levels(df$prop$arm)[1] | 
| 311 | ! | if (is.null(riskdiff$arm_y)) riskdiff$arm_y <- levels(df$prop$arm)[2] | 
| 312 | 2x | colvars_or$vars <- c(colvars_or$vars, "riskdiff") | 
| 313 | 2x | colvars_or$labels <- c(colvars_or$labels, riskdiff = riskdiff$col_label) | 
| 314 | 2x |     arm_cols <- paste(rep(c("n_rsp", "n_rsp", "n", "n")), c(riskdiff$arm_x, riskdiff$arm_y), sep = "_") | 
| 315 | ||
| 316 | 2x | df_prop_diff <- df$prop %>% | 
| 317 | 2x | dplyr::select(-"prop") %>% | 
| 318 | 2x | tidyr::pivot_wider( | 
| 319 | 2x |         id_cols = c("subgroup", "var", "var_label", "row_type"), | 
| 320 | 2x | names_from = "arm", | 
| 321 | 2x |         values_from = c("n", "n_rsp") | 
| 322 | ) %>% | |
| 323 | 2x | dplyr::rowwise() %>% | 
| 324 | 2x | dplyr::mutate( | 
| 325 | 2x | riskdiff = stat_propdiff_ci( | 
| 326 | 2x | x = as.list(.data[[arm_cols[1]]]), | 
| 327 | 2x | y = as.list(.data[[arm_cols[2]]]), | 
| 328 | 2x | N_x = .data[[arm_cols[3]]], | 
| 329 | 2x | N_y = .data[[arm_cols[4]]], | 
| 330 | 2x | pct = riskdiff$pct | 
| 331 | ) | |
| 332 | ) %>% | |
| 333 | 2x | dplyr::select(-dplyr::all_of(arm_cols)) | 
| 334 | ||
| 335 | 2x | df$or <- df$or %>% | 
| 336 | 2x | dplyr::left_join( | 
| 337 | 2x | df_prop_diff, | 
| 338 | 2x |         by = c("subgroup", "var", "var_label", "row_type") | 
| 339 | ) | |
| 340 | } | |
| 341 | ||
| 342 | # Add columns from table_prop (optional) | |
| 343 | 14x |   if (length(colvars_prop$vars) > 0) { | 
| 344 | 13x | lyt_prop <- split_cols_by(lyt = lyt, var = "arm") | 
| 345 | 13x | lyt_prop <- split_cols_by_multivar( | 
| 346 | 13x | lyt = lyt_prop, | 
| 347 | 13x | vars = colvars_prop$vars, | 
| 348 | 13x | varlabels = colvars_prop$labels | 
| 349 | ) | |
| 350 | ||
| 351 | # Add "All Patients" row | |
| 352 | 13x | lyt_prop <- split_rows_by( | 
| 353 | 13x | lyt = lyt_prop, | 
| 354 | 13x | var = "row_type", | 
| 355 | 13x |       split_fun = keep_split_levels("content"), | 
| 356 | 13x | nested = FALSE, | 
| 357 | 13x | child_labels = "hidden", | 
| 358 | 13x | parent_name = "All Patients" | 
| 359 | ) | |
| 360 | 13x | lyt_prop <- analyze_colvars( | 
| 361 | 13x | lyt = lyt_prop, | 
| 362 | 13x | afun = a_response_subgroups, | 
| 363 | 13x | na_str = na_str, | 
| 364 | 13x | extra_args = extra_args | 
| 365 | ) | |
| 366 | ||
| 367 | # Add analysis rows | |
| 368 | 13x |     if ("analysis" %in% df$prop$row_type) { | 
| 369 | 12x | lyt_prop <- split_rows_by( | 
| 370 | 12x | lyt = lyt_prop, | 
| 371 | 12x | var = "row_type", | 
| 372 | 12x |         split_fun = keep_split_levels("analysis"), | 
| 373 | 12x | nested = FALSE, | 
| 374 | 12x | child_labels = "hidden", | 
| 375 | 12x | parent_name = "analysis rows" | 
| 376 | ) | |
| 377 | 12x | lyt_prop <- split_rows_by(lyt = lyt_prop, var = "var_label", nested = TRUE) | 
| 378 | 12x | lyt_prop <- analyze_colvars( | 
| 379 | 12x | lyt = lyt_prop, | 
| 380 | 12x | afun = a_response_subgroups, | 
| 381 | 12x | na_str = na_str, | 
| 382 | 12x | inclNAs = TRUE, | 
| 383 | 12x | extra_args = extra_args | 
| 384 | ) | |
| 385 | } | |
| 386 | ||
| 387 | 13x | table_prop <- build_table(lyt_prop, df = df$prop) | 
| 388 |   } else { | |
| 389 | 1x | table_prop <- NULL | 
| 390 | } | |
| 391 | ||
| 392 |   # Add columns from table_or ("n_tot", "or", and "ci" required) | |
| 393 | 14x | lyt_or <- split_cols_by(lyt = lyt, var = "arm") | 
| 394 | 14x | lyt_or <- split_cols_by_multivar( | 
| 395 | 14x | lyt = lyt_or, | 
| 396 | 14x | vars = colvars_or$vars, | 
| 397 | 14x | varlabels = colvars_or$labels | 
| 398 | ) | |
| 399 | ||
| 400 | # Add "All Patients" row | |
| 401 | 14x | lyt_or <- split_rows_by( | 
| 402 | 14x | lyt = lyt_or, | 
| 403 | 14x | var = "row_type", | 
| 404 | 14x |     split_fun = keep_split_levels("content"), | 
| 405 | 14x | nested = FALSE, | 
| 406 | 14x | child_labels = "hidden", | 
| 407 | 14x | parent_name = "All Patients" | 
| 408 | ) | |
| 409 | 14x | lyt_or <- analyze_colvars( | 
| 410 | 14x | lyt = lyt_or, | 
| 411 | 14x | afun = a_response_subgroups, | 
| 412 | 14x | na_str = na_str, | 
| 413 | 14x | extra_args = extra_args | 
| 414 | ) %>% | |
| 415 | 14x |     append_topleft("Baseline Risk Factors") | 
| 416 | ||
| 417 | # Add analysis rows | |
| 418 | 14x |   if ("analysis" %in% df$or$row_type) { | 
| 419 | 13x | lyt_or <- split_rows_by( | 
| 420 | 13x | lyt = lyt_or, | 
| 421 | 13x | var = "row_type", | 
| 422 | 13x |       split_fun = keep_split_levels("analysis"), | 
| 423 | 13x | nested = FALSE, | 
| 424 | 13x | child_labels = "hidden", | 
| 425 | 13x | parent_name = "analysis rows" | 
| 426 | ) | |
| 427 | 13x | lyt_or <- split_rows_by(lyt = lyt_or, var = "var_label", nested = TRUE) | 
| 428 | 13x | lyt_or <- analyze_colvars( | 
| 429 | 13x | lyt = lyt_or, | 
| 430 | 13x | afun = a_response_subgroups, | 
| 431 | 13x | na_str = na_str, | 
| 432 | 13x | inclNAs = TRUE, | 
| 433 | 13x | extra_args = extra_args | 
| 434 | ) | |
| 435 | } | |
| 436 | ||
| 437 | 14x | table_or <- build_table(lyt_or, df = df$or) | 
| 438 | ||
| 439 | # Join tables, add forest plot attributes | |
| 440 | 14x |   n_tot_id <- match("n_tot", colvars_or$vars) | 
| 441 | 14x |   if (is.null(table_prop)) { | 
| 442 | 1x | result <- table_or | 
| 443 | 1x |     or_id <- match("or", colvars_or$vars) | 
| 444 | 1x |     ci_id <- match("ci", colvars_or$vars) | 
| 445 |   } else { | |
| 446 | 13x | result <- cbind_rtables(table_or[, n_tot_id], table_prop, table_or[, -n_tot_id]) | 
| 447 | 13x |     or_id <- 1L + ncol(table_prop) + match("or", colvars_or$vars[-n_tot_id]) | 
| 448 | 13x |     ci_id <- 1L + ncol(table_prop) + match("ci", colvars_or$vars[-n_tot_id]) | 
| 449 | 13x | n_tot_id <- 1L | 
| 450 | } | |
| 451 | 14x | structure( | 
| 452 | 14x | result, | 
| 453 | 14x | forest_header = paste0(levels(df$prop$arm), "\nBetter"), | 
| 454 | 14x | col_x = or_id, | 
| 455 | 14x | col_ci = ci_id, | 
| 456 | 14x | col_symbol_size = n_tot_id | 
| 457 | ) | |
| 458 | } | |
| 459 | ||
| 460 | #' Labels for column variables in binary response by subgroup table | |
| 461 | #' | |
| 462 | #' @description `r lifecycle::badge("stable")` | |
| 463 | #' | |
| 464 | #' Internal function to check variables included in [tabulate_rsp_subgroups()] and create column labels. | |
| 465 | #' | |
| 466 | #' @inheritParams argument_convention | |
| 467 | #' @inheritParams tabulate_rsp_subgroups | |
| 468 | #' | |
| 469 | #' @return A `list` of variables to tabulate and their labels. | |
| 470 | #' | |
| 471 | #' @export | |
| 472 | d_rsp_subgroups_colvars <- function(vars, | |
| 473 | conf_level = NULL, | |
| 474 |                                     method = NULL) { | |
| 475 | 20x | checkmate::assert_character(vars) | 
| 476 | 20x |   checkmate::assert_subset(c("n_tot", "or", "ci"), vars) | 
| 477 | 20x | checkmate::assert_subset( | 
| 478 | 20x | vars, | 
| 479 | 20x |     c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval") | 
| 480 | ) | |
| 481 | ||
| 482 | 20x | varlabels <- c( | 
| 483 | 20x | n = "n", | 
| 484 | 20x | n_rsp = "Responders", | 
| 485 | 20x | prop = "Response (%)", | 
| 486 | 20x | n_tot = "Total n", | 
| 487 | 20x | or = "Odds Ratio" | 
| 488 | ) | |
| 489 | 20x | colvars <- vars | 
| 490 | ||
| 491 | 20x |   if ("ci" %in% colvars) { | 
| 492 | 20x | checkmate::assert_false(is.null(conf_level)) | 
| 493 | ||
| 494 | 20x | varlabels <- c( | 
| 495 | 20x | varlabels, | 
| 496 | 20x | ci = paste0(100 * conf_level, "% CI") | 
| 497 | ) | |
| 498 | } | |
| 499 | ||
| 500 | 20x |   if ("pval" %in% colvars) { | 
| 501 | 14x | varlabels <- c( | 
| 502 | 14x | varlabels, | 
| 503 | 14x | pval = method | 
| 504 | ) | |
| 505 | } | |
| 506 | ||
| 507 | 20x | list( | 
| 508 | 20x | vars = colvars, | 
| 509 | 20x | labels = varlabels[vars] | 
| 510 | ) | |
| 511 | } | 
| 1 | #' Count patients with toxicity grades that have worsened from baseline by highest grade post-baseline | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [count_abnormal_lab_worsen_by_baseline()] creates a layout element to count patients with | |
| 6 | #' analysis toxicity grades which have worsened from baseline, categorized by highest (worst) grade post-baseline. | |
| 7 | #' | |
| 8 | #' This function analyzes primary analysis variable `var` which indicates analysis toxicity grades. Additional | |
| 9 | #' analysis variables that can be supplied as a list via the `variables` parameter are `id` (defaults to `USUBJID`), | |
| 10 | #' a variable to indicate unique subject identifiers, `baseline_var` (defaults to `BTOXGR`), a variable to indicate | |
| 11 | #' baseline toxicity grades, and `direction_var` (defaults to `GRADDIR`), a variable to indicate toxicity grade | |
| 12 | #' directions of interest to include (e.g. `"H"` (high), `"L"` (low), or `"B"` (both)). | |
| 13 | #' | |
| 14 | #' For the direction(s) specified in `direction_var`, patient counts by worst grade for patients who have | |
| 15 | #' worsened from baseline are calculated as follows: | |
| 16 | #' * `1` to `4`: The number of patients who have worsened from their baseline grades with worst | |
| 17 | #' grades 1-4, respectively. | |
| 18 | #' * `Any`: The total number of patients who have worsened from their baseline grades. | |
| 19 | #' | |
| 20 | #' Fractions are calculated by dividing the above counts by the number of patients who's analysis toxicity grades | |
| 21 | #' have worsened from baseline toxicity grades during treatment. | |
| 22 | #' | |
| 23 | #' Prior to using this function in your table layout you must use [rtables::split_rows_by()] to create a row | |
| 24 | #' split on variable `direction_var`. | |
| 25 | #' | |
| 26 | #' @inheritParams argument_convention | |
| 27 | #' @param variables (named `list` of `string`)\cr list of additional analysis variables including: | |
| 28 | #' * `id` (`string`)\cr subject variable name. | |
| 29 | #' * `baseline_var` (`string`)\cr name of the data column containing baseline toxicity variable. | |
| 30 | #' * `direction_var` (`string`)\cr see `direction_var` for more details. | |
| 31 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 32 | #' @param table_names `r lifecycle::badge("deprecated")` this parameter has no effect. | |
| 33 | #' | |
| 34 | #'   Options are: ``r shQuote(get_stats("abnormal_lab_worsen_by_baseline"), type = "sh")`` | |
| 35 | #' | |
| 36 | #' @seealso Relevant helper functions [h_adlb_worsen()] and [h_worsen_counter()] which are used within | |
| 37 | #' [s_count_abnormal_lab_worsen_by_baseline()] to process input data. | |
| 38 | #' | |
| 39 | #' @name abnormal_lab_worsen_by_baseline | |
| 40 | #' @order 1 | |
| 41 | NULL | |
| 42 | ||
| 43 | #' @describeIn abnormal_lab_worsen_by_baseline Statistics function for patients whose worst post-baseline | |
| 44 | #' lab grades are worse than their baseline grades. | |
| 45 | #' | |
| 46 | #' @return | |
| 47 | #' * `s_count_abnormal_lab_worsen_by_baseline()` returns the counts and fraction of patients whose worst | |
| 48 | #' post-baseline lab grades are worse than their baseline grades, for post-baseline worst grades | |
| 49 | #' "1", "2", "3", "4" and "Any". | |
| 50 | #' | |
| 51 | #' @keywords internal | |
| 52 | s_count_abnormal_lab_worsen_by_baseline <- function(df, | |
| 53 | .var = "ATOXGR", | |
| 54 | variables = list( | |
| 55 | id = "USUBJID", | |
| 56 | baseline_var = "BTOXGR", | |
| 57 | direction_var = "GRADDR" | |
| 58 | ), | |
| 59 |                                                     ...) { | |
| 60 | 13x | checkmate::assert_string(.var) | 
| 61 | 13x |   checkmate::assert_set_equal(names(variables), c("id", "baseline_var", "direction_var")) | 
| 62 | 13x | checkmate::assert_string(variables$id) | 
| 63 | 13x | checkmate::assert_string(variables$baseline_var) | 
| 64 | 13x | checkmate::assert_string(variables$direction_var) | 
| 65 | 13x | assert_df_with_variables(df, c(aval = .var, variables[1:3])) | 
| 66 | 13x | assert_list_of_variables(variables) | 
| 67 | ||
| 68 | 13x | h_worsen_counter(df, variables$id, .var, variables$baseline_var, variables$direction_var) | 
| 69 | } | |
| 70 | ||
| 71 | #' @describeIn abnormal_lab_worsen_by_baseline Formatted analysis function which is used as `afun` | |
| 72 | #' in `count_abnormal_lab_worsen_by_baseline()`. | |
| 73 | #' | |
| 74 | #' @return | |
| 75 | #' * `a_count_abnormal_lab_worsen_by_baseline()` returns the corresponding list with | |
| 76 | #' formatted [rtables::CellValue()]. | |
| 77 | #' | |
| 78 | #' @keywords internal | |
| 79 | a_count_abnormal_lab_worsen_by_baseline <- function(df, | |
| 80 | ..., | |
| 81 | .stats = NULL, | |
| 82 | .stat_names = NULL, | |
| 83 | .formats = NULL, | |
| 84 | .labels = NULL, | |
| 85 |                                                     .indent_mods = NULL) { | |
| 86 | # Check for additional parameters to the statistics function | |
| 87 | 12x | dots_extra_args <- list(...) | 
| 88 | 12x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 89 | 12x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 90 | ||
| 91 | # Check for user-defined functions | |
| 92 | 12x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 93 | 12x | .stats <- default_and_custom_stats_list$all_stats | 
| 94 | 12x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 95 | ||
| 96 | # Apply statistics function | |
| 97 | 12x | x_stats <- .apply_stat_functions( | 
| 98 | 12x | default_stat_fnc = s_count_abnormal_lab_worsen_by_baseline, | 
| 99 | 12x | custom_stat_fnc_list = custom_stat_functions, | 
| 100 | 12x | args_list = c( | 
| 101 | 12x | df = list(df), | 
| 102 | 12x | extra_afun_params, | 
| 103 | 12x | dots_extra_args | 
| 104 | ) | |
| 105 | ) | |
| 106 | ||
| 107 | # Fill in formatting defaults | |
| 108 | 12x | .stats <- get_stats( | 
| 109 | 12x | "abnormal_lab_worsen_by_baseline", | 
| 110 | 12x | stats_in = .stats, | 
| 111 | 12x | custom_stats_in = names(custom_stat_functions) | 
| 112 | ) | |
| 113 | 12x | levels_per_stats <- lapply(x_stats, names) | 
| 114 | 12x | .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) | 
| 115 | 12x | .labels <- get_labels_from_stats(.stats, .labels, levels_per_stats) | 
| 116 | 12x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) | 
| 117 | ||
| 118 | 12x | x_stats <- x_stats[.stats] %>% | 
| 119 | 12x | .unlist_keep_nulls() %>% | 
| 120 | 12x | setNames(names(.formats)) | 
| 121 | ||
| 122 | # Auto format handling | |
| 123 | 12x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 124 | ||
| 125 | # Get and check statistical names | |
| 126 | 12x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 127 | ||
| 128 | 12x | in_rows( | 
| 129 | 12x | .list = x_stats, | 
| 130 | 12x | .formats = .formats, | 
| 131 | 12x | .names = .labels %>% .unlist_keep_nulls(), | 
| 132 | 12x | .stat_names = .stat_names, | 
| 133 | 12x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 134 | 12x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 135 | ) | |
| 136 | } | |
| 137 | ||
| 138 | #' @describeIn abnormal_lab_worsen_by_baseline Layout-creating function which can take statistics function | |
| 139 | #' arguments and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 140 | #' | |
| 141 | #' @return | |
| 142 | #' * `count_abnormal_lab_worsen_by_baseline()` returns a layout object suitable for passing to further layouting | |
| 143 | #' functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted | |
| 144 | #' rows containing the statistics from `s_count_abnormal_lab_worsen_by_baseline()` to the table layout. | |
| 145 | #' | |
| 146 | #' @examples | |
| 147 | #' library(dplyr) | |
| 148 | #' | |
| 149 | #' # The direction variable, GRADDR, is based on metadata | |
| 150 | #' adlb <- tern_ex_adlb %>% | |
| 151 | #' mutate( | |
| 152 | #' GRADDR = case_when( | |
| 153 | #' PARAMCD == "ALT" ~ "B", | |
| 154 | #' PARAMCD == "CRP" ~ "L", | |
| 155 | #' PARAMCD == "IGA" ~ "H" | |
| 156 | #' ) | |
| 157 | #' ) %>% | |
| 158 | #' filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "") | |
| 159 | #' | |
| 160 | #' df <- h_adlb_worsen( | |
| 161 | #' adlb, | |
| 162 | #'   worst_flag_low = c("WGRLOFL" = "Y"), | |
| 163 | #'   worst_flag_high = c("WGRHIFL" = "Y"), | |
| 164 | #' direction_var = "GRADDR" | |
| 165 | #' ) | |
| 166 | #' | |
| 167 | #' basic_table() %>% | |
| 168 | #'   split_cols_by("ARMCD") %>% | |
| 169 | #' add_colcounts() %>% | |
| 170 | #'   split_rows_by("PARAMCD") %>% | |
| 171 | #'   split_rows_by("GRADDR") %>% | |
| 172 | #' count_abnormal_lab_worsen_by_baseline( | |
| 173 | #' var = "ATOXGR", | |
| 174 | #' variables = list( | |
| 175 | #' id = "USUBJID", | |
| 176 | #' baseline_var = "BTOXGR", | |
| 177 | #' direction_var = "GRADDR" | |
| 178 | #' ) | |
| 179 | #' ) %>% | |
| 180 | #'   append_topleft("Direction of Abnormality") %>% | |
| 181 | #' build_table(df = df, alt_counts_df = tern_ex_adsl) | |
| 182 | #' | |
| 183 | #' @export | |
| 184 | #' @order 2 | |
| 185 | count_abnormal_lab_worsen_by_baseline <- function(lyt, | |
| 186 | var, | |
| 187 | variables = list( | |
| 188 | id = "USUBJID", | |
| 189 | baseline_var = "BTOXGR", | |
| 190 | direction_var = "GRADDR" | |
| 191 | ), | |
| 192 | na_str = default_na_str(), | |
| 193 | nested = TRUE, | |
| 194 | ..., | |
| 195 | table_names = lifecycle::deprecated(), | |
| 196 | .stats = "fraction", | |
| 197 | .stat_names = NULL, | |
| 198 | .formats = list(fraction = format_fraction), | |
| 199 | .labels = NULL, | |
| 200 |                                                   .indent_mods = NULL) { | |
| 201 | 1x | checkmate::assert_string(var) | 
| 202 | ||
| 203 | # Deprecated argument warning | |
| 204 | 1x |   if (lifecycle::is_present(table_names)) { | 
| 205 | ! | lifecycle::deprecate_warn( | 
| 206 | ! | "0.9.8", "count_abnormal_lab_worsen_by_baseline(table_names)", | 
| 207 | ! | details = "The argument has no effect on the output." | 
| 208 | ) | |
| 209 | } | |
| 210 | ||
| 211 | # Process standard extra arguments | |
| 212 | 1x |   extra_args <- list(".stats" = .stats) | 
| 213 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 214 | 1x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 215 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 216 | ! | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 217 | ||
| 218 | # Process additional arguments to the statistic function | |
| 219 | 1x | extra_args <- c(extra_args, "variables" = list(variables), ...) | 
| 220 | ||
| 221 | # Append additional info from layout to the analysis function | |
| 222 | 1x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 223 | 1x | formals(a_count_abnormal_lab_worsen_by_baseline) <- c( | 
| 224 | 1x | formals(a_count_abnormal_lab_worsen_by_baseline), extra_args[[".additional_fun_parameters"]] | 
| 225 | ) | |
| 226 | ||
| 227 | 1x | analyze( | 
| 228 | 1x | lyt = lyt, | 
| 229 | 1x | vars = var, | 
| 230 | 1x | afun = a_count_abnormal_lab_worsen_by_baseline, | 
| 231 | 1x | na_str = na_str, | 
| 232 | 1x | nested = nested, | 
| 233 | 1x | extra_args = extra_args, | 
| 234 | 1x | show_labels = "hidden" | 
| 235 | ) | |
| 236 | } | |
| 237 | ||
| 238 | #' Helper function to prepare ADLB with worst labs | |
| 239 | #' | |
| 240 | #' @description `r lifecycle::badge("stable")` | |
| 241 | #' | |
| 242 | #' Helper function to prepare a `df` for generate the patient count shift table. | |
| 243 | #' | |
| 244 | #' @param adlb (`data.frame`)\cr ADLB data frame. | |
| 245 | #' @param worst_flag_low (named `vector`)\cr worst low post-baseline lab grade flag variable. See how this is | |
| 246 | #' implemented in the following examples. | |
| 247 | #' @param worst_flag_high (named `vector`)\cr worst high post-baseline lab grade flag variable. See how this is | |
| 248 | #' implemented in the following examples. | |
| 249 | #' @param direction_var (`string`)\cr name of the direction variable specifying the direction of the shift table of | |
| 250 | #' interest. Only lab records flagged by `L`, `H` or `B` are included in the shift table. | |
| 251 | #' * `L`: low direction only | |
| 252 | #' * `H`: high direction only | |
| 253 | #' * `B`: both low and high directions | |
| 254 | #' | |
| 255 | #' @return `h_adlb_worsen()` returns the `adlb` `data.frame` containing only the | |
| 256 | #' worst labs specified according to `worst_flag_low` or `worst_flag_high` for the | |
| 257 | #' direction specified according to `direction_var`. For instance, for a lab that is | |
| 258 | #' needed for the low direction only, only records flagged by `worst_flag_low` are | |
| 259 | #' selected. For a lab that is needed for both low and high directions, the worst | |
| 260 | #' low records are selected for the low direction, and the worst high record are selected | |
| 261 | #' for the high direction. | |
| 262 | #' | |
| 263 | #' @seealso [abnormal_lab_worsen_by_baseline] | |
| 264 | #' | |
| 265 | #' @examples | |
| 266 | #' library(dplyr) | |
| 267 | #' | |
| 268 | #' # The direction variable, GRADDR, is based on metadata | |
| 269 | #' adlb <- tern_ex_adlb %>% | |
| 270 | #' mutate( | |
| 271 | #' GRADDR = case_when( | |
| 272 | #' PARAMCD == "ALT" ~ "B", | |
| 273 | #' PARAMCD == "CRP" ~ "L", | |
| 274 | #' PARAMCD == "IGA" ~ "H" | |
| 275 | #' ) | |
| 276 | #' ) %>% | |
| 277 | #' filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "") | |
| 278 | #' | |
| 279 | #' df <- h_adlb_worsen( | |
| 280 | #' adlb, | |
| 281 | #'   worst_flag_low = c("WGRLOFL" = "Y"), | |
| 282 | #'   worst_flag_high = c("WGRHIFL" = "Y"), | |
| 283 | #' direction_var = "GRADDR" | |
| 284 | #' ) | |
| 285 | #' | |
| 286 | #' @export | |
| 287 | h_adlb_worsen <- function(adlb, | |
| 288 | worst_flag_low = NULL, | |
| 289 | worst_flag_high = NULL, | |
| 290 |                           direction_var) { | |
| 291 | 5x | checkmate::assert_string(direction_var) | 
| 292 | 5x |   checkmate::assert_subset(as.character(unique(adlb[[direction_var]])), c("B", "L", "H")) | 
| 293 | 5x |   assert_df_with_variables(adlb, list("Col" = direction_var)) | 
| 294 | ||
| 295 | 5x |   if (any(unique(adlb[[direction_var]]) == "H")) { | 
| 296 | 4x |     assert_df_with_variables(adlb, list("High" = names(worst_flag_high))) | 
| 297 | } | |
| 298 | ||
| 299 | 5x |   if (any(unique(adlb[[direction_var]]) == "L")) { | 
| 300 | 4x |     assert_df_with_variables(adlb, list("Low" = names(worst_flag_low))) | 
| 301 | } | |
| 302 | ||
| 303 | 5x |   if (any(unique(adlb[[direction_var]]) == "B")) { | 
| 304 | 3x | assert_df_with_variables( | 
| 305 | 3x | adlb, | 
| 306 | 3x | list( | 
| 307 | 3x | "Low" = names(worst_flag_low), | 
| 308 | 3x | "High" = names(worst_flag_high) | 
| 309 | ) | |
| 310 | ) | |
| 311 | } | |
| 312 | ||
| 313 | # extract patients with worst post-baseline lab, either low or high or both | |
| 314 | 5x | worst_flag <- c(worst_flag_low, worst_flag_high) | 
| 315 | 5x | col_names <- names(worst_flag) | 
| 316 | 5x | filter_values <- worst_flag | 
| 317 | 5x | temp <- Map( | 
| 318 | 5x | function(x, y) which(adlb[[x]] == y), | 
| 319 | 5x | col_names, | 
| 320 | 5x | filter_values | 
| 321 | ) | |
| 322 | 5x | position_satisfy_filters <- Reduce(union, temp) | 
| 323 | ||
| 324 | # select variables of interest | |
| 325 | 5x | adlb_f <- adlb[position_satisfy_filters, ] | 
| 326 | ||
| 327 | # generate subsets for different directionality | |
| 328 | 5x | adlb_f_h <- adlb_f[which(adlb_f[[direction_var]] == "H"), ] | 
| 329 | 5x | adlb_f_l <- adlb_f[which(adlb_f[[direction_var]] == "L"), ] | 
| 330 | 5x | adlb_f_b <- adlb_f[which(adlb_f[[direction_var]] == "B"), ] | 
| 331 | ||
| 332 | # for labs requiring both high and low, data is duplicated and will be stacked on top of each other | |
| 333 | 5x | adlb_f_b_h <- adlb_f_b | 
| 334 | 5x | adlb_f_b_l <- adlb_f_b | 
| 335 | ||
| 336 | # extract data with worst lab | |
| 337 | 5x |   if (!is.null(worst_flag_high) && !is.null(worst_flag_low)) { | 
| 338 | # change H to High, L to Low | |
| 339 | 3x |     adlb_f_h[[direction_var]] <- rep("High", nrow(adlb_f_h)) | 
| 340 | 3x |     adlb_f_l[[direction_var]] <- rep("Low", nrow(adlb_f_l)) | 
| 341 | ||
| 342 | # change, B to High and Low | |
| 343 | 3x |     adlb_f_b_h[[direction_var]] <- rep("High", nrow(adlb_f_b_h)) | 
| 344 | 3x |     adlb_f_b_l[[direction_var]] <- rep("Low", nrow(adlb_f_b_l)) | 
| 345 | ||
| 346 | 3x | adlb_out_h <- adlb_f_h[which(adlb_f_h[[names(worst_flag_high)]] == worst_flag_high), ] | 
| 347 | 3x | adlb_out_b_h <- adlb_f_b_h[which(adlb_f_b_h[[names(worst_flag_high)]] == worst_flag_high), ] | 
| 348 | 3x | adlb_out_l <- adlb_f_l[which(adlb_f_l[[names(worst_flag_low)]] == worst_flag_low), ] | 
| 349 | 3x | adlb_out_b_l <- adlb_f_b_l[which(adlb_f_b_l[[names(worst_flag_low)]] == worst_flag_low), ] | 
| 350 | ||
| 351 | 3x | out <- rbind(adlb_out_h, adlb_out_b_h, adlb_out_l, adlb_out_b_l) | 
| 352 | 2x |   } else if (!is.null(worst_flag_high)) { | 
| 353 | 1x |     adlb_f_h[[direction_var]] <- rep("High", nrow(adlb_f_h)) | 
| 354 | 1x |     adlb_f_b_h[[direction_var]] <- rep("High", nrow(adlb_f_b_h)) | 
| 355 | ||
| 356 | 1x | adlb_out_h <- adlb_f_h[which(adlb_f_h[[names(worst_flag_high)]] == worst_flag_high), ] | 
| 357 | 1x | adlb_out_b_h <- adlb_f_b_h[which(adlb_f_b_h[[names(worst_flag_high)]] == worst_flag_high), ] | 
| 358 | ||
| 359 | 1x | out <- rbind(adlb_out_h, adlb_out_b_h) | 
| 360 | 1x |   } else if (!is.null(worst_flag_low)) { | 
| 361 | 1x |     adlb_f_l[[direction_var]] <- rep("Low", nrow(adlb_f_l)) | 
| 362 | 1x |     adlb_f_b_l[[direction_var]] <- rep("Low", nrow(adlb_f_b_l)) | 
| 363 | ||
| 364 | 1x | adlb_out_l <- adlb_f_l[which(adlb_f_l[[names(worst_flag_low)]] == worst_flag_low), ] | 
| 365 | 1x | adlb_out_b_l <- adlb_f_b_l[which(adlb_f_b_l[[names(worst_flag_low)]] == worst_flag_low), ] | 
| 366 | ||
| 367 | 1x | out <- rbind(adlb_out_l, adlb_out_b_l) | 
| 368 | } | |
| 369 | ||
| 370 | # label | |
| 371 | 5x | formatters::var_labels(out) <- formatters::var_labels(adlb_f, fill = FALSE) | 
| 372 | ||
| 373 | 5x | out | 
| 374 | } | |
| 375 | ||
| 376 | #' Helper function to analyze patients for `s_count_abnormal_lab_worsen_by_baseline()` | |
| 377 | #' | |
| 378 | #' @description `r lifecycle::badge("stable")` | |
| 379 | #' | |
| 380 | #' Helper function to count the number of patients and the fraction of patients according to | |
| 381 | #' highest post-baseline lab grade variable `.var`, baseline lab grade variable `baseline_var`, | |
| 382 | #' and the direction of interest specified in `direction_var`. | |
| 383 | #' | |
| 384 | #' @inheritParams argument_convention | |
| 385 | #' @inheritParams h_adlb_worsen | |
| 386 | #' @param baseline_var (`string`)\cr name of the baseline lab grade variable. | |
| 387 | #' | |
| 388 | #' @return The counts and fraction of patients | |
| 389 | #' whose worst post-baseline lab grades are worse than their baseline grades, for | |
| 390 | #' post-baseline worst grades "1", "2", "3", "4" and "Any". | |
| 391 | #' | |
| 392 | #' @seealso [abnormal_lab_worsen_by_baseline] | |
| 393 | #' | |
| 394 | #' @examples | |
| 395 | #' library(dplyr) | |
| 396 | #' | |
| 397 | #' # The direction variable, GRADDR, is based on metadata | |
| 398 | #' adlb <- tern_ex_adlb %>% | |
| 399 | #' mutate( | |
| 400 | #' GRADDR = case_when( | |
| 401 | #' PARAMCD == "ALT" ~ "B", | |
| 402 | #' PARAMCD == "CRP" ~ "L", | |
| 403 | #' PARAMCD == "IGA" ~ "H" | |
| 404 | #' ) | |
| 405 | #' ) %>% | |
| 406 | #' filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "") | |
| 407 | #' | |
| 408 | #' df <- h_adlb_worsen( | |
| 409 | #' adlb, | |
| 410 | #'   worst_flag_low = c("WGRLOFL" = "Y"), | |
| 411 | #'   worst_flag_high = c("WGRHIFL" = "Y"), | |
| 412 | #' direction_var = "GRADDR" | |
| 413 | #' ) | |
| 414 | #' | |
| 415 | #' # `h_worsen_counter` | |
| 416 | #' h_worsen_counter( | |
| 417 | #' df %>% filter(PARAMCD == "CRP" & GRADDR == "Low"), | |
| 418 | #' id = "USUBJID", | |
| 419 | #' .var = "ATOXGR", | |
| 420 | #' baseline_var = "BTOXGR", | |
| 421 | #' direction_var = "GRADDR" | |
| 422 | #' ) | |
| 423 | #' | |
| 424 | #' @export | |
| 425 | h_worsen_counter <- function(df, id, .var, baseline_var, direction_var) { | |
| 426 | 17x | checkmate::assert_string(id) | 
| 427 | 17x | checkmate::assert_string(.var) | 
| 428 | 17x | checkmate::assert_string(baseline_var) | 
| 429 | 17x | checkmate::assert_scalar(unique(df[[direction_var]])) | 
| 430 | 17x |   checkmate::assert_subset(unique(df[[direction_var]]), c("High", "Low")) | 
| 431 | 17x | assert_df_with_variables(df, list(val = c(id, .var, baseline_var, direction_var))) | 
| 432 | ||
| 433 | # remove post-baseline missing | |
| 434 | 17x | df <- df[df[[.var]] != "<Missing>", ] | 
| 435 | ||
| 436 | # obtain directionality | |
| 437 | 17x | direction <- unique(df[[direction_var]]) | 
| 438 | ||
| 439 | 17x |   if (direction == "Low") { | 
| 440 | 10x | grade <- -1:-4 | 
| 441 | 10x | worst_grade <- -4 | 
| 442 | 7x |   } else if (direction == "High") { | 
| 443 | 7x | grade <- 1:4 | 
| 444 | 7x | worst_grade <- 4 | 
| 445 | } | |
| 446 | ||
| 447 | 17x |   if (nrow(df) > 0) { | 
| 448 | 17x |     by_grade <- lapply(grade, function(i) { | 
| 449 | # filter baseline values that is less than i or <Missing> | |
| 450 | 68x | df_temp <- df[df[[baseline_var]] %in% c((i + sign(i) * -1):(-1 * worst_grade), "<Missing>"), ] | 
| 451 | # num: number of patients with post-baseline worst lab equal to i | |
| 452 | 68x | num <- length(unique(df_temp[df_temp[[.var]] %in% i, id, drop = TRUE])) | 
| 453 | # denom: number of patients with baseline values less than i or <missing> and post-baseline in the same direction | |
| 454 | 68x | denom <- length(unique(df_temp[[id]])) | 
| 455 | 68x | rm(df_temp) | 
| 456 | 68x | c(num = num, denom = denom) | 
| 457 | }) | |
| 458 |   } else { | |
| 459 | ! |     by_grade <- lapply(1, function(i) { | 
| 460 | ! | c(num = 0, denom = 0) | 
| 461 | }) | |
| 462 | } | |
| 463 | ||
| 464 | 17x | names(by_grade) <- as.character(seq_along(by_grade)) | 
| 465 | ||
| 466 | # baseline grade less 4 or missing | |
| 467 | 17x | df_temp <- df[!df[[baseline_var]] %in% worst_grade, ] | 
| 468 | ||
| 469 | # denom: number of patients with baseline values less than 4 or <missing> and post-baseline in the same direction | |
| 470 | 17x | denom <- length(unique(df_temp[, id, drop = TRUE])) | 
| 471 | ||
| 472 | # condition 1: missing baseline and in the direction of abnormality | |
| 473 | 17x | con1 <- which(df_temp[[baseline_var]] == "<Missing>" & df_temp[[.var]] %in% grade) | 
| 474 | 17x | df_temp_nm <- df_temp[which(df_temp[[baseline_var]] != "<Missing>" & df_temp[[.var]] %in% grade), ] | 
| 475 | ||
| 476 | # condition 2: if post-baseline values are present then post-baseline values must be worse than baseline | |
| 477 | 17x |   if (direction == "Low") { | 
| 478 | 10x | con2 <- which(as.numeric(as.character(df_temp_nm[[.var]])) < as.numeric(as.character(df_temp_nm[[baseline_var]]))) | 
| 479 |   } else { | |
| 480 | 7x | con2 <- which(as.numeric(as.character(df_temp_nm[[.var]])) > as.numeric(as.character(df_temp_nm[[baseline_var]]))) | 
| 481 | } | |
| 482 | ||
| 483 | # number of patients satisfy either conditions 1 or 2 | |
| 484 | 17x | num <- length(unique(df_temp[union(con1, con2), id, drop = TRUE])) | 
| 485 | ||
| 486 | 17x |   list(fraction = c(by_grade, list("Any" = c(num = num, denom = denom)))) | 
| 487 | } | 
| 1 | #' Occurrence table pruning | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Family of constructor and condition functions to flexibly prune occurrence tables. | |
| 6 | #' The condition functions always return whether the row result is higher than the threshold. | |
| 7 | #' Since they are of class [CombinationFunction()] they can be logically combined with other condition | |
| 8 | #' functions. | |
| 9 | #' | |
| 10 | #' @note Since most table specifications are worded positively, we name our constructor and condition | |
| 11 | #' functions positively, too. However, note that the result of [keep_rows()] says what | |
| 12 | #' should be pruned, to conform with the [rtables::prune_table()] interface. | |
| 13 | #' | |
| 14 | #' @examples | |
| 15 | #' \donttest{ | |
| 16 | #' tab <- basic_table() %>% | |
| 17 | #'   split_cols_by("ARM") %>% | |
| 18 | #'   split_rows_by("RACE") %>% | |
| 19 | #'   split_rows_by("STRATA1") %>% | |
| 20 | #' summarize_row_groups() %>% | |
| 21 | #'   analyze_vars("COUNTRY", .stats = "count_fraction") %>% | |
| 22 | #' build_table(DM) | |
| 23 | #' } | |
| 24 | #' | |
| 25 | #' @name prune_occurrences | |
| 26 | NULL | |
| 27 | ||
| 28 | #' @describeIn prune_occurrences Constructor for creating pruning functions based on | |
| 29 | #' a row condition function. This removes all analysis rows (`TableRow`) that should be | |
| 30 | #' pruned, i.e., don't fulfill the row condition. It removes the sub-tree if there are no | |
| 31 | #' children left. | |
| 32 | #' | |
| 33 | #' @param row_condition (`CombinationFunction`)\cr condition function which works on individual | |
| 34 | #' analysis rows and flags whether these should be kept in the pruned table. | |
| 35 | #' | |
| 36 | #' @return | |
| 37 | #' * `keep_rows()` returns a pruning function that can be used with [rtables::prune_table()] | |
| 38 | #' to prune an `rtables` table. | |
| 39 | #' | |
| 40 | #' @examples | |
| 41 | #' \donttest{ | |
| 42 | #' # `keep_rows` | |
| 43 | #' is_non_empty <- !CombinationFunction(all_zero_or_na) | |
| 44 | #' prune_table(tab, keep_rows(is_non_empty)) | |
| 45 | #' } | |
| 46 | #' | |
| 47 | #' @export | |
| 48 | keep_rows <- function(row_condition) { | |
| 49 | 6x | checkmate::assert_function(row_condition) | 
| 50 | 6x |   function(table_tree) { | 
| 51 | 2256x |     if (inherits(table_tree, "TableRow")) { | 
| 52 | 1872x | return(!row_condition(table_tree)) | 
| 53 | } | |
| 54 | 384x | children <- tree_children(table_tree) | 
| 55 | 384x | identical(length(children), 0L) | 
| 56 | } | |
| 57 | } | |
| 58 | ||
| 59 | #' @describeIn prune_occurrences Constructor for creating pruning functions based on | |
| 60 | #' a condition for the (first) content row in leaf tables. This removes all leaf tables where | |
| 61 | #' the first content row does not fulfill the condition. It does not check individual rows. | |
| 62 | #' It then proceeds recursively by removing the sub tree if there are no children left. | |
| 63 | #' | |
| 64 | #' @param content_row_condition (`CombinationFunction`)\cr condition function which works on individual | |
| 65 | #' first content rows of leaf tables and flags whether these leaf tables should be kept in the pruned table. | |
| 66 | #' | |
| 67 | #' @return | |
| 68 | #' * `keep_content_rows()` returns a pruning function that checks the condition on the first content | |
| 69 | #' row of leaf tables in the table. | |
| 70 | #' | |
| 71 | #' @examples | |
| 72 | #' # `keep_content_rows` | |
| 73 | #' \donttest{ | |
| 74 | #' more_than_twenty <- has_count_in_cols(atleast = 20L, col_names = names(tab)) | |
| 75 | #' prune_table(tab, keep_content_rows(more_than_twenty)) | |
| 76 | #' } | |
| 77 | #' | |
| 78 | #' @export | |
| 79 | keep_content_rows <- function(content_row_condition) { | |
| 80 | 1x | checkmate::assert_function(content_row_condition) | 
| 81 | 1x |   function(table_tree) { | 
| 82 | 166x |     if (is_leaf_table(table_tree)) { | 
| 83 | 24x | content_row <- h_content_first_row(table_tree) | 
| 84 | 24x | return(!content_row_condition(content_row)) | 
| 85 | } | |
| 86 | 142x |     if (inherits(table_tree, "DataRow")) { | 
| 87 | 120x | return(FALSE) | 
| 88 | } | |
| 89 | 22x | children <- tree_children(table_tree) | 
| 90 | 22x | identical(length(children), 0L) | 
| 91 | } | |
| 92 | } | |
| 93 | ||
| 94 | #' @describeIn prune_occurrences Constructor for creating condition functions on total counts in the specified columns. | |
| 95 | #' | |
| 96 | #' @param atleast (`numeric(1)`)\cr threshold which should be met in order to keep the row. | |
| 97 | #' @param ... arguments for row or column access, see [`rtables_access`]: either `col_names` (`character`) including | |
| 98 | #' the names of the columns which should be used, or alternatively `col_indices` (`integer`) giving the indices | |
| 99 | #' directly instead. | |
| 100 | #' | |
| 101 | #' @return | |
| 102 | #' * `has_count_in_cols()` returns a condition function that sums the counts in the specified column. | |
| 103 | #' | |
| 104 | #' @examples | |
| 105 | #' \donttest{ | |
| 106 | #' more_than_one <- has_count_in_cols(atleast = 1L, col_names = names(tab)) | |
| 107 | #' prune_table(tab, keep_rows(more_than_one)) | |
| 108 | #' } | |
| 109 | #' | |
| 110 | #' @export | |
| 111 | has_count_in_cols <- function(atleast, ...) { | |
| 112 | 6x | checkmate::assert_count(atleast) | 
| 113 | 6x |   CombinationFunction(function(table_row) { | 
| 114 | 337x | row_counts <- h_row_counts(table_row, ...) | 
| 115 | 337x | total_count <- sum(row_counts) | 
| 116 | 337x | total_count >= atleast | 
| 117 | }) | |
| 118 | } | |
| 119 | ||
| 120 | #' @describeIn prune_occurrences Constructor for creating condition functions on any of the counts in | |
| 121 | #' the specified columns satisfying a threshold. | |
| 122 | #' | |
| 123 | #' @param atleast (`numeric(1)`)\cr threshold which should be met in order to keep the row. | |
| 124 | #' | |
| 125 | #' @return | |
| 126 | #' * `has_count_in_any_col()` returns a condition function that compares the counts in the | |
| 127 | #' specified columns with the threshold. | |
| 128 | #' | |
| 129 | #' @examples | |
| 130 | #' \donttest{ | |
| 131 | #' # `has_count_in_any_col` | |
| 132 | #' any_more_than_one <- has_count_in_any_col(atleast = 1L, col_names = names(tab)) | |
| 133 | #' prune_table(tab, keep_rows(any_more_than_one)) | |
| 134 | #' } | |
| 135 | #' | |
| 136 | #' @export | |
| 137 | has_count_in_any_col <- function(atleast, ...) { | |
| 138 | 3x | checkmate::assert_count(atleast) | 
| 139 | 3x |   CombinationFunction(function(table_row) { | 
| 140 | 3x | row_counts <- h_row_counts(table_row, ...) | 
| 141 | 3x | any(row_counts >= atleast) | 
| 142 | }) | |
| 143 | } | |
| 144 | ||
| 145 | #' @describeIn prune_occurrences Constructor for creating condition functions on total fraction in | |
| 146 | #' the specified columns. | |
| 147 | #' | |
| 148 | #' @return | |
| 149 | #' * `has_fraction_in_cols()` returns a condition function that sums the counts in the | |
| 150 | #' specified column, and computes the fraction by dividing by the total column counts. | |
| 151 | #' | |
| 152 | #' @examples | |
| 153 | #' \donttest{ | |
| 154 | #' # `has_fraction_in_cols` | |
| 155 | #' more_than_five_percent <- has_fraction_in_cols(atleast = 0.05, col_names = names(tab)) | |
| 156 | #' prune_table(tab, keep_rows(more_than_five_percent)) | |
| 157 | #' } | |
| 158 | #' | |
| 159 | #' @export | |
| 160 | has_fraction_in_cols <- function(atleast, ...) { | |
| 161 | 4x | assert_proportion_value(atleast, include_boundaries = TRUE) | 
| 162 | 4x |   CombinationFunction(function(table_row) { | 
| 163 | 306x | row_counts <- h_row_counts(table_row, ...) | 
| 164 | 306x | total_count <- sum(row_counts) | 
| 165 | 306x | col_counts <- h_col_counts(table_row, ...) | 
| 166 | 306x | total_n <- sum(col_counts) | 
| 167 | 306x | total_percent <- total_count / total_n | 
| 168 | 306x | total_percent >= atleast | 
| 169 | }) | |
| 170 | } | |
| 171 | ||
| 172 | #' @describeIn prune_occurrences Constructor for creating condition functions on any fraction in | |
| 173 | #' the specified columns. | |
| 174 | #' | |
| 175 | #' @return | |
| 176 | #' * `has_fraction_in_any_col()` returns a condition function that looks at the fractions | |
| 177 | #' in the specified columns and checks whether any of them fulfill the threshold. | |
| 178 | #' | |
| 179 | #' @examples | |
| 180 | #' \donttest{ | |
| 181 | #' # `has_fraction_in_any_col` | |
| 182 | #' any_atleast_five_percent <- has_fraction_in_any_col(atleast = 0.05, col_names = names(tab)) | |
| 183 | #' prune_table(tab, keep_rows(any_atleast_five_percent)) | |
| 184 | #' } | |
| 185 | #' | |
| 186 | #' @export | |
| 187 | has_fraction_in_any_col <- function(atleast, ...) { | |
| 188 | 3x | assert_proportion_value(atleast, include_boundaries = TRUE) | 
| 189 | 3x |   CombinationFunction(function(table_row) { | 
| 190 | 3x | row_fractions <- h_row_fractions(table_row, ...) | 
| 191 | 3x | any(row_fractions >= atleast) | 
| 192 | }) | |
| 193 | } | |
| 194 | ||
| 195 | #' @describeIn prune_occurrences Constructor for creating condition function that checks the difference | |
| 196 | #' between the fractions reported in each specified column. | |
| 197 | #' | |
| 198 | #' @return | |
| 199 | #' * `has_fractions_difference()` returns a condition function that extracts the fractions of each | |
| 200 | #' specified column, and computes the difference of the minimum and maximum. | |
| 201 | #' | |
| 202 | #' @examples | |
| 203 | #' \donttest{ | |
| 204 | #' # `has_fractions_difference` | |
| 205 | #' more_than_five_percent_diff <- has_fractions_difference(atleast = 0.05, col_names = names(tab)) | |
| 206 | #' prune_table(tab, keep_rows(more_than_five_percent_diff)) | |
| 207 | #' } | |
| 208 | #' | |
| 209 | #' @export | |
| 210 | has_fractions_difference <- function(atleast, ...) { | |
| 211 | 4x | assert_proportion_value(atleast, include_boundaries = TRUE) | 
| 212 | 4x |   CombinationFunction(function(table_row) { | 
| 213 | 246x | fractions <- h_row_fractions(table_row, ...) | 
| 214 | 246x | difference <- diff(range(fractions)) | 
| 215 | 246x | difference >= atleast | 
| 216 | }) | |
| 217 | } | |
| 218 | ||
| 219 | #' @describeIn prune_occurrences Constructor for creating condition function that checks the difference | |
| 220 | #' between the counts reported in each specified column. | |
| 221 | #' | |
| 222 | #' @return | |
| 223 | #' * `has_counts_difference()` returns a condition function that extracts the counts of each | |
| 224 | #' specified column, and computes the difference of the minimum and maximum. | |
| 225 | #' | |
| 226 | #' @examples | |
| 227 | #' \donttest{ | |
| 228 | #' more_than_one_diff <- has_counts_difference(atleast = 1L, col_names = names(tab)) | |
| 229 | #' prune_table(tab, keep_rows(more_than_one_diff)) | |
| 230 | #' } | |
| 231 | #' | |
| 232 | #' @export | |
| 233 | has_counts_difference <- function(atleast, ...) { | |
| 234 | 4x | checkmate::assert_count(atleast) | 
| 235 | 4x |   CombinationFunction(function(table_row) { | 
| 236 | 30x | counts <- h_row_counts(table_row, ...) | 
| 237 | 30x | difference <- diff(range(counts)) | 
| 238 | 30x | difference >= atleast | 
| 239 | }) | |
| 240 | } | 
| 1 | #' Count specific values | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [count_values()] creates a layout element to calculate counts of specific values within a | |
| 6 | #' variable of interest. | |
| 7 | #' | |
| 8 | #' This function analyzes one or more variables of interest supplied as a vector to `vars`. Values to | |
| 9 | #' count for variable(s) in `vars` can be given as a vector via the `values` argument. One row of | |
| 10 | #' counts will be generated for each variable. | |
| 11 | #' | |
| 12 | #' @inheritParams argument_convention | |
| 13 | #' @param values (`character`)\cr specific values that should be counted. | |
| 14 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 15 | #' | |
| 16 | #'   Options are: ``r shQuote(get_stats("count_values"), type = "sh")`` | |
| 17 | #' | |
| 18 | #' @note | |
| 19 | #' * For `factor` variables, `s_count_values` checks whether `values` are all included in the levels of `x` | |
| 20 | #' and fails otherwise. | |
| 21 | #' * For `count_values()`, variable labels are shown when there is more than one element in `vars`, | |
| 22 | #' otherwise they are hidden. | |
| 23 | #' | |
| 24 | #' @name count_values | |
| 25 | #' @order 1 | |
| 26 | NULL | |
| 27 | ||
| 28 | #' @describeIn count_values S3 generic function to count values. | |
| 29 | #' | |
| 30 | #' @inheritParams s_summary.logical | |
| 31 | #' | |
| 32 | #' @return | |
| 33 | #' * `s_count_values()` returns output of [s_summary()] for specified values of a non-numeric variable. | |
| 34 | #' | |
| 35 | #' @export | |
| 36 | s_count_values <- function(x, | |
| 37 | values, | |
| 38 | na.rm = TRUE, # nolint | |
| 39 |                            denom = c("n", "N_col", "N_row"), | |
| 40 |                            ...) { | |
| 41 | 207x |   UseMethod("s_count_values", x) | 
| 42 | } | |
| 43 | ||
| 44 | #' @describeIn count_values Method for `character` class. | |
| 45 | #' | |
| 46 | #' @method s_count_values character | |
| 47 | #' | |
| 48 | #' @examples | |
| 49 | #' # `s_count_values.character` | |
| 50 | #' s_count_values(x = c("a", "b", "a"), values = "a") | |
| 51 | #' s_count_values(x = c("a", "b", "a", NA, NA), values = "b", na.rm = FALSE) | |
| 52 | #' | |
| 53 | #' @export | |
| 54 | s_count_values.character <- function(x, | |
| 55 | values = "Y", | |
| 56 | na.rm = TRUE, # nolint | |
| 57 |                                      ...) { | |
| 58 | 200x | checkmate::assert_character(values) | 
| 59 | ||
| 60 | 200x |   if (na.rm) { | 
| 61 | 199x | x <- x[!is.na(x)] | 
| 62 | } | |
| 63 | ||
| 64 | 200x | is_in_values <- x %in% values | 
| 65 | ||
| 66 | 200x | s_summary(is_in_values, na_rm = na.rm, ...) | 
| 67 | } | |
| 68 | ||
| 69 | #' @describeIn count_values Method for `factor` class. This makes an automatic | |
| 70 | #' conversion to `character` and then forwards to the method for characters. | |
| 71 | #' | |
| 72 | #' @method s_count_values factor | |
| 73 | #' | |
| 74 | #' @examples | |
| 75 | #' # `s_count_values.factor` | |
| 76 | #' s_count_values(x = factor(c("a", "b", "a")), values = "a") | |
| 77 | #' | |
| 78 | #' @export | |
| 79 | s_count_values.factor <- function(x, | |
| 80 | values = "Y", | |
| 81 |                                   ...) { | |
| 82 | 4x | s_count_values(as.character(x), values = as.character(values), ...) | 
| 83 | } | |
| 84 | ||
| 85 | #' @describeIn count_values Method for `logical` class. | |
| 86 | #' | |
| 87 | #' @method s_count_values logical | |
| 88 | #' | |
| 89 | #' @examples | |
| 90 | #' # `s_count_values.logical` | |
| 91 | #' s_count_values(x = c(TRUE, FALSE, TRUE)) | |
| 92 | #' | |
| 93 | #' @export | |
| 94 | s_count_values.logical <- function(x, values = TRUE, ...) { | |
| 95 | 3x | checkmate::assert_logical(values) | 
| 96 | 3x | s_count_values(as.character(x), values = as.character(values), ...) | 
| 97 | } | |
| 98 | ||
| 99 | #' @describeIn count_values Formatted analysis function which is used as `afun` | |
| 100 | #' in `count_values()`. | |
| 101 | #' | |
| 102 | #' @return | |
| 103 | #' * `a_count_values()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 104 | #' | |
| 105 | #' @examples | |
| 106 | #' # `a_count_values` | |
| 107 | #' a_count_values(x = factor(c("a", "b", "a")), values = "a", .N_col = 10, .N_row = 10) | |
| 108 | #' | |
| 109 | #' @export | |
| 110 | a_count_values <- function(x, | |
| 111 | ..., | |
| 112 | .stats = NULL, | |
| 113 | .stat_names = NULL, | |
| 114 | .formats = NULL, | |
| 115 | .labels = NULL, | |
| 116 |                            .indent_mods = NULL) { | |
| 117 | # Check for additional parameters to the statistics function | |
| 118 | 17x | dots_extra_args <- list(...) | 
| 119 | 17x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 120 | 17x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 121 | ||
| 122 | # Check for user-defined functions | |
| 123 | 17x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 124 | 17x | .stats <- default_and_custom_stats_list$all_stats | 
| 125 | 17x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 126 | ||
| 127 | # Main statistic calculations | |
| 128 | 17x | x_stats <- .apply_stat_functions( | 
| 129 | 17x | default_stat_fnc = s_count_values, | 
| 130 | 17x | custom_stat_fnc_list = custom_stat_functions, | 
| 131 | 17x | args_list = c( | 
| 132 | 17x | x = list(x), | 
| 133 | 17x | extra_afun_params, | 
| 134 | 17x | dots_extra_args | 
| 135 | ) | |
| 136 | ) | |
| 137 | ||
| 138 | # Fill in formatting defaults | |
| 139 | 17x |   .stats <- get_stats("analyze_vars_counts", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) | 
| 140 | 17x | .formats <- get_formats_from_stats(.stats, .formats) | 
| 141 | 17x | .labels <- get_labels_from_stats(.stats, .labels) | 
| 142 | 17x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods) | 
| 143 | ||
| 144 | 17x | x_stats <- x_stats[.stats] | 
| 145 | ||
| 146 | # Auto format handling | |
| 147 | 17x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 148 | ||
| 149 | # Get and check statistical names | |
| 150 | 17x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 151 | ||
| 152 | 17x | in_rows( | 
| 153 | 17x | .list = x_stats, | 
| 154 | 17x | .formats = .formats, | 
| 155 | 17x | .names = names(.labels), | 
| 156 | 17x | .stat_names = .stat_names, | 
| 157 | 17x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 158 | 17x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 159 | ) | |
| 160 | } | |
| 161 | ||
| 162 | #' @describeIn count_values Layout-creating function which can take statistics function arguments | |
| 163 | #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 164 | #' | |
| 165 | #' @return | |
| 166 | #' * `count_values()` returns a layout object suitable for passing to further layouting functions, | |
| 167 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 168 | #' the statistics from `s_count_values()` to the table layout. | |
| 169 | #' | |
| 170 | #' @examples | |
| 171 | #' # `count_values` | |
| 172 | #' basic_table() %>% | |
| 173 | #'   count_values("Species", values = "setosa") %>% | |
| 174 | #' build_table(iris) | |
| 175 | #' | |
| 176 | #' @export | |
| 177 | #' @order 2 | |
| 178 | count_values <- function(lyt, | |
| 179 | vars, | |
| 180 | values, | |
| 181 | na_str = default_na_str(), | |
| 182 | na_rm = TRUE, | |
| 183 | nested = TRUE, | |
| 184 | ..., | |
| 185 | table_names = vars, | |
| 186 | .stats = "count_fraction", | |
| 187 | .stat_names = NULL, | |
| 188 | .formats = c(count_fraction = "xx (xx.xx%)", count = "xx"), | |
| 189 | .labels = c(count_fraction = paste(values, collapse = ", ")), | |
| 190 |                          .indent_mods = NULL) { | |
| 191 | # Process standard extra arguments | |
| 192 | 8x |   extra_args <- list(".stats" = .stats) | 
| 193 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 194 | 8x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 195 | 8x | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 196 | ! | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 197 | ||
| 198 | # Process additional arguments to the statistic function | |
| 199 | 8x | extra_args <- c( | 
| 200 | 8x | extra_args, | 
| 201 | 8x | na_rm = na_rm, values = list(values), | 
| 202 | ... | |
| 203 | ) | |
| 204 | ||
| 205 | # Adding additional info from layout to analysis function | |
| 206 | 8x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 207 | 8x | formals(a_count_values) <- c(formals(a_count_values), extra_args[[".additional_fun_parameters"]]) | 
| 208 | ||
| 209 | 8x | analyze( | 
| 210 | 8x | lyt, | 
| 211 | 8x | vars, | 
| 212 | 8x | afun = a_count_values, | 
| 213 | 8x | na_str = na_str, | 
| 214 | 8x | nested = nested, | 
| 215 | 8x | extra_args = extra_args, | 
| 216 | 8x | show_labels = ifelse(length(vars) > 1, "visible", "hidden"), | 
| 217 | 8x | table_names = table_names | 
| 218 | ) | |
| 219 | } | 
| 1 | #' Helper functions for subgroup treatment effect pattern (STEP) calculations | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Helper functions that are used internally for the STEP calculations. | |
| 6 | #' | |
| 7 | #' @inheritParams argument_convention | |
| 8 | #' | |
| 9 | #' @name h_step | |
| 10 | #' @include control_step.R | |
| 11 | NULL | |
| 12 | ||
| 13 | #' @describeIn h_step Creates the windows for STEP, based on the control settings | |
| 14 | #' provided. | |
| 15 | #' | |
| 16 | #' @param x (`numeric`)\cr biomarker value(s) to use (without `NA`). | |
| 17 | #' @param control (named `list`)\cr output from `control_step()`. | |
| 18 | #' | |
| 19 | #' @return | |
| 20 | #' * `h_step_window()` returns a list containing the window-selection matrix `sel` | |
| 21 | #' and the interval information matrix `interval`. | |
| 22 | #' | |
| 23 | #' @export | |
| 24 | h_step_window <- function(x, | |
| 25 |                           control = control_step()) { | |
| 26 | 12x | checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE) | 
| 27 | 12x | checkmate::assert_list(control, names = "named") | 
| 28 | ||
| 29 | 12x | sel <- matrix(FALSE, length(x), control$num_points) | 
| 30 | 12x | out <- matrix(0, control$num_points, 3) | 
| 31 | 12x |   colnames(out) <- paste("Interval", c("Center", "Lower", "Upper")) | 
| 32 | 12x |   if (control$use_percentile) { | 
| 33 | # Create windows according to percentile cutoffs. | |
| 34 | 9x | out <- cbind(out, out) | 
| 35 | 9x |     colnames(out)[1:3] <- paste("Percentile", c("Center", "Lower", "Upper")) | 
| 36 | 9x | xs <- seq(0, 1, length.out = control$num_points + 2)[-1] | 
| 37 | 9x |     for (i in seq_len(control$num_points)) { | 
| 38 | 185x | out[i, 2:3] <- c( | 
| 39 | 185x | max(xs[i] - control$bandwidth, 0), | 
| 40 | 185x | min(xs[i] + control$bandwidth, 1) | 
| 41 | ) | |
| 42 | 185x | out[i, 5:6] <- stats::quantile(x, out[i, 2:3]) | 
| 43 | 185x | sel[, i] <- x >= out[i, 5] & x <= out[i, 6] | 
| 44 | } | |
| 45 | # Center is the middle point of the percentile window. | |
| 46 | 9x | out[, 1] <- xs[-control$num_points - 1] | 
| 47 | 9x | out[, 4] <- stats::quantile(x, out[, 1]) | 
| 48 |   } else { | |
| 49 | # Create windows according to cutoffs. | |
| 50 | 3x | m <- c(min(x), max(x)) | 
| 51 | 3x | xs <- seq(m[1], m[2], length.out = control$num_points + 2)[-1] | 
| 52 | 3x |     for (i in seq_len(control$num_points)) { | 
| 53 | 11x | out[i, 2:3] <- c( | 
| 54 | 11x | max(xs[i] - control$bandwidth, m[1]), | 
| 55 | 11x | min(xs[i] + control$bandwidth, m[2]) | 
| 56 | ) | |
| 57 | 11x | sel[, i] <- x >= out[i, 2] & x <= out[i, 3] | 
| 58 | } | |
| 59 | # Center is the same as the point for predicting. | |
| 60 | 3x | out[, 1] <- xs[-control$num_points - 1] | 
| 61 | } | |
| 62 | 12x | list(sel = sel, interval = out) | 
| 63 | } | |
| 64 | ||
| 65 | #' @describeIn h_step Calculates the estimated treatment effect estimate | |
| 66 | #' on the linear predictor scale and corresponding standard error from a STEP `model` fitted | |
| 67 | #' on `data` given `variables` specification, for a single biomarker value `x`. | |
| 68 | #' This works for both `coxph` and `glm` models, i.e. for calculating log hazard ratio or log odds | |
| 69 | #' ratio estimates. | |
| 70 | #' | |
| 71 | #' @param model (`coxph` or `glm`)\cr the regression model object. | |
| 72 | #' | |
| 73 | #' @return | |
| 74 | #' * `h_step_trt_effect()` returns a vector with elements `est` and `se`. | |
| 75 | #' | |
| 76 | #' @export | |
| 77 | h_step_trt_effect <- function(data, | |
| 78 | model, | |
| 79 | variables, | |
| 80 |                               x) { | |
| 81 | 208x |   checkmate::assert_multi_class(model, c("coxph", "glm")) | 
| 82 | 208x | checkmate::assert_number(x) | 
| 83 | 208x | assert_df_with_variables(data, variables) | 
| 84 | 208x | checkmate::assert_factor(data[[variables$arm]], n.levels = 2) | 
| 85 | ||
| 86 | 208x | newdata <- data[c(1, 1), ] | 
| 87 | 208x | newdata[, variables$biomarker] <- x | 
| 88 | 208x | newdata[, variables$arm] <- levels(data[[variables$arm]]) | 
| 89 | 208x | model_terms <- stats::delete.response(stats::terms(model)) | 
| 90 | 208x | model_frame <- stats::model.frame(model_terms, data = newdata, xlev = model$xlevels) | 
| 91 | 208x | mat <- stats::model.matrix(model_terms, data = model_frame, contrasts.arg = model$contrasts) | 
| 92 | 208x | coefs <- stats::coef(model) | 
| 93 | # Note: It is important to use the coef subset from matrix, otherwise intercept and | |
| 94 | # strata are included for coxph() models. | |
| 95 | 208x | mat <- mat[, names(coefs)] | 
| 96 | 208x | mat_diff <- diff(mat) | 
| 97 | 208x | est <- mat_diff %*% coefs | 
| 98 | 208x | var <- mat_diff %*% stats::vcov(model) %*% t(mat_diff) | 
| 99 | 208x | se <- sqrt(var) | 
| 100 | 208x | c( | 
| 101 | 208x | est = est, | 
| 102 | 208x | se = se | 
| 103 | ) | |
| 104 | } | |
| 105 | ||
| 106 | #' @describeIn h_step Builds the model formula used in survival STEP calculations. | |
| 107 | #' | |
| 108 | #' @return | |
| 109 | #' * `h_step_survival_formula()` returns a model formula. | |
| 110 | #' | |
| 111 | #' @export | |
| 112 | h_step_survival_formula <- function(variables, | |
| 113 |                                     control = control_step()) { | |
| 114 | 10x | checkmate::assert_character(variables$covariates, null.ok = TRUE) | 
| 115 | ||
| 116 | 10x |   assert_list_of_variables(variables[c("arm", "biomarker", "event", "time")]) | 
| 117 | 10x |   form <- paste0("Surv(", variables$time, ", ", variables$event, ") ~ ", variables$arm) | 
| 118 | 10x |   if (control$degree > 0) { | 
| 119 | 5x |     form <- paste0(form, " * stats::poly(", variables$biomarker, ", degree = ", control$degree, ", raw = TRUE)") | 
| 120 | } | |
| 121 | 10x |   if (!is.null(variables$covariates)) { | 
| 122 | 6x | form <- paste(form, "+", paste(variables$covariates, collapse = "+")) | 
| 123 | } | |
| 124 | 10x |   if (!is.null(variables$strata)) { | 
| 125 | 2x |     form <- paste0(form, " + strata(", paste0(variables$strata, collapse = ", "), ")") | 
| 126 | } | |
| 127 | 10x | stats::as.formula(form) | 
| 128 | } | |
| 129 | ||
| 130 | #' @describeIn h_step Estimates the model with `formula` built based on | |
| 131 | #' `variables` in `data` for a given `subset` and `control` parameters for the | |
| 132 | #' Cox regression. | |
| 133 | #' | |
| 134 | #' @param formula (`formula`)\cr the regression model formula. | |
| 135 | #' @param subset (`logical`)\cr subset vector. | |
| 136 | #' | |
| 137 | #' @return | |
| 138 | #' * `h_step_survival_est()` returns a matrix of number of observations `n`, | |
| 139 | #' `events`, log hazard ratio estimates `loghr`, standard error `se`, | |
| 140 | #' and Wald confidence interval bounds `ci_lower` and `ci_upper`. One row is | |
| 141 | #' included for each biomarker value in `x`. | |
| 142 | #' | |
| 143 | #' @export | |
| 144 | h_step_survival_est <- function(formula, | |
| 145 | data, | |
| 146 | variables, | |
| 147 | x, | |
| 148 | subset = rep(TRUE, nrow(data)), | |
| 149 |                                 control = control_coxph()) { | |
| 150 | 55x | checkmate::assert_formula(formula) | 
| 151 | 55x | assert_df_with_variables(data, variables) | 
| 152 | 55x | checkmate::assert_logical(subset, min.len = 1, any.missing = FALSE) | 
| 153 | 55x | checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE) | 
| 154 | 55x | checkmate::assert_list(control, names = "named") | 
| 155 | ||
| 156 | # Note: `subset` in `coxph` needs to be an expression referring to `data` variables. | |
| 157 | 55x | data$.subset <- subset | 
| 158 | 55x | coxph_warnings <- NULL | 
| 159 | 55x | tryCatch( | 
| 160 | 55x | withCallingHandlers( | 
| 161 | 55x |       expr = { | 
| 162 | 55x | fit <- survival::coxph( | 
| 163 | 55x | formula = formula, | 
| 164 | 55x | data = data, | 
| 165 | 55x | subset = .subset, | 
| 166 | 55x | ties = control$ties | 
| 167 | ) | |
| 168 | }, | |
| 169 | 55x |       warning = function(w) { | 
| 170 | 1x | coxph_warnings <<- c(coxph_warnings, w) | 
| 171 | 1x |         invokeRestart("muffleWarning") | 
| 172 | } | |
| 173 | ), | |
| 174 | 55x |     finally = { | 
| 175 | } | |
| 176 | ) | |
| 177 | 55x |   if (!is.null(coxph_warnings)) { | 
| 178 | 1x | warning(paste( | 
| 179 | 1x | "Fit warnings occurred, please consider using a simpler model, or", | 
| 180 | 1x | "larger `bandwidth`, less `num_points` in `control_step()` settings" | 
| 181 | )) | |
| 182 | } | |
| 183 | # Produce a matrix with one row per `x` and columns `est` and `se`. | |
| 184 | 55x | estimates <- t(vapply( | 
| 185 | 55x | X = x, | 
| 186 | 55x | FUN = h_step_trt_effect, | 
| 187 | 55x | FUN.VALUE = c(1, 2), | 
| 188 | 55x | data = data, | 
| 189 | 55x | model = fit, | 
| 190 | 55x | variables = variables | 
| 191 | )) | |
| 192 | 55x | q_norm <- stats::qnorm((1 + control$conf_level) / 2) | 
| 193 | 55x | cbind( | 
| 194 | 55x | n = fit$n, | 
| 195 | 55x | events = fit$nevent, | 
| 196 | 55x | loghr = estimates[, "est"], | 
| 197 | 55x | se = estimates[, "se"], | 
| 198 | 55x | ci_lower = estimates[, "est"] - q_norm * estimates[, "se"], | 
| 199 | 55x | ci_upper = estimates[, "est"] + q_norm * estimates[, "se"] | 
| 200 | ) | |
| 201 | } | |
| 202 | ||
| 203 | #' @describeIn h_step Builds the model formula used in response STEP calculations. | |
| 204 | #' | |
| 205 | #' @return | |
| 206 | #' * `h_step_rsp_formula()` returns a model formula. | |
| 207 | #' | |
| 208 | #' @export | |
| 209 | h_step_rsp_formula <- function(variables, | |
| 210 |                                control = c(control_step(), control_logistic())) { | |
| 211 | 14x | checkmate::assert_character(variables$covariates, null.ok = TRUE) | 
| 212 | 14x |   assert_list_of_variables(variables[c("arm", "biomarker", "response")]) | 
| 213 | 14x | response_definition <- sub( | 
| 214 | 14x | pattern = "response", | 
| 215 | 14x | replacement = variables$response, | 
| 216 | 14x | x = control$response_definition, | 
| 217 | 14x | fixed = TRUE | 
| 218 | ) | |
| 219 | 14x | form <- paste0(response_definition, " ~ ", variables$arm) | 
| 220 | 14x |   if (control$degree > 0) { | 
| 221 | 8x |     form <- paste0(form, " * stats::poly(", variables$biomarker, ", degree = ", control$degree, ", raw = TRUE)") | 
| 222 | } | |
| 223 | 14x |   if (!is.null(variables$covariates)) { | 
| 224 | 8x | form <- paste(form, "+", paste(variables$covariates, collapse = "+")) | 
| 225 | } | |
| 226 | 14x |   if (!is.null(variables$strata)) { | 
| 227 | 5x |     strata_arg <- if (length(variables$strata) > 1) { | 
| 228 | 2x |       paste0("I(interaction(", paste0(variables$strata, collapse = ", "), "))") | 
| 229 |     } else { | |
| 230 | 3x | variables$strata | 
| 231 | } | |
| 232 | 5x |     form <- paste0(form, "+ strata(", strata_arg, ")") | 
| 233 | } | |
| 234 | 14x | stats::as.formula(form) | 
| 235 | } | |
| 236 | ||
| 237 | #' @describeIn h_step Estimates the model with `formula` built based on | |
| 238 | #' `variables` in `data` for a given `subset` and `control` parameters for the | |
| 239 | #' logistic regression. | |
| 240 | #' | |
| 241 | #' @param formula (`formula`)\cr the regression model formula. | |
| 242 | #' @param subset (`logical`)\cr subset vector. | |
| 243 | #' | |
| 244 | #' @return | |
| 245 | #' * `h_step_rsp_est()` returns a matrix of number of observations `n`, log odds | |
| 246 | #' ratio estimates `logor`, standard error `se`, and Wald confidence interval bounds | |
| 247 | #' `ci_lower` and `ci_upper`. One row is included for each biomarker value in `x`. | |
| 248 | #' | |
| 249 | #' @export | |
| 250 | h_step_rsp_est <- function(formula, | |
| 251 | data, | |
| 252 | variables, | |
| 253 | x, | |
| 254 | subset = rep(TRUE, nrow(data)), | |
| 255 |                            control = control_logistic()) { | |
| 256 | 58x | checkmate::assert_formula(formula) | 
| 257 | 58x | assert_df_with_variables(data, variables) | 
| 258 | 58x | checkmate::assert_logical(subset, min.len = 1, any.missing = FALSE) | 
| 259 | 58x | checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE) | 
| 260 | 58x | checkmate::assert_list(control, names = "named") | 
| 261 | # Note: `subset` in `glm` needs to be an expression referring to `data` variables. | |
| 262 | 58x | data$.subset <- subset | 
| 263 | 58x | fit_warnings <- NULL | 
| 264 | 58x | tryCatch( | 
| 265 | 58x | withCallingHandlers( | 
| 266 | 58x |       expr = { | 
| 267 | 58x |         fit <- if (is.null(variables$strata)) { | 
| 268 | 54x | stats::glm( | 
| 269 | 54x | formula = formula, | 
| 270 | 54x | data = data, | 
| 271 | 54x | subset = .subset, | 
| 272 | 54x |             family = stats::binomial("logit") | 
| 273 | ) | |
| 274 |         } else { | |
| 275 | # clogit needs coxph and strata imported | |
| 276 | 4x | survival::clogit( | 
| 277 | 4x | formula = formula, | 
| 278 | 4x | data = data, | 
| 279 | 4x | subset = .subset | 
| 280 | ) | |
| 281 | } | |
| 282 | }, | |
| 283 | 58x |       warning = function(w) { | 
| 284 | 19x | fit_warnings <<- c(fit_warnings, w) | 
| 285 | 19x |         invokeRestart("muffleWarning") | 
| 286 | } | |
| 287 | ), | |
| 288 | 58x |     finally = { | 
| 289 | } | |
| 290 | ) | |
| 291 | 58x |   if (!is.null(fit_warnings)) { | 
| 292 | 13x | warning(paste( | 
| 293 | 13x | "Fit warnings occurred, please consider using a simpler model, or", | 
| 294 | 13x | "larger `bandwidth`, less `num_points` in `control_step()` settings" | 
| 295 | )) | |
| 296 | } | |
| 297 | # Produce a matrix with one row per `x` and columns `est` and `se`. | |
| 298 | 58x | estimates <- t(vapply( | 
| 299 | 58x | X = x, | 
| 300 | 58x | FUN = h_step_trt_effect, | 
| 301 | 58x | FUN.VALUE = c(1, 2), | 
| 302 | 58x | data = data, | 
| 303 | 58x | model = fit, | 
| 304 | 58x | variables = variables | 
| 305 | )) | |
| 306 | 58x | q_norm <- stats::qnorm((1 + control$conf_level) / 2) | 
| 307 | 58x | cbind( | 
| 308 | 58x | n = length(fit$y), | 
| 309 | 58x | logor = estimates[, "est"], | 
| 310 | 58x | se = estimates[, "se"], | 
| 311 | 58x | ci_lower = estimates[, "est"] - q_norm * estimates[, "se"], | 
| 312 | 58x | ci_upper = estimates[, "est"] + q_norm * estimates[, "se"] | 
| 313 | ) | |
| 314 | } | 
| 1 | #' Count occurrences by grade | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [count_occurrences_by_grade()] creates a layout element to calculate occurrence counts by grade. | |
| 6 | #' | |
| 7 | #' This function analyzes primary analysis variable `var` which indicates toxicity grades. The `id` variable | |
| 8 | #' is used to indicate unique subject identifiers (defaults to `USUBJID`). The user can also supply a list of | |
| 9 | #' custom groups of grades to analyze via the `grade_groups` parameter. The `remove_single` argument will | |
| 10 | #' remove single grades from the analysis so that *only* grade groups are analyzed. | |
| 11 | #' | |
| 12 | #' If there are multiple grades recorded for one patient only the highest grade level is counted. | |
| 13 | #' | |
| 14 | #' The summarize function [summarize_occurrences_by_grade()] performs the same function as | |
| 15 | #' [count_occurrences_by_grade()] except it creates content rows, not data rows, to summarize the current table | |
| 16 | #' row/column context and operates on the level of the latest row split or the root of the table if no row splits have | |
| 17 | #' occurred. | |
| 18 | #' | |
| 19 | #' @inheritParams count_occurrences | |
| 20 | #' @inheritParams argument_convention | |
| 21 | #' @param grade_groups (named `list` of `character`)\cr list containing groupings of grades. | |
| 22 | #' @param remove_single (`flag`)\cr `TRUE` to not include the elements of one-element grade groups | |
| 23 | #' in the the output list; in this case only the grade groups names will be included in the output. If | |
| 24 | #' `only_grade_groups` is set to `TRUE` this argument is ignored. | |
| 25 | #' @param only_grade_groups (`flag`)\cr whether only the specified grade groups should be | |
| 26 | #' included, with individual grade rows removed (`TRUE`), or all grades and grade groups | |
| 27 | #' should be displayed (`FALSE`). | |
| 28 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 29 | #' | |
| 30 | #'   Options are: ``r shQuote(get_stats("count_occurrences_by_grade"), type = "sh")`` | |
| 31 | #' | |
| 32 | #' @seealso Relevant helper function [h_append_grade_groups()]. | |
| 33 | #' | |
| 34 | #' @name count_occurrences_by_grade | |
| 35 | #' @order 1 | |
| 36 | NULL | |
| 37 | ||
| 38 | #' Helper function for `s_count_occurrences_by_grade()` | |
| 39 | #' | |
| 40 | #' @description `r lifecycle::badge("stable")` | |
| 41 | #' | |
| 42 | #' Helper function for [s_count_occurrences_by_grade()] to insert grade groupings into list with | |
| 43 | #' individual grade frequencies. The order of the final result follows the order of `grade_groups`. | |
| 44 | #' The elements under any-grade group (if any), i.e. the grade group equal to `refs` will be moved to | |
| 45 | #' the end. Grade groups names must be unique. | |
| 46 | #' | |
| 47 | #' @inheritParams count_occurrences_by_grade | |
| 48 | #' @param refs (named `list` of `numeric`)\cr named list where each name corresponds to a reference grade level | |
| 49 | #' and each entry represents a count. | |
| 50 | #' | |
| 51 | #' @return Formatted list of grade groupings. | |
| 52 | #' | |
| 53 | #' @examples | |
| 54 | #' h_append_grade_groups( | |
| 55 | #' list( | |
| 56 | #' "Any Grade" = as.character(1:5), | |
| 57 | #'     "Grade 1-2" = c("1", "2"), | |
| 58 | #'     "Grade 3-4" = c("3", "4") | |
| 59 | #' ), | |
| 60 | #'   list("1" = 10, "2" = 20, "3" = 30, "4" = 40, "5" = 50) | |
| 61 | #' ) | |
| 62 | #' | |
| 63 | #' h_append_grade_groups( | |
| 64 | #' list( | |
| 65 | #' "Any Grade" = as.character(5:1), | |
| 66 | #' "Grade A" = "5", | |
| 67 | #'     "Grade B" = c("4", "3") | |
| 68 | #' ), | |
| 69 | #'   list("1" = 10, "2" = 20, "3" = 30, "4" = 40, "5" = 50) | |
| 70 | #' ) | |
| 71 | #' | |
| 72 | #' h_append_grade_groups( | |
| 73 | #' list( | |
| 74 | #' "Any Grade" = as.character(1:5), | |
| 75 | #'     "Grade 1-2" = c("1", "2"), | |
| 76 | #'     "Grade 3-4" = c("3", "4") | |
| 77 | #' ), | |
| 78 | #'   list("1" = 10, "2" = 5, "3" = 0) | |
| 79 | #' ) | |
| 80 | #' | |
| 81 | #' @export | |
| 82 | h_append_grade_groups <- function(grade_groups, refs, remove_single = TRUE, only_grade_groups = FALSE) { | |
| 83 | 32x | checkmate::assert_list(grade_groups) | 
| 84 | 32x | checkmate::assert_list(refs) | 
| 85 | 32x | refs_orig <- refs | 
| 86 | 32x | elements <- unique(unlist(grade_groups)) | 
| 87 | ||
| 88 | ### compute sums in groups | |
| 89 | 32x | grp_sum <- lapply(grade_groups, function(i) do.call(sum, refs[i])) | 
| 90 | 32x |   if (!checkmate::test_subset(elements, names(refs))) { | 
| 91 | 2x | padding_el <- setdiff(elements, names(refs)) | 
| 92 | 2x | refs[padding_el] <- 0 | 
| 93 | } | |
| 94 | 32x | result <- c(grp_sum, refs) | 
| 95 | ||
| 96 | ### order result while keeping grade_groups's ordering | |
| 97 | 32x | ordr <- grade_groups | 
| 98 | ||
| 99 | # elements of any-grade group (if any) will be moved to the end | |
| 100 | 32x | is_any <- sapply(grade_groups, setequal, y = names(refs)) | 
| 101 | 32x | ordr[is_any] <- list(character(0)) # hide elements under any-grade group | 
| 102 | ||
| 103 | # groups-elements combined sequence | |
| 104 | 32x | ordr <- c(lapply(names(ordr), function(g) c(g, ordr[[g]])), recursive = TRUE, use.names = FALSE) | 
| 105 | 32x | ordr <- ordr[!duplicated(ordr)] | 
| 106 | ||
| 107 | # append remaining elements (if any) | |
| 108 | 32x | ordr <- union(ordr, unlist(grade_groups[is_any])) # from any-grade group | 
| 109 | 32x | ordr <- union(ordr, names(refs)) # from refs | 
| 110 | ||
| 111 | # remove elements of single-element groups, if any | |
| 112 | 32x |   if (only_grade_groups) { | 
| 113 | 3x | ordr <- intersect(ordr, names(grade_groups)) | 
| 114 | 29x |   } else if (remove_single) { | 
| 115 | 29x | is_single <- sapply(grade_groups, length) == 1L | 
| 116 | 29x | ordr <- setdiff(ordr, unlist(grade_groups[is_single])) | 
| 117 | } | |
| 118 | ||
| 119 | # apply the order | |
| 120 | 32x | result <- result[ordr] | 
| 121 | ||
| 122 | # remove groups without any elements in the original refs | |
| 123 | # note: it's OK if groups have 0 value | |
| 124 | 32x |   keep_grp <- vapply(grade_groups, function(x, rf) { | 
| 125 | 64x | any(x %in% rf) | 
| 126 | 32x | }, rf = names(refs_orig), logical(1)) | 
| 127 | ||
| 128 | 32x | keep_el <- names(result) %in% names(refs_orig) | names(result) %in% names(keep_grp)[keep_grp] | 
| 129 | 32x | result <- result[keep_el] | 
| 130 | ||
| 131 | 32x | result | 
| 132 | } | |
| 133 | ||
| 134 | #' @describeIn count_occurrences_by_grade Statistics function which counts the | |
| 135 | #' number of patients by highest grade. | |
| 136 | #' | |
| 137 | #' @return | |
| 138 | #' * `s_count_occurrences_by_grade()` returns a list of counts and fractions with one element per grade level or | |
| 139 | #' grade level grouping. | |
| 140 | #' | |
| 141 | #' @examples | |
| 142 | #' s_count_occurrences_by_grade( | |
| 143 | #' df, | |
| 144 | #' .N_col = 10L, | |
| 145 | #' .var = "AETOXGR", | |
| 146 | #' id = "USUBJID", | |
| 147 | #'   grade_groups = list("ANY" = levels(df$AETOXGR)) | |
| 148 | #' ) | |
| 149 | #' | |
| 150 | #' @export | |
| 151 | s_count_occurrences_by_grade <- function(df, | |
| 152 | labelstr = "", | |
| 153 | .var, | |
| 154 | .N_row, # nolint | |
| 155 | .N_col, # nolint | |
| 156 | ..., | |
| 157 | id = "USUBJID", | |
| 158 | grade_groups = list(), | |
| 159 | remove_single = TRUE, | |
| 160 | only_grade_groups = FALSE, | |
| 161 |                                          denom = c("N_col", "n", "N_row")) { | |
| 162 | 75x | assert_valid_factor(df[[.var]]) | 
| 163 | 75x | assert_df_with_variables(df, list(grade = .var, id = id)) | 
| 164 | ||
| 165 | 75x | denom <- match.arg(denom) %>% | 
| 166 | 75x | switch( | 
| 167 | 75x | n = nlevels(factor(df[[id]])), | 
| 168 | 75x | N_row = .N_row, | 
| 169 | 75x | N_col = .N_col | 
| 170 | ) | |
| 171 | ||
| 172 | 75x |   if (nrow(df) < 1) { | 
| 173 | 5x | grade_levels <- levels(df[[.var]]) | 
| 174 | 5x | l_count <- as.list(rep(0, length(grade_levels))) | 
| 175 | 5x | names(l_count) <- grade_levels | 
| 176 |   } else { | |
| 177 | 70x |     if (isTRUE(is.factor(df[[id]]))) { | 
| 178 | ! | assert_valid_factor(df[[id]], any.missing = FALSE) | 
| 179 |     } else { | |
| 180 | 70x | checkmate::assert_character(df[[id]], min.chars = 1, any.missing = FALSE) | 
| 181 | } | |
| 182 | 70x | checkmate::assert_count(.N_col) | 
| 183 | ||
| 184 | 70x | id <- df[[id]] | 
| 185 | 70x | grade <- df[[.var]] | 
| 186 | ||
| 187 | 70x |     if (!is.ordered(grade)) { | 
| 188 | 70x | grade_lbl <- obj_label(grade) | 
| 189 | 70x | lvls <- levels(grade) | 
| 190 | 70x |       if (sum(grepl("^\\d+$", lvls)) %in% c(0, length(lvls))) { | 
| 191 | 69x | lvl_ord <- lvls | 
| 192 |       } else { | |
| 193 | 1x |         lvls[!grepl("^\\d+$", lvls)] <- min(as.numeric(lvls[grepl("^\\d+$", lvls)])) - 1 | 
| 194 | 1x | lvl_ord <- levels(grade)[order(as.numeric(lvls))] | 
| 195 | } | |
| 196 | 70x | grade <- formatters::with_label(factor(grade, levels = lvl_ord, ordered = TRUE), grade_lbl) | 
| 197 | } | |
| 198 | ||
| 199 | 70x |     missing_lvl <- grepl("missing", tolower(levels(grade))) | 
| 200 | 70x |     if (any(missing_lvl)) { | 
| 201 | 1x | grade <- factor( | 
| 202 | 1x | grade, | 
| 203 | 1x | levels = c(levels(grade)[!missing_lvl], levels(grade)[missing_lvl]), | 
| 204 | 1x | ordered = is.ordered(grade) | 
| 205 | ) | |
| 206 | } | |
| 207 | 70x | df_max <- stats::aggregate(grade ~ id, FUN = max, drop = FALSE) | 
| 208 | 70x | l_count <- as.list(table(df_max$grade)) | 
| 209 | } | |
| 210 | ||
| 211 | 75x |   if (length(grade_groups) > 0) { | 
| 212 | 30x | l_count <- h_append_grade_groups(grade_groups, l_count, remove_single, only_grade_groups) | 
| 213 | } | |
| 214 | ||
| 215 | 75x | l_count_fraction <- lapply( | 
| 216 | 75x | l_count, | 
| 217 | 75x |     function(i, denom) { | 
| 218 | 299x |       if (i == 0 && denom == 0) { | 
| 219 | 9x | c(0, 0) | 
| 220 |       } else { | |
| 221 | 290x | c(i, i / denom) | 
| 222 | } | |
| 223 | }, | |
| 224 | 75x | denom = denom | 
| 225 | ) | |
| 226 | ||
| 227 | 75x | list( | 
| 228 | 75x | count_fraction = l_count_fraction, | 
| 229 | 75x | count_fraction_fixed_dp = l_count_fraction | 
| 230 | ) | |
| 231 | } | |
| 232 | ||
| 233 | #' @describeIn count_occurrences_by_grade Formatted analysis function which is used as `afun` | |
| 234 | #' in `count_occurrences_by_grade()`. | |
| 235 | #' | |
| 236 | #' @return | |
| 237 | #' * `a_count_occurrences_by_grade()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 238 | #' | |
| 239 | #' @examples | |
| 240 | #' a_count_occurrences_by_grade( | |
| 241 | #' df, | |
| 242 | #' .N_col = 10L, | |
| 243 | #' .N_row = 10L, | |
| 244 | #' .var = "AETOXGR", | |
| 245 | #' id = "USUBJID", | |
| 246 | #'   grade_groups = list("ANY" = levels(df$AETOXGR)) | |
| 247 | #' ) | |
| 248 | #' | |
| 249 | #' @export | |
| 250 | a_count_occurrences_by_grade <- function(df, | |
| 251 | labelstr = "", | |
| 252 | ..., | |
| 253 | .stats = NULL, | |
| 254 | .stat_names = NULL, | |
| 255 | .formats = NULL, | |
| 256 | .labels = NULL, | |
| 257 |                                          .indent_mods = NULL) { | |
| 258 | # Check for additional parameters to the statistics function | |
| 259 | 56x | dots_extra_args <- list(...) | 
| 260 | 56x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 261 | 56x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 262 | ||
| 263 | # Check for user-defined functions | |
| 264 | 56x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 265 | 56x | .stats <- default_and_custom_stats_list$all_stats | 
| 266 | 56x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 267 | ||
| 268 | # Apply statistics function | |
| 269 | 56x | x_stats <- .apply_stat_functions( | 
| 270 | 56x | default_stat_fnc = s_count_occurrences_by_grade, | 
| 271 | 56x | custom_stat_fnc_list = custom_stat_functions, | 
| 272 | 56x | args_list = c( | 
| 273 | 56x | df = list(df), | 
| 274 | 56x | labelstr = list(labelstr), | 
| 275 | 56x | extra_afun_params, | 
| 276 | 56x | dots_extra_args | 
| 277 | ) | |
| 278 | ) | |
| 279 | ||
| 280 | # Fill in formatting defaults | |
| 281 | 56x |   .stats <- get_stats("count_occurrences_by_grade", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) | 
| 282 | 56x | x_stats <- x_stats[.stats] | 
| 283 | 56x | levels_per_stats <- lapply(x_stats, names) | 
| 284 | 56x | .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) | 
| 285 | 56x | .labels <- get_labels_from_stats(.stats, .labels, levels_per_stats) | 
| 286 | 56x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) | 
| 287 | ||
| 288 | 56x | x_stats <- x_stats[.stats] %>% | 
| 289 | 56x | .unlist_keep_nulls() %>% | 
| 290 | 56x | setNames(names(.formats)) | 
| 291 | ||
| 292 | # Auto format handling | |
| 293 | 56x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 294 | ||
| 295 | # Get and check statistical names | |
| 296 | 56x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 297 | ||
| 298 | 56x | in_rows( | 
| 299 | 56x | .list = x_stats, | 
| 300 | 56x | .formats = .formats, | 
| 301 | 56x | .names = .labels %>% .unlist_keep_nulls(), | 
| 302 | 56x | .stat_names = .stat_names, | 
| 303 | 56x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 304 | 56x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 305 | ) | |
| 306 | } | |
| 307 | ||
| 308 | #' @describeIn count_occurrences_by_grade Layout-creating function which can take statistics function | |
| 309 | #' arguments and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 310 | #' | |
| 311 | #' @return | |
| 312 | #' * `count_occurrences_by_grade()` returns a layout object suitable for passing to further layouting functions, | |
| 313 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 314 | #' the statistics from `s_count_occurrences_by_grade()` to the table layout. | |
| 315 | #' | |
| 316 | #' @examples | |
| 317 | #' library(dplyr) | |
| 318 | #' | |
| 319 | #' df <- data.frame( | |
| 320 | #' USUBJID = as.character(c(1:6, 1)), | |
| 321 | #'   ARM = factor(c("A", "A", "A", "B", "B", "B", "A"), levels = c("A", "B")), | |
| 322 | #' AETOXGR = factor(c(1, 2, 3, 4, 1, 2, 3), levels = c(1:5)), | |
| 323 | #' AESEV = factor( | |
| 324 | #'     x = c("MILD", "MODERATE", "SEVERE", "MILD", "MILD", "MODERATE", "SEVERE"), | |
| 325 | #'     levels = c("MILD", "MODERATE", "SEVERE") | |
| 326 | #' ), | |
| 327 | #' stringsAsFactors = FALSE | |
| 328 | #' ) | |
| 329 | #' | |
| 330 | #' df_adsl <- df %>% | |
| 331 | #' select(USUBJID, ARM) %>% | |
| 332 | #' unique() | |
| 333 | #' | |
| 334 | #' # Layout creating function with custom format. | |
| 335 | #' basic_table() %>% | |
| 336 | #'   split_cols_by("ARM") %>% | |
| 337 | #' add_colcounts() %>% | |
| 338 | #' count_occurrences_by_grade( | |
| 339 | #' var = "AESEV", | |
| 340 | #'     .formats = c("count_fraction" = "xx.xx (xx.xx%)") | |
| 341 | #' ) %>% | |
| 342 | #' build_table(df, alt_counts_df = df_adsl) | |
| 343 | #' | |
| 344 | #' # Define additional grade groupings. | |
| 345 | #' grade_groups <- list( | |
| 346 | #'   "-Any-" = c("1", "2", "3", "4", "5"), | |
| 347 | #'   "Grade 1-2" = c("1", "2"), | |
| 348 | #'   "Grade 3-5" = c("3", "4", "5") | |
| 349 | #' ) | |
| 350 | #' | |
| 351 | #' basic_table() %>% | |
| 352 | #'   split_cols_by("ARM") %>% | |
| 353 | #' add_colcounts() %>% | |
| 354 | #' count_occurrences_by_grade( | |
| 355 | #' var = "AETOXGR", | |
| 356 | #' grade_groups = grade_groups, | |
| 357 | #' only_grade_groups = TRUE | |
| 358 | #' ) %>% | |
| 359 | #' build_table(df, alt_counts_df = df_adsl) | |
| 360 | #' | |
| 361 | #' @export | |
| 362 | #' @order 2 | |
| 363 | count_occurrences_by_grade <- function(lyt, | |
| 364 | var, | |
| 365 | id = "USUBJID", | |
| 366 | grade_groups = list(), | |
| 367 | remove_single = TRUE, | |
| 368 | only_grade_groups = FALSE, | |
| 369 | var_labels = var, | |
| 370 | show_labels = "default", | |
| 371 | riskdiff = FALSE, | |
| 372 | na_str = default_na_str(), | |
| 373 | nested = TRUE, | |
| 374 | ..., | |
| 375 | table_names = var, | |
| 376 | .stats = "count_fraction", | |
| 377 | .stat_names = NULL, | |
| 378 | .formats = list(count_fraction = format_count_fraction_fixed_dp), | |
| 379 | .labels = NULL, | |
| 380 |                                        .indent_mods = NULL) { | |
| 381 | 12x | checkmate::assert_flag(riskdiff) | 
| 382 | 12x | afun <- if (isFALSE(riskdiff)) a_count_occurrences_by_grade else afun_riskdiff | 
| 383 | ||
| 384 | # Process standard extra arguments | |
| 385 | 12x |   extra_args <- list(".stats" = .stats) | 
| 386 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 387 | 12x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 388 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 389 | 1x | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 390 | ||
| 391 | # Process additional arguments to the statistic function | |
| 392 | 12x | extra_args <- c( | 
| 393 | 12x | extra_args, | 
| 394 | 12x | id = id, grade_groups = list(grade_groups), remove_single = remove_single, only_grade_groups = only_grade_groups, | 
| 395 | 12x |     if (!isFALSE(riskdiff)) list(afun = list("s_count_occurrences_by_grade" = a_count_occurrences_by_grade)), | 
| 396 | ... | |
| 397 | ) | |
| 398 | ||
| 399 | # Append additional info from layout to the analysis function | |
| 400 | 12x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 401 | 12x | formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]]) | 
| 402 | ||
| 403 | 12x | analyze( | 
| 404 | 12x | lyt = lyt, | 
| 405 | 12x | vars = var, | 
| 406 | 12x | afun = afun, | 
| 407 | 12x | na_str = na_str, | 
| 408 | 12x | nested = nested, | 
| 409 | 12x | extra_args = extra_args, | 
| 410 | 12x | var_labels = var_labels, | 
| 411 | 12x | show_labels = show_labels, | 
| 412 | 12x | table_names = table_names | 
| 413 | ) | |
| 414 | } | |
| 415 | ||
| 416 | #' @describeIn count_occurrences_by_grade Layout-creating function which can take content function arguments | |
| 417 | #' and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()]. | |
| 418 | #' | |
| 419 | #' @return | |
| 420 | #' * `summarize_occurrences_by_grade()` returns a layout object suitable for passing to further layouting functions, | |
| 421 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted content rows | |
| 422 | #' containing the statistics from `s_count_occurrences_by_grade()` to the table layout. | |
| 423 | #' | |
| 424 | #' @examples | |
| 425 | #' # Layout creating function with custom format. | |
| 426 | #' basic_table() %>% | |
| 427 | #' add_colcounts() %>% | |
| 428 | #'   split_rows_by("ARM", child_labels = "visible", nested = TRUE) %>% | |
| 429 | #' summarize_occurrences_by_grade( | |
| 430 | #' var = "AESEV", | |
| 431 | #'     .formats = c("count_fraction" = "xx.xx (xx.xx%)") | |
| 432 | #' ) %>% | |
| 433 | #' build_table(df, alt_counts_df = df_adsl) | |
| 434 | #' | |
| 435 | #' basic_table() %>% | |
| 436 | #' add_colcounts() %>% | |
| 437 | #'   split_rows_by("ARM", child_labels = "visible", nested = TRUE) %>% | |
| 438 | #' summarize_occurrences_by_grade( | |
| 439 | #' var = "AETOXGR", | |
| 440 | #' grade_groups = grade_groups | |
| 441 | #' ) %>% | |
| 442 | #' build_table(df, alt_counts_df = df_adsl) | |
| 443 | #' | |
| 444 | #' @export | |
| 445 | #' @order 3 | |
| 446 | summarize_occurrences_by_grade <- function(lyt, | |
| 447 | var, | |
| 448 | id = "USUBJID", | |
| 449 | grade_groups = list(), | |
| 450 | remove_single = TRUE, | |
| 451 | only_grade_groups = FALSE, | |
| 452 | riskdiff = FALSE, | |
| 453 | na_str = default_na_str(), | |
| 454 | ..., | |
| 455 | .stats = "count_fraction", | |
| 456 | .stat_names = NULL, | |
| 457 | .formats = list(count_fraction = format_count_fraction_fixed_dp), | |
| 458 | .labels = NULL, | |
| 459 |                                            .indent_mods = 0L) { | |
| 460 | 6x | checkmate::assert_flag(riskdiff) | 
| 461 | 6x | afun <- if (isFALSE(riskdiff)) a_count_occurrences_by_grade else afun_riskdiff | 
| 462 | ||
| 463 | # Process standard extra arguments | |
| 464 | 6x |   extra_args <- list(".stats" = .stats) | 
| 465 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 466 | 6x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 467 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 468 | 6x |   if (is.null(.indent_mods)) { | 
| 469 | ! | indent_mod <- 0L | 
| 470 | 6x |   } else if (length(.indent_mods) == 1) { | 
| 471 | 6x | indent_mod <- .indent_mods | 
| 472 |   } else { | |
| 473 | ! | indent_mod <- 0L | 
| 474 | ! | extra_args[[".indent_mods"]] <- .indent_mods | 
| 475 | } | |
| 476 | ||
| 477 | # Process additional arguments to the statistic function | |
| 478 | 6x | extra_args <- c( | 
| 479 | 6x | extra_args, | 
| 480 | 6x | id = id, grade_groups = list(grade_groups), remove_single = remove_single, only_grade_groups = only_grade_groups, | 
| 481 | 6x |     if (!isFALSE(riskdiff)) list(afun = list("s_count_occurrences_by_grade" = a_count_occurrences_by_grade)), | 
| 482 | ... | |
| 483 | ) | |
| 484 | ||
| 485 | # Append additional info from layout to the analysis function | |
| 486 | 6x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 487 | 6x | formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]]) | 
| 488 | ||
| 489 | 6x | summarize_row_groups( | 
| 490 | 6x | lyt = lyt, | 
| 491 | 6x | var = var, | 
| 492 | 6x | cfun = afun, | 
| 493 | 6x | na_str = na_str, | 
| 494 | 6x | extra_args = extra_args, | 
| 495 | 6x | indent_mod = indent_mod | 
| 496 | ) | |
| 497 | } | 
| 1 | #' Confidence intervals for a difference of binomials | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("experimental")` | |
| 4 | #' | |
| 5 | #' Several confidence intervals for the difference between proportions. | |
| 6 | #' | |
| 7 | #' @name desctools_binom | |
| 8 | NULL | |
| 9 | ||
| 10 | #' Recycle list of parameters | |
| 11 | #' | |
| 12 | #' This function recycles all supplied elements to the maximal dimension. | |
| 13 | #' | |
| 14 | #' @param ... (`any`)\cr elements to recycle. | |
| 15 | #' | |
| 16 | #' @return A `list`. | |
| 17 | #' | |
| 18 | #' @keywords internal | |
| 19 | #' @noRd | |
| 20 | h_recycle <- function(...) { | |
| 21 | 78x | lst <- list(...) | 
| 22 | 78x | maxdim <- max(lengths(lst)) | 
| 23 | 78x | res <- lapply(lst, rep, length.out = maxdim) | 
| 24 | 78x | attr(res, "maxdim") <- maxdim | 
| 25 | 78x | return(res) | 
| 26 | } | |
| 27 | ||
| 28 | #' @describeIn desctools_binom Several confidence intervals for the difference between proportions. | |
| 29 | #' | |
| 30 | #' @return A `matrix` of 3 values: | |
| 31 | #' * `est`: estimate of proportion difference. | |
| 32 | #' * `lwr.ci`: estimate of lower end of the confidence interval. | |
| 33 | #' * `upr.ci`: estimate of upper end of the confidence interval. | |
| 34 | #' | |
| 35 | #' @keywords internal | |
| 36 | desctools_binom <- function(x1, | |
| 37 | n1, | |
| 38 | x2, | |
| 39 | n2, | |
| 40 | conf.level = 0.95, # nolint | |
| 41 |                             sides = c("two.sided", "left", "right"), | |
| 42 | method = c( | |
| 43 | "ac", "wald", "waldcc", "score", "scorecc", "mn", "mee", "blj", "ha", "hal", "jp" | |
| 44 |                             )) { | |
| 45 | 26x |   if (missing(sides)) { | 
| 46 | 26x | sides <- match.arg(sides) | 
| 47 | } | |
| 48 | 26x |   if (missing(method)) { | 
| 49 | 1x | method <- match.arg(method) | 
| 50 | } | |
| 51 | 26x |   iBinomDiffCI <- function(x1, n1, x2, n2, conf.level, sides, method) { # nolint | 
| 52 | 26x |     if (sides != "two.sided") { | 
| 53 | ! | conf.level <- 1 - 2 * (1 - conf.level) # nolint | 
| 54 | } | |
| 55 | 26x | alpha <- 1 - conf.level | 
| 56 | 26x | kappa <- stats::qnorm(1 - alpha / 2) | 
| 57 | 26x | p1_hat <- x1 / n1 | 
| 58 | 26x | p2_hat <- x2 / n2 | 
| 59 | 26x | est <- p1_hat - p2_hat | 
| 60 | 26x | switch(method, | 
| 61 | 26x |       wald = { | 
| 62 | 4x | vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2 | 
| 63 | 4x | term2 <- kappa * sqrt(vd) | 
| 64 | 4x | ci_lwr <- max(-1, est - term2) | 
| 65 | 4x | ci_upr <- min(1, est + term2) | 
| 66 | }, | |
| 67 | 26x |       waldcc = { | 
| 68 | 6x | vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2 | 
| 69 | 6x | term2 <- kappa * sqrt(vd) | 
| 70 | 6x | term2 <- term2 + 0.5 * (1 / n1 + 1 / n2) | 
| 71 | 6x | ci_lwr <- max(-1, est - term2) | 
| 72 | 6x | ci_upr <- min(1, est + term2) | 
| 73 | }, | |
| 74 | 26x |       ac = { | 
| 75 | 2x | n1 <- n1 + 2 | 
| 76 | 2x | n2 <- n2 + 2 | 
| 77 | 2x | x1 <- x1 + 1 | 
| 78 | 2x | x2 <- x2 + 1 | 
| 79 | 2x | p1_hat <- x1 / n1 | 
| 80 | 2x | p2_hat <- x2 / n2 | 
| 81 | 2x | est1 <- p1_hat - p2_hat | 
| 82 | 2x | vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2 | 
| 83 | 2x | term2 <- kappa * sqrt(vd) | 
| 84 | 2x | ci_lwr <- max(-1, est1 - term2) | 
| 85 | 2x | ci_upr <- min(1, est1 + term2) | 
| 86 | }, | |
| 87 | 26x |       exact = { | 
| 88 | ! | ci_lwr <- NA | 
| 89 | ! | ci_upr <- NA | 
| 90 | }, | |
| 91 | 26x |       score = { | 
| 92 | 3x | w1 <- desctools_binomci( | 
| 93 | 3x | x = x1, n = n1, conf.level = conf.level, | 
| 94 | 3x | method = "wilson" | 
| 95 | ) | |
| 96 | 3x | w2 <- desctools_binomci( | 
| 97 | 3x | x = x2, n = n2, conf.level = conf.level, | 
| 98 | 3x | method = "wilson" | 
| 99 | ) | |
| 100 | 3x | l1 <- w1[2] | 
| 101 | 3x | u1 <- w1[3] | 
| 102 | 3x | l2 <- w2[2] | 
| 103 | 3x | u2 <- w2[3] | 
| 104 | 3x | ci_lwr <- est - kappa * sqrt(l1 * (1 - l1) / n1 + u2 * (1 - u2) / n2) | 
| 105 | 3x | ci_upr <- est + kappa * sqrt(u1 * (1 - u1) / n1 + l2 * (1 - l2) / n2) | 
| 106 | }, | |
| 107 | 26x |       scorecc = { | 
| 108 | 1x | w1 <- desctools_binomci( | 
| 109 | 1x | x = x1, n = n1, conf.level = conf.level, | 
| 110 | 1x | method = "wilsoncc" | 
| 111 | ) | |
| 112 | 1x | w2 <- desctools_binomci( | 
| 113 | 1x | x = x2, n = n2, conf.level = conf.level, | 
| 114 | 1x | method = "wilsoncc" | 
| 115 | ) | |
| 116 | 1x | l1 <- w1[2] | 
| 117 | 1x | u1 <- w1[3] | 
| 118 | 1x | l2 <- w2[2] | 
| 119 | 1x | u2 <- w2[3] | 
| 120 | 1x | ci_lwr <- max(-1, est - sqrt((p1_hat - l1)^2 + (u2 - p2_hat)^2)) | 
| 121 | 1x | ci_upr <- min(1, est + sqrt((u1 - p1_hat)^2 + (p2_hat - l2)^2)) | 
| 122 | }, | |
| 123 | 26x |       mee = { | 
| 124 | 1x |         .score <- function(p1, n1, p2, n2, dif) { | 
| 125 | ! | if (dif > 1) dif <- 1 | 
| 126 | ! | if (dif < -1) dif <- -1 | 
| 127 | 24x | diff <- p1 - p2 - dif | 
| 128 | 24x |           if (abs(diff) == 0) { | 
| 129 | ! | res <- 0 | 
| 130 |           } else { | |
| 131 | 24x | t <- n2 / n1 | 
| 132 | 24x | a <- 1 + t | 
| 133 | 24x | b <- -(1 + t + p1 + t * p2 + dif * (t + 2)) | 
| 134 | 24x | c <- dif * dif + dif * (2 * p1 + t + 1) + p1 + t * p2 | 
| 135 | 24x | d <- -p1 * dif * (1 + dif) | 
| 136 | 24x | v <- (b / a / 3)^3 - b * c / (6 * a * a) + d / a / 2 | 
| 137 | 24x | if (abs(v) < .Machine$double.eps) v <- 0 | 
| 138 | 24x | s <- sqrt((b / a / 3)^2 - c / a / 3) | 
| 139 | 24x | u <- ifelse(v > 0, 1, -1) * s | 
| 140 | 24x | w <- (3.141592654 + acos(v / u^3)) / 3 | 
| 141 | 24x | p1d <- 2 * u * cos(w) - b / a / 3 | 
| 142 | 24x | p2d <- p1d - dif | 
| 143 | 24x | n <- n1 + n2 | 
| 144 | 24x | res <- (p1d * (1 - p1d) / n1 + p2d * (1 - p2d) / n2) | 
| 145 | } | |
| 146 | 24x | return(sqrt(res)) | 
| 147 | } | |
| 148 | 1x |         pval <- function(delta) { | 
| 149 | 24x | z <- (est - delta) / .score(p1_hat, n1, p2_hat, n2, delta) | 
| 150 | 24x | 2 * min(stats::pnorm(z), 1 - stats::pnorm(z)) | 
| 151 | } | |
| 152 | 1x |         ci_lwr <- max(-1, stats::uniroot(function(delta) { | 
| 153 | 12x | pval(delta) - alpha | 
| 154 | 1x | }, interval = c(-1 + 1e-06, est - 1e-06))$root) | 
| 155 | 1x |         ci_upr <- min(1, stats::uniroot(function(delta) { | 
| 156 | 12x | pval(delta) - alpha | 
| 157 | 1x | }, interval = c(est + 1e-06, 1 - 1e-06))$root) | 
| 158 | }, | |
| 159 | 26x |       blj = { | 
| 160 | 1x | p1_dash <- (x1 + 0.5) / (n1 + 1) | 
| 161 | 1x | p2_dash <- (x2 + 0.5) / (n2 + 1) | 
| 162 | 1x | vd <- p1_dash * (1 - p1_dash) / n1 + p2_dash * (1 - p2_dash) / n2 | 
| 163 | 1x | term2 <- kappa * sqrt(vd) | 
| 164 | 1x | est_dash <- p1_dash - p2_dash | 
| 165 | 1x | ci_lwr <- max(-1, est_dash - term2) | 
| 166 | 1x | ci_upr <- min(1, est_dash + term2) | 
| 167 | }, | |
| 168 | 26x |       ha = { | 
| 169 | 5x | term2 <- 1 / | 
| 170 | 5x | (2 * min(n1, n2)) + kappa * sqrt(p1_hat * (1 - p1_hat) / (n1 - 1) + p2_hat * (1 - p2_hat) / (n2 - 1)) | 
| 171 | 5x | ci_lwr <- max(-1, est - term2) | 
| 172 | 5x | ci_upr <- min(1, est + term2) | 
| 173 | }, | |
| 174 | 26x |       mn = { | 
| 175 | 1x |         .conf <- function(x1, n1, x2, n2, z, lower = FALSE) { | 
| 176 | 2x | p1 <- x1 / n1 | 
| 177 | 2x | p2 <- x2 / n2 | 
| 178 | 2x | p_hat <- p1 - p2 | 
| 179 | 2x | dp <- 1 + ifelse(lower, 1, -1) * p_hat | 
| 180 | 2x | i <- 1 | 
| 181 | 2x |           while (i <= 50) { | 
| 182 | 46x | dp <- 0.5 * dp | 
| 183 | 46x | y <- p_hat + ifelse(lower, -1, 1) * dp | 
| 184 | 46x | score <- .score(p1, n1, p2, n2, y) | 
| 185 | 46x |             if (score < z) { | 
| 186 | 20x | p_hat <- y | 
| 187 | } | |
| 188 | 46x |             if ((dp < 1e-07) || (abs(z - score) < 1e-06)) { | 
| 189 | 2x | (break)() | 
| 190 |             } else { | |
| 191 | 44x | i <- i + 1 | 
| 192 | } | |
| 193 | } | |
| 194 | 2x | return(y) | 
| 195 | } | |
| 196 | 1x |         .score <- function(p1, n1, p2, n2, dif) { | 
| 197 | 46x | diff <- p1 - p2 - dif | 
| 198 | 46x |           if (abs(diff) == 0) { | 
| 199 | ! | res <- 0 | 
| 200 |           } else { | |
| 201 | 46x | t <- n2 / n1 | 
| 202 | 46x | a <- 1 + t | 
| 203 | 46x | b <- -(1 + t + p1 + t * p2 + dif * (t + 2)) | 
| 204 | 46x | c <- dif * dif + dif * (2 * p1 + t + 1) + p1 + t * p2 | 
| 205 | 46x | d <- -p1 * dif * (1 + dif) | 
| 206 | 46x | v <- (b / a / 3)^3 - b * c / (6 * a * a) + d / a / 2 | 
| 207 | 46x | s <- sqrt((b / a / 3)^2 - c / a / 3) | 
| 208 | 46x | u <- ifelse(v > 0, 1, -1) * s | 
| 209 | 46x | w <- (3.141592654 + acos(v / u^3)) / 3 | 
| 210 | 46x | p1d <- 2 * u * cos(w) - b / a / 3 | 
| 211 | 46x | p2d <- p1d - dif | 
| 212 | 46x | n <- n1 + n2 | 
| 213 | 46x | var <- (p1d * (1 - p1d) / n1 + p2d * (1 - p2d) / n2) * n / (n - 1) | 
| 214 | 46x | res <- diff^2 / var | 
| 215 | } | |
| 216 | 46x | return(res) | 
| 217 | } | |
| 218 | 1x | z <- stats::qchisq(conf.level, 1) | 
| 219 | 1x | ci_lwr <- max(-1, .conf(x1, n1, x2, n2, z, TRUE)) | 
| 220 | 1x | ci_upr <- min(1, .conf(x1, n1, x2, n2, z, FALSE)) | 
| 221 | }, | |
| 222 | 26x |       beal = { | 
| 223 | ! | a <- p1_hat + p2_hat | 
| 224 | ! | b <- p1_hat - p2_hat | 
| 225 | ! | u <- ((1 / n1) + (1 / n2)) / 4 | 
| 226 | ! | v <- ((1 / n1) - (1 / n2)) / 4 | 
| 227 | ! | V <- u * ((2 - a) * a - b^2) + 2 * v * (1 - a) * b # nolint | 
| 228 | ! | z <- stats::qchisq(p = 1 - alpha / 2, df = 1) | 
| 229 | ! | A <- sqrt(z * (V + z * u^2 * (2 - a) * a + z * v^2 * (1 - a)^2)) # nolint | 
| 230 | ! | B <- (b + z * v * (1 - a)) / (1 + z * u) # nolint | 
| 231 | ! | ci_lwr <- max(-1, B - A / (1 + z * u)) | 
| 232 | ! | ci_upr <- min(1, B + A / (1 + z * u)) | 
| 233 | }, | |
| 234 | 26x |       hal = { | 
| 235 | 1x | psi <- (p1_hat + p2_hat) / 2 | 
| 236 | 1x | u <- (1 / n1 + 1 / n2) / 4 | 
| 237 | 1x | v <- (1 / n1 - 1 / n2) / 4 | 
| 238 | 1x | z <- kappa | 
| 239 | 1x | theta <- ((p1_hat - p2_hat) + z^2 * v * (1 - 2 * psi)) / (1 + z^2 * u) | 
| 240 | 1x | w <- z / (1 + z^2 * u) * sqrt(u * (4 * psi * (1 - psi) - (p1_hat - p2_hat)^2) + 2 * v * (1 - 2 * psi) * | 
| 241 | 1x | (p1_hat - p2_hat) + 4 * z^2 * u^2 * (1 - psi) * psi + z^2 * v^2 * (1 - 2 * psi)^2) # nolint | 
| 242 | 1x | c(theta + w, theta - w) | 
| 243 | 1x | ci_lwr <- max(-1, theta - w) | 
| 244 | 1x | ci_upr <- min(1, theta + w) | 
| 245 | }, | |
| 246 | 26x |       jp = { | 
| 247 | 1x | psi <- 0.5 * ((x1 + 0.5) / (n1 + 1) + (x2 + 0.5) / (n2 + 1)) | 
| 248 | 1x | u <- (1 / n1 + 1 / n2) / 4 | 
| 249 | 1x | v <- (1 / n1 - 1 / n2) / 4 | 
| 250 | 1x | z <- kappa | 
| 251 | 1x | theta <- ((p1_hat - p2_hat) + z^2 * v * (1 - 2 * psi)) / (1 + z^2 * u) | 
| 252 | 1x | w <- z / (1 + z^2 * u) * sqrt(u * (4 * psi * (1 - psi) - (p1_hat - p2_hat)^2) + 2 * v * (1 - 2 * psi) * | 
| 253 | 1x | (p1_hat - p2_hat) + 4 * z^2 * u^2 * (1 - psi) * psi + z^2 * v^2 * (1 - 2 * psi)^2) # nolint | 
| 254 | 1x | c(theta + w, theta - w) | 
| 255 | 1x | ci_lwr <- max(-1, theta - w) | 
| 256 | 1x | ci_upr <- min(1, theta + w) | 
| 257 | }, | |
| 258 | ) | |
| 259 | 26x | ci <- c( | 
| 260 | 26x | est = est, lwr.ci = min(ci_lwr, ci_upr), | 
| 261 | 26x | upr.ci = max(ci_lwr, ci_upr) | 
| 262 | ) | |
| 263 | 26x |     if (sides == "left") { | 
| 264 | ! | ci[3] <- 1 | 
| 265 | 26x |     } else if (sides == "right") { | 
| 266 | ! | ci[2] <- -1 | 
| 267 | } | |
| 268 | 26x | return(ci) | 
| 269 | } | |
| 270 | 26x | method <- match.arg(arg = method, several.ok = TRUE) | 
| 271 | 26x | sides <- match.arg(arg = sides, several.ok = TRUE) | 
| 272 | 26x | lst <- h_recycle( | 
| 273 | 26x | x1 = x1, n1 = n1, x2 = x2, n2 = n2, conf.level = conf.level, | 
| 274 | 26x | sides = sides, method = method | 
| 275 | ) | |
| 276 | 26x |   res <- t(sapply(1:attr(lst, "maxdim"), function(i) { | 
| 277 | 26x | iBinomDiffCI( | 
| 278 | 26x | x1 = lst$x1[i], | 
| 279 | 26x | n1 = lst$n1[i], x2 = lst$x2[i], n2 = lst$n2[i], conf.level = lst$conf.level[i], | 
| 280 | 26x | sides = lst$sides[i], method = lst$method[i] | 
| 281 | ) | |
| 282 | })) | |
| 283 | 26x |   lgn <- h_recycle(x1 = if (is.null(names(x1))) { | 
| 284 | 26x |     paste("x1", seq_along(x1), sep = ".") | 
| 285 |   } else { | |
| 286 | ! | names(x1) | 
| 287 | 26x |   }, n1 = if (is.null(names(n1))) { | 
| 288 | 26x |     paste("n1", seq_along(n1), sep = ".") | 
| 289 |   } else { | |
| 290 | ! | names(n1) | 
| 291 | 26x |   }, x2 = if (is.null(names(x2))) { | 
| 292 | 26x |     paste("x2", seq_along(x2), sep = ".") | 
| 293 |   } else { | |
| 294 | ! | names(x2) | 
| 295 | 26x |   }, n2 = if (is.null(names(n2))) { | 
| 296 | 26x |     paste("n2", seq_along(n2), sep = ".") | 
| 297 |   } else { | |
| 298 | ! | names(n2) | 
| 299 | 26x | }, conf.level = conf.level, sides = sides, method = method) | 
| 300 | 26x |   xn <- apply(as.data.frame(lgn[sapply(lgn, function(x) { | 
| 301 | 182x | length(unique(x)) != | 
| 302 | 182x | 1 | 
| 303 | 26x | })]), 1, paste, collapse = ":") | 
| 304 | 26x | rownames(res) <- xn | 
| 305 | 26x | return(res) | 
| 306 | } | |
| 307 | ||
| 308 | #' @describeIn desctools_binom Compute confidence intervals for binomial proportions. | |
| 309 | #' | |
| 310 | #' @param x (`integer(1)`)\cr number of successes. | |
| 311 | #' @param n (`integer(1)`)\cr number of trials. | |
| 312 | #' @param conf.level (`proportion`)\cr confidence level, defaults to 0.95. | |
| 313 | #' @param sides (`string`)\cr side of the confidence interval to compute. Must be one of `"two-sided"` (default), | |
| 314 | #' `"left"`, or `"right"`. | |
| 315 | #' @param method (`string`)\cr method to use. Can be one out of: `"wald"`, `"wilson"`, `"wilsoncc"`, | |
| 316 | #' `"agresti-coull"`, `"jeffreys"`, `"modified wilson"`, `"modified jeffreys"`, `"clopper-pearson"`, `"arcsine"`, | |
| 317 | #' `"logit"`, `"witting"`, `"pratt"`, `"midp"`, `"lik"`, and `"blaker"`. | |
| 318 | #' | |
| 319 | #' @return A `matrix` with 3 columns containing: | |
| 320 | #' * `est`: estimate of proportion difference. | |
| 321 | #' * `lwr.ci`: lower end of the confidence interval. | |
| 322 | #' * `upr.ci`: upper end of the confidence interval. | |
| 323 | #' | |
| 324 | #' @keywords internal | |
| 325 | desctools_binomci <- function(x, | |
| 326 | n, | |
| 327 | conf.level = 0.95, # nolint | |
| 328 |                               sides = c("two.sided", "left", "right"), | |
| 329 | method = c( | |
| 330 | "wilson", "wald", "waldcc", "agresti-coull", | |
| 331 | "jeffreys", "modified wilson", "wilsoncc", "modified jeffreys", | |
| 332 | "clopper-pearson", "arcsine", "logit", "witting", "pratt", | |
| 333 | "midp", "lik", "blaker" | |
| 334 | ), | |
| 335 | rand = 123, | |
| 336 |                               tol = 1e-05) { | |
| 337 | 26x |   if (missing(method)) { | 
| 338 | 1x | method <- "wilson" | 
| 339 | } | |
| 340 | 26x |   if (missing(sides)) { | 
| 341 | 25x | sides <- "two.sided" | 
| 342 | } | |
| 343 | 26x |   iBinomCI <- function(x, n, conf.level = 0.95, sides = c("two.sided", "left", "right"), # nolint | 
| 344 | 26x | method = c( | 
| 345 | 26x | "wilson", "wilsoncc", "wald", | 
| 346 | 26x | "waldcc", "agresti-coull", "jeffreys", "modified wilson", | 
| 347 | 26x | "modified jeffreys", "clopper-pearson", "arcsine", "logit", | 
| 348 | 26x | "witting", "pratt", "midp", "lik", "blaker" | 
| 349 | ), | |
| 350 | 26x | rand = 123, | 
| 351 | 26x |                        tol = 1e-05) { | 
| 352 | 26x |     if (length(x) != 1) { | 
| 353 | ! |       stop("'x' has to be of length 1 (number of successes)") | 
| 354 | } | |
| 355 | 26x |     if (length(n) != 1) { | 
| 356 | ! |       stop("'n' has to be of length 1 (number of trials)") | 
| 357 | } | |
| 358 | 26x |     if (length(conf.level) != 1) { | 
| 359 | ! |       stop("'conf.level' has to be of length 1 (confidence level)") | 
| 360 | } | |
| 361 | 26x |     if (conf.level < 0.5 || conf.level > 1) { | 
| 362 | ! |       stop("'conf.level' has to be in [0.5, 1]") | 
| 363 | } | |
| 364 | 26x | sides <- match.arg(sides, choices = c( | 
| 365 | 26x | "two.sided", "left", | 
| 366 | 26x | "right" | 
| 367 | 26x | ), several.ok = FALSE) | 
| 368 | 26x |     if (sides != "two.sided") { | 
| 369 | 1x | conf.level <- 1 - 2 * (1 - conf.level) # nolint | 
| 370 | } | |
| 371 | 26x | alpha <- 1 - conf.level | 
| 372 | 26x | kappa <- stats::qnorm(1 - alpha / 2) | 
| 373 | 26x | p_hat <- x / n | 
| 374 | 26x | q_hat <- 1 - p_hat | 
| 375 | 26x | est <- p_hat | 
| 376 | 26x | switch(match.arg(arg = method, choices = c( | 
| 377 | 26x | "wilson", | 
| 378 | 26x | "wald", "waldcc", "wilsoncc", "agresti-coull", "jeffreys", | 
| 379 | 26x | "modified wilson", "modified jeffreys", "clopper-pearson", | 
| 380 | 26x | "arcsine", "logit", "witting", "pratt", "midp", "lik", | 
| 381 | 26x | "blaker" | 
| 382 | )), | |
| 383 | 26x |     wald = { | 
| 384 | 1x | term2 <- kappa * sqrt(p_hat * q_hat) / sqrt(n) | 
| 385 | 1x | ci_lwr <- max(0, p_hat - term2) | 
| 386 | 1x | ci_upr <- min(1, p_hat + term2) | 
| 387 | }, | |
| 388 | 26x |     waldcc = { | 
| 389 | 1x | term2 <- kappa * sqrt(p_hat * q_hat) / sqrt(n) | 
| 390 | 1x | term2 <- term2 + 1 / (2 * n) | 
| 391 | 1x | ci_lwr <- max(0, p_hat - term2) | 
| 392 | 1x | ci_upr <- min(1, p_hat + term2) | 
| 393 | }, | |
| 394 | 26x |     wilson = { | 
| 395 | 8x | term1 <- (x + kappa^2 / 2) / (n + kappa^2) | 
| 396 | 8x | term2 <- kappa * sqrt(n) / (n + kappa^2) * sqrt(p_hat * q_hat + kappa^2 / (4 * n)) | 
| 397 | 8x | ci_lwr <- max(0, term1 - term2) | 
| 398 | 8x | ci_upr <- min(1, term1 + term2) | 
| 399 | }, | |
| 400 | 26x |     wilsoncc = { | 
| 401 | 3x | lci <- ( | 
| 402 | 3x | 2 * x + kappa^2 - 1 - kappa * sqrt(kappa^2 - 2 - 1 / n + 4 * p_hat * (n * q_hat + 1)) | 
| 403 | 3x | ) / (2 * (n + kappa^2)) | 
| 404 | 3x | uci <- ( | 
| 405 | 3x | 2 * x + kappa^2 + 1 + kappa * sqrt(kappa^2 + 2 - 1 / n + 4 * p_hat * (n * q_hat - 1)) | 
| 406 | 3x | ) / (2 * (n + kappa^2)) | 
| 407 | 3x | ci_lwr <- max(0, ifelse(p_hat == 0, 0, lci)) | 
| 408 | 3x | ci_upr <- min(1, ifelse(p_hat == 1, 1, uci)) | 
| 409 | }, | |
| 410 | 26x |     `agresti-coull` = { | 
| 411 | 1x | x_tilde <- x + kappa^2 / 2 | 
| 412 | 1x | n_tilde <- n + kappa^2 | 
| 413 | 1x | p_tilde <- x_tilde / n_tilde | 
| 414 | 1x | q_tilde <- 1 - p_tilde | 
| 415 | 1x | est <- p_tilde | 
| 416 | 1x | term2 <- kappa * sqrt(p_tilde * q_tilde) / sqrt(n_tilde) | 
| 417 | 1x | ci_lwr <- max(0, p_tilde - term2) | 
| 418 | 1x | ci_upr <- min(1, p_tilde + term2) | 
| 419 | }, | |
| 420 | 26x |     jeffreys = { | 
| 421 | 1x |       if (x == 0) { | 
| 422 | ! | ci_lwr <- 0 | 
| 423 |       } else { | |
| 424 | 1x | ci_lwr <- stats::qbeta( | 
| 425 | 1x | alpha / 2, | 
| 426 | 1x | x + 0.5, n - x + 0.5 | 
| 427 | ) | |
| 428 | } | |
| 429 | 1x |       if (x == n) { | 
| 430 | ! | ci_upr <- 1 | 
| 431 |       } else { | |
| 432 | 1x | ci_upr <- stats::qbeta(1 - alpha / 2, x + 0.5, n - x + 0.5) | 
| 433 | } | |
| 434 | }, | |
| 435 | 26x |     `modified wilson` = { | 
| 436 | 1x | term1 <- (x + kappa^2 / 2) / (n + kappa^2) | 
| 437 | 1x | term2 <- kappa * sqrt(n) / (n + kappa^2) * sqrt(p_hat * q_hat + kappa^2 / (4 * n)) | 
| 438 | 1x |       if ((n <= 50 & x %in% c(1, 2)) | (n >= 51 & x %in% c(1:3))) { | 
| 439 | ! | ci_lwr <- 0.5 * stats::qchisq(alpha, 2 * x) / n | 
| 440 |       } else { | |
| 441 | 1x | ci_lwr <- max(0, term1 - term2) | 
| 442 | } | |
| 443 | 1x |       if ((n <= 50 & x %in% c(n - 1, n - 2)) | (n >= 51 & x %in% c(n - (1:3)))) { | 
| 444 | ! | ci_upr <- 1 - 0.5 * stats::qchisq( | 
| 445 | ! | alpha, | 
| 446 | ! | 2 * (n - x) | 
| 447 | ! | ) / n | 
| 448 |       } else { | |
| 449 | 1x | ci_upr <- min(1, term1 + term2) | 
| 450 | } | |
| 451 | }, | |
| 452 | 26x |     `modified jeffreys` = { | 
| 453 | 1x |       if (x == n) { | 
| 454 | ! | ci_lwr <- (alpha / 2)^(1 / n) | 
| 455 |       } else { | |
| 456 | 1x |         if (x <= 1) { | 
| 457 | ! | ci_lwr <- 0 | 
| 458 |         } else { | |
| 459 | 1x | ci_lwr <- stats::qbeta( | 
| 460 | 1x | alpha / 2, | 
| 461 | 1x | x + 0.5, n - x + 0.5 | 
| 462 | ) | |
| 463 | } | |
| 464 | } | |
| 465 | 1x |       if (x == 0) { | 
| 466 | ! | ci_upr <- 1 - (alpha / 2)^(1 / n) | 
| 467 |       } else { | |
| 468 | 1x |         if (x >= n - 1) { | 
| 469 | ! | ci_upr <- 1 | 
| 470 |         } else { | |
| 471 | 1x | ci_upr <- stats::qbeta(1 - alpha / 2, x + 0.5, n - x + 0.5) | 
| 472 | } | |
| 473 | } | |
| 474 | }, | |
| 475 | 26x |     `clopper-pearson` = { | 
| 476 | 1x | ci_lwr <- stats::qbeta(alpha / 2, x, n - x + 1) | 
| 477 | 1x | ci_upr <- stats::qbeta(1 - alpha / 2, x + 1, n - x) | 
| 478 | }, | |
| 479 | 26x |     arcsine = { | 
| 480 | 1x | p_tilde <- (x + 0.375) / (n + 0.75) | 
| 481 | 1x | est <- p_tilde | 
| 482 | 1x | ci_lwr <- sin(asin(sqrt(p_tilde)) - 0.5 * kappa / sqrt(n))^2 | 
| 483 | 1x | ci_upr <- sin(asin(sqrt(p_tilde)) + 0.5 * kappa / sqrt(n))^2 | 
| 484 | }, | |
| 485 | 26x |     logit = { | 
| 486 | 1x | lambda_hat <- log(x / (n - x)) | 
| 487 | 1x | V_hat <- n / (x * (n - x)) # nolint | 
| 488 | 1x | lambda_lower <- lambda_hat - kappa * sqrt(V_hat) | 
| 489 | 1x | lambda_upper <- lambda_hat + kappa * sqrt(V_hat) | 
| 490 | 1x | ci_lwr <- exp(lambda_lower) / (1 + exp(lambda_lower)) | 
| 491 | 1x | ci_upr <- exp(lambda_upper) / (1 + exp(lambda_upper)) | 
| 492 | }, | |
| 493 | 26x |     witting = { | 
| 494 | 1x | set.seed(rand) | 
| 495 | 1x | x_tilde <- x + stats::runif(1, min = 0, max = 1) | 
| 496 | 1x |       pbinom_abscont <- function(q, size, prob) { | 
| 497 | 22x | v <- trunc(q) | 
| 498 | 22x | term1 <- stats::pbinom(v - 1, size = size, prob = prob) | 
| 499 | 22x | term2 <- (q - v) * stats::dbinom(v, size = size, prob = prob) | 
| 500 | 22x | return(term1 + term2) | 
| 501 | } | |
| 502 | 1x |       qbinom_abscont <- function(p, size, x) { | 
| 503 | 2x |         fun <- function(prob, size, x, p) { | 
| 504 | 22x | pbinom_abscont(x, size, prob) - p | 
| 505 | } | |
| 506 | 2x | stats::uniroot(fun, | 
| 507 | 2x | interval = c(0, 1), size = size, | 
| 508 | 2x | x = x, p = p | 
| 509 | 2x | )$root | 
| 510 | } | |
| 511 | 1x | ci_lwr <- qbinom_abscont(1 - alpha, size = n, x = x_tilde) | 
| 512 | 1x | ci_upr <- qbinom_abscont(alpha, size = n, x = x_tilde) | 
| 513 | }, | |
| 514 | 26x |     pratt = { | 
| 515 | 1x |       if (x == 0) { | 
| 516 | ! | ci_lwr <- 0 | 
| 517 | ! | ci_upr <- 1 - alpha^(1 / n) | 
| 518 | 1x |       } else if (x == 1) { | 
| 519 | ! | ci_lwr <- 1 - (1 - alpha / 2)^(1 / n) | 
| 520 | ! | ci_upr <- 1 - (alpha / 2)^(1 / n) | 
| 521 | 1x |       } else if (x == (n - 1)) { | 
| 522 | ! | ci_lwr <- (alpha / 2)^(1 / n) | 
| 523 | ! | ci_upr <- (1 - alpha / 2)^(1 / n) | 
| 524 | 1x |       } else if (x == n) { | 
| 525 | ! | ci_lwr <- alpha^(1 / n) | 
| 526 | ! | ci_upr <- 1 | 
| 527 |       } else { | |
| 528 | 1x | z <- stats::qnorm(1 - alpha / 2) | 
| 529 | 1x | A <- ((x + 1) / (n - x))^2 # nolint | 
| 530 | 1x | B <- 81 * (x + 1) * (n - x) - 9 * n - 8 # nolint | 
| 531 | 1x | C <- (0 - 3) * z * sqrt(9 * (x + 1) * (n - x) * (9 * n + 5 - z^2) + n + 1) # nolint | 
| 532 | 1x | D <- 81 * (x + 1)^2 - 9 * (x + 1) * (2 + z^2) + 1 # nolint | 
| 533 | 1x | E <- 1 + A * ((B + C) / D)^3 # nolint | 
| 534 | 1x | ci_upr <- 1 / E | 
| 535 | 1x | A <- (x / (n - x - 1))^2 # nolint | 
| 536 | 1x | B <- 81 * x * (n - x - 1) - 9 * n - 8 # nolint | 
| 537 | 1x | C <- 3 * z * sqrt(9 * x * (n - x - 1) * (9 * n + 5 - z^2) + n + 1) # nolint | 
| 538 | 1x | D <- 81 * x^2 - 9 * x * (2 + z^2) + 1 # nolint | 
| 539 | 1x | E <- 1 + A * ((B + C) / D)^3 # nolint | 
| 540 | 1x | ci_lwr <- 1 / E | 
| 541 | } | |
| 542 | }, | |
| 543 | 26x |     midp = { | 
| 544 | 1x |       f_low <- function(pi, x, n) { | 
| 545 | 12x | 1 / 2 * stats::dbinom(x, size = n, prob = pi) + stats::pbinom(x, | 
| 546 | 12x | size = n, prob = pi, lower.tail = FALSE | 
| 547 | ) - | |
| 548 | 12x | (1 - conf.level) / 2 | 
| 549 | } | |
| 550 | 1x |       f_up <- function(pi, x, n) { | 
| 551 | 12x | 1 / 2 * stats::dbinom(x, size = n, prob = pi) + stats::pbinom(x - 1, size = n, prob = pi) - (1 - conf.level) / 2 | 
| 552 | } | |
| 553 | 1x | ci_lwr <- 0 | 
| 554 | 1x | ci_upr <- 1 | 
| 555 | 1x |       if (x != 0) { | 
| 556 | 1x | ci_lwr <- stats::uniroot(f_low, | 
| 557 | 1x | interval = c(0, p_hat), | 
| 558 | 1x | x = x, n = n | 
| 559 | 1x | )$root | 
| 560 | } | |
| 561 | 1x |       if (x != n) { | 
| 562 | 1x | ci_upr <- stats::uniroot(f_up, interval = c( | 
| 563 | 1x | p_hat, | 
| 564 | 1x | 1 | 
| 565 | 1x | ), x = x, n = n)$root | 
| 566 | } | |
| 567 | }, | |
| 568 | 26x |     lik = { | 
| 569 | 2x | ci_lwr <- 0 | 
| 570 | 2x | ci_upr <- 1 | 
| 571 | 2x | z <- stats::qnorm(1 - alpha * 0.5) | 
| 572 | 2x | tol <- .Machine$double.eps^0.5 | 
| 573 | 2x | BinDev <- function(y, x, mu, wt, bound = 0, tol = .Machine$double.eps^0.5, # nolint | 
| 574 |                          ...) { | |
| 575 | 40x | ll_y <- ifelse(y %in% c(0, 1), 0, stats::dbinom(x, wt, | 
| 576 | 40x | y, | 
| 577 | 40x | log = TRUE | 
| 578 | )) | |
| 579 | 40x | ll_mu <- ifelse(mu %in% c(0, 1), 0, stats::dbinom(x, | 
| 580 | 40x | wt, mu, | 
| 581 | 40x | log = TRUE | 
| 582 | )) | |
| 583 | 40x | res <- ifelse(abs(y - mu) < tol, 0, sign(y - mu) * sqrt(-2 * (ll_y - ll_mu))) | 
| 584 | 40x | return(res - bound) | 
| 585 | } | |
| 586 | 2x |       if (x != 0 && tol < p_hat) { | 
| 587 | 2x | ci_lwr <- if (BinDev( | 
| 588 | 2x | tol, x, p_hat, n, -z, | 
| 589 | 2x | tol | 
| 590 | 2x |         ) <= 0) { | 
| 591 | 2x | stats::uniroot( | 
| 592 | 2x |             f = BinDev, interval = c(tol, if (p_hat < tol || p_hat == 1) { | 
| 593 | ! | 1 - tol | 
| 594 |             } else { | |
| 595 | 2x | p_hat | 
| 596 | 2x | }), bound = -z, | 
| 597 | 2x | x = x, mu = p_hat, wt = n | 
| 598 | 2x | )$root | 
| 599 | } | |
| 600 | } | |
| 601 | 2x |       if (x != n && p_hat < (1 - tol)) { | 
| 602 | 2x | ci_upr <- if ( | 
| 603 | 2x |           BinDev(y = 1 - tol, x = x, mu = ifelse(p_hat > 1 - tol, tol, p_hat), wt = n, bound = z, tol = tol) < 0) { # nolint | 
| 604 | ! | ci_lwr <- if (BinDev( | 
| 605 | ! |             tol, x, if (p_hat < tol || p_hat == 1) { | 
| 606 | ! | 1 - tol | 
| 607 |             } else { | |
| 608 | ! | p_hat | 
| 609 | ! | }, n, | 
| 610 | ! | -z, tol | 
| 611 | ! |           ) <= 0) { | 
| 612 | ! | stats::uniroot( | 
| 613 | ! | f = BinDev, interval = c(tol, p_hat), | 
| 614 | ! | bound = -z, x = x, mu = p_hat, wt = n | 
| 615 | ! | )$root | 
| 616 | } | |
| 617 |         } else { | |
| 618 | 2x | stats::uniroot( | 
| 619 | 2x |             f = BinDev, interval = c(if (p_hat > 1 - tol) { | 
| 620 | ! | tol | 
| 621 |             } else { | |
| 622 | 2x | p_hat | 
| 623 | 2x | }, 1 - tol), bound = z, | 
| 624 | 2x | x = x, mu = p_hat, wt = n | 
| 625 | 2x | )$root | 
| 626 | } | |
| 627 | } | |
| 628 | }, | |
| 629 | 26x |     blaker = { | 
| 630 | 1x |       acceptbin <- function(x, n, p) { | 
| 631 | 3954x | p1 <- 1 - stats::pbinom(x - 1, n, p) | 
| 632 | 3954x | p2 <- stats::pbinom(x, n, p) | 
| 633 | 3954x | a1 <- p1 + stats::pbinom(stats::qbinom(p1, n, p) - 1, n, p) | 
| 634 | 3954x | a2 <- p2 + 1 - stats::pbinom( | 
| 635 | 3954x | stats::qbinom(1 - p2, n, p), n, | 
| 636 | 3954x | p | 
| 637 | ) | |
| 638 | 3954x | return(min(a1, a2)) | 
| 639 | } | |
| 640 | 1x | ci_lwr <- 0 | 
| 641 | 1x | ci_upr <- 1 | 
| 642 | 1x |       if (x != 0) { | 
| 643 | 1x | ci_lwr <- stats::qbeta((1 - conf.level) / 2, x, n - x + 1) | 
| 644 | 1x |         while (acceptbin(x, n, ci_lwr + tol) < (1 - conf.level)) { | 
| 645 | 1976x | ci_lwr <- ci_lwr + tol | 
| 646 | } | |
| 647 | } | |
| 648 | 1x |       if (x != n) { | 
| 649 | 1x | ci_upr <- stats::qbeta(1 - (1 - conf.level) / 2, x + 1, n - x) | 
| 650 | 1x |         while (acceptbin(x, n, ci_upr - tol) < (1 - conf.level)) { | 
| 651 | 1976x | ci_upr <- ci_upr - tol | 
| 652 | } | |
| 653 | } | |
| 654 | } | |
| 655 | ) | |
| 656 | 26x | ci <- c(est = est, lwr.ci = max(0, ci_lwr), upr.ci = min( | 
| 657 | 26x | 1, | 
| 658 | 26x | ci_upr | 
| 659 | )) | |
| 660 | 26x |     if (sides == "left") { | 
| 661 | 1x | ci[3] <- 1 | 
| 662 | 25x |     } else if (sides == "right") { | 
| 663 | ! | ci[2] <- 0 | 
| 664 | } | |
| 665 | 26x | return(ci) | 
| 666 | } | |
| 667 | 26x | lst <- list( | 
| 668 | 26x | x = x, n = n, conf.level = conf.level, sides = sides, | 
| 669 | 26x | method = method, rand = rand | 
| 670 | ) | |
| 671 | 26x | maxdim <- max(unlist(lapply(lst, length))) | 
| 672 | 26x | lgp <- lapply(lst, rep, length.out = maxdim) | 
| 673 | 26x |   lgn <- h_recycle(x = if (is.null(names(x))) { | 
| 674 | 26x |     paste("x", seq_along(x), sep = ".") | 
| 675 |   } else { | |
| 676 | ! | names(x) | 
| 677 | 26x |   }, n = if (is.null(names(n))) { | 
| 678 | 26x |     paste("n", seq_along(n), sep = ".") | 
| 679 |   } else { | |
| 680 | ! | names(n) | 
| 681 | 26x | }, conf.level = conf.level, sides = sides, method = method) | 
| 682 | 26x |   xn <- apply(as.data.frame(lgn[sapply(lgn, function(x) { | 
| 683 | 130x | length(unique(x)) != | 
| 684 | 130x | 1 | 
| 685 | 26x | })]), 1, paste, collapse = ":") | 
| 686 | 26x |   res <- t(sapply(1:maxdim, function(i) { | 
| 687 | 26x | iBinomCI( | 
| 688 | 26x | x = lgp$x[i], | 
| 689 | 26x | n = lgp$n[i], conf.level = lgp$conf.level[i], sides = lgp$sides[i], | 
| 690 | 26x | method = lgp$method[i], rand = lgp$rand[i] | 
| 691 | ) | |
| 692 | })) | |
| 693 | 26x |   colnames(res)[1] <- c("est") | 
| 694 | 26x | rownames(res) <- xn | 
| 695 | 26x | return(res) | 
| 696 | } | 
| 1 | #' Control function for Cox regression | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Sets a list of parameters for Cox regression fit. Used internally. | |
| 6 | #' | |
| 7 | #' @inheritParams argument_convention | |
| 8 | #' @param pval_method (`string`)\cr the method used for estimation of p.values; `wald` (default) or `likelihood`. | |
| 9 | #' @param interaction (`flag`)\cr if `TRUE`, the model includes the interaction between the studied | |
| 10 | #' treatment and candidate covariate. Note that for univariate models without treatment arm, and | |
| 11 | #' multivariate models, no interaction can be used so that this needs to be `FALSE`. | |
| 12 | #' @param ties (`string`)\cr among `exact` (equivalent to `DISCRETE` in SAS), `efron` and `breslow`, | |
| 13 | #' see [survival::coxph()]. Note: there is no equivalent of SAS `EXACT` method in R. | |
| 14 | #' | |
| 15 | #' @return A `list` of items with names corresponding to the arguments. | |
| 16 | #' | |
| 17 | #' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()]. | |
| 18 | #' | |
| 19 | #' @examples | |
| 20 | #' control_coxreg() | |
| 21 | #' | |
| 22 | #' @export | |
| 23 | control_coxreg <- function(pval_method = c("wald", "likelihood"), | |
| 24 |                            ties = c("exact", "efron", "breslow"), | |
| 25 | conf_level = 0.95, | |
| 26 |                            interaction = FALSE) { | |
| 27 | 55x | pval_method <- match.arg(pval_method) | 
| 28 | 55x | ties <- match.arg(ties) | 
| 29 | 55x | checkmate::assert_flag(interaction) | 
| 30 | 55x | assert_proportion_value(conf_level) | 
| 31 | 55x | list( | 
| 32 | 55x | pval_method = pval_method, | 
| 33 | 55x | ties = ties, | 
| 34 | 55x | conf_level = conf_level, | 
| 35 | 55x | interaction = interaction | 
| 36 | ) | |
| 37 | } | |
| 38 | ||
| 39 | #' Custom tidy methods for Cox regression | |
| 40 | #' | |
| 41 | #' @description `r lifecycle::badge("stable")` | |
| 42 | #' | |
| 43 | #' @inheritParams argument_convention | |
| 44 | #' @param x (`list`)\cr result of the Cox regression model fitted by [fit_coxreg_univar()] (for univariate models) | |
| 45 | #' or [fit_coxreg_multivar()] (for multivariate models). | |
| 46 | #' | |
| 47 | #' @return [broom::tidy()] returns: | |
| 48 | #' * For `summary.coxph` objects, a `data.frame` with columns: `Pr(>|z|)`, `exp(coef)`, `exp(-coef)`, `lower .95`, | |
| 49 | #' `upper .95`, `level`, and `n`. | |
| 50 | #' * For `coxreg.univar` objects, a `data.frame` with columns: `effect`, `term`, `term_label`, `level`, `n`, `hr`, | |
| 51 | #' `lcl`, `ucl`, `pval`, and `ci`. | |
| 52 | #' * For `coxreg.multivar` objects, a `data.frame` with columns: `term`, `pval`, `term_label`, `hr`, `lcl`, `ucl`, | |
| 53 | #' `level`, and `ci`. | |
| 54 | #' | |
| 55 | #' @seealso [cox_regression] | |
| 56 | #' | |
| 57 | #' @name tidy_coxreg | |
| 58 | NULL | |
| 59 | ||
| 60 | #' @describeIn tidy_coxreg Custom tidy method for [survival::coxph()] summary results. | |
| 61 | #' | |
| 62 | #' Tidy the [survival::coxph()] results into a `data.frame` to extract model results. | |
| 63 | #' | |
| 64 | #' @method tidy summary.coxph | |
| 65 | #' | |
| 66 | #' @examples | |
| 67 | #' library(survival) | |
| 68 | #' library(broom) | |
| 69 | #' | |
| 70 | #' set.seed(1, kind = "Mersenne-Twister") | |
| 71 | #' | |
| 72 | #' dta_bladder <- with( | |
| 73 | #' data = bladder[bladder$enum < 5, ], | |
| 74 | #' data.frame( | |
| 75 | #' time = stop, | |
| 76 | #' status = event, | |
| 77 | #' armcd = as.factor(rx), | |
| 78 | #' covar1 = as.factor(enum), | |
| 79 | #' covar2 = factor( | |
| 80 | #' sample(as.factor(enum)), | |
| 81 | #'       levels = 1:4, labels = c("F", "F", "M", "M") | |
| 82 | #' ) | |
| 83 | #' ) | |
| 84 | #' ) | |
| 85 | #' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)") | |
| 86 | #' formatters::var_labels(dta_bladder)[names(labels)] <- labels | |
| 87 | #' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE) | |
| 88 | #' | |
| 89 | #' formula <- "survival::Surv(time, status) ~ armcd + covar1" | |
| 90 | #' msum <- summary(coxph(stats::as.formula(formula), data = dta_bladder)) | |
| 91 | #' tidy(msum) | |
| 92 | #' | |
| 93 | #' @export | |
| 94 | tidy.summary.coxph <- function(x, # nolint | |
| 95 |                                ...) { | |
| 96 | 199x | checkmate::assert_class(x, "summary.coxph") | 
| 97 | 199x | pval <- x$coefficients | 
| 98 | 199x | confint <- x$conf.int | 
| 99 | 199x | levels <- rownames(pval) | 
| 100 | ||
| 101 | 199x | pval <- tibble::as_tibble(pval) | 
| 102 | 199x | confint <- tibble::as_tibble(confint) | 
| 103 | ||
| 104 | 199x |   ret <- cbind(pval[, grepl("Pr", names(pval))], confint) | 
| 105 | 199x | ret$level <- levels | 
| 106 | 199x | ret$n <- x[["n"]] | 
| 107 | 199x | ret | 
| 108 | } | |
| 109 | ||
| 110 | #' @describeIn tidy_coxreg Custom tidy method for a univariate Cox regression. | |
| 111 | #' | |
| 112 | #' Tidy up the result of a Cox regression model fitted by [fit_coxreg_univar()]. | |
| 113 | #' | |
| 114 | #' @method tidy coxreg.univar | |
| 115 | #' | |
| 116 | #' @examples | |
| 117 | #' ## Cox regression: arm + 1 covariate. | |
| 118 | #' mod1 <- fit_coxreg_univar( | |
| 119 | #' variables = list( | |
| 120 | #' time = "time", event = "status", arm = "armcd", | |
| 121 | #' covariates = "covar1" | |
| 122 | #' ), | |
| 123 | #' data = dta_bladder, | |
| 124 | #' control = control_coxreg(conf_level = 0.91) | |
| 125 | #' ) | |
| 126 | #' | |
| 127 | #' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates. | |
| 128 | #' mod2 <- fit_coxreg_univar( | |
| 129 | #' variables = list( | |
| 130 | #' time = "time", event = "status", arm = "armcd", | |
| 131 | #'     covariates = c("covar1", "covar2") | |
| 132 | #' ), | |
| 133 | #' data = dta_bladder, | |
| 134 | #' control = control_coxreg(conf_level = 0.91, interaction = TRUE) | |
| 135 | #' ) | |
| 136 | #' | |
| 137 | #' tidy(mod1) | |
| 138 | #' tidy(mod2) | |
| 139 | #' | |
| 140 | #' @export | |
| 141 | tidy.coxreg.univar <- function(x, # nolint | |
| 142 |                                ...) { | |
| 143 | 38x | checkmate::assert_class(x, "coxreg.univar") | 
| 144 | 38x | mod <- x$mod | 
| 145 | 38x | vars <- c(x$vars$arm, x$vars$covariates) | 
| 146 | 38x | has_arm <- "arm" %in% names(x$vars) | 
| 147 | ||
| 148 | 38x |   result <- if (!has_arm) { | 
| 149 | 5x | Map( | 
| 150 | 5x | mod = mod, vars = vars, | 
| 151 | 5x |       f = function(mod, vars) { | 
| 152 | 6x | h_coxreg_multivar_extract( | 
| 153 | 6x | var = vars, | 
| 154 | 6x | data = x$data, | 
| 155 | 6x | mod = mod, | 
| 156 | 6x | control = x$control | 
| 157 | ) | |
| 158 | } | |
| 159 | ) | |
| 160 | 38x |   } else if (x$control$interaction) { | 
| 161 | 12x | Map( | 
| 162 | 12x | mod = mod, covar = vars, | 
| 163 | 12x |       f = function(mod, covar) { | 
| 164 | 26x | h_coxreg_extract_interaction( | 
| 165 | 26x | effect = x$vars$arm, covar = covar, mod = mod, data = x$data, | 
| 166 | 26x | at = x$at, control = x$control | 
| 167 | ) | |
| 168 | } | |
| 169 | ) | |
| 170 |   } else { | |
| 171 | 21x | Map( | 
| 172 | 21x | mod = mod, vars = vars, | 
| 173 | 21x |       f = function(mod, vars) { | 
| 174 | 53x | h_coxreg_univar_extract( | 
| 175 | 53x | effect = x$vars$arm, covar = vars, data = x$data, mod = mod, | 
| 176 | 53x | control = x$control | 
| 177 | ) | |
| 178 | } | |
| 179 | ) | |
| 180 | } | |
| 181 | 38x | result <- do.call(rbind, result) | 
| 182 | ||
| 183 | 38x | result$ci <- Map(lcl = result$lcl, ucl = result$ucl, f = function(lcl, ucl) c(lcl, ucl)) | 
| 184 | 38x | result$n <- lapply(result$n, empty_vector_if_na) | 
| 185 | 38x | result$ci <- lapply(result$ci, empty_vector_if_na) | 
| 186 | 38x | result$hr <- lapply(result$hr, empty_vector_if_na) | 
| 187 | 38x |   if (x$control$interaction) { | 
| 188 | 12x | result$pval_inter <- lapply(result$pval_inter, empty_vector_if_na) | 
| 189 | # Remove interaction p-values due to change in specifications. | |
| 190 | 12x | result$pval[result$effect != "Treatment:"] <- NA | 
| 191 | } | |
| 192 | 38x | result$pval <- lapply(result$pval, empty_vector_if_na) | 
| 193 | 38x | attr(result, "conf_level") <- x$control$conf_level | 
| 194 | 38x | result | 
| 195 | } | |
| 196 | ||
| 197 | #' @describeIn tidy_coxreg Custom tidy method for a multivariate Cox regression. | |
| 198 | #' | |
| 199 | #' Tidy up the result of a Cox regression model fitted by [fit_coxreg_multivar()]. | |
| 200 | #' | |
| 201 | #' @method tidy coxreg.multivar | |
| 202 | #' | |
| 203 | #' @examples | |
| 204 | #' multivar_model <- fit_coxreg_multivar( | |
| 205 | #' variables = list( | |
| 206 | #' time = "time", event = "status", arm = "armcd", | |
| 207 | #'     covariates = c("covar1", "covar2") | |
| 208 | #' ), | |
| 209 | #' data = dta_bladder | |
| 210 | #' ) | |
| 211 | #' broom::tidy(multivar_model) | |
| 212 | #' | |
| 213 | #' @export | |
| 214 | tidy.coxreg.multivar <- function(x, # nolint | |
| 215 |                                  ...) { | |
| 216 | 16x | checkmate::assert_class(x, "coxreg.multivar") | 
| 217 | 16x | vars <- c(x$vars$arm, x$vars$covariates) | 
| 218 | ||
| 219 | # Convert the model summaries to data. | |
| 220 | 16x | result <- Map( | 
| 221 | 16x | vars = vars, | 
| 222 | 16x |     f = function(vars) { | 
| 223 | 60x | h_coxreg_multivar_extract( | 
| 224 | 60x | var = vars, data = x$data, | 
| 225 | 60x | mod = x$mod, control = x$control | 
| 226 | ) | |
| 227 | } | |
| 228 | ) | |
| 229 | 16x | result <- do.call(rbind, result) | 
| 230 | ||
| 231 | 16x | result$ci <- Map(lcl = result$lcl, ucl = result$ucl, f = function(lcl, ucl) c(lcl, ucl)) | 
| 232 | 16x | result$ci <- lapply(result$ci, empty_vector_if_na) | 
| 233 | 16x | result$hr <- lapply(result$hr, empty_vector_if_na) | 
| 234 | 16x | result$pval <- lapply(result$pval, empty_vector_if_na) | 
| 235 | 16x | result <- result[, names(result) != "n"] | 
| 236 | 16x | attr(result, "conf_level") <- x$control$conf_level | 
| 237 | ||
| 238 | 16x | result | 
| 239 | } | |
| 240 | ||
| 241 | #' Fitting functions for Cox proportional hazards regression | |
| 242 | #' | |
| 243 | #' @description `r lifecycle::badge("stable")` | |
| 244 | #' | |
| 245 | #' Fitting functions for univariate and multivariate Cox regression models. | |
| 246 | #' | |
| 247 | #' @param variables (named `list`)\cr the names of the variables found in `data`, passed as a named list and | |
| 248 | #' corresponding to the `time`, `event`, `arm`, `strata`, and `covariates` terms. If `arm` is missing from | |
| 249 | #' `variables`, then only Cox model(s) including the `covariates` will be fitted and the corresponding effect | |
| 250 | #' estimates will be tabulated later. | |
| 251 | #' @param data (`data.frame`)\cr the dataset containing the variables to fit the models. | |
| 252 | #' @param at (`list` of `numeric`)\cr when the candidate covariate is a `numeric`, use `at` to specify | |
| 253 | #' the value of the covariate at which the effect should be estimated. | |
| 254 | #' @param control (`list`)\cr a list of parameters as returned by the helper function [control_coxreg()]. | |
| 255 | #' | |
| 256 | #' @seealso [h_cox_regression] for relevant helper functions, [cox_regression]. | |
| 257 | #' | |
| 258 | #' @examples | |
| 259 | #' library(survival) | |
| 260 | #' | |
| 261 | #' set.seed(1, kind = "Mersenne-Twister") | |
| 262 | #' | |
| 263 | #' # Testing dataset [survival::bladder]. | |
| 264 | #' dta_bladder <- with( | |
| 265 | #' data = bladder[bladder$enum < 5, ], | |
| 266 | #' data.frame( | |
| 267 | #' time = stop, | |
| 268 | #' status = event, | |
| 269 | #' armcd = as.factor(rx), | |
| 270 | #' covar1 = as.factor(enum), | |
| 271 | #' covar2 = factor( | |
| 272 | #' sample(as.factor(enum)), | |
| 273 | #'       levels = 1:4, labels = c("F", "F", "M", "M") | |
| 274 | #' ) | |
| 275 | #' ) | |
| 276 | #' ) | |
| 277 | #' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)") | |
| 278 | #' formatters::var_labels(dta_bladder)[names(labels)] <- labels | |
| 279 | #' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE) | |
| 280 | #' | |
| 281 | #' plot( | |
| 282 | #' survfit(Surv(time, status) ~ armcd + covar1, data = dta_bladder), | |
| 283 | #' lty = 2:4, | |
| 284 | #' xlab = "Months", | |
| 285 | #'   col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4") | |
| 286 | #' ) | |
| 287 | #' | |
| 288 | #' @name fit_coxreg | |
| 289 | NULL | |
| 290 | ||
| 291 | #' @describeIn fit_coxreg Fit a series of univariate Cox regression models given the inputs. | |
| 292 | #' | |
| 293 | #' @return | |
| 294 | #' * `fit_coxreg_univar()` returns a `coxreg.univar` class object which is a named `list` | |
| 295 | #' with 5 elements: | |
| 296 | #' * `mod`: Cox regression models fitted by [survival::coxph()]. | |
| 297 | #' * `data`: The original data frame input. | |
| 298 | #' * `control`: The original control input. | |
| 299 | #' * `vars`: The variables used in the model. | |
| 300 | #' * `at`: Value of the covariate at which the effect should be estimated. | |
| 301 | #' | |
| 302 | #' @note When using `fit_coxreg_univar` there should be two study arms. | |
| 303 | #' | |
| 304 | #' @examples | |
| 305 | #' # fit_coxreg_univar | |
| 306 | #' | |
| 307 | #' ## Cox regression: arm + 1 covariate. | |
| 308 | #' mod1 <- fit_coxreg_univar( | |
| 309 | #' variables = list( | |
| 310 | #' time = "time", event = "status", arm = "armcd", | |
| 311 | #' covariates = "covar1" | |
| 312 | #' ), | |
| 313 | #' data = dta_bladder, | |
| 314 | #' control = control_coxreg(conf_level = 0.91) | |
| 315 | #' ) | |
| 316 | #' | |
| 317 | #' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates. | |
| 318 | #' mod2 <- fit_coxreg_univar( | |
| 319 | #' variables = list( | |
| 320 | #' time = "time", event = "status", arm = "armcd", | |
| 321 | #'     covariates = c("covar1", "covar2") | |
| 322 | #' ), | |
| 323 | #' data = dta_bladder, | |
| 324 | #' control = control_coxreg(conf_level = 0.91, interaction = TRUE) | |
| 325 | #' ) | |
| 326 | #' | |
| 327 | #' ## Cox regression: arm + 1 covariate, stratified analysis. | |
| 328 | #' mod3 <- fit_coxreg_univar( | |
| 329 | #' variables = list( | |
| 330 | #' time = "time", event = "status", arm = "armcd", strata = "covar2", | |
| 331 | #'     covariates = c("covar1") | |
| 332 | #' ), | |
| 333 | #' data = dta_bladder, | |
| 334 | #' control = control_coxreg(conf_level = 0.91) | |
| 335 | #' ) | |
| 336 | #' | |
| 337 | #' ## Cox regression: no arm, only covariates. | |
| 338 | #' mod4 <- fit_coxreg_univar( | |
| 339 | #' variables = list( | |
| 340 | #' time = "time", event = "status", | |
| 341 | #'     covariates = c("covar1", "covar2") | |
| 342 | #' ), | |
| 343 | #' data = dta_bladder | |
| 344 | #' ) | |
| 345 | #' | |
| 346 | #' @export | |
| 347 | fit_coxreg_univar <- function(variables, | |
| 348 | data, | |
| 349 | at = list(), | |
| 350 |                               control = control_coxreg()) { | |
| 351 | 43x | checkmate::assert_list(variables, names = "named") | 
| 352 | 43x | has_arm <- "arm" %in% names(variables) | 
| 353 | 43x | arm_name <- if (has_arm) "arm" else NULL | 
| 354 | ||
| 355 | 43x | checkmate::assert_character(variables$covariates, null.ok = TRUE) | 
| 356 | ||
| 357 | 43x | assert_df_with_variables(data, variables) | 
| 358 | 43x | assert_list_of_variables(variables[c(arm_name, "event", "time")]) | 
| 359 | ||
| 360 | 43x |   if (!is.null(variables$strata)) { | 
| 361 | 4x | checkmate::assert_disjunct(control$pval_method, "likelihood") | 
| 362 | } | |
| 363 | 42x |   if (has_arm) { | 
| 364 | 36x | assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2) | 
| 365 | } | |
| 366 | 41x | vars <- unlist(variables[c(arm_name, "covariates", "strata")], use.names = FALSE) | 
| 367 | 41x |   for (i in vars) { | 
| 368 | 94x |     if (is.factor(data[[i]])) { | 
| 369 | 82x | attr(data[[i]], "levels") <- levels(droplevels(data[[i]])) | 
| 370 | } | |
| 371 | } | |
| 372 | 41x | forms <- h_coxreg_univar_formulas(variables, interaction = control$interaction) | 
| 373 | 41x | mod <- lapply( | 
| 374 | 41x |     forms, function(x) { | 
| 375 | 90x | survival::coxph(formula = stats::as.formula(x), data = data, ties = control$ties) | 
| 376 | } | |
| 377 | ) | |
| 378 | 41x | structure( | 
| 379 | 41x | list( | 
| 380 | 41x | mod = mod, | 
| 381 | 41x | data = data, | 
| 382 | 41x | control = control, | 
| 383 | 41x | vars = variables, | 
| 384 | 41x | at = at | 
| 385 | ), | |
| 386 | 41x | class = "coxreg.univar" | 
| 387 | ) | |
| 388 | } | |
| 389 | ||
| 390 | #' @describeIn fit_coxreg Fit a multivariate Cox regression model. | |
| 391 | #' | |
| 392 | #' @return | |
| 393 | #' * `fit_coxreg_multivar()` returns a `coxreg.multivar` class object which is a named list | |
| 394 | #' with 4 elements: | |
| 395 | #' * `mod`: Cox regression model fitted by [survival::coxph()]. | |
| 396 | #' * `data`: The original data frame input. | |
| 397 | #' * `control`: The original control input. | |
| 398 | #' * `vars`: The variables used in the model. | |
| 399 | #' | |
| 400 | #' @examples | |
| 401 | #' # fit_coxreg_multivar | |
| 402 | #' | |
| 403 | #' ## Cox regression: multivariate Cox regression. | |
| 404 | #' multivar_model <- fit_coxreg_multivar( | |
| 405 | #' variables = list( | |
| 406 | #' time = "time", event = "status", arm = "armcd", | |
| 407 | #'     covariates = c("covar1", "covar2") | |
| 408 | #' ), | |
| 409 | #' data = dta_bladder | |
| 410 | #' ) | |
| 411 | #' | |
| 412 | #' # Example without treatment arm. | |
| 413 | #' multivar_covs_model <- fit_coxreg_multivar( | |
| 414 | #' variables = list( | |
| 415 | #' time = "time", event = "status", | |
| 416 | #'     covariates = c("covar1", "covar2") | |
| 417 | #' ), | |
| 418 | #' data = dta_bladder | |
| 419 | #' ) | |
| 420 | #' | |
| 421 | #' @export | |
| 422 | fit_coxreg_multivar <- function(variables, | |
| 423 | data, | |
| 424 |                                 control = control_coxreg()) { | |
| 425 | 83x | checkmate::assert_list(variables, names = "named") | 
| 426 | 83x | has_arm <- "arm" %in% names(variables) | 
| 427 | 83x | arm_name <- if (has_arm) "arm" else NULL | 
| 428 | ||
| 429 | 83x |   if (!is.null(variables$covariates)) { | 
| 430 | 21x | checkmate::assert_character(variables$covariates) | 
| 431 | } | |
| 432 | ||
| 433 | 83x | checkmate::assert_false(control$interaction) | 
| 434 | 83x | assert_df_with_variables(data, variables) | 
| 435 | 83x | assert_list_of_variables(variables[c(arm_name, "event", "time")]) | 
| 436 | ||
| 437 | 83x |   if (!is.null(variables$strata)) { | 
| 438 | 3x | checkmate::assert_disjunct(control$pval_method, "likelihood") | 
| 439 | } | |
| 440 | ||
| 441 | 82x | form <- h_coxreg_multivar_formula(variables) | 
| 442 | 82x | mod <- survival::coxph( | 
| 443 | 82x | formula = stats::as.formula(form), | 
| 444 | 82x | data = data, | 
| 445 | 82x | ties = control$ties | 
| 446 | ) | |
| 447 | 82x | structure( | 
| 448 | 82x | list( | 
| 449 | 82x | mod = mod, | 
| 450 | 82x | data = data, | 
| 451 | 82x | control = control, | 
| 452 | 82x | vars = variables | 
| 453 | ), | |
| 454 | 82x | class = "coxreg.multivar" | 
| 455 | ) | |
| 456 | } | |
| 457 | ||
| 458 | #' Muffled `car::Anova` | |
| 459 | #' | |
| 460 | #' Applied on survival models, [car::Anova()] signal that the `strata` terms is dropped from the model formula when | |
| 461 | #' present, this function deliberately muffles this message. | |
| 462 | #' | |
| 463 | #' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()]. | |
| 464 | #' @param test_statistic (`string`)\cr the method used for estimation of p.values; `wald` (default) or `likelihood`. | |
| 465 | #' | |
| 466 | #' @return The output of [car::Anova()], with convergence message muffled. | |
| 467 | #' | |
| 468 | #' @keywords internal | |
| 469 | muffled_car_anova <- function(mod, test_statistic) { | |
| 470 | 219x | tryCatch( | 
| 471 | 219x | withCallingHandlers( | 
| 472 | 219x |       expr = { | 
| 473 | 219x | car::Anova( | 
| 474 | 219x | mod, | 
| 475 | 219x | test.statistic = test_statistic, | 
| 476 | 219x | type = "III" | 
| 477 | ) | |
| 478 | }, | |
| 479 | 219x |       message = function(m) invokeRestart("muffleMessage"), | 
| 480 | 219x |       error = function(e) { | 
| 481 | 1x | stop(paste( | 
| 482 | 1x | "the model seems to have convergence problems, please try to change", | 
| 483 | 1x | "the configuration of covariates or strata variables, e.g.", | 
| 484 | 1x | "- original error:", e | 
| 485 | )) | |
| 486 | } | |
| 487 | ) | |
| 488 | ) | |
| 489 | } | 
| 1 | #' Stack multiple grobs | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("deprecated")` | |
| 4 | #' | |
| 5 | #' Stack grobs as a new grob with 1 column and multiple rows layout. | |
| 6 | #' | |
| 7 | #' @param ... grobs. | |
| 8 | #' @param grobs (`list` of `grob`)\cr a list of grobs. | |
| 9 | #' @param padding (`grid::unit`)\cr unit of length 1, space between each grob. | |
| 10 | #' @param vp (`viewport` or `NULL`)\cr a [viewport()] object (or `NULL`). | |
| 11 | #' @param name (`string`)\cr a character identifier for the grob. | |
| 12 | #' @param gp (`gpar`)\cr a [gpar()] object. | |
| 13 | #' | |
| 14 | #' @return A `grob`. | |
| 15 | #' | |
| 16 | #' @examples | |
| 17 | #' library(grid) | |
| 18 | #' | |
| 19 | #' g1 <- circleGrob(gp = gpar(col = "blue")) | |
| 20 | #' g2 <- circleGrob(gp = gpar(col = "red")) | |
| 21 | #' g3 <- textGrob("TEST TEXT") | |
| 22 | #' grid.newpage() | |
| 23 | #' grid.draw(stack_grobs(g1, g2, g3)) | |
| 24 | #' | |
| 25 | #' showViewport() | |
| 26 | #' | |
| 27 | #' grid.newpage() | |
| 28 | #' pushViewport(viewport(layout = grid.layout(1, 2))) | |
| 29 | #' vp1 <- viewport(layout.pos.row = 1, layout.pos.col = 2) | |
| 30 | #' grid.draw(stack_grobs(g1, g2, g3, vp = vp1, name = "test")) | |
| 31 | #' | |
| 32 | #' showViewport() | |
| 33 | #' grid.ls(grobs = TRUE, viewports = TRUE, print = FALSE) | |
| 34 | #' | |
| 35 | #' @export | |
| 36 | stack_grobs <- function(..., | |
| 37 | grobs = list(...), | |
| 38 | padding = grid::unit(2, "line"), | |
| 39 | vp = NULL, | |
| 40 | gp = NULL, | |
| 41 |                         name = NULL) { | |
| 42 | 4x | lifecycle::deprecate_warn( | 
| 43 | 4x | "0.9.4", | 
| 44 | 4x | "stack_grobs()", | 
| 45 | 4x | details = "`tern` plotting functions no longer generate `grob` objects." | 
| 46 | ) | |
| 47 | ||
| 48 | 4x | checkmate::assert_true( | 
| 49 | 4x | all(vapply(grobs, grid::is.grob, logical(1))) | 
| 50 | ) | |
| 51 | ||
| 52 | 4x |   if (length(grobs) == 1) { | 
| 53 | 1x | return(grobs[[1]]) | 
| 54 | } | |
| 55 | ||
| 56 | 3x | n_layout <- 2 * length(grobs) - 1 | 
| 57 | 3x | hts <- lapply( | 
| 58 | 3x | seq(1, n_layout), | 
| 59 | 3x |     function(i) { | 
| 60 | 39x |       if (i %% 2 != 0) { | 
| 61 | 21x | grid::unit(1, "null") | 
| 62 |       } else { | |
| 63 | 18x | padding | 
| 64 | } | |
| 65 | } | |
| 66 | ) | |
| 67 | 3x | hts <- do.call(grid::unit.c, hts) | 
| 68 | ||
| 69 | 3x | main_vp <- grid::viewport( | 
| 70 | 3x | layout = grid::grid.layout(nrow = n_layout, ncol = 1, heights = hts) | 
| 71 | ) | |
| 72 | ||
| 73 | 3x |   nested_grobs <- Map(function(g, i) { | 
| 74 | 21x | grid::gTree( | 
| 75 | 21x | children = grid::gList(g), | 
| 76 | 21x | vp = grid::viewport(layout.pos.row = i, layout.pos.col = 1) | 
| 77 | ) | |
| 78 | 3x | }, grobs, seq_along(grobs) * 2 - 1) | 
| 79 | ||
| 80 | 3x | grobs_mainvp <- grid::gTree( | 
| 81 | 3x | children = do.call(grid::gList, nested_grobs), | 
| 82 | 3x | vp = main_vp | 
| 83 | ) | |
| 84 | ||
| 85 | 3x | grid::gTree( | 
| 86 | 3x | children = grid::gList(grobs_mainvp), | 
| 87 | 3x | vp = vp, | 
| 88 | 3x | gp = gp, | 
| 89 | 3x | name = name | 
| 90 | ) | |
| 91 | } | |
| 92 | ||
| 93 | #' Arrange multiple grobs | |
| 94 | #' | |
| 95 | #' @description `r lifecycle::badge("deprecated")` | |
| 96 | #' | |
| 97 | #' Arrange grobs as a new grob with `n * m (rows * cols)` layout. | |
| 98 | #' | |
| 99 | #' @inheritParams stack_grobs | |
| 100 | #' @param ncol (`integer(1)`)\cr number of columns in layout. | |
| 101 | #' @param nrow (`integer(1)`)\cr number of rows in layout. | |
| 102 | #' @param padding_ht (`grid::unit`)\cr unit of length 1, vertical space between each grob. | |
| 103 | #' @param padding_wt (`grid::unit`)\cr unit of length 1, horizontal space between each grob. | |
| 104 | #' | |
| 105 | #' @return A `grob`. | |
| 106 | #' | |
| 107 | #' @examples | |
| 108 | #' library(grid) | |
| 109 | #' | |
| 110 | #' \donttest{ | |
| 111 | #' num <- lapply(1:9, textGrob) | |
| 112 | #' grid::grid.newpage() | |
| 113 | #' grid.draw(arrange_grobs(grobs = num, ncol = 2)) | |
| 114 | #' | |
| 115 | #' showViewport() | |
| 116 | #' | |
| 117 | #' g1 <- circleGrob(gp = gpar(col = "blue")) | |
| 118 | #' g2 <- circleGrob(gp = gpar(col = "red")) | |
| 119 | #' g3 <- textGrob("TEST TEXT") | |
| 120 | #' grid::grid.newpage() | |
| 121 | #' grid.draw(arrange_grobs(g1, g2, g3, nrow = 2)) | |
| 122 | #' | |
| 123 | #' showViewport() | |
| 124 | #' | |
| 125 | #' grid::grid.newpage() | |
| 126 | #' grid.draw(arrange_grobs(g1, g2, g3, ncol = 3)) | |
| 127 | #' | |
| 128 | #' grid::grid.newpage() | |
| 129 | #' grid::pushViewport(grid::viewport(layout = grid::grid.layout(1, 2))) | |
| 130 | #' vp1 <- grid::viewport(layout.pos.row = 1, layout.pos.col = 2) | |
| 131 | #' grid.draw(arrange_grobs(g1, g2, g3, ncol = 2, vp = vp1)) | |
| 132 | #' | |
| 133 | #' showViewport() | |
| 134 | #' } | |
| 135 | #' @export | |
| 136 | arrange_grobs <- function(..., | |
| 137 | grobs = list(...), | |
| 138 | ncol = NULL, nrow = NULL, | |
| 139 | padding_ht = grid::unit(2, "line"), | |
| 140 | padding_wt = grid::unit(2, "line"), | |
| 141 | vp = NULL, | |
| 142 | gp = NULL, | |
| 143 |                           name = NULL) { | |
| 144 | 5x | lifecycle::deprecate_warn( | 
| 145 | 5x | "0.9.4", | 
| 146 | 5x | "arrange_grobs()", | 
| 147 | 5x | details = "`tern` plotting functions no longer generate `grob` objects." | 
| 148 | ) | |
| 149 | ||
| 150 | 5x | checkmate::assert_true( | 
| 151 | 5x | all(vapply(grobs, grid::is.grob, logical(1))) | 
| 152 | ) | |
| 153 | ||
| 154 | 5x |   if (length(grobs) == 1) { | 
| 155 | 1x | return(grobs[[1]]) | 
| 156 | } | |
| 157 | ||
| 158 | 4x |   if (is.null(ncol) && is.null(nrow)) { | 
| 159 | 1x | ncol <- 1 | 
| 160 | 1x | nrow <- ceiling(length(grobs) / ncol) | 
| 161 | 3x |   } else if (!is.null(ncol) && is.null(nrow)) { | 
| 162 | 1x | nrow <- ceiling(length(grobs) / ncol) | 
| 163 | 2x |   } else if (is.null(ncol) && !is.null(nrow)) { | 
| 164 | ! | ncol <- ceiling(length(grobs) / nrow) | 
| 165 | } | |
| 166 | ||
| 167 | 4x |   if (ncol * nrow < length(grobs)) { | 
| 168 | 1x |     stop("specififed ncol and nrow are not enough for arranging the grobs ") | 
| 169 | } | |
| 170 | ||
| 171 | 3x |   if (ncol == 1) { | 
| 172 | 2x | return(stack_grobs(grobs = grobs, padding = padding_ht, vp = vp, gp = gp, name = name)) | 
| 173 | } | |
| 174 | ||
| 175 | 1x | n_col <- 2 * ncol - 1 | 
| 176 | 1x | n_row <- 2 * nrow - 1 | 
| 177 | 1x | hts <- lapply( | 
| 178 | 1x | seq(1, n_row), | 
| 179 | 1x |     function(i) { | 
| 180 | 5x |       if (i %% 2 != 0) { | 
| 181 | 3x | grid::unit(1, "null") | 
| 182 |       } else { | |
| 183 | 2x | padding_ht | 
| 184 | } | |
| 185 | } | |
| 186 | ) | |
| 187 | 1x | hts <- do.call(grid::unit.c, hts) | 
| 188 | ||
| 189 | 1x | wts <- lapply( | 
| 190 | 1x | seq(1, n_col), | 
| 191 | 1x |     function(i) { | 
| 192 | 5x |       if (i %% 2 != 0) { | 
| 193 | 3x | grid::unit(1, "null") | 
| 194 |       } else { | |
| 195 | 2x | padding_wt | 
| 196 | } | |
| 197 | } | |
| 198 | ) | |
| 199 | 1x | wts <- do.call(grid::unit.c, wts) | 
| 200 | ||
| 201 | 1x | main_vp <- grid::viewport( | 
| 202 | 1x | layout = grid::grid.layout(nrow = n_row, ncol = n_col, widths = wts, heights = hts) | 
| 203 | ) | |
| 204 | ||
| 205 | 1x | nested_grobs <- list() | 
| 206 | 1x | k <- 0 | 
| 207 | 1x |   for (i in seq(nrow) * 2 - 1) { | 
| 208 | 3x |     for (j in seq(ncol) * 2 - 1) { | 
| 209 | 9x | k <- k + 1 | 
| 210 | 9x |       if (k <= length(grobs)) { | 
| 211 | 9x | nested_grobs <- c( | 
| 212 | 9x | nested_grobs, | 
| 213 | 9x | list(grid::gTree( | 
| 214 | 9x | children = grid::gList(grobs[[k]]), | 
| 215 | 9x | vp = grid::viewport(layout.pos.row = i, layout.pos.col = j) | 
| 216 | )) | |
| 217 | ) | |
| 218 | } | |
| 219 | } | |
| 220 | } | |
| 221 | 1x | grobs_mainvp <- grid::gTree( | 
| 222 | 1x | children = do.call(grid::gList, nested_grobs), | 
| 223 | 1x | vp = main_vp | 
| 224 | ) | |
| 225 | ||
| 226 | 1x | grid::gTree( | 
| 227 | 1x | children = grid::gList(grobs_mainvp), | 
| 228 | 1x | vp = vp, | 
| 229 | 1x | gp = gp, | 
| 230 | 1x | name = name | 
| 231 | ) | |
| 232 | } | |
| 233 | ||
| 234 | #' Draw `grob` | |
| 235 | #' | |
| 236 | #' @description `r lifecycle::badge("deprecated")` | |
| 237 | #' | |
| 238 | #' Draw grob on device page. | |
| 239 | #' | |
| 240 | #' @param grob (`grob`)\cr grid object. | |
| 241 | #' @param newpage (`flag`)\cr draw on a new page. | |
| 242 | #' @param vp (`viewport` or `NULL`)\cr a [viewport()] object (or `NULL`). | |
| 243 | #' | |
| 244 | #' @return A `grob`. | |
| 245 | #' | |
| 246 | #' @examples | |
| 247 | #' library(dplyr) | |
| 248 | #' library(grid) | |
| 249 | #' | |
| 250 | #' \donttest{ | |
| 251 | #' rect <- rectGrob(width = grid::unit(0.5, "npc"), height = grid::unit(0.5, "npc")) | |
| 252 | #' rect %>% draw_grob(vp = grid::viewport(angle = 45)) | |
| 253 | #' | |
| 254 | #' num <- lapply(1:10, textGrob) | |
| 255 | #' num %>% | |
| 256 | #' arrange_grobs(grobs = .) %>% | |
| 257 | #' draw_grob() | |
| 258 | #' showViewport() | |
| 259 | #' } | |
| 260 | #' | |
| 261 | #' @export | |
| 262 | draw_grob <- function(grob, newpage = TRUE, vp = NULL) { | |
| 263 | 3x | lifecycle::deprecate_warn( | 
| 264 | 3x | "0.9.4", | 
| 265 | 3x | "draw_grob()", | 
| 266 | 3x | details = "`tern` plotting functions no longer generate `grob` objects." | 
| 267 | ) | |
| 268 | ||
| 269 | 3x |   if (newpage) { | 
| 270 | 3x | grid::grid.newpage() | 
| 271 | } | |
| 272 | 3x |   if (!is.null(vp)) { | 
| 273 | 1x | grid::pushViewport(vp) | 
| 274 | } | |
| 275 | 3x | grid::grid.draw(grob) | 
| 276 | } | |
| 277 | ||
| 278 | tern_grob <- function(x) { | |
| 279 | ! |   class(x) <- unique(c("ternGrob", class(x))) | 
| 280 | ! | x | 
| 281 | } | |
| 282 | ||
| 283 | #' @keywords internal | |
| 284 | print.ternGrob <- function(x, ...) { | |
| 285 | ! | grid::grid.newpage() | 
| 286 | ! | grid::grid.draw(x) | 
| 287 | } | 
| 1 | #' Count number of patients and sum exposure across all patients in columns | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [analyze_patients_exposure_in_cols()] creates a layout element to count total numbers of | |
| 6 | #' patients and sum an analysis value (i.e. exposure) across all patients in columns. | |
| 7 | #' | |
| 8 | #' The primary analysis variable `ex_var` is the exposure variable used to calculate the `sum_exposure` statistic. The | |
| 9 | #' `id` variable is used to uniquely identify patients in the data such that only unique patients are counted in the | |
| 10 | #' `n_patients` statistic, and the `var` variable is used to create a row split if needed. The percentage returned as | |
| 11 | #' part of the `n_patients` statistic is the proportion of all records that correspond to a unique patient. | |
| 12 | #' | |
| 13 | #' The summarize function [summarize_patients_exposure_in_cols()] performs the same function as | |
| 14 | #' [analyze_patients_exposure_in_cols()] except it creates content rows, not data rows, to summarize the current table | |
| 15 | #' row/column context and operates on the level of the latest row split or the root of the table if no row splits have | |
| 16 | #' occurred. | |
| 17 | #' | |
| 18 | #' If a column split has not yet been performed in the table, `col_split` must be set to `TRUE` for the first call of | |
| 19 | #' [analyze_patients_exposure_in_cols()] or [summarize_patients_exposure_in_cols()]. | |
| 20 | #' | |
| 21 | #' @inheritParams argument_convention | |
| 22 | #' @param ex_var (`string`)\cr name of the variable in `df` containing exposure values. | |
| 23 | #' @param custom_label (`string` or `NULL`)\cr if provided and `labelstr` is empty, this will be used as label. | |
| 24 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 25 | #' | |
| 26 | #'   Options are: ``r shQuote(get_stats("analyze_patients_exposure_in_cols"), type = "sh")`` | |
| 27 | #' | |
| 28 | #' @name summarize_patients_exposure_in_cols | |
| 29 | #' @order 1 | |
| 30 | NULL | |
| 31 | ||
| 32 | #' @describeIn summarize_patients_exposure_in_cols Statistics function which counts numbers | |
| 33 | #' of patients and the sum of exposure across all patients. | |
| 34 | #' | |
| 35 | #' @return | |
| 36 | #' * `s_count_patients_sum_exposure()` returns a named `list` with the statistics: | |
| 37 | #' * `n_patients`: Number of unique patients in `df`. | |
| 38 | #' * `sum_exposure`: Sum of `ex_var` across all patients in `df`. | |
| 39 | #' | |
| 40 | #' @keywords internal | |
| 41 | s_count_patients_sum_exposure <- function(df, | |
| 42 | labelstr = "", | |
| 43 |                                           .stats = c("n_patients", "sum_exposure"), | |
| 44 | .N_col, # nolint | |
| 45 | ..., | |
| 46 | ex_var = "AVAL", | |
| 47 | id = "USUBJID", | |
| 48 | custom_label = NULL, | |
| 49 |                                           var_level = NULL) { | |
| 50 | 56x | assert_df_with_variables(df, list(ex_var = ex_var, id = id)) | 
| 51 | 56x | checkmate::assert_string(id) | 
| 52 | 56x | checkmate::assert_string(labelstr) | 
| 53 | 56x | checkmate::assert_string(custom_label, null.ok = TRUE) | 
| 54 | 56x | checkmate::assert_numeric(df[[ex_var]]) | 
| 55 | 56x |   checkmate::assert_true(all(.stats %in% c("n_patients", "sum_exposure"))) | 
| 56 | ||
| 57 | 56x |   row_label <- if (labelstr != "") { | 
| 58 | ! | labelstr | 
| 59 | 56x |   } else if (!is.null(var_level)) { | 
| 60 | 42x | var_level | 
| 61 | 56x |   } else if (!is.null(custom_label)) { | 
| 62 | 6x | custom_label | 
| 63 |   } else { | |
| 64 | 8x | "Total patients numbers/person time" | 
| 65 | } | |
| 66 | ||
| 67 | 56x | y <- list() | 
| 68 | ||
| 69 | 56x |   if ("n_patients" %in% .stats) { | 
| 70 | 56x | y$n_patients <- | 
| 71 | 56x | formatters::with_label( | 
| 72 | 56x | s_num_patients_content( | 
| 73 | 56x | df = df, | 
| 74 | 56x | .N_col = .N_col, # nolint | 
| 75 | 56x | .var = id, | 
| 76 | 56x | labelstr = "" | 
| 77 | 56x | )$unique, | 
| 78 | 56x | row_label | 
| 79 | ) | |
| 80 | } | |
| 81 | 56x |   if ("sum_exposure" %in% .stats) { | 
| 82 | 56x | y$sum_exposure <- formatters::with_label(sum(df[[ex_var]]), row_label) | 
| 83 | } | |
| 84 | 56x | y | 
| 85 | } | |
| 86 | ||
| 87 | #' @describeIn summarize_patients_exposure_in_cols Analysis function which is used as `afun` in | |
| 88 | #' [rtables::analyze_colvars()] within `analyze_patients_exposure_in_cols()` and as `cfun` in | |
| 89 | #' [rtables::summarize_row_groups()] within `summarize_patients_exposure_in_cols()`. | |
| 90 | #' | |
| 91 | #' @return | |
| 92 | #' * `a_count_patients_sum_exposure()` returns formatted [rtables::CellValue()]. | |
| 93 | #' | |
| 94 | #' @export | |
| 95 | a_count_patients_sum_exposure <- function(df, | |
| 96 | labelstr = "", | |
| 97 | ..., | |
| 98 | .stats = NULL, | |
| 99 | .stat_names = NULL, | |
| 100 | .formats = NULL, | |
| 101 | .labels = NULL, | |
| 102 |                                           .indent_mods = NULL) { | |
| 103 | 32x | checkmate::assert_character(.stats, len = 1) | 
| 104 | ||
| 105 | # Check for additional parameters to the statistics function | |
| 106 | 32x | dots_extra_args <- list(...) | 
| 107 | 32x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 108 | 32x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 109 | ||
| 110 | 32x | add_total_level <- dots_extra_args$add_total_level | 
| 111 | 32x | checkmate::assert_flag(add_total_level) | 
| 112 | ||
| 113 | 32x | var <- dots_extra_args$var | 
| 114 | 32x |   if (!is.null(var)) { | 
| 115 | 21x | assert_df_with_variables(df, list(var = var)) | 
| 116 | 21x | df[[var]] <- as.factor(df[[var]]) | 
| 117 | } | |
| 118 | ||
| 119 | # Check for user-defined functions | |
| 120 | 32x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 121 | 32x | .stats <- default_and_custom_stats_list$all_stats | 
| 122 | 32x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 123 | ||
| 124 | 32x | x_stats <- list() | 
| 125 | 32x |   if (!is.null(var)) { | 
| 126 | 21x |     for (lvl in levels(df[[var]])) { | 
| 127 | 42x | x_stats_i <- .apply_stat_functions( | 
| 128 | 42x | default_stat_fnc = s_count_patients_sum_exposure, | 
| 129 | 42x | custom_stat_fnc_list = custom_stat_functions, | 
| 130 | 42x | args_list = c( | 
| 131 | 42x | df = list(subset(df, get(var) == lvl)), | 
| 132 | 42x | labelstr = list(labelstr), | 
| 133 | 42x | var_level = lvl, | 
| 134 | 42x | extra_afun_params, | 
| 135 | 42x | dots_extra_args | 
| 136 | ) | |
| 137 | ) | |
| 138 | 42x | x_stats[[.stats]][[lvl]] <- x_stats_i[[.stats]] | 
| 139 | } | |
| 140 | } | |
| 141 | ||
| 142 | 32x |   if (add_total_level || is.null(var)) { | 
| 143 | 13x | x_stats_total <- .apply_stat_functions( | 
| 144 | 13x | default_stat_fnc = s_count_patients_sum_exposure, | 
| 145 | 13x | custom_stat_fnc_list = custom_stat_functions, | 
| 146 | 13x | args_list = c( | 
| 147 | 13x | df = list(df), | 
| 148 | 13x | labelstr = list(labelstr), | 
| 149 | 13x | extra_afun_params, | 
| 150 | 13x | dots_extra_args | 
| 151 | ) | |
| 152 | ) | |
| 153 | 13x | x_stats[[.stats]][["Total"]] <- x_stats_total[[.stats]] | 
| 154 | } | |
| 155 | ||
| 156 | # Fill in formatting defaults | |
| 157 | 32x | .stats <- get_stats( | 
| 158 | 32x | "analyze_patients_exposure_in_cols", | 
| 159 | 32x | stats_in = .stats, | 
| 160 | 32x | custom_stats_in = names(custom_stat_functions) | 
| 161 | ) | |
| 162 | 32x | x_stats <- x_stats[.stats] | 
| 163 | 32x | levels_per_stats <- lapply(x_stats, names) | 
| 164 | 32x | .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) | 
| 165 | 32x | .labels <- get_labels_from_stats( | 
| 166 | 32x | .stats, .labels, levels_per_stats, | 
| 167 | 32x | tern_defaults = c(lapply(x_stats[[1]], attr, "label"), tern_default_labels) | 
| 168 | ) | |
| 169 | 32x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) | 
| 170 | ||
| 171 | 32x | x_stats <- x_stats[.stats] %>% | 
| 172 | 32x | .unlist_keep_nulls() %>% | 
| 173 | 32x | setNames(names(.formats)) | 
| 174 | ||
| 175 | # Auto format handling | |
| 176 | 32x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 177 | ||
| 178 | # Get and check statistical names | |
| 179 | 32x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 180 | ||
| 181 | 32x | in_rows( | 
| 182 | 32x | .list = x_stats, | 
| 183 | 32x | .formats = .formats, | 
| 184 | 32x | .names = .labels %>% .unlist_keep_nulls(), | 
| 185 | 32x | .stat_names = .stat_names, | 
| 186 | 32x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 187 | 32x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 188 | ) | |
| 189 | } | |
| 190 | ||
| 191 | #' @describeIn summarize_patients_exposure_in_cols Layout-creating function which can take statistics | |
| 192 | #' function arguments and additional format arguments. This function is a wrapper for | |
| 193 | #' [rtables::split_cols_by_multivar()] and [rtables::summarize_row_groups()]. | |
| 194 | #' | |
| 195 | #' @return | |
| 196 | #' * `summarize_patients_exposure_in_cols()` returns a layout object suitable for passing to further | |
| 197 | #' layouting functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will | |
| 198 | #' add formatted content rows, with the statistics from `s_count_patients_sum_exposure()` arranged in | |
| 199 | #' columns, to the table layout. | |
| 200 | #' | |
| 201 | #' @examples | |
| 202 | #' lyt5 <- basic_table() %>% | |
| 203 | #' summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE) | |
| 204 | #' | |
| 205 | #' result5 <- build_table(lyt5, df = df, alt_counts_df = adsl) | |
| 206 | #' result5 | |
| 207 | #' | |
| 208 | #' lyt6 <- basic_table() %>% | |
| 209 | #' summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE, .stats = "sum_exposure") | |
| 210 | #' | |
| 211 | #' result6 <- build_table(lyt6, df = df, alt_counts_df = adsl) | |
| 212 | #' result6 | |
| 213 | #' | |
| 214 | #' @export | |
| 215 | #' @order 3 | |
| 216 | summarize_patients_exposure_in_cols <- function(lyt, | |
| 217 | var, | |
| 218 | ex_var = "AVAL", | |
| 219 | id = "USUBJID", | |
| 220 | add_total_level = FALSE, | |
| 221 | custom_label = NULL, | |
| 222 | col_split = TRUE, | |
| 223 | na_str = default_na_str(), | |
| 224 | ..., | |
| 225 |                                                 .stats = c("n_patients", "sum_exposure"), | |
| 226 | .stat_names = NULL, | |
| 227 | .formats = NULL, | |
| 228 | .labels = c(n_patients = "Patients", sum_exposure = "Person time"), | |
| 229 |                                                 .indent_mods = NULL) { | |
| 230 | # Process standard extra arguments | |
| 231 | 3x | extra_args <- list() | 
| 232 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 233 | ! | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 234 | 3x | col_labels <- unlist(.labels[.stats]) | 
| 235 | 3x |   .labels <- .labels[!names(.labels) %in% c("n_patients", "sum_exposure")] | 
| 236 | 3x | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 237 | ! | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 238 | ||
| 239 | # Process additional arguments to the statistic function | |
| 240 | 3x | extra_args <- c( | 
| 241 | 3x | extra_args, | 
| 242 | 3x | ex_var = ex_var, id = id, add_total_level = add_total_level, custom_label = custom_label, | 
| 243 | ... | |
| 244 | ) | |
| 245 | ||
| 246 | # Adding additional info from layout to analysis function | |
| 247 | 3x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 248 | 3x | formals(a_count_patients_sum_exposure) <- c( | 
| 249 | 3x | formals(a_count_patients_sum_exposure), extra_args[[".additional_fun_parameters"]] | 
| 250 | ) | |
| 251 | ||
| 252 | 3x |   if (col_split) { | 
| 253 | 3x | lyt <- split_cols_by_multivar( | 
| 254 | 3x | lyt = lyt, | 
| 255 | 3x | vars = rep(var, length(.stats)), | 
| 256 | 3x | varlabels = col_labels, | 
| 257 | 3x | extra_args = list(.stats = .stats) | 
| 258 | ) | |
| 259 | } | |
| 260 | 3x | summarize_row_groups( | 
| 261 | 3x | lyt = lyt, | 
| 262 | 3x | var = var, | 
| 263 | 3x | cfun = a_count_patients_sum_exposure, | 
| 264 | 3x | na_str = na_str, | 
| 265 | 3x | extra_args = extra_args | 
| 266 | ) | |
| 267 | } | |
| 268 | ||
| 269 | #' @describeIn summarize_patients_exposure_in_cols Layout-creating function which can take statistics | |
| 270 | #' function arguments and additional format arguments. This function is a wrapper for | |
| 271 | #' [rtables::split_cols_by_multivar()] and [rtables::analyze_colvars()]. | |
| 272 | #' | |
| 273 | #' @param col_split (`flag`)\cr whether the columns should be split. Set to `FALSE` when the required | |
| 274 | #' column split has been done already earlier in the layout pipe. | |
| 275 | #' | |
| 276 | #' @return | |
| 277 | #' * `analyze_patients_exposure_in_cols()` returns a layout object suitable for passing to further | |
| 278 | #' layouting functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will | |
| 279 | #' add formatted data rows, with the statistics from `s_count_patients_sum_exposure()` arranged in | |
| 280 | #' columns, to the table layout. | |
| 281 | #' | |
| 282 | #' @note As opposed to [summarize_patients_exposure_in_cols()] which generates content rows, | |
| 283 | #' `analyze_patients_exposure_in_cols()` generates data rows which will _not_ be repeated on multiple | |
| 284 | #' pages when pagination is used. | |
| 285 | #' | |
| 286 | #' @examples | |
| 287 | #' set.seed(1) | |
| 288 | #' df <- data.frame( | |
| 289 | #'   USUBJID = c(paste("id", seq(1, 12), sep = "")), | |
| 290 | #'   ARMCD = c(rep("ARM A", 6), rep("ARM B", 6)), | |
| 291 | #'   SEX = c(rep("Female", 6), rep("Male", 6)), | |
| 292 | #' AVAL = as.numeric(sample(seq(1, 20), 12)), | |
| 293 | #' stringsAsFactors = TRUE | |
| 294 | #' ) | |
| 295 | #' adsl <- data.frame( | |
| 296 | #'   USUBJID = c(paste("id", seq(1, 12), sep = "")), | |
| 297 | #'   ARMCD = c(rep("ARM A", 2), rep("ARM B", 2)), | |
| 298 | #'   SEX = c(rep("Female", 2), rep("Male", 2)), | |
| 299 | #' stringsAsFactors = TRUE | |
| 300 | #' ) | |
| 301 | #' | |
| 302 | #' lyt <- basic_table() %>% | |
| 303 | #'   split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>% | |
| 304 | #' summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE) %>% | |
| 305 | #' analyze_patients_exposure_in_cols(var = "SEX", col_split = FALSE) | |
| 306 | #' result <- build_table(lyt, df = df, alt_counts_df = adsl) | |
| 307 | #' result | |
| 308 | #' | |
| 309 | #' lyt2 <- basic_table() %>% | |
| 310 | #'   split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>% | |
| 311 | #' summarize_patients_exposure_in_cols( | |
| 312 | #' var = "AVAL", col_split = TRUE, | |
| 313 | #' .stats = "n_patients", custom_label = "some custom label" | |
| 314 | #' ) %>% | |
| 315 | #' analyze_patients_exposure_in_cols(var = "SEX", col_split = FALSE, ex_var = "AVAL") | |
| 316 | #' result2 <- build_table(lyt2, df = df, alt_counts_df = adsl) | |
| 317 | #' result2 | |
| 318 | #' | |
| 319 | #' lyt3 <- basic_table() %>% | |
| 320 | #' analyze_patients_exposure_in_cols(var = "SEX", col_split = TRUE, ex_var = "AVAL") | |
| 321 | #' result3 <- build_table(lyt3, df = df, alt_counts_df = adsl) | |
| 322 | #' result3 | |
| 323 | #' | |
| 324 | #' # Adding total levels and custom label | |
| 325 | #' lyt4 <- basic_table( | |
| 326 | #' show_colcounts = TRUE | |
| 327 | #' ) %>% | |
| 328 | #' analyze_patients_exposure_in_cols( | |
| 329 | #' var = "ARMCD", | |
| 330 | #' col_split = TRUE, | |
| 331 | #' add_total_level = TRUE, | |
| 332 | #' custom_label = "TOTAL" | |
| 333 | #' ) %>% | |
| 334 | #'   append_topleft(c("", "Sex")) | |
| 335 | #' | |
| 336 | #' result4 <- build_table(lyt4, df = df, alt_counts_df = adsl) | |
| 337 | #' result4 | |
| 338 | #' | |
| 339 | #' @export | |
| 340 | #' @order 2 | |
| 341 | analyze_patients_exposure_in_cols <- function(lyt, | |
| 342 | var = NULL, | |
| 343 | ex_var = "AVAL", | |
| 344 | id = "USUBJID", | |
| 345 | add_total_level = FALSE, | |
| 346 | custom_label = NULL, | |
| 347 | col_split = TRUE, | |
| 348 | na_str = default_na_str(), | |
| 349 |                                               .stats = c("n_patients", "sum_exposure"), | |
| 350 | .stat_names = NULL, | |
| 351 | .formats = NULL, | |
| 352 | .labels = c(n_patients = "Patients", sum_exposure = "Person time"), | |
| 353 | .indent_mods = NULL, | |
| 354 |                                               ...) { | |
| 355 | # Process standard extra arguments | |
| 356 | 6x | extra_args <- list() | 
| 357 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 358 | ! | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 359 | 6x | col_labels <- unlist(.labels[.stats]) | 
| 360 | 6x |   .labels <- .labels[!names(.labels) %in% c("n_patients", "sum_exposure")] | 
| 361 | 6x | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 362 | ! | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 363 | ||
| 364 | # Process additional arguments to the statistic function | |
| 365 | 6x | extra_args <- c( | 
| 366 | 6x | extra_args, | 
| 367 | 6x | var = var, ex_var = ex_var, id = id, add_total_level = add_total_level, custom_label = custom_label, | 
| 368 | ... | |
| 369 | ) | |
| 370 | ||
| 371 | # Adding additional info from layout to analysis function | |
| 372 | 6x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 373 | 6x | formals(a_count_patients_sum_exposure) <- c( | 
| 374 | 6x | formals(a_count_patients_sum_exposure), extra_args[[".additional_fun_parameters"]] | 
| 375 | ) | |
| 376 | ||
| 377 | 6x |   if (col_split) { | 
| 378 | 4x | lyt <- split_cols_by_multivar( | 
| 379 | 4x | lyt = lyt, | 
| 380 | 4x | vars = rep(ex_var, length(.stats)), | 
| 381 | 4x | varlabels = col_labels, | 
| 382 | 4x | extra_args = list(.stats = .stats) | 
| 383 | ) | |
| 384 | } | |
| 385 | ||
| 386 | 6x | analyze_colvars( | 
| 387 | 6x | lyt = lyt, | 
| 388 | 6x | afun = a_count_patients_sum_exposure, | 
| 389 | 6x | na_str = na_str, | 
| 390 | 6x | extra_args = extra_args | 
| 391 | ) | |
| 392 | } | 
| 1 | #' Control function for descriptive statistics | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Sets a list of parameters for summaries of descriptive statistics. Typically used internally to specify | |
| 6 | #' details for [s_summary()]. This function family is mainly used by [analyze_vars()]. | |
| 7 | #' | |
| 8 | #' @inheritParams argument_convention | |
| 9 | #' @param quantiles (`numeric(2)`)\cr vector of length two to specify the quantiles to calculate. | |
| 10 | #' @param quantile_type (`numeric(1)`)\cr number between 1 and 9 selecting quantile algorithms to be used. | |
| 11 | #' Default is set to 2 as this matches the default quantile algorithm in SAS `proc univariate` set by `QNTLDEF=5`. | |
| 12 | #' This differs from R's default. See more about `type` in [stats::quantile()]. | |
| 13 | #' @param test_mean (`numeric(1)`)\cr number to test against the mean under the null hypothesis when calculating | |
| 14 | #' p-value. | |
| 15 | #' | |
| 16 | #' @return A list of components with the same names as the arguments. | |
| 17 | #' | |
| 18 | #' @export | |
| 19 | control_analyze_vars <- function(conf_level = 0.95, | |
| 20 | quantiles = c(0.25, 0.75), | |
| 21 | quantile_type = 2, | |
| 22 |                                  test_mean = 0) { | |
| 23 | 1134x | checkmate::assert_vector(quantiles, len = 2) | 
| 24 | 1134x | checkmate::assert_int(quantile_type, lower = 1, upper = 9) | 
| 25 | 1134x | checkmate::assert_numeric(test_mean) | 
| 26 | 1134x | lapply(quantiles, assert_proportion_value) | 
| 27 | 1133x | assert_proportion_value(conf_level) | 
| 28 | 1132x | list(conf_level = conf_level, quantiles = quantiles, quantile_type = quantile_type, test_mean = test_mean) | 
| 29 | } | |
| 30 | ||
| 31 | # Helper function to fix numeric or counts pval if necessary | |
| 32 | .correct_num_or_counts_pval <- function(type, .stats) { | |
| 33 | 332x |   if (type == "numeric") { | 
| 34 | 92x |     if (!is.null(.stats) && any(grepl("^pval", .stats))) { | 
| 35 | 10x |       .stats[grepl("^pval", .stats)] <- "pval" # tmp fix xxx | 
| 36 | } | |
| 37 |   } else { | |
| 38 | 240x |     if (!is.null(.stats) && any(grepl("^pval", .stats))) { | 
| 39 | 9x |       .stats[grepl("^pval", .stats)] <- "pval_counts" # tmp fix xxx | 
| 40 | } | |
| 41 | } | |
| 42 | 332x | .stats | 
| 43 | } | |
| 44 | ||
| 45 | #' Analyze variables | |
| 46 | #' | |
| 47 | #' @description `r lifecycle::badge("stable")` | |
| 48 | #' | |
| 49 | #' The analyze function [analyze_vars()] creates a layout element to summarize one or more variables, using the S3 | |
| 50 | #' generic function [s_summary()] to calculate a list of summary statistics. A list of all available statistics for | |
| 51 | #' numeric variables can be viewed by running `get_stats("analyze_vars_numeric")` and for non-numeric variables by | |
| 52 | #' running `get_stats("analyze_vars_counts")`. Use the `.stats` parameter to specify the statistics to include in your | |
| 53 | #' output summary table. Use `compare_with_ref_group = TRUE` to compare the variable with reference groups. | |
| 54 | #' | |
| 55 | #' @details | |
| 56 | #' **Automatic digit formatting:** The number of digits to display can be automatically determined from the analyzed | |
| 57 | #' variable(s) (`vars`) for certain statistics by setting the statistic format to `"auto"` in `.formats`. | |
| 58 | #' This utilizes the [format_auto()] formatting function. Note that only data for the current row & variable (for all | |
| 59 | #' columns) will be considered (`.df_row[[.var]]`, see [`rtables::additional_fun_params`]) and not the whole dataset. | |
| 60 | #' | |
| 61 | #' @inheritParams argument_convention | |
| 62 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 63 | #' | |
| 64 | #'   Options for numeric variables are: ``r shQuote(get_stats("analyze_vars_numeric"), type = "sh")`` | |
| 65 | #' | |
| 66 | #'   Options for non-numeric variables are: ``r shQuote(get_stats("analyze_vars_counts"), type = "sh")`` | |
| 67 | #' | |
| 68 | #' @name analyze_variables | |
| 69 | #' @order 1 | |
| 70 | NULL | |
| 71 | ||
| 72 | #' @describeIn analyze_variables S3 generic function to produces a variable summary. | |
| 73 | #' | |
| 74 | #' @return | |
| 75 | #' * `s_summary()` returns different statistics depending on the class of `x`. | |
| 76 | #' | |
| 77 | #' @export | |
| 78 | s_summary <- function(x, ...) { | |
| 79 | 1697x |   UseMethod("s_summary", x) | 
| 80 | } | |
| 81 | ||
| 82 | #' @describeIn analyze_variables Method for `numeric` class. | |
| 83 | #' | |
| 84 | #' @param control (`list`)\cr parameters for descriptive statistics details, specified by using | |
| 85 | #' the helper function [control_analyze_vars()]. Some possible parameter options are: | |
| 86 | #' * `conf_level` (`proportion`)\cr confidence level of the interval for mean and median. | |
| 87 | #' * `quantiles` (`numeric(2)`)\cr vector of length two to specify the quantiles. | |
| 88 | #' * `quantile_type` (`numeric(1)`)\cr between 1 and 9 selecting quantile algorithms to be used. | |
| 89 | #' See more about `type` in [stats::quantile()]. | |
| 90 | #' * `test_mean` (`numeric(1)`)\cr value to test against the mean under the null hypothesis when calculating p-value. | |
| 91 | #' | |
| 92 | #' @return | |
| 93 | #' * If `x` is of class `numeric`, returns a `list` with the following named `numeric` items: | |
| 94 | #' * `n`: The [length()] of `x`. | |
| 95 | #' * `sum`: The [sum()] of `x`. | |
| 96 | #' * `mean`: The [mean()] of `x`. | |
| 97 | #' * `sd`: The [stats::sd()] of `x`. | |
| 98 | #' * `se`: The standard error of `x` mean, i.e.: (`sd(x) / sqrt(length(x))`). | |
| 99 | #' * `mean_sd`: The [mean()] and [stats::sd()] of `x`. | |
| 100 | #' * `mean_se`: The [mean()] of `x` and its standard error (see above). | |
| 101 | #' * `mean_ci`: The CI for the mean of `x` (from [stat_mean_ci()]). | |
| 102 | #' * `mean_sei`: The SE interval for the mean of `x`, i.e.: ([mean()] -/+ [stats::sd()] / [sqrt()]). | |
| 103 | #' * `mean_sdi`: The SD interval for the mean of `x`, i.e.: ([mean()] -/+ [stats::sd()]). | |
| 104 | #' * `mean_pval`: The two-sided p-value of the mean of `x` (from [stat_mean_pval()]). | |
| 105 | #' * `median`: The [stats::median()] of `x`. | |
| 106 | #' * `mad`: The median absolute deviation of `x`, i.e.: ([stats::median()] of `xc`, | |
| 107 | #' where `xc` = `x` - [stats::median()]). | |
| 108 | #' * `median_ci`: The CI for the median of `x` (from [stat_median_ci()]). | |
| 109 | #' * `quantiles`: Two sample quantiles of `x` (from [stats::quantile()]). | |
| 110 | #' * `iqr`: The [stats::IQR()] of `x`. | |
| 111 | #' * `range`: The [range_noinf()] of `x`. | |
| 112 | #' * `min`: The [max()] of `x`. | |
| 113 | #' * `max`: The [min()] of `x`. | |
| 114 | #' * `median_range`: The [median()] and [range_noinf()] of `x`. | |
| 115 | #' * `cv`: The coefficient of variation of `x`, i.e.: ([stats::sd()] / [mean()] * 100). | |
| 116 | #' * `geom_mean`: The geometric mean of `x`, i.e.: (`exp(mean(log(x)))`). | |
| 117 | #' * `geom_cv`: The geometric coefficient of variation of `x`, i.e.: (`sqrt(exp(sd(log(x)) ^ 2) - 1) * 100`). | |
| 118 | #' | |
| 119 | #' @note | |
| 120 | #' * If `x` is an empty vector, `NA` is returned. This is the expected feature so as to return `rcell` content in | |
| 121 | #' `rtables` when the intersection of a column and a row delimits an empty data selection. | |
| 122 | #' * When the `mean` function is applied to an empty vector, `NA` will be returned instead of `NaN`, the latter | |
| 123 | #' being standard behavior in R. | |
| 124 | #' | |
| 125 | #' @method s_summary numeric | |
| 126 | #' | |
| 127 | #' @examples | |
| 128 | #' # `s_summary.numeric` | |
| 129 | #' | |
| 130 | #' ## Basic usage: empty numeric returns NA-filled items. | |
| 131 | #' s_summary(numeric()) | |
| 132 | #' | |
| 133 | #' ## Management of NA values. | |
| 134 | #' x <- c(NA_real_, 1) | |
| 135 | #' s_summary(x, na_rm = TRUE) | |
| 136 | #' s_summary(x, na_rm = FALSE) | |
| 137 | #' | |
| 138 | #' x <- c(NA_real_, 1, 2) | |
| 139 | #' s_summary(x) | |
| 140 | #' | |
| 141 | #' ## Benefits in `rtables` contructions: | |
| 142 | #' dta_test <- data.frame( | |
| 143 | #' Group = rep(LETTERS[seq(3)], each = 2), | |
| 144 | #' sub_group = rep(letters[seq(2)], each = 3), | |
| 145 | #' x = seq(6) | |
| 146 | #' ) | |
| 147 | #' | |
| 148 | #' ## The summary obtained in with `rtables`: | |
| 149 | #' basic_table() %>% | |
| 150 | #' split_cols_by(var = "Group") %>% | |
| 151 | #' split_rows_by(var = "sub_group") %>% | |
| 152 | #' analyze(vars = "x", afun = s_summary) %>% | |
| 153 | #' build_table(df = dta_test) | |
| 154 | #' | |
| 155 | #' ## By comparison with `lapply`: | |
| 156 | #' X <- split(dta_test, f = with(dta_test, interaction(Group, sub_group))) | |
| 157 | #' lapply(X, function(x) s_summary(x$x)) | |
| 158 | #' | |
| 159 | #' @export | |
| 160 | s_summary.numeric <- function(x, control = control_analyze_vars(), ...) { | |
| 161 | 1179x | checkmate::assert_numeric(x) | 
| 162 | 1179x | args_list <- list(...) | 
| 163 | 1179x | .N_row <- args_list[[".N_row"]] # nolint | 
| 164 | 1179x | .N_col <- args_list[[".N_col"]] # nolint | 
| 165 | 1179x | na_rm <- args_list[["na_rm"]] %||% TRUE | 
| 166 | 1179x | compare_with_ref_group <- args_list[["compare_with_ref_group"]] | 
| 167 | ||
| 168 | 1179x |   if (na_rm) { | 
| 169 | 1177x | x <- x[!is.na(x)] | 
| 170 | } | |
| 171 | ||
| 172 | 1179x | y <- list() | 
| 173 | ||
| 174 | 1179x |   y$n <- c("n" = length(x)) | 
| 175 | ||
| 176 | 1179x |   y$sum <- c("sum" = ifelse(length(x) == 0, NA_real_, sum(x, na.rm = FALSE))) | 
| 177 | ||
| 178 | 1179x |   y$mean <- c("mean" = ifelse(length(x) == 0, NA_real_, mean(x, na.rm = FALSE))) | 
| 179 | ||
| 180 | 1179x |   y$sd <- c("sd" = stats::sd(x, na.rm = FALSE)) | 
| 181 | ||
| 182 | 1179x |   y$se <- c("se" = stats::sd(x, na.rm = FALSE) / sqrt(length(stats::na.omit(x)))) | 
| 183 | ||
| 184 | 1179x | y$mean_sd <- c(y$mean, "sd" = stats::sd(x, na.rm = FALSE)) | 
| 185 | ||
| 186 | 1179x | y$mean_se <- c(y$mean, y$se) | 
| 187 | ||
| 188 | 1179x | mean_ci <- stat_mean_ci(x, conf_level = control$conf_level, na.rm = FALSE, gg_helper = FALSE) | 
| 189 | 1179x |   y$mean_ci <- formatters::with_label(mean_ci, paste("Mean", f_conf_level(control$conf_level))) | 
| 190 | ||
| 191 | 1179x | mean_sei <- y$mean[[1]] + c(-1, 1) * stats::sd(x, na.rm = FALSE) / sqrt(y$n) | 
| 192 | 1179x |   names(mean_sei) <- c("mean_sei_lwr", "mean_sei_upr") | 
| 193 | 1179x | y$mean_sei <- formatters::with_label(mean_sei, "Mean -/+ 1xSE") | 
| 194 | ||
| 195 | 1179x | mean_sdi <- y$mean[[1]] + c(-1, 1) * stats::sd(x, na.rm = FALSE) | 
| 196 | 1179x |   names(mean_sdi) <- c("mean_sdi_lwr", "mean_sdi_upr") | 
| 197 | 1179x | y$mean_sdi <- formatters::with_label(mean_sdi, "Mean -/+ 1xSD") | 
| 198 | 1179x | mean_ci_3d <- c(y$mean, y$mean_ci) | 
| 199 | 1179x |   y$mean_ci_3d <- formatters::with_label(mean_ci_3d, paste0("Mean (", f_conf_level(control$conf_level), ")")) | 
| 200 | ||
| 201 | 1179x | mean_pval <- stat_mean_pval(x, test_mean = control$test_mean, na.rm = FALSE, n_min = 2) | 
| 202 | 1179x |   y$mean_pval <- formatters::with_label(mean_pval, paste("Mean", f_pval(control$test_mean))) | 
| 203 | ||
| 204 | 1179x |   y$median <- c("median" = stats::median(x, na.rm = FALSE)) | 
| 205 | ||
| 206 | 1179x |   y$mad <- c("mad" = stats::median(x - y$median, na.rm = FALSE)) | 
| 207 | ||
| 208 | 1179x | median_ci <- stat_median_ci(x, conf_level = control$conf_level, na.rm = FALSE, gg_helper = FALSE) | 
| 209 | 1179x |   y$median_ci <- formatters::with_label(median_ci, paste("Median", f_conf_level(control$conf_level))) | 
| 210 | ||
| 211 | 1179x | median_ci_3d <- c(y$median, median_ci) | 
| 212 | 1179x |   y$median_ci_3d <- formatters::with_label(median_ci_3d, paste0("Median (", f_conf_level(control$conf_level), ")")) | 
| 213 | ||
| 214 | 1179x | q <- control$quantiles | 
| 215 | 1179x |   if (any(is.na(x))) { | 
| 216 | 2x | qnts <- rep(NA_real_, length(q)) | 
| 217 |   } else { | |
| 218 | 1177x | qnts <- stats::quantile(x, probs = q, type = control$quantile_type, na.rm = FALSE) | 
| 219 | } | |
| 220 | 1179x |   names(qnts) <- paste("quantile", q, sep = "_") | 
| 221 | 1179x | y$quantiles <- formatters::with_label(qnts, paste0(paste(paste0(q * 100, "%"), collapse = " and "), "-ile")) | 
| 222 | ||
| 223 | 1179x |   y$iqr <- c("iqr" = ifelse( | 
| 224 | 1179x | any(is.na(x)), | 
| 225 | 1179x | NA_real_, | 
| 226 | 1179x | stats::IQR(x, na.rm = FALSE, type = control$quantile_type) | 
| 227 | )) | |
| 228 | ||
| 229 | 1179x |   y$range <- stats::setNames(range_noinf(x, na.rm = FALSE), c("min", "max")) | 
| 230 | 1179x | y$min <- y$range[1] | 
| 231 | 1179x | y$max <- y$range[2] | 
| 232 | ||
| 233 | 1179x | y$median_range <- formatters::with_label(c(y$median, y$range), "Median (Min - Max)") | 
| 234 | ||
| 235 | 1179x |   y$cv <- c("cv" = unname(y$sd) / unname(y$mean) * 100) | 
| 236 | ||
| 237 | # Geometric Mean - Convert negative values to NA for log calculation. | |
| 238 | 1179x | geom_verbose <- args_list[["geom_verbose"]] %||% FALSE # Additional info if requested | 
| 239 | 1179x | checkmate::assert_flag(geom_verbose) | 
| 240 | 1179x | x_no_negative_vals <- x | 
| 241 | 1179x |   if (identical(x_no_negative_vals, numeric())) { | 
| 242 | 76x | x_no_negative_vals <- NA | 
| 243 | } | |
| 244 | 1179x | x_no_negative_vals[x_no_negative_vals <= 0] <- NA | 
| 245 | 1179x |   if (geom_verbose) { | 
| 246 | 2x |     if (any(x <= 0)) { | 
| 247 | 2x |       warning("Negative values were converted to NA for calculation of the geometric mean.") | 
| 248 | } | |
| 249 | 2x |     if (all(is.na(x_no_negative_vals))) { | 
| 250 | 1x |       warning("Since all values are negative or NA, the geometric mean is NA.") | 
| 251 | } | |
| 252 | } | |
| 253 | 1179x |   y$geom_mean <- c("geom_mean" = exp(mean(log(x_no_negative_vals), na.rm = FALSE))) | 
| 254 | 1179x |   y$geom_sd <- c("geom_sd" = geom_sd <- exp(sd(log(x_no_negative_vals), na.rm = FALSE))) | 
| 255 | 1179x | y$geom_mean_sd <- c(y$geom_mean, y$geom_sd) | 
| 256 | 1179x | geom_mean_ci <- stat_mean_ci(x, conf_level = control$conf_level, na.rm = FALSE, gg_helper = FALSE, geom_mean = TRUE) | 
| 257 | 1179x |   y$geom_mean_ci <- formatters::with_label(geom_mean_ci, paste("Geometric Mean", f_conf_level(control$conf_level))) | 
| 258 | ||
| 259 | 1179x |   y$geom_cv <- c("geom_cv" = sqrt(exp(stats::sd(log(x_no_negative_vals), na.rm = FALSE) ^ 2) - 1) * 100) # styler: off | 
| 260 | ||
| 261 | 1179x | geom_mean_ci_3d <- c(y$geom_mean, y$geom_mean_ci) | 
| 262 | 1179x | y$geom_mean_ci_3d <- formatters::with_label( | 
| 263 | 1179x | geom_mean_ci_3d, | 
| 264 | 1179x |     paste0("Geometric Mean (", f_conf_level(control$conf_level), ")") | 
| 265 | ) | |
| 266 | ||
| 267 | # Compare with reference group | |
| 268 | 1179x |   if (isTRUE(compare_with_ref_group)) { | 
| 269 | 13x | .ref_group <- args_list[[".ref_group"]] | 
| 270 | 13x | .in_ref_col <- args_list[[".in_ref_col"]] | 
| 271 | 13x | checkmate::assert_numeric(.ref_group) | 
| 272 | 13x | checkmate::assert_flag(.in_ref_col) | 
| 273 | ||
| 274 | 13x | y$pval <- numeric() | 
| 275 | 13x |     if (!.in_ref_col && n_available(x) > 1 && n_available(.ref_group) > 1) { | 
| 276 | 9x | y$pval <- stats::t.test(x, .ref_group)$p.value | 
| 277 | } | |
| 278 | } | |
| 279 | ||
| 280 | 1179x | y | 
| 281 | } | |
| 282 | ||
| 283 | #' @describeIn analyze_variables Method for `factor` class. | |
| 284 | #' | |
| 285 | #' @return | |
| 286 | #' * If `x` is of class `factor` or converted from `character`, returns a `list` with named `numeric` items: | |
| 287 | #' * `n`: The [length()] of `x`. | |
| 288 | #' * `count`: A list with the number of cases for each level of the factor `x`. | |
| 289 | #' * `count_fraction`: Similar to `count` but also includes the proportion of cases for each level of the | |
| 290 | #' factor `x` relative to the denominator, or `NA` if the denominator is zero. | |
| 291 | #' | |
| 292 | #' @note | |
| 293 | #' * If `x` is an empty `factor`, a list is still returned for `counts` with one element | |
| 294 | #' per factor level. If there are no levels in `x`, the function fails. | |
| 295 | #' * If factor variables contain `NA`, these `NA` values are excluded by default. To include `NA` values | |
| 296 | #' set `na_rm = FALSE` and missing values will be displayed as an `NA` level. Alternatively, an explicit | |
| 297 | #' factor level can be defined for `NA` values during pre-processing via [df_explicit_na()] - the | |
| 298 | #' default `na_level` (`"<Missing>"`) will also be excluded when `na_rm` is set to `TRUE`. | |
| 299 | #' | |
| 300 | #' @method s_summary factor | |
| 301 | #' | |
| 302 | #' @examples | |
| 303 | #' # `s_summary.factor` | |
| 304 | #' | |
| 305 | #' ## Basic usage: | |
| 306 | #' s_summary(factor(c("a", "a", "b", "c", "a"))) | |
| 307 | #' | |
| 308 | #' # Empty factor returns zero-filled items. | |
| 309 | #' s_summary(factor(levels = c("a", "b", "c"))) | |
| 310 | #' | |
| 311 | #' ## Management of NA values. | |
| 312 | #' x <- factor(c(NA, "Female")) | |
| 313 | #' x <- explicit_na(x) | |
| 314 | #' s_summary(x, na_rm = TRUE) | |
| 315 | #' s_summary(x, na_rm = FALSE) | |
| 316 | #' | |
| 317 | #' ## Different denominators. | |
| 318 | #' x <- factor(c("a", "a", "b", "c", "a")) | |
| 319 | #' s_summary(x, denom = "N_row", .N_row = 10L) | |
| 320 | #' s_summary(x, denom = "N_col", .N_col = 20L) | |
| 321 | #' | |
| 322 | #' @export | |
| 323 | s_summary.factor <- function(x, denom = c("n", "N_col", "N_row"), ...) { | |
| 324 | 304x | assert_valid_factor(x) | 
| 325 | 301x | args_list <- list(...) | 
| 326 | 301x | .N_row <- args_list[[".N_row"]] # nolint | 
| 327 | 301x | .N_col <- args_list[[".N_col"]] # nolint | 
| 328 | 301x | na_rm <- args_list[["na_rm"]] %||% TRUE | 
| 329 | 301x | verbose <- args_list[["verbose"]] %||% TRUE | 
| 330 | 301x | compare_with_ref_group <- args_list[["compare_with_ref_group"]] | 
| 331 | ||
| 332 | 301x |   if (na_rm) { | 
| 333 | 292x |     x <- x[!is.na(x)] %>% fct_discard("<Missing>") | 
| 334 |   } else { | |
| 335 | 9x | x <- x %>% explicit_na(label = "NA") | 
| 336 | } | |
| 337 | ||
| 338 | 301x | y <- list() | 
| 339 | ||
| 340 | 301x |   y$n <- list("n" = c("n" = length(x))) # all list of a list | 
| 341 | ||
| 342 | 301x | y$count <- lapply(as.list(table(x, useNA = "ifany")), setNames, nm = "count") | 
| 343 | ||
| 344 | 301x | denom <- match.arg(denom) %>% | 
| 345 | 301x | switch( | 
| 346 | 301x | n = length(x), | 
| 347 | 301x | N_row = .N_row, | 
| 348 | 301x | N_col = .N_col | 
| 349 | ) | |
| 350 | ||
| 351 | 301x | y$count_fraction <- lapply( | 
| 352 | 301x | y$count, | 
| 353 | 301x |     function(x) { | 
| 354 | 2182x | c(x, "p" = ifelse(denom > 0, x / denom, 0)) | 
| 355 | } | |
| 356 | ) | |
| 357 | ||
| 358 | 301x | y$count_fraction_fixed_dp <- y$count_fraction | 
| 359 | ||
| 360 | 301x | y$fraction <- lapply( | 
| 361 | 301x | y$count, | 
| 362 | 301x |     function(count) c("num" = unname(count), "denom" = denom) | 
| 363 | ) | |
| 364 | ||
| 365 | 301x |   y$n_blq <- list("n_blq" = c("n_blq" = sum(grepl("BLQ|LTR|<[1-9]|<PCLLOQ", x)))) | 
| 366 | ||
| 367 | ||
| 368 | 301x |   if (isTRUE(compare_with_ref_group)) { | 
| 369 | 16x | .ref_group <- as_factor_keep_attributes(args_list[[".ref_group"]], verbose = verbose) | 
| 370 | 16x | .in_ref_col <- args_list[[".in_ref_col"]] | 
| 371 | 16x | checkmate::assert_flag(.in_ref_col) | 
| 372 | 16x | assert_valid_factor(x) | 
| 373 | 16x | assert_valid_factor(.ref_group) | 
| 374 | ||
| 375 | 16x |     if (na_rm) { | 
| 376 | 14x |       x <- x[!is.na(x)] %>% fct_discard("<Missing>") | 
| 377 | 14x |       .ref_group <- .ref_group[!is.na(.ref_group)] %>% fct_discard("<Missing>") | 
| 378 |     } else { | |
| 379 | 2x | x <- x %>% explicit_na(label = "NA") | 
| 380 | 2x | .ref_group <- .ref_group %>% explicit_na(label = "NA") | 
| 381 | } | |
| 382 | ||
| 383 | 1x |     if ("NA" %in% levels(x)) levels(.ref_group) <- c(levels(.ref_group), "NA") | 
| 384 | 16x | checkmate::assert_factor(x, levels = levels(.ref_group), min.levels = 2) | 
| 385 | ||
| 386 | 16x | y$pval_counts <- numeric() | 
| 387 | 16x |     if (!.in_ref_col && length(x) > 0 && length(.ref_group) > 0) { | 
| 388 | 13x | tab <- rbind(table(x), table(.ref_group)) | 
| 389 | 13x | res <- suppressWarnings(stats::chisq.test(tab)) | 
| 390 | 13x | y$pval_counts <- res$p.value | 
| 391 | } | |
| 392 | } | |
| 393 | ||
| 394 | 301x | y | 
| 395 | } | |
| 396 | ||
| 397 | #' @describeIn analyze_variables Method for `character` class. This makes an automatic | |
| 398 | #' conversion to factor (with a warning) and then forwards to the method for factors. | |
| 399 | #' | |
| 400 | #' @note | |
| 401 | #' * Automatic conversion of character to factor does not guarantee that the table | |
| 402 | #' can be generated correctly. In particular for sparse tables this very likely can fail. | |
| 403 | #' It is therefore better to always pre-process the dataset such that factors are manually | |
| 404 | #' created from character variables before passing the dataset to [rtables::build_table()]. | |
| 405 | #' | |
| 406 | #' @method s_summary character | |
| 407 | #' | |
| 408 | #' @examples | |
| 409 | #' # `s_summary.character` | |
| 410 | #' | |
| 411 | #' ## Basic usage: | |
| 412 | #' s_summary(c("a", "a", "b", "c", "a"), verbose = FALSE) | |
| 413 | #' s_summary(c("a", "a", "b", "c", "a", ""), .var = "x", na_rm = FALSE, verbose = FALSE) | |
| 414 | #' | |
| 415 | #' @export | |
| 416 | s_summary.character <- function(x, denom = c("n", "N_col", "N_row"), ...) { | |
| 417 | 12x | args_list <- list(...) | 
| 418 | 12x | na_rm <- args_list[["na_rm"]] %||% TRUE | 
| 419 | 12x | verbose <- args_list[["verbose"]] %||% TRUE | 
| 420 | ||
| 421 | 12x |   if (na_rm) { | 
| 422 | 11x | y <- as_factor_keep_attributes(x, verbose = verbose) | 
| 423 |   } else { | |
| 424 | 1x | y <- as_factor_keep_attributes(x, verbose = verbose, na_level = "NA") | 
| 425 | } | |
| 426 | ||
| 427 | 12x | s_summary(x = y, denom = denom, ...) | 
| 428 | } | |
| 429 | ||
| 430 | #' @describeIn analyze_variables Method for `logical` class. | |
| 431 | #' | |
| 432 | #' @return | |
| 433 | #' * If `x` is of class `logical`, returns a `list` with named `numeric` items: | |
| 434 | #' * `n`: The [length()] of `x` (possibly after removing `NA`s). | |
| 435 | #' * `count`: Count of `TRUE` in `x`. | |
| 436 | #' * `count_fraction`: Count and proportion of `TRUE` in `x` relative to the denominator, or `NA` if the | |
| 437 | #' denominator is zero. Note that `NA`s in `x` are never counted or leading to `NA` here. | |
| 438 | #' | |
| 439 | #' @method s_summary logical | |
| 440 | #' | |
| 441 | #' @examples | |
| 442 | #' # `s_summary.logical` | |
| 443 | #' | |
| 444 | #' ## Basic usage: | |
| 445 | #' s_summary(c(TRUE, FALSE, TRUE, TRUE)) | |
| 446 | #' | |
| 447 | #' # Empty factor returns zero-filled items. | |
| 448 | #' s_summary(as.logical(c())) | |
| 449 | #' | |
| 450 | #' ## Management of NA values. | |
| 451 | #' x <- c(NA, TRUE, FALSE) | |
| 452 | #' s_summary(x, na_rm = TRUE) | |
| 453 | #' s_summary(x, na_rm = FALSE) | |
| 454 | #' | |
| 455 | #' ## Different denominators. | |
| 456 | #' x <- c(TRUE, FALSE, TRUE, TRUE) | |
| 457 | #' s_summary(x, denom = "N_row", .N_row = 10L) | |
| 458 | #' s_summary(x, denom = "N_col", .N_col = 20L) | |
| 459 | #' | |
| 460 | #' @export | |
| 461 | s_summary.logical <- function(x, denom = c("n", "N_col", "N_row"), ...) { | |
| 462 | 211x | checkmate::assert_logical(x) | 
| 463 | 211x | args_list <- list(...) | 
| 464 | 211x | .N_row <- args_list[[".N_row"]] # nolint | 
| 465 | 211x | .N_col <- args_list[[".N_col"]] # nolint | 
| 466 | 211x | na_rm <- args_list[["na_rm"]] %||% TRUE | 
| 467 | 211x | compare_with_ref_group <- args_list[["compare_with_ref_group"]] | 
| 468 | ||
| 469 | 211x |   if (na_rm) { | 
| 470 | 208x | x <- x[!is.na(x)] | 
| 471 | } | |
| 472 | ||
| 473 | 211x | y <- list() | 
| 474 | 211x |   y$n <- c("n" = length(x)) | 
| 475 | 211x | denom <- match.arg(denom) %>% | 
| 476 | 211x | switch( | 
| 477 | 211x | n = length(x), | 
| 478 | 211x | N_row = .N_row, | 
| 479 | 211x | N_col = .N_col | 
| 480 | ) | |
| 481 | 211x |   y$count <- c("count" = sum(x, na.rm = TRUE)) | 
| 482 | 211x | y$count_fraction <- c(y$count, "fraction" = ifelse(denom > 0, y$count / denom, 0)) | 
| 483 | 211x | y$count_fraction_fixed_dp <- y$count_fraction | 
| 484 | 211x |   y$fraction <- c("num" = unname(y$count), "denom" = denom) | 
| 485 | 211x |   y$n_blq <- c("n_blq" = 0L) | 
| 486 | ||
| 487 | ||
| 488 | 211x |   if (isTRUE(compare_with_ref_group)) { | 
| 489 | 4x | .ref_group <- args_list[[".ref_group"]] | 
| 490 | 4x | .in_ref_col <- args_list[[".in_ref_col"]] | 
| 491 | 4x | checkmate::assert_flag(.in_ref_col) | 
| 492 | ||
| 493 | 4x |     if (na_rm) { | 
| 494 | 3x | x <- stats::na.omit(x) | 
| 495 | 3x | .ref_group <- stats::na.omit(.ref_group) | 
| 496 |     } else { | |
| 497 | 1x | x[is.na(x)] <- FALSE | 
| 498 | 1x | .ref_group[is.na(.ref_group)] <- FALSE | 
| 499 | } | |
| 500 | ||
| 501 | 4x | y$pval_counts <- numeric() | 
| 502 | 4x |     if (!.in_ref_col && length(x) > 0 && length(.ref_group) > 0) { | 
| 503 | 4x | x <- factor(x, levels = c(TRUE, FALSE)) | 
| 504 | 4x | .ref_group <- factor(.ref_group, levels = c(TRUE, FALSE)) | 
| 505 | 4x | tbl <- rbind(table(x), table(.ref_group)) | 
| 506 | 4x | y$pval_counts <- suppressWarnings(prop_chisq(tbl)) | 
| 507 | } | |
| 508 | } | |
| 509 | ||
| 510 | 211x | y | 
| 511 | } | |
| 512 | ||
| 513 | #' @describeIn analyze_variables Formatted analysis function which is used as `afun` in `analyze_vars()` and | |
| 514 | #' `compare_vars()` and as `cfun` in `summarize_colvars()`. | |
| 515 | #' | |
| 516 | #' @param compare_with_ref_group (`flag`)\cr whether comparison statistics should be analyzed instead of summary | |
| 517 | #' statistics (`compare_with_ref_group = TRUE` adds `pval` statistic comparing | |
| 518 | #' against reference group). | |
| 519 | #' | |
| 520 | #' @return | |
| 521 | #' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 522 | #' | |
| 523 | #' @note | |
| 524 | #' * To use for comparison (with additional p-value statistic), parameter | |
| 525 | #' `compare_with_ref_group` must be set to `TRUE`. | |
| 526 | #' * Ensure that either all `NA` values are converted to an explicit `NA` level or all `NA` values are left as is. | |
| 527 | #' | |
| 528 | #' @examples | |
| 529 | #' a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) | |
| 530 | #' a_summary( | |
| 531 | #'   factor(c("a", "a", "b", "c", "a")), | |
| 532 | #'   .ref_group = factor(c("a", "a", "b", "c")), compare_with_ref_group = TRUE, .in_ref_col = TRUE | |
| 533 | #' ) | |
| 534 | #' | |
| 535 | #' a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE) | |
| 536 | #' a_summary( | |
| 537 | #'   c("A", "B", "A", "C"), | |
| 538 | #'   .ref_group = c("B", "A", "C"), .var = "x", compare_with_ref_group = TRUE, verbose = FALSE, | |
| 539 | #' .in_ref_col = FALSE | |
| 540 | #' ) | |
| 541 | #' | |
| 542 | #' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) | |
| 543 | #' a_summary( | |
| 544 | #' c(TRUE, FALSE, FALSE, TRUE, TRUE), | |
| 545 | #' .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare_with_ref_group = TRUE, | |
| 546 | #' .in_ref_col = FALSE | |
| 547 | #' ) | |
| 548 | #' | |
| 549 | #' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla") | |
| 550 | #' a_summary(rnorm(10, 5, 1), | |
| 551 | #' .ref_group = rnorm(20, -5, 1), .var = "bla", compare_with_ref_group = TRUE, | |
| 552 | #' .in_ref_col = FALSE | |
| 553 | #' ) | |
| 554 | #' | |
| 555 | #' @export | |
| 556 | a_summary <- function(x, | |
| 557 | ..., | |
| 558 | .stats = NULL, | |
| 559 | .stat_names = NULL, | |
| 560 | .formats = NULL, | |
| 561 | .labels = NULL, | |
| 562 |                       .indent_mods = NULL) { | |
| 563 | 332x | dots_extra_args <- list(...) | 
| 564 | ||
| 565 | # Check if there are user-defined functions | |
| 566 | 332x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 567 | 332x | .stats <- default_and_custom_stats_list$all_stats # just the labels of stats | 
| 568 | 332x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 569 | ||
| 570 | # Correction of the pval indication if it is numeric or counts | |
| 571 | 332x | type <- ifelse(is.numeric(x), "numeric", "counts") # counts is "categorical" | 
| 572 | 332x | .stats <- .correct_num_or_counts_pval(type, .stats) | 
| 573 | ||
| 574 | # Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params) | |
| 575 | 332x | extra_afun_params <- retrieve_extra_afun_params( | 
| 576 | 332x | names(dots_extra_args$.additional_fun_parameters) | 
| 577 | ) | |
| 578 | 332x | dots_extra_args$.additional_fun_parameters <- NULL # After extraction we do not need them anymore | 
| 579 | ||
| 580 | # Check if compare_with_ref_group is TRUE but no ref col is set | |
| 581 | 332x | if (isTRUE(dots_extra_args$compare_with_ref_group) && | 
| 582 | 332x | all( | 
| 583 | 332x | length(dots_extra_args[[".ref_group"]]) == 0, # only used for testing | 
| 584 | 332x | length(extra_afun_params[[".ref_group"]]) == 0 | 
| 585 | ) | |
| 586 |   ) { | |
| 587 | ! | stop( | 
| 588 | ! | "For comparison (compare_with_ref_group = TRUE), the reference group must be specified.", | 
| 589 | ! | "\nSee ref_group in split_cols_by()." | 
| 590 | ) | |
| 591 | } | |
| 592 | ||
| 593 | # Main statistical functions application | |
| 594 | 332x | x_stats <- .apply_stat_functions( | 
| 595 | 332x | default_stat_fnc = s_summary, | 
| 596 | 332x | custom_stat_fnc_list = custom_stat_functions, | 
| 597 | 332x | args_list = c( | 
| 598 | 332x | x = list(x), | 
| 599 | 332x | extra_afun_params, | 
| 600 | 332x | dots_extra_args | 
| 601 | ) | |
| 602 | ) | |
| 603 | ||
| 604 | # Fill in with stats defaults if needed | |
| 605 | 332x |   met_grp <- paste0(c("analyze_vars", type), collapse = "_") | 
| 606 | 332x | .stats <- get_stats( | 
| 607 | 332x | met_grp, | 
| 608 | 332x | stats_in = .stats, | 
| 609 | 332x | custom_stats_in = names(custom_stat_functions), | 
| 610 | 332x | add_pval = dots_extra_args$compare_with_ref_group %||% FALSE | 
| 611 | ) | |
| 612 | ||
| 613 | 332x | x_stats <- x_stats[.stats] | 
| 614 | ||
| 615 | 332x | is_char <- is.character(x) || is.factor(x) | 
| 616 | 332x |   if (is_char) { | 
| 617 | 236x | levels_per_stats <- lapply(x_stats, names) | 
| 618 |   } else { | |
| 619 | 96x | levels_per_stats <- names(x_stats) %>% | 
| 620 | 96x | as.list() %>% | 
| 621 | 96x | setNames(names(x_stats)) | 
| 622 | } | |
| 623 | ||
| 624 | # Fill in formats/indents/labels with custom input and defaults | |
| 625 | 332x | .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) | 
| 626 | 332x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) | 
| 627 | 332x | lbls <- get_labels_from_stats(.stats, .labels, levels_per_stats) | 
| 628 | ||
| 629 | 332x |   if (is_char) { | 
| 630 | # Keep pval_counts stat if present from comparisons and empty | |
| 631 | 236x |     if ("pval_counts" %in% names(x_stats) && length(x_stats[["pval_counts"]]) == 0) { | 
| 632 | 3x |       x_stats[["pval_counts"]] <- list(NULL) %>% setNames("pval_counts") | 
| 633 | } | |
| 634 | ||
| 635 | # Unlist stats | |
| 636 | 236x | x_stats <- x_stats %>% | 
| 637 | 236x | .unlist_keep_nulls() %>% | 
| 638 | 236x | setNames(names(.formats)) | 
| 639 | } | |
| 640 | ||
| 641 | # Check for custom labels from control_analyze_vars | |
| 642 | 332x |   .labels <- if ("control" %in% names(dots_extra_args)) { | 
| 643 | 2x | labels_use_control(lbls, dots_extra_args[["control"]], .labels) | 
| 644 |   } else { | |
| 645 | 330x | lbls | 
| 646 | } | |
| 647 | ||
| 648 | # Auto format handling | |
| 649 | 332x | .formats <- apply_auto_formatting( | 
| 650 | 332x | .formats, | 
| 651 | 332x | x_stats, | 
| 652 | 332x | extra_afun_params$.df_row, | 
| 653 | 332x | extra_afun_params$.var | 
| 654 | ) | |
| 655 | ||
| 656 | # Get and check statistical names from defaults | |
| 657 | 332x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 658 | ||
| 659 | 332x | in_rows( | 
| 660 | 332x | .list = x_stats, | 
| 661 | 332x | .formats = .formats, | 
| 662 | 332x | .names = names(.labels), | 
| 663 | 332x | .stat_names = .stat_names, | 
| 664 | 332x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 665 | 332x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 666 | ) | |
| 667 | } | |
| 668 | ||
| 669 | #' @describeIn analyze_variables Layout-creating function which can take statistics function arguments | |
| 670 | #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 671 | #' | |
| 672 | #' @param ... additional arguments passed to `s_summary()`, including: | |
| 673 | #' * `denom`: (`string`) See parameter description below. | |
| 674 | #' * `.N_row`: (`numeric(1)`) Row-wise N (row group count) for the group of observations being analyzed (i.e. with no | |
| 675 | #' column-based subsetting). | |
| 676 | #' * `.N_col`: (`numeric(1)`) Column-wise N (column count) for the full column being tabulated within. | |
| 677 | #' * `verbose`: (`flag`) Whether additional warnings and messages should be printed. Mainly used to print out | |
| 678 | #' information about factor casting. Defaults to `TRUE`. Used for `character`/`factor` variables only. | |
| 679 | #' @param compare_with_ref_group (logical)\cr whether to compare the variable with a reference group. | |
| 680 | #' @param .indent_mods (named `integer`)\cr indent modifiers for the labels. Each element of the vector | |
| 681 | #' should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation | |
| 682 | #' for that statistic's row label. | |
| 683 | #' | |
| 684 | #' @return | |
| 685 | #' * `analyze_vars()` returns a layout object suitable for passing to further layouting functions, | |
| 686 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 687 | #' the statistics from `s_summary()` to the table layout. | |
| 688 | #' | |
| 689 | #' @examples | |
| 690 | #' ## Fabricated dataset. | |
| 691 | #' dta_test <- data.frame( | |
| 692 | #' USUBJID = rep(1:6, each = 3), | |
| 693 | #'   PARAMCD = rep("lab", 6 * 3), | |
| 694 | #'   AVISIT  = rep(paste0("V", 1:3), 6), | |
| 695 | #' ARM = rep(LETTERS[1:3], rep(6, 3)), | |
| 696 | #' AVAL = c(9:1, rep(NA, 9)) | |
| 697 | #' ) | |
| 698 | #' | |
| 699 | #' # `analyze_vars()` in `rtables` pipelines | |
| 700 | #' ## Default output within a `rtables` pipeline. | |
| 701 | #' l <- basic_table() %>% | |
| 702 | #' split_cols_by(var = "ARM") %>% | |
| 703 | #' split_rows_by(var = "AVISIT") %>% | |
| 704 | #' analyze_vars(vars = "AVAL") | |
| 705 | #' | |
| 706 | #' build_table(l, df = dta_test) | |
| 707 | #' | |
| 708 | #' ## Select and format statistics output. | |
| 709 | #' l <- basic_table() %>% | |
| 710 | #' split_cols_by(var = "ARM") %>% | |
| 711 | #' split_rows_by(var = "AVISIT") %>% | |
| 712 | #' analyze_vars( | |
| 713 | #' vars = "AVAL", | |
| 714 | #'     .stats = c("n", "mean_sd", "quantiles"), | |
| 715 | #'     .formats = c("mean_sd" = "xx.x, xx.x"), | |
| 716 | #'     .labels = c(n = "n", mean_sd = "Mean, SD", quantiles = c("Q1 - Q3")) | |
| 717 | #' ) | |
| 718 | #' | |
| 719 | #' build_table(l, df = dta_test) | |
| 720 | #' | |
| 721 | #' ## Use arguments interpreted by `s_summary`. | |
| 722 | #' l <- basic_table() %>% | |
| 723 | #' split_cols_by(var = "ARM") %>% | |
| 724 | #' split_rows_by(var = "AVISIT") %>% | |
| 725 | #' analyze_vars(vars = "AVAL", na_rm = FALSE) | |
| 726 | #' | |
| 727 | #' build_table(l, df = dta_test) | |
| 728 | #' | |
| 729 | #' ## Handle `NA` levels first when summarizing factors. | |
| 730 | #' dta_test$AVISIT <- NA_character_ | |
| 731 | #' dta_test <- df_explicit_na(dta_test) | |
| 732 | #' l <- basic_table() %>% | |
| 733 | #' split_cols_by(var = "ARM") %>% | |
| 734 | #' analyze_vars(vars = "AVISIT", na_rm = FALSE) | |
| 735 | #' | |
| 736 | #' build_table(l, df = dta_test) | |
| 737 | #' | |
| 738 | #' # auto format | |
| 739 | #' dt <- data.frame("VAR" = c(0.001, 0.2, 0.0011000, 3, 4)) | |
| 740 | #' basic_table() %>% | |
| 741 | #' analyze_vars( | |
| 742 | #' vars = "VAR", | |
| 743 | #'     .stats = c("n", "mean", "mean_sd", "range"), | |
| 744 | #'     .formats = c("mean_sd" = "auto", "range" = "auto") | |
| 745 | #' ) %>% | |
| 746 | #' build_table(dt) | |
| 747 | #' | |
| 748 | #' @export | |
| 749 | #' @order 2 | |
| 750 | analyze_vars <- function(lyt, | |
| 751 | vars, | |
| 752 | var_labels = vars, | |
| 753 | na_str = default_na_str(), | |
| 754 | nested = TRUE, | |
| 755 | show_labels = "default", | |
| 756 | table_names = vars, | |
| 757 | section_div = NA_character_, | |
| 758 | ..., | |
| 759 | na_rm = TRUE, | |
| 760 | compare_with_ref_group = FALSE, | |
| 761 |                          .stats = c("n", "mean_sd", "median", "range", "count_fraction"), | |
| 762 | .stat_names = NULL, | |
| 763 | .formats = NULL, | |
| 764 | .labels = NULL, | |
| 765 |                          .indent_mods = NULL) { | |
| 766 | # Depending on main functions | |
| 767 | 40x | extra_args <- list( | 
| 768 | 40x | "na_rm" = na_rm, | 
| 769 | 40x | "compare_with_ref_group" = compare_with_ref_group, | 
| 770 | ... | |
| 771 | ) | |
| 772 | ||
| 773 | # Needed defaults | |
| 774 | 40x | if (!is.null(.stats)) extra_args[[".stats"]] <- .stats | 
| 775 | 3x | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 776 | 9x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 777 | 4x | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 778 | ! | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 779 | ||
| 780 | # Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params) | |
| 781 | 40x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 782 | 40x | formals(a_summary) <- c( | 
| 783 | 40x | formals(a_summary), | 
| 784 | 40x | extra_args[[".additional_fun_parameters"]] | 
| 785 | ) | |
| 786 | ||
| 787 |   # Main {rtables} structural call | |
| 788 | 40x | analyze( | 
| 789 | 40x | lyt = lyt, | 
| 790 | 40x | vars = vars, | 
| 791 | 40x | var_labels = var_labels, | 
| 792 | 40x | afun = a_summary, | 
| 793 | 40x | na_str = na_str, | 
| 794 | 40x | inclNAs = !na_rm, | 
| 795 | 40x | nested = nested, | 
| 796 | 40x | extra_args = extra_args, | 
| 797 | 40x | show_labels = show_labels, | 
| 798 | 40x | table_names = table_names, | 
| 799 | 40x | section_div = section_div | 
| 800 | ) | |
| 801 | } | 
| 1 | #' Formatting functions | |
| 2 | #' | |
| 3 | #' See below for the list of formatting functions created in `tern` to work with `rtables`. | |
| 4 | #' | |
| 5 | #' Other available formats can be listed via [`formatters::list_valid_format_labels()`]. Additional | |
| 6 | #' custom formats can be created via the [`formatters::sprintf_format()`] function. | |
| 7 | #' | |
| 8 | #' @family formatting functions | |
| 9 | #' @name formatting_functions | |
| 10 | NULL | |
| 11 | ||
| 12 | #' Format fraction and percentage | |
| 13 | #' | |
| 14 | #' @description `r lifecycle::badge("stable")` | |
| 15 | #' | |
| 16 | #' Formats a fraction together with ratio in percent. | |
| 17 | #' | |
| 18 | #' @param x (named `integer`)\cr vector with elements `num` and `denom`. | |
| 19 | #' @param ... not used. Required for `rtables` interface. | |
| 20 | #' | |
| 21 | #' @return A string in the format `num / denom (ratio %)`. If `num` is 0, the format is `num / denom`. | |
| 22 | #' | |
| 23 | #' @examples | |
| 24 | #' format_fraction(x = c(num = 2L, denom = 3L)) | |
| 25 | #' format_fraction(x = c(num = 0L, denom = 3L)) | |
| 26 | #' | |
| 27 | #' @family formatting functions | |
| 28 | #' @export | |
| 29 | format_fraction <- function(x, ...) { | |
| 30 | 220x | attr(x, "label") <- NULL | 
| 31 | ||
| 32 | 220x | checkmate::assert_vector(x) | 
| 33 | 220x | checkmate::assert_count(x["num"]) | 
| 34 | 218x | checkmate::assert_count(x["denom"]) | 
| 35 | ||
| 36 | 218x |   result <- if (x["num"] == 0) { | 
| 37 | 10x | paste0(x["num"], "/", x["denom"]) | 
| 38 |   } else { | |
| 39 | 208x | paste0( | 
| 40 | 208x | x["num"], "/", x["denom"], | 
| 41 | 208x |       " (", round(x["num"] / x["denom"] * 100, 1), "%)" | 
| 42 | ) | |
| 43 | } | |
| 44 | ||
| 45 | 218x | return(result) | 
| 46 | } | |
| 47 | ||
| 48 | #' Format fraction and percentage with fixed single decimal place | |
| 49 | #' | |
| 50 | #' @description `r lifecycle::badge("stable")` | |
| 51 | #' | |
| 52 | #' Formats a fraction together with ratio in percent with fixed single decimal place. | |
| 53 | #' Includes trailing zero in case of whole number percentages to always keep one decimal place. | |
| 54 | #' | |
| 55 | #' @inheritParams format_fraction | |
| 56 | #' | |
| 57 | #' @return A string in the format `num / denom (ratio %)`. If `num` is 0, the format is `num / denom`. | |
| 58 | #' | |
| 59 | #' @examples | |
| 60 | #' format_fraction_fixed_dp(x = c(num = 1L, denom = 2L)) | |
| 61 | #' format_fraction_fixed_dp(x = c(num = 1L, denom = 4L)) | |
| 62 | #' format_fraction_fixed_dp(x = c(num = 0L, denom = 3L)) | |
| 63 | #' | |
| 64 | #' @family formatting functions | |
| 65 | #' @export | |
| 66 | format_fraction_fixed_dp <- function(x, ...) { | |
| 67 | 3x | attr(x, "label") <- NULL | 
| 68 | 3x | checkmate::assert_vector(x) | 
| 69 | 3x | checkmate::assert_count(x["num"]) | 
| 70 | 3x | checkmate::assert_count(x["denom"]) | 
| 71 | ||
| 72 | 3x |   result <- if (x["num"] == 0) { | 
| 73 | 1x | paste0(x["num"], "/", x["denom"]) | 
| 74 |   } else { | |
| 75 | 2x | paste0( | 
| 76 | 2x | x["num"], "/", x["denom"], | 
| 77 | 2x |       " (", sprintf("%.1f", round(x["num"] / x["denom"] * 100, 1)), "%)" | 
| 78 | ) | |
| 79 | } | |
| 80 | 3x | return(result) | 
| 81 | } | |
| 82 | ||
| 83 | #' Format count and fraction | |
| 84 | #' | |
| 85 | #' @description `r lifecycle::badge("stable")` | |
| 86 | #' | |
| 87 | #' Formats a count together with fraction with special consideration when count is `0`. | |
| 88 | #' | |
| 89 | #' @param x (`numeric(2)`)\cr vector of length 2 with count and fraction, respectively. | |
| 90 | #' @param ... not used. Required for `rtables` interface. | |
| 91 | #' | |
| 92 | #' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`. | |
| 93 | #' | |
| 94 | #' @examples | |
| 95 | #' format_count_fraction(x = c(2, 0.6667)) | |
| 96 | #' format_count_fraction(x = c(0, 0)) | |
| 97 | #' | |
| 98 | #' @family formatting functions | |
| 99 | #' @export | |
| 100 | format_count_fraction <- function(x, ...) { | |
| 101 | 102x | attr(x, "label") <- NULL | 
| 102 | ||
| 103 | 102x |   if (any(is.na(x))) { | 
| 104 | 1x |     return("NA") | 
| 105 | } | |
| 106 | ||
| 107 | 101x | checkmate::assert_vector(x) | 
| 108 | 101x | checkmate::assert_integerish(x[1]) | 
| 109 | 101x | assert_proportion_value(x[2], include_boundaries = TRUE) | 
| 110 | ||
| 111 | 101x |   result <- if (x[1] == 0) { | 
| 112 | 13x | "0" | 
| 113 |   } else { | |
| 114 | 88x |     paste0(x[1], " (", round(x[2] * 100, 1), "%)") | 
| 115 | } | |
| 116 | ||
| 117 | 101x | return(result) | 
| 118 | } | |
| 119 | ||
| 120 | #' Format count and percentage with fixed single decimal place | |
| 121 | #' | |
| 122 | #' @description `r lifecycle::badge("experimental")` | |
| 123 | #' | |
| 124 | #' Formats a count together with fraction with special consideration when count is `0`. | |
| 125 | #' | |
| 126 | #' @inheritParams format_count_fraction | |
| 127 | #' | |
| 128 | #' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`. | |
| 129 | #' | |
| 130 | #' @examples | |
| 131 | #' format_count_fraction_fixed_dp(x = c(2, 0.6667)) | |
| 132 | #' format_count_fraction_fixed_dp(x = c(2, 0.5)) | |
| 133 | #' format_count_fraction_fixed_dp(x = c(0, 0)) | |
| 134 | #' | |
| 135 | #' @family formatting functions | |
| 136 | #' @export | |
| 137 | format_count_fraction_fixed_dp <- function(x, ...) { | |
| 138 | 1408x | attr(x, "label") <- NULL | 
| 139 | ||
| 140 | 1408x |   if (any(is.na(x))) { | 
| 141 | ! |     return("NA") | 
| 142 | } | |
| 143 | ||
| 144 | 1408x | checkmate::assert_vector(x) | 
| 145 | 1408x | checkmate::assert_integerish(x[1]) | 
| 146 | 1408x | assert_proportion_value(x[2], include_boundaries = TRUE) | 
| 147 | ||
| 148 | 1408x |   result <- if (x[1] == 0) { | 
| 149 | 195x | "0" | 
| 150 | 1408x |   } else if (.is_equal_float(x[2], 1)) { | 
| 151 | 549x |     sprintf("%d (100%%)", x[1]) | 
| 152 |   } else { | |
| 153 | 664x |     sprintf("%d (%.1f%%)", x[1], x[2] * 100) | 
| 154 | } | |
| 155 | ||
| 156 | 1408x | return(result) | 
| 157 | } | |
| 158 | ||
| 159 | #' Format count and fraction with special case for count < 10 | |
| 160 | #' | |
| 161 | #' @description `r lifecycle::badge("stable")` | |
| 162 | #' | |
| 163 | #' Formats a count together with fraction with special consideration when count is less than 10. | |
| 164 | #' | |
| 165 | #' @inheritParams format_count_fraction | |
| 166 | #' | |
| 167 | #' @return A string in the format `count (fraction %)`. If `count` is less than 10, only `count` is printed. | |
| 168 | #' | |
| 169 | #' @examples | |
| 170 | #' format_count_fraction_lt10(x = c(275, 0.9673)) | |
| 171 | #' format_count_fraction_lt10(x = c(2, 0.6667)) | |
| 172 | #' format_count_fraction_lt10(x = c(9, 1)) | |
| 173 | #' | |
| 174 | #' @family formatting functions | |
| 175 | #' @export | |
| 176 | format_count_fraction_lt10 <- function(x, ...) { | |
| 177 | 7x | attr(x, "label") <- NULL | 
| 178 | ||
| 179 | 7x |   if (any(is.na(x))) { | 
| 180 | 1x |     return("NA") | 
| 181 | } | |
| 182 | ||
| 183 | 6x | checkmate::assert_vector(x) | 
| 184 | 6x | checkmate::assert_integerish(x[1]) | 
| 185 | 6x | assert_proportion_value(x[2], include_boundaries = TRUE) | 
| 186 | ||
| 187 | 6x |   result <- if (x[1] < 10) { | 
| 188 | 3x | paste0(x[1]) | 
| 189 |   } else { | |
| 190 | 3x |     paste0(x[1], " (", round(x[2] * 100, 1), "%)") | 
| 191 | } | |
| 192 | ||
| 193 | 6x | return(result) | 
| 194 | } | |
| 195 | ||
| 196 | #' Format XX as a formatting function | |
| 197 | #' | |
| 198 | #' Translate a string where x and dots are interpreted as number place | |
| 199 | #' holders, and others as formatting elements. | |
| 200 | #' | |
| 201 | #' @param str (`string`)\cr template. | |
| 202 | #' | |
| 203 | #' @return An `rtables` formatting function. | |
| 204 | #' | |
| 205 | #' @examples | |
| 206 | #' test <- list(c(1.658, 0.5761), c(1e1, 785.6)) | |
| 207 | #' | |
| 208 | #' z <- format_xx("xx (xx.x)") | |
| 209 | #' sapply(test, z) | |
| 210 | #' | |
| 211 | #' z <- format_xx("xx.x - xx.x") | |
| 212 | #' sapply(test, z) | |
| 213 | #' | |
| 214 | #' z <- format_xx("xx.x, incl. xx.x% NE") | |
| 215 | #' sapply(test, z) | |
| 216 | #' | |
| 217 | #' @family formatting functions | |
| 218 | #' @export | |
| 219 | format_xx <- function(str) { | |
| 220 | # Find position in the string. | |
| 221 | 1x | positions <- gregexpr(pattern = "x+\\.x+|x+", text = str, perl = TRUE) | 
| 222 | 1x | x_positions <- regmatches(x = str, m = positions)[[1]] | 
| 223 | ||
| 224 | # Roundings depends on the number of x behind [.]. | |
| 225 | 1x | roundings <- lapply( | 
| 226 | 1x | X = x_positions, | 
| 227 | 1x |     function(x) { | 
| 228 | 2x | y <- strsplit(split = "\\.", x = x)[[1]] | 
| 229 | 2x |       rounding <- function(x) { | 
| 230 | 4x | round(x, digits = ifelse(length(y) > 1, nchar(y[2]), 0)) | 
| 231 | } | |
| 232 | 2x | return(rounding) | 
| 233 | } | |
| 234 | ) | |
| 235 | ||
| 236 | 1x |   rtable_format <- function(x, output) { | 
| 237 | 2x | values <- Map(y = x, fun = roundings, function(y, fun) fun(y)) | 
| 238 | 2x | regmatches(x = str, m = positions)[[1]] <- values | 
| 239 | 2x | return(str) | 
| 240 | } | |
| 241 | ||
| 242 | 1x | return(rtable_format) | 
| 243 | } | |
| 244 | ||
| 245 | #' Format numeric values by significant figures | |
| 246 | #' | |
| 247 | #' Format numeric values to print with a specified number of significant figures. | |
| 248 | #' | |
| 249 | #' @param sigfig (`integer(1)`)\cr number of significant figures to display. | |
| 250 | #' @param format (`string`)\cr the format label (string) to apply when printing the value. Decimal | |
| 251 | #' places in string are ignored in favor of formatting by significant figures. Formats options are: | |
| 252 | #' `"xx"`, `"xx / xx"`, `"(xx, xx)"`, `"xx - xx"`, and `"xx (xx)"`. | |
| 253 | #' @param num_fmt (`string`)\cr numeric format modifiers to apply to the value. Defaults to `"fg"` for | |
| 254 | #' standard significant figures formatting - fixed (non-scientific notation) format (`"f"`) | |
| 255 | #' and `sigfig` equal to number of significant figures instead of decimal places (`"g"`). See the | |
| 256 | #' [formatC()] `format` argument for more options. | |
| 257 | #' | |
| 258 | #' @return An `rtables` formatting function. | |
| 259 | #' | |
| 260 | #' @examples | |
| 261 | #' fmt_3sf <- format_sigfig(3) | |
| 262 | #' fmt_3sf(1.658) | |
| 263 | #' fmt_3sf(1e1) | |
| 264 | #' | |
| 265 | #' fmt_5sf <- format_sigfig(5) | |
| 266 | #' fmt_5sf(0.57) | |
| 267 | #' fmt_5sf(0.000025645) | |
| 268 | #' | |
| 269 | #' @family formatting functions | |
| 270 | #' @export | |
| 271 | format_sigfig <- function(sigfig, format = "xx", num_fmt = "fg") { | |
| 272 | 3x | checkmate::assert_integerish(sigfig) | 
| 273 | 3x |   format <- gsub("xx\\.|xx\\.x+", "xx", format) | 
| 274 | 3x |   checkmate::assert_choice(format, c("xx", "xx / xx", "(xx, xx)", "xx - xx", "xx (xx)")) | 
| 275 | 3x |   function(x, ...) { | 
| 276 | ! |     if (!is.numeric(x)) stop("`format_sigfig` cannot be used for non-numeric values. Please choose another format.") | 
| 277 | 12x | num <- formatC(signif(x, digits = sigfig), digits = sigfig, format = num_fmt, flag = "#") | 
| 278 | 12x |     num <- gsub("\\.$", "", num) # remove trailing "." | 
| 279 | ||
| 280 | 12x | format_value(num, format) | 
| 281 | } | |
| 282 | } | |
| 283 | ||
| 284 | #' Format fraction with lower threshold | |
| 285 | #' | |
| 286 | #' @description `r lifecycle::badge("stable")` | |
| 287 | #' | |
| 288 | #' Formats a fraction when the second element of the input `x` is the fraction. It applies | |
| 289 | #' a lower threshold, below which it is just stated that the fraction is smaller than that. | |
| 290 | #' | |
| 291 | #' @param threshold (`proportion`)\cr lower threshold. | |
| 292 | #' | |
| 293 | #' @return An `rtables` formatting function that takes numeric input `x` where the second | |
| 294 | #' element is the fraction that is formatted. If the fraction is above or equal to the threshold, | |
| 295 | #' then it is displayed in percentage. If it is positive but below the threshold, it returns, | |
| 296 | #' e.g. "<1" if the threshold is `0.01`. If it is zero, then just "0" is returned. | |
| 297 | #' | |
| 298 | #' @examples | |
| 299 | #' format_fun <- format_fraction_threshold(0.05) | |
| 300 | #' format_fun(x = c(20, 0.1)) | |
| 301 | #' format_fun(x = c(2, 0.01)) | |
| 302 | #' format_fun(x = c(0, 0)) | |
| 303 | #' | |
| 304 | #' @family formatting functions | |
| 305 | #' @export | |
| 306 | format_fraction_threshold <- function(threshold) { | |
| 307 | 1x | assert_proportion_value(threshold) | 
| 308 | 1x |   string_below_threshold <- paste0("<", round(threshold * 100)) | 
| 309 | 1x |   function(x, ...) { | 
| 310 | 3x | assert_proportion_value(x[2], include_boundaries = TRUE) | 
| 311 | 3x | ifelse( | 
| 312 | 3x | x[2] > 0.01, | 
| 313 | 3x | round(x[2] * 100), | 
| 314 | 3x | ifelse( | 
| 315 | 3x | x[2] == 0, | 
| 316 | 3x | "0", | 
| 317 | 3x | string_below_threshold | 
| 318 | ) | |
| 319 | ) | |
| 320 | } | |
| 321 | } | |
| 322 | ||
| 323 | #' Format extreme values | |
| 324 | #' | |
| 325 | #' @description `r lifecycle::badge("stable")` | |
| 326 | #' | |
| 327 | #' `rtables` formatting functions that handle extreme values. | |
| 328 | #' | |
| 329 | #' @param digits (`integer(1)`)\cr number of decimal places to display. | |
| 330 | #' | |
| 331 | #' @details For each input, apply a format to the specified number of `digits`. If the value is | |
| 332 | #' below a threshold, it returns "<0.01" e.g. if the number of `digits` is 2. If the value is | |
| 333 | #' above a threshold, it returns ">999.99" e.g. if the number of `digits` is 2. | |
| 334 | #' If it is zero, then returns "0.00". | |
| 335 | #' | |
| 336 | #' @family formatting functions | |
| 337 | #' @name extreme_format | |
| 338 | NULL | |
| 339 | ||
| 340 | #' @describeIn extreme_format Internal helper function to calculate the threshold and create formatted strings | |
| 341 | #' used in Formatting Functions. Returns a list with elements `threshold` and `format_string`. | |
| 342 | #' | |
| 343 | #' @return | |
| 344 | #' * `h_get_format_threshold()` returns a `list` of 2 elements: `threshold`, with `low` and `high` thresholds, | |
| 345 | #' and `format_string`, with thresholds formatted as strings. | |
| 346 | #' | |
| 347 | #' @examples | |
| 348 | #' h_get_format_threshold(2L) | |
| 349 | #' | |
| 350 | #' @export | |
| 351 | h_get_format_threshold <- function(digits = 2L) { | |
| 352 | 2013x | checkmate::assert_integerish(digits) | 
| 353 | ||
| 354 | 2013x | low_threshold <- 1 / (10 ^ digits) # styler: off | 
| 355 | 2013x | high_threshold <- 1000 - (1 / (10 ^ digits)) # styler: off | 
| 356 | ||
| 357 | 2013x |   string_below_threshold <- paste0("<", low_threshold) | 
| 358 | 2013x |   string_above_threshold <- paste0(">", high_threshold) | 
| 359 | ||
| 360 | 2013x | list( | 
| 361 | 2013x | "threshold" = c(low = low_threshold, high = high_threshold), | 
| 362 | 2013x | "format_string" = c(low = string_below_threshold, high = string_above_threshold) | 
| 363 | ) | |
| 364 | } | |
| 365 | ||
| 366 | #' @describeIn extreme_format Internal helper function to apply a threshold format to a value. | |
| 367 | #' Creates a formatted string to be used in Formatting Functions. | |
| 368 | #' | |
| 369 | #' @param x (`numeric(1)`)\cr value to format. | |
| 370 | #' | |
| 371 | #' @return | |
| 372 | #' * `h_format_threshold()` returns the given value, or if the value is not within the digit threshold the relation | |
| 373 | #' of the given value to the digit threshold, as a formatted string. | |
| 374 | #' | |
| 375 | #' @examples | |
| 376 | #' h_format_threshold(0.001) | |
| 377 | #' h_format_threshold(1000) | |
| 378 | #' | |
| 379 | #' @export | |
| 380 | h_format_threshold <- function(x, digits = 2L) { | |
| 381 | 2015x |   if (is.na(x)) { | 
| 382 | 4x | return(x) | 
| 383 | } | |
| 384 | ||
| 385 | 2011x | checkmate::assert_numeric(x, lower = 0) | 
| 386 | ||
| 387 | 2011x | l_fmt <- h_get_format_threshold(digits) | 
| 388 | ||
| 389 | 2011x |   result <- if (x < l_fmt$threshold["low"] && 0 < x) { | 
| 390 | 44x | l_fmt$format_string["low"] | 
| 391 | 2011x |   } else if (x > l_fmt$threshold["high"]) { | 
| 392 | 99x | l_fmt$format_string["high"] | 
| 393 |   } else { | |
| 394 | 1868x |     sprintf(fmt = paste0("%.", digits, "f"), x) | 
| 395 | } | |
| 396 | ||
| 397 | 2011x | unname(result) | 
| 398 | } | |
| 399 | ||
| 400 | #' Format a single extreme value | |
| 401 | #' | |
| 402 | #' @description `r lifecycle::badge("stable")` | |
| 403 | #' | |
| 404 | #' Create a formatting function for a single extreme value. | |
| 405 | #' | |
| 406 | #' @inheritParams extreme_format | |
| 407 | #' | |
| 408 | #' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme value. | |
| 409 | #' | |
| 410 | #' @examples | |
| 411 | #' format_fun <- format_extreme_values(2L) | |
| 412 | #' format_fun(x = 0.127) | |
| 413 | #' format_fun(x = Inf) | |
| 414 | #' format_fun(x = 0) | |
| 415 | #' format_fun(x = 0.009) | |
| 416 | #' | |
| 417 | #' @family formatting functions | |
| 418 | #' @export | |
| 419 | format_extreme_values <- function(digits = 2L) { | |
| 420 | 1x |   function(x, ...) { | 
| 421 | 5x | checkmate::assert_scalar(x, na.ok = TRUE) | 
| 422 | ||
| 423 | 5x | h_format_threshold(x = x, digits = digits) | 
| 424 | } | |
| 425 | } | |
| 426 | ||
| 427 | #' Format extreme values part of a confidence interval | |
| 428 | #' | |
| 429 | #' @description `r lifecycle::badge("stable")` | |
| 430 | #' | |
| 431 | #' Formatting Function for extreme values part of a confidence interval. Values | |
| 432 | #' are formatted as e.g. "(xx.xx, xx.xx)" if the number of `digits` is 2. | |
| 433 | #' | |
| 434 | #' @inheritParams extreme_format | |
| 435 | #' | |
| 436 | #' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme | |
| 437 | #' values confidence interval. | |
| 438 | #' | |
| 439 | #' @examples | |
| 440 | #' format_fun <- format_extreme_values_ci(2L) | |
| 441 | #' format_fun(x = c(0.127, Inf)) | |
| 442 | #' format_fun(x = c(0, 0.009)) | |
| 443 | #' | |
| 444 | #' @family formatting functions | |
| 445 | #' @export | |
| 446 | format_extreme_values_ci <- function(digits = 2L) { | |
| 447 | 9x |   function(x, ...) { | 
| 448 | 54x | checkmate::assert_vector(x, len = 2) | 
| 449 | 54x | l_result <- h_format_threshold(x = x[1], digits = digits) | 
| 450 | 54x | h_result <- h_format_threshold(x = x[2], digits = digits) | 
| 451 | ||
| 452 | 54x |     paste0("(", l_result, ", ", h_result, ")") | 
| 453 | } | |
| 454 | } | |
| 455 | ||
| 456 | #' Format automatically using data significant digits | |
| 457 | #' | |
| 458 | #' @description `r lifecycle::badge("stable")` | |
| 459 | #' | |
| 460 | #' Formatting function for the majority of default methods used in [analyze_vars()]. | |
| 461 | #' For non-derived values, the significant digits of data is used (e.g. range), while derived | |
| 462 | #' values have one more digits (measure of location and dispersion like mean, standard deviation). | |
| 463 | #' This function can be called internally with "auto" like, for example, | |
| 464 | #' `.formats = c("mean" = "auto")`. See details to see how this works with the inner function. | |
| 465 | #' | |
| 466 | #' @param dt_var (`numeric`)\cr variable data the statistics were calculated from. Used only to | |
| 467 | #' find significant digits. In [analyze_vars] this comes from `.df_row` (see | |
| 468 | #' [rtables::additional_fun_params]), and it is the row data after the above row splits. No | |
| 469 | #' column split is considered. | |
| 470 | #' @param x_stat (`string`)\cr string indicating the current statistical method used. | |
| 471 | #' | |
| 472 | #' @return A string that `rtables` prints in a table cell. | |
| 473 | #' | |
| 474 | #' @details | |
| 475 | #' The internal function is needed to work with `rtables` default structure for | |
| 476 | #' format functions, i.e. `function(x, ...)`, where is x are results from statistical evaluation. | |
| 477 | #' It can be more than one element (e.g. for `.stats = "mean_sd"`). | |
| 478 | #' | |
| 479 | #' @examples | |
| 480 | #' x_todo <- c(0.001, 0.2, 0.0011000, 3, 4) | |
| 481 | #' res <- c(mean(x_todo[1:3]), sd(x_todo[1:3])) | |
| 482 | #' | |
| 483 | #' # x is the result coming into the formatting function -> res!! | |
| 484 | #' format_auto(dt_var = x_todo, x_stat = "mean_sd")(x = res) | |
| 485 | #' format_auto(x_todo, "range")(x = range(x_todo)) | |
| 486 | #' no_sc_x <- c(0.0000001, 1) | |
| 487 | #' format_auto(no_sc_x, "range")(x = no_sc_x) | |
| 488 | #' | |
| 489 | #' @family formatting functions | |
| 490 | #' @export | |
| 491 | format_auto <- function(dt_var, x_stat) { | |
| 492 | 16x |   function(x = "", ...) { | 
| 493 | 56x | checkmate::assert_numeric(x, min.len = 1) | 
| 494 | 56x | checkmate::assert_numeric(dt_var, min.len = 1) | 
| 495 | # Defaults - they may be a param in the future | |
| 496 | 56x | der_stats <- c( | 
| 497 | 56x | "mean", "sd", "se", "median", "geom_mean", "quantiles", "iqr", | 
| 498 | 56x | "mean_sd", "mean_se", "mean_se", "mean_ci", "mean_sei", "mean_sdi", | 
| 499 | 56x | "median_ci" | 
| 500 | ) | |
| 501 | 56x |     nonder_stats <- c("n", "range", "min", "max") | 
| 502 | ||
| 503 | # Safenet for miss-modifications | |
| 504 | 56x | stopifnot(length(intersect(der_stats, nonder_stats)) == 0) # nolint | 
| 505 | 56x | checkmate::assert_choice(x_stat, c(der_stats, nonder_stats)) | 
| 506 | ||
| 507 | # Finds the max number of digits in data | |
| 508 | 56x | detect_dig <- vapply(dt_var, count_decimalplaces, FUN.VALUE = numeric(1)) %>% | 
| 509 | 56x | max() | 
| 510 | ||
| 511 | 56x |     if (x_stat %in% der_stats) { | 
| 512 | 40x | detect_dig <- detect_dig + 1 | 
| 513 | } | |
| 514 | ||
| 515 | # Render input | |
| 516 | 56x | str_vals <- formatC(x, digits = detect_dig, format = "f") | 
| 517 | 56x | def_fmt <- get_formats_from_stats(x_stat)[[x_stat]] | 
| 518 | 56x | str_fmt <- str_extract(def_fmt, invert = FALSE)[[1]] | 
| 519 | 56x |     if (length(str_fmt) != length(str_vals)) { | 
| 520 | 2x | stop( | 
| 521 | 2x |         "Number of inserted values as result (", length(str_vals), | 
| 522 | 2x | ") is not the same as there should be in the default tern formats for ", | 
| 523 | 2x | x_stat, " (-> ", def_fmt, " needs ", length(str_fmt), " values). ", | 
| 524 | 2x | "See tern_default_formats to check all of them." | 
| 525 | ) | |
| 526 | } | |
| 527 | ||
| 528 | # Squashing them together | |
| 529 | 54x | inv_str_fmt <- str_extract(def_fmt, invert = TRUE)[[1]] | 
| 530 | 54x | stopifnot(length(inv_str_fmt) == length(str_vals) + 1) # nolint | 
| 531 | ||
| 532 | 54x |     out <- vector("character", length = length(inv_str_fmt) + length(str_vals)) | 
| 533 | 54x | is_even <- seq_along(out) %% 2 == 0 | 
| 534 | 54x | out[is_even] <- str_vals | 
| 535 | 54x | out[!is_even] <- inv_str_fmt | 
| 536 | ||
| 537 | 54x | return(paste0(out, collapse = "")) | 
| 538 | } | |
| 539 | } | |
| 540 | ||
| 541 | # Utility function that could be useful in general | |
| 542 | str_extract <- function(string, pattern = "xx|xx\\.|xx\\.x+", invert = FALSE) { | |
| 543 | 110x | regmatches(string, gregexpr(pattern, string), invert = invert) | 
| 544 | } | |
| 545 | ||
| 546 | # Helper function | |
| 547 | count_decimalplaces <- function(dec) { | |
| 548 | 2038x |   if (is.na(dec)) { | 
| 549 | 6x | return(0) | 
| 550 | 2032x |   } else if (abs(dec - round(dec)) > .Machine$double.eps^0.5) { # For precision | 
| 551 | 1939x | nchar(strsplit(format(dec, scientific = FALSE, trim = FALSE), ".", fixed = TRUE)[[1]][[2]]) | 
| 552 |   } else { | |
| 553 | 93x | return(0) | 
| 554 | } | |
| 555 | } | |
| 556 | ||
| 557 | #' Apply automatic formatting | |
| 558 | #' | |
| 559 | #' Checks if any of the listed formats in `.formats` are `"auto"`, and replaces `"auto"` with | |
| 560 | #' the correct implementation of `format_auto` for the given statistics, data, and variable. | |
| 561 | #' | |
| 562 | #' @inheritParams argument_convention | |
| 563 | #' @param x_stats (named `list`)\cr a named list of statistics where each element corresponds | |
| 564 | #' to an element in `.formats`, with matching names. | |
| 565 | #' | |
| 566 | #' @keywords internal | |
| 567 | apply_auto_formatting <- function(.formats, x_stats, .df_row, .var) { | |
| 568 | 1574x | is_auto_fmt <- vapply(.formats, function(ii) is.character(ii) && ii == "auto", logical(1)) | 
| 569 | 1574x |   if (any(is_auto_fmt)) { | 
| 570 | 8x | auto_stats <- x_stats[is_auto_fmt] | 
| 571 | 8x | var_df <- .df_row[[.var]] # xxx this can be extended for the WHOLE data or single facets | 
| 572 | 8x | .formats[is_auto_fmt] <- lapply(names(auto_stats), format_auto, dt_var = var_df) | 
| 573 | } | |
| 574 | 1574x | .formats | 
| 575 | } | 
| 1 | #' Create a forest plot from an `rtable` | |
| 2 | #' | |
| 3 | #' Given a [rtables::rtable()] object with at least one column with a single value and one column with 2 | |
| 4 | #' values, converts table to a [ggplot2::ggplot()] object and generates an accompanying forest plot. The | |
| 5 | #' table and forest plot are printed side-by-side. | |
| 6 | #' | |
| 7 | #' @description `r lifecycle::badge("stable")` | |
| 8 | #' | |
| 9 | #' @inheritParams rtable2gg | |
| 10 | #' @inheritParams argument_convention | |
| 11 | #' @param tbl (`VTableTree`)\cr `rtables` table with at least one column with a single value and one column with 2 | |
| 12 | #' values. | |
| 13 | #' @param col_x (`integer(1)` or `NULL`)\cr column index with estimator. By default tries to get this from | |
| 14 | #' `tbl` attribute `col_x`, otherwise needs to be manually specified. If `NULL`, points will be excluded | |
| 15 | #' from forest plot. | |
| 16 | #' @param col_ci (`integer(1)` or `NULL`)\cr column index with confidence intervals. By default tries to get this from | |
| 17 | #' `tbl` attribute `col_ci`, otherwise needs to be manually specified. If `NULL`, lines will be excluded | |
| 18 | #' from forest plot. | |
| 19 | #' @param vline (`numeric(1)` or `NULL`)\cr x coordinate for vertical line, if `NULL` then the line is omitted. | |
| 20 | #' @param forest_header (`character(2)`)\cr text displayed to the left and right of `vline`, respectively. | |
| 21 | #' If `vline = NULL` then `forest_header` is not printed. By default tries to get this from `tbl` attribute | |
| 22 | #' `forest_header`. If `NULL`, defaults will be extracted from the table if possible, and set to | |
| 23 | #' `"Comparison\nBetter"` and `"Treatment\nBetter"` if not. | |
| 24 | #' @param xlim (`numeric(2)`)\cr limits for x axis. | |
| 25 | #' @param logx (`flag`)\cr show the x-values on logarithm scale. | |
| 26 | #' @param x_at (`numeric`)\cr x-tick locations, if `NULL`, `x_at` is set to `vline` and both `xlim` values. | |
| 27 | #' @param width_row_names `r lifecycle::badge("deprecated")` Please use the `lbl_col_padding` argument instead. | |
| 28 | #' @param width_columns (`numeric`)\cr a vector of column widths. Each element's position in | |
| 29 | #' `colwidths` corresponds to the column of `tbl` in the same position. If `NULL`, column widths are calculated | |
| 30 | #' according to maximum number of characters per column. | |
| 31 | #' @param width_forest `r lifecycle::badge("deprecated")` Please use the `rel_width_forest` argument instead. | |
| 32 | #' @param rel_width_forest (`proportion`)\cr proportion of total width to allocate to the forest plot. Relative | |
| 33 | #' width of table is then `1 - rel_width_forest`. If `as_list = TRUE`, this parameter is ignored. | |
| 34 | #' @param font_size (`numeric(1)`)\cr font size. | |
| 35 | #' @param col_symbol_size (`numeric` or `NULL`)\cr column index from `tbl` containing data to be used | |
| 36 | #' to determine relative size for estimator plot symbol. Typically, the symbol size is proportional | |
| 37 | #' to the sample size used to calculate the estimator. If `NULL`, the same symbol size is used for all subgroups. | |
| 38 | #' By default tries to get this from `tbl` attribute `col_symbol_size`, otherwise needs to be manually specified. | |
| 39 | #' @param col (`character`)\cr color(s). | |
| 40 | #' @param ggtheme (`theme`)\cr a graphical theme as provided by `ggplot2` to control styling of the plot. | |
| 41 | #' @param as_list (`flag`)\cr whether the two `ggplot` objects should be returned as a list. If `TRUE`, a named list | |
| 42 | #' with two elements, `table` and `plot`, will be returned. If `FALSE` (default) the table and forest plot are | |
| 43 | #' printed side-by-side via [cowplot::plot_grid()]. | |
| 44 | #' @param gp `r lifecycle::badge("deprecated")` `g_forest` is now generated as a `ggplot` object. This argument | |
| 45 | #' is no longer used. | |
| 46 | #' @param draw `r lifecycle::badge("deprecated")` `g_forest` is now generated as a `ggplot` object. This argument | |
| 47 | #' is no longer used. | |
| 48 | #' @param newpage `r lifecycle::badge("deprecated")` `g_forest` is now generated as a `ggplot` object. This argument | |
| 49 | #' is no longer used. | |
| 50 | #' | |
| 51 | #' @return `ggplot` forest plot and table. | |
| 52 | #' | |
| 53 | #' @examples | |
| 54 | #' library(dplyr) | |
| 55 | #' library(forcats) | |
| 56 | #' | |
| 57 | #' adrs <- tern_ex_adrs | |
| 58 | #' n_records <- 20 | |
| 59 | #' adrs_labels <- formatters::var_labels(adrs, fill = TRUE) | |
| 60 | #' adrs <- adrs %>% | |
| 61 | #' filter(PARAMCD == "BESRSPI") %>% | |
| 62 | #'   filter(ARM %in% c("A: Drug X", "B: Placebo")) %>% | |
| 63 | #' slice(seq_len(n_records)) %>% | |
| 64 | #' droplevels() %>% | |
| 65 | #' mutate( | |
| 66 | #' # Reorder levels of factor to make the placebo group the reference arm. | |
| 67 | #' ARM = fct_relevel(ARM, "B: Placebo"), | |
| 68 | #' rsp = AVALC == "CR" | |
| 69 | #' ) | |
| 70 | #' formatters::var_labels(adrs) <- c(adrs_labels, "Response") | |
| 71 | #' df <- extract_rsp_subgroups( | |
| 72 | #'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "STRATA2")), | |
| 73 | #' data = adrs | |
| 74 | #' ) | |
| 75 | #' # Full commonly used response table. | |
| 76 | #' | |
| 77 | #' tbl <- basic_table() %>% | |
| 78 | #' tabulate_rsp_subgroups(df) | |
| 79 | #' g_forest(tbl) | |
| 80 | #' | |
| 81 | #' # Odds ratio only table. | |
| 82 | #' | |
| 83 | #' tbl_or <- basic_table() %>% | |
| 84 | #'   tabulate_rsp_subgroups(df, vars = c("n_tot", "or", "ci")) | |
| 85 | #' g_forest( | |
| 86 | #' tbl_or, | |
| 87 | #'   forest_header = c("Comparison\nBetter", "Treatment\nBetter") | |
| 88 | #' ) | |
| 89 | #' | |
| 90 | #' # Survival forest plot example. | |
| 91 | #' adtte <- tern_ex_adtte | |
| 92 | #' # Save variable labels before data processing steps. | |
| 93 | #' adtte_labels <- formatters::var_labels(adtte, fill = TRUE) | |
| 94 | #' adtte_f <- adtte %>% | |
| 95 | #' filter( | |
| 96 | #' PARAMCD == "OS", | |
| 97 | #'     ARM %in% c("B: Placebo", "A: Drug X"), | |
| 98 | #'     SEX %in% c("M", "F") | |
| 99 | #' ) %>% | |
| 100 | #' mutate( | |
| 101 | #' # Reorder levels of ARM to display reference arm before treatment arm. | |
| 102 | #' ARM = droplevels(fct_relevel(ARM, "B: Placebo")), | |
| 103 | #' SEX = droplevels(SEX), | |
| 104 | #' AVALU = as.character(AVALU), | |
| 105 | #' is_event = CNSR == 0 | |
| 106 | #' ) | |
| 107 | #' labels <- list( | |
| 108 | #' "ARM" = adtte_labels["ARM"], | |
| 109 | #' "SEX" = adtte_labels["SEX"], | |
| 110 | #' "AVALU" = adtte_labels["AVALU"], | |
| 111 | #' "is_event" = "Event Flag" | |
| 112 | #' ) | |
| 113 | #' formatters::var_labels(adtte_f)[names(labels)] <- as.character(labels) | |
| 114 | #' df <- extract_survival_subgroups( | |
| 115 | #' variables = list( | |
| 116 | #' tte = "AVAL", | |
| 117 | #' is_event = "is_event", | |
| 118 | #'     arm = "ARM", subgroups = c("SEX", "BMRKR2") | |
| 119 | #' ), | |
| 120 | #' data = adtte_f | |
| 121 | #' ) | |
| 122 | #' table_hr <- basic_table() %>% | |
| 123 | #' tabulate_survival_subgroups(df, time_unit = adtte_f$AVALU[1]) | |
| 124 | #' g_forest(table_hr) | |
| 125 | #' | |
| 126 | #' # Works with any `rtable`. | |
| 127 | #' tbl <- rtable( | |
| 128 | #'   header = c("E", "CI", "N"), | |
| 129 | #'   rrow("", 1, c(.8, 1.2), 200), | |
| 130 | #'   rrow("", 1.2, c(1.1, 1.4), 50) | |
| 131 | #' ) | |
| 132 | #' g_forest( | |
| 133 | #' tbl = tbl, | |
| 134 | #' col_x = 1, | |
| 135 | #' col_ci = 2, | |
| 136 | #' xlim = c(0.5, 2), | |
| 137 | #' x_at = c(0.5, 1, 2), | |
| 138 | #' col_symbol_size = 3 | |
| 139 | #' ) | |
| 140 | #' | |
| 141 | #' tbl <- rtable( | |
| 142 | #' header = rheader( | |
| 143 | #'     rrow("", rcell("A", colspan = 2)), | |
| 144 | #'     rrow("", "c1", "c2") | |
| 145 | #' ), | |
| 146 | #'   rrow("row 1", 1, c(.8, 1.2)), | |
| 147 | #'   rrow("row 2", 1.2, c(1.1, 1.4)) | |
| 148 | #' ) | |
| 149 | #' g_forest( | |
| 150 | #' tbl = tbl, | |
| 151 | #' col_x = 1, | |
| 152 | #' col_ci = 2, | |
| 153 | #' xlim = c(0.5, 2), | |
| 154 | #' x_at = c(0.5, 1, 2), | |
| 155 | #' vline = 1, | |
| 156 | #'   forest_header = c("Hello", "World") | |
| 157 | #' ) | |
| 158 | #' | |
| 159 | #' @export | |
| 160 | g_forest <- function(tbl, | |
| 161 | col_x = attr(tbl, "col_x"), | |
| 162 | col_ci = attr(tbl, "col_ci"), | |
| 163 | vline = 1, | |
| 164 | forest_header = attr(tbl, "forest_header"), | |
| 165 | xlim = c(0.1, 10), | |
| 166 | logx = TRUE, | |
| 167 | x_at = c(0.1, 1, 10), | |
| 168 | width_row_names = lifecycle::deprecated(), | |
| 169 | width_columns = NULL, | |
| 170 | width_forest = lifecycle::deprecated(), | |
| 171 | lbl_col_padding = 0, | |
| 172 | rel_width_forest = 0.25, | |
| 173 | font_size = 12, | |
| 174 | col_symbol_size = attr(tbl, "col_symbol_size"), | |
| 175 |                      col = getOption("ggplot2.discrete.colour")[1], | |
| 176 | ggtheme = NULL, | |
| 177 | as_list = FALSE, | |
| 178 | gp = lifecycle::deprecated(), | |
| 179 | draw = lifecycle::deprecated(), | |
| 180 |                      newpage = lifecycle::deprecated()) { | |
| 181 | # Deprecated argument warnings | |
| 182 | 4x |   if (lifecycle::is_present(width_row_names)) { | 
| 183 | 1x | lifecycle::deprecate_warn( | 
| 184 | 1x | "0.9.4", "g_forest(width_row_names)", "g_forest(lbl_col_padding)", | 
| 185 | 1x | details = "The width of the row label column can be adjusted via the `lbl_col_padding` parameter." | 
| 186 | ) | |
| 187 | } | |
| 188 | 4x |   if (lifecycle::is_present(width_forest)) { | 
| 189 | 1x | lifecycle::deprecate_warn( | 
| 190 | 1x | "0.9.4", "g_forest(width_forest)", "g_forest(rel_width_forest)", | 
| 191 | 1x | details = "Relative width of the forest plot (as a proportion) can be set via the `rel_width_forest` parameter." | 
| 192 | ) | |
| 193 | } | |
| 194 | 4x |   if (lifecycle::is_present(gp)) { | 
| 195 | 1x | lifecycle::deprecate_warn( | 
| 196 | 1x | "0.9.4", "g_forest(gp)", "g_forest(ggtheme)", | 
| 197 | 1x | details = paste( | 
| 198 | 1x | "`g_forest` is now generated as a `ggplot` object.", | 
| 199 | 1x | "Additional display settings should be supplied via the `ggtheme` parameter." | 
| 200 | ) | |
| 201 | ) | |
| 202 | } | |
| 203 | 4x |   if (lifecycle::is_present(draw)) { | 
| 204 | 1x | lifecycle::deprecate_warn( | 
| 205 | 1x | "0.9.4", "g_forest(draw)", | 
| 206 | 1x | details = "`g_forest` now generates `ggplot` objects. This parameter has no effect." | 
| 207 | ) | |
| 208 | } | |
| 209 | 4x |   if (lifecycle::is_present(newpage)) { | 
| 210 | 1x | lifecycle::deprecate_warn( | 
| 211 | 1x | "0.9.4", "g_forest(newpage)", | 
| 212 | 1x | details = "`g_forest` now generates `ggplot` objects. This parameter has no effect." | 
| 213 | ) | |
| 214 | } | |
| 215 | ||
| 216 | 4x | checkmate::assert_class(tbl, "VTableTree") | 
| 217 | 4x | checkmate::assert_number(col_x, lower = 0, upper = ncol(tbl), null.ok = TRUE) | 
| 218 | 4x | checkmate::assert_number(col_ci, lower = 0, upper = ncol(tbl), null.ok = TRUE) | 
| 219 | 4x | checkmate::assert_number(col_symbol_size, lower = 0, upper = ncol(tbl), null.ok = TRUE) | 
| 220 | 4x | checkmate::assert_number(font_size, lower = 0) | 
| 221 | 4x | checkmate::assert_character(col, null.ok = TRUE) | 
| 222 | 4x | checkmate::assert_true(is.null(col) | length(col) == 1 | length(col) == nrow(tbl)) | 
| 223 | ||
| 224 | # Extract info from table | |
| 225 | 4x | mat <- matrix_form(tbl, indent_rownames = TRUE) | 
| 226 | 4x | mat_strings <- formatters::mf_strings(mat) | 
| 227 | 4x | nlines_hdr <- formatters::mf_nlheader(mat) | 
| 228 | 4x | nrows_body <- nrow(mat_strings) - nlines_hdr | 
| 229 | 4x | tbl_stats <- mat_strings[nlines_hdr, -1] | 
| 230 | ||
| 231 | # Generate and modify table as ggplot object | |
| 232 | 4x | gg_table <- rtable2gg(tbl, fontsize = font_size, colwidths = width_columns, lbl_col_padding = lbl_col_padding) + | 
| 233 | 4x | theme(plot.margin = margin(0, 0, 0, 0.025, "npc")) | 
| 234 | 4x | gg_table$scales$scales[[1]]$expand <- c(0.01, 0.01) | 
| 235 | 4x | gg_table$scales$scales[[2]]$limits[2] <- nrow(mat_strings) + 1 | 
| 236 | 4x |   if (nlines_hdr == 2) { | 
| 237 | 4x | gg_table$scales$scales[[2]]$expand <- c(0, 0) | 
| 238 | 4x | arms <- unique(mat_strings[1, ][nzchar(trimws(mat_strings[1, ]))]) | 
| 239 |   } else { | |
| 240 | ! | arms <- NULL | 
| 241 | } | |
| 242 | ||
| 243 | 4x | tbl_df <- as_result_df(tbl) | 
| 244 | 4x | dat_cols <- seq(which(names(tbl_df) == "node_class") + 1, ncol(tbl_df)) | 
| 245 | 4x | tbl_df <- tbl_df[, c(which(names(tbl_df) == "row_num"), dat_cols)] | 
| 246 | 4x |   names(tbl_df) <- c("row_num", tbl_stats) | 
| 247 | ||
| 248 | # Check table data columns | |
| 249 | 4x |   if (!is.null(col_ci)) { | 
| 250 | 4x | ci_col <- col_ci + 1 | 
| 251 |   } else { | |
| 252 | ! | tbl_df[["empty_ci"]] <- rep(list(c(NA_real_, NA_real_)), nrow(tbl_df)) | 
| 253 | ! | ci_col <- which(names(tbl_df) == "empty_ci") | 
| 254 | } | |
| 255 | ! |   if (length(tbl_df[, ci_col][[1]]) != 2) stop("CI column must have two elements (lower and upper limits).") | 
| 256 | ||
| 257 | 4x |   if (!is.null(col_x)) { | 
| 258 | 4x | x_col <- col_x + 1 | 
| 259 |   } else { | |
| 260 | ! | tbl_df[["empty_x"]] <- NA_real_ | 
| 261 | ! | x_col <- which(names(tbl_df) == "empty_x") | 
| 262 | } | |
| 263 | 4x |   if (!is.null(col_symbol_size)) { | 
| 264 | 3x | sym_size <- unlist(tbl_df[, col_symbol_size + 1]) | 
| 265 |   } else { | |
| 266 | 1x | sym_size <- rep(1, nrow(tbl_df)) | 
| 267 | } | |
| 268 | ||
| 269 | 4x |   tbl_df[, c("ci_lwr", "ci_upr")] <- t(sapply(tbl_df[, ci_col], unlist)) | 
| 270 | 4x | x <- unlist(tbl_df[, x_col]) | 
| 271 | 4x | lwr <- unlist(tbl_df[["ci_lwr"]]) | 
| 272 | 4x | upr <- unlist(tbl_df[["ci_upr"]]) | 
| 273 | 4x | row_num <- nrow(mat_strings) - tbl_df[["row_num"]] - as.numeric(nlines_hdr == 2) | 
| 274 | ||
| 275 | ! | if (is.null(col)) col <- "#343cff" | 
| 276 | 4x | if (length(col) == 1) col <- rep(col, nrow(tbl_df)) | 
| 277 | ! | if (is.null(x_at)) x_at <- union(xlim, vline) | 
| 278 | 4x | x_labels <- x_at | 
| 279 | ||
| 280 | # Apply log transformation | |
| 281 | 4x |   if (logx) { | 
| 282 | 4x | x_t <- log(x) | 
| 283 | 4x | lwr_t <- log(lwr) | 
| 284 | 4x | upr_t <- log(upr) | 
| 285 | 4x | xlim_t <- log(xlim) | 
| 286 |   } else { | |
| 287 | ! | x_t <- x | 
| 288 | ! | lwr_t <- lwr | 
| 289 | ! | upr_t <- upr | 
| 290 | ! | xlim_t <- xlim | 
| 291 | } | |
| 292 | ||
| 293 | # Set up plot area | |
| 294 | 4x | gg_plt <- ggplot(data = tbl_df) + | 
| 295 | 4x | theme( | 
| 296 | 4x | panel.background = element_rect(fill = "transparent", color = NA_character_), | 
| 297 | 4x | plot.background = element_rect(fill = "transparent", color = NA_character_), | 
| 298 | 4x | panel.grid.major = element_blank(), | 
| 299 | 4x | panel.grid.minor = element_blank(), | 
| 300 | 4x | axis.title.x = element_blank(), | 
| 301 | 4x | axis.title.y = element_blank(), | 
| 302 | 4x | axis.line.x = element_line(), | 
| 303 | 4x | axis.text = element_text(size = font_size), | 
| 304 | 4x | legend.position = "none", | 
| 305 | 4x | plot.margin = margin(0, 0.1, 0.05, 0, "npc") | 
| 306 | ) + | |
| 307 | 4x | scale_x_continuous( | 
| 308 | 4x | transform = ifelse(logx, "log", "identity"), | 
| 309 | 4x | limits = xlim, | 
| 310 | 4x | breaks = x_at, | 
| 311 | 4x | labels = x_labels, | 
| 312 | 4x | expand = c(0.01, 0) | 
| 313 | ) + | |
| 314 | 4x | scale_y_continuous( | 
| 315 | 4x | limits = c(0, nrow(mat_strings) + 1), | 
| 316 | 4x | breaks = NULL, | 
| 317 | 4x | expand = c(0, 0) | 
| 318 | ) + | |
| 319 | 4x | coord_cartesian(clip = "off") | 
| 320 | ||
| 321 | 4x |   if (is.null(ggtheme)) { | 
| 322 | 4x | gg_plt <- gg_plt + annotate( | 
| 323 | 4x | "rect", | 
| 324 | 4x | xmin = xlim[1], | 
| 325 | 4x | xmax = xlim[2], | 
| 326 | 4x | ymin = 0, | 
| 327 | 4x | ymax = nrows_body + 0.5, | 
| 328 | 4x | fill = "grey92" | 
| 329 | ) | |
| 330 | } | |
| 331 | ||
| 332 | 4x |   if (!is.null(vline)) { | 
| 333 | # Set default forest header | |
| 334 | 4x |     if (is.null(forest_header)) { | 
| 335 | ! | forest_header <- c( | 
| 336 | ! | paste(if (length(arms) == 2) arms[1] else "Comparison", "Better", sep = "\n"), | 
| 337 | ! | paste(if (length(arms) == 2) arms[2] else "Treatment", "Better", sep = "\n") | 
| 338 | ) | |
| 339 | } | |
| 340 | ||
| 341 | # Add vline and forest header labels | |
| 342 | 4x |     mid_pts <- if (logx) { | 
| 343 | 4x | c(exp(mean(log(c(xlim[1], vline)))), exp(mean(log(c(vline, xlim[2]))))) | 
| 344 |     } else { | |
| 345 | ! | c(mean(c(xlim[1], vline)), mean(c(vline, xlim[2]))) | 
| 346 | } | |
| 347 | 4x | gg_plt <- gg_plt + | 
| 348 | 4x | annotate( | 
| 349 | 4x | "segment", | 
| 350 | 4x | x = vline, xend = vline, y = 0, yend = nrows_body + 0.5 | 
| 351 | ) + | |
| 352 | 4x | annotate( | 
| 353 | 4x | "text", | 
| 354 | 4x | x = mid_pts[1], y = nrows_body + 1.25, | 
| 355 | 4x | label = forest_header[1], | 
| 356 | 4x | size = font_size / .pt, | 
| 357 | 4x | lineheight = 0.9 | 
| 358 | ) + | |
| 359 | 4x | annotate( | 
| 360 | 4x | "text", | 
| 361 | 4x | x = mid_pts[2], y = nrows_body + 1.25, | 
| 362 | 4x | label = forest_header[2], | 
| 363 | 4x | size = font_size / .pt, | 
| 364 | 4x | lineheight = 0.9 | 
| 365 | ) | |
| 366 | } | |
| 367 | ||
| 368 | # Add points to plot | |
| 369 | 4x |   if (any(!is.na(x_t))) { | 
| 370 | 4x | x_t[x < xlim[1] | x > xlim[2]] <- NA | 
| 371 | 4x | gg_plt <- gg_plt + geom_point( | 
| 372 | 4x | x = x_t, | 
| 373 | 4x | y = row_num, | 
| 374 | 4x | color = col, | 
| 375 | 4x | aes(size = sym_size), | 
| 376 | 4x | na.rm = TRUE | 
| 377 | ) | |
| 378 | } | |
| 379 | ||
| 380 | 4x |   for (i in seq_len(nrow(tbl_df))) { | 
| 381 | # Determine which arrow(s) to add to CI lines | |
| 382 | 17x | which_arrow <- c(lwr_t[i] < xlim_t[1], upr_t[i] > xlim_t[2]) | 
| 383 | 17x | which_arrow <- dplyr::case_when( | 
| 384 | 17x | all(which_arrow) ~ "both", | 
| 385 | 17x | which_arrow[1] ~ "first", | 
| 386 | 17x | which_arrow[2] ~ "last", | 
| 387 | 17x | TRUE ~ NA_character_ | 
| 388 | ) | |
| 389 | ||
| 390 | # Add CI lines | |
| 391 | 17x | gg_plt <- gg_plt + | 
| 392 | 17x |       if (!is.na(which_arrow)) { | 
| 393 | 15x | annotate( | 
| 394 | 15x | "segment", | 
| 395 | 15x |           x = if (!which_arrow %in% c("first", "both")) lwr[i] else xlim[1], | 
| 396 | 15x |           xend = if (!which_arrow %in% c("last", "both")) upr[i] else xlim[2], | 
| 397 | 15x | y = row_num[i], yend = row_num[i], | 
| 398 | 15x | color = if (length(col) == 1) col else col[i], | 
| 399 | 15x | arrow = arrow(length = unit(0.05, "npc"), ends = which_arrow), | 
| 400 | 15x | na.rm = TRUE | 
| 401 | ) | |
| 402 |       } else { | |
| 403 | 2x | annotate( | 
| 404 | 2x | "segment", | 
| 405 | 2x | x = lwr[i], xend = upr[i], | 
| 406 | 2x | y = row_num[i], yend = row_num[i], | 
| 407 | 2x | color = if (length(col) == 1) col else col[i], | 
| 408 | 2x | na.rm = TRUE | 
| 409 | ) | |
| 410 | } | |
| 411 | } | |
| 412 | ||
| 413 | # Apply custom ggtheme to plot | |
| 414 | ! | if (!is.null(ggtheme)) gg_plt <- gg_plt + ggtheme | 
| 415 | ||
| 416 | 4x |   if (as_list) { | 
| 417 | 1x | list( | 
| 418 | 1x | table = gg_table, | 
| 419 | 1x | plot = gg_plt | 
| 420 | ) | |
| 421 |   } else { | |
| 422 | 3x | cowplot::plot_grid( | 
| 423 | 3x | gg_table, | 
| 424 | 3x | gg_plt, | 
| 425 | 3x | align = "h", | 
| 426 | 3x | axis = "tblr", | 
| 427 | 3x | rel_widths = c(1 - rel_width_forest, rel_width_forest) | 
| 428 | ) | |
| 429 | } | |
| 430 | } | |
| 431 | ||
| 432 | #' Forest plot grob | |
| 433 | #' | |
| 434 | #' @description `r lifecycle::badge("deprecated")` | |
| 435 | #' | |
| 436 | #' @inheritParams g_forest | |
| 437 | #' @param tbl (`VTableTree`)\cr `rtables` table object. | |
| 438 | #' @param x (`numeric`)\cr coordinate of point. | |
| 439 | #' @param lower,upper (`numeric`)\cr lower/upper bound of the confidence interval. | |
| 440 | #' @param symbol_size (`numeric`)\cr vector with relative size for plot symbol. | |
| 441 | #' If `NULL`, the same symbol size is used. | |
| 442 | #' | |
| 443 | #' @details | |
| 444 | #' The heights get automatically determined. | |
| 445 | #' | |
| 446 | #' @examples | |
| 447 | #' tbl <- rtable( | |
| 448 | #' header = rheader( | |
| 449 | #'     rrow("", "E", rcell("CI", colspan = 2), "N"), | |
| 450 | #'     rrow("", "A", "B", "C", "D") | |
| 451 | #' ), | |
| 452 | #'   rrow("row 1", 1, 0.8, 1.1, 16), | |
| 453 | #'   rrow("row 2", 1.4, 0.8, 1.6, 25), | |
| 454 | #'   rrow("row 3", 1.2, 0.8, 1.6, 36) | |
| 455 | #' ) | |
| 456 | #' | |
| 457 | #' x <- c(1, 1.4, 1.2) | |
| 458 | #' lower <- c(0.8, 0.8, 0.8) | |
| 459 | #' upper <- c(1.1, 1.6, 1.6) | |
| 460 | #' # numeric vector with multiplication factor to scale each circle radius | |
| 461 | #' # default radius is 1/3.5 lines | |
| 462 | #' symbol_scale <- c(1, 1.25, 1.5) | |
| 463 | #' | |
| 464 | #' # Internal function - forest_grob | |
| 465 | #' \donttest{ | |
| 466 | #' p <- forest_grob(tbl, x, lower, upper, | |
| 467 | #'   vline = 1, forest_header = c("A", "B"), | |
| 468 | #' x_at = c(.1, 1, 10), xlim = c(0.1, 10), logx = TRUE, symbol_size = symbol_scale, | |
| 469 | #' vp = grid::plotViewport(margins = c(1, 1, 1, 1)) | |
| 470 | #' ) | |
| 471 | #' | |
| 472 | #' draw_grob(p) | |
| 473 | #' } | |
| 474 | #' | |
| 475 | #' @noRd | |
| 476 | #' @keywords internal | |
| 477 | forest_grob <- function(tbl, | |
| 478 | x, | |
| 479 | lower, | |
| 480 | upper, | |
| 481 | vline, | |
| 482 | forest_header, | |
| 483 | xlim = NULL, | |
| 484 | logx = FALSE, | |
| 485 | x_at = NULL, | |
| 486 | width_row_names = NULL, | |
| 487 | width_columns = NULL, | |
| 488 | width_forest = grid::unit(1, "null"), | |
| 489 | symbol_size = NULL, | |
| 490 | col = "blue", | |
| 491 | name = NULL, | |
| 492 | gp = NULL, | |
| 493 |                         vp = NULL) { | |
| 494 | 1x | lifecycle::deprecate_warn( | 
| 495 | 1x | "0.9.4", "forest_grob()", | 
| 496 | 1x | details = "`g_forest` now generates `ggplot` objects. This function is no longer used within `tern`." | 
| 497 | ) | |
| 498 | ||
| 499 | 1x | nr <- nrow(tbl) | 
| 500 | 1x |   if (is.null(vline)) { | 
| 501 | ! | checkmate::assert_true(is.null(forest_header)) | 
| 502 |   } else { | |
| 503 | 1x | checkmate::assert_number(vline) | 
| 504 | 1x | checkmate::assert_character(forest_header, len = 2, null.ok = TRUE) | 
| 505 | } | |
| 506 | ||
| 507 | 1x | checkmate::assert_numeric(x, len = nr) | 
| 508 | 1x | checkmate::assert_numeric(lower, len = nr) | 
| 509 | 1x | checkmate::assert_numeric(upper, len = nr) | 
| 510 | 1x | checkmate::assert_numeric(symbol_size, len = nr, null.ok = TRUE) | 
| 511 | 1x | checkmate::assert_character(col) | 
| 512 | ||
| 513 | 1x |   if (is.null(symbol_size)) { | 
| 514 | ! | symbol_size <- rep(1, nr) | 
| 515 | } | |
| 516 | ||
| 517 | 1x |   if (is.null(xlim)) { | 
| 518 | ! | r <- range(c(x, lower, upper), na.rm = TRUE) | 
| 519 | ! | xlim <- r + c(-0.05, 0.05) * diff(r) | 
| 520 | } | |
| 521 | ||
| 522 | 1x |   if (logx) { | 
| 523 | 1x |     if (is.null(x_at)) { | 
| 524 | ! | x_at <- pretty(log(stats::na.omit(c(x, lower, upper)))) | 
| 525 | ! | x_labels <- exp(x_at) | 
| 526 |     } else { | |
| 527 | 1x | x_labels <- x_at | 
| 528 | 1x | x_at <- log(x_at) | 
| 529 | } | |
| 530 | 1x | xlim <- log(xlim) | 
| 531 | 1x | x <- log(x) | 
| 532 | 1x | lower <- log(lower) | 
| 533 | 1x | upper <- log(upper) | 
| 534 | 1x |     if (!is.null(vline)) { | 
| 535 | 1x | vline <- log(vline) | 
| 536 | } | |
| 537 |   } else { | |
| 538 | ! | x_labels <- TRUE | 
| 539 | } | |
| 540 | ||
| 541 | 1x | data_forest_vp <- grid::dataViewport(xlim, c(0, 1)) | 
| 542 | ||
| 543 | # Get table content as matrix form. | |
| 544 | 1x | mf <- matrix_form(tbl) | 
| 545 | ||
| 546 | # Use `rtables` indent_string eventually. | |
| 547 | 1x | mf$strings[, 1] <- paste0( | 
| 548 | 1x |     strrep("    ", c(rep(0, attr(mf, "nrow_header")), mf$row_info$indent)), | 
| 549 | 1x | mf$strings[, 1] | 
| 550 | ) | |
| 551 | ||
| 552 | 1x | n_header <- attr(mf, "nrow_header") | 
| 553 | ||
| 554 | ! |   if (any(mf$display[, 1] == FALSE)) stop("row names need to be always displayed") | 
| 555 | ||
| 556 | # Pre-process the data to be used in lapply and cell_in_rows. | |
| 557 | 1x |   to_args_for_cell_in_rows_fun <- function(part = c("body", "header"), | 
| 558 | 1x |                                            underline_colspan = FALSE) { | 
| 559 | 2x | part <- match.arg(part) | 
| 560 | 2x |     if (part == "body") { | 
| 561 | 1x | mat_row_indices <- seq_len(nrow(tbl)) + n_header | 
| 562 | 1x | row_ind_offset <- -n_header | 
| 563 |     } else { | |
| 564 | 1x | mat_row_indices <- seq_len(n_header) | 
| 565 | 1x | row_ind_offset <- 0 | 
| 566 | } | |
| 567 | ||
| 568 | 2x |     lapply(mat_row_indices, function(i) { | 
| 569 | 5x | disp <- mf$display[i, -1] | 
| 570 | 5x | list( | 
| 571 | 5x | row_name = mf$strings[i, 1], | 
| 572 | 5x | cells = mf$strings[i, -1][disp], | 
| 573 | 5x | cell_spans = mf$spans[i, -1][disp], | 
| 574 | 5x | row_index = i + row_ind_offset, | 
| 575 | 5x | underline_colspan = underline_colspan | 
| 576 | ) | |
| 577 | }) | |
| 578 | } | |
| 579 | ||
| 580 | 1x |   args_header <- to_args_for_cell_in_rows_fun("header", underline_colspan = TRUE) | 
| 581 | 1x |   args_body <- to_args_for_cell_in_rows_fun("body", underline_colspan = FALSE) | 
| 582 | ||
| 583 | 1x | grid::gTree( | 
| 584 | 1x | name = name, | 
| 585 | 1x | children = grid::gList( | 
| 586 | 1x | grid::gTree( | 
| 587 | 1x | children = do.call(grid::gList, lapply(args_header, do.call, what = cell_in_rows)), | 
| 588 | 1x |         vp = grid::vpPath("vp_table_layout", "vp_header") | 
| 589 | ), | |
| 590 | 1x | grid::gTree( | 
| 591 | 1x | children = do.call(grid::gList, lapply(args_body, do.call, what = cell_in_rows)), | 
| 592 | 1x |         vp = grid::vpPath("vp_table_layout", "vp_body") | 
| 593 | ), | |
| 594 | 1x | grid::linesGrob( | 
| 595 | 1x | grid::unit(c(0, 1), "npc"), | 
| 596 | 1x | y = grid::unit(c(.5, .5), "npc"), | 
| 597 | 1x |         vp = grid::vpPath("vp_table_layout", "vp_spacer") | 
| 598 | ), | |
| 599 | # forest part | |
| 600 | 1x |       if (is.null(vline)) { | 
| 601 | ! | NULL | 
| 602 |       } else { | |
| 603 | 1x | grid::gTree( | 
| 604 | 1x | children = grid::gList( | 
| 605 | 1x | grid::gTree( | 
| 606 | 1x | children = grid::gList( | 
| 607 | 1x | grid::textGrob( | 
| 608 | 1x | forest_header[1], | 
| 609 | 1x | x = grid::unit(vline, "native") - grid::unit(1, "lines"), | 
| 610 | 1x |                   just = c("right", "center") | 
| 611 | ), | |
| 612 | 1x | grid::textGrob( | 
| 613 | 1x | forest_header[2], | 
| 614 | 1x | x = grid::unit(vline, "native") + grid::unit(1, "lines"), | 
| 615 | 1x |                   just = c("left", "center") | 
| 616 | ) | |
| 617 | ), | |
| 618 | 1x | vp = grid::vpStack(grid::viewport(layout.pos.col = ncol(tbl) + 2), data_forest_vp) | 
| 619 | ) | |
| 620 | ), | |
| 621 | 1x |           vp = grid::vpPath("vp_table_layout", "vp_header") | 
| 622 | ) | |
| 623 | }, | |
| 624 | 1x | grid::gTree( | 
| 625 | 1x | children = grid::gList( | 
| 626 | 1x | grid::gTree( | 
| 627 | 1x | children = grid::gList( | 
| 628 | 1x | grid::rectGrob(gp = grid::gpar(col = "gray90", fill = "gray90")), | 
| 629 | 1x |               if (is.null(vline)) { | 
| 630 | ! | NULL | 
| 631 |               } else { | |
| 632 | 1x | grid::linesGrob( | 
| 633 | 1x | x = grid::unit(rep(vline, 2), "native"), | 
| 634 | 1x | y = grid::unit(c(0, 1), "npc"), | 
| 635 | 1x | gp = grid::gpar(lwd = 2), | 
| 636 | 1x | vp = data_forest_vp | 
| 637 | ) | |
| 638 | }, | |
| 639 | 1x | grid::xaxisGrob(at = x_at, label = x_labels, vp = data_forest_vp) | 
| 640 | ), | |
| 641 | 1x | vp = grid::viewport(layout.pos.col = ncol(tbl) + 2) | 
| 642 | ) | |
| 643 | ), | |
| 644 | 1x |         vp = grid::vpPath("vp_table_layout", "vp_body") | 
| 645 | ), | |
| 646 | 1x | grid::gTree( | 
| 647 | 1x | children = do.call( | 
| 648 | 1x | grid::gList, | 
| 649 | 1x | Map( | 
| 650 | 1x |             function(xi, li, ui, row_index, size_i, col) { | 
| 651 | 3x | forest_dot_line( | 
| 652 | 3x | xi, | 
| 653 | 3x | li, | 
| 654 | 3x | ui, | 
| 655 | 3x | row_index, | 
| 656 | 3x | xlim, | 
| 657 | 3x | symbol_size = size_i, | 
| 658 | 3x | col = col, | 
| 659 | 3x | datavp = data_forest_vp | 
| 660 | ) | |
| 661 | }, | |
| 662 | 1x | x, | 
| 663 | 1x | lower, | 
| 664 | 1x | upper, | 
| 665 | 1x | seq_along(x), | 
| 666 | 1x | symbol_size, | 
| 667 | 1x | col, | 
| 668 | 1x | USE.NAMES = FALSE | 
| 669 | ) | |
| 670 | ), | |
| 671 | 1x |         vp = grid::vpPath("vp_table_layout", "vp_body") | 
| 672 | ) | |
| 673 | ), | |
| 674 | 1x | childrenvp = forest_viewport(tbl, width_row_names, width_columns, width_forest), | 
| 675 | 1x | vp = vp, | 
| 676 | 1x | gp = gp | 
| 677 | ) | |
| 678 | } | |
| 679 | ||
| 680 | cell_in_rows <- function(row_name, | |
| 681 | cells, | |
| 682 | cell_spans, | |
| 683 | row_index, | |
| 684 |                          underline_colspan = FALSE) { | |
| 685 | 5x | checkmate::assert_string(row_name) | 
| 686 | 5x | checkmate::assert_character(cells, min.len = 1, any.missing = FALSE) | 
| 687 | 5x | checkmate::assert_numeric(cell_spans, len = length(cells), any.missing = FALSE) | 
| 688 | 5x | checkmate::assert_number(row_index) | 
| 689 | 5x | checkmate::assert_flag(underline_colspan) | 
| 690 | ||
| 691 | 5x |   vp_name_rn <- paste0("rowname-", row_index) | 
| 692 | 5x |   g_rowname <- if (!is.null(row_name) && row_name != "") { | 
| 693 | 3x | grid::textGrob( | 
| 694 | 3x | name = vp_name_rn, | 
| 695 | 3x | label = row_name, | 
| 696 | 3x | x = grid::unit(0, "npc"), | 
| 697 | 3x |       just = c("left", "center"), | 
| 698 | 3x |       vp = grid::vpPath(paste0("rowname-", row_index)) | 
| 699 | ) | |
| 700 |   } else { | |
| 701 | 2x | NULL | 
| 702 | } | |
| 703 | ||
| 704 | 5x |   gl_cols <- if (!(length(cells) > 0)) { | 
| 705 | ! | list(NULL) | 
| 706 |   } else { | |
| 707 | 5x | j <- 1 # column index of cell | 
| 708 | ||
| 709 | 5x |     lapply(seq_along(cells), function(k) { | 
| 710 | 19x | cell_ascii <- cells[[k]] | 
| 711 | 19x | cs <- cell_spans[[k]] | 
| 712 | ||
| 713 | 19x |       if (is.na(cell_ascii) || is.null(cell_ascii)) { | 
| 714 | ! | cell_ascii <- "NA" | 
| 715 | } | |
| 716 | ||
| 717 | 19x |       cell_name <- paste0("g-cell-", row_index, "-", j) | 
| 718 | ||
| 719 | 19x |       cell_grobs <- if (identical(cell_ascii, "")) { | 
| 720 | ! | NULL | 
| 721 |       } else { | |
| 722 | 19x |         if (cs == 1) { | 
| 723 | 18x | grid::textGrob( | 
| 724 | 18x | label = cell_ascii, | 
| 725 | 18x | name = cell_name, | 
| 726 | 18x |             vp = grid::vpPath(paste0("cell-", row_index, "-", j)) | 
| 727 | ) | |
| 728 |         } else { | |
| 729 | # +1 because of rowname | |
| 730 | 1x | vp_joined_cols <- grid::viewport(layout.pos.row = row_index, layout.pos.col = seq(j + 1, j + cs)) | 
| 731 | ||
| 732 | 1x | lab <- grid::textGrob( | 
| 733 | 1x | label = cell_ascii, | 
| 734 | 1x | name = cell_name, | 
| 735 | 1x | vp = vp_joined_cols | 
| 736 | ) | |
| 737 | ||
| 738 | 1x |           if (!underline_colspan || grepl("^[[:space:]]*$", cell_ascii)) { | 
| 739 | ! | lab | 
| 740 |           } else { | |
| 741 | 1x | grid::gList( | 
| 742 | 1x | lab, | 
| 743 | 1x | grid::linesGrob( | 
| 744 | 1x | x = grid::unit.c(grid::unit(.2, "lines"), grid::unit(1, "npc") - grid::unit(.2, "lines")), | 
| 745 | 1x | y = grid::unit(c(0, 0), "npc"), | 
| 746 | 1x | vp = vp_joined_cols | 
| 747 | ) | |
| 748 | ) | |
| 749 | } | |
| 750 | } | |
| 751 | } | |
| 752 | 19x | j <<- j + cs | 
| 753 | ||
| 754 | 19x | cell_grobs | 
| 755 | }) | |
| 756 | } | |
| 757 | ||
| 758 | 5x | grid::gList( | 
| 759 | 5x | g_rowname, | 
| 760 | 5x | do.call(grid::gList, gl_cols) | 
| 761 | ) | |
| 762 | } | |
| 763 | ||
| 764 | #' Graphic object: forest dot line | |
| 765 | #' | |
| 766 | #' @description `r lifecycle::badge("deprecated")` | |
| 767 | #' | |
| 768 | #' Calculate the `grob` corresponding to the dot line within the forest plot. | |
| 769 | #' | |
| 770 | #' @noRd | |
| 771 | #' @keywords internal | |
| 772 | forest_dot_line <- function(x, | |
| 773 | lower, | |
| 774 | upper, | |
| 775 | row_index, | |
| 776 | xlim, | |
| 777 | symbol_size = 1, | |
| 778 | col = "blue", | |
| 779 |                             datavp) { | |
| 780 | 3x | lifecycle::deprecate_warn( | 
| 781 | 3x | "0.9.4", "forest_dot_line()", | 
| 782 | 3x | details = "`g_forest` now generates `ggplot` objects. This function is no longer used within `tern`." | 
| 783 | ) | |
| 784 | ||
| 785 | 3x | ci <- c(lower, upper) | 
| 786 | 3x |   if (any(!is.na(c(x, ci)))) { | 
| 787 | # line | |
| 788 | 3x | y <- grid::unit(c(0.5, 0.5), "npc") | 
| 789 | ||
| 790 | 3x |     g_line <- if (all(!is.na(ci)) && ci[2] > xlim[1] && ci[1] < xlim[2]) { | 
| 791 | # - | |
| 792 | 3x |       if (ci[1] >= xlim[1] && ci[2] <= xlim[2]) { | 
| 793 | 3x | grid::linesGrob(x = grid::unit(c(ci[1], ci[2]), "native"), y = y) | 
| 794 | ! |       } else if (ci[1] < xlim[1] && ci[2] > xlim[2]) { | 
| 795 | # <-> | |
| 796 | ! | grid::linesGrob( | 
| 797 | ! | x = grid::unit(xlim, "native"), | 
| 798 | ! | y = y, | 
| 799 | ! | arrow = grid::arrow(angle = 30, length = grid::unit(0.5, "lines"), ends = "both") | 
| 800 | ) | |
| 801 | ! |       } else if (ci[1] < xlim[1] && ci[2] <= xlim[2]) { | 
| 802 | # <- | |
| 803 | ! | grid::linesGrob( | 
| 804 | ! | x = grid::unit(c(xlim[1], ci[2]), "native"), | 
| 805 | ! | y = y, | 
| 806 | ! | arrow = grid::arrow(angle = 30, length = grid::unit(0.5, "lines"), ends = "first") | 
| 807 | ) | |
| 808 | ! |       } else if (ci[1] >= xlim[1] && ci[2] > xlim[2]) { | 
| 809 | # -> | |
| 810 | ! | grid::linesGrob( | 
| 811 | ! | x = grid::unit(c(ci[1], xlim[2]), "native"), | 
| 812 | ! | y = y, | 
| 813 | ! | arrow = grid::arrow(angle = 30, length = grid::unit(0.5, "lines"), ends = "last") | 
| 814 | ) | |
| 815 | } | |
| 816 |     } else { | |
| 817 | ! | NULL | 
| 818 | } | |
| 819 | ||
| 820 | 3x |     g_circle <- if (!is.na(x) && x >= xlim[1] && x <= xlim[2]) { | 
| 821 | 3x | grid::circleGrob( | 
| 822 | 3x | x = grid::unit(x, "native"), | 
| 823 | 3x | y = y, | 
| 824 | 3x | r = grid::unit(1 / 3.5 * symbol_size, "lines"), | 
| 825 | 3x | name = "point" | 
| 826 | ) | |
| 827 |     } else { | |
| 828 | ! | NULL | 
| 829 | } | |
| 830 | ||
| 831 | 3x | grid::gTree( | 
| 832 | 3x | children = grid::gList( | 
| 833 | 3x | grid::gTree( | 
| 834 | 3x | children = grid::gList( | 
| 835 | 3x | grid::gList( | 
| 836 | 3x | g_line, | 
| 837 | 3x | g_circle | 
| 838 | ) | |
| 839 | ), | |
| 840 | 3x | vp = datavp, | 
| 841 | 3x | gp = grid::gpar(col = col, fill = col) | 
| 842 | ) | |
| 843 | ), | |
| 844 | 3x |       vp = grid::vpPath(paste0("forest-", row_index)) | 
| 845 | ) | |
| 846 |   } else { | |
| 847 | ! | NULL | 
| 848 | } | |
| 849 | } | |
| 850 | ||
| 851 | #' Create a viewport tree for the forest plot | |
| 852 | #' | |
| 853 | #' @description `r lifecycle::badge("deprecated")` | |
| 854 | #' | |
| 855 | #' @param tbl (`VTableTree`)\cr `rtables` table object. | |
| 856 | #' @param width_row_names (`grid::unit`)\cr width of row names. | |
| 857 | #' @param width_columns (`grid::unit`)\cr width of column spans. | |
| 858 | #' @param width_forest (`grid::unit`)\cr width of the forest plot. | |
| 859 | #' @param gap_column (`grid::unit`)\cr gap width between the columns. | |
| 860 | #' @param gap_header (`grid::unit`)\cr gap width between the header. | |
| 861 | #' @param mat_form (`MatrixPrintForm`)\cr matrix print form of the table. | |
| 862 | #' | |
| 863 | #' @return A viewport tree. | |
| 864 | #' | |
| 865 | #' @examples | |
| 866 | #' library(grid) | |
| 867 | #' | |
| 868 | #' tbl <- rtable( | |
| 869 | #' header = rheader( | |
| 870 | #'     rrow("", "E", rcell("CI", colspan = 2)), | |
| 871 | #'     rrow("", "A", "B", "C") | |
| 872 | #' ), | |
| 873 | #'   rrow("row 1", 1, 0.8, 1.1), | |
| 874 | #'   rrow("row 2", 1.4, 0.8, 1.6), | |
| 875 | #'   rrow("row 3", 1.2, 0.8, 1.2) | |
| 876 | #' ) | |
| 877 | #' | |
| 878 | #' \donttest{ | |
| 879 | #' v <- forest_viewport(tbl) | |
| 880 | #' | |
| 881 | #' grid::grid.newpage() | |
| 882 | #' showViewport(v) | |
| 883 | #' } | |
| 884 | #' | |
| 885 | #' @export | |
| 886 | forest_viewport <- function(tbl, | |
| 887 | width_row_names = NULL, | |
| 888 | width_columns = NULL, | |
| 889 | width_forest = grid::unit(1, "null"), | |
| 890 | gap_column = grid::unit(1, "lines"), | |
| 891 | gap_header = grid::unit(1, "lines"), | |
| 892 |                             mat_form = NULL) { | |
| 893 | 2x | lifecycle::deprecate_warn( | 
| 894 | 2x | "0.9.4", | 
| 895 | 2x | "forest_viewport()", | 
| 896 | 2x | details = "`g_forest` now generates `ggplot` objects. This function is no longer used within `tern`." | 
| 897 | ) | |
| 898 | ||
| 899 | 2x | checkmate::assert_class(tbl, "VTableTree") | 
| 900 | 2x | checkmate::assert_true(grid::is.unit(width_forest)) | 
| 901 | 2x |   if (!is.null(width_row_names)) { | 
| 902 | ! | checkmate::assert_true(grid::is.unit(width_row_names)) | 
| 903 | } | |
| 904 | 2x |   if (!is.null(width_columns)) { | 
| 905 | ! | checkmate::assert_true(grid::is.unit(width_columns)) | 
| 906 | } | |
| 907 | ||
| 908 | 2x | if (is.null(mat_form)) mat_form <- matrix_form(tbl) | 
| 909 | ||
| 910 | 2x | mat_form$strings[!mat_form$display] <- "" | 
| 911 | ||
| 912 | 2x | nr <- nrow(tbl) | 
| 913 | 2x | nc <- ncol(tbl) | 
| 914 | 2x | nr_h <- attr(mat_form, "nrow_header") | 
| 915 | ||
| 916 | 2x |   if (is.null(width_row_names) || is.null(width_columns)) { | 
| 917 | 2x | tbl_widths <- formatters::propose_column_widths(mat_form) | 
| 918 | 2x |     strs_with_width <- strrep("x", tbl_widths) # that works for mono spaced fonts | 
| 919 | 2x | if (is.null(width_row_names)) width_row_names <- grid::stringWidth(strs_with_width[1]) | 
| 920 | 2x | if (is.null(width_columns)) width_columns <- grid::stringWidth(strs_with_width[-1]) | 
| 921 | } | |
| 922 | ||
| 923 | # Widths for row name, cols, forest. | |
| 924 | 2x | widths <- grid::unit.c( | 
| 925 | 2x | width_row_names + gap_column, | 
| 926 | 2x | width_columns + gap_column, | 
| 927 | 2x | width_forest | 
| 928 | ) | |
| 929 | ||
| 930 | 2x | n_lines_per_row <- apply( | 
| 931 | 2x | X = mat_form$strings, | 
| 932 | 2x | MARGIN = 1, | 
| 933 | 2x |     FUN = function(row) { | 
| 934 | 10x | tmp <- vapply( | 
| 935 | 10x |         gregexpr("\n", row, fixed = TRUE), | 
| 936 | 10x | attr, numeric(1), | 
| 937 | 10x | "match.length" | 
| 938 | 10x | ) + 1 | 
| 939 | 10x | max(c(tmp, 1)) | 
| 940 | } | |
| 941 | ) | |
| 942 | ||
| 943 | 2x | i_header <- seq_len(nr_h) | 
| 944 | ||
| 945 | 2x | height_body_rows <- grid::unit(n_lines_per_row[-i_header] * 1.2, "lines") | 
| 946 | 2x | height_header_rows <- grid::unit(n_lines_per_row[i_header] * 1.2, "lines") | 
| 947 | ||
| 948 | 2x | height_body <- grid::unit(sum(n_lines_per_row[-i_header]) * 1.2, "lines") | 
| 949 | 2x | height_header <- grid::unit(sum(n_lines_per_row[i_header]) * 1.2, "lines") | 
| 950 | ||
| 951 | 2x | nc_g <- nc + 2 # number of columns incl. row names and forest | 
| 952 | ||
| 953 | 2x | vp_tbl <- grid::vpTree( | 
| 954 | 2x | parent = grid::viewport( | 
| 955 | 2x | name = "vp_table_layout", | 
| 956 | 2x | layout = grid::grid.layout( | 
| 957 | 2x | nrow = 3, ncol = 1, | 
| 958 | 2x | heights = grid::unit.c(height_header, gap_header, height_body) | 
| 959 | ) | |
| 960 | ), | |
| 961 | 2x | children = grid::vpList( | 
| 962 | 2x | vp_forest_table_part(nr_h, nc_g, 1, 1, widths, height_header_rows, "vp_header"), | 
| 963 | 2x | vp_forest_table_part(nr, nc_g, 3, 1, widths, height_body_rows, "vp_body"), | 
| 964 | 2x | grid::viewport(name = "vp_spacer", layout.pos.row = 2, layout.pos.col = 1) | 
| 965 | ) | |
| 966 | ) | |
| 967 | 2x | vp_tbl | 
| 968 | } | |
| 969 | ||
| 970 | #' Viewport forest plot: table part | |
| 971 | #' | |
| 972 | #' @description `r lifecycle::badge("deprecated")` | |
| 973 | #' | |
| 974 | #' Prepares a viewport for the table included in the forest plot. | |
| 975 | #' | |
| 976 | #' @noRd | |
| 977 | #' @keywords internal | |
| 978 | vp_forest_table_part <- function(nrow, | |
| 979 | ncol, | |
| 980 | l_row, | |
| 981 | l_col, | |
| 982 | widths, | |
| 983 | heights, | |
| 984 |                                  name) { | |
| 985 | 4x | lifecycle::deprecate_warn( | 
| 986 | 4x | "0.9.4", "vp_forest_table_part()", | 
| 987 | 4x | details = "`g_forest` now generates `ggplot` objects. This function is no longer used within `tern`." | 
| 988 | ) | |
| 989 | ||
| 990 | 4x | grid::vpTree( | 
| 991 | 4x | grid::viewport( | 
| 992 | 4x | name = name, | 
| 993 | 4x | layout.pos.row = l_row, | 
| 994 | 4x | layout.pos.col = l_col, | 
| 995 | 4x | layout = grid::grid.layout(nrow = nrow, ncol = ncol, widths = widths, heights = heights) | 
| 996 | ), | |
| 997 | 4x | children = grid::vpList( | 
| 998 | 4x | do.call( | 
| 999 | 4x | grid::vpList, | 
| 1000 | 4x | lapply( | 
| 1001 | 4x |           seq_len(nrow), function(i) { | 
| 1002 | 10x |             grid::viewport(layout.pos.row = i, layout.pos.col = 1, name = paste0("rowname-", i)) | 
| 1003 | } | |
| 1004 | ) | |
| 1005 | ), | |
| 1006 | 4x | do.call( | 
| 1007 | 4x | grid::vpList, | 
| 1008 | 4x | apply( | 
| 1009 | 4x | expand.grid(seq_len(nrow), seq_len(ncol - 2)), | 
| 1010 | 4x | 1, | 
| 1011 | 4x |           function(x) { | 
| 1012 | 35x | i <- x[1] | 
| 1013 | 35x | j <- x[2] | 
| 1014 | 35x |             grid::viewport(layout.pos.row = i, layout.pos.col = j + 1, name = paste0("cell-", i, "-", j)) | 
| 1015 | } | |
| 1016 | ) | |
| 1017 | ), | |
| 1018 | 4x | do.call( | 
| 1019 | 4x | grid::vpList, | 
| 1020 | 4x | lapply( | 
| 1021 | 4x | seq_len(nrow), | 
| 1022 | 4x |           function(i) { | 
| 1023 | 10x |             grid::viewport(layout.pos.row = i, layout.pos.col = ncol, name = paste0("forest-", i)) | 
| 1024 | } | |
| 1025 | ) | |
| 1026 | ) | |
| 1027 | ) | |
| 1028 | ) | |
| 1029 | } | |
| 1030 | ||
| 1031 | #' Forest rendering | |
| 1032 | #' | |
| 1033 | #' @description `r lifecycle::badge("deprecated")` | |
| 1034 | #' | |
| 1035 | #' Renders the forest grob. | |
| 1036 | #' | |
| 1037 | #' @noRd | |
| 1038 | #' @keywords internal | |
| 1039 | grid.forest <- function(...) { # nolint | |
| 1040 | ! | lifecycle::deprecate_warn( | 
| 1041 | ! | "0.9.4", "grid.forest()", | 
| 1042 | ! | details = "`g_forest` now generates `ggplot` objects. This function is no longer used within `tern`." | 
| 1043 | ) | |
| 1044 | ||
| 1045 | ! | grid::grid.draw(forest_grob(...)) | 
| 1046 | } | 
| 1 | #' Tabulate survival duration by subgroup | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The [tabulate_survival_subgroups()] function creates a layout element to tabulate survival duration by subgroup, | |
| 6 | #' returning statistics including median survival time and hazard ratio for each population subgroup. The table is | |
| 7 | #' created from `df`, a list of data frames returned by [extract_survival_subgroups()], with the statistics to include | |
| 8 | #' specified via the `vars` parameter. | |
| 9 | #' | |
| 10 | #' A forest plot can be created from the resulting table using the [g_forest()] function. | |
| 11 | #' | |
| 12 | #' @inheritParams argument_convention | |
| 13 | #' @inheritParams survival_coxph_pairwise | |
| 14 | #' @param df (`list`)\cr list of data frames containing all analysis variables. List should be | |
| 15 | #' created using [extract_survival_subgroups()]. | |
| 16 | #' @param vars (`character`)\cr the names of statistics to be reported among: | |
| 17 | #' * `n_tot_events`: Total number of events per group. | |
| 18 | #' * `n_events`: Number of events per group. | |
| 19 | #' * `n_tot`: Total number of observations per group. | |
| 20 | #' * `n`: Number of observations per group. | |
| 21 | #' * `median`: Median survival time. | |
| 22 | #' * `hr`: Hazard ratio. | |
| 23 | #' * `ci`: Confidence interval of hazard ratio. | |
| 24 | #' * `pval`: p-value of the effect. | |
| 25 | #' Note, one of the statistics `n_tot` and `n_tot_events`, as well as both `hr` and `ci` | |
| 26 | #' are required. | |
| 27 | #' @param time_unit (`string`)\cr label with unit of median survival time. Default `NULL` skips displaying unit. | |
| 28 | #' | |
| 29 | #' @details These functions create a layout starting from a data frame which contains | |
| 30 | #' the required statistics. Tables typically used as part of forest plot. | |
| 31 | #' | |
| 32 | #' @seealso [extract_survival_subgroups()] | |
| 33 | #' | |
| 34 | #' @examples | |
| 35 | #' library(dplyr) | |
| 36 | #' | |
| 37 | #' adtte <- tern_ex_adtte | |
| 38 | #' | |
| 39 | #' # Save variable labels before data processing steps. | |
| 40 | #' adtte_labels <- formatters::var_labels(adtte) | |
| 41 | #' | |
| 42 | #' adtte_f <- adtte %>% | |
| 43 | #' filter( | |
| 44 | #' PARAMCD == "OS", | |
| 45 | #'     ARM %in% c("B: Placebo", "A: Drug X"), | |
| 46 | #'     SEX %in% c("M", "F") | |
| 47 | #' ) %>% | |
| 48 | #' mutate( | |
| 49 | #' # Reorder levels of ARM to display reference arm before treatment arm. | |
| 50 | #' ARM = droplevels(forcats::fct_relevel(ARM, "B: Placebo")), | |
| 51 | #' SEX = droplevels(SEX), | |
| 52 | #' AVALU = as.character(AVALU), | |
| 53 | #' is_event = CNSR == 0 | |
| 54 | #' ) | |
| 55 | #' labels <- c( | |
| 56 | #' "ARM" = adtte_labels[["ARM"]], | |
| 57 | #' "SEX" = adtte_labels[["SEX"]], | |
| 58 | #' "AVALU" = adtte_labels[["AVALU"]], | |
| 59 | #' "is_event" = "Event Flag" | |
| 60 | #' ) | |
| 61 | #' formatters::var_labels(adtte_f)[names(labels)] <- labels | |
| 62 | #' | |
| 63 | #' df <- extract_survival_subgroups( | |
| 64 | #' variables = list( | |
| 65 | #' tte = "AVAL", | |
| 66 | #' is_event = "is_event", | |
| 67 | #'     arm = "ARM", subgroups = c("SEX", "BMRKR2") | |
| 68 | #' ), | |
| 69 | #' label_all = "Total Patients", | |
| 70 | #' data = adtte_f | |
| 71 | #' ) | |
| 72 | #' df | |
| 73 | #' | |
| 74 | #' df_grouped <- extract_survival_subgroups( | |
| 75 | #' variables = list( | |
| 76 | #' tte = "AVAL", | |
| 77 | #' is_event = "is_event", | |
| 78 | #'     arm = "ARM", subgroups = c("SEX", "BMRKR2") | |
| 79 | #' ), | |
| 80 | #' data = adtte_f, | |
| 81 | #' groups_lists = list( | |
| 82 | #' BMRKR2 = list( | |
| 83 | #' "low" = "LOW", | |
| 84 | #'       "low/medium" = c("LOW", "MEDIUM"), | |
| 85 | #'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH") | |
| 86 | #' ) | |
| 87 | #' ) | |
| 88 | #' ) | |
| 89 | #' df_grouped | |
| 90 | #' | |
| 91 | #' @name survival_duration_subgroups | |
| 92 | #' @order 1 | |
| 93 | NULL | |
| 94 | ||
| 95 | #' Prepare survival data for population subgroups in data frames | |
| 96 | #' | |
| 97 | #' @description `r lifecycle::badge("stable")` | |
| 98 | #' | |
| 99 | #' Prepares estimates of median survival times and treatment hazard ratios for population subgroups in | |
| 100 | #' data frames. Simple wrapper for [h_survtime_subgroups_df()] and [h_coxph_subgroups_df()]. Result is a `list` | |
| 101 | #' of two `data.frame`s: `survtime` and `hr`. `variables` corresponds to the names of variables found in `data`, | |
| 102 | #' passed as a named `list` and requires elements `tte`, `is_event`, `arm` and optionally `subgroups` and `strata`. | |
| 103 | #' `groups_lists` optionally specifies groupings for `subgroups` variables. | |
| 104 | #' | |
| 105 | #' @inheritParams argument_convention | |
| 106 | #' @inheritParams survival_duration_subgroups | |
| 107 | #' @inheritParams survival_coxph_pairwise | |
| 108 | #' | |
| 109 | #' @return A named `list` of two elements: | |
| 110 | #' * `survtime`: A `data.frame` containing columns `arm`, `n`, `n_events`, `median`, `subgroup`, `var`, | |
| 111 | #' `var_label`, and `row_type`. | |
| 112 | #' * `hr`: A `data.frame` containing columns `arm`, `n_tot`, `n_tot_events`, `hr`, `lcl`, `ucl`, `conf_level`, | |
| 113 | #' `pval`, `pval_label`, `subgroup`, `var`, `var_label`, and `row_type`. | |
| 114 | #' | |
| 115 | #' @seealso [survival_duration_subgroups] | |
| 116 | #' | |
| 117 | #' @export | |
| 118 | extract_survival_subgroups <- function(variables, | |
| 119 | data, | |
| 120 | groups_lists = list(), | |
| 121 | control = control_coxph(), | |
| 122 |                                        label_all = "All Patients") { | |
| 123 | 12x |   if ("strat" %in% names(variables)) { | 
| 124 | ! | warning( | 
| 125 | ! | "Warning: the `strat` element name of the `variables` list argument to `extract_survival_subgroups() ", | 
| 126 | ! | "was deprecated in tern 0.9.4.\n ", | 
| 127 | ! | "Please use the name `strata` instead of `strat` in the `variables` argument." | 
| 128 | ) | |
| 129 | ! | variables[["strata"]] <- variables[["strat"]] | 
| 130 | } | |
| 131 | ||
| 132 | 12x | df_survtime <- h_survtime_subgroups_df( | 
| 133 | 12x | variables, | 
| 134 | 12x | data, | 
| 135 | 12x | groups_lists = groups_lists, | 
| 136 | 12x | label_all = label_all | 
| 137 | ) | |
| 138 | 12x | df_hr <- h_coxph_subgroups_df( | 
| 139 | 12x | variables, | 
| 140 | 12x | data, | 
| 141 | 12x | groups_lists = groups_lists, | 
| 142 | 12x | control = control, | 
| 143 | 12x | label_all = label_all | 
| 144 | ) | |
| 145 | ||
| 146 | 12x | list(survtime = df_survtime, hr = df_hr) | 
| 147 | } | |
| 148 | ||
| 149 | #' @describeIn survival_duration_subgroups Formatted analysis function which is used as | |
| 150 | #' `afun` in `tabulate_survival_subgroups()`. | |
| 151 | #' | |
| 152 | #' @return | |
| 153 | #' * `a_survival_subgroups()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 154 | #' | |
| 155 | #' @keywords internal | |
| 156 | a_survival_subgroups <- function(df, | |
| 157 | labelstr = "", | |
| 158 | ..., | |
| 159 | .stats = NULL, | |
| 160 | .stat_names = NULL, | |
| 161 | .formats = NULL, | |
| 162 | .labels = NULL, | |
| 163 |                                  .indent_mods = NULL) { | |
| 164 | # Check for additional parameters to the statistics function | |
| 165 | 335x | dots_extra_args <- list(...) | 
| 166 | 335x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 167 | 335x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 168 | 335x | cur_col_stat <- extra_afun_params$.var %||% .stats | 
| 169 | ||
| 170 | # Uniquely name & label rows | |
| 171 | 335x |   var_lvls <- if ("biomarker" %in% names(dots_extra_args) && "biomarker" %in% names(df)) { | 
| 172 | 126x |     if ("overall" %in% names(dots_extra_args)) { # label rows for (nested) biomarker tables - e.g. "AGE", "BMRKR1" | 
| 173 | 54x | as.character(df$biomarker) | 
| 174 | 335x |     } else { # data rows for (nested) biomarker tables - e.g. "AGE.LOW", "BMRKR1.Total Patients" | 
| 175 | 72x | paste(as.character(df$biomarker), as.character(df$subgroup), sep = ".") | 
| 176 | } | |
| 177 | 335x |   } else { # data rows for non-biomarker tables - e.g. "Total Patients", "F", "M" | 
| 178 | 209x | make.unique(as.character(df$subgroup)) | 
| 179 | } | |
| 180 | ||
| 181 | # if empty, return NA | |
| 182 | 335x |   if (nrow(df) == 0) { | 
| 183 | 1x | return(in_rows(.list = list(NA) %>% stats::setNames(cur_col_stat))) | 
| 184 | } | |
| 185 | ||
| 186 | # Main statistics taken from df | |
| 187 | 334x | x_stats <- as.list(df) | 
| 188 | ||
| 189 | # Fill in formatting defaults | |
| 190 | 334x |   .stats <- get_stats("tabulate_survival_subgroups", stats_in = cur_col_stat) | 
| 191 | 334x | levels_per_stats <- rep(list(var_lvls), length(.stats)) %>% setNames(.stats) | 
| 192 | 334x | .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) | 
| 193 | 334x | .labels <- get_labels_from_stats( | 
| 194 | 334x | .stats, .labels, levels_per_stats, | 
| 195 | # default labels are pre-determined in extract_*() function | |
| 196 | 334x | tern_defaults = as.list(as.character(df$subgroup)) %>% setNames(var_lvls) | 
| 197 | ) | |
| 198 | 334x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) | 
| 199 | ||
| 200 | 334x | x_stats <- lapply( | 
| 201 | 334x | .stats, | 
| 202 | 334x | function(x) x_stats[[x]] %>% stats::setNames(var_lvls) | 
| 203 | ) %>% | |
| 204 | 334x | stats::setNames(.stats) %>% | 
| 205 | 334x | .unlist_keep_nulls() | 
| 206 | ||
| 207 | # Auto format handling | |
| 208 | 334x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 209 | ||
| 210 | # Get and check statistical names | |
| 211 | 334x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 212 | ||
| 213 | 334x | in_rows( | 
| 214 | 334x | .list = x_stats, | 
| 215 | 334x | .formats = .formats, | 
| 216 | 334x | .names = names(.labels), | 
| 217 | 334x | .stat_names = .stat_names, | 
| 218 | 334x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 219 | 334x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 220 | ) | |
| 221 | } | |
| 222 | ||
| 223 | #' @describeIn survival_duration_subgroups Table-creating function which creates a table | |
| 224 | #' summarizing survival by subgroup. This function is a wrapper for [rtables::analyze_colvars()] | |
| 225 | #' and [rtables::summarize_row_groups()]. | |
| 226 | #' | |
| 227 | #' @param label_all `r lifecycle::badge("deprecated")`\cr please assign the `label_all` parameter within the | |
| 228 | #' [extract_survival_subgroups()] function when creating `df`. | |
| 229 | #' @param riskdiff (`list`)\cr if a risk (proportion) difference column should be added, a list of settings to apply | |
| 230 | #' within the column. See [control_riskdiff()] for details. If `NULL`, no risk difference column will be added. If | |
| 231 | #' `riskdiff$arm_x` and `riskdiff$arm_y` are `NULL`, the first level of `df$survtime$arm` will be used as `arm_x` | |
| 232 | #' and the second level as `arm_y`. | |
| 233 | #' | |
| 234 | #' @return An `rtables` table summarizing survival by subgroup. | |
| 235 | #' | |
| 236 | #' @examples | |
| 237 | #' ## Table with default columns. | |
| 238 | #' basic_table() %>% | |
| 239 | #' tabulate_survival_subgroups(df, time_unit = adtte_f$AVALU[1]) | |
| 240 | #' | |
| 241 | #' ## Table with a manually chosen set of columns: adding "pval". | |
| 242 | #' basic_table() %>% | |
| 243 | #' tabulate_survival_subgroups( | |
| 244 | #' df = df, | |
| 245 | #'     vars = c("n_tot_events", "n_events", "median", "hr", "ci", "pval"), | |
| 246 | #' time_unit = adtte_f$AVALU[1] | |
| 247 | #' ) | |
| 248 | #' | |
| 249 | #' @export | |
| 250 | #' @order 2 | |
| 251 | tabulate_survival_subgroups <- function(lyt, | |
| 252 | df, | |
| 253 |                                         vars = c("n_tot_events", "n_events", "median", "hr", "ci"), | |
| 254 | groups_lists = list(), | |
| 255 | label_all = lifecycle::deprecated(), | |
| 256 | time_unit = NULL, | |
| 257 | riskdiff = NULL, | |
| 258 | na_str = default_na_str(), | |
| 259 | ..., | |
| 260 | .stat_names = NULL, | |
| 261 | .formats = NULL, | |
| 262 | .labels = NULL, | |
| 263 |                                         .indent_mods = NULL) { | |
| 264 | 11x | checkmate::assert_list(riskdiff, null.ok = TRUE) | 
| 265 | 11x |   checkmate::assert_true(any(c("n_tot", "n_tot_events") %in% vars)) | 
| 266 | 11x |   checkmate::assert_true(all(c("hr", "ci") %in% vars)) | 
| 267 | 11x |   if ("pval" %in% vars && !"pval" %in% names(df$hr)) { | 
| 268 | ! | warning( | 
| 269 | ! | 'The "pval" statistic has been selected but is not present in "df" so it will not be included in the output ', | 
| 270 | ! | 'table. To include the "pval" statistic, please specify a p-value test when generating "df" via ', | 
| 271 | ! | 'the "method" argument to `extract_survival_subgroups()`. If method = "cmh", strata must also be specified via ', | 
| 272 | ! | 'the "variables" argument to `extract_survival_subgroups()`.' | 
| 273 | ) | |
| 274 | } | |
| 275 | ||
| 276 | 11x |   if (lifecycle::is_present(label_all)) { | 
| 277 | 1x | lifecycle::deprecate_warn( | 
| 278 | 1x | "0.9.5", "tabulate_survival_subgroups(label_all)", | 
| 279 | 1x | details = | 
| 280 | 1x | "Please assign the `label_all` parameter within the `extract_survival_subgroups()` function when creating `df`." | 
| 281 | ) | |
| 282 | } | |
| 283 | ||
| 284 | # Process standard extra arguments | |
| 285 | 11x |   extra_args <- list(".stats" = vars) | 
| 286 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 287 | 1x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 288 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 289 | ! | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 290 | ||
| 291 | # Create "ci" column from "lcl" and "ucl" | |
| 292 | 11x | df$hr$ci <- combine_vectors(df$hr$lcl, df$hr$ucl) | 
| 293 | ||
| 294 | # Extract additional parameters from df | |
| 295 | 11x | conf_level <- df$hr$conf_level[1] | 
| 296 | 11x |   method <- if ("pval_label" %in% names(df$hr)) df$hr$pval_label[1] else NULL | 
| 297 | 11x | colvars <- d_survival_subgroups_colvars(vars, conf_level = conf_level, method = method, time_unit = time_unit) | 
| 298 | 11x |   survtime_vars <- intersect(colvars$vars, c("n", "n_events", "median")) | 
| 299 | 11x |   hr_vars <- intersect(names(colvars$labels), c("n_tot", "n_tot_events", "hr", "ci", "pval")) | 
| 300 | 11x | colvars_survtime <- list(vars = survtime_vars, labels = colvars$labels[survtime_vars]) | 
| 301 | 11x | colvars_hr <- list(vars = hr_vars, labels = colvars$labels[hr_vars]) | 
| 302 | ||
| 303 | # Process additional arguments to the statistic function | |
| 304 | 11x | extra_args <- c( | 
| 305 | 11x | extra_args, | 
| 306 | 11x | groups_lists = list(groups_lists), conf_level = conf_level, method = method, | 
| 307 | ... | |
| 308 | ) | |
| 309 | ||
| 310 | # Adding additional info from layout to analysis function | |
| 311 | 11x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 312 | 11x | formals(a_survival_subgroups) <- c(formals(a_survival_subgroups), extra_args[[".additional_fun_parameters"]]) | 
| 313 | ||
| 314 | # Add risk difference column | |
| 315 | 11x |   if (!is.null(riskdiff)) { | 
| 316 | 2x | if (is.null(riskdiff$arm_x)) riskdiff$arm_x <- levels(df$survtime$arm)[1] | 
| 317 | 2x | if (is.null(riskdiff$arm_y)) riskdiff$arm_y <- levels(df$survtime$arm)[2] | 
| 318 | 2x | colvars_hr$vars <- c(colvars_hr$vars, "riskdiff") | 
| 319 | 2x | colvars_hr$labels <- c(colvars_hr$labels, riskdiff = riskdiff$col_label) | 
| 320 | 2x |     arm_cols <- paste(rep(c("n_events", "n_events", "n", "n")), c(riskdiff$arm_x, riskdiff$arm_y), sep = "_") | 
| 321 | ||
| 322 | 2x | df_prop_diff <- df$survtime %>% | 
| 323 | 2x | dplyr::select(-"median") %>% | 
| 324 | 2x | tidyr::pivot_wider( | 
| 325 | 2x |         id_cols = c("subgroup", "var", "var_label", "row_type"), | 
| 326 | 2x | names_from = "arm", | 
| 327 | 2x |         values_from = c("n", "n_events") | 
| 328 | ) %>% | |
| 329 | 2x | dplyr::rowwise() %>% | 
| 330 | 2x | dplyr::mutate( | 
| 331 | 2x | riskdiff = stat_propdiff_ci( | 
| 332 | 2x | x = as.list(.data[[arm_cols[1]]]), | 
| 333 | 2x | y = as.list(.data[[arm_cols[2]]]), | 
| 334 | 2x | N_x = .data[[arm_cols[3]]], | 
| 335 | 2x | N_y = .data[[arm_cols[4]]], | 
| 336 | 2x | pct = riskdiff$pct | 
| 337 | ) | |
| 338 | ) %>% | |
| 339 | 2x | dplyr::select(-dplyr::all_of(arm_cols)) | 
| 340 | ||
| 341 | 2x | df$hr <- df$hr %>% | 
| 342 | 2x | dplyr::left_join( | 
| 343 | 2x | df_prop_diff, | 
| 344 | 2x |         by = c("subgroup", "var", "var_label", "row_type") | 
| 345 | ) | |
| 346 | } | |
| 347 | ||
| 348 | # Add columns from table_survtime (optional) | |
| 349 | 11x |   if (length(colvars_survtime$vars) > 0) { | 
| 350 | 10x | lyt_survtime <- split_cols_by(lyt = lyt, var = "arm") | 
| 351 | 10x | lyt_survtime <- split_cols_by_multivar( | 
| 352 | 10x | lyt = lyt_survtime, | 
| 353 | 10x | vars = colvars_survtime$vars, | 
| 354 | 10x | varlabels = colvars_survtime$labels | 
| 355 | ) | |
| 356 | ||
| 357 | # Add "All Patients" row | |
| 358 | 10x | lyt_survtime <- split_rows_by( | 
| 359 | 10x | lyt = lyt_survtime, | 
| 360 | 10x | var = "row_type", | 
| 361 | 10x |       split_fun = keep_split_levels("content"), | 
| 362 | 10x | nested = FALSE, | 
| 363 | 10x | child_labels = "hidden", | 
| 364 | 10x | parent_name = "All Patients" | 
| 365 | ) | |
| 366 | 10x | lyt_survtime <- analyze_colvars( | 
| 367 | 10x | lyt = lyt_survtime, | 
| 368 | 10x | afun = a_survival_subgroups, | 
| 369 | 10x | na_str = na_str, | 
| 370 | 10x | extra_args = extra_args | 
| 371 | ) | |
| 372 | ||
| 373 | # Add analysis rows | |
| 374 | 10x |     if ("analysis" %in% df$survtime$row_type) { | 
| 375 | 9x | lyt_survtime <- split_rows_by( | 
| 376 | 9x | lyt = lyt_survtime, | 
| 377 | 9x | var = "row_type", | 
| 378 | 9x |         split_fun = keep_split_levels("analysis"), | 
| 379 | 9x | nested = FALSE, | 
| 380 | 9x | child_labels = "hidden", | 
| 381 | 9x | parent_name = "analysis rows" | 
| 382 | ) | |
| 383 | 9x | lyt_survtime <- split_rows_by(lyt = lyt_survtime, var = "var_label", nested = TRUE) | 
| 384 | 9x | lyt_survtime <- analyze_colvars( | 
| 385 | 9x | lyt = lyt_survtime, | 
| 386 | 9x | afun = a_survival_subgroups, | 
| 387 | 9x | na_str = na_str, | 
| 388 | 9x | inclNAs = TRUE, | 
| 389 | 9x | extra_args = extra_args | 
| 390 | ) | |
| 391 | } | |
| 392 | ||
| 393 | 10x | table_survtime <- build_table(lyt_survtime, df = df$survtime) | 
| 394 |   } else { | |
| 395 | 1x | table_survtime <- NULL | 
| 396 | } | |
| 397 | ||
| 398 |   # Add columns from table_hr ("n_tot_events" or "n_tot", "hr" and "ci" required) | |
| 399 | 11x | lyt_hr <- split_cols_by(lyt = lyt, var = "arm") | 
| 400 | 11x | lyt_hr <- split_cols_by_multivar( | 
| 401 | 11x | lyt = lyt_hr, | 
| 402 | 11x | vars = colvars_hr$vars, | 
| 403 | 11x | varlabels = colvars_hr$labels | 
| 404 | ) | |
| 405 | ||
| 406 | # Add "All Patients" row | |
| 407 | 11x | lyt_hr <- split_rows_by( | 
| 408 | 11x | lyt = lyt_hr, | 
| 409 | 11x | var = "row_type", | 
| 410 | 11x |     split_fun = keep_split_levels("content"), | 
| 411 | 11x | nested = FALSE, | 
| 412 | 11x | child_labels = "hidden", | 
| 413 | 11x | parent_name = "All patient row" | 
| 414 | ) | |
| 415 | 11x | lyt_hr <- analyze_colvars( | 
| 416 | 11x | lyt = lyt_hr, | 
| 417 | 11x | afun = a_survival_subgroups, | 
| 418 | 11x | na_str = na_str, | 
| 419 | 11x | extra_args = extra_args | 
| 420 | ) %>% | |
| 421 | 11x |     append_topleft("Baseline Risk Factors") | 
| 422 | ||
| 423 | # Add analysis rows | |
| 424 | 11x |   if ("analysis" %in% df$survtime$row_type) { | 
| 425 | 10x | lyt_hr <- split_rows_by( | 
| 426 | 10x | lyt = lyt_hr, | 
| 427 | 10x | var = "row_type", | 
| 428 | 10x |       split_fun = keep_split_levels("analysis"), | 
| 429 | 10x | nested = FALSE, | 
| 430 | 10x | child_labels = "hidden", | 
| 431 | 10x | parent_name = "analysis rows" | 
| 432 | ) | |
| 433 | 10x | lyt_hr <- split_rows_by(lyt = lyt_hr, var = "var_label", nested = TRUE) | 
| 434 | 10x | lyt_hr <- analyze_colvars( | 
| 435 | 10x | lyt = lyt_hr, | 
| 436 | 10x | afun = a_survival_subgroups, | 
| 437 | 10x | na_str = na_str, | 
| 438 | 10x | inclNAs = TRUE, | 
| 439 | 10x | extra_args = extra_args | 
| 440 | ) | |
| 441 | } | |
| 442 | ||
| 443 | 11x | table_hr <- build_table(lyt_hr, df = df$hr) | 
| 444 | ||
| 445 | # Join tables, add forest plot attributes | |
| 446 | 11x |   n_tot_ids <- grep("^n_tot", colvars_hr$vars) | 
| 447 | 11x |   if (is.null(table_survtime)) { | 
| 448 | 1x | result <- table_hr | 
| 449 | 1x |     hr_id <- match("hr", colvars_hr$vars) | 
| 450 | 1x |     ci_id <- match("ci", colvars_hr$vars) | 
| 451 |   } else { | |
| 452 | 10x | result <- cbind_rtables(table_hr[, n_tot_ids], table_survtime, table_hr[, -n_tot_ids]) | 
| 453 | 10x |     hr_id <- length(n_tot_ids) + ncol(table_survtime) + match("hr", colvars_hr$vars[-n_tot_ids]) | 
| 454 | 10x |     ci_id <- length(n_tot_ids) + ncol(table_survtime) + match("ci", colvars_hr$vars[-n_tot_ids]) | 
| 455 | 10x | n_tot_ids <- seq_along(n_tot_ids) | 
| 456 | } | |
| 457 | 11x | structure( | 
| 458 | 11x | result, | 
| 459 | 11x | forest_header = paste0(rev(levels(df$survtime$arm)), "\nBetter"), | 
| 460 | 11x | col_x = hr_id, | 
| 461 | 11x | col_ci = ci_id, | 
| 462 | 11x | col_symbol_size = n_tot_ids[1] # for scaling the symbol sizes in forest plots | 
| 463 | ) | |
| 464 | } | |
| 465 | ||
| 466 | #' Labels for column variables in survival duration by subgroup table | |
| 467 | #' | |
| 468 | #' @description `r lifecycle::badge("stable")` | |
| 469 | #' | |
| 470 | #' Internal function to check variables included in [tabulate_survival_subgroups()] and create column labels. | |
| 471 | #' | |
| 472 | #' @inheritParams tabulate_survival_subgroups | |
| 473 | #' @inheritParams argument_convention | |
| 474 | #' @param method (`string`)\cr p-value method for testing hazard ratio = 1. | |
| 475 | #' | |
| 476 | #' @return A `list` of variables and their labels to tabulate. | |
| 477 | #' | |
| 478 | #' @note At least one of `n_tot` and `n_tot_events` must be provided in `vars`. | |
| 479 | #' | |
| 480 | #' @export | |
| 481 | d_survival_subgroups_colvars <- function(vars, | |
| 482 | conf_level, | |
| 483 | method, | |
| 484 |                                          time_unit = NULL) { | |
| 485 | 18x | checkmate::assert_character(vars) | 
| 486 | 18x | checkmate::assert_string(time_unit, null.ok = TRUE) | 
| 487 | 18x |   checkmate::assert_subset(c("hr", "ci"), vars) | 
| 488 | 18x |   checkmate::assert_true(any(c("n_tot", "n_tot_events") %in% vars)) | 
| 489 | 18x | checkmate::assert_subset( | 
| 490 | 18x | vars, | 
| 491 | 18x |     c("n", "n_events", "median", "n_tot", "n_tot_events", "hr", "ci", "pval") | 
| 492 | ) | |
| 493 | ||
| 494 | 18x |   propcase_time_label <- if (!is.null(time_unit)) { | 
| 495 | 17x |     paste0("Median (", time_unit, ")") | 
| 496 |   } else { | |
| 497 | 1x | "Median" | 
| 498 | } | |
| 499 | ||
| 500 | 18x | varlabels <- c( | 
| 501 | 18x | n = "n", | 
| 502 | 18x | n_events = "Events", | 
| 503 | 18x | median = propcase_time_label, | 
| 504 | 18x | n_tot = "Total n", | 
| 505 | 18x | n_tot_events = "Total Events", | 
| 506 | 18x | hr = "Hazard Ratio", | 
| 507 | 18x | ci = paste0(100 * conf_level, "% Wald CI"), | 
| 508 | 18x | pval = method | 
| 509 | ) | |
| 510 | ||
| 511 | 18x | colvars <- vars | 
| 512 | ||
| 513 | 18x | list( | 
| 514 | 18x | vars = colvars, | 
| 515 | 18x | labels = varlabels[vars] | 
| 516 | ) | |
| 517 | } | 
| 1 | #' Proportion estimation | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [estimate_proportion()] creates a layout element to estimate the proportion of responders | |
| 6 | #' within a studied population. The primary analysis variable, `vars`, indicates whether a response has occurred for | |
| 7 | #' each record. See the `method` parameter for options of methods to use when constructing the confidence interval of | |
| 8 | #' the proportion. Additionally, a stratification variable can be supplied via the `strata` element of the `variables` | |
| 9 | #' argument. | |
| 10 | #' | |
| 11 | #' @inheritParams prop_strat_wilson | |
| 12 | #' @inheritParams argument_convention | |
| 13 | #' @param method (`string`)\cr the method used to construct the confidence interval | |
| 14 | #' for proportion of successful outcomes; one of `waldcc`, `wald`, `clopper-pearson`, | |
| 15 | #' `wilson`, `wilsonc`, `strat_wilson`, `strat_wilsonc`, `agresti-coull` or `jeffreys`. | |
| 16 | #' @param long (`flag`)\cr whether a long description is required. | |
| 17 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 18 | #' | |
| 19 | #'   Options are: ``r shQuote(get_stats("estimate_proportion"), type = "sh")`` | |
| 20 | #' | |
| 21 | #' @seealso [h_proportions] | |
| 22 | #' | |
| 23 | #' @name estimate_proportion | |
| 24 | #' @order 1 | |
| 25 | NULL | |
| 26 | ||
| 27 | #' @describeIn estimate_proportion Statistics function estimating a | |
| 28 | #' proportion along with its confidence interval. | |
| 29 | #' | |
| 30 | #' @param df (`logical` or `data.frame`)\cr if only a logical vector is used, | |
| 31 | #' it indicates whether each subject is a responder or not. `TRUE` represents | |
| 32 | #' a successful outcome. If a `data.frame` is provided, also the `strata` variable | |
| 33 | #' names must be provided in `variables` as a list element with the strata strings. | |
| 34 | #' In the case of `data.frame`, the logical vector of responses must be indicated as a | |
| 35 | #' variable name in `.var`. | |
| 36 | #' | |
| 37 | #' @return | |
| 38 | #' * `s_proportion()` returns statistics `n_prop` (`n` and proportion) and `prop_ci` (proportion CI) for a | |
| 39 | #' given variable. | |
| 40 | #' | |
| 41 | #' @examples | |
| 42 | #' # Case with only logical vector. | |
| 43 | #' rsp_v <- c(1, 0, 1, 0, 1, 1, 0, 0) | |
| 44 | #' s_proportion(rsp_v) | |
| 45 | #' | |
| 46 | #' # Example for Stratified Wilson CI | |
| 47 | #' nex <- 100 # Number of example rows | |
| 48 | #' dta <- data.frame( | |
| 49 | #' "rsp" = sample(c(TRUE, FALSE), nex, TRUE), | |
| 50 | #'   "grp" = sample(c("A", "B"), nex, TRUE), | |
| 51 | #'   "f1" = sample(c("a1", "a2"), nex, TRUE), | |
| 52 | #'   "f2" = sample(c("x", "y", "z"), nex, TRUE), | |
| 53 | #' stringsAsFactors = TRUE | |
| 54 | #' ) | |
| 55 | #' | |
| 56 | #' s_proportion( | |
| 57 | #' df = dta, | |
| 58 | #' .var = "rsp", | |
| 59 | #'   variables = list(strata = c("f1", "f2")), | |
| 60 | #' conf_level = 0.90, | |
| 61 | #' method = "strat_wilson" | |
| 62 | #' ) | |
| 63 | #' | |
| 64 | #' @export | |
| 65 | s_proportion <- function(df, | |
| 66 | .var, | |
| 67 | conf_level = 0.95, | |
| 68 | method = c( | |
| 69 | "waldcc", "wald", "clopper-pearson", | |
| 70 | "wilson", "wilsonc", "strat_wilson", "strat_wilsonc", | |
| 71 | "agresti-coull", "jeffreys" | |
| 72 | ), | |
| 73 | weights = NULL, | |
| 74 | max_iterations = 50, | |
| 75 | variables = list(strata = NULL), | |
| 76 | long = FALSE, | |
| 77 |                          denom = c("n", "N_col", "N_row"), | |
| 78 |                          ...) { | |
| 79 | 182x | method <- match.arg(method) | 
| 80 | 182x | checkmate::assert_flag(long) | 
| 81 | 182x | assert_proportion_value(conf_level) | 
| 82 | 182x | args_list <- list(...) | 
| 83 | 182x | .N_row <- args_list[[".N_row"]] # nolint | 
| 84 | 182x | .N_col <- args_list[[".N_col"]] # nolint | 
| 85 | ||
| 86 | 182x |   if (!is.null(variables$strata)) { | 
| 87 | # Checks for strata | |
| 88 | ! |     if (missing(df)) stop("When doing stratified analysis a data.frame with specific columns is needed.") | 
| 89 | 9x | strata_colnames <- variables$strata | 
| 90 | 9x | checkmate::assert_character(strata_colnames, null.ok = FALSE) | 
| 91 | 9x | strata_vars <- stats::setNames(as.list(strata_colnames), strata_colnames) | 
| 92 | 9x | assert_df_with_variables(df, strata_vars) | 
| 93 | ||
| 94 | 9x | strata <- interaction(df[strata_colnames]) | 
| 95 | 9x | strata <- as.factor(strata) | 
| 96 | ||
| 97 | # Pushing down checks to prop_strat_wilson | |
| 98 | 173x |   } else if (checkmate::test_subset(method, c("strat_wilson", "strat_wilsonc"))) { | 
| 99 | ! |     stop("To use stratified methods you need to specify the strata variables.") | 
| 100 | } | |
| 101 | ||
| 102 | # Finding the Responders | |
| 103 | 182x |   if (checkmate::test_atomic_vector(df)) { | 
| 104 | 167x | rsp <- as.logical(df) | 
| 105 |   } else { | |
| 106 | 15x | rsp <- as.logical(df[[.var]]) | 
| 107 | } | |
| 108 | ||
| 109 | # Stop for stratified analysis | |
| 110 | 182x |   if (method %in% c("strat_wilson", "strat_wilsonc") && denom[1] != "n") { | 
| 111 | 1x | stop( | 
| 112 | 1x | "Stratified methods only support 'n' as the denominator (denom). ", | 
| 113 | 1x | "Consider adding negative responders directly to the dataset." | 
| 114 | ) | |
| 115 | } | |
| 116 | ||
| 117 | 181x | denom <- match.arg(denom) %>% | 
| 118 | 181x | switch( | 
| 119 | 181x | n = length(rsp), | 
| 120 | 181x | N_row = .N_row, | 
| 121 | 181x | N_col = .N_col | 
| 122 | ) | |
| 123 | 181x | n_rsp <- sum(rsp) | 
| 124 | 181x | p_hat <- ifelse(denom > 0, n_rsp / denom, 0) | 
| 125 | ||
| 126 | 181x | prop_ci <- switch(method, | 
| 127 | 181x | "clopper-pearson" = prop_clopper_pearson(rsp, n = denom, conf_level), | 
| 128 | 181x | "wilson" = prop_wilson(rsp, n = denom, conf_level), | 
| 129 | 181x | "wilsonc" = prop_wilson(rsp, n = denom, conf_level, correct = TRUE), | 
| 130 | 181x | "strat_wilson" = prop_strat_wilson(rsp, strata, weights, conf_level, max_iterations, correct = FALSE)$conf_int, | 
| 131 | 181x | "strat_wilsonc" = prop_strat_wilson(rsp, strata, weights, conf_level, max_iterations, correct = TRUE)$conf_int, | 
| 132 | 181x | "wald" = prop_wald(rsp, n = denom, conf_level), | 
| 133 | 181x | "waldcc" = prop_wald(rsp, n = denom, conf_level, correct = TRUE), | 
| 134 | 181x | "agresti-coull" = prop_agresti_coull(rsp, n = denom, conf_level), | 
| 135 | 181x | "jeffreys" = prop_jeffreys(rsp, n = denom, conf_level) | 
| 136 | ) | |
| 137 | ||
| 138 | 181x | list( | 
| 139 | 181x | "n_prop" = formatters::with_label(c(n_rsp, p_hat), "Responders"), | 
| 140 | 181x | "prop_ci" = formatters::with_label(x = 100 * prop_ci, label = d_proportion(conf_level, method, long = long)) | 
| 141 | ) | |
| 142 | } | |
| 143 | ||
| 144 | #' @describeIn estimate_proportion Formatted analysis function which is used as `afun` | |
| 145 | #' in `estimate_proportion()`. | |
| 146 | #' | |
| 147 | #' @return | |
| 148 | #' * `a_proportion()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 149 | #' | |
| 150 | #' @export | |
| 151 | a_proportion <- function(df, | |
| 152 | ..., | |
| 153 | .stats = NULL, | |
| 154 | .stat_names = NULL, | |
| 155 | .formats = NULL, | |
| 156 | .labels = NULL, | |
| 157 |                          .indent_mods = NULL) { | |
| 158 | # Check for additional parameters to the statistics function | |
| 159 | 15x | dots_extra_args <- list(...) | 
| 160 | 15x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 161 | 15x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 162 | ||
| 163 | # Check for user-defined functions | |
| 164 | 15x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 165 | 15x | .stats <- default_and_custom_stats_list$all_stats | 
| 166 | 15x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 167 | ||
| 168 | # Apply statistics function | |
| 169 | 15x | x_stats <- .apply_stat_functions( | 
| 170 | 15x | default_stat_fnc = s_proportion, | 
| 171 | 15x | custom_stat_fnc_list = custom_stat_functions, | 
| 172 | 15x | args_list = c( | 
| 173 | 15x | df = list(df), | 
| 174 | 15x | extra_afun_params, | 
| 175 | 15x | dots_extra_args | 
| 176 | ) | |
| 177 | ) | |
| 178 | ||
| 179 | # Fill in formatting defaults | |
| 180 | 14x |   .stats <- get_stats("estimate_proportion", | 
| 181 | 14x | stats_in = .stats, | 
| 182 | 14x | custom_stats_in = names(custom_stat_functions) | 
| 183 | ) | |
| 184 | 14x | x_stats <- x_stats[.stats] | 
| 185 | 14x | .formats <- get_formats_from_stats(.stats, .formats) | 
| 186 | 14x | .labels <- get_labels_from_stats( | 
| 187 | 14x | .stats, .labels, | 
| 188 | 14x | tern_defaults = c(lapply(x_stats, attr, "label"), tern_default_labels) | 
| 189 | ) | |
| 190 | 14x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods) | 
| 191 | ||
| 192 | # Auto format handling | |
| 193 | 14x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 194 | ||
| 195 | # Get and check statistical names | |
| 196 | 14x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 197 | ||
| 198 | 14x | in_rows( | 
| 199 | 14x | .list = x_stats, | 
| 200 | 14x | .formats = .formats, | 
| 201 | 14x | .names = .labels %>% .unlist_keep_nulls(), | 
| 202 | 14x | .stat_names = .stat_names, | 
| 203 | 14x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 204 | 14x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 205 | ) | |
| 206 | } | |
| 207 | ||
| 208 | #' @describeIn estimate_proportion Layout-creating function which can take statistics function arguments | |
| 209 | #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 210 | #' | |
| 211 | #' @return | |
| 212 | #' * `estimate_proportion()` returns a layout object suitable for passing to further layouting functions, | |
| 213 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 214 | #' the statistics from `s_proportion()` to the table layout. | |
| 215 | #' | |
| 216 | #' @examples | |
| 217 | #' dta_test <- data.frame( | |
| 218 | #'   USUBJID = paste0("S", 1:12), | |
| 219 | #' ARM = rep(LETTERS[1:3], each = 4), | |
| 220 | #' AVAL = rep(LETTERS[1:3], each = 4) | |
| 221 | #' ) %>% | |
| 222 | #' dplyr::mutate(is_rsp = AVAL == "A") | |
| 223 | #' | |
| 224 | #' basic_table() %>% | |
| 225 | #'   split_cols_by("ARM") %>% | |
| 226 | #' estimate_proportion(vars = "is_rsp") %>% | |
| 227 | #' build_table(df = dta_test) | |
| 228 | #' | |
| 229 | #' @export | |
| 230 | #' @order 2 | |
| 231 | estimate_proportion <- function(lyt, | |
| 232 | vars, | |
| 233 | conf_level = 0.95, | |
| 234 | method = c( | |
| 235 | "waldcc", "wald", "clopper-pearson", | |
| 236 | "wilson", "wilsonc", "strat_wilson", "strat_wilsonc", | |
| 237 | "agresti-coull", "jeffreys" | |
| 238 | ), | |
| 239 | weights = NULL, | |
| 240 | max_iterations = 50, | |
| 241 | variables = list(strata = NULL), | |
| 242 | long = FALSE, | |
| 243 | na_str = default_na_str(), | |
| 244 | nested = TRUE, | |
| 245 | ..., | |
| 246 | show_labels = "hidden", | |
| 247 | table_names = vars, | |
| 248 |                                 .stats = c("n_prop", "prop_ci"), | |
| 249 | .stat_names = NULL, | |
| 250 | .formats = NULL, | |
| 251 | .labels = NULL, | |
| 252 |                                 .indent_mods = NULL) { | |
| 253 | # Process standard extra arguments | |
| 254 | 6x |   extra_args <- list(".stats" = .stats) | 
| 255 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 256 | 3x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 257 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 258 | ! | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 259 | ||
| 260 | # Process additional arguments to the statistic function | |
| 261 | 6x | extra_args <- c( | 
| 262 | 6x | extra_args, | 
| 263 | 6x | conf_level = list(conf_level), method = list(method), weights = list(weights), | 
| 264 | 6x | max_iterations = list(max_iterations), variables = list(variables), long = list(long), | 
| 265 | ... | |
| 266 | ) | |
| 267 | ||
| 268 | # Append additional info from layout to the analysis function | |
| 269 | 6x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 270 | 6x | formals(a_proportion) <- c(formals(a_proportion), extra_args[[".additional_fun_parameters"]]) | 
| 271 | ||
| 272 | 6x | analyze( | 
| 273 | 6x | lyt = lyt, | 
| 274 | 6x | vars = vars, | 
| 275 | 6x | afun = a_proportion, | 
| 276 | 6x | na_str = na_str, | 
| 277 | 6x | nested = nested, | 
| 278 | 6x | extra_args = extra_args, | 
| 279 | 6x | show_labels = show_labels, | 
| 280 | 6x | table_names = table_names | 
| 281 | ) | |
| 282 | } | |
| 283 | ||
| 284 | #' Helper functions for calculating proportion confidence intervals | |
| 285 | #' | |
| 286 | #' @description `r lifecycle::badge("stable")` | |
| 287 | #' | |
| 288 | #' Functions to calculate different proportion confidence intervals for use in [estimate_proportion()]. | |
| 289 | #' | |
| 290 | #' @inheritParams argument_convention | |
| 291 | #' @inheritParams estimate_proportion | |
| 292 | #' | |
| 293 | #' @return Confidence interval of a proportion. | |
| 294 | #' | |
| 295 | #' @seealso [estimate_proportion], descriptive function [d_proportion()], | |
| 296 | #' and helper functions [strata_normal_quantile()] and [update_weights_strat_wilson()]. | |
| 297 | #' | |
| 298 | #' @name h_proportions | |
| 299 | NULL | |
| 300 | ||
| 301 | #' @describeIn h_proportions Calculates the Wilson interval by calling [stats::prop.test()]. | |
| 302 | #' Also referred to as Wilson score interval. | |
| 303 | #' | |
| 304 | #' @examples | |
| 305 | #' rsp <- c( | |
| 306 | #' TRUE, TRUE, TRUE, TRUE, TRUE, | |
| 307 | #' FALSE, FALSE, FALSE, FALSE, FALSE | |
| 308 | #' ) | |
| 309 | #' prop_wilson(rsp, conf_level = 0.9) | |
| 310 | #' | |
| 311 | #' @export | |
| 312 | prop_wilson <- function(rsp, n = length(rsp), conf_level, correct = FALSE) { | |
| 313 | 5x | y <- stats::prop.test( | 
| 314 | 5x | sum(rsp), | 
| 315 | 5x | n, | 
| 316 | 5x | correct = correct, | 
| 317 | 5x | conf.level = conf_level | 
| 318 | ) | |
| 319 | ||
| 320 | 5x | as.numeric(y$conf.int) | 
| 321 | } | |
| 322 | ||
| 323 | #' @describeIn h_proportions Calculates the stratified Wilson confidence | |
| 324 | #'   interval for unequal proportions as described in \insertCite{Yan2010-jt;textual}{tern} | |
| 325 | #' | |
| 326 | #' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`. | |
| 327 | #' @param weights (`numeric` or `NULL`)\cr weights for each level of the strata. If `NULL`, they are | |
| 328 | #'   estimated using the iterative algorithm proposed in \insertCite{Yan2010-jt;textual}{tern} that | |
| 329 | #' minimizes the weighted squared length of the confidence interval. | |
| 330 | #' @param max_iterations (`count`)\cr maximum number of iterations for the iterative procedure used | |
| 331 | #' to find estimates of optimal weights. | |
| 332 | #' @param correct (`flag`)\cr whether to include the continuity correction. For further information, see for example | |
| 333 | #' for [stats::prop.test()]. | |
| 334 | #' | |
| 335 | #' @references | |
| 336 | #' \insertRef{Yan2010-jt}{tern} | |
| 337 | #' | |
| 338 | #' @examples | |
| 339 | #' # Stratified Wilson confidence interval with unequal probabilities | |
| 340 | #' | |
| 341 | #' set.seed(1) | |
| 342 | #' rsp <- sample(c(TRUE, FALSE), 100, TRUE) | |
| 343 | #' strata_data <- data.frame( | |
| 344 | #'   "f1" = sample(c("a", "b"), 100, TRUE), | |
| 345 | #'   "f2" = sample(c("x", "y", "z"), 100, TRUE), | |
| 346 | #' stringsAsFactors = TRUE | |
| 347 | #' ) | |
| 348 | #' strata <- interaction(strata_data) | |
| 349 | #' n_strata <- ncol(table(rsp, strata)) # Number of strata | |
| 350 | #' | |
| 351 | #' prop_strat_wilson( | |
| 352 | #' rsp = rsp, strata = strata, | |
| 353 | #' conf_level = 0.90 | |
| 354 | #' ) | |
| 355 | #' | |
| 356 | #' # Not automatic setting of weights | |
| 357 | #' prop_strat_wilson( | |
| 358 | #' rsp = rsp, strata = strata, | |
| 359 | #' weights = rep(1 / n_strata, n_strata), | |
| 360 | #' conf_level = 0.90 | |
| 361 | #' ) | |
| 362 | #' | |
| 363 | #' @export | |
| 364 | prop_strat_wilson <- function(rsp, | |
| 365 | strata, | |
| 366 | weights = NULL, | |
| 367 | conf_level = 0.95, | |
| 368 | max_iterations = NULL, | |
| 369 |                               correct = FALSE) { | |
| 370 | 20x | checkmate::assert_logical(rsp, any.missing = FALSE) | 
| 371 | 20x | checkmate::assert_factor(strata, len = length(rsp)) | 
| 372 | 20x | assert_proportion_value(conf_level) | 
| 373 | ||
| 374 | 20x | tbl <- table(rsp, strata) | 
| 375 | 20x | n_strata <- length(unique(strata)) | 
| 376 | ||
| 377 | # Checking the weights and maximum number of iterations. | |
| 378 | 20x | do_iter <- FALSE | 
| 379 | 20x |   if (is.null(weights)) { | 
| 380 | 6x | weights <- rep(1 / n_strata, n_strata) # Initialization for iterative procedure | 
| 381 | 6x | do_iter <- TRUE | 
| 382 | ||
| 383 | # Iteration parameters | |
| 384 | 2x | if (is.null(max_iterations)) max_iterations <- 10 | 
| 385 | 6x | checkmate::assert_int(max_iterations, na.ok = FALSE, null.ok = FALSE, lower = 1) | 
| 386 | } | |
| 387 | 20x | checkmate::assert_numeric(weights, lower = 0, upper = 1, any.missing = FALSE, len = n_strata) | 
| 388 | 20x | sum_weights <- checkmate::assert_int(sum(weights)) | 
| 389 | ! |   if (as.integer(sum_weights + 0.5) != 1L) stop("Sum of weights must be 1L.") | 
| 390 | ||
| 391 | 20x | xs <- tbl["TRUE", ] | 
| 392 | 20x | ns <- colSums(tbl) | 
| 393 | 20x | use_stratum <- (ns > 0) | 
| 394 | 20x | ns <- ns[use_stratum] | 
| 395 | 20x | xs <- xs[use_stratum] | 
| 396 | 20x | ests <- xs / ns | 
| 397 | 20x | vars <- ests * (1 - ests) / ns | 
| 398 | ||
| 399 | 20x | strata_qnorm <- strata_normal_quantile(vars, weights, conf_level) | 
| 400 | ||
| 401 | # Iterative setting of weights if they were not set externally | |
| 402 | 20x |   weights_new <- if (do_iter) { | 
| 403 | 6x | update_weights_strat_wilson(vars, strata_qnorm, weights, ns, max_iterations, conf_level)$weights | 
| 404 |   } else { | |
| 405 | 14x | weights | 
| 406 | } | |
| 407 | ||
| 408 | 20x | strata_conf_level <- 2 * stats::pnorm(strata_qnorm) - 1 | 
| 409 | ||
| 410 | 20x | ci_by_strata <- Map( | 
| 411 | 20x |     function(x, n) { | 
| 412 | # Classic Wilson's confidence interval | |
| 413 | 139x | suppressWarnings(stats::prop.test(x, n, correct = correct, conf.level = strata_conf_level)$conf.int) | 
| 414 | }, | |
| 415 | 20x | x = xs, | 
| 416 | 20x | n = ns | 
| 417 | ) | |
| 418 | 20x | lower_by_strata <- sapply(ci_by_strata, "[", 1L) | 
| 419 | 20x | upper_by_strata <- sapply(ci_by_strata, "[", 2L) | 
| 420 | ||
| 421 | 20x | lower <- sum(weights_new * lower_by_strata) | 
| 422 | 20x | upper <- sum(weights_new * upper_by_strata) | 
| 423 | ||
| 424 | # Return values | |
| 425 | 20x |   if (do_iter) { | 
| 426 | 6x | list( | 
| 427 | 6x | conf_int = c( | 
| 428 | 6x | lower = lower, | 
| 429 | 6x | upper = upper | 
| 430 | ), | |
| 431 | 6x | weights = weights_new | 
| 432 | ) | |
| 433 |   } else { | |
| 434 | 14x | list( | 
| 435 | 14x | conf_int = c( | 
| 436 | 14x | lower = lower, | 
| 437 | 14x | upper = upper | 
| 438 | ) | |
| 439 | ) | |
| 440 | } | |
| 441 | } | |
| 442 | ||
| 443 | #' @describeIn h_proportions Calculates the Clopper-Pearson interval by calling [stats::binom.test()]. | |
| 444 | #' Also referred to as the `exact` method. | |
| 445 | #' | |
| 446 | #' @param n (`count`)\cr number of participants (if `denom = "N_col"`) or the number of responders | |
| 447 | #' (if `denom = "n"`, the default). | |
| 448 | #' | |
| 449 | #' @examples | |
| 450 | #' prop_clopper_pearson(rsp, conf_level = .95) | |
| 451 | #' | |
| 452 | #' @export | |
| 453 | prop_clopper_pearson <- function(rsp, n = length(rsp), conf_level) { | |
| 454 | 1x | y <- stats::binom.test( | 
| 455 | 1x | x = sum(rsp), | 
| 456 | 1x | n = n, | 
| 457 | 1x | conf.level = conf_level | 
| 458 | ) | |
| 459 | 1x | as.numeric(y$conf.int) | 
| 460 | } | |
| 461 | ||
| 462 | #' @describeIn h_proportions Calculates the Wald interval by following the usual textbook definition | |
| 463 | #' for a single proportion confidence interval using the normal approximation. | |
| 464 | #' | |
| 465 | #' @param correct (`flag`)\cr whether to apply continuity correction. | |
| 466 | #' | |
| 467 | #' @examples | |
| 468 | #' prop_wald(rsp, conf_level = 0.95) | |
| 469 | #' prop_wald(rsp, conf_level = 0.95, correct = TRUE) | |
| 470 | #' | |
| 471 | #' @export | |
| 472 | prop_wald <- function(rsp, n = length(rsp), conf_level, correct = FALSE) { | |
| 473 | 165x | p_hat <- ifelse(n > 0, sum(rsp) / n, 0) | 
| 474 | 165x | z <- stats::qnorm((1 + conf_level) / 2) | 
| 475 | 165x | q_hat <- 1 - p_hat | 
| 476 | 165x | correct <- if (correct) 1 / (2 * n) else 0 | 
| 477 | ||
| 478 | 165x | err <- z * sqrt(p_hat * q_hat) / sqrt(n) + correct | 
| 479 | 165x | l_ci <- max(0, p_hat - err) | 
| 480 | 165x | u_ci <- min(1, p_hat + err) | 
| 481 | ||
| 482 | 165x | c(l_ci, u_ci) | 
| 483 | } | |
| 484 | ||
| 485 | #' @describeIn h_proportions Calculates the Agresti-Coull interval. Constructed (for 95% CI) by adding two successes | |
| 486 | #' and two failures to the data and then using the Wald formula to construct a CI. | |
| 487 | #' | |
| 488 | #' @examples | |
| 489 | #' prop_agresti_coull(rsp, conf_level = 0.95) | |
| 490 | #' | |
| 491 | #' @export | |
| 492 | prop_agresti_coull <- function(rsp, n = length(rsp), conf_level) { | |
| 493 | 3x | x_sum <- sum(rsp) | 
| 494 | 3x | z <- stats::qnorm((1 + conf_level) / 2) | 
| 495 | ||
| 496 | # Add here both z^2 / 2 successes and failures. | |
| 497 | 3x | x_sum_tilde <- x_sum + z^2 / 2 | 
| 498 | 3x | n_tilde <- n + z^2 | 
| 499 | ||
| 500 | # Then proceed as with the Wald interval. | |
| 501 | 3x | p_tilde <- x_sum_tilde / n_tilde | 
| 502 | 3x | q_tilde <- 1 - p_tilde | 
| 503 | 3x | err <- z * sqrt(p_tilde * q_tilde) / sqrt(n_tilde) | 
| 504 | 3x | l_ci <- max(0, p_tilde - err) | 
| 505 | 3x | u_ci <- min(1, p_tilde + err) | 
| 506 | ||
| 507 | 3x | c(l_ci, u_ci) | 
| 508 | } | |
| 509 | ||
| 510 | #' @describeIn h_proportions Calculates the Jeffreys interval, an equal-tailed interval based on the | |
| 511 | #' non-informative Jeffreys prior for a binomial proportion. | |
| 512 | #' | |
| 513 | #' @examples | |
| 514 | #' prop_jeffreys(rsp, conf_level = 0.95) | |
| 515 | #' | |
| 516 | #' @export | |
| 517 | prop_jeffreys <- function(rsp, n = length(rsp), conf_level) { | |
| 518 | 5x | x_sum <- sum(rsp) | 
| 519 | ||
| 520 | 5x | alpha <- 1 - conf_level | 
| 521 | 5x | l_ci <- ifelse( | 
| 522 | 5x | x_sum == 0, | 
| 523 | 5x | 0, | 
| 524 | 5x | stats::qbeta(alpha / 2, x_sum + 0.5, n - x_sum + 0.5) | 
| 525 | ) | |
| 526 | ||
| 527 | 5x | u_ci <- ifelse( | 
| 528 | 5x | x_sum == n, | 
| 529 | 5x | 1, | 
| 530 | 5x | stats::qbeta(1 - alpha / 2, x_sum + 0.5, n - x_sum + 0.5) | 
| 531 | ) | |
| 532 | ||
| 533 | 5x | c(l_ci, u_ci) | 
| 534 | } | |
| 535 | ||
| 536 | #' Description of the proportion summary | |
| 537 | #' | |
| 538 | #' @description `r lifecycle::badge("stable")` | |
| 539 | #' | |
| 540 | #' This is a helper function that describes the analysis in [s_proportion()]. | |
| 541 | #' | |
| 542 | #' @inheritParams s_proportion | |
| 543 | #' @param long (`flag`)\cr whether a long or a short (default) description is required. | |
| 544 | #' | |
| 545 | #' @return String describing the analysis. | |
| 546 | #' | |
| 547 | #' @export | |
| 548 | d_proportion <- function(conf_level, | |
| 549 | method, | |
| 550 |                          long = FALSE) { | |
| 551 | 181x | label <- paste0(conf_level * 100, "% CI") | 
| 552 | ||
| 553 | ! | if (long) label <- paste(label, "for Response Rates") | 
| 554 | ||
| 555 | 181x | method_part <- switch(method, | 
| 556 | 181x | "clopper-pearson" = "Clopper-Pearson", | 
| 557 | 181x | "waldcc" = "Wald, with correction", | 
| 558 | 181x | "wald" = "Wald, without correction", | 
| 559 | 181x | "wilson" = "Wilson, without correction", | 
| 560 | 181x | "strat_wilson" = "Stratified Wilson, without correction", | 
| 561 | 181x | "wilsonc" = "Wilson, with correction", | 
| 562 | 181x | "strat_wilsonc" = "Stratified Wilson, with correction", | 
| 563 | 181x | "agresti-coull" = "Agresti-Coull", | 
| 564 | 181x | "jeffreys" = "Jeffreys", | 
| 565 | 181x | stop(paste(method, "does not have a description")) | 
| 566 | ) | |
| 567 | ||
| 568 | 181x |   paste0(label, " (", method_part, ")") | 
| 569 | } | |
| 570 | ||
| 571 | #' Helper function for the estimation of stratified quantiles | |
| 572 | #' | |
| 573 | #' @description `r lifecycle::badge("stable")` | |
| 574 | #' | |
| 575 | #' This function wraps the estimation of stratified percentiles when we assume | |
| 576 | #' the approximation for large numbers. This is necessary only in the case | |
| 577 | #' proportions for each strata are unequal. | |
| 578 | #' | |
| 579 | #' @inheritParams argument_convention | |
| 580 | #' @inheritParams prop_strat_wilson | |
| 581 | #' | |
| 582 | #' @return Stratified quantile. | |
| 583 | #' | |
| 584 | #' @seealso [prop_strat_wilson()] | |
| 585 | #' | |
| 586 | #' @examples | |
| 587 | #' strata_data <- table(data.frame( | |
| 588 | #' "f1" = sample(c(TRUE, FALSE), 100, TRUE), | |
| 589 | #'   "f2" = sample(c("x", "y", "z"), 100, TRUE), | |
| 590 | #' stringsAsFactors = TRUE | |
| 591 | #' )) | |
| 592 | #' ns <- colSums(strata_data) | |
| 593 | #' ests <- strata_data["TRUE", ] / ns | |
| 594 | #' vars <- ests * (1 - ests) / ns | |
| 595 | #' weights <- rep(1 / length(ns), length(ns)) | |
| 596 | #' | |
| 597 | #' strata_normal_quantile(vars, weights, 0.95) | |
| 598 | #' | |
| 599 | #' @export | |
| 600 | strata_normal_quantile <- function(vars, weights, conf_level) { | |
| 601 | 43x | summands <- weights^2 * vars | 
| 602 | # Stratified quantile | |
| 603 | 43x | sqrt(sum(summands)) / sum(sqrt(summands)) * stats::qnorm((1 + conf_level) / 2) | 
| 604 | } | |
| 605 | ||
| 606 | #' Helper function for the estimation of weights for `prop_strat_wilson()` | |
| 607 | #' | |
| 608 | #' @description `r lifecycle::badge("stable")` | |
| 609 | #' | |
| 610 | #' This function wraps the iteration procedure that allows you to estimate | |
| 611 | #' the weights for each proportional strata. This assumes to minimize the | |
| 612 | #' weighted squared length of the confidence interval. | |
| 613 | #' | |
| 614 | #' @inheritParams prop_strat_wilson | |
| 615 | #' @param vars (`numeric`)\cr normalized proportions for each strata. | |
| 616 | #' @param strata_qnorm (`numeric(1)`)\cr initial estimation with identical weights of the quantiles. | |
| 617 | #' @param initial_weights (`numeric`)\cr initial weights used to calculate `strata_qnorm`. This can | |
| 618 | #' be optimized in the future if we need to estimate better initial weights. | |
| 619 | #' @param n_per_strata (`numeric`)\cr number of elements in each strata. | |
| 620 | #' @param max_iterations (`integer(1)`)\cr maximum number of iterations to be tried. Convergence is always checked. | |
| 621 | #' @param tol (`numeric(1)`)\cr tolerance threshold for convergence. | |
| 622 | #' | |
| 623 | #' @return A `list` of 3 elements: `n_it`, `weights`, and `diff_v`. | |
| 624 | #' | |
| 625 | #' @seealso For references and details see [prop_strat_wilson()]. | |
| 626 | #' | |
| 627 | #' @examples | |
| 628 | #' vs <- c(0.011, 0.013, 0.012, 0.014, 0.017, 0.018) | |
| 629 | #' sq <- 0.674 | |
| 630 | #' ws <- rep(1 / length(vs), length(vs)) | |
| 631 | #' ns <- c(22, 18, 17, 17, 14, 12) | |
| 632 | #' | |
| 633 | #' update_weights_strat_wilson(vs, sq, ws, ns, 100, 0.95, 0.001) | |
| 634 | #' | |
| 635 | #' @export | |
| 636 | update_weights_strat_wilson <- function(vars, | |
| 637 | strata_qnorm, | |
| 638 | initial_weights, | |
| 639 | n_per_strata, | |
| 640 | max_iterations = 50, | |
| 641 | conf_level = 0.95, | |
| 642 |                                         tol = 0.001) { | |
| 643 | 9x | it <- 0 | 
| 644 | 9x | diff_v <- NULL | 
| 645 | ||
| 646 | 9x |   while (it < max_iterations) { | 
| 647 | 21x | it <- it + 1 | 
| 648 | 21x | weights_new_t <- (1 + strata_qnorm^2 / n_per_strata)^2 | 
| 649 | 21x | weights_new_b <- (vars + strata_qnorm^2 / (4 * n_per_strata^2)) | 
| 650 | 21x | weights_new <- weights_new_t / weights_new_b | 
| 651 | 21x | weights_new <- weights_new / sum(weights_new) | 
| 652 | 21x | strata_qnorm <- strata_normal_quantile(vars, weights_new, conf_level) | 
| 653 | 21x | diff_v <- c(diff_v, sum(abs(weights_new - initial_weights))) | 
| 654 | 8x | if (diff_v[length(diff_v)] < tol) break | 
| 655 | 13x | initial_weights <- weights_new | 
| 656 | } | |
| 657 | ||
| 658 | 9x |   if (it == max_iterations) { | 
| 659 | 1x |     warning("The heuristic to find weights did not converge with max_iterations = ", max_iterations) | 
| 660 | } | |
| 661 | ||
| 662 | 9x | list( | 
| 663 | 9x | "n_it" = it, | 
| 664 | 9x | "weights" = weights_new, | 
| 665 | 9x | "diff_v" = diff_v | 
| 666 | ) | |
| 667 | } | 
| 1 | #' Get default statistical methods and their associated formats, labels, and indent modifiers | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("experimental")` | |
| 4 | #' | |
| 5 | #' Utility functions to get valid statistic methods for different method groups | |
| 6 | #' (`.stats`) and their associated formats (`.formats`), labels (`.labels`), and indent modifiers | |
| 7 | #' (`.indent_mods`). This utility is used across `tern`, but some of its working principles can be | |
| 8 | #' seen in [analyze_vars()]. See notes to understand why this is experimental. | |
| 9 | #' | |
| 10 | #' @param stats (`character`)\cr statistical methods to return defaults for. | |
| 11 | #' @param levels_per_stats (named `list` of `character` or `NULL`)\cr named list where the name of each element is a | |
| 12 | #' statistic from `stats` and each element is the levels of a `factor` or `character` variable (or variable name), | |
| 13 | #' each corresponding to a single row, for which the named statistic should be calculated for. If a statistic is only | |
| 14 | #' calculated once (one row), the element can be either `NULL` or the name of the statistic. Each list element will be | |
| 15 | #' flattened such that the names of the list elements returned by the function have the format `statistic.level` (or | |
| 16 | #' just `statistic` for statistics calculated for a single row). Defaults to `NULL`. | |
| 17 | #' @param tern_defaults (`list` or `vector`)\cr defaults to use to fill in missing values if no user input is given. | |
| 18 | #' Must be of the same type as the values that are being filled in (e.g. indentation must be integers). | |
| 19 | #' | |
| 20 | #' @details | |
| 21 | #' Current choices for `type` are `counts` and `numeric` for [analyze_vars()] and affect `get_stats()`. | |
| 22 | #' | |
| 23 | #' @note | |
| 24 | #' These defaults are experimental because we use the names of functions to retrieve the default | |
| 25 | #' statistics. This should be generalized in groups of methods according to more reasonable groupings. | |
| 26 | #' | |
| 27 | #' @name default_stats_formats_labels | |
| 28 | NULL | |
| 29 | ||
| 30 | #' @describeIn default_stats_formats_labels Get statistics available for a given method | |
| 31 | #' group (analyze function). To check available defaults see `tern::tern_default_stats` list. | |
| 32 | #' | |
| 33 | #' @param method_groups (`character`)\cr indicates the statistical method group (`tern` analyze function) | |
| 34 | #' to retrieve default statistics for. A character vector can be used to specify more than one statistical | |
| 35 | #' method group. | |
| 36 | #' @param stats_in (`character`)\cr statistics to retrieve for the selected method group. If custom statistical | |
| 37 | #' functions are used, `stats_in` needs to have them in too. | |
| 38 | #' @param custom_stats_in (`character`)\cr custom statistics to add to the default statistics. | |
| 39 | #' @param add_pval (`flag`)\cr should `"pval"` (or `"pval_counts"` if `method_groups` contains | |
| 40 | #' `"analyze_vars_counts"`) be added to the statistical methods? | |
| 41 | #' | |
| 42 | #' @return | |
| 43 | #' * `get_stats()` returns a `character` vector of statistical methods. | |
| 44 | #' | |
| 45 | #' @examples | |
| 46 | #' # analyze_vars is numeric | |
| 47 | #' num_stats <- get_stats("analyze_vars_numeric") # also the default | |
| 48 | #' | |
| 49 | #' # Other type | |
| 50 | #' cnt_stats <- get_stats("analyze_vars_counts") | |
| 51 | #' | |
| 52 | #' # Weirdly taking the pval from count_occurrences | |
| 53 | #' only_pval <- get_stats("count_occurrences", add_pval = TRUE, stats_in = "pval") | |
| 54 | #' | |
| 55 | #' # All count_occurrences | |
| 56 | #' all_cnt_occ <- get_stats("count_occurrences") | |
| 57 | #' | |
| 58 | #' # Multiple | |
| 59 | #' get_stats(c("count_occurrences", "analyze_vars_counts")) | |
| 60 | #' | |
| 61 | #' @export | |
| 62 | get_stats <- function(method_groups = "analyze_vars_numeric", | |
| 63 |                       stats_in = NULL, custom_stats_in = NULL, add_pval = FALSE) { | |
| 64 | 1646x | checkmate::assert_character(method_groups) | 
| 65 | 1646x | checkmate::assert_character(stats_in, null.ok = TRUE) | 
| 66 | 1646x | checkmate::assert_character(custom_stats_in, null.ok = TRUE) | 
| 67 | 1646x | checkmate::assert_flag(add_pval) | 
| 68 | ||
| 69 | # Default is still numeric | |
| 70 | 1646x |   if (any(method_groups == "analyze_vars")) { | 
| 71 | 3x | method_groups[method_groups == "analyze_vars"] <- "analyze_vars_numeric" | 
| 72 | } | |
| 73 | ||
| 74 | 1646x |   type_tmp <- ifelse(any(grepl("counts$", method_groups)), "counts", "numeric") # for pval checks | 
| 75 | ||
| 76 | # Defaults for loop | |
| 77 | 1646x | out <- NULL | 
| 78 | ||
| 79 | # Loop for multiple method groups | |
| 80 | 1646x |   for (mgi in method_groups) { | 
| 81 | 1673x |     if (mgi %in% names(tern_default_stats)) { | 
| 82 | 1672x | out_tmp <- tern_default_stats[[mgi]] | 
| 83 |     } else { | |
| 84 | 1x |       stop("The selected method group (", mgi, ") has no default statistical method.") | 
| 85 | } | |
| 86 | 1672x | out <- unique(c(out, out_tmp)) | 
| 87 | } | |
| 88 | ||
| 89 | # Add custom stats | |
| 90 | 1645x | out <- c(out, custom_stats_in) | 
| 91 | ||
| 92 | # If you added pval to the stats_in you certainly want it | |
| 93 | 1645x |   if (!is.null(stats_in) && any(grepl("^pval", stats_in))) { | 
| 94 | 136x |     stats_in_pval_value <- stats_in[grepl("^pval", stats_in)] | 
| 95 | ||
| 96 | # Must be only one value between choices | |
| 97 | 136x |     checkmate::assert_choice(stats_in_pval_value, c("pval", "pval_counts", "pvalue")) | 
| 98 | ||
| 99 | # Mismatch with counts and numeric | |
| 100 | 135x |     if (any(grepl("counts", method_groups)) && stats_in_pval_value != "pval_counts" || | 
| 101 | 135x |       any(grepl("numeric", method_groups)) && stats_in_pval_value != "pval") { # nolint | 
| 102 | 2x | stop( | 
| 103 | 2x |         "Inserted p-value (", stats_in_pval_value, ") is not valid for type ", | 
| 104 | 2x | type_tmp, ". Use ", paste(ifelse(stats_in_pval_value == "pval", "pval_counts", "pval")), | 
| 105 | 2x | " instead." | 
| 106 | ) | |
| 107 | } | |
| 108 | ||
| 109 | # Lets add it even if present (thanks to unique) | |
| 110 | 133x | add_pval <- TRUE | 
| 111 | } | |
| 112 | ||
| 113 | # Mainly used in "analyze_vars" but it could be necessary elsewhere | |
| 114 | 1642x |   if (isTRUE(add_pval)) { | 
| 115 | 143x |     if (any(grepl("counts", method_groups))) { | 
| 116 | 16x | out <- unique(c(out, "pval_counts")) | 
| 117 |     } else { | |
| 118 | 127x | out <- unique(c(out, "pval")) | 
| 119 | } | |
| 120 | } | |
| 121 | ||
| 122 | # Filtering for stats_in (character vector) | |
| 123 | 1642x |   if (!is.null(stats_in)) { | 
| 124 | 1590x | out <- intersect(stats_in, out) # It orders them too | 
| 125 | } | |
| 126 | ||
| 127 | # If intersect did not find matches (and no pval?) -> error | |
| 128 | 1642x |   if (length(out) == 0) { | 
| 129 | 2x | stop( | 
| 130 | 2x |       "The selected method group(s) (", paste0(method_groups, collapse = ", "), ")", | 
| 131 | 2x | " do not have the required default statistical methods:\n", | 
| 132 | 2x | paste0(stats_in, collapse = " ") | 
| 133 | ) | |
| 134 | } | |
| 135 | ||
| 136 | 1640x | out | 
| 137 | } | |
| 138 | ||
| 139 | #' @describeIn default_stats_formats_labels Get statistical *names* available for a given method | |
| 140 | #' group (analyze function). Please use the `s_*` functions to get the statistical names. | |
| 141 | #' @param stat_results (`list`)\cr list of statistical results. It should be used close to the end of | |
| 142 | #' a statistical function. See examples for a structure with two statistical results and two groups. | |
| 143 | #' @param stat_names_in (`character`)\cr custom modification of statistical values. | |
| 144 | #' | |
| 145 | #' @return | |
| 146 | #' * `get_stat_names()` returns a named list of `character` vectors, indicating the names of | |
| 147 | #' statistical outputs. | |
| 148 | #' | |
| 149 | #' @examples | |
| 150 | #' stat_results <- list( | |
| 151 | #'   "n" = list("M" = 1, "F" = 2), | |
| 152 | #'   "count_fraction" = list("M" = c(1, 0.2), "F" = c(2, 0.1)) | |
| 153 | #' ) | |
| 154 | #' get_stat_names(stat_results) | |
| 155 | #' get_stat_names(stat_results, list("n" = "argh")) | |
| 156 | #' | |
| 157 | #' @export | |
| 158 | get_stat_names <- function(stat_results, stat_names_in = NULL) { | |
| 159 | 1576x | checkmate::assert_character(names(stat_results), min.len = 1) | 
| 160 | 1576x | checkmate::assert_list(stat_names_in, null.ok = TRUE) | 
| 161 | ||
| 162 | 1576x |   stat_nms_from_stats <- lapply(stat_results, function(si) { | 
| 163 | 5699x | nm <- names(si) | 
| 164 | 5699x |     if (is.null(nm)) { | 
| 165 | 2718x | nm <- rep(NA_character_, length(si)) # no statistical names | 
| 166 | } | |
| 167 | 5699x | nm | 
| 168 | }) | |
| 169 | ||
| 170 | # Modify some with custom stat names | |
| 171 | 1576x |   if (!is.null(stat_names_in)) { | 
| 172 | # Stats is the main | |
| 173 | 6x | common_names <- intersect(names(stat_nms_from_stats), names(stat_names_in)) | 
| 174 | 6x | stat_nms_from_stats[common_names] <- stat_names_in[common_names] | 
| 175 | } | |
| 176 | ||
| 177 | 1576x | stat_nms_from_stats | 
| 178 | } | |
| 179 | ||
| 180 | # Utility function used to separate custom stats (user-defined functions) from defaults | |
| 181 | .split_std_from_custom_stats <- function(stats_in) { | |
| 182 | 873x | out <- list(default_stats = NULL, custom_stats = NULL, all_stats = NULL) | 
| 183 | 873x |   if (is.list(stats_in)) { | 
| 184 | 12x | is_custom_fnc <- sapply(stats_in, is.function) | 
| 185 | 12x | checkmate::assert_list(stats_in[is_custom_fnc], types = "function", names = "named") | 
| 186 | 12x | out[["custom_stats"]] <- stats_in[is_custom_fnc] | 
| 187 | 12x | out[["default_stats"]] <- unlist(stats_in[!is_custom_fnc]) | 
| 188 | 12x | all_stats <- names(stats_in) # to keep the order | 
| 189 | 12x | all_stats[!is_custom_fnc] <- out[["default_stats"]] | 
| 190 | 12x | out[["all_stats"]] <- all_stats | 
| 191 |   } else { | |
| 192 | 861x | out[["default_stats"]] <- out[["all_stats"]] <- stats_in | 
| 193 | } | |
| 194 | 873x | out | 
| 195 | } | |
| 196 | ||
| 197 | # Utility function to apply statistical functions | |
| 198 | .apply_stat_functions <- function(default_stat_fnc, custom_stat_fnc_list, args_list) { | |
| 199 | # Default checks | |
| 200 | 896x | checkmate::assert_function(default_stat_fnc) | 
| 201 | 896x | checkmate::assert_list(custom_stat_fnc_list, types = "function", null.ok = TRUE, names = "named") | 
| 202 | 896x | checkmate::assert_list(args_list) | 
| 203 | ||
| 204 | # Checking custom stats have same formals | |
| 205 | 896x |   if (!is.null(custom_stat_fnc_list)) { | 
| 206 | 12x | fundamental_call_to_data <- names(formals(default_stat_fnc))[[1]] | 
| 207 | 12x |     for (fnc in custom_stat_fnc_list) { | 
| 208 | 17x |       if (!identical(names(formals(fnc))[[1]], fundamental_call_to_data)) { | 
| 209 | 1x | stop( | 
| 210 | 1x | "The first parameter of a custom statistical function needs to be the same (it can be `df` or `x`) ", | 
| 211 | 1x | "as the default statistical function. In this case your custom function has ", names(formals(fnc))[[1]], | 
| 212 | 1x | " as first parameter, while the default function has ", fundamental_call_to_data, "." | 
| 213 | ) | |
| 214 | } | |
| 215 | 16x |       if (!any(names(formals(fnc)) == "...")) { | 
| 216 | 1x | stop( | 
| 217 | 1x | "The custom statistical function needs to have `...` as a parameter to accept additional arguments. ", | 
| 218 | 1x | "In this case your custom function does not have `...`." | 
| 219 | ) | |
| 220 | } | |
| 221 | } | |
| 222 | } | |
| 223 | ||
| 224 | # Applying | |
| 225 | 894x | out_default <- do.call(default_stat_fnc, args = args_list) | 
| 226 | 892x | out_custom <- lapply(custom_stat_fnc_list, function(fnc) do.call(fnc, args = args_list)) | 
| 227 | ||
| 228 | # Merging | |
| 229 | 892x | c(out_default, out_custom) | 
| 230 | } | |
| 231 | ||
| 232 | #' @describeIn default_stats_formats_labels Get formats corresponding to a list of statistics. | |
| 233 | #' To check available defaults see list `tern::tern_default_formats`. | |
| 234 | #' | |
| 235 | #' @param formats_in (named `vector`)\cr custom formats to use instead of defaults. Can be a character vector with | |
| 236 | #' values from [formatters::list_valid_format_labels()] or custom format functions. Defaults to `NULL` for any rows | |
| 237 | #' with no value is provided. | |
| 238 | #' | |
| 239 | #' @return | |
| 240 | #' * `get_formats_from_stats()` returns a named list of formats as strings or functions. | |
| 241 | #' | |
| 242 | #' @note Formats in `tern` and `rtables` can be functions that take in the table cell value and | |
| 243 | #'   return a string. This is well documented in `vignette("custom_appearance", package = "rtables")`. | |
| 244 | #' | |
| 245 | #' @examples | |
| 246 | #' # Defaults formats | |
| 247 | #' get_formats_from_stats(num_stats) | |
| 248 | #' get_formats_from_stats(cnt_stats) | |
| 249 | #' get_formats_from_stats(only_pval) | |
| 250 | #' get_formats_from_stats(all_cnt_occ) | |
| 251 | #' | |
| 252 | #' # Addition of customs | |
| 253 | #' get_formats_from_stats(all_cnt_occ, formats_in = c("fraction" = c("xx"))) | |
| 254 | #' get_formats_from_stats(all_cnt_occ, formats_in = list("fraction" = c("xx.xx", "xx"))) | |
| 255 | #' | |
| 256 | #' @seealso [formatting_functions] | |
| 257 | #' | |
| 258 | #' @export | |
| 259 | get_formats_from_stats <- function(stats, | |
| 260 | formats_in = NULL, | |
| 261 | levels_per_stats = NULL, | |
| 262 |                                    tern_defaults = tern_default_formats) { | |
| 263 | 1669x | checkmate::assert_character(stats, min.len = 1) | 
| 264 | # It may be a list if there is a function in the formats | |
| 265 | 1669x |   if (checkmate::test_list(formats_in, null.ok = TRUE)) { | 
| 266 | 1550x | checkmate::assert_list(formats_in, null.ok = TRUE) | 
| 267 | # Or it may be a vector of characters | |
| 268 |   } else { | |
| 269 | 119x | checkmate::assert_character(formats_in, null.ok = TRUE) | 
| 270 | } | |
| 271 | 1669x | checkmate::assert_list(levels_per_stats, null.ok = TRUE) | 
| 272 | ||
| 273 | # If unnamed formats given as formats_in and same number of stats, use one format per stat | |
| 274 | if ( | |
| 275 | 1669x | !is.null(formats_in) && length(formats_in) == length(stats) && | 
| 276 | 1669x | is.null(names(formats_in)) && is.null(levels_per_stats) | 
| 277 |   ) { | |
| 278 | 2x | out <- as.list(formats_in) %>% setNames(stats) | 
| 279 | 2x | return(out) | 
| 280 | } | |
| 281 | ||
| 282 | # If levels_per_stats not given, assume one row per statistic | |
| 283 | 377x | if (is.null(levels_per_stats)) levels_per_stats <- as.list(stats) %>% setNames(stats) | 
| 284 | ||
| 285 | # Apply custom formats | |
| 286 | 1667x | out <- .fill_in_vals_by_stats(levels_per_stats, formats_in, tern_defaults) | 
| 287 | ||
| 288 | # Default to NULL if no format | |
| 289 | 1667x | which_null <- names(which(sapply(levels_per_stats, is.null))) | 
| 290 | 1667x | levels_per_stats[which_null] <- which_null | 
| 291 | 1667x | case_input_is_not_stat <- unlist(out, use.names = FALSE) == unlist(levels_per_stats, use.names = FALSE) | 
| 292 | 1667x | out[names(out) == out | case_input_is_not_stat] <- list(NULL) | 
| 293 | ||
| 294 | 1667x | out | 
| 295 | } | |
| 296 | ||
| 297 | #' @describeIn default_stats_formats_labels Get labels corresponding to a list of statistics. | |
| 298 | #' To check for available defaults see list `tern::tern_default_labels`. | |
| 299 | #' | |
| 300 | #' @param labels_in (named `character`)\cr custom labels to use instead of defaults. If no value is provided, the | |
| 301 | #' variable level (if rows correspond to levels of a variable) or statistic name will be used as label. | |
| 302 | #' @param label_attr_from_stats (named `list`)\cr if `labels_in = NULL`, then this will be used instead. It is a list | |
| 303 | #' of values defined in statistical functions as default labels. Values are ignored if `labels_in` is provided or `""` | |
| 304 | #' values are provided. | |
| 305 | #' | |
| 306 | #' @return | |
| 307 | #' * `get_labels_from_stats()` returns a named list of labels as strings. | |
| 308 | #' | |
| 309 | #' @examples | |
| 310 | #' # Defaults labels | |
| 311 | #' get_labels_from_stats(num_stats) | |
| 312 | #' get_labels_from_stats(cnt_stats) | |
| 313 | #' get_labels_from_stats(only_pval) | |
| 314 | #' get_labels_from_stats(all_cnt_occ) | |
| 315 | #' | |
| 316 | #' # Addition of customs | |
| 317 | #' get_labels_from_stats(all_cnt_occ, labels_in = c("fraction" = "Fraction")) | |
| 318 | #' get_labels_from_stats(all_cnt_occ, labels_in = list("fraction" = c("Some more fractions"))) | |
| 319 | #' | |
| 320 | #' @export | |
| 321 | get_labels_from_stats <- function(stats, | |
| 322 | labels_in = NULL, | |
| 323 | levels_per_stats = NULL, | |
| 324 | label_attr_from_stats = NULL, | |
| 325 |                                   tern_defaults = tern_default_labels) { | |
| 326 | 1622x | checkmate::assert_character(stats, min.len = 1) | 
| 327 | ||
| 328 | # If labels_in is NULL, use label_attr_from_stats | |
| 329 | 1622x |   if (is.null(labels_in)) { | 
| 330 | 1349x | labels_in <- label_attr_from_stats | 
| 331 | 1349x | labels_in <- label_attr_from_stats[ | 
| 332 | 1349x | nzchar(label_attr_from_stats) & | 
| 333 | 1349x | !sapply(label_attr_from_stats, is.null) & | 
| 334 | 1349x | !is.na(label_attr_from_stats) | 
| 335 | ] | |
| 336 | } | |
| 337 | ||
| 338 | # It may be a list | |
| 339 | 1622x |   if (checkmate::test_list(labels_in, null.ok = TRUE)) { | 
| 340 | 1420x | checkmate::assert_list(labels_in, null.ok = TRUE) | 
| 341 | # Or it may be a vector of characters | |
| 342 |   } else { | |
| 343 | 202x | checkmate::assert_character(labels_in, null.ok = TRUE) | 
| 344 | } | |
| 345 | 1622x | checkmate::assert_list(levels_per_stats, null.ok = TRUE) | 
| 346 | ||
| 347 | # If unnamed labels given as labels_in and same number of stats, use one label per stat | |
| 348 | if ( | |
| 349 | 1622x | !is.null(labels_in) && length(labels_in) == length(stats) && | 
| 350 | 1622x | is.null(names(labels_in)) && is.null(levels_per_stats) | 
| 351 |   ) { | |
| 352 | 2x | out <- as.list(labels_in) %>% setNames(stats) | 
| 353 | 2x | return(out) | 
| 354 | } | |
| 355 | ||
| 356 | # If levels_per_stats not given, assume one row per statistic | |
| 357 | 328x | if (is.null(levels_per_stats)) levels_per_stats <- as.list(stats) %>% setNames(stats) | 
| 358 | ||
| 359 | # Apply custom labels | |
| 360 | 1620x | out <- .fill_in_vals_by_stats(levels_per_stats, labels_in, tern_defaults) | 
| 361 | 1620x | out | 
| 362 | } | |
| 363 | ||
| 364 | #' @describeIn default_stats_formats_labels Get row indent modifiers corresponding to a list of statistics/rows. | |
| 365 | #' | |
| 366 | #' @param indents_in (named `integer`)\cr custom row indent modifiers to use instead of defaults. Defaults to `0L` for | |
| 367 | #' all values. | |
| 368 | #' @param row_nms `r lifecycle::badge("deprecated")` Deprecation cycle started. See the `levels_per_stats` parameter | |
| 369 | #' for details. | |
| 370 | #' | |
| 371 | #' @return | |
| 372 | #' * `get_indents_from_stats()` returns a named list of indentation modifiers as integers. | |
| 373 | #' | |
| 374 | #' @examples | |
| 375 | #' get_indents_from_stats(all_cnt_occ, indents_in = 3L) | |
| 376 | #' get_indents_from_stats(all_cnt_occ, indents_in = list(count = 2L, count_fraction = 5L)) | |
| 377 | #' get_indents_from_stats( | |
| 378 | #' all_cnt_occ, | |
| 379 | #' indents_in = list(a = 2L, count.a = 1L, count.b = 5L) | |
| 380 | #' ) | |
| 381 | #' | |
| 382 | #' @export | |
| 383 | get_indents_from_stats <- function(stats, | |
| 384 | indents_in = NULL, | |
| 385 | levels_per_stats = NULL, | |
| 386 | tern_defaults = as.list(rep(0L, length(stats))) %>% setNames(stats), | |
| 387 |                                    row_nms = lifecycle::deprecated()) { | |
| 388 | 1578x | checkmate::assert_character(stats, min.len = 1) | 
| 389 | # It may be a list | |
| 390 | 1578x |   if (checkmate::test_list(indents_in, null.ok = TRUE)) { | 
| 391 | 1490x | checkmate::assert_list(indents_in, null.ok = TRUE) | 
| 392 | # Or it may be a vector of integers | |
| 393 |   } else { | |
| 394 | 88x | checkmate::assert_integerish(indents_in, null.ok = TRUE) | 
| 395 | } | |
| 396 | 1578x | checkmate::assert_list(levels_per_stats, null.ok = TRUE) | 
| 397 | ||
| 398 | # If levels_per_stats not given, assume one row per statistic | |
| 399 | 288x | if (is.null(levels_per_stats)) levels_per_stats <- as.list(stats) %>% setNames(stats) | 
| 400 | ||
| 401 | # Single indentation level for all rows | |
| 402 | 1578x |   if (is.null(names(indents_in)) && length(indents_in) == 1) { | 
| 403 | 20x | out <- rep(indents_in, length(levels_per_stats %>% unlist())) | 
| 404 | 20x | return(out) | 
| 405 | } | |
| 406 | ||
| 407 | # Apply custom indentation | |
| 408 | 1558x | out <- .fill_in_vals_by_stats(levels_per_stats, indents_in, tern_defaults) | 
| 409 | 1558x | out | 
| 410 | } | |
| 411 | ||
| 412 | # Function to loop over each stat and levels to set correct values | |
| 413 | .fill_in_vals_by_stats <- function(levels_per_stats, user_in, tern_defaults) { | |
| 414 | 4845x | out <- list() | 
| 415 | ||
| 416 | 4845x |   for (stat_i in names(levels_per_stats)) { | 
| 417 | # Get all levels of the statistic | |
| 418 | 7749x | all_lvls <- levels_per_stats[[stat_i]] | 
| 419 | ||
| 420 | 7749x |     if ((length(all_lvls) == 1 && all_lvls == stat_i) || is.null(all_lvls)) { # One row per statistic | 
| 421 | 3995x |       out[[stat_i]] <- if (stat_i %in% names(user_in)) { # 1. Check for stat_i in user input | 
| 422 | 780x | user_in[[stat_i]] | 
| 423 | 3995x |       } else if (stat_i %in% names(tern_defaults)) { # 2. Check for stat_i in tern defaults | 
| 424 | 3167x | tern_defaults[[stat_i]] | 
| 425 | 3995x |       } else { # 3. Otherwise stat_i | 
| 426 | 48x | stat_i | 
| 427 | } | |
| 428 |     } else { # One row per combination of variable level and statistic | |
| 429 | # Loop over levels for each statistic | |
| 430 | 3754x |       for (lev_i in all_lvls) { | 
| 431 | # Construct row name (stat_i.lev_i) | |
| 432 | 13522x | row_nm <- paste(stat_i, lev_i, sep = ".") | 
| 433 | ||
| 434 | 13522x |         out[[row_nm]] <- if (row_nm %in% names(user_in)) { # 1. Check for stat_i.lev_i in user input | 
| 435 | 43x | user_in[[row_nm]] | 
| 436 | 13522x |         } else if (lev_i %in% names(user_in)) { # 2. Check for lev_i in user input | 
| 437 | 52x | user_in[[lev_i]] | 
| 438 | 13522x |         } else if (stat_i %in% names(user_in)) { # 3. Check for stat_i in user input | 
| 439 | 503x | user_in[[stat_i]] | 
| 440 | 13522x |         } else if (lev_i %in% names(tern_defaults)) { # 4. Check for lev_i in tern defaults (only used for labels) | 
| 441 | 1549x | tern_defaults[[lev_i]] | 
| 442 | 13522x |         } else if (stat_i %in% names(tern_defaults)) { # 5. Check for stat_i in tern defaults | 
| 443 | 8465x | tern_defaults[[stat_i]] | 
| 444 | 13522x |         } else { # 6. Otherwise lev_i | 
| 445 | 2910x | lev_i | 
| 446 | } | |
| 447 | } | |
| 448 | } | |
| 449 | } | |
| 450 | ||
| 451 | 4845x | out | 
| 452 | } | |
| 453 | ||
| 454 | # Custom unlist function to retain NULL as "NULL" or NA | |
| 455 | .unlist_keep_nulls <- function(lst, null_placeholder = "NULL", recursive = FALSE) { | |
| 456 | 4786x | lapply(lst, function(x) if (is.null(x)) null_placeholder else x) %>% | 
| 457 | 4786x | unlist(recursive = recursive) | 
| 458 | } | |
| 459 | ||
| 460 | #' Update labels according to control specifications | |
| 461 | #' | |
| 462 | #' @description `r lifecycle::badge("stable")` | |
| 463 | #' | |
| 464 | #' Given a list of statistic labels and and a list of control parameters, updates labels with a relevant | |
| 465 | #' control specification. For example, if control has element `conf_level` set to `0.9`, the default | |
| 466 | #' label for statistic `mean_ci` will be updated to `"Mean 90% CI"`. Any labels that are supplied | |
| 467 | #' via `labels_custom` will not be updated regardless of `control`. | |
| 468 | #' | |
| 469 | #' @param labels_default (named `character`)\cr a named vector of statistic labels to modify | |
| 470 | #' according to the control specifications. Labels that are explicitly defined in `labels_custom` will | |
| 471 | #' not be affected. | |
| 472 | #' @param labels_custom (named `character`)\cr named vector of labels that are customized by | |
| 473 | #' the user and should not be affected by `control`. | |
| 474 | #' @param control (named `list`)\cr list of control parameters to apply to adjust default labels. | |
| 475 | #' | |
| 476 | #' @return A named character vector of labels with control specifications applied to relevant labels. | |
| 477 | #' | |
| 478 | #' @examples | |
| 479 | #' control <- list(conf_level = 0.80, quantiles = c(0.1, 0.83), test_mean = 0.57) | |
| 480 | #' get_labels_from_stats(c("mean_ci", "quantiles", "mean_pval")) %>% | |
| 481 | #' labels_use_control(control = control) | |
| 482 | #' | |
| 483 | #' @export | |
| 484 | labels_use_control <- function(labels_default, control, labels_custom = NULL) { | |
| 485 | 21x |   if ("conf_level" %in% names(control)) { | 
| 486 | 21x | labels_default <- sapply( | 
| 487 | 21x | names(labels_default), | 
| 488 | 21x |       function(x) { | 
| 489 | 111x |         if (!x %in% names(labels_custom)) { | 
| 490 | 108x | gsub(labels_default[[x]], pattern = "[0-9]+% CI", replacement = f_conf_level(control[["conf_level"]])) | 
| 491 |         } else { | |
| 492 | 3x | labels_default[[x]] | 
| 493 | } | |
| 494 | } | |
| 495 | ) | |
| 496 | } | |
| 497 | 21x |   if ("quantiles" %in% names(control) && "quantiles" %in% names(labels_default) && | 
| 498 | 21x |     !"quantiles" %in% names(labels_custom)) { # nolint | 
| 499 | 16x | labels_default["quantiles"] <- gsub( | 
| 500 | 16x | "[0-9]+% and [0-9]+", paste0(control[["quantiles"]][1] * 100, "% and ", control[["quantiles"]][2] * 100, ""), | 
| 501 | 16x | labels_default["quantiles"] | 
| 502 | ) | |
| 503 | } | |
| 504 | 21x |   if ("quantiles" %in% names(control) && "quantiles_lower" %in% names(labels_default) && | 
| 505 | 21x |     !"quantiles_lower" %in% names(labels_custom)) { # nolint | 
| 506 | 6x | labels_default["quantiles_lower"] <- gsub( | 
| 507 | 6x | "[0-9]+%-ile", paste0(control[["quantiles"]][1] * 100, "%-ile", ""), | 
| 508 | 6x | labels_default["quantiles_lower"] | 
| 509 | ) | |
| 510 | } | |
| 511 | 21x |   if ("quantiles" %in% names(control) && "quantiles_upper" %in% names(labels_default) && | 
| 512 | 21x |     !"quantiles_upper" %in% names(labels_custom)) { # nolint | 
| 513 | 6x | labels_default["quantiles_upper"] <- gsub( | 
| 514 | 6x | "[0-9]+%-ile", paste0(control[["quantiles"]][2] * 100, "%-ile", ""), | 
| 515 | 6x | labels_default["quantiles_upper"] | 
| 516 | ) | |
| 517 | } | |
| 518 | 21x |   if ("test_mean" %in% names(control) && "mean_pval" %in% names(labels_default) && | 
| 519 | 21x |     !"mean_pval" %in% names(labels_custom)) { # nolint | 
| 520 | 2x | labels_default["mean_pval"] <- gsub( | 
| 521 | 2x | "p-value \\(H0: mean = [0-9\\.]+\\)", f_pval(control[["test_mean"]]), labels_default["mean_pval"] | 
| 522 | ) | |
| 523 | } | |
| 524 | ||
| 525 | 21x | labels_default | 
| 526 | } | |
| 527 | ||
| 528 | # tern_default_stats ----------------------------------------------------------- | |
| 529 | #' @describeIn default_stats_formats_labels Named list of available statistics by method group for `tern`. | |
| 530 | #' | |
| 531 | #' @format | |
| 532 | #' * `tern_default_stats` is a named list of available statistics, with each element | |
| 533 | #' named for their corresponding statistical method group. | |
| 534 | #' | |
| 535 | #' @export | |
| 536 | tern_default_stats <- list( | |
| 537 |   abnormal = c("fraction"), | |
| 538 |   abnormal_by_baseline = c("fraction"), | |
| 539 |   abnormal_by_marked = c("count_fraction", "count_fraction_fixed_dp"), | |
| 540 |   abnormal_by_worst_grade = c("count_fraction", "count_fraction_fixed_dp"), | |
| 541 |   abnormal_lab_worsen_by_baseline = c("fraction"), | |
| 542 |   analyze_patients_exposure_in_cols = c("n_patients", "sum_exposure"), | |
| 543 |   analyze_vars_counts = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "fraction", "n_blq"), | |
| 544 | analyze_vars_numeric = c( | |
| 545 | "n", "sum", "mean", "sd", "se", "mean_sd", "mean_se", "mean_ci", "mean_sei", "mean_sdi", "mean_pval", | |
| 546 | "median", "mad", "median_ci", "quantiles", "iqr", "range", "min", "max", "median_range", "cv", | |
| 547 | "geom_mean", "geom_sd", "geom_mean_sd", "geom_mean_ci", "geom_cv", | |
| 548 | "median_ci_3d", | |
| 549 | "mean_ci_3d", "geom_mean_ci_3d" | |
| 550 | ), | |
| 551 |   count_cumulative = c("count_fraction"), | |
| 552 |   count_missed_doses = c("n", "count_fraction"), | |
| 553 |   count_occurrences = c("count", "count_fraction", "count_fraction_fixed_dp", "fraction"), | |
| 554 |   count_occurrences_by_grade = c("count_fraction", "count_fraction_fixed_dp"), | |
| 555 |   count_patients_with_event = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "n_blq"), | |
| 556 |   count_patients_with_flags = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "n_blq"), | |
| 557 |   count_values = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "n_blq"), | |
| 558 |   coxph_pairwise = c("pvalue", "hr", "hr_ci", "n_tot", "n_tot_events"), | |
| 559 |   estimate_incidence_rate = c("person_years", "n_events", "rate", "rate_ci", "n_unique", "n_rate"), | |
| 560 |   estimate_multinomial_response = c("n_prop", "prop_ci"), | |
| 561 |   estimate_odds_ratio = c("or_ci", "n_tot"), | |
| 562 |   estimate_proportion = c("n_prop", "prop_ci"), | |
| 563 |   estimate_proportion_diff = c("diff", "diff_ci"), | |
| 564 |   summarize_ancova = c("n", "lsmean", "lsmean_diff", "lsmean_diff_ci", "pval"), | |
| 565 |   summarize_coxreg = c("n", "hr", "ci", "pval", "pval_inter"), | |
| 566 |   summarize_glm_count = c("n", "rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"), | |
| 567 |   summarize_num_patients = c("unique", "nonunique", "unique_count"), | |
| 568 |   summarize_patients_events_in_cols = c("unique", "all"), | |
| 569 | surv_time = c( | |
| 570 | "median", "median_ci", "median_ci_3d", "quantiles", | |
| 571 | "quantiles_lower", "quantiles_upper", "range_censor", "range_event", "range" | |
| 572 | ), | |
| 573 |   surv_timepoint = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci", "event_free_rate_3d"), | |
| 574 |   surv_timepoint_diff = c("rate_diff", "rate_diff_ci", "ztest_pval", "rate_diff_ci_3d"), | |
| 575 |   tabulate_rsp_biomarkers = c("n_tot", "n_rsp", "prop", "or", "ci", "pval"), | |
| 576 |   tabulate_rsp_subgroups = c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval", "riskdiff"), | |
| 577 |   tabulate_survival_biomarkers = c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"), | |
| 578 |   tabulate_survival_subgroups = c("n_tot_events", "n_events", "n_tot", "n", "median", "hr", "ci", "pval", "riskdiff"), | |
| 579 |   test_proportion_diff = c("pval") | |
| 580 | ) | |
| 581 | ||
| 582 | # tern_default_formats --------------------------------------------------------- | |
| 583 | #' @describeIn default_stats_formats_labels Named vector of default formats for `tern`. | |
| 584 | #' | |
| 585 | #' @format | |
| 586 | #' * `tern_default_formats` is a named vector of available default formats, with each element | |
| 587 | #' named for their corresponding statistic. | |
| 588 | #' | |
| 589 | #' @export | |
| 590 | tern_default_formats <- c( | |
| 591 | ci = list(format_extreme_values_ci(2L)), | |
| 592 | count = "xx.", | |
| 593 | count_fraction = format_count_fraction, | |
| 594 | count_fraction_fixed_dp = format_count_fraction_fixed_dp, | |
| 595 | cv = "xx.x", | |
| 596 | event_free_rate = "xx.xx", | |
| 597 | fraction = format_fraction_fixed_dp, | |
| 598 | geom_cv = "xx.x", | |
| 599 | geom_mean = "xx.x", | |
| 600 | geom_mean_ci = "(xx.xx, xx.xx)", | |
| 601 | geom_mean_ci_3d = "xx.xx (xx.xx - xx.xx)", | |
| 602 | geom_mean_sd = "xx.x (xx.x)", | |
| 603 | geom_sd = "xx.x", | |
| 604 | hr = list(format_extreme_values(2L)), | |
| 605 | hr_ci = "(xx.xx, xx.xx)", | |
| 606 | hr_ci_3d = "xx.xx (xx.xx - xx.xx)", | |
| 607 | iqr = "xx.x", | |
| 608 | lsmean = "xx.xx", | |
| 609 | lsmean_diff = "xx.xx", | |
| 610 | lsmean_diff_ci = "(xx.xx, xx.xx)", | |
| 611 | mad = "xx.x", | |
| 612 | max = "xx.x", | |
| 613 | mean = "xx.x", | |
| 614 | mean_ci = "(xx.xx, xx.xx)", | |
| 615 | mean_ci_3d = "xx.xx (xx.xx - xx.xx)", | |
| 616 | mean_pval = "x.xxxx | (<0.0001)", | |
| 617 | mean_sd = "xx.x (xx.x)", | |
| 618 | mean_sdi = "(xx.xx, xx.xx)", | |
| 619 | mean_se = "xx.x (xx.x)", | |
| 620 | mean_sei = "(xx.xx, xx.xx)", | |
| 621 | median = "xx.x", | |
| 622 | median_ci = "(xx.xx, xx.xx)", | |
| 623 | median_ci_3d = "xx.xx (xx.xx - xx.xx)", | |
| 624 | median_range = "xx.x (xx.x - xx.x)", | |
| 625 | min = "xx.x", | |
| 626 | n = "xx.", | |
| 627 | n_blq = "xx.", | |
| 628 | n_events = "xx", | |
| 629 | n_patients = "xx (xx.x%)", | |
| 630 | n_prop = "xx (xx.x%)", | |
| 631 | n_rate = "xx (xx.x)", | |
| 632 | n_rsp = "xx", | |
| 633 | n_tot = "xx", | |
| 634 | n_tot_events = "xx", | |
| 635 | n_unique = "xx", | |
| 636 | nonunique = "xx", | |
| 637 | or = list(format_extreme_values(2L)), | |
| 638 | or_ci = "xx.xx (xx.xx - xx.xx)", | |
| 639 | person_years = "xx.x", | |
| 640 | prop = "xx.x%", | |
| 641 | prop_ci = "(xx.x, xx.x)", | |
| 642 | pt_at_risk = "xx", | |
| 643 | pval = "x.xxxx | (<0.0001)", | |
| 644 | pvalue = "x.xxxx | (<0.0001)", | |
| 645 | pval_counts = "x.xxxx | (<0.0001)", | |
| 646 | quantiles = "xx.x - xx.x", | |
| 647 | quantiles_lower = "xx.xx (xx.xx - xx.xx)", | |
| 648 | quantiles_upper = "xx.xx (xx.xx - xx.xx)", | |
| 649 | range = "xx.x - xx.x", | |
| 650 | range_censor = "xx.x to xx.x", | |
| 651 | range_event = "xx.x to xx.x", | |
| 652 | rate = "xx.xxxx", | |
| 653 | rate_ci = "(xx.xxxx, xx.xxxx)", | |
| 654 | rate_diff = "xx.xx", | |
| 655 | rate_diff_ci = "(xx.xx, xx.xx)", | |
| 656 |   rate_diff_ci_3d = format_xx("xx.xx (xx.xx, xx.xx)"), | |
| 657 | rate_ratio = "xx.xxxx", | |
| 658 | rate_ratio_ci = "(xx.xxxx, xx.xxxx)", | |
| 659 | rate_se = "xx.xx", | |
| 660 | riskdiff = "xx.x (xx.x - xx.x)", | |
| 661 | sd = "xx.x", | |
| 662 | se = "xx.x", | |
| 663 | sum = "xx.x", | |
| 664 | sum_exposure = "xx", | |
| 665 | unique = format_count_fraction_fixed_dp, | |
| 666 | unique_count = "xx", | |
| 667 | ztest_pval = "x.xxxx | (<0.0001)" | |
| 668 | ) | |
| 669 | ||
| 670 | # tern_default_labels ---------------------------------------------------------- | |
| 671 | #' @describeIn default_stats_formats_labels Named `character` vector of default labels for `tern`. | |
| 672 | #' | |
| 673 | #' @format | |
| 674 | #' * `tern_default_labels` is a named `character` vector of available default labels, with each element | |
| 675 | #' named for their corresponding statistic. | |
| 676 | #' | |
| 677 | #' @export | |
| 678 | tern_default_labels <- c( | |
| 679 | cv = "CV (%)", | |
| 680 | iqr = "IQR", | |
| 681 | geom_cv = "CV % Geometric Mean", | |
| 682 | geom_mean = "Geometric Mean", | |
| 683 | geom_mean_sd = "Geometric Mean (SD)", | |
| 684 | geom_mean_ci = "Geometric Mean 95% CI", | |
| 685 | geom_mean_ci_3d = "Geometric Mean (95% CI)", | |
| 686 | geom_sd = "Geometric SD", | |
| 687 | mad = "Median Absolute Deviation", | |
| 688 | max = "Maximum", | |
| 689 | mean = "Mean", | |
| 690 | mean_ci = "Mean 95% CI", | |
| 691 | mean_ci_3d = "Mean (95% CI)", | |
| 692 | mean_pval = "Mean p-value (H0: mean = 0)", | |
| 693 | mean_sd = "Mean (SD)", | |
| 694 | mean_sdi = "Mean -/+ 1xSD", | |
| 695 | mean_se = "Mean (SE)", | |
| 696 | mean_sei = "Mean -/+ 1xSE", | |
| 697 | median = "Median", | |
| 698 | median_ci = "Median 95% CI", | |
| 699 | median_ci_3d = "Median (95% CI)", | |
| 700 | median_range = "Median (Min - Max)", | |
| 701 | min = "Minimum", | |
| 702 | n = "n", | |
| 703 | n_blq = "n_blq", | |
| 704 | nonunique = "Number of events", | |
| 705 | pval = "p-value (t-test)", # Default for numeric | |
| 706 | pval_counts = "p-value (chi-squared test)", # Default for counts | |
| 707 | quantiles = "25% and 75%-ile", | |
| 708 | quantiles_lower = "25%-ile (95% CI)", | |
| 709 | quantiles_upper = "75%-ile (95% CI)", | |
| 710 | range = "Min - Max", | |
| 711 | range_censor = "Range (censored)", | |
| 712 | range_event = "Range (event)", | |
| 713 | rate = "Adjusted Rate", | |
| 714 | rate_ratio = "Adjusted Rate Ratio", | |
| 715 | sd = "SD", | |
| 716 | se = "SE", | |
| 717 | sum = "Sum", | |
| 718 | unique = "Number of patients with at least one event" | |
| 719 | ) | |
| 720 | ||
| 721 | #' @describeIn default_stats_formats_labels Quick function to retrieve default formats for summary statistics: | |
| 722 | #' [analyze_vars()] and [analyze_vars_in_cols()] principally. | |
| 723 | #' | |
| 724 | #' @param type (`string`)\cr `"numeric"` or `"counts"`. | |
| 725 | #' | |
| 726 | #' @return | |
| 727 | #' * `summary_formats()` returns a named `vector` of default statistic formats for the given data type. | |
| 728 | #' | |
| 729 | #' @examples | |
| 730 | #' summary_formats() | |
| 731 | #' summary_formats(type = "counts", include_pval = TRUE) | |
| 732 | #' | |
| 733 | #' @export | |
| 734 | summary_formats <- function(type = "numeric", include_pval = FALSE) { | |
| 735 | 2x |   met_grp <- paste0(c("analyze_vars", type), collapse = "_") | 
| 736 | 2x | get_formats_from_stats(get_stats(met_grp, add_pval = include_pval)) | 
| 737 | } | |
| 738 | ||
| 739 | #' @describeIn default_stats_formats_labels Quick function to retrieve default labels for summary statistics. | |
| 740 | #' Returns labels of descriptive statistics which are understood by `rtables`. Similar to `summary_formats`. | |
| 741 | #' | |
| 742 | #' @param include_pval (`flag`)\cr same as the `add_pval` argument in [get_stats()]. | |
| 743 | #' | |
| 744 | #' @details | |
| 745 | #' `summary_*` quick get functions for labels or formats uses `get_stats` and `get_labels_from_stats` or | |
| 746 | #' `get_formats_from_stats` respectively to retrieve relevant information. | |
| 747 | #' | |
| 748 | #' @return | |
| 749 | #' * `summary_labels` returns a named `vector` of default statistic labels for the given data type. | |
| 750 | #' | |
| 751 | #' @examples | |
| 752 | #' summary_labels() | |
| 753 | #' summary_labels(type = "counts", include_pval = TRUE) | |
| 754 | #' | |
| 755 | #' @export | |
| 756 | summary_labels <- function(type = "numeric", include_pval = FALSE) { | |
| 757 | 2x |   met_grp <- paste0(c("analyze_vars", type), collapse = "_") | 
| 758 | 2x | get_labels_from_stats(get_stats(met_grp, add_pval = include_pval)) | 
| 759 | } | 
| 1 | #' Bland-Altman analysis | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("experimental")` | |
| 4 | #' | |
| 5 | #' Statistics function that uses the Bland-Altman method to assess the agreement between two numerical vectors | |
| 6 | #' and calculates a variety of statistics. | |
| 7 | #' | |
| 8 | #' @inheritParams argument_convention | |
| 9 | #' @param y (`numeric`)\cr vector of numbers we want to analyze, to be compared with `x`. | |
| 10 | #' | |
| 11 | #' @return | |
| 12 | #' A named list of the following elements: | |
| 13 | #' * `df` | |
| 14 | #' * `difference_mean` | |
| 15 | #' * `ci_mean` | |
| 16 | #' * `difference_sd` | |
| 17 | #' * `difference_se` | |
| 18 | #' * `upper_agreement_limit` | |
| 19 | #' * `lower_agreement_limit` | |
| 20 | #' * `agreement_limit_se` | |
| 21 | #' * `upper_agreement_limit_ci` | |
| 22 | #' * `lower_agreement_limit_ci` | |
| 23 | #' * `t_value` | |
| 24 | #' * `n` | |
| 25 | #' | |
| 26 | #' @examples | |
| 27 | #' x <- seq(1, 60, 5) | |
| 28 | #' y <- seq(5, 50, 4) | |
| 29 | #' | |
| 30 | #' s_bland_altman(x, y, conf_level = 0.9) | |
| 31 | #' | |
| 32 | #' @export | |
| 33 | s_bland_altman <- function(x, y, conf_level = 0.95) { | |
| 34 | 7x | checkmate::assert_numeric(x, min.len = 1, any.missing = TRUE) | 
| 35 | 6x | checkmate::assert_numeric(y, len = length(x), any.missing = TRUE) | 
| 36 | 5x | checkmate::assert_numeric(conf_level, lower = 0, upper = 1, any.missing = TRUE) | 
| 37 | ||
| 38 | 4x | alpha <- 1 - conf_level | 
| 39 | ||
| 40 | 4x | ind <- complete.cases(x, y) # use only pairwise complete observations, and check if x and y have the same length | 
| 41 | 4x | x <- x[ind] | 
| 42 | 4x | y <- y[ind] | 
| 43 | 4x | n <- sum(ind) # number of 'observations' | 
| 44 | ||
| 45 | 4x |   if (n == 0) { | 
| 46 | ! |     stop("there is no valid paired data") | 
| 47 | } | |
| 48 | ||
| 49 | 4x | difference <- x - y # vector of differences | 
| 50 | 4x | average <- (x + y) / 2 # vector of means | 
| 51 | 4x | difference_mean <- mean(difference) # mean difference | 
| 52 | 4x | difference_sd <- sd(difference) # SD of differences | 
| 53 | 4x | al <- qnorm(1 - alpha / 2) * difference_sd | 
| 54 | 4x | upper_agreement_limit <- difference_mean + al # agreement limits | 
| 55 | 4x | lower_agreement_limit <- difference_mean - al | 
| 56 | ||
| 57 | 4x | difference_se <- difference_sd / sqrt(n) # standard error of the mean | 
| 58 | 4x | al_se <- difference_sd * sqrt(3) / sqrt(n) # standard error of the agreement limit | 
| 59 | 4x | tvalue <- qt(1 - alpha / 2, n - 1) # t value for 95% CI calculation | 
| 60 | 4x | difference_mean_ci <- difference_se * tvalue | 
| 61 | 4x | al_ci <- al_se * tvalue | 
| 62 | 4x | upper_agreement_limit_ci <- c(upper_agreement_limit - al_ci, upper_agreement_limit + al_ci) | 
| 63 | 4x | lower_agreement_limit_ci <- c(lower_agreement_limit - al_ci, lower_agreement_limit + al_ci) | 
| 64 | ||
| 65 | 4x | list( | 
| 66 | 4x | df = data.frame(average, difference), | 
| 67 | 4x | difference_mean = difference_mean, | 
| 68 | 4x | ci_mean = difference_mean + c(-1, 1) * difference_mean_ci, | 
| 69 | 4x | difference_sd = difference_sd, | 
| 70 | 4x | difference_se = difference_se, | 
| 71 | 4x | upper_agreement_limit = upper_agreement_limit, | 
| 72 | 4x | lower_agreement_limit = lower_agreement_limit, | 
| 73 | 4x | agreement_limit_se = al_se, | 
| 74 | 4x | upper_agreement_limit_ci = upper_agreement_limit_ci, | 
| 75 | 4x | lower_agreement_limit_ci = lower_agreement_limit_ci, | 
| 76 | 4x | t_value = tvalue, | 
| 77 | 4x | n = n | 
| 78 | ) | |
| 79 | } | |
| 80 | ||
| 81 | #' Bland-Altman plot | |
| 82 | #' | |
| 83 | #' @description `r lifecycle::badge("experimental")` | |
| 84 | #' | |
| 85 | #' Graphing function that produces a Bland-Altman plot. | |
| 86 | #' | |
| 87 | #' @inheritParams s_bland_altman | |
| 88 | #' | |
| 89 | #' @return A `ggplot` Bland-Altman plot. | |
| 90 | #' | |
| 91 | #' @examples | |
| 92 | #' x <- seq(1, 60, 5) | |
| 93 | #' y <- seq(5, 50, 4) | |
| 94 | #' | |
| 95 | #' g_bland_altman(x = x, y = y, conf_level = 0.9) | |
| 96 | #' | |
| 97 | #' @export | |
| 98 | #' @aliases bland_altman | |
| 99 | g_bland_altman <- function(x, y, conf_level = 0.95) { | |
| 100 | 1x | result_tem <- s_bland_altman(x, y, conf_level = conf_level) | 
| 101 | 1x | xpos <- max(result_tem$df$average) * 0.9 + min(result_tem$df$average) * 0.1 | 
| 102 | 1x | yrange <- diff(range(result_tem$df$difference)) | 
| 103 | ||
| 104 | 1x | p <- ggplot(result_tem$df) + | 
| 105 | 1x | geom_point(aes(x = average, y = difference), color = "blue") + | 
| 106 | 1x | geom_hline(yintercept = result_tem$difference_mean, color = "blue", linetype = 1) + | 
| 107 | 1x | geom_hline(yintercept = 0, color = "blue", linetype = 2) + | 
| 108 | 1x | geom_hline(yintercept = result_tem$lower_agreement_limit, color = "red", linetype = 2) + | 
| 109 | 1x | geom_hline(yintercept = result_tem$upper_agreement_limit, color = "red", linetype = 2) + | 
| 110 | 1x | annotate( | 
| 111 | 1x | "text", | 
| 112 | 1x | x = xpos, | 
| 113 | 1x | y = result_tem$lower_agreement_limit + 0.03 * yrange, | 
| 114 | 1x | label = "lower limits of agreement", | 
| 115 | 1x | color = "red" | 
| 116 | ) + | |
| 117 | 1x | annotate( | 
| 118 | 1x | "text", | 
| 119 | 1x | x = xpos, | 
| 120 | 1x | y = result_tem$upper_agreement_limit + 0.03 * yrange, | 
| 121 | 1x | label = "upper limits of agreement", | 
| 122 | 1x | color = "red" | 
| 123 | ) + | |
| 124 | 1x | annotate( | 
| 125 | 1x | "text", | 
| 126 | 1x | x = xpos, | 
| 127 | 1x | y = result_tem$difference_mean + 0.03 * yrange, | 
| 128 | 1x | label = "mean of difference between two measures", | 
| 129 | 1x | color = "blue" | 
| 130 | ) + | |
| 131 | 1x | annotate( | 
| 132 | 1x | "text", | 
| 133 | 1x | x = xpos, | 
| 134 | 1x | y = result_tem$lower_agreement_limit - 0.03 * yrange, | 
| 135 | 1x |       label = sprintf("%.2f", result_tem$lower_agreement_limit), | 
| 136 | 1x | color = "red" | 
| 137 | ) + | |
| 138 | 1x | annotate( | 
| 139 | 1x | "text", | 
| 140 | 1x | x = xpos, | 
| 141 | 1x | y = result_tem$upper_agreement_limit - 0.03 * yrange, | 
| 142 | 1x |       label = sprintf("%.2f", result_tem$upper_agreement_limit), | 
| 143 | 1x | color = "red" | 
| 144 | ) + | |
| 145 | 1x | annotate( | 
| 146 | 1x | "text", | 
| 147 | 1x | x = xpos, | 
| 148 | 1x | y = result_tem$difference_mean - 0.03 * yrange, | 
| 149 | 1x |       label = sprintf("%.2f", result_tem$difference_meanm), | 
| 150 | 1x | color = "blue" | 
| 151 | ) + | |
| 152 | 1x |     xlab("Average of two measures") + | 
| 153 | 1x |     ylab("Difference between two measures") | 
| 154 | ||
| 155 | 1x | return(p) | 
| 156 | } | 
| 1 | # Utility functions to cooperate with {rtables} package | |
| 2 | ||
| 3 | #' Convert table into matrix of strings | |
| 4 | #' | |
| 5 | #' @description `r lifecycle::badge("stable")` | |
| 6 | #' | |
| 7 | #' Helper function to use mostly within tests. `with_spaces`parameter allows | |
| 8 | #' to test not only for content but also indentation and table structure. | |
| 9 | #' `print_txt_to_copy` instead facilitate the testing development by returning a well | |
| 10 | #' formatted text that needs only to be copied and pasted in the expected output. | |
| 11 | #' | |
| 12 | #' @inheritParams formatters::toString | |
| 13 | #' @param x (`VTableTree`)\cr `rtables` table object. | |
| 14 | #' @param with_spaces (`flag`)\cr whether the tested table should keep the indentation and other relevant spaces. | |
| 15 | #' @param print_txt_to_copy (`flag`)\cr utility to have a way to copy the input table directly | |
| 16 | #' into the expected variable instead of copying it too manually. | |
| 17 | #' | |
| 18 | #' @return A `matrix` of `string`s. If `print_txt_to_copy = TRUE` the well formatted printout of the | |
| 19 | #' table will be printed to console, ready to be copied as a expected value. | |
| 20 | #' | |
| 21 | #' @examples | |
| 22 | #' tbl <- basic_table() %>% | |
| 23 | #'   split_rows_by("SEX") %>% | |
| 24 | #'   split_cols_by("ARM") %>% | |
| 25 | #'   analyze("AGE") %>% | |
| 26 | #' build_table(tern_ex_adsl) | |
| 27 | #' | |
| 28 | #' to_string_matrix(tbl, widths = ceiling(propose_column_widths(tbl) / 2)) | |
| 29 | #' | |
| 30 | #' @export | |
| 31 | to_string_matrix <- function(x, widths = NULL, max_width = NULL, | |
| 32 | hsep = formatters::default_hsep(), | |
| 33 |                              with_spaces = TRUE, print_txt_to_copy = FALSE) { | |
| 34 | 11x | checkmate::assert_flag(with_spaces) | 
| 35 | 11x | checkmate::assert_flag(print_txt_to_copy) | 
| 36 | 11x | checkmate::assert_int(max_width, null.ok = TRUE) | 
| 37 | ||
| 38 | 11x |   if (inherits(x, "MatrixPrintForm")) { | 
| 39 | ! | tx <- x | 
| 40 |   } else { | |
| 41 | 11x | tx <- matrix_form(x, TRUE) | 
| 42 | } | |
| 43 | ||
| 44 | 11x | tf_wrap <- FALSE | 
| 45 | 11x |   if (!is.null(max_width)) { | 
| 46 | ! | tf_wrap <- TRUE | 
| 47 | } | |
| 48 | ||
| 49 | # Producing the matrix to test | |
| 50 | 11x |   if (with_spaces) { | 
| 51 | 2x | out <- strsplit(toString(tx, widths = widths, tf_wrap = tf_wrap, max_width = max_width, hsep = hsep), "\n")[[1]] | 
| 52 |   } else { | |
| 53 | 9x | out <- tx$strings | 
| 54 | } | |
| 55 | ||
| 56 | # Printing to console formatted output that needs to be copied in "expected" | |
| 57 | 11x |   if (print_txt_to_copy) { | 
| 58 | 2x | out_tmp <- out | 
| 59 | 2x |     if (!with_spaces) { | 
| 60 | 1x | out_tmp <- apply(out, 1, paste0, collapse = '", "') | 
| 61 | } | |
| 62 | 2x |     cat(paste0('c(\n  "', paste0(out_tmp, collapse = '",\n  "'), '"\n)')) | 
| 63 | } | |
| 64 | ||
| 65 | # Return values | |
| 66 | 11x | out | 
| 67 | } | |
| 68 | ||
| 69 | #' Blank for missing input | |
| 70 | #' | |
| 71 | #' Helper function to use in tabulating model results. | |
| 72 | #' | |
| 73 | #' @param x (`vector`)\cr input for a cell. | |
| 74 | #' | |
| 75 | #' @return An empty `character` vector if all entries in `x` are missing (`NA`), otherwise | |
| 76 | #' the unlisted version of `x`. | |
| 77 | #' | |
| 78 | #' @keywords internal | |
| 79 | unlist_and_blank_na <- function(x) { | |
| 80 | 267x | unl <- unlist(x) | 
| 81 | 267x |   if (all(is.na(unl))) { | 
| 82 | 161x | character() | 
| 83 |   } else { | |
| 84 | 106x | unl | 
| 85 | } | |
| 86 | } | |
| 87 | ||
| 88 | #' Constructor for content functions given a data frame with flag input | |
| 89 | #' | |
| 90 | #' This can be useful for tabulating model results. | |
| 91 | #' | |
| 92 | #' @param analysis_var (`string`)\cr variable name for the column containing values to be returned by the | |
| 93 | #' content function. | |
| 94 | #' @param flag_var (`string`)\cr variable name for the logical column identifying which row should be returned. | |
| 95 | #' @param format (`string`)\cr `rtables` format to use. | |
| 96 | #' | |
| 97 | #' @return A content function which gives `df$analysis_var` at the row identified by | |
| 98 | #' `.df_row$flag` in the given format. | |
| 99 | #' | |
| 100 | #' @keywords internal | |
| 101 | cfun_by_flag <- function(analysis_var, | |
| 102 | flag_var, | |
| 103 | format = "xx", | |
| 104 |                          .indent_mods = NULL) { | |
| 105 | 61x | checkmate::assert_string(analysis_var) | 
| 106 | 61x | checkmate::assert_string(flag_var) | 
| 107 | 61x |   function(df, labelstr) { | 
| 108 | 265x | row_index <- which(df[[flag_var]]) | 
| 109 | 265x | x <- unlist_and_blank_na(df[[analysis_var]][row_index]) | 
| 110 | 265x | formatters::with_label( | 
| 111 | 265x | rcell(x, format = format, indent_mod = .indent_mods), | 
| 112 | 265x | labelstr | 
| 113 | ) | |
| 114 | } | |
| 115 | } | |
| 116 | ||
| 117 | #' Content row function to add row total to labels | |
| 118 | #' | |
| 119 | #' This takes the label of the latest row split level and adds the row total from `df` in parentheses. | |
| 120 | #' This function differs from [c_label_n_alt()] by taking row counts from `df` rather than | |
| 121 | #' `alt_counts_df`, and is used by [add_rowcounts()] when `alt_counts` is set to `FALSE`. | |
| 122 | #' | |
| 123 | #' @inheritParams argument_convention | |
| 124 | #' | |
| 125 | #' @return A list with formatted [rtables::CellValue()] with the row count value and the correct label. | |
| 126 | #' | |
| 127 | #' @note It is important here to not use `df` but rather `.N_row` in the implementation, because | |
| 128 | #' the former is already split by columns and will refer to the first column of the data only. | |
| 129 | #' | |
| 130 | #' @seealso [c_label_n_alt()] which performs the same function but retrieves row counts from | |
| 131 | #' `alt_counts_df` instead of `df`. | |
| 132 | #' | |
| 133 | #' @keywords internal | |
| 134 | c_label_n <- function(df, | |
| 135 | labelstr, | |
| 136 |                       .N_row) { # nolint | |
| 137 | 273x | label <- paste0(labelstr, " (N=", .N_row, ")") | 
| 138 | 273x | in_rows( | 
| 139 | 273x | .list = list(row_count = formatters::with_label(c(.N_row, .N_row), label)), | 
| 140 | 273x | .formats = c(row_count = function(x, ...) "") | 
| 141 | ) | |
| 142 | } | |
| 143 | ||
| 144 | #' Content row function to add `alt_counts_df` row total to labels | |
| 145 | #' | |
| 146 | #' This takes the label of the latest row split level and adds the row total from `alt_counts_df` | |
| 147 | #' in parentheses. This function differs from [c_label_n()] by taking row counts from `alt_counts_df` | |
| 148 | #' rather than `df`, and is used by [add_rowcounts()] when `alt_counts` is set to `TRUE`. | |
| 149 | #' | |
| 150 | #' @inheritParams argument_convention | |
| 151 | #' | |
| 152 | #' @return A list with formatted [rtables::CellValue()] with the row count value and the correct label. | |
| 153 | #' | |
| 154 | #' @seealso [c_label_n()] which performs the same function but retrieves row counts from `df` instead | |
| 155 | #' of `alt_counts_df`. | |
| 156 | #' | |
| 157 | #' @keywords internal | |
| 158 | c_label_n_alt <- function(df, | |
| 159 | labelstr, | |
| 160 |                           .alt_df_row) { | |
| 161 | 7x | N_row_alt <- nrow(.alt_df_row) # nolint | 
| 162 | 7x | label <- paste0(labelstr, " (N=", N_row_alt, ")") | 
| 163 | 7x | in_rows( | 
| 164 | 7x | .list = list(row_count = formatters::with_label(c(N_row_alt, N_row_alt), label)), | 
| 165 | 7x | .formats = c(row_count = function(x, ...) "") | 
| 166 | ) | |
| 167 | } | |
| 168 | ||
| 169 | #' Layout-creating function to add row total counts | |
| 170 | #' | |
| 171 | #' @description `r lifecycle::badge("stable")` | |
| 172 | #' | |
| 173 | #' This works analogously to [rtables::add_colcounts()] but on the rows. This function | |
| 174 | #' is a wrapper for [rtables::summarize_row_groups()]. | |
| 175 | #' | |
| 176 | #' @inheritParams argument_convention | |
| 177 | #' @param alt_counts (`flag`)\cr whether row counts should be taken from `alt_counts_df` (`TRUE`) | |
| 178 | #' or from `df` (`FALSE`). Defaults to `FALSE`. | |
| 179 | #' | |
| 180 | #' @return A modified layout where the latest row split labels now have the row-wise | |
| 181 | #' total counts (i.e. without column-based subsetting) attached in parentheses. | |
| 182 | #' | |
| 183 | #' @note Row count values are contained in these row count rows but are not displayed | |
| 184 | #' so that they are not considered zero rows by default when pruning. | |
| 185 | #' | |
| 186 | #' @examples | |
| 187 | #' basic_table() %>% | |
| 188 | #'   split_cols_by("ARM") %>% | |
| 189 | #' add_colcounts() %>% | |
| 190 | #'   split_rows_by("RACE", split_fun = drop_split_levels) %>% | |
| 191 | #' add_rowcounts() %>% | |
| 192 | #'   analyze("AGE", afun = list_wrap_x(summary), format = "xx.xx") %>% | |
| 193 | #' build_table(DM) | |
| 194 | #' | |
| 195 | #' @export | |
| 196 | add_rowcounts <- function(lyt, alt_counts = FALSE) { | |
| 197 | 7x | summarize_row_groups( | 
| 198 | 7x | lyt, | 
| 199 | 7x | cfun = if (alt_counts) c_label_n_alt else c_label_n | 
| 200 | ) | |
| 201 | } | |
| 202 | ||
| 203 | #' Obtain column indices | |
| 204 | #' | |
| 205 | #' @description `r lifecycle::badge("stable")` | |
| 206 | #' | |
| 207 | #' Helper function to extract column indices from a `VTableTree` for a given | |
| 208 | #' vector of column names. | |
| 209 | #' | |
| 210 | #' @param table_tree (`VTableTree`)\cr `rtables` table object to extract the indices from. | |
| 211 | #' @param col_names (`character`)\cr vector of column names. | |
| 212 | #' | |
| 213 | #' @return A vector of column indices. | |
| 214 | #' | |
| 215 | #' @export | |
| 216 | h_col_indices <- function(table_tree, col_names) { | |
| 217 | 1256x | checkmate::assert_class(table_tree, "VTableNodeInfo") | 
| 218 | 1256x | checkmate::assert_subset(col_names, names(attr(col_info(table_tree), "cextra_args")), empty.ok = FALSE) | 
| 219 | 1256x | match(col_names, names(attr(col_info(table_tree), "cextra_args"))) | 
| 220 | } | |
| 221 | ||
| 222 | #' Labels or names of list elements | |
| 223 | #' | |
| 224 | #' Helper function for working with nested statistic function results which typically | |
| 225 | #' don't have labels but names that we can use. | |
| 226 | #' | |
| 227 | #' @param x (`list`)\cr a list. | |
| 228 | #' | |
| 229 | #' @return A `character` vector with the labels or names for the list elements. | |
| 230 | #' | |
| 231 | #' @examples | |
| 232 | #' x <- data.frame( | |
| 233 | #' a = 1:10, | |
| 234 | #' b = rnorm(10) | |
| 235 | #' ) | |
| 236 | #' labels_or_names(x) | |
| 237 | #' var_labels(x) <- c(b = "Label for b", a = NA) | |
| 238 | #' labels_or_names(x) | |
| 239 | #' | |
| 240 | #' @export | |
| 241 | labels_or_names <- function(x) { | |
| 242 | 190x |   checkmate::assert_multi_class(x, c("data.frame", "list")) | 
| 243 | 190x | labs <- sapply(x, obj_label) | 
| 244 | 190x | nams <- rlang::names2(x) | 
| 245 | 190x | label_is_null <- sapply(labs, is.null) | 
| 246 | 190x | result <- unlist(ifelse(label_is_null, nams, labs)) | 
| 247 | 190x | result | 
| 248 | } | |
| 249 | ||
| 250 | #' Convert to `rtable` | |
| 251 | #' | |
| 252 | #' @description `r lifecycle::badge("stable")` | |
| 253 | #' | |
| 254 | #' This is a new generic function to convert objects to `rtable` tables. | |
| 255 | #' | |
| 256 | #' @param x (`data.frame`)\cr the object which should be converted to an `rtable`. | |
| 257 | #' @param ... additional arguments for methods. | |
| 258 | #' | |
| 259 | #' @return An `rtables` table object. Note that the concrete class will depend on the method used. | |
| 260 | #' | |
| 261 | #' @export | |
| 262 | as.rtable <- function(x, ...) { # nolint | |
| 263 | 3x |   UseMethod("as.rtable", x) | 
| 264 | } | |
| 265 | ||
| 266 | #' @describeIn as.rtable Method for converting a `data.frame` that contains numeric columns to `rtable`. | |
| 267 | #' | |
| 268 | #' @param format (`string` or `function`)\cr the format which should be used for the columns. | |
| 269 | #' | |
| 270 | #' @method as.rtable data.frame | |
| 271 | #' | |
| 272 | #' @examples | |
| 273 | #' x <- data.frame( | |
| 274 | #' a = 1:10, | |
| 275 | #' b = rnorm(10) | |
| 276 | #' ) | |
| 277 | #' as.rtable(x) | |
| 278 | #' | |
| 279 | #' @export | |
| 280 | as.rtable.data.frame <- function(x, format = "xx.xx", ...) { | |
| 281 | 3x | checkmate::assert_numeric(unlist(x)) | 
| 282 | 2x | do.call( | 
| 283 | 2x | rtable, | 
| 284 | 2x | c( | 
| 285 | 2x | list( | 
| 286 | 2x | header = labels_or_names(x), | 
| 287 | 2x | format = format | 
| 288 | ), | |
| 289 | 2x | Map( | 
| 290 | 2x |         function(row, row_name) { | 
| 291 | 20x | do.call( | 
| 292 | 20x | rrow, | 
| 293 | 20x | c(as.list(unname(row)), | 
| 294 | 20x | row.name = row_name | 
| 295 | ) | |
| 296 | ) | |
| 297 | }, | |
| 298 | 2x | row = as.data.frame(t(x)), | 
| 299 | 2x | row_name = rownames(x) | 
| 300 | ) | |
| 301 | ) | |
| 302 | ) | |
| 303 | } | |
| 304 | ||
| 305 | #' Split parameters | |
| 306 | #' | |
| 307 | #' @description `r lifecycle::badge("deprecated")` | |
| 308 | #' | |
| 309 | #' It divides the data in the vector `param` into the groups defined by `f` based on specified `values`. It is relevant | |
| 310 | #' in `rtables` layers so as to distribute parameters `.stats` or' `.formats` into lists with items corresponding to | |
| 311 | #' specific analysis function. | |
| 312 | #' | |
| 313 | #' @param param (`vector`)\cr the parameter to be split. | |
| 314 | #' @param value (`vector`)\cr the value used to split. | |
| 315 | #' @param f (`list`)\cr the reference to make the split. | |
| 316 | #' | |
| 317 | #' @return A named `list` with the same element names as `f`, each containing the elements specified in `.stats`. | |
| 318 | #' | |
| 319 | #' @examples | |
| 320 | #' f <- list( | |
| 321 | #'   surv = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci"), | |
| 322 | #'   surv_diff = c("rate_diff", "rate_diff_ci", "ztest_pval") | |
| 323 | #' ) | |
| 324 | #' | |
| 325 | #' .stats <- c("pt_at_risk", "rate_diff") | |
| 326 | #' h_split_param(.stats, .stats, f = f) | |
| 327 | #' | |
| 328 | #' # $surv | |
| 329 | #' # [1] "pt_at_risk" | |
| 330 | #' # | |
| 331 | #' # $surv_diff | |
| 332 | #' # [1] "rate_diff" | |
| 333 | #' | |
| 334 | #' .formats <- c("pt_at_risk" = "xx", "event_free_rate" = "xxx") | |
| 335 | #' h_split_param(.formats, names(.formats), f = f) | |
| 336 | #' | |
| 337 | #' # $surv | |
| 338 | #' # pt_at_risk event_free_rate | |
| 339 | #' # "xx" "xxx" | |
| 340 | #' # | |
| 341 | #' # $surv_diff | |
| 342 | #' # NULL | |
| 343 | #' | |
| 344 | #' @export | |
| 345 | h_split_param <- function(param, | |
| 346 | value, | |
| 347 |                           f) { | |
| 348 | 2x |   lifecycle::deprecate_warn("0.9.8", "h_split_param()") | 
| 349 | ||
| 350 | 2x | y <- lapply(f, function(x) param[value %in% x]) | 
| 351 | 2x | lapply(y, function(x) if (length(x) == 0) NULL else x) | 
| 352 | } | |
| 353 | ||
| 354 | #' Get selected statistics names | |
| 355 | #' | |
| 356 | #' Helper function to be used for creating `afun`. | |
| 357 | #' | |
| 358 | #' @param .stats (`vector` or `NULL`)\cr input to the layout creating function. Note that `NULL` means | |
| 359 | #' in this context that all default statistics should be used. | |
| 360 | #' @param all_stats (`character`)\cr all statistics which can be selected here potentially. | |
| 361 | #' | |
| 362 | #' @return A `character` vector with the selected statistics. | |
| 363 | #' | |
| 364 | #' @keywords internal | |
| 365 | afun_selected_stats <- function(.stats, all_stats) { | |
| 366 | 2x | checkmate::assert_character(.stats, null.ok = TRUE) | 
| 367 | 2x | checkmate::assert_character(all_stats) | 
| 368 | 2x |   if (is.null(.stats)) { | 
| 369 | 1x | all_stats | 
| 370 |   } else { | |
| 371 | 1x | intersect(.stats, all_stats) | 
| 372 | } | |
| 373 | } | |
| 374 | ||
| 375 | #' Add variable labels to top left corner in table | |
| 376 | #' | |
| 377 | #' @description `r lifecycle::badge("stable")` | |
| 378 | #' | |
| 379 | #' Helper layout-creating function to append the variable labels of a given variables vector | |
| 380 | #' from a given dataset in the top left corner. If a variable label is not found then the | |
| 381 | #' variable name itself is used instead. Multiple variable labels are concatenated with slashes. | |
| 382 | #' | |
| 383 | #' @inheritParams argument_convention | |
| 384 | #' @param vars (`character`)\cr variable names of which the labels are to be looked up in `df`. | |
| 385 | #' @param indent (`integer(1)`)\cr non-negative number of nested indent space, default to 0L which means no indent. | |
| 386 | #' 1L means two spaces indent, 2L means four spaces indent and so on. | |
| 387 | #' | |
| 388 | #' @return A modified layout with the new variable label(s) added to the top-left material. | |
| 389 | #' | |
| 390 | #' @note This is not an optimal implementation of course, since we are using here the data set | |
| 391 | #' itself during the layout creation. When we have a more mature `rtables` implementation then | |
| 392 | #' this will also be improved or not necessary anymore. | |
| 393 | #' | |
| 394 | #' @examples | |
| 395 | #' lyt <- basic_table() %>% | |
| 396 | #'   split_cols_by("ARM") %>% | |
| 397 | #' add_colcounts() %>% | |
| 398 | #'   split_rows_by("SEX") %>% | |
| 399 | #' append_varlabels(DM, "SEX") %>% | |
| 400 | #'   analyze("AGE", afun = mean) %>% | |
| 401 | #' append_varlabels(DM, "AGE", indent = 1) | |
| 402 | #' build_table(lyt, DM) | |
| 403 | #' | |
| 404 | #' lyt <- basic_table() %>% | |
| 405 | #'   split_cols_by("ARM") %>% | |
| 406 | #'   split_rows_by("SEX") %>% | |
| 407 | #'   analyze("AGE", afun = mean) %>% | |
| 408 | #'   append_varlabels(DM, c("SEX", "AGE")) | |
| 409 | #' build_table(lyt, DM) | |
| 410 | #' | |
| 411 | #' @export | |
| 412 | append_varlabels <- function(lyt, df, vars, indent = 0L) { | |
| 413 | 3x |   if (checkmate::test_flag(indent)) { | 
| 414 | ! |     warning("indent argument is now accepting integers. Boolean indent will be converted to integers.") | 
| 415 | ! | indent <- as.integer(indent) | 
| 416 | } | |
| 417 | ||
| 418 | 3x | checkmate::assert_data_frame(df) | 
| 419 | 3x | checkmate::assert_character(vars) | 
| 420 | 3x | checkmate::assert_count(indent) | 
| 421 | ||
| 422 | 3x | lab <- formatters::var_labels(df[vars], fill = TRUE) | 
| 423 | 3x | lab <- paste(lab, collapse = " / ") | 
| 424 | 3x |   space <- paste(rep(" ", indent * 2), collapse = "") | 
| 425 | 3x | lab <- paste0(space, lab) | 
| 426 | ||
| 427 | 3x | append_topleft(lyt, lab) | 
| 428 | } | |
| 429 | ||
| 430 | #' Default string replacement for `NA` values | |
| 431 | #' | |
| 432 | #' @description `r lifecycle::badge("stable")` | |
| 433 | #' | |
| 434 | #' The default string used to represent `NA` values. This value is used as the default | |
| 435 | #' value for the `na_str` argument throughout the `tern` package, and printed in place | |
| 436 | #' of `NA` values in output tables. If not specified for each `tern` function by the user | |
| 437 | #' via the `na_str` argument, or in the R environment options via [set_default_na_str()], | |
| 438 | #' then `NA` is used. | |
| 439 | #' | |
| 440 | #' @param na_str (`string`)\cr single string value to set in the R environment options as | |
| 441 | #'   the default value to replace `NA`s. Use `getOption("tern_default_na_str")` to check the | |
| 442 | #' current value set in the R environment (defaults to `NULL` if not set). | |
| 443 | #' | |
| 444 | #' @name default_na_str | |
| 445 | NULL | |
| 446 | ||
| 447 | #' @describeIn default_na_str Accessor for default `NA` value replacement string. | |
| 448 | #' | |
| 449 | #' @return | |
| 450 | #' * `default_na_str` returns the current value if an R environment option has been set | |
| 451 | #' for `"tern_default_na_str"`, or `NA_character_` otherwise. | |
| 452 | #' | |
| 453 | #' @examples | |
| 454 | #' # Default settings | |
| 455 | #' default_na_str() | |
| 456 | #' getOption("tern_default_na_str") | |
| 457 | #' | |
| 458 | #' # Set custom value | |
| 459 | #' set_default_na_str("<Missing>") | |
| 460 | #' | |
| 461 | #' # Settings after value has been set | |
| 462 | #' default_na_str() | |
| 463 | #' getOption("tern_default_na_str") | |
| 464 | #' | |
| 465 | #' @export | |
| 466 | default_na_str <- function() { | |
| 467 | 274x |   getOption("tern_default_na_str", default = NA_character_) | 
| 468 | } | |
| 469 | ||
| 470 | #' @describeIn default_na_str Setter for default `NA` value replacement string. Sets the | |
| 471 | #' option `"tern_default_na_str"` within the R environment. | |
| 472 | #' | |
| 473 | #' @return | |
| 474 | #' * `set_default_na_str` has no return value. | |
| 475 | #' | |
| 476 | #' @export | |
| 477 | set_default_na_str <- function(na_str) { | |
| 478 | 4x | checkmate::assert_character(na_str, len = 1, null.ok = TRUE) | 
| 479 | 4x |   options("tern_default_na_str" = na_str) | 
| 480 | } | |
| 481 | ||
| 482 | ||
| 483 | #' Utilities to handle extra arguments in analysis functions | |
| 484 | #' | |
| 485 | #' @description `r lifecycle::badge("stable")` | |
| 486 | #' Important additional parameters, useful to modify behavior of analysis and summary | |
| 487 | #' functions are listed in [rtables::additional_fun_params]. With these utility functions | |
| 488 | #' we can retrieve a curated list of these parameters from the environment, and pass them | |
| 489 | #' to the analysis functions with dedicated `...`; notice that the final `s_*` function | |
| 490 | #' will get them through argument matching. | |
| 491 | #' | |
| 492 | #' @param extra_afun_params (`list`)\cr list of additional parameters (`character`) to be | |
| 493 | #' retrieved from the environment. Curated list is present in [rtables::additional_fun_params]. | |
| 494 | #' @param add_alt_df (`logical`)\cr if `TRUE`, the function will also add `.alt_df` and `.alt_df_row` | |
| 495 | #' parameters. | |
| 496 | #' | |
| 497 | #' @name util_handling_additional_fun_params | |
| 498 | NULL | |
| 499 | ||
| 500 | #' @describeIn util_handling_additional_fun_params Retrieve additional parameters from the environment. | |
| 501 | #' | |
| 502 | #' @return | |
| 503 | #' * `retrieve_extra_afun_params` returns a list of the values of the parameters in the environment. | |
| 504 | #' | |
| 505 | #' @keywords internal | |
| 506 | retrieve_extra_afun_params <- function(extra_afun_params) { | |
| 507 | 1583x | out <- list() | 
| 508 | 1583x |   for (extra_param in extra_afun_params) { | 
| 509 | 15851x | out <- c(out, list(get(extra_param, envir = parent.frame()))) | 
| 510 | } | |
| 511 | 1583x | setNames(out, extra_afun_params) | 
| 512 | } | |
| 513 | ||
| 514 | #' @describeIn util_handling_additional_fun_params Curated list of additional parameters for | |
| 515 | #' analysis functions. Please check [rtables::additional_fun_params] for precise descriptions. | |
| 516 | #' | |
| 517 | #' @return | |
| 518 | #' * `get_additional_afun_params` returns a list of additional parameters. | |
| 519 | #' | |
| 520 | #' @keywords internal | |
| 521 | get_additional_afun_params <- function(add_alt_df = FALSE) { | |
| 522 | 240x | out_list <- list( | 
| 523 | 240x | .N_col = integer(), | 
| 524 | 240x | .N_total = integer(), | 
| 525 | 240x | .N_row = integer(), | 
| 526 | 240x | .df_row = data.frame(), | 
| 527 | 240x | .var = character(), | 
| 528 | 240x | .ref_group = character(), | 
| 529 | 240x | .ref_full = vector(mode = "numeric"), | 
| 530 | 240x | .in_ref_col = logical(), | 
| 531 | 240x | .spl_context = data.frame(), | 
| 532 | 240x | .all_col_exprs = vector(mode = "expression"), | 
| 533 | 240x | .all_col_counts = vector(mode = "integer") | 
| 534 | ) | |
| 535 | ||
| 536 | 240x |   if (isTRUE(add_alt_df)) { | 
| 537 | ! | out_list <- c( | 
| 538 | ! | out_list, | 
| 539 | ! | .alt_df_row = data.frame(), | 
| 540 | ! | .alt_df = data.frame() | 
| 541 | ) | |
| 542 | } | |
| 543 | ||
| 544 | 240x | out_list | 
| 545 | } | 
| 1 | #' Helper function for deriving analysis datasets for select laboratory tables | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Helper function that merges ADSL and ADLB datasets so that missing lab test records are inserted in the | |
| 6 | #' output dataset. Remember that `na_level` must match the needed pre-processing | |
| 7 | #' done with [df_explicit_na()] to have the desired output. | |
| 8 | #' | |
| 9 | #' @param adsl (`data.frame`)\cr ADSL data frame. | |
| 10 | #' @param adlb (`data.frame`)\cr ADLB data frame. | |
| 11 | #' @param worst_flag (named `character`)\cr worst post-baseline lab flag variable. See how this is implemented in the | |
| 12 | #' following examples. | |
| 13 | #' @param by_visit (`flag`)\cr defaults to `FALSE` to generate worst grade per patient. | |
| 14 | #' If worst grade per patient per visit is specified for `worst_flag`, then | |
| 15 | #' `by_visit` should be `TRUE` to generate worst grade patient per visit. | |
| 16 | #' @param no_fillin_visits (named `character`)\cr visits that are not considered for post-baseline worst toxicity | |
| 17 | #'   grade. Defaults to `c("SCREENING", "BASELINE")`. | |
| 18 | #' | |
| 19 | #' @return `df` containing variables shared between `adlb` and `adsl` along with variables `PARAM`, `PARAMCD`, | |
| 20 | #' `ATOXGR`, and `BTOXGR` relevant for analysis. Optionally, `AVISIT` are `AVISITN` are included when | |
| 21 | #'   `by_visit = TRUE` and `no_fillin_visits = c("SCREENING", "BASELINE")`. | |
| 22 | #' | |
| 23 | #' @details In the result data missing records will be created for the following situations: | |
| 24 | #' * Patients who are present in `adsl` but have no lab data in `adlb` (both baseline and post-baseline). | |
| 25 | #' * Patients who do not have any post-baseline lab values. | |
| 26 | #' * Patients without any post-baseline values flagged as the worst. | |
| 27 | #' | |
| 28 | #' @examples | |
| 29 | #' # `h_adsl_adlb_merge_using_worst_flag` | |
| 30 | #' adlb_out <- h_adsl_adlb_merge_using_worst_flag( | |
| 31 | #' tern_ex_adsl, | |
| 32 | #' tern_ex_adlb, | |
| 33 | #'   worst_flag = c("WGRHIFL" = "Y") | |
| 34 | #' ) | |
| 35 | #' | |
| 36 | #' # `h_adsl_adlb_merge_using_worst_flag` by visit example | |
| 37 | #' adlb_out_by_visit <- h_adsl_adlb_merge_using_worst_flag( | |
| 38 | #' tern_ex_adsl, | |
| 39 | #' tern_ex_adlb, | |
| 40 | #'   worst_flag = c("WGRLOVFL" = "Y"), | |
| 41 | #' by_visit = TRUE | |
| 42 | #' ) | |
| 43 | #' | |
| 44 | #' @export | |
| 45 | h_adsl_adlb_merge_using_worst_flag <- function(adsl, | |
| 46 | adlb, | |
| 47 |                                                worst_flag = c("WGRHIFL" = "Y"), | |
| 48 | by_visit = FALSE, | |
| 49 |                                                no_fillin_visits = c("SCREENING", "BASELINE")) { | |
| 50 | 5x | col_names <- names(worst_flag) | 
| 51 | 5x | filter_values <- worst_flag | 
| 52 | ||
| 53 | 5x | temp <- Map( | 
| 54 | 5x | function(x, y) which(adlb[[x]] == y), | 
| 55 | 5x | col_names, | 
| 56 | 5x | filter_values | 
| 57 | ) | |
| 58 | ||
| 59 | 5x | position_satisfy_filters <- Reduce(intersect, temp) | 
| 60 | ||
| 61 | 5x | adsl_adlb_common_columns <- intersect(colnames(adsl), colnames(adlb)) | 
| 62 | 5x |   columns_from_adlb <- c("USUBJID", "PARAM", "PARAMCD", "AVISIT", "AVISITN", "ATOXGR", "BTOXGR") | 
| 63 | ||
| 64 | 5x | adlb_f <- adlb[position_satisfy_filters, ] %>% | 
| 65 | 5x | dplyr::filter(!.data[["AVISIT"]] %in% no_fillin_visits) | 
| 66 | 5x | adlb_f <- adlb_f[, columns_from_adlb] | 
| 67 | ||
| 68 | 5x | avisits_grid <- adlb %>% | 
| 69 | 5x | dplyr::filter(!.data[["AVISIT"]] %in% no_fillin_visits) %>% | 
| 70 | 5x | dplyr::pull(.data[["AVISIT"]]) %>% | 
| 71 | 5x | unique() | 
| 72 | ||
| 73 | 5x |   if (by_visit) { | 
| 74 | 1x | adsl_lb <- expand.grid( | 
| 75 | 1x | USUBJID = unique(adsl$USUBJID), | 
| 76 | 1x | AVISIT = avisits_grid, | 
| 77 | 1x | PARAMCD = unique(adlb$PARAMCD) | 
| 78 | ) | |
| 79 | ||
| 80 | 1x | adsl_lb <- adsl_lb %>% | 
| 81 | 1x |       dplyr::left_join(unique(adlb[c("AVISIT", "AVISITN")]), by = "AVISIT") %>% | 
| 82 | 1x |       dplyr::left_join(unique(adlb[c("PARAM", "PARAMCD")]), by = "PARAMCD") | 
| 83 | ||
| 84 | 1x | adsl1 <- adsl[, adsl_adlb_common_columns] | 
| 85 | 1x | adsl_lb <- adsl1 %>% merge(adsl_lb, by = "USUBJID") | 
| 86 | ||
| 87 | 1x |     by_variables_from_adlb <- c("USUBJID", "AVISIT", "AVISITN", "PARAMCD", "PARAM") | 
| 88 | ||
| 89 | 1x | adlb_btoxgr <- adlb %>% | 
| 90 | 1x |       dplyr::select(c("USUBJID", "PARAMCD", "BTOXGR")) %>% | 
| 91 | 1x | unique() %>% | 
| 92 | 1x |       dplyr::rename("BTOXGR_MAP" = "BTOXGR") | 
| 93 | ||
| 94 | 1x | adlb_out <- merge( | 
| 95 | 1x | adlb_f, | 
| 96 | 1x | adsl_lb, | 
| 97 | 1x | by = by_variables_from_adlb, | 
| 98 | 1x | all = TRUE, | 
| 99 | 1x | sort = FALSE | 
| 100 | ) | |
| 101 | 1x | adlb_out <- adlb_out %>% | 
| 102 | 1x |       dplyr::left_join(adlb_btoxgr, by = c("USUBJID", "PARAMCD")) %>% | 
| 103 | 1x | dplyr::mutate(BTOXGR = .data$BTOXGR_MAP) %>% | 
| 104 | 1x | dplyr::select(-"BTOXGR_MAP") | 
| 105 | ||
| 106 | 1x | adlb_var_labels <- c( | 
| 107 | 1x | formatters::var_labels(adlb[by_variables_from_adlb]), | 
| 108 | 1x | formatters::var_labels(adlb[columns_from_adlb[!columns_from_adlb %in% by_variables_from_adlb]]), | 
| 109 | 1x | formatters::var_labels(adsl[adsl_adlb_common_columns[adsl_adlb_common_columns != "USUBJID"]]) | 
| 110 | ) | |
| 111 |   } else { | |
| 112 | 4x | adsl_lb <- expand.grid( | 
| 113 | 4x | USUBJID = unique(adsl$USUBJID), | 
| 114 | 4x | PARAMCD = unique(adlb$PARAMCD) | 
| 115 | ) | |
| 116 | ||
| 117 | 4x |     adsl_lb <- adsl_lb %>% dplyr::left_join(unique(adlb[c("PARAM", "PARAMCD")]), by = "PARAMCD") | 
| 118 | ||
| 119 | 4x | adsl1 <- adsl[, adsl_adlb_common_columns] | 
| 120 | 4x | adsl_lb <- adsl1 %>% merge(adsl_lb, by = "USUBJID") | 
| 121 | ||
| 122 | 4x |     by_variables_from_adlb <- c("USUBJID", "PARAMCD", "PARAM") | 
| 123 | ||
| 124 | 4x | adlb_out <- merge( | 
| 125 | 4x | adlb_f, | 
| 126 | 4x | adsl_lb, | 
| 127 | 4x | by = by_variables_from_adlb, | 
| 128 | 4x | all = TRUE, | 
| 129 | 4x | sort = FALSE | 
| 130 | ) | |
| 131 | ||
| 132 | 4x | adlb_var_labels <- c( | 
| 133 | 4x | formatters::var_labels(adlb[by_variables_from_adlb]), | 
| 134 | 4x | formatters::var_labels(adlb[columns_from_adlb[!columns_from_adlb %in% by_variables_from_adlb]]), | 
| 135 | 4x | formatters::var_labels(adsl[adsl_adlb_common_columns[adsl_adlb_common_columns != "USUBJID"]]) | 
| 136 | ) | |
| 137 | } | |
| 138 | ||
| 139 | 5x | adlb_out$ATOXGR <- as.factor(adlb_out$ATOXGR) | 
| 140 | 5x | adlb_out$BTOXGR <- as.factor(adlb_out$BTOXGR) | 
| 141 | ||
| 142 | 5x | formatters::var_labels(adlb_out) <- adlb_var_labels | 
| 143 | ||
| 144 | 5x | adlb_out | 
| 145 | } | 
| 1 | #' Missing data | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Substitute missing data with a string or factor level. | |
| 6 | #' | |
| 7 | #' @param x (`factor` or `character`)\cr values for which any missing values should be substituted. | |
| 8 | #' @param label (`string`)\cr string that missing data should be replaced with. | |
| 9 | #' | |
| 10 | #' @return `x` with any `NA` values substituted by `label`. | |
| 11 | #' | |
| 12 | #' @examples | |
| 13 | #' explicit_na(c(NA, "a", "b")) | |
| 14 | #' is.na(explicit_na(c(NA, "a", "b"))) | |
| 15 | #' | |
| 16 | #' explicit_na(factor(c(NA, "a", "b"))) | |
| 17 | #' is.na(explicit_na(factor(c(NA, "a", "b")))) | |
| 18 | #' | |
| 19 | #' explicit_na(sas_na(c("a", ""))) | |
| 20 | #' | |
| 21 | #' @export | |
| 22 | explicit_na <- function(x, label = "<Missing>") { | |
| 23 | 256x | checkmate::assert_string(label) | 
| 24 | ||
| 25 | 256x |   if (is.factor(x)) { | 
| 26 | 151x | x <- forcats::fct_na_value_to_level(x, label) | 
| 27 | 151x | forcats::fct_drop(x, only = label) | 
| 28 | 105x |   } else if (is.character(x)) { | 
| 29 | 105x | x[is.na(x)] <- label | 
| 30 | 105x | x | 
| 31 |   } else { | |
| 32 | ! |     stop("only factors and character vectors allowed") | 
| 33 | } | |
| 34 | } | |
| 35 | ||
| 36 | #' Convert strings to `NA` | |
| 37 | #' | |
| 38 | #' @description `r lifecycle::badge("stable")` | |
| 39 | #' | |
| 40 | #' SAS imports missing data as empty strings or strings with whitespaces only. This helper function can be used to | |
| 41 | #' convert these values to `NA`s. | |
| 42 | #' | |
| 43 | #' @inheritParams explicit_na | |
| 44 | #' @param empty (`flag`)\cr if `TRUE`, empty strings get replaced by `NA`. | |
| 45 | #' @param whitespaces (`flag`)\cr if `TRUE`, strings made from only whitespaces get replaced with `NA`. | |
| 46 | #' | |
| 47 | #' @return `x` with `""` and/or whitespace-only values substituted by `NA`, depending on the values of | |
| 48 | #' `empty` and `whitespaces`. | |
| 49 | #' | |
| 50 | #' @examples | |
| 51 | #' sas_na(c("1", "", " ", "   ", "b")) | |
| 52 | #' sas_na(factor(c("", " ", "b"))) | |
| 53 | #' | |
| 54 | #' is.na(sas_na(c("1", "", " ", "   ", "b"))) | |
| 55 | #' | |
| 56 | #' @export | |
| 57 | sas_na <- function(x, empty = TRUE, whitespaces = TRUE) { | |
| 58 | 245x | checkmate::assert_flag(empty) | 
| 59 | 245x | checkmate::assert_flag(whitespaces) | 
| 60 | ||
| 61 | 245x |   if (is.factor(x)) { | 
| 62 | 135x | empty_levels <- levels(x) == "" | 
| 63 | 11x | if (empty && any(empty_levels)) levels(x)[empty_levels] <- NA | 
| 64 | ||
| 65 | 135x |     ws_levels <- grepl("^\\s+$", levels(x)) | 
| 66 | ! | if (whitespaces && any(ws_levels)) levels(x)[ws_levels] <- NA | 
| 67 | ||
| 68 | 135x | x | 
| 69 | 110x |   } else if (is.character(x)) { | 
| 70 | 110x | if (empty) x[x == ""] <- NA_character_ | 
| 71 | ||
| 72 | 110x |     if (whitespaces) x[grepl("^\\s+$", x)] <- NA_character_ | 
| 73 | ||
| 74 | 110x | x | 
| 75 |   } else { | |
| 76 | ! |     stop("only factors and character vectors allowed") | 
| 77 | } | |
| 78 | } | 
| 1 | #' Survival time point analysis | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [surv_timepoint()] creates a layout element to analyze patient survival rates and difference | |
| 6 | #' of survival rates between groups at a given time point. The primary analysis variable `vars` is the time variable. | |
| 7 | #' Other required inputs are `time_point`, the numeric time point of interest, and `is_event`, a variable that | |
| 8 | #' indicates whether or not an event has occurred. The `method` argument is used to specify whether you want to analyze | |
| 9 | #' survival estimations (`"surv"`), difference in survival with the control (`"surv_diff"`), or both of these | |
| 10 | #' (`"both"`). | |
| 11 | #' | |
| 12 | #' @inheritParams argument_convention | |
| 13 | #' @inheritParams s_surv_time | |
| 14 | #' @param time_point (`numeric(1)`)\cr survival time point of interest. | |
| 15 | #' @param control (`list`)\cr parameters for comparison details, specified by using the helper function | |
| 16 | #' [control_surv_timepoint()]. Some possible parameter options are: | |
| 17 | #' * `conf_level` (`proportion`)\cr confidence level of the interval for survival rate. | |
| 18 | #' * `conf_type` (`string`)\cr confidence interval type. Options are "plain" (default), "log", "log-log", | |
| 19 | #' see more in [survival::survfit()]. Note option "none" is no longer supported. | |
| 20 | #' @param method (`string`)\cr `"surv"` (survival estimations), `"surv_diff"` (difference in survival with the | |
| 21 | #' control), or `"both"`. | |
| 22 | #' @param table_names_suffix (`string`)\cr optional suffix for the `table_names` used for the `rtables` to | |
| 23 | #' avoid warnings from duplicate table names. | |
| 24 | #' @param .indent_mods (named `integer`)\cr indent modifiers for the labels. Each element of the vector | |
| 25 | #' should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation | |
| 26 | #' for that statistic's row label. | |
| 27 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 28 | #' | |
| 29 | #'   Options are: ``r shQuote(get_stats("surv_timepoint"), type = "sh")`` | |
| 30 | #' | |
| 31 | #' @name survival_timepoint | |
| 32 | #' @order 1 | |
| 33 | NULL | |
| 34 | ||
| 35 | #' @describeIn survival_timepoint Statistics function which analyzes survival rate. | |
| 36 | #' | |
| 37 | #' @return | |
| 38 | #' * `s_surv_timepoint()` returns the statistics: | |
| 39 | #' * `pt_at_risk`: Patients remaining at risk. | |
| 40 | #' * `event_free_rate`: Event-free rate (%). | |
| 41 | #' * `rate_se`: Standard error of event free rate. | |
| 42 | #' * `rate_ci`: Confidence interval for event free rate. | |
| 43 | #' * `event_free_rate_3d`: Event-free rate (%) with Confidence interval. | |
| 44 | #' | |
| 45 | #' @examples | |
| 46 | #' library(dplyr) | |
| 47 | #' | |
| 48 | #' adtte_f <- tern_ex_adtte %>% | |
| 49 | #' filter(PARAMCD == "OS") %>% | |
| 50 | #' mutate( | |
| 51 | #' AVAL = day2month(AVAL), | |
| 52 | #' is_event = CNSR == 0 | |
| 53 | #' ) | |
| 54 | #' | |
| 55 | #' s_surv_timepoint( | |
| 56 | #' df = subset(adtte_f, ARMCD == "ARM A"), | |
| 57 | #' .var = "AVAL", | |
| 58 | #' is_event = "is_event", | |
| 59 | #' time_point = c(10), | |
| 60 | #' control = control_surv_timepoint() | |
| 61 | #' ) | |
| 62 | #' | |
| 63 | #' @export | |
| 64 | s_surv_timepoint <- function(df, | |
| 65 | .var, | |
| 66 | time_point, | |
| 67 | is_event, | |
| 68 | control = control_surv_timepoint(), | |
| 69 |                              ...) { | |
| 70 | 35x | checkmate::assert_string(.var) | 
| 71 | 35x | assert_df_with_variables(df, list(tte = .var, is_event = is_event)) | 
| 72 | 35x | checkmate::assert_numeric(df[[.var]], min.len = 1, any.missing = FALSE) | 
| 73 | 35x | checkmate::assert_number(time_point) | 
| 74 | 35x | checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE) | 
| 75 | ||
| 76 | 35x | conf_type <- control$conf_type | 
| 77 | 35x | conf_level <- control$conf_level | 
| 78 | ||
| 79 | 35x |   formula <- stats::as.formula(paste0("survival::Surv(", .var, ", ", is_event, ") ~ 1")) | 
| 80 | 35x | srv_fit <- survival::survfit( | 
| 81 | 35x | formula = formula, | 
| 82 | 35x | data = df, | 
| 83 | 35x | conf.int = conf_level, | 
| 84 | 35x | conf.type = conf_type | 
| 85 | ) | |
| 86 | 35x | s_srv_fit <- summary(srv_fit, times = time_point, extend = TRUE) | 
| 87 | 35x |   df_srv_fit <- as.data.frame(s_srv_fit[c("time", "n.risk", "surv", "lower", "upper", "std.err")]) | 
| 88 | 35x |   if (df_srv_fit[["n.risk"]] == 0) { | 
| 89 | 1x | pt_at_risk <- event_free_rate <- rate_se <- NA_real_ | 
| 90 | 1x | rate_ci <- c(NA_real_, NA_real_) | 
| 91 |   } else { | |
| 92 | 34x | pt_at_risk <- df_srv_fit$n.risk | 
| 93 | 34x | event_free_rate <- df_srv_fit$surv | 
| 94 | 34x | rate_se <- df_srv_fit$std.err | 
| 95 | 34x | rate_ci <- c(df_srv_fit$lower, df_srv_fit$upper) | 
| 96 | } | |
| 97 | 35x | event_free_rate_3d <- c(event_free_rate, rate_ci) | 
| 98 | 35x | list( | 
| 99 | 35x | pt_at_risk = formatters::with_label(pt_at_risk, "Patients remaining at risk"), | 
| 100 | 35x | event_free_rate = formatters::with_label(event_free_rate * 100, "Event Free Rate (%)"), | 
| 101 | 35x | rate_se = formatters::with_label(rate_se * 100, "Standard Error of Event Free Rate"), | 
| 102 | 35x | rate_ci = formatters::with_label(rate_ci * 100, f_conf_level(conf_level)), | 
| 103 | 35x | event_free_rate_3d = formatters::with_label( | 
| 104 | 35x |       event_free_rate_3d * 100, paste0("Event Free Rate (", f_conf_level(conf_level), ")") | 
| 105 | ) | |
| 106 | ) | |
| 107 | } | |
| 108 | ||
| 109 | #' @describeIn survival_timepoint Statistics function which analyzes difference between two survival rates. | |
| 110 | #' | |
| 111 | #' @return | |
| 112 | #' * `s_surv_timepoint_diff()` returns the statistics: | |
| 113 | #' * `rate_diff`: Event-free rate difference between two groups. | |
| 114 | #' * `rate_diff_ci`: Confidence interval for the difference. | |
| 115 | #' * `rate_diff_ci_3d`: Event-free rate difference and confidence interval between two groups. | |
| 116 | #' * `ztest_pval`: p-value to test the difference is 0. | |
| 117 | #' | |
| 118 | #' @keywords internal | |
| 119 | s_surv_timepoint_diff <- function(df, | |
| 120 | .var, | |
| 121 | .ref_group, | |
| 122 | .in_ref_col, | |
| 123 | time_point, | |
| 124 | control = control_surv_timepoint(), | |
| 125 |                                   ...) { | |
| 126 | 14x |   if (.in_ref_col) { | 
| 127 | 4x | return( | 
| 128 | 4x | list( | 
| 129 | 4x | rate_diff = formatters::with_label(numeric(), "Difference in Event Free Rate"), | 
| 130 | 4x | rate_diff_ci = formatters::with_label(numeric(), f_conf_level(control$conf_level)), | 
| 131 | 4x | rate_diff_ci_3d = formatters::with_label( | 
| 132 | 4x |           numeric(), paste0("Difference in Event Free Rate", f_conf_level(control$conf_level)) | 
| 133 | ), | |
| 134 | 4x | ztest_pval = formatters::with_label(numeric(), "p-value (Z-test)") | 
| 135 | ) | |
| 136 | ) | |
| 137 | } | |
| 138 | 10x | data <- rbind(.ref_group, df) | 
| 139 | 10x |   group <- factor(rep(c("ref", "x"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "x")) | 
| 140 | 10x |   res_per_group <- lapply(split(data, group), function(x) { | 
| 141 | 20x | s_surv_timepoint(df = x, .var = .var, time_point = time_point, control = control, ...) | 
| 142 | }) | |
| 143 | ||
| 144 | 10x | res_x <- res_per_group[[2]] | 
| 145 | 10x | res_ref <- res_per_group[[1]] | 
| 146 | 10x | rate_diff <- res_x$event_free_rate - res_ref$event_free_rate | 
| 147 | 10x | se_diff <- sqrt(res_x$rate_se^2 + res_ref$rate_se^2) | 
| 148 | ||
| 149 | 10x | qs <- c(-1, 1) * stats::qnorm(1 - (1 - control$conf_level) / 2) | 
| 150 | 10x | rate_diff_ci <- rate_diff + qs * se_diff | 
| 151 | 10x | rate_diff_ci_3d <- c(rate_diff, rate_diff_ci) | 
| 152 | 10x |   ztest_pval <- if (is.na(rate_diff)) { | 
| 153 | 10x | NA | 
| 154 |   } else { | |
| 155 | 10x | 2 * (1 - stats::pnorm(abs(rate_diff) / se_diff)) | 
| 156 | } | |
| 157 | 10x | list( | 
| 158 | 10x | rate_diff = formatters::with_label(rate_diff, "Difference in Event Free Rate"), | 
| 159 | 10x | rate_diff_ci = formatters::with_label(rate_diff_ci, f_conf_level(control$conf_level)), | 
| 160 | 10x | rate_diff_ci_3d = formatters::with_label( | 
| 161 | 10x |       rate_diff_ci_3d, paste0("Difference in Event Free Rate", f_conf_level(control$conf_level)) | 
| 162 | ), | |
| 163 | 10x | ztest_pval = formatters::with_label(ztest_pval, "p-value (Z-test)") | 
| 164 | ) | |
| 165 | } | |
| 166 | ||
| 167 | #' @describeIn survival_timepoint Formatted analysis function which is used as `afun` in `surv_timepoint()`. | |
| 168 | #' | |
| 169 | #' @return | |
| 170 | #' * `a_surv_timepoint()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 171 | #' | |
| 172 | #' @keywords internal | |
| 173 | a_surv_timepoint <- function(df, | |
| 174 | ..., | |
| 175 | .stats = NULL, | |
| 176 | .stat_names = NULL, | |
| 177 | .formats = NULL, | |
| 178 | .labels = NULL, | |
| 179 |                              .indent_mods = NULL) { | |
| 180 | # Check for additional parameters to the statistics function | |
| 181 | 24x | dots_extra_args <- list(...) | 
| 182 | 24x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 183 | 24x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 184 | 24x | method <- dots_extra_args$method | 
| 185 | ||
| 186 | # Check for user-defined functions | |
| 187 | 24x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 188 | 24x | .stats <- default_and_custom_stats_list$all_stats | 
| 189 | 24x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 190 | ||
| 191 | # Apply statistics function | |
| 192 | 24x | x_stats <- .apply_stat_functions( | 
| 193 | 24x | default_stat_fnc = if (method == "surv") s_surv_timepoint else s_surv_timepoint_diff, | 
| 194 | 24x | custom_stat_fnc_list = custom_stat_functions, | 
| 195 | 24x | args_list = c( | 
| 196 | 24x | df = list(df), | 
| 197 | 24x | extra_afun_params, | 
| 198 | 24x | dots_extra_args | 
| 199 | ) | |
| 200 | ) | |
| 201 | ||
| 202 | # Fill in formatting defaults | |
| 203 | 24x | .stats <- get_stats(if (method == "surv") "surv_timepoint" else "surv_timepoint_diff", | 
| 204 | 24x | stats_in = .stats, | 
| 205 | 24x | custom_stats_in = names(custom_stat_functions) | 
| 206 | ) | |
| 207 | 24x | x_stats <- x_stats[.stats] | 
| 208 | 24x | .formats <- get_formats_from_stats(.stats, .formats) | 
| 209 | 24x | .labels <- get_labels_from_stats( | 
| 210 | 24x | .stats, .labels, | 
| 211 | 24x | tern_defaults = c(lapply(x_stats, attr, "label"), tern_default_labels) | 
| 212 | ) | |
| 213 | 24x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods) | 
| 214 | ||
| 215 | # Auto format handling | |
| 216 | 24x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 217 | ||
| 218 | # Get and check statistical names | |
| 219 | 24x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 220 | ||
| 221 | 24x | in_rows( | 
| 222 | 24x | .list = x_stats, | 
| 223 | 24x | .formats = .formats, | 
| 224 | 24x | .names = .labels %>% .unlist_keep_nulls(), | 
| 225 | 24x | .stat_names = .stat_names, | 
| 226 | 24x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 227 | 24x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 228 | ) | |
| 229 | } | |
| 230 | ||
| 231 | #' @describeIn survival_timepoint Layout-creating function which can take statistics function arguments | |
| 232 | #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 233 | #' | |
| 234 | #' @return | |
| 235 | #' * `surv_timepoint()` returns a layout object suitable for passing to further layouting functions, | |
| 236 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 237 | #' the statistics from `s_surv_timepoint()` and/or `s_surv_timepoint_diff()` to the table layout depending on | |
| 238 | #' the value of `method`. | |
| 239 | #' | |
| 240 | #' @examples | |
| 241 | #' library(dplyr) | |
| 242 | #' | |
| 243 | #' adtte_f <- tern_ex_adtte %>% | |
| 244 | #' filter(PARAMCD == "OS") %>% | |
| 245 | #' mutate( | |
| 246 | #' AVAL = day2month(AVAL), | |
| 247 | #' is_event = CNSR == 0 | |
| 248 | #' ) | |
| 249 | #' | |
| 250 | #' # Survival at given time points. | |
| 251 | #' basic_table() %>% | |
| 252 | #' split_cols_by(var = "ARMCD", ref_group = "ARM A") %>% | |
| 253 | #' add_colcounts() %>% | |
| 254 | #' surv_timepoint( | |
| 255 | #' vars = "AVAL", | |
| 256 | #' var_labels = "Months", | |
| 257 | #' is_event = "is_event", | |
| 258 | #' time_point = 7 | |
| 259 | #' ) %>% | |
| 260 | #' build_table(df = adtte_f) | |
| 261 | #' | |
| 262 | #' # Difference in survival at given time points. | |
| 263 | #' basic_table() %>% | |
| 264 | #' split_cols_by(var = "ARMCD", ref_group = "ARM A") %>% | |
| 265 | #' add_colcounts() %>% | |
| 266 | #' surv_timepoint( | |
| 267 | #' vars = "AVAL", | |
| 268 | #' var_labels = "Months", | |
| 269 | #' is_event = "is_event", | |
| 270 | #' time_point = 9, | |
| 271 | #' method = "surv_diff", | |
| 272 | #'     .indent_mods = c("rate_diff" = 0L, "rate_diff_ci" = 2L, "ztest_pval" = 2L) | |
| 273 | #' ) %>% | |
| 274 | #' build_table(df = adtte_f) | |
| 275 | #' | |
| 276 | #' # Survival and difference in survival at given time points. | |
| 277 | #' basic_table() %>% | |
| 278 | #' split_cols_by(var = "ARMCD", ref_group = "ARM A") %>% | |
| 279 | #' add_colcounts() %>% | |
| 280 | #' surv_timepoint( | |
| 281 | #' vars = "AVAL", | |
| 282 | #' var_labels = "Months", | |
| 283 | #' is_event = "is_event", | |
| 284 | #' time_point = 9, | |
| 285 | #' method = "both" | |
| 286 | #' ) %>% | |
| 287 | #' build_table(df = adtte_f) | |
| 288 | #' | |
| 289 | #' @export | |
| 290 | #' @order 2 | |
| 291 | surv_timepoint <- function(lyt, | |
| 292 | vars, | |
| 293 | time_point, | |
| 294 | is_event, | |
| 295 | control = control_surv_timepoint(), | |
| 296 |                            method = c("surv", "surv_diff", "both"), | |
| 297 | na_str = default_na_str(), | |
| 298 | nested = TRUE, | |
| 299 | ..., | |
| 300 | table_names_suffix = "", | |
| 301 | var_labels = "Time", | |
| 302 | show_labels = "visible", | |
| 303 | .stats = c( | |
| 304 | "pt_at_risk", "event_free_rate", "rate_ci", | |
| 305 | "rate_diff", "rate_diff_ci", "ztest_pval" | |
| 306 | ), | |
| 307 | .stat_names = NULL, | |
| 308 | .formats = list(rate_ci = "(xx.xx, xx.xx)"), | |
| 309 | .labels = NULL, | |
| 310 |                            .indent_mods = if (method == "both") { | |
| 311 | 2x | c(rate_diff = 1L, rate_diff_ci = 2L, ztest_pval = 2L) | 
| 312 |                            } else { | |
| 313 | 4x | c(rate_diff_ci = 1L, ztest_pval = 1L) | 
| 314 |                            }) { | |
| 315 | 6x | method <- match.arg(method) | 
| 316 | 6x | checkmate::assert_string(table_names_suffix) | 
| 317 | ||
| 318 | # Process standard extra arguments | |
| 319 | 6x |   extra_args <- list(".stats" = .stats) | 
| 320 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 321 | 6x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 322 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 323 | 6x | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 324 | ||
| 325 | # Process additional arguments to the statistic function | |
| 326 | 6x | extra_args <- c( | 
| 327 | 6x | extra_args, | 
| 328 | 6x | time_point = list(time_point), is_event = is_event, control = list(control), | 
| 329 | ... | |
| 330 | ) | |
| 331 | ||
| 332 | # Append additional info from layout to the analysis function | |
| 333 | 6x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 334 | 6x | formals(a_surv_timepoint) <- c(formals(a_surv_timepoint), extra_args[[".additional_fun_parameters"]]) | 
| 335 | ||
| 336 | 6x |   for (i in seq_along(time_point)) { | 
| 337 | 6x | extra_args[["time_point"]] <- time_point[i] | 
| 338 | ||
| 339 | 6x |     if (method %in% c("surv", "both")) { | 
| 340 | 4x | extra_args_i <- extra_args | 
| 341 | 4x | extra_args_i[["method"]] <- "surv" | 
| 342 | ||
| 343 | 4x | lyt <- analyze( | 
| 344 | 4x | lyt = lyt, | 
| 345 | 4x | vars = vars, | 
| 346 | 4x | afun = a_surv_timepoint, | 
| 347 | 4x | na_str = na_str, | 
| 348 | 4x | nested = nested, | 
| 349 | 4x | extra_args = extra_args_i, | 
| 350 | 4x | var_labels = paste(time_point[i], var_labels), | 
| 351 | 4x | show_labels = show_labels, | 
| 352 | 4x |         table_names = paste0("surv_", time_point[i], table_names_suffix) | 
| 353 | ) | |
| 354 | } | |
| 355 | ||
| 356 | 6x |     if (method %in% c("surv_diff", "both")) { | 
| 357 | 4x | extra_args_i <- extra_args | 
| 358 | 4x | extra_args_i[["method"]] <- "surv_diff" | 
| 359 | ||
| 360 | 4x | lyt <- analyze( | 
| 361 | 4x | lyt = lyt, | 
| 362 | 4x | vars = vars, | 
| 363 | 4x | afun = a_surv_timepoint, | 
| 364 | 4x | na_str = na_str, | 
| 365 | 4x | nested = nested, | 
| 366 | 4x | extra_args = extra_args_i, | 
| 367 | 4x | var_labels = paste(time_point[i], var_labels), | 
| 368 | 4x | show_labels = ifelse(method == "both", "hidden", show_labels), | 
| 369 | 4x |         table_names = paste0("surv_diff_", time_point[i], table_names_suffix) | 
| 370 | ) | |
| 371 | } | |
| 372 | } | |
| 373 | ||
| 374 | 6x | lyt | 
| 375 | } | 
| 1 | #' Confidence interval for mean | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Convenient function for calculating the mean confidence interval. It calculates the arithmetic as well as the | |
| 6 | #' geometric mean. It can be used as a `ggplot` helper function for plotting. | |
| 7 | #' | |
| 8 | #' @inheritParams argument_convention | |
| 9 | #' @param n_min (`numeric(1)`)\cr a minimum number of non-missing `x` to estimate the confidence interval for mean. | |
| 10 | #' @param gg_helper (`flag`)\cr whether output should be aligned for use with `ggplot`s. | |
| 11 | #' @param geom_mean (`flag`)\cr whether the geometric mean should be calculated. | |
| 12 | #' | |
| 13 | #' @return A named `vector` of values `mean_ci_lwr` and `mean_ci_upr`. | |
| 14 | #' | |
| 15 | #' @examples | |
| 16 | #' stat_mean_ci(sample(10), gg_helper = FALSE) | |
| 17 | #' | |
| 18 | #' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) + | |
| 19 | #' ggplot2::geom_point() | |
| 20 | #' | |
| 21 | #' p + ggplot2::stat_summary( | |
| 22 | #' fun.data = stat_mean_ci, | |
| 23 | #' geom = "errorbar" | |
| 24 | #' ) | |
| 25 | #' | |
| 26 | #' p + ggplot2::stat_summary( | |
| 27 | #' fun.data = stat_mean_ci, | |
| 28 | #' fun.args = list(conf_level = 0.5), | |
| 29 | #' geom = "errorbar" | |
| 30 | #' ) | |
| 31 | #' | |
| 32 | #' p + ggplot2::stat_summary( | |
| 33 | #' fun.data = stat_mean_ci, | |
| 34 | #' fun.args = list(conf_level = 0.5, geom_mean = TRUE), | |
| 35 | #' geom = "errorbar" | |
| 36 | #' ) | |
| 37 | #' | |
| 38 | #' @export | |
| 39 | stat_mean_ci <- function(x, | |
| 40 | conf_level = 0.95, | |
| 41 | na.rm = TRUE, # nolint | |
| 42 | n_min = 2, | |
| 43 | gg_helper = TRUE, | |
| 44 |                          geom_mean = FALSE) { | |
| 45 | 2373x |   if (na.rm) { | 
| 46 | 10x | x <- stats::na.omit(x) | 
| 47 | } | |
| 48 | 2373x | n <- length(x) | 
| 49 | ||
| 50 | 2373x |   if (!geom_mean) { | 
| 51 | 1194x | m <- mean(x) | 
| 52 |   } else { | |
| 53 | 1179x | negative_values_exist <- any(is.na(x[!is.na(x)]) <- x[!is.na(x)] <= 0) | 
| 54 | 1179x |     if (negative_values_exist) { | 
| 55 | 26x | m <- NA_real_ | 
| 56 |     } else { | |
| 57 | 1153x | x <- log(x) | 
| 58 | 1153x | m <- mean(x) | 
| 59 | } | |
| 60 | } | |
| 61 | ||
| 62 | 2373x |   if (n < n_min || is.na(m)) { | 
| 63 | 306x | ci <- c(mean_ci_lwr = NA_real_, mean_ci_upr = NA_real_) | 
| 64 |   } else { | |
| 65 | 2067x | hci <- stats::qt((1 + conf_level) / 2, df = n - 1) * stats::sd(x) / sqrt(n) | 
| 66 | 2067x | ci <- c(mean_ci_lwr = m - hci, mean_ci_upr = m + hci) | 
| 67 | 2067x |     if (geom_mean) { | 
| 68 | 1022x | ci <- exp(ci) | 
| 69 | } | |
| 70 | } | |
| 71 | ||
| 72 | 2373x |   if (gg_helper) { | 
| 73 | 4x | m <- ifelse(is.na(m), NA_real_, m) | 
| 74 | 4x | ci <- data.frame(y = ifelse(geom_mean, exp(m), m), ymin = ci[[1]], ymax = ci[[2]]) | 
| 75 | } | |
| 76 | ||
| 77 | 2373x | return(ci) | 
| 78 | } | |
| 79 | ||
| 80 | #' Confidence interval for median | |
| 81 | #' | |
| 82 | #' @description `r lifecycle::badge("stable")` | |
| 83 | #' | |
| 84 | #' Convenient function for calculating the median confidence interval. It can be used as a `ggplot` helper | |
| 85 | #' function for plotting. | |
| 86 | #' | |
| 87 | #' @inheritParams argument_convention | |
| 88 | #' @param gg_helper (`flag`)\cr whether output should be aligned for use with `ggplot`s. | |
| 89 | #' | |
| 90 | #' @details This function was adapted from `DescTools/versions/0.99.35/source` | |
| 91 | #' | |
| 92 | #' @return A named `vector` of values `median_ci_lwr` and `median_ci_upr`. | |
| 93 | #' | |
| 94 | #' @examples | |
| 95 | #' stat_median_ci(sample(10), gg_helper = FALSE) | |
| 96 | #' | |
| 97 | #' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) + | |
| 98 | #' ggplot2::geom_point() | |
| 99 | #' p + ggplot2::stat_summary( | |
| 100 | #' fun.data = stat_median_ci, | |
| 101 | #' geom = "errorbar" | |
| 102 | #' ) | |
| 103 | #' | |
| 104 | #' @export | |
| 105 | stat_median_ci <- function(x, | |
| 106 | conf_level = 0.95, | |
| 107 | na.rm = TRUE, # nolint | |
| 108 |                            gg_helper = TRUE) { | |
| 109 | 1192x | x <- unname(x) | 
| 110 | 1192x |   if (na.rm) { | 
| 111 | 9x | x <- x[!is.na(x)] | 
| 112 | } | |
| 113 | 1192x | n <- length(x) | 
| 114 | 1192x | med <- stats::median(x) | 
| 115 | ||
| 116 | 1192x | k <- stats::qbinom(p = (1 - conf_level) / 2, size = n, prob = 0.5, lower.tail = TRUE) | 
| 117 | ||
| 118 | # k == 0 - for small samples (e.g. n <= 5) ci can be outside the observed range | |
| 119 | 1192x |   if (k == 0 || is.na(med)) { | 
| 120 | 248x | ci <- c(median_ci_lwr = NA_real_, median_ci_upr = NA_real_) | 
| 121 | 248x | empir_conf_level <- NA_real_ | 
| 122 |   } else { | |
| 123 | 944x | x_sort <- sort(x) | 
| 124 | 944x | ci <- c(median_ci_lwr = x_sort[k], median_ci_upr = x_sort[n - k + 1]) | 
| 125 | 944x | empir_conf_level <- 1 - 2 * stats::pbinom(k - 1, size = n, prob = 0.5) | 
| 126 | } | |
| 127 | ||
| 128 | 1192x |   if (gg_helper) { | 
| 129 | 4x | ci <- data.frame(y = med, ymin = ci[[1]], ymax = ci[[2]]) | 
| 130 | } | |
| 131 | ||
| 132 | 1192x | attr(ci, "conf_level") <- empir_conf_level | 
| 133 | ||
| 134 | 1192x | return(ci) | 
| 135 | } | |
| 136 | ||
| 137 | #' p-Value of the mean | |
| 138 | #' | |
| 139 | #' @description `r lifecycle::badge("stable")` | |
| 140 | #' | |
| 141 | #' Convenient function for calculating the two-sided p-value of the mean. | |
| 142 | #' | |
| 143 | #' @inheritParams argument_convention | |
| 144 | #' @param n_min (`numeric(1)`)\cr a minimum number of non-missing `x` to estimate the p-value of the mean. | |
| 145 | #' @param test_mean (`numeric(1)`)\cr mean value to test under the null hypothesis. | |
| 146 | #' | |
| 147 | #' @return A p-value. | |
| 148 | #' | |
| 149 | #' @examples | |
| 150 | #' stat_mean_pval(sample(10)) | |
| 151 | #' | |
| 152 | #' stat_mean_pval(rnorm(10), test_mean = 0.5) | |
| 153 | #' | |
| 154 | #' @export | |
| 155 | stat_mean_pval <- function(x, | |
| 156 | na.rm = TRUE, # nolint | |
| 157 | n_min = 2, | |
| 158 |                            test_mean = 0) { | |
| 159 | 1192x |   if (na.rm) { | 
| 160 | 9x | x <- stats::na.omit(x) | 
| 161 | } | |
| 162 | 1192x | n <- length(x) | 
| 163 | ||
| 164 | 1192x | x_mean <- mean(x) | 
| 165 | 1192x | x_sd <- stats::sd(x) | 
| 166 | ||
| 167 | 1192x |   if (n < n_min) { | 
| 168 | 140x | pv <- c(p_value = NA_real_) | 
| 169 |   } else { | |
| 170 | 1052x | x_se <- stats::sd(x) / sqrt(n) | 
| 171 | 1052x | ttest <- (x_mean - test_mean) / x_se | 
| 172 | 1052x | pv <- c(p_value = 2 * stats::pt(-abs(ttest), df = n - 1)) | 
| 173 | } | |
| 174 | ||
| 175 | 1192x | return(pv) | 
| 176 | } | |
| 177 | ||
| 178 | #' Proportion difference and confidence interval | |
| 179 | #' | |
| 180 | #' @description `r lifecycle::badge("stable")` | |
| 181 | #' | |
| 182 | #' Function for calculating the proportion (or risk) difference and confidence interval between arm | |
| 183 | #' X (reference group) and arm Y. Risk difference is calculated by subtracting cumulative incidence | |
| 184 | #' in arm Y from cumulative incidence in arm X. | |
| 185 | #' | |
| 186 | #' @inheritParams argument_convention | |
| 187 | #' @param x (`list` of `integer`)\cr list of number of occurrences in arm X (reference group). | |
| 188 | #' @param y (`list` of `integer`)\cr list of number of occurrences in arm Y. Must be of equal length to `x`. | |
| 189 | #' @param N_x (`numeric(1)`)\cr total number of records in arm X. | |
| 190 | #' @param N_y (`numeric(1)`)\cr total number of records in arm Y. | |
| 191 | #' @param list_names (`character`)\cr names of each variable/level corresponding to pair of proportions in | |
| 192 | #' `x` and `y`. Must be of equal length to `x` and `y`. | |
| 193 | #' @param pct (`flag`)\cr whether output should be returned as percentages. Defaults to `TRUE`. | |
| 194 | #' | |
| 195 | #' @return List of proportion differences and CIs corresponding to each pair of number of occurrences in `x` and | |
| 196 | #' `y`. Each list element consists of 3 statistics: proportion difference, CI lower bound, and CI upper bound. | |
| 197 | #' | |
| 198 | #' @seealso Split function [add_riskdiff()] which, when used as `split_fun` within [rtables::split_cols_by()] | |
| 199 | #' with `riskdiff` argument is set to `TRUE` in subsequent analyze functions, adds a column containing | |
| 200 | #' proportion (risk) difference to an `rtables` layout. | |
| 201 | #' | |
| 202 | #' @examples | |
| 203 | #' stat_propdiff_ci( | |
| 204 | #' x = list(0.375), y = list(0.01), N_x = 5, N_y = 5, list_names = "x", conf_level = 0.9 | |
| 205 | #' ) | |
| 206 | #' | |
| 207 | #' stat_propdiff_ci( | |
| 208 | #' x = list(0.5, 0.75, 1), y = list(0.25, 0.05, 0.5), N_x = 10, N_y = 20, pct = FALSE | |
| 209 | #' ) | |
| 210 | #' | |
| 211 | #' @export | |
| 212 | stat_propdiff_ci <- function(x, | |
| 213 | y, | |
| 214 | N_x, # nolint | |
| 215 | N_y, # nolint | |
| 216 | list_names = NULL, | |
| 217 | conf_level = 0.95, | |
| 218 |                              pct = TRUE) { | |
| 219 | 62x | checkmate::assert_list(x, types = "numeric") | 
| 220 | 62x | checkmate::assert_list(y, types = "numeric", len = length(x)) | 
| 221 | 62x | checkmate::assert_character(list_names, len = length(x), null.ok = TRUE) | 
| 222 | 62x |   rd_list <- lapply(seq_along(x), function(i) { | 
| 223 | 145x | p_x <- x[[i]] / N_x | 
| 224 | 145x | p_y <- y[[i]] / N_y | 
| 225 | 145x | rd_ci <- p_x - p_y + c(-1, 1) * stats::qnorm((1 + conf_level) / 2) * | 
| 226 | 145x | sqrt(p_x * (1 - p_x) / N_x + p_y * (1 - p_y) / N_y) | 
| 227 | 145x | c(p_x - p_y, rd_ci) * ifelse(pct, 100, 1) | 
| 228 | }) | |
| 229 | 62x | names(rd_list) <- list_names | 
| 230 | 62x | rd_list | 
| 231 | } | 
| 1 | #' Kaplan-Meier plot | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' From a survival model, a graphic is rendered along with tabulated annotation | |
| 6 | #' including the number of patient at risk at given time and the median survival | |
| 7 | #' per group. | |
| 8 | #' | |
| 9 | #' @inheritParams argument_convention | |
| 10 | #' @param variables (named `list`)\cr variable names. Details are: | |
| 11 | #' * `tte` (`numeric`)\cr variable indicating time-to-event duration values. | |
| 12 | #' * `is_event` (`logical`)\cr event variable. `TRUE` if event, `FALSE` if time to event is censored. | |
| 13 | #' * `arm` (`factor`)\cr the treatment group variable. | |
| 14 | #' * `strata` (`character` or `NULL`)\cr variable names indicating stratification factors. | |
| 15 | #' @param control_surv (`list`)\cr parameters for comparison details, specified by using | |
| 16 | #' the helper function [control_surv_timepoint()]. Some possible parameter options are: | |
| 17 | #' * `conf_level` (`proportion`)\cr confidence level of the interval for survival rate. | |
| 18 | #' * `conf_type` (`string`)\cr `"plain"` (default), `"log"`, `"log-log"` for confidence interval type, | |
| 19 | #' see more in [survival::survfit()]. Note that the option "none" is no longer supported. | |
| 20 | #' @param col (`character`)\cr lines colors. Length of a vector should be equal | |
| 21 | #' to number of strata from [survival::survfit()]. | |
| 22 | #' @param lty (`numeric`)\cr line type. If a vector is given, its length should be equal to the number of strata from | |
| 23 | #' [survival::survfit()]. | |
| 24 | #' @param lwd (`numeric`)\cr line width. If a vector is given, its length should be equal to the number of strata from | |
| 25 | #' [survival::survfit()]. | |
| 26 | #' @param censor_show (`flag`)\cr whether to show censored observations. | |
| 27 | #' @param pch (`string`)\cr name of symbol or character to use as point symbol to indicate censored cases. | |
| 28 | #' @param size (`numeric(1)`)\cr size of censored point symbols. | |
| 29 | #' @param max_time (`numeric(1)`)\cr maximum value to show on x-axis. Only data values less than or up to | |
| 30 | #' this threshold value will be plotted (defaults to `NULL`). | |
| 31 | #' @param xticks (`numeric` or `NULL`)\cr numeric vector of tick positions or a single number with spacing | |
| 32 | #' between ticks on the x-axis. If `NULL` (default), [labeling::extended()] is used to determine | |
| 33 | #' optimal tick positions on the x-axis. | |
| 34 | #' @param xlab (`string`)\cr x-axis label. | |
| 35 | #' @param yval (`string`)\cr type of plot, to be plotted on the y-axis. Options are `Survival` (default) and `Failure` | |
| 36 | #' probability. | |
| 37 | #' @param ylab (`string`)\cr y-axis label. | |
| 38 | #' @param title (`string`)\cr plot title. | |
| 39 | #' @param footnotes (`string`)\cr plot footnotes. | |
| 40 | #' @param font_size (`numeric(1)`)\cr font size to use for all text. | |
| 41 | #' @param ci_ribbon (`flag`)\cr whether the confidence interval should be drawn around the Kaplan-Meier curve. | |
| 42 | #' @param annot_at_risk (`flag`)\cr compute and add the annotation table reporting the number of patient at risk | |
| 43 | #' matching the main grid of the Kaplan-Meier curve. | |
| 44 | #' @param annot_at_risk_title (`flag`)\cr whether the "Patients at Risk" title should be added above the `annot_at_risk` | |
| 45 | #' table. Has no effect if `annot_at_risk` is `FALSE`. Defaults to `TRUE`. | |
| 46 | #' @param annot_surv_med (`flag`)\cr compute and add the annotation table on the Kaplan-Meier curve estimating the | |
| 47 | #' median survival time per group. | |
| 48 | #' @param annot_coxph (`flag`)\cr whether to add the annotation table from a [survival::coxph()] model. | |
| 49 | #' @param annot_stats (`string` or `NULL`)\cr statistics annotations to add to the plot. Options are | |
| 50 | #' `median` (median survival follow-up time) and `min` (minimum survival follow-up time). | |
| 51 | #' @param annot_stats_vlines (`flag`)\cr add vertical lines corresponding to each of the statistics | |
| 52 | #' specified by `annot_stats`. If `annot_stats` is `NULL` no lines will be added. | |
| 53 | #' @param control_coxph_pw (`list`)\cr parameters for comparison details, specified using the helper function | |
| 54 | #' [control_coxph()]. Some possible parameter options are: | |
| 55 | #' * `pval_method` (`string`)\cr p-value method for testing hazard ratio = 1. | |
| 56 | #' Default method is `"log-rank"`, can also be set to `"wald"` or `"likelihood"`. | |
| 57 | #' * `ties` (`string`)\cr method for tie handling. Default is `"efron"`, | |
| 58 | #' can also be set to `"breslow"` or `"exact"`. See more in [survival::coxph()] | |
| 59 | #' * `conf_level` (`proportion`)\cr confidence level of the interval for HR. | |
| 60 | #' @param ref_group_coxph (`string` or `NULL`)\cr level of arm variable to use as reference group in calculations for | |
| 61 | #' `annot_coxph` table. If `NULL` (default), uses the first level of the arm variable. | |
| 62 | #' @param control_annot_surv_med (`list`)\cr parameters to control the position and size of the annotation table added | |
| 63 | #' to the plot when `annot_surv_med = TRUE`, specified using the [control_surv_med_annot()] function. Parameter | |
| 64 | #' options are: `x`, `y`, `w`, `h`, and `fill`. See [control_surv_med_annot()] for details. | |
| 65 | #' @param control_annot_coxph (`list`)\cr parameters to control the position and size of the annotation table added | |
| 66 | #' to the plot when `annot_coxph = TRUE`, specified using the [control_coxph_annot()] function. Parameter | |
| 67 | #' options are: `x`, `y`, `w`, `h`, `fill`, and `ref_lbls`. See [control_coxph_annot()] for details. | |
| 68 | #' @param legend_pos (`numeric(2)` or `NULL`)\cr vector containing x- and y-coordinates, respectively, for the legend | |
| 69 | #' position relative to the KM plot area. If `NULL` (default), the legend is positioned in the bottom right corner of | |
| 70 | #' the plot, or the middle right of the plot if needed to prevent overlapping. | |
| 71 | #' @param rel_height_plot (`proportion`)\cr proportion of total figure height to allocate to the Kaplan-Meier plot. | |
| 72 | #' Relative height of patients at risk table is then `1 - rel_height_plot`. If `annot_at_risk = FALSE` or | |
| 73 | #' `as_list = TRUE`, this parameter is ignored. | |
| 74 | #' @param ggtheme (`theme`)\cr a graphical theme as provided by `ggplot2` to format the Kaplan-Meier plot. | |
| 75 | #' @param as_list (`flag`)\cr whether the two `ggplot` objects should be returned as a list when `annot_at_risk = TRUE`. | |
| 76 | #' If `TRUE`, a named list with two elements, `plot` and `table`, will be returned. If `FALSE` (default) the patients | |
| 77 | #' at risk table is printed below the plot via [cowplot::plot_grid()]. | |
| 78 | #' @param draw `r lifecycle::badge("deprecated")` This function no longer generates `grob` objects. | |
| 79 | #' @param newpage `r lifecycle::badge("deprecated")` This function no longer generates `grob` objects. | |
| 80 | #' @param gp `r lifecycle::badge("deprecated")` This function no longer generates `grob` objects. | |
| 81 | #' @param vp `r lifecycle::badge("deprecated")` This function no longer generates `grob` objects. | |
| 82 | #' @param name `r lifecycle::badge("deprecated")` This function no longer generates `grob` objects. | |
| 83 | #' @param annot_coxph_ref_lbls `r lifecycle::badge("deprecated")` Please use the `ref_lbls` element of | |
| 84 | #' `control_annot_coxph` instead. | |
| 85 | #' @param position_coxph `r lifecycle::badge("deprecated")`  Please use the `x` and `y` elements of | |
| 86 | #' `control_annot_coxph` instead. | |
| 87 | #' @param position_surv_med `r lifecycle::badge("deprecated")` Please use the `x` and `y` elements of | |
| 88 | #' `control_annot_surv_med` instead. | |
| 89 | #' @param width_annots `r lifecycle::badge("deprecated")` Please use the `w` element of `control_annot_surv_med` | |
| 90 | #' (for `surv_med`) and `control_annot_coxph` (for `coxph`)." | |
| 91 | #' | |
| 92 | #' @return A `ggplot` Kaplan-Meier plot and (optionally) summary table. | |
| 93 | #' | |
| 94 | #' @examples | |
| 95 | #' library(dplyr) | |
| 96 | #' | |
| 97 | #' df <- tern_ex_adtte %>% | |
| 98 | #' filter(PARAMCD == "OS") %>% | |
| 99 | #' mutate(is_event = CNSR == 0) | |
| 100 | #' variables <- list(tte = "AVAL", is_event = "is_event", arm = "ARMCD") | |
| 101 | #' | |
| 102 | #' # Basic examples | |
| 103 | #' g_km(df = df, variables = variables) | |
| 104 | #' g_km(df = df, variables = variables, yval = "Failure") | |
| 105 | #' | |
| 106 | #' # Examples with customization parameters applied | |
| 107 | #' g_km( | |
| 108 | #' df = df, | |
| 109 | #' variables = variables, | |
| 110 | #' control_surv = control_surv_timepoint(conf_level = 0.9), | |
| 111 | #'   col = c("grey25", "grey50", "grey75"), | |
| 112 | #' annot_at_risk_title = FALSE, | |
| 113 | #' lty = 1:3, | |
| 114 | #' font_size = 8 | |
| 115 | #' ) | |
| 116 | #' g_km( | |
| 117 | #' df = df, | |
| 118 | #' variables = variables, | |
| 119 | #'   annot_stats = c("min", "median"), | |
| 120 | #' annot_stats_vlines = TRUE, | |
| 121 | #' max_time = 3000, | |
| 122 | #' ggtheme = ggplot2::theme_minimal() | |
| 123 | #' ) | |
| 124 | #' | |
| 125 | #' # Example with pairwise Cox-PH analysis annotation table, adjusted annotation tables | |
| 126 | #' g_km( | |
| 127 | #' df = df, variables = variables, | |
| 128 | #' annot_coxph = TRUE, | |
| 129 | #' control_coxph = control_coxph(pval_method = "wald", ties = "exact", conf_level = 0.99), | |
| 130 | #' control_annot_coxph = control_coxph_annot(x = 0.26, w = 0.35), | |
| 131 | #' control_annot_surv_med = control_surv_med_annot(x = 0.8, y = 0.9, w = 0.35) | |
| 132 | #' ) | |
| 133 | #' | |
| 134 | #' @aliases kaplan_meier | |
| 135 | #' @export | |
| 136 | g_km <- function(df, | |
| 137 | variables, | |
| 138 | control_surv = control_surv_timepoint(), | |
| 139 | col = NULL, | |
| 140 | lty = NULL, | |
| 141 | lwd = 0.5, | |
| 142 | censor_show = TRUE, | |
| 143 | pch = 3, | |
| 144 | size = 2, | |
| 145 | max_time = NULL, | |
| 146 | xticks = NULL, | |
| 147 | xlab = "Days", | |
| 148 |                  yval = c("Survival", "Failure"), | |
| 149 | ylab = paste(yval, "Probability"), | |
| 150 | ylim = NULL, | |
| 151 | title = NULL, | |
| 152 | footnotes = NULL, | |
| 153 | font_size = 10, | |
| 154 | ci_ribbon = FALSE, | |
| 155 | annot_at_risk = TRUE, | |
| 156 | annot_at_risk_title = TRUE, | |
| 157 | annot_surv_med = TRUE, | |
| 158 | annot_coxph = FALSE, | |
| 159 | annot_stats = NULL, | |
| 160 | annot_stats_vlines = FALSE, | |
| 161 | control_coxph_pw = control_coxph(), | |
| 162 | ref_group_coxph = NULL, | |
| 163 | control_annot_surv_med = control_surv_med_annot(), | |
| 164 | control_annot_coxph = control_coxph_annot(), | |
| 165 | legend_pos = NULL, | |
| 166 | rel_height_plot = 0.75, | |
| 167 | ggtheme = NULL, | |
| 168 | as_list = FALSE, | |
| 169 | draw = lifecycle::deprecated(), | |
| 170 | newpage = lifecycle::deprecated(), | |
| 171 | gp = lifecycle::deprecated(), | |
| 172 | vp = lifecycle::deprecated(), | |
| 173 | name = lifecycle::deprecated(), | |
| 174 | annot_coxph_ref_lbls = lifecycle::deprecated(), | |
| 175 | position_coxph = lifecycle::deprecated(), | |
| 176 | position_surv_med = lifecycle::deprecated(), | |
| 177 |                  width_annots = lifecycle::deprecated()) { | |
| 178 | # Deprecated argument warnings | |
| 179 | 10x |   if (lifecycle::is_present(draw)) { | 
| 180 | 1x | lifecycle::deprecate_warn( | 
| 181 | 1x | "0.9.4", "g_km(draw)", | 
| 182 | 1x | details = "This argument is no longer used since the plot is now generated as a `ggplot2` object." | 
| 183 | ) | |
| 184 | } | |
| 185 | 10x |   if (lifecycle::is_present(newpage)) { | 
| 186 | 1x | lifecycle::deprecate_warn( | 
| 187 | 1x | "0.9.4", "g_km(newpage)", | 
| 188 | 1x | details = "This argument is no longer used since the plot is now generated as a `ggplot2` object." | 
| 189 | ) | |
| 190 | } | |
| 191 | 10x |   if (lifecycle::is_present(gp)) { | 
| 192 | 1x | lifecycle::deprecate_warn( | 
| 193 | 1x | "0.9.4", "g_km(gp)", | 
| 194 | 1x | details = "This argument is no longer used since the plot is now generated as a `ggplot2` object." | 
| 195 | ) | |
| 196 | } | |
| 197 | 10x |   if (lifecycle::is_present(vp)) { | 
| 198 | 1x | lifecycle::deprecate_warn( | 
| 199 | 1x | "0.9.4", "g_km(vp)", | 
| 200 | 1x | details = "This argument is no longer used since the plot is now generated as a `ggplot2` object." | 
| 201 | ) | |
| 202 | } | |
| 203 | 10x |   if (lifecycle::is_present(name)) { | 
| 204 | 1x | lifecycle::deprecate_warn( | 
| 205 | 1x | "0.9.4", "g_km(name)", | 
| 206 | 1x | details = "This argument is no longer used since the plot is now generated as a `ggplot2` object." | 
| 207 | ) | |
| 208 | } | |
| 209 | 10x |   if (lifecycle::is_present(annot_coxph_ref_lbls)) { | 
| 210 | 1x | lifecycle::deprecate_warn( | 
| 211 | 1x | "0.9.4", "g_km(annot_coxph_ref_lbls)", | 
| 212 | 1x | details = "Please specify this setting using the 'ref_lbls' element of control_annot_coxph." | 
| 213 | ) | |
| 214 | 1x | control_annot_coxph[["ref_lbls"]] <- annot_coxph_ref_lbls | 
| 215 | } | |
| 216 | 10x |   if (lifecycle::is_present(position_coxph)) { | 
| 217 | 1x | lifecycle::deprecate_warn( | 
| 218 | 1x | "0.9.4", "g_km(position_coxph)", | 
| 219 | 1x | details = "Please specify this setting using the 'x' and 'y' elements of control_annot_coxph." | 
| 220 | ) | |
| 221 | 1x | control_annot_coxph[["x"]] <- position_coxph[1] | 
| 222 | 1x | control_annot_coxph[["y"]] <- position_coxph[2] | 
| 223 | } | |
| 224 | 10x |   if (lifecycle::is_present(position_surv_med)) { | 
| 225 | 1x | lifecycle::deprecate_warn( | 
| 226 | 1x | "0.9.4", "g_km(position_surv_med)", | 
| 227 | 1x | details = "Please specify this setting using the 'x' and 'y' elements of control_annot_surv_med." | 
| 228 | ) | |
| 229 | 1x | control_annot_surv_med[["x"]] <- position_surv_med[1] | 
| 230 | 1x | control_annot_surv_med[["y"]] <- position_surv_med[2] | 
| 231 | } | |
| 232 | 10x |   if (lifecycle::is_present(width_annots)) { | 
| 233 | 1x | lifecycle::deprecate_warn( | 
| 234 | 1x | "0.9.4", "g_km(width_annots)", | 
| 235 | 1x | details = paste( | 
| 236 | 1x | "Please specify widths of annotation tables relative to the plot area using the 'w' element of", | 
| 237 | 1x | "control_annot_surv_med (for surv_med) and control_annot_coxph (for coxph)." | 
| 238 | ) | |
| 239 | ) | |
| 240 | 1x | control_annot_surv_med[["w"]] <- as.numeric(width_annots[["surv_med"]]) | 
| 241 | 1x | control_annot_coxph[["w"]] <- as.numeric(width_annots[["coxph"]]) | 
| 242 | } | |
| 243 | ||
| 244 | 10x | checkmate::assert_list(variables) | 
| 245 | 10x |   checkmate::assert_subset(c("tte", "arm", "is_event"), names(variables)) | 
| 246 | 10x | checkmate::assert_logical(censor_show, len = 1) | 
| 247 | 10x | checkmate::assert_numeric(size, len = 1) | 
| 248 | 10x | checkmate::assert_numeric(max_time, len = 1, null.ok = TRUE) | 
| 249 | 10x | checkmate::assert_numeric(xticks, null.ok = TRUE) | 
| 250 | 10x | checkmate::assert_character(xlab, len = 1, null.ok = TRUE) | 
| 251 | 10x | checkmate::assert_character(yval) | 
| 252 | 10x | checkmate::assert_character(ylab, null.ok = TRUE) | 
| 253 | 10x | checkmate::assert_numeric(ylim, finite = TRUE, any.missing = FALSE, len = 2, sorted = TRUE, null.ok = TRUE) | 
| 254 | 10x | checkmate::assert_character(title, len = 1, null.ok = TRUE) | 
| 255 | 10x | checkmate::assert_character(footnotes, len = 1, null.ok = TRUE) | 
| 256 | 10x | checkmate::assert_numeric(font_size, len = 1) | 
| 257 | 10x | checkmate::assert_logical(ci_ribbon, len = 1) | 
| 258 | 10x | checkmate::assert_logical(annot_at_risk, len = 1) | 
| 259 | 10x | checkmate::assert_logical(annot_at_risk_title, len = 1) | 
| 260 | 10x | checkmate::assert_logical(annot_surv_med, len = 1) | 
| 261 | 10x | checkmate::assert_logical(annot_coxph, len = 1) | 
| 262 | 10x |   checkmate::assert_subset(annot_stats, c("median", "min")) | 
| 263 | 10x | checkmate::assert_logical(annot_stats_vlines) | 
| 264 | 10x | checkmate::assert_list(control_coxph_pw) | 
| 265 | 10x | checkmate::assert_character(ref_group_coxph, len = 1, null.ok = TRUE) | 
| 266 | 10x | checkmate::assert_list(control_annot_surv_med) | 
| 267 | 10x | checkmate::assert_list(control_annot_coxph) | 
| 268 | 10x | checkmate::assert_numeric(legend_pos, finite = TRUE, any.missing = FALSE, len = 2, null.ok = TRUE) | 
| 269 | 10x | assert_proportion_value(rel_height_plot) | 
| 270 | 10x | checkmate::assert_logical(as_list) | 
| 271 | ||
| 272 | 10x | tte <- variables$tte | 
| 273 | 10x | is_event <- variables$is_event | 
| 274 | 10x | arm <- variables$arm | 
| 275 | 10x | assert_valid_factor(df[[arm]]) | 
| 276 | 10x | armval <- as.character(unique(df[[arm]])) | 
| 277 | 10x | assert_df_with_variables(df, list(tte = tte, is_event = is_event, arm = arm)) | 
| 278 | 10x | checkmate::assert_logical(df[[is_event]], min.len = 1) | 
| 279 | 10x | checkmate::assert_numeric(df[[tte]], min.len = 1) | 
| 280 | 10x | checkmate::assert_vector(col, len = length(armval), null.ok = TRUE) | 
| 281 | 10x | checkmate::assert_vector(lty, null.ok = TRUE) | 
| 282 | 10x | checkmate::assert_numeric(lwd, len = 1, null.ok = TRUE) | 
| 283 | ||
| 284 | 10x |   if (annot_coxph && length(armval) < 2) { | 
| 285 | ! | stop(paste( | 
| 286 | ! | "When `annot_coxph` = TRUE, `df` must contain at least 2 levels of `variables$arm`", | 
| 287 | ! | "in order to calculate the hazard ratio." | 
| 288 | )) | |
| 289 | } | |
| 290 | ||
| 291 | # process model | |
| 292 | 10x | yval <- match.arg(yval) | 
| 293 | 10x |   formula <- stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", arm)) | 
| 294 | 10x | fit_km <- survival::survfit( | 
| 295 | 10x | formula = formula, | 
| 296 | 10x | data = df, | 
| 297 | 10x | conf.int = control_surv$conf_level, | 
| 298 | 10x | conf.type = control_surv$conf_type | 
| 299 | ) | |
| 300 | 10x | data <- h_data_plot(fit_km, armval = armval, max_time = max_time) | 
| 301 | ||
| 302 | # calculate x-ticks | |
| 303 | 10x | xticks <- h_xticks(data = data, xticks = xticks, max_time = max_time) | 
| 304 | ||
| 305 | # change estimates of survival to estimates of failure (1 - survival) | |
| 306 | 10x |   if (yval == "Failure") { | 
| 307 | ! |     data[c("estimate", "conf.low", "conf.high", "censor")] <- list( | 
| 308 | ! | 1 - data$estimate, 1 - data$conf.low, 1 - data$conf.high, 1 - data$censor | 
| 309 | ) | |
| 310 | } | |
| 311 | ||
| 312 | # derive y-axis limits | |
| 313 | 10x |   if (is.null(ylim)) { | 
| 314 | 10x |     if (!is.null(max_time)) { | 
| 315 | 1x | y_lwr <- min(data[data$time < max_time, ][["estimate"]]) | 
| 316 | 1x | y_upr <- max(data[data$time < max_time, ][["estimate"]]) | 
| 317 |     } else { | |
| 318 | 9x | y_lwr <- min(data[["estimate"]]) | 
| 319 | 9x | y_upr <- max(data[["estimate"]]) | 
| 320 | } | |
| 321 | 10x | ylim <- c(y_lwr, y_upr) | 
| 322 | } | |
| 323 | ||
| 324 | # initialize ggplot | |
| 325 | 10x | gg_plt <- ggplot( | 
| 326 | 10x | data = data, | 
| 327 | 10x | mapping = aes( | 
| 328 | 10x | x = .data[["time"]], | 
| 329 | 10x | y = .data[["estimate"]], | 
| 330 | 10x | ymin = .data[["conf.low"]], | 
| 331 | 10x | ymax = .data[["conf.high"]], | 
| 332 | 10x | color = .data[["strata"]], | 
| 333 | 10x | fill = .data[["strata"]] | 
| 334 | ) | |
| 335 | ) + | |
| 336 | 10x | theme_bw(base_size = font_size) + | 
| 337 | 10x | scale_y_continuous(limits = ylim, expand = c(0.025, 0)) + | 
| 338 | 10x | labs(title = title, x = xlab, y = ylab, caption = footnotes) + | 
| 339 | 10x | theme( | 
| 340 | 10x | axis.text = element_text(size = font_size), | 
| 341 | 10x | axis.title = element_text(size = font_size), | 
| 342 | 10x | legend.title = element_blank(), | 
| 343 | 10x | legend.text = element_text(size = font_size), | 
| 344 | 10x | legend.box.background = element_rect(fill = "white", linewidth = 0.5), | 
| 345 | 10x | legend.background = element_blank(), | 
| 346 | 10x | legend.position = "inside", | 
| 347 | 10x | legend.spacing.y = unit(-0.02, "npc"), | 
| 348 | 10x | panel.grid.major = element_blank(), | 
| 349 | 10x | panel.grid.minor = element_blank() | 
| 350 | ) | |
| 351 | ||
| 352 | # derive x-axis limits | |
| 353 | 10x |   if (!is.null(max_time) && !is.null(xticks)) { | 
| 354 | 1x | gg_plt <- gg_plt + scale_x_continuous( | 
| 355 | 1x | breaks = xticks, limits = c(min(0, xticks), max(c(xticks, max_time))), expand = c(0.025, 0) | 
| 356 | ) | |
| 357 | 9x |   } else if (!is.null(xticks)) { | 
| 358 | 9x |     if (max(data$time) <= max(xticks)) { | 
| 359 | 9x | gg_plt <- gg_plt + scale_x_continuous( | 
| 360 | 9x | breaks = xticks, limits = c(min(0, min(xticks)), max(xticks)), expand = c(0.025, 0) | 
| 361 | ) | |
| 362 |     } else { | |
| 363 | ! | gg_plt <- gg_plt + scale_x_continuous(breaks = xticks, expand = c(0.025, 0)) | 
| 364 | } | |
| 365 | ! |   } else if (!is.null(max_time)) { | 
| 366 | ! | gg_plt <- gg_plt + scale_x_continuous(limits = c(0, max_time), expand = c(0.025, 0)) | 
| 367 | } | |
| 368 | ||
| 369 | # set legend position | |
| 370 | 10x |   if (!is.null(legend_pos)) { | 
| 371 | 2x | gg_plt <- gg_plt + theme(legend.position.inside = legend_pos) | 
| 372 |   } else { | |
| 373 | 8x | max_time2 <- sort( | 
| 374 | 8x | data$time, | 
| 375 | 8x | partial = nrow(data) - length(armval) - 1 | 
| 376 | 8x | )[nrow(data) - length(armval) - 1] | 
| 377 | ||
| 378 | 8x | y_rng <- ylim[2] - ylim[1] | 
| 379 | ||
| 380 | 8x | if (yval == "Survival" && all(data$estimate[data$time == max_time2] > ylim[1] + 0.09 * y_rng) && | 
| 381 | 8x |       all(data$estimate[data$time == max_time2] < ylim[1] + 0.5 * y_rng)) { # nolint | 
| 382 | 1x | gg_plt <- gg_plt + | 
| 383 | 1x | theme( | 
| 384 | 1x | legend.position.inside = c(1, 0.5), | 
| 385 | 1x | legend.justification = c(1.1, 0.6) | 
| 386 | ) | |
| 387 |     } else { | |
| 388 | 7x | gg_plt <- gg_plt + | 
| 389 | 7x | theme( | 
| 390 | 7x | legend.position.inside = c(1, 0), | 
| 391 | 7x | legend.justification = c(1.1, -0.4) | 
| 392 | ) | |
| 393 | } | |
| 394 | } | |
| 395 | ||
| 396 | # add lines | |
| 397 | 10x |   gg_plt <- if (is.null(lty)) { | 
| 398 | 9x | gg_plt + geom_step(linewidth = lwd, na.rm = TRUE) | 
| 399 | 10x |   } else if (length(lty) == 1) { | 
| 400 | ! | gg_plt + geom_step(linewidth = lwd, lty = lty, na.rm = TRUE) | 
| 401 |   } else { | |
| 402 | 1x | gg_plt + | 
| 403 | 1x | geom_step(aes(lty = .data[["strata"]]), linewidth = lwd, na.rm = TRUE) + | 
| 404 | 1x | scale_linetype_manual(values = lty) | 
| 405 | } | |
| 406 | ||
| 407 | # add censor marks | |
| 408 | 10x |   if (censor_show) { | 
| 409 | 10x | gg_plt <- gg_plt + geom_point( | 
| 410 | 10x | data = data[data$n.censor != 0, ], | 
| 411 | 10x | aes(x = .data[["time"]], y = .data[["censor"]], shape = "Censored"), | 
| 412 | 10x | size = size, | 
| 413 | 10x | na.rm = TRUE | 
| 414 | ) + | |
| 415 | 10x | scale_shape_manual(name = NULL, values = pch) + | 
| 416 | 10x | guides(fill = guide_legend(override.aes = list(shape = NA))) | 
| 417 | } | |
| 418 | ||
| 419 | # add ci ribbon | |
| 420 | 1x | if (ci_ribbon) gg_plt <- gg_plt + geom_ribbon(alpha = 0.3, lty = 0, na.rm = TRUE) | 
| 421 | ||
| 422 | # control aesthetics | |
| 423 | 10x |   if (!is.null(col)) { | 
| 424 | 1x | gg_plt <- gg_plt + | 
| 425 | 1x | scale_color_manual(values = col) + | 
| 426 | 1x | scale_fill_manual(values = col) | 
| 427 | } | |
| 428 | ! | if (!is.null(ggtheme)) gg_plt <- gg_plt + ggtheme | 
| 429 | ||
| 430 | # annotate with stats (text/vlines) | |
| 431 | 10x |   if (!is.null(annot_stats)) { | 
| 432 | ! |     if ("median" %in% annot_stats) { | 
| 433 | ! | fit_km_all <- survival::survfit( | 
| 434 | ! |         formula = stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", 1)), | 
| 435 | ! | data = df, | 
| 436 | ! | conf.int = control_surv$conf_level, | 
| 437 | ! | conf.type = control_surv$conf_type | 
| 438 | ) | |
| 439 | ! | gg_plt <- gg_plt + | 
| 440 | ! | annotate( | 
| 441 | ! | "text", | 
| 442 | ! | size = font_size / .pt, col = 1, lineheight = 0.95, | 
| 443 | ! | x = stats::median(fit_km_all) + 0.07 * max(data$time), | 
| 444 | ! | y = ifelse(yval == "Survival", 0.65, 0.35), | 
| 445 | ! |           label = paste("Median F/U:\n", round(stats::median(fit_km_all), 1), tolower(df$AVALU[1])) | 
| 446 | ) | |
| 447 | ! |       if (annot_stats_vlines) { | 
| 448 | ! | gg_plt <- gg_plt + | 
| 449 | ! | annotate( | 
| 450 | ! | "segment", | 
| 451 | ! | x = stats::median(fit_km_all), xend = stats::median(fit_km_all), y = -Inf, yend = Inf, | 
| 452 | ! | linetype = 2, col = "darkgray" | 
| 453 | ) | |
| 454 | } | |
| 455 | } | |
| 456 | ! |     if ("min" %in% annot_stats) { | 
| 457 | ! | min_fu <- min(df[[tte]]) | 
| 458 | ! | gg_plt <- gg_plt + | 
| 459 | ! | annotate( | 
| 460 | ! | "text", | 
| 461 | ! | size = font_size / .pt, col = 1, lineheight = 0.95, | 
| 462 | ! | x = min_fu + max(data$time) * 0.07, | 
| 463 | ! | y = ifelse(yval == "Survival", 0.96, 0.05), | 
| 464 | ! |           label = paste("Min. F/U:\n", round(min_fu, 1), tolower(df$AVALU[1])) | 
| 465 | ) | |
| 466 | ! |       if (annot_stats_vlines) { | 
| 467 | ! | gg_plt <- gg_plt + | 
| 468 | ! | annotate( | 
| 469 | ! | "segment", | 
| 470 | ! | linetype = 2, col = "darkgray", | 
| 471 | ! | x = min_fu, xend = min_fu, y = Inf, yend = -Inf | 
| 472 | ) | |
| 473 | } | |
| 474 | } | |
| 475 | ! | gg_plt <- gg_plt + guides(fill = guide_legend(override.aes = list(shape = NA, label = ""))) | 
| 476 | } | |
| 477 | ||
| 478 | # add at risk annotation table | |
| 479 | 10x |   if (annot_at_risk) { | 
| 480 | 9x | annot_tbl <- summary(fit_km, times = xticks, extend = TRUE) | 
| 481 | 9x |     annot_tbl <- if (is.null(fit_km$strata)) { | 
| 482 | ! | data.frame( | 
| 483 | ! | n.risk = annot_tbl$n.risk, | 
| 484 | ! | time = annot_tbl$time, | 
| 485 | ! | strata = armval | 
| 486 | ) | |
| 487 |     } else { | |
| 488 | 9x |       strata_lst <- strsplit(sub("=", "equals", levels(annot_tbl$strata)), "equals") | 
| 489 | 9x | levels(annot_tbl$strata) <- matrix(unlist(strata_lst), ncol = 2, byrow = TRUE)[, 2] | 
| 490 | 9x | data.frame( | 
| 491 | 9x | n.risk = annot_tbl$n.risk, | 
| 492 | 9x | time = annot_tbl$time, | 
| 493 | 9x | strata = annot_tbl$strata | 
| 494 | ) | |
| 495 | } | |
| 496 | ||
| 497 | 9x | at_risk_tbl <- as.data.frame(tidyr::pivot_wider(annot_tbl, names_from = "time", values_from = "n.risk")[, -1]) | 
| 498 | 9x | at_risk_tbl[is.na(at_risk_tbl)] <- 0 | 
| 499 | 9x | rownames(at_risk_tbl) <- levels(annot_tbl$strata) | 
| 500 | ||
| 501 | 9x | gg_at_risk <- df2gg( | 
| 502 | 9x | at_risk_tbl, | 
| 503 | 9x | font_size = font_size, col_labels = FALSE, hline = FALSE, | 
| 504 | 9x | colwidths = rep(1, ncol(at_risk_tbl)) | 
| 505 | ) + | |
| 506 | 9x | labs(title = if (annot_at_risk_title) "Patients at Risk:" else NULL, x = xlab) + | 
| 507 | 9x | theme_bw(base_size = font_size) + | 
| 508 | 9x | theme( | 
| 509 | 9x | plot.title = element_text(size = font_size, vjust = 3, face = "bold"), | 
| 510 | 9x | panel.border = element_blank(), | 
| 511 | 9x | panel.grid = element_blank(), | 
| 512 | 9x | axis.title.y = element_blank(), | 
| 513 | 9x | axis.ticks.y = element_blank(), | 
| 514 | 9x | axis.text.y = element_text(size = font_size, face = "italic", hjust = 1), | 
| 515 | 9x | axis.text.x = element_text(size = font_size), | 
| 516 | 9x | axis.line.x = element_line() | 
| 517 | ) + | |
| 518 | 9x | coord_cartesian(clip = "off", ylim = c(0.5, nrow(at_risk_tbl))) | 
| 519 | 9x | gg_at_risk <- suppressMessages( | 
| 520 | 9x | gg_at_risk + | 
| 521 | 9x | scale_x_continuous(expand = c(0.025, 0), breaks = seq_along(at_risk_tbl) - 0.5, labels = xticks) + | 
| 522 | 9x | scale_y_continuous(labels = rev(levels(annot_tbl$strata)), breaks = seq_len(nrow(at_risk_tbl))) | 
| 523 | ) | |
| 524 | ||
| 525 | 9x |     if (!as_list) { | 
| 526 | 8x | gg_plt <- cowplot::plot_grid( | 
| 527 | 8x | gg_plt, | 
| 528 | 8x | gg_at_risk, | 
| 529 | 8x | align = "v", | 
| 530 | 8x | axis = "tblr", | 
| 531 | 8x | ncol = 1, | 
| 532 | 8x | rel_heights = c(rel_height_plot, 1 - rel_height_plot) | 
| 533 | ) | |
| 534 | } | |
| 535 | } | |
| 536 | ||
| 537 | # add median survival time annotation table | |
| 538 | 10x |   if (annot_surv_med) { | 
| 539 | 8x | surv_med_tbl <- h_tbl_median_surv(fit_km = fit_km, armval = armval) | 
| 540 | 8x | bg_fill <- if (isTRUE(control_annot_surv_med[["fill"]])) "#00000020" else control_annot_surv_med[["fill"]] | 
| 541 | ||
| 542 | 8x | gg_surv_med <- df2gg(surv_med_tbl, font_size = font_size, colwidths = c(1, 1, 2), bg_fill = bg_fill) + | 
| 543 | 8x | theme( | 
| 544 | 8x | axis.text.y = element_text(size = font_size, face = "italic", hjust = 1), | 
| 545 | 8x | plot.margin = margin(0, 2, 0, 5) | 
| 546 | ) + | |
| 547 | 8x | coord_cartesian(clip = "off", ylim = c(0.5, nrow(surv_med_tbl) + 1.5)) | 
| 548 | 8x | gg_surv_med <- suppressMessages( | 
| 549 | 8x | gg_surv_med + | 
| 550 | 8x | scale_x_continuous(expand = c(0.025, 0)) + | 
| 551 | 8x | scale_y_continuous(labels = rev(rownames(surv_med_tbl)), breaks = seq_len(nrow(surv_med_tbl))) | 
| 552 | ) | |
| 553 | ||
| 554 | 8x | gg_plt <- cowplot::ggdraw(gg_plt) + | 
| 555 | 8x | cowplot::draw_plot( | 
| 556 | 8x | gg_surv_med, | 
| 557 | 8x | control_annot_surv_med[["x"]], | 
| 558 | 8x | control_annot_surv_med[["y"]], | 
| 559 | 8x | width = control_annot_surv_med[["w"]], | 
| 560 | 8x | height = control_annot_surv_med[["h"]], | 
| 561 | 8x | vjust = 0.5, | 
| 562 | 8x | hjust = 0.5 | 
| 563 | ) | |
| 564 | } | |
| 565 | ||
| 566 | # add coxph annotation table | |
| 567 | 10x |   if (annot_coxph) { | 
| 568 | 1x | coxph_tbl <- h_tbl_coxph_pairwise( | 
| 569 | 1x | df = df, | 
| 570 | 1x | variables = variables, | 
| 571 | 1x | ref_group_coxph = ref_group_coxph, | 
| 572 | 1x | control_coxph_pw = control_coxph_pw, | 
| 573 | 1x | annot_coxph_ref_lbls = control_annot_coxph[["ref_lbls"]] | 
| 574 | ) | |
| 575 | 1x | bg_fill <- if (isTRUE(control_annot_coxph[["fill"]])) "#00000020" else control_annot_coxph[["fill"]] | 
| 576 | ||
| 577 | 1x | gg_coxph <- df2gg(coxph_tbl, font_size = font_size, colwidths = c(1.1, 1, 3), bg_fill = bg_fill) + | 
| 578 | 1x | theme( | 
| 579 | 1x | axis.text.y = element_text(size = font_size, face = "italic", hjust = 1), | 
| 580 | 1x | plot.margin = margin(0, 2, 0, 5) | 
| 581 | ) + | |
| 582 | 1x | coord_cartesian(clip = "off", ylim = c(0.5, nrow(coxph_tbl) + 1.5)) | 
| 583 | 1x | gg_coxph <- suppressMessages( | 
| 584 | 1x | gg_coxph + | 
| 585 | 1x | scale_x_continuous(expand = c(0.025, 0)) + | 
| 586 | 1x | scale_y_continuous(labels = rev(rownames(coxph_tbl)), breaks = seq_len(nrow(coxph_tbl))) | 
| 587 | ) | |
| 588 | ||
| 589 | 1x | gg_plt <- cowplot::ggdraw(gg_plt) + | 
| 590 | 1x | cowplot::draw_plot( | 
| 591 | 1x | gg_coxph, | 
| 592 | 1x | control_annot_coxph[["x"]], | 
| 593 | 1x | control_annot_coxph[["y"]], | 
| 594 | 1x | width = control_annot_coxph[["w"]], | 
| 595 | 1x | height = control_annot_coxph[["h"]], | 
| 596 | 1x | vjust = 0.5, | 
| 597 | 1x | hjust = 0.5 | 
| 598 | ) | |
| 599 | } | |
| 600 | ||
| 601 | 10x |   if (as_list) { | 
| 602 | 1x | list(plot = gg_plt, table = gg_at_risk) | 
| 603 |   } else { | |
| 604 | 9x | gg_plt | 
| 605 | } | |
| 606 | } | 
| 1 | #' Tabulate biomarker effects on survival by subgroup | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The [tabulate_survival_biomarkers()] function creates a layout element to tabulate the estimated effects of multiple | |
| 6 | #' continuous biomarker variables on survival across subgroups, returning statistics including median survival time and | |
| 7 | #' hazard ratio for each population subgroup. The table is created from `df`, a list of data frames returned by | |
| 8 | #' [extract_survival_biomarkers()], with the statistics to include specified via the `vars` parameter. | |
| 9 | #' | |
| 10 | #' A forest plot can be created from the resulting table using the [g_forest()] function. | |
| 11 | #' | |
| 12 | #' @inheritParams fit_coxreg_multivar | |
| 13 | #' @inheritParams survival_duration_subgroups | |
| 14 | #' @inheritParams argument_convention | |
| 15 | #' @param df (`data.frame`)\cr containing all analysis variables, as returned by | |
| 16 | #' [extract_survival_biomarkers()]. | |
| 17 | #' @param vars (`character`)\cr the names of statistics to be reported among: | |
| 18 | #' * `n_tot_events`: Total number of events per group. | |
| 19 | #' * `n_tot`: Total number of observations per group. | |
| 20 | #' * `median`: Median survival time. | |
| 21 | #' * `hr`: Hazard ratio. | |
| 22 | #' * `ci`: Confidence interval of hazard ratio. | |
| 23 | #' * `pval`: p-value of the effect. | |
| 24 | #' Note, one of the statistics `n_tot` and `n_tot_events`, as well as both `hr` and `ci` are required. | |
| 25 | #' | |
| 26 | #' @details These functions create a layout starting from a data frame which contains | |
| 27 | #' the required statistics. The tables are then typically used as input for forest plots. | |
| 28 | #' | |
| 29 | #' @examples | |
| 30 | #' library(dplyr) | |
| 31 | #' | |
| 32 | #' adtte <- tern_ex_adtte | |
| 33 | #' | |
| 34 | #' # Save variable labels before data processing steps. | |
| 35 | #' adtte_labels <- formatters::var_labels(adtte) | |
| 36 | #' | |
| 37 | #' adtte_f <- adtte %>% | |
| 38 | #' filter(PARAMCD == "OS") %>% | |
| 39 | #' mutate( | |
| 40 | #' AVALU = as.character(AVALU), | |
| 41 | #' is_event = CNSR == 0 | |
| 42 | #' ) | |
| 43 | #' labels <- c("AVALU" = adtte_labels[["AVALU"]], "is_event" = "Event Flag") | |
| 44 | #' formatters::var_labels(adtte_f)[names(labels)] <- labels | |
| 45 | #' | |
| 46 | #' # Typical analysis of two continuous biomarkers `BMRKR1` and `AGE`, | |
| 47 | #' # in multiple regression models containing one covariate `RACE`, | |
| 48 | #' # as well as one stratification variable `STRATA1`. The subgroups | |
| 49 | #' # are defined by the levels of `BMRKR2`. | |
| 50 | #' | |
| 51 | #' df <- extract_survival_biomarkers( | |
| 52 | #' variables = list( | |
| 53 | #' tte = "AVAL", | |
| 54 | #' is_event = "is_event", | |
| 55 | #'     biomarkers = c("BMRKR1", "AGE"), | |
| 56 | #' strata = "STRATA1", | |
| 57 | #' covariates = "SEX", | |
| 58 | #' subgroups = "BMRKR2" | |
| 59 | #' ), | |
| 60 | #' label_all = "Total Patients", | |
| 61 | #' data = adtte_f | |
| 62 | #' ) | |
| 63 | #' df | |
| 64 | #' | |
| 65 | #' # Here we group the levels of `BMRKR2` manually. | |
| 66 | #' df_grouped <- extract_survival_biomarkers( | |
| 67 | #' variables = list( | |
| 68 | #' tte = "AVAL", | |
| 69 | #' is_event = "is_event", | |
| 70 | #'     biomarkers = c("BMRKR1", "AGE"), | |
| 71 | #' strata = "STRATA1", | |
| 72 | #' covariates = "SEX", | |
| 73 | #' subgroups = "BMRKR2" | |
| 74 | #' ), | |
| 75 | #' data = adtte_f, | |
| 76 | #' groups_lists = list( | |
| 77 | #' BMRKR2 = list( | |
| 78 | #' "low" = "LOW", | |
| 79 | #'       "low/medium" = c("LOW", "MEDIUM"), | |
| 80 | #'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH") | |
| 81 | #' ) | |
| 82 | #' ) | |
| 83 | #' ) | |
| 84 | #' df_grouped | |
| 85 | #' | |
| 86 | #' @name survival_biomarkers_subgroups | |
| 87 | #' @order 1 | |
| 88 | NULL | |
| 89 | ||
| 90 | #' Prepare survival data estimates for multiple biomarkers in a single data frame | |
| 91 | #' | |
| 92 | #' @description `r lifecycle::badge("stable")` | |
| 93 | #' | |
| 94 | #' Prepares estimates for number of events, patients and median survival times, as well as hazard ratio estimates, | |
| 95 | #' confidence intervals and p-values, for multiple biomarkers across population subgroups in a single data frame. | |
| 96 | #' `variables` corresponds to the names of variables found in `data`, passed as a named `list` and requires elements | |
| 97 | #' `tte`, `is_event`, `biomarkers` (vector of continuous biomarker variables), and optionally `subgroups` and `strata`. | |
| 98 | #' `groups_lists` optionally specifies groupings for `subgroups` variables. | |
| 99 | #' | |
| 100 | #' @inheritParams argument_convention | |
| 101 | #' @inheritParams fit_coxreg_multivar | |
| 102 | #' @inheritParams survival_duration_subgroups | |
| 103 | #' | |
| 104 | #' @return A `data.frame` with columns `biomarker`, `biomarker_label`, `n_tot`, `n_tot_events`, | |
| 105 | #' `median`, `hr`, `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`, | |
| 106 | #' `var_label`, and `row_type`. | |
| 107 | #' | |
| 108 | #' @seealso [h_coxreg_mult_cont_df()] which is used internally, [tabulate_survival_biomarkers()]. | |
| 109 | #' | |
| 110 | #' @export | |
| 111 | extract_survival_biomarkers <- function(variables, | |
| 112 | data, | |
| 113 | groups_lists = list(), | |
| 114 | control = control_coxreg(), | |
| 115 |                                         label_all = "All Patients") { | |
| 116 | 6x |   if ("strat" %in% names(variables)) { | 
| 117 | ! | warning( | 
| 118 | ! | "Warning: the `strat` element name of the `variables` list argument to `extract_survival_biomarkers() ", | 
| 119 | ! | "was deprecated in tern 0.9.4.\n ", | 
| 120 | ! | "Please use the name `strata` instead of `strat` in the `variables` argument." | 
| 121 | ) | |
| 122 | ! | variables[["strata"]] <- variables[["strat"]] | 
| 123 | } | |
| 124 | ||
| 125 | 6x | checkmate::assert_list(variables) | 
| 126 | 6x | checkmate::assert_character(variables$subgroups, null.ok = TRUE) | 
| 127 | 6x | checkmate::assert_string(label_all) | 
| 128 | ||
| 129 | # Start with all patients. | |
| 130 | 6x | result_all <- h_coxreg_mult_cont_df( | 
| 131 | 6x | variables = variables, | 
| 132 | 6x | data = data, | 
| 133 | 6x | control = control | 
| 134 | ) | |
| 135 | 6x | result_all$subgroup <- label_all | 
| 136 | 6x | result_all$var <- "ALL" | 
| 137 | 6x | result_all$var_label <- label_all | 
| 138 | 6x | result_all$row_type <- "content" | 
| 139 | 6x |   if (is.null(variables$subgroups)) { | 
| 140 | # Only return result for all patients. | |
| 141 | 1x | result_all | 
| 142 |   } else { | |
| 143 | # Add subgroups results. | |
| 144 | 5x | l_data <- h_split_by_subgroups( | 
| 145 | 5x | data, | 
| 146 | 5x | variables$subgroups, | 
| 147 | 5x | groups_lists = groups_lists | 
| 148 | ) | |
| 149 | 5x |     l_result <- lapply(l_data, function(grp) { | 
| 150 | 25x | result <- h_coxreg_mult_cont_df( | 
| 151 | 25x | variables = variables, | 
| 152 | 25x | data = grp$df, | 
| 153 | 25x | control = control | 
| 154 | ) | |
| 155 | 25x | result_labels <- grp$df_labels[rep(1, times = nrow(result)), ] | 
| 156 | 25x | cbind(result, result_labels) | 
| 157 | }) | |
| 158 | 5x | result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE)) | 
| 159 | 5x | result_subgroups$row_type <- "analysis" | 
| 160 | 5x | rbind( | 
| 161 | 5x | result_all, | 
| 162 | 5x | result_subgroups | 
| 163 | ) | |
| 164 | } | |
| 165 | } | |
| 166 | ||
| 167 | #' @describeIn survival_biomarkers_subgroups Table-creating function which creates a table | |
| 168 | #' summarizing biomarker effects on survival by subgroup. | |
| 169 | #' | |
| 170 | #' @param label_all `r lifecycle::badge("deprecated")`\cr please assign the `label_all` parameter within the | |
| 171 | #' [extract_survival_biomarkers()] function when creating `df`. | |
| 172 | #' | |
| 173 | #' @return An `rtables` table summarizing biomarker effects on survival by subgroup. | |
| 174 | #' | |
| 175 | #' @note In contrast to [tabulate_survival_subgroups()] this tabulation function does | |
| 176 | #' not start from an input layout `lyt`. This is because internally the table is | |
| 177 | #' created by combining multiple subtables. | |
| 178 | #' | |
| 179 | #' @seealso [extract_survival_biomarkers()] | |
| 180 | #' | |
| 181 | #' @examples | |
| 182 | #' ## Table with default columns. | |
| 183 | #' tabulate_survival_biomarkers(df) | |
| 184 | #' | |
| 185 | #' ## Table with a manually chosen set of columns: leave out "pval", reorder. | |
| 186 | #' tab <- tabulate_survival_biomarkers( | |
| 187 | #' df = df, | |
| 188 | #'   vars = c("n_tot_events", "ci", "n_tot", "median", "hr"), | |
| 189 | #' time_unit = as.character(adtte_f$AVALU[1]) | |
| 190 | #' ) | |
| 191 | #' | |
| 192 | #' ## Finally produce the forest plot. | |
| 193 | #' \donttest{ | |
| 194 | #' g_forest(tab, xlim = c(0.8, 1.2)) | |
| 195 | #' } | |
| 196 | #' | |
| 197 | #' @export | |
| 198 | #' @order 2 | |
| 199 | tabulate_survival_biomarkers <- function(df, | |
| 200 |                                          vars = c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"), | |
| 201 | groups_lists = list(), | |
| 202 | control = control_coxreg(), | |
| 203 | label_all = lifecycle::deprecated(), | |
| 204 | time_unit = NULL, | |
| 205 | na_str = default_na_str(), | |
| 206 | ..., | |
| 207 | .stat_names = NULL, | |
| 208 | .formats = NULL, | |
| 209 | .labels = NULL, | |
| 210 |                                          .indent_mods = NULL) { | |
| 211 | 5x |   if (lifecycle::is_present(label_all)) { | 
| 212 | 1x | lifecycle::deprecate_warn( | 
| 213 | 1x | "0.9.5", "tabulate_survival_biomarkers(label_all)", | 
| 214 | 1x | details = paste( | 
| 215 | 1x | "Please assign the `label_all` parameter within the", | 
| 216 | 1x | "`extract_survival_biomarkers()` function when creating `df`." | 
| 217 | ) | |
| 218 | ) | |
| 219 | } | |
| 220 | ||
| 221 | 5x | checkmate::assert_data_frame(df) | 
| 222 | 5x | checkmate::assert_character(df$biomarker) | 
| 223 | 5x | checkmate::assert_character(df$biomarker_label) | 
| 224 | 5x |   checkmate::assert_subset(vars, get_stats("tabulate_survival_biomarkers")) | 
| 225 | ||
| 226 | # Process standard extra arguments | |
| 227 | 5x |   extra_args <- list(".stats" = vars) | 
| 228 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 229 | ! | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 230 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 231 | ! | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 232 | ||
| 233 | 5x | colvars <- d_survival_subgroups_colvars( | 
| 234 | 5x | vars, | 
| 235 | 5x | conf_level = df$conf_level[1], | 
| 236 | 5x | method = df$pval_label[1], | 
| 237 | 5x | time_unit = time_unit | 
| 238 | ) | |
| 239 | ||
| 240 | # Process additional arguments to the statistic function | |
| 241 | 5x | extra_args <- c( | 
| 242 | 5x | extra_args, | 
| 243 | 5x | groups_lists = list(groups_lists), control = list(control), biomarker = TRUE, | 
| 244 | ... | |
| 245 | ) | |
| 246 | ||
| 247 | # Adding additional info from layout to analysis function | |
| 248 | 5x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 249 | 5x | formals(a_survival_subgroups) <- c(formals(a_survival_subgroups), extra_args[[".additional_fun_parameters"]]) | 
| 250 | ||
| 251 | # Create "ci" column from "lcl" and "ucl" | |
| 252 | 5x | df$ci <- combine_vectors(df$lcl, df$ucl) | 
| 253 | ||
| 254 | 5x | df_subs <- split(df, f = df$biomarker) | 
| 255 | 5x | tbls <- lapply( | 
| 256 | 5x | df_subs, | 
| 257 | 5x |     function(df) { | 
| 258 | 9x | lyt <- basic_table() | 
| 259 | ||
| 260 | # Split cols by the multiple variables to populate into columns. | |
| 261 | 9x | lyt <- split_cols_by_multivar( | 
| 262 | 9x | lyt = lyt, | 
| 263 | 9x | vars = colvars$vars, | 
| 264 | 9x | varlabels = colvars$labels | 
| 265 | ) | |
| 266 | ||
| 267 | # Row split by biomarker | |
| 268 | 9x | lyt <- split_rows_by( | 
| 269 | 9x | lyt = lyt, | 
| 270 | 9x | var = "biomarker_label", | 
| 271 | 9x | nested = FALSE | 
| 272 | ) | |
| 273 | ||
| 274 | # Add "All Patients" row | |
| 275 | 9x | lyt <- split_rows_by( | 
| 276 | 9x | lyt = lyt, | 
| 277 | 9x | var = "row_type", | 
| 278 | 9x |         split_fun = keep_split_levels("content"), | 
| 279 | 9x | nested = TRUE, | 
| 280 | 9x | child_labels = "hidden" | 
| 281 | ) | |
| 282 | 9x | lyt <- analyze_colvars( | 
| 283 | 9x | lyt = lyt, | 
| 284 | 9x | afun = a_survival_subgroups, | 
| 285 | 9x | na_str = na_str, | 
| 286 | 9x | extra_args = c(extra_args, overall = TRUE) | 
| 287 | ) | |
| 288 | ||
| 289 | # Add analysis rows | |
| 290 | 9x |       if ("analysis" %in% df$row_type) { | 
| 291 | 6x | lyt <- split_rows_by( | 
| 292 | 6x | lyt = lyt, | 
| 293 | 6x | var = "row_type", | 
| 294 | 6x |           split_fun = keep_split_levels("analysis"), | 
| 295 | 6x | nested = TRUE, | 
| 296 | 6x | child_labels = "hidden" | 
| 297 | ) | |
| 298 | 6x | lyt <- split_rows_by( | 
| 299 | 6x | lyt = lyt, | 
| 300 | 6x | var = "var_label", | 
| 301 | 6x | nested = TRUE, | 
| 302 | 6x | indent_mod = 1L | 
| 303 | ) | |
| 304 | 6x | lyt <- analyze_colvars( | 
| 305 | 6x | lyt = lyt, | 
| 306 | 6x | afun = a_survival_subgroups, | 
| 307 | 6x | na_str = na_str, | 
| 308 | 6x | inclNAs = TRUE, | 
| 309 | 6x | extra_args = extra_args | 
| 310 | ) | |
| 311 | } | |
| 312 | 9x | build_table(lyt, df = df) | 
| 313 | } | |
| 314 | ) | |
| 315 | ||
| 316 | 5x | result <- do.call(rbind, tbls) | 
| 317 | ||
| 318 | 5x |   n_tot_ids <- grep("^n_tot", vars) | 
| 319 | 5x |   hr_id <- match("hr", vars) | 
| 320 | 5x |   ci_id <- match("ci", vars) | 
| 321 | 5x | structure( | 
| 322 | 5x | result, | 
| 323 | 5x |     forest_header = paste0(c("Higher", "Lower"), "\nBetter"), | 
| 324 | 5x | col_x = hr_id, | 
| 325 | 5x | col_ci = ci_id, | 
| 326 | 5x | col_symbol_size = n_tot_ids[1] | 
| 327 | ) | |
| 328 | } | 
| 1 | #' Multivariate logistic regression table | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Layout-creating function which summarizes a logistic variable regression for binary outcome with | |
| 6 | #' categorical/continuous covariates in model statement. For each covariate category (if categorical) | |
| 7 | #' or specified values (if continuous), present degrees of freedom, regression parameter estimate and | |
| 8 | #' standard error (SE) relative to reference group or category. Report odds ratios for each covariate | |
| 9 | #' category or specified values and corresponding Wald confidence intervals as default but allow user | |
| 10 | #' to specify other confidence levels. Report p-value for Wald chi-square test of the null hypothesis | |
| 11 | #' that covariate has no effect on response in model containing all specified covariates. | |
| 12 | #' Allow option to include one two-way interaction and present similar output for | |
| 13 | #' each interaction degree of freedom. | |
| 14 | #' | |
| 15 | #' @inheritParams argument_convention | |
| 16 | #' @param drop_and_remove_str (`string`)\cr string to be dropped and removed. | |
| 17 | #' | |
| 18 | #' @return A layout object suitable for passing to further layouting functions, or to [rtables::build_table()]. | |
| 19 | #' Adding this function to an `rtable` layout will add a logistic regression variable summary to the table layout. | |
| 20 | #' | |
| 21 | #' @note For the formula, the variable names need to be standard `data.frame` column names without | |
| 22 | #' special characters. | |
| 23 | #' | |
| 24 | #' @examples | |
| 25 | #' library(dplyr) | |
| 26 | #' library(broom) | |
| 27 | #' | |
| 28 | #' adrs_f <- tern_ex_adrs %>% | |
| 29 | #' filter(PARAMCD == "BESRSPI") %>% | |
| 30 | #'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>% | |
| 31 | #' mutate( | |
| 32 | #'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0), | |
| 33 | #' RACE = factor(RACE), | |
| 34 | #' SEX = factor(SEX) | |
| 35 | #' ) | |
| 36 | #' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response") | |
| 37 | #' mod1 <- fit_logistic( | |
| 38 | #' data = adrs_f, | |
| 39 | #' variables = list( | |
| 40 | #' response = "Response", | |
| 41 | #' arm = "ARMCD", | |
| 42 | #'     covariates = c("AGE", "RACE") | |
| 43 | #' ) | |
| 44 | #' ) | |
| 45 | #' mod2 <- fit_logistic( | |
| 46 | #' data = adrs_f, | |
| 47 | #' variables = list( | |
| 48 | #' response = "Response", | |
| 49 | #' arm = "ARMCD", | |
| 50 | #'     covariates = c("AGE", "RACE"), | |
| 51 | #' interaction = "AGE" | |
| 52 | #' ) | |
| 53 | #' ) | |
| 54 | #' | |
| 55 | #' df <- tidy(mod1, conf_level = 0.99) | |
| 56 | #' df2 <- tidy(mod2, conf_level = 0.99) | |
| 57 | #' | |
| 58 | #' # flagging empty strings with "_" | |
| 59 | #' df <- df_explicit_na(df, na_level = "_") | |
| 60 | #' df2 <- df_explicit_na(df2, na_level = "_") | |
| 61 | #' | |
| 62 | #' result1 <- basic_table() %>% | |
| 63 | #' summarize_logistic( | |
| 64 | #' conf_level = 0.95, | |
| 65 | #' drop_and_remove_str = "_" | |
| 66 | #' ) %>% | |
| 67 | #' build_table(df = df) | |
| 68 | #' result1 | |
| 69 | #' | |
| 70 | #' result2 <- basic_table() %>% | |
| 71 | #' summarize_logistic( | |
| 72 | #' conf_level = 0.95, | |
| 73 | #' drop_and_remove_str = "_" | |
| 74 | #' ) %>% | |
| 75 | #' build_table(df = df2) | |
| 76 | #' result2 | |
| 77 | #' | |
| 78 | #' @export | |
| 79 | #' @order 1 | |
| 80 | summarize_logistic <- function(lyt, | |
| 81 | conf_level, | |
| 82 | drop_and_remove_str = "", | |
| 83 |                                .indent_mods = NULL) { | |
| 84 | # checks | |
| 85 | 3x | checkmate::assert_string(drop_and_remove_str) | 
| 86 | ||
| 87 | 3x |   sum_logistic_variable_test <- logistic_summary_by_flag("is_variable_summary") | 
| 88 | 3x |   sum_logistic_term_estimates <- logistic_summary_by_flag("is_term_summary", .indent_mods = .indent_mods) | 
| 89 | 3x |   sum_logistic_odds_ratios <- logistic_summary_by_flag("is_reference_summary", .indent_mods = .indent_mods) | 
| 90 | 3x | split_fun <- drop_and_remove_levels(drop_and_remove_str) | 
| 91 | ||
| 92 | 3x | lyt <- logistic_regression_cols(lyt, conf_level = conf_level) | 
| 93 | 3x | lyt <- split_rows_by(lyt, var = "variable", labels_var = "variable_label", split_fun = split_fun) | 
| 94 | 3x | lyt <- sum_logistic_variable_test(lyt) | 
| 95 | 3x | lyt <- split_rows_by(lyt, var = "term", labels_var = "term_label", split_fun = split_fun) | 
| 96 | 3x | lyt <- sum_logistic_term_estimates(lyt) | 
| 97 | 3x | lyt <- split_rows_by(lyt, var = "interaction", labels_var = "interaction_label", split_fun = split_fun) | 
| 98 | 3x | lyt <- split_rows_by(lyt, var = "reference", labels_var = "reference_label", split_fun = split_fun) | 
| 99 | 3x | lyt <- sum_logistic_odds_ratios(lyt) | 
| 100 | 3x | lyt | 
| 101 | } | |
| 102 | ||
| 103 | #' Fit for logistic regression | |
| 104 | #' | |
| 105 | #' @description `r lifecycle::badge("stable")` | |
| 106 | #' | |
| 107 | #' Fit a (conditional) logistic regression model. | |
| 108 | #' | |
| 109 | #' @inheritParams argument_convention | |
| 110 | #' @param data (`data.frame`)\cr the data frame on which the model was fit. | |
| 111 | #' @param response_definition (`string`)\cr the definition of what an event is in terms of `response`. | |
| 112 | #' This will be used when fitting the (conditional) logistic regression model on the left hand | |
| 113 | #' side of the formula. | |
| 114 | #' | |
| 115 | #' @return A fitted logistic regression model. | |
| 116 | #' | |
| 117 | #' @section Model Specification: | |
| 118 | #' | |
| 119 | #' The `variables` list needs to include the following elements: | |
| 120 | #' * `arm`: Treatment arm variable name. | |
| 121 | #' * `response`: The response arm variable name. Usually this is a 0/1 variable. | |
| 122 | #' * `covariates`: This is either `NULL` (no covariates) or a character vector of covariate variable names. | |
| 123 | #' * `interaction`: This is either `NULL` (no interaction) or a string of a single covariate variable name already | |
| 124 | #' included in `covariates`. Then the interaction with the treatment arm is included in the model. | |
| 125 | #' | |
| 126 | #' @examples | |
| 127 | #' library(dplyr) | |
| 128 | #' | |
| 129 | #' adrs_f <- tern_ex_adrs %>% | |
| 130 | #' filter(PARAMCD == "BESRSPI") %>% | |
| 131 | #'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>% | |
| 132 | #' mutate( | |
| 133 | #'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0), | |
| 134 | #' RACE = factor(RACE), | |
| 135 | #' SEX = factor(SEX) | |
| 136 | #' ) | |
| 137 | #' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response") | |
| 138 | #' mod1 <- fit_logistic( | |
| 139 | #' data = adrs_f, | |
| 140 | #' variables = list( | |
| 141 | #' response = "Response", | |
| 142 | #' arm = "ARMCD", | |
| 143 | #'     covariates = c("AGE", "RACE") | |
| 144 | #' ) | |
| 145 | #' ) | |
| 146 | #' mod2 <- fit_logistic( | |
| 147 | #' data = adrs_f, | |
| 148 | #' variables = list( | |
| 149 | #' response = "Response", | |
| 150 | #' arm = "ARMCD", | |
| 151 | #'     covariates = c("AGE", "RACE"), | |
| 152 | #' interaction = "AGE" | |
| 153 | #' ) | |
| 154 | #' ) | |
| 155 | #' | |
| 156 | #' @export | |
| 157 | fit_logistic <- function(data, | |
| 158 | variables = list( | |
| 159 | response = "Response", | |
| 160 | arm = "ARMCD", | |
| 161 | covariates = NULL, | |
| 162 | interaction = NULL, | |
| 163 | strata = NULL | |
| 164 | ), | |
| 165 |                          response_definition = "response") { | |
| 166 | 75x | assert_df_with_variables(data, variables) | 
| 167 | 75x |   checkmate::assert_subset(names(variables), c("response", "arm", "covariates", "interaction", "strata")) | 
| 168 | 75x | checkmate::assert_string(response_definition) | 
| 169 | 75x |   checkmate::assert_true(grepl("response", response_definition)) | 
| 170 | ||
| 171 | 75x | response_definition <- sub( | 
| 172 | 75x | pattern = "response", | 
| 173 | 75x | replacement = variables$response, | 
| 174 | 75x | x = response_definition, | 
| 175 | 75x | fixed = TRUE | 
| 176 | ) | |
| 177 | 75x | form <- paste0(response_definition, " ~ ", variables$arm) | 
| 178 | 75x |   if (!is.null(variables$covariates)) { | 
| 179 | 29x | form <- paste0(form, " + ", paste(variables$covariates, collapse = " + ")) | 
| 180 | } | |
| 181 | 75x |   if (!is.null(variables$interaction)) { | 
| 182 | 18x | checkmate::assert_string(variables$interaction) | 
| 183 | 18x | checkmate::assert_subset(variables$interaction, variables$covariates) | 
| 184 | 18x | form <- paste0(form, " + ", variables$arm, ":", variables$interaction) | 
| 185 | } | |
| 186 | 75x |   if (!is.null(variables$strata)) { | 
| 187 | 14x |     strata_arg <- if (length(variables$strata) > 1) { | 
| 188 | 7x |       paste0("I(interaction(", paste0(variables$strata, collapse = ", "), "))") | 
| 189 |     } else { | |
| 190 | 7x | variables$strata | 
| 191 | } | |
| 192 | 14x |     form <- paste0(form, "+ strata(", strata_arg, ")") | 
| 193 | } | |
| 194 | 75x | formula <- stats::as.formula(form) | 
| 195 | 75x |   if (is.null(variables$strata)) { | 
| 196 | 61x | stats::glm( | 
| 197 | 61x | formula = formula, | 
| 198 | 61x | data = data, | 
| 199 | 61x |       family = stats::binomial("logit") | 
| 200 | ) | |
| 201 |   } else { | |
| 202 | 14x | clogit_with_tryCatch( | 
| 203 | 14x | formula = formula, | 
| 204 | 14x | data = data, | 
| 205 | 14x | x = TRUE | 
| 206 | ) | |
| 207 | } | |
| 208 | } | |
| 209 | ||
| 210 | #' Custom tidy method for binomial GLM results | |
| 211 | #' | |
| 212 | #' @description `r lifecycle::badge("stable")` | |
| 213 | #' | |
| 214 | #' Helper method (for [broom::tidy()]) to prepare a data frame from a `glm` object | |
| 215 | #' with `binomial` family. | |
| 216 | #' | |
| 217 | #' @inheritParams argument_convention | |
| 218 | #' @param at (`numeric` or `NULL`)\cr optional values for the interaction variable. Otherwise the median is used. | |
| 219 | #' @param x (`glm`)\cr logistic regression model fitted by [stats::glm()] with "binomial" family. | |
| 220 | #' | |
| 221 | #' @return A `data.frame` containing the tidied model. | |
| 222 | #' | |
| 223 | #' @method tidy glm | |
| 224 | #' | |
| 225 | #' @seealso [h_logistic_regression] for relevant helper functions. | |
| 226 | #' | |
| 227 | #' @examples | |
| 228 | #' library(dplyr) | |
| 229 | #' library(broom) | |
| 230 | #' | |
| 231 | #' adrs_f <- tern_ex_adrs %>% | |
| 232 | #' filter(PARAMCD == "BESRSPI") %>% | |
| 233 | #'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>% | |
| 234 | #' mutate( | |
| 235 | #'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0), | |
| 236 | #' RACE = factor(RACE), | |
| 237 | #' SEX = factor(SEX) | |
| 238 | #' ) | |
| 239 | #' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response") | |
| 240 | #' mod1 <- fit_logistic( | |
| 241 | #' data = adrs_f, | |
| 242 | #' variables = list( | |
| 243 | #' response = "Response", | |
| 244 | #' arm = "ARMCD", | |
| 245 | #'     covariates = c("AGE", "RACE") | |
| 246 | #' ) | |
| 247 | #' ) | |
| 248 | #' mod2 <- fit_logistic( | |
| 249 | #' data = adrs_f, | |
| 250 | #' variables = list( | |
| 251 | #' response = "Response", | |
| 252 | #' arm = "ARMCD", | |
| 253 | #'     covariates = c("AGE", "RACE"), | |
| 254 | #' interaction = "AGE" | |
| 255 | #' ) | |
| 256 | #' ) | |
| 257 | #' | |
| 258 | #' df <- tidy(mod1, conf_level = 0.99) | |
| 259 | #' df2 <- tidy(mod2, conf_level = 0.99) | |
| 260 | #' | |
| 261 | #' @export | |
| 262 | tidy.glm <- function(x, # nolint | |
| 263 | conf_level = 0.95, | |
| 264 | at = NULL, | |
| 265 |                      ...) { | |
| 266 | 5x | checkmate::assert_class(x, "glm") | 
| 267 | 5x | checkmate::assert_set_equal(x$family$family, "binomial") | 
| 268 | ||
| 269 | 5x | terms_name <- attr(stats::terms(x), "term.labels") | 
| 270 | 5x | xs_class <- attr(x$terms, "dataClasses") | 
| 271 | 5x | interaction <- terms_name[which(!terms_name %in% names(xs_class))] | 
| 272 | 5x |   df <- if (length(interaction) == 0) { | 
| 273 | 2x | h_logistic_simple_terms( | 
| 274 | 2x | x = terms_name, | 
| 275 | 2x | fit_glm = x, | 
| 276 | 2x | conf_level = conf_level | 
| 277 | ) | |
| 278 |   } else { | |
| 279 | 3x | h_logistic_inter_terms( | 
| 280 | 3x | x = terms_name, | 
| 281 | 3x | fit_glm = x, | 
| 282 | 3x | conf_level = conf_level, | 
| 283 | 3x | at = at | 
| 284 | ) | |
| 285 | } | |
| 286 | 5x |   for (var in c("variable", "term", "interaction", "reference")) { | 
| 287 | 20x | df[[var]] <- factor(df[[var]], levels = unique(df[[var]])) | 
| 288 | } | |
| 289 | 5x | df | 
| 290 | } | |
| 291 | ||
| 292 | #' Logistic regression multivariate column layout function | |
| 293 | #' | |
| 294 | #' @description `r lifecycle::badge("stable")` | |
| 295 | #' | |
| 296 | #' Layout-creating function which creates a multivariate column layout summarizing logistic | |
| 297 | #' regression results. This function is a wrapper for [rtables::split_cols_by_multivar()]. | |
| 298 | #' | |
| 299 | #' @inheritParams argument_convention | |
| 300 | #' | |
| 301 | #' @return A layout object suitable for passing to further layouting functions. Adding this | |
| 302 | #' function to an `rtable` layout will split the table into columns corresponding to | |
| 303 | #' statistics `df`, `estimate`, `std_error`, `odds_ratio`, `ci`, and `pvalue`. | |
| 304 | #' | |
| 305 | #' @export | |
| 306 | logistic_regression_cols <- function(lyt, | |
| 307 |                                      conf_level = 0.95) { | |
| 308 | 4x |   vars <- c("df", "estimate", "std_error", "odds_ratio", "ci", "pvalue") | 
| 309 | 4x | var_labels <- c( | 
| 310 | 4x | df = "Degrees of Freedom", | 
| 311 | 4x | estimate = "Parameter Estimate", | 
| 312 | 4x | std_error = "Standard Error", | 
| 313 | 4x | odds_ratio = "Odds Ratio", | 
| 314 | 4x |     ci = paste("Wald", f_conf_level(conf_level)), | 
| 315 | 4x | pvalue = "p-value" | 
| 316 | ) | |
| 317 | 4x | split_cols_by_multivar( | 
| 318 | 4x | lyt = lyt, | 
| 319 | 4x | vars = vars, | 
| 320 | 4x | varlabels = var_labels | 
| 321 | ) | |
| 322 | } | |
| 323 | ||
| 324 | #' Logistic regression summary table | |
| 325 | #' | |
| 326 | #' @description `r lifecycle::badge("stable")` | |
| 327 | #' | |
| 328 | #' Constructor for content functions to be used in [`summarize_logistic()`] to summarize | |
| 329 | #' logistic regression results. This function is a wrapper for [rtables::summarize_row_groups()]. | |
| 330 | #' | |
| 331 | #' @inheritParams argument_convention | |
| 332 | #' @param flag_var (`string`)\cr variable name identifying which row should be used in this | |
| 333 | #' content function. | |
| 334 | #' | |
| 335 | #' @return A content function. | |
| 336 | #' | |
| 337 | #' @export | |
| 338 | logistic_summary_by_flag <- function(flag_var, na_str = default_na_str(), .indent_mods = NULL) { | |
| 339 | 10x | checkmate::assert_string(flag_var) | 
| 340 | 10x |   function(lyt) { | 
| 341 | 10x | cfun_list <- list( | 
| 342 | 10x |       df = cfun_by_flag("df", flag_var, format = "xx.", .indent_mods = .indent_mods), | 
| 343 | 10x |       estimate = cfun_by_flag("estimate", flag_var, format = "xx.xxx", .indent_mods = .indent_mods), | 
| 344 | 10x |       std_error = cfun_by_flag("std_error", flag_var, format = "xx.xxx", .indent_mods = .indent_mods), | 
| 345 | 10x |       odds_ratio = cfun_by_flag("odds_ratio", flag_var, format = ">999.99", .indent_mods = .indent_mods), | 
| 346 | 10x |       ci = cfun_by_flag("ci", flag_var, format = format_extreme_values_ci(2L), .indent_mods = .indent_mods), | 
| 347 | 10x |       pvalue = cfun_by_flag("pvalue", flag_var, format = "x.xxxx | (<0.0001)", .indent_mods = .indent_mods) | 
| 348 | ) | |
| 349 | 10x | summarize_row_groups( | 
| 350 | 10x | lyt = lyt, | 
| 351 | 10x | cfun = cfun_list, | 
| 352 | 10x | na_str = na_str | 
| 353 | ) | |
| 354 | } | |
| 355 | } | 
| 1 | #' Proportion difference estimation | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analysis function [estimate_proportion_diff()] creates a layout element to estimate the difference in proportion | |
| 6 | #' of responders within a studied population. The primary analysis variable, `vars`, is a logical variable indicating | |
| 7 | #' whether a response has occurred for each record. See the `method` parameter for options of methods to use when | |
| 8 | #' constructing the confidence interval of the proportion difference. A stratification variable can be supplied via the | |
| 9 | #' `strata` element of the `variables` argument. | |
| 10 | #' | |
| 11 | #' | |
| 12 | #' @inheritParams prop_diff_strat_nc | |
| 13 | #' @inheritParams argument_convention | |
| 14 | #' @param method (`string`)\cr the method used for the confidence interval estimation. | |
| 15 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 16 | #' | |
| 17 | #'   Options are: ``r shQuote(get_stats("estimate_proportion_diff"), type = "sh")`` | |
| 18 | #' | |
| 19 | #' @seealso [d_proportion_diff()] | |
| 20 | #' | |
| 21 | #' @name prop_diff | |
| 22 | #' @order 1 | |
| 23 | NULL | |
| 24 | ||
| 25 | #' @describeIn prop_diff Statistics function estimating the difference | |
| 26 | #' in terms of responder proportion. | |
| 27 | #' | |
| 28 | #' @return | |
| 29 | #' * `s_proportion_diff()` returns a named list of elements `diff` and `diff_ci`. | |
| 30 | #' | |
| 31 | #' @note When performing an unstratified analysis, methods `"cmh"`, `"strat_newcombe"`, and `"strat_newcombecc"` are | |
| 32 | #' not permitted. | |
| 33 | #' | |
| 34 | #' @examples | |
| 35 | #' s_proportion_diff( | |
| 36 | #' df = subset(dta, grp == "A"), | |
| 37 | #' .var = "rsp", | |
| 38 | #' .ref_group = subset(dta, grp == "B"), | |
| 39 | #' .in_ref_col = FALSE, | |
| 40 | #' conf_level = 0.90, | |
| 41 | #' method = "ha" | |
| 42 | #' ) | |
| 43 | #' | |
| 44 | #' # CMH example with strata | |
| 45 | #' s_proportion_diff( | |
| 46 | #' df = subset(dta, grp == "A"), | |
| 47 | #' .var = "rsp", | |
| 48 | #' .ref_group = subset(dta, grp == "B"), | |
| 49 | #' .in_ref_col = FALSE, | |
| 50 | #'   variables = list(strata = c("f1", "f2")), | |
| 51 | #' conf_level = 0.90, | |
| 52 | #' method = "cmh" | |
| 53 | #' ) | |
| 54 | #' | |
| 55 | #' @export | |
| 56 | s_proportion_diff <- function(df, | |
| 57 | .var, | |
| 58 | .ref_group, | |
| 59 | .in_ref_col, | |
| 60 | variables = list(strata = NULL), | |
| 61 | conf_level = 0.95, | |
| 62 | method = c( | |
| 63 | "waldcc", "wald", "cmh", | |
| 64 | "ha", "newcombe", "newcombecc", | |
| 65 | "strat_newcombe", "strat_newcombecc" | |
| 66 | ), | |
| 67 | weights_method = "cmh", | |
| 68 |                               ...) { | |
| 69 | 11x | method <- match.arg(method) | 
| 70 | 11x |   if (is.null(variables$strata) && checkmate::test_subset(method, c("cmh", "strat_newcombe", "strat_newcombecc"))) { | 
| 71 | ! | stop(paste( | 
| 72 | ! | "When performing an unstratified analysis, methods 'cmh', 'strat_newcombe', and 'strat_newcombecc' are not", | 
| 73 | ! | "permitted. Please choose a different method." | 
| 74 | )) | |
| 75 | } | |
| 76 | 11x | y <- list(diff = numeric(), diff_ci = numeric()) | 
| 77 | ||
| 78 | 11x |   if (!.in_ref_col) { | 
| 79 | 7x | rsp <- c(.ref_group[[.var]], df[[.var]]) | 
| 80 | 7x | grp <- factor( | 
| 81 | 7x | rep( | 
| 82 | 7x |         c("ref", "Not-ref"), | 
| 83 | 7x | c(nrow(.ref_group), nrow(df)) | 
| 84 | ), | |
| 85 | 7x |       levels = c("ref", "Not-ref") | 
| 86 | ) | |
| 87 | ||
| 88 | 7x |     if (!is.null(variables$strata)) { | 
| 89 | 3x | strata_colnames <- variables$strata | 
| 90 | 3x | checkmate::assert_character(strata_colnames, null.ok = FALSE) | 
| 91 | 3x | strata_vars <- stats::setNames(as.list(strata_colnames), strata_colnames) | 
| 92 | ||
| 93 | 3x | assert_df_with_variables(df, strata_vars) | 
| 94 | 3x | assert_df_with_variables(.ref_group, strata_vars) | 
| 95 | ||
| 96 | # Merging interaction strata for reference group rows data and remaining | |
| 97 | 3x | strata <- c( | 
| 98 | 3x | interaction(.ref_group[strata_colnames]), | 
| 99 | 3x | interaction(df[strata_colnames]) | 
| 100 | ) | |
| 101 | 3x | strata <- as.factor(strata) | 
| 102 | } | |
| 103 | ||
| 104 | # Defining the std way to calculate weights for strat_newcombe | |
| 105 | 7x |     if (!is.null(variables$weights_method)) { | 
| 106 | ! | weights_method <- variables$weights_method | 
| 107 |     } else { | |
| 108 | 7x | weights_method <- "cmh" | 
| 109 | } | |
| 110 | ||
| 111 | 7x | y <- switch(method, | 
| 112 | 7x | "wald" = prop_diff_wald(rsp, grp, conf_level, correct = FALSE), | 
| 113 | 7x | "waldcc" = prop_diff_wald(rsp, grp, conf_level, correct = TRUE), | 
| 114 | 7x | "ha" = prop_diff_ha(rsp, grp, conf_level), | 
| 115 | 7x | "newcombe" = prop_diff_nc(rsp, grp, conf_level, correct = FALSE), | 
| 116 | 7x | "newcombecc" = prop_diff_nc(rsp, grp, conf_level, correct = TRUE), | 
| 117 | 7x | "strat_newcombe" = prop_diff_strat_nc(rsp, | 
| 118 | 7x | grp, | 
| 119 | 7x | strata, | 
| 120 | 7x | weights_method, | 
| 121 | 7x | conf_level, | 
| 122 | 7x | correct = FALSE | 
| 123 | ), | |
| 124 | 7x | "strat_newcombecc" = prop_diff_strat_nc(rsp, | 
| 125 | 7x | grp, | 
| 126 | 7x | strata, | 
| 127 | 7x | weights_method, | 
| 128 | 7x | conf_level, | 
| 129 | 7x | correct = TRUE | 
| 130 | ), | |
| 131 | 7x |       "cmh" = prop_diff_cmh(rsp, grp, strata, conf_level)[c("diff", "diff_ci")] | 
| 132 | ) | |
| 133 | ||
| 134 | 7x |     y$diff <- setNames(y$diff * 100, paste0("diff_", method)) | 
| 135 | 7x |     y$diff_ci <- setNames(y$diff_ci * 100, paste0("diff_ci_", method, c("_l", "_u"))) | 
| 136 | } | |
| 137 | ||
| 138 | 11x | attr(y$diff, "label") <- "Difference in Response rate (%)" | 
| 139 | 11x | attr(y$diff_ci, "label") <- d_proportion_diff( | 
| 140 | 11x | conf_level, method, | 
| 141 | 11x | long = FALSE | 
| 142 | ) | |
| 143 | ||
| 144 | 11x | y | 
| 145 | } | |
| 146 | ||
| 147 | #' @describeIn prop_diff Formatted analysis function which is used as `afun` in `estimate_proportion_diff()`. | |
| 148 | #' | |
| 149 | #' @return | |
| 150 | #' * `a_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 151 | #' | |
| 152 | #' @examples | |
| 153 | #' a_proportion_diff( | |
| 154 | #' df = subset(dta, grp == "A"), | |
| 155 | #'   .stats = c("diff"), | |
| 156 | #' .var = "rsp", | |
| 157 | #' .ref_group = subset(dta, grp == "B"), | |
| 158 | #' .in_ref_col = FALSE, | |
| 159 | #' conf_level = 0.90, | |
| 160 | #' method = "ha" | |
| 161 | #' ) | |
| 162 | #' | |
| 163 | #' @export | |
| 164 | a_proportion_diff <- function(df, | |
| 165 | ..., | |
| 166 | .stats = NULL, | |
| 167 | .stat_names = NULL, | |
| 168 | .formats = NULL, | |
| 169 | .labels = NULL, | |
| 170 |                               .indent_mods = NULL) { | |
| 171 | 9x | dots_extra_args <- list(...) | 
| 172 | ||
| 173 | # Check if there are user-defined functions | |
| 174 | 9x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 175 | 9x | .stats <- default_and_custom_stats_list$all_stats | 
| 176 | 9x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 177 | ||
| 178 | # Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params) | |
| 179 | 9x | extra_afun_params <- retrieve_extra_afun_params( | 
| 180 | 9x | names(dots_extra_args$.additional_fun_parameters) | 
| 181 | ) | |
| 182 | 9x | dots_extra_args$.additional_fun_parameters <- NULL # After extraction we do not need them anymore | 
| 183 | ||
| 184 | # Main statistical functions application | |
| 185 | 9x | x_stats <- .apply_stat_functions( | 
| 186 | 9x | default_stat_fnc = s_proportion_diff, | 
| 187 | 9x | custom_stat_fnc_list = custom_stat_functions, | 
| 188 | 9x | args_list = c( | 
| 189 | 9x | df = list(df), | 
| 190 | 9x | extra_afun_params, | 
| 191 | 9x | dots_extra_args | 
| 192 | ) | |
| 193 | ) | |
| 194 | ||
| 195 | # Fill in with stats defaults if needed | |
| 196 | 9x |   .stats <- get_stats("estimate_proportion_diff", | 
| 197 | 9x | stats_in = .stats, | 
| 198 | 9x | custom_stats_in = names(custom_stat_functions) | 
| 199 | ) | |
| 200 | ||
| 201 | 9x | x_stats <- x_stats[.stats] | 
| 202 | ||
| 203 | # Fill in formats/indents/labels with custom input and defaults | |
| 204 | 9x | .formats <- get_formats_from_stats(.stats, .formats) | 
| 205 | 9x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods) | 
| 206 | 9x |   if (is.null(.labels)) { | 
| 207 | 9x | .labels <- sapply(x_stats, attr, "label") | 
| 208 | 9x | .labels <- .labels[nzchar(.labels) & !sapply(.labels, is.null) & !is.na(.labels)] | 
| 209 | } | |
| 210 | 9x | .labels <- get_labels_from_stats(.stats, .labels) | 
| 211 | ||
| 212 | # Auto format handling | |
| 213 | 9x | .formats <- apply_auto_formatting( | 
| 214 | 9x | .formats, | 
| 215 | 9x | x_stats, | 
| 216 | 9x | extra_afun_params$.df_row, | 
| 217 | 9x | extra_afun_params$.var | 
| 218 | ) | |
| 219 | ||
| 220 | # Get and check statistical names from defaults | |
| 221 | 9x | .stat_names <- get_stat_names(x_stats, .stat_names) # note is x_stats | 
| 222 | ||
| 223 | 9x | in_rows( | 
| 224 | 9x | .list = x_stats, | 
| 225 | 9x | .formats = .formats, | 
| 226 | 9x | .names = names(.labels), | 
| 227 | 9x | .stat_names = .stat_names, | 
| 228 | 9x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 229 | 9x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 230 | ) | |
| 231 | } | |
| 232 | ||
| 233 | #' @describeIn prop_diff Layout-creating function which can take statistics function arguments | |
| 234 | #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 235 | #' | |
| 236 | #' @return | |
| 237 | #' * `estimate_proportion_diff()` returns a layout object suitable for passing to further layouting functions, | |
| 238 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 239 | #' the statistics from `s_proportion_diff()` to the table layout. | |
| 240 | #' | |
| 241 | #' @examples | |
| 242 | #' ## "Mid" case: 4/4 respond in group A, 1/2 respond in group B. | |
| 243 | #' nex <- 100 # Number of example rows | |
| 244 | #' dta <- data.frame( | |
| 245 | #' "rsp" = sample(c(TRUE, FALSE), nex, TRUE), | |
| 246 | #'   "grp" = sample(c("A", "B"), nex, TRUE), | |
| 247 | #'   "f1" = sample(c("a1", "a2"), nex, TRUE), | |
| 248 | #'   "f2" = sample(c("x", "y", "z"), nex, TRUE), | |
| 249 | #' stringsAsFactors = TRUE | |
| 250 | #' ) | |
| 251 | #' | |
| 252 | #' l <- basic_table() %>% | |
| 253 | #' split_cols_by(var = "grp", ref_group = "B") %>% | |
| 254 | #' estimate_proportion_diff( | |
| 255 | #' vars = "rsp", | |
| 256 | #' conf_level = 0.90, | |
| 257 | #' method = "ha" | |
| 258 | #' ) | |
| 259 | #' | |
| 260 | #' build_table(l, df = dta) | |
| 261 | #' | |
| 262 | #' @export | |
| 263 | #' @order 2 | |
| 264 | estimate_proportion_diff <- function(lyt, | |
| 265 | vars, | |
| 266 | variables = list(strata = NULL), | |
| 267 | conf_level = 0.95, | |
| 268 | method = c( | |
| 269 | "waldcc", "wald", "cmh", | |
| 270 | "ha", "newcombe", "newcombecc", | |
| 271 | "strat_newcombe", "strat_newcombecc" | |
| 272 | ), | |
| 273 | weights_method = "cmh", | |
| 274 | var_labels = vars, | |
| 275 | na_str = default_na_str(), | |
| 276 | nested = TRUE, | |
| 277 | show_labels = "hidden", | |
| 278 | table_names = vars, | |
| 279 | section_div = NA_character_, | |
| 280 | ..., | |
| 281 | na_rm = TRUE, | |
| 282 |                                      .stats = c("diff", "diff_ci"), | |
| 283 | .stat_names = NULL, | |
| 284 | .formats = c(diff = "xx.x", diff_ci = "(xx.x, xx.x)"), | |
| 285 | .labels = NULL, | |
| 286 |                                      .indent_mods = c(diff = 0L, diff_ci = 1L)) { | |
| 287 | # Depending on main functions | |
| 288 | 4x | extra_args <- list( | 
| 289 | 4x | "na_rm" = na_rm, | 
| 290 | 4x | "variables" = variables, | 
| 291 | 4x | "conf_level" = conf_level, | 
| 292 | 4x | "method" = method, | 
| 293 | 4x | "weights_method" = weights_method, | 
| 294 | ... | |
| 295 | ) | |
| 296 | ||
| 297 | # Needed defaults | |
| 298 | 4x | if (!is.null(.stats)) extra_args[[".stats"]] <- .stats | 
| 299 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 300 | 4x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 301 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 302 | 4x | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 303 | ||
| 304 | # Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params) | |
| 305 | 4x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 306 | 4x | formals(a_proportion_diff) <- c( | 
| 307 | 4x | formals(a_proportion_diff), | 
| 308 | 4x | extra_args[[".additional_fun_parameters"]] | 
| 309 | ) | |
| 310 | ||
| 311 |   # Main {rtables} structural call | |
| 312 | 4x | analyze( | 
| 313 | 4x | lyt = lyt, | 
| 314 | 4x | vars = vars, | 
| 315 | 4x | var_labels = var_labels, | 
| 316 | 4x | afun = a_proportion_diff, | 
| 317 | 4x | na_str = na_str, | 
| 318 | 4x | inclNAs = !na_rm, | 
| 319 | 4x | nested = nested, | 
| 320 | 4x | extra_args = extra_args, | 
| 321 | 4x | show_labels = show_labels, | 
| 322 | 4x | table_names = table_names, | 
| 323 | 4x | section_div = section_div | 
| 324 | ) | |
| 325 | } | |
| 326 | ||
| 327 | #' Check proportion difference arguments | |
| 328 | #' | |
| 329 | #' @description `r lifecycle::badge("stable")` | |
| 330 | #' | |
| 331 | #' Verifies that and/or convert arguments into valid values to be used in the | |
| 332 | #' estimation of difference in responder proportions. | |
| 333 | #' | |
| 334 | #' @inheritParams prop_diff | |
| 335 | #' @inheritParams prop_diff_wald | |
| 336 | #' | |
| 337 | #' @examples | |
| 338 | #' # example code | |
| 339 | #' ## "Mid" case: 4/4 respond in group A, 1/2 respond in group B. | |
| 340 | #' nex <- 100 # Number of example rows | |
| 341 | #' dta <- data.frame( | |
| 342 | #' "rsp" = sample(c(TRUE, FALSE), nex, TRUE), | |
| 343 | #'   "grp" = sample(c("A", "B"), nex, TRUE), | |
| 344 | #'   "f1" = sample(c("a1", "a2"), nex, TRUE), | |
| 345 | #'   "f2" = sample(c("x", "y", "z"), nex, TRUE), | |
| 346 | #' stringsAsFactors = TRUE | |
| 347 | #' ) | |
| 348 | #' check_diff_prop_ci(rsp = dta[["rsp"]], grp = dta[["grp"]], conf_level = 0.95) | |
| 349 | #' @export | |
| 350 | check_diff_prop_ci <- function(rsp, | |
| 351 | grp, | |
| 352 | strata = NULL, | |
| 353 | conf_level, | |
| 354 |                                correct = NULL) { | |
| 355 | 28x | checkmate::assert_logical(rsp, any.missing = FALSE) | 
| 356 | 28x | checkmate::assert_factor(grp, len = length(rsp), any.missing = FALSE, n.levels = 2) | 
| 357 | 27x | checkmate::assert_number(conf_level, lower = 0, upper = 1) | 
| 358 | 27x | checkmate::assert_flag(correct, null.ok = TRUE) | 
| 359 | ||
| 360 | 27x |   if (!is.null(strata)) { | 
| 361 | 12x | checkmate::assert_factor(strata, len = length(rsp)) | 
| 362 | } | |
| 363 | ||
| 364 | 27x | invisible() | 
| 365 | } | |
| 366 | ||
| 367 | #' Description of method used for proportion comparison | |
| 368 | #' | |
| 369 | #' @description `r lifecycle::badge("stable")` | |
| 370 | #' | |
| 371 | #' This is an auxiliary function that describes the analysis in | |
| 372 | #' [s_proportion_diff()]. | |
| 373 | #' | |
| 374 | #' @inheritParams s_proportion_diff | |
| 375 | #' @param long (`flag`)\cr whether a long (`TRUE`) or a short (`FALSE`, default) description is required. | |
| 376 | #' | |
| 377 | #' @return A `string` describing the analysis. | |
| 378 | #' | |
| 379 | #' @seealso [prop_diff] | |
| 380 | #' | |
| 381 | #' @export | |
| 382 | d_proportion_diff <- function(conf_level, | |
| 383 | method, | |
| 384 |                               long = FALSE) { | |
| 385 | 11x | label <- paste0(conf_level * 100, "% CI") | 
| 386 | 11x |   if (long) { | 
| 387 | ! | label <- paste( | 
| 388 | ! | label, | 
| 389 | ! | ifelse( | 
| 390 | ! | method == "cmh", | 
| 391 | ! | "for adjusted difference", | 
| 392 | ! | "for difference" | 
| 393 | ) | |
| 394 | ) | |
| 395 | } | |
| 396 | ||
| 397 | 11x | method_part <- switch(method, | 
| 398 | 11x | "cmh" = "CMH, without correction", | 
| 399 | 11x | "waldcc" = "Wald, with correction", | 
| 400 | 11x | "wald" = "Wald, without correction", | 
| 401 | 11x | "ha" = "Anderson-Hauck", | 
| 402 | 11x | "newcombe" = "Newcombe, without correction", | 
| 403 | 11x | "newcombecc" = "Newcombe, with correction", | 
| 404 | 11x | "strat_newcombe" = "Stratified Newcombe, without correction", | 
| 405 | 11x | "strat_newcombecc" = "Stratified Newcombe, with correction", | 
| 406 | 11x | stop(paste(method, "does not have a description")) | 
| 407 | ) | |
| 408 | 11x |   paste0(label, " (", method_part, ")") | 
| 409 | } | |
| 410 | ||
| 411 | #' Helper functions to calculate proportion difference | |
| 412 | #' | |
| 413 | #' @description `r lifecycle::badge("stable")` | |
| 414 | #' | |
| 415 | #' @inheritParams argument_convention | |
| 416 | #' @inheritParams prop_diff | |
| 417 | #' @param grp (`factor`)\cr vector assigning observations to one out of two groups | |
| 418 | #' (e.g. reference and treatment group). | |
| 419 | #' | |
| 420 | #' @return A named `list` of elements `diff` (proportion difference) and `diff_ci` | |
| 421 | #' (proportion difference confidence interval). | |
| 422 | #' | |
| 423 | #' @seealso [prop_diff()] for implementation of these helper functions. | |
| 424 | #' | |
| 425 | #' @name h_prop_diff | |
| 426 | NULL | |
| 427 | ||
| 428 | #' @describeIn h_prop_diff The Wald interval follows the usual textbook | |
| 429 | #' definition for a single proportion confidence interval using the normal | |
| 430 | #' approximation. It is possible to include a continuity correction for Wald's | |
| 431 | #' interval. | |
| 432 | #' | |
| 433 | #' @param correct (`flag`)\cr whether to include the continuity correction. For further | |
| 434 | #' information, see [stats::prop.test()]. | |
| 435 | #' | |
| 436 | #' @examples | |
| 437 | #' # Wald confidence interval | |
| 438 | #' set.seed(2) | |
| 439 | #' rsp <- sample(c(TRUE, FALSE), replace = TRUE, size = 20) | |
| 440 | #' grp <- factor(c(rep("A", 10), rep("B", 10))) | |
| 441 | #' | |
| 442 | #' prop_diff_wald(rsp = rsp, grp = grp, conf_level = 0.95, correct = FALSE) | |
| 443 | #' | |
| 444 | #' @export | |
| 445 | prop_diff_wald <- function(rsp, | |
| 446 | grp, | |
| 447 | conf_level = 0.95, | |
| 448 |                            correct = FALSE) { | |
| 449 | 8x |   if (isTRUE(correct)) { | 
| 450 | 5x | mthd <- "waldcc" | 
| 451 |   } else { | |
| 452 | 3x | mthd <- "wald" | 
| 453 | } | |
| 454 | 8x | grp <- as_factor_keep_attributes(grp) | 
| 455 | 8x | check_diff_prop_ci( | 
| 456 | 8x | rsp = rsp, grp = grp, conf_level = conf_level, correct = correct | 
| 457 | ) | |
| 458 | ||
| 459 | # check if binary response is coded as logical | |
| 460 | 8x | checkmate::assert_logical(rsp, any.missing = FALSE) | 
| 461 | 8x | checkmate::assert_factor(grp, len = length(rsp), any.missing = FALSE, n.levels = 2) | 
| 462 | ||
| 463 | 8x | tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE))) | 
| 464 | # x1 and n1 are non-reference groups. | |
| 465 | 8x | diff_ci <- desctools_binom( | 
| 466 | 8x | x1 = tbl[2], n1 = sum(tbl[2], tbl[4]), | 
| 467 | 8x | x2 = tbl[1], n2 = sum(tbl[1], tbl[3]), | 
| 468 | 8x | conf.level = conf_level, | 
| 469 | 8x | method = mthd | 
| 470 | ) | |
| 471 | ||
| 472 | 8x | list( | 
| 473 | 8x | "diff" = unname(diff_ci[, "est"]), | 
| 474 | 8x |     "diff_ci" = unname(diff_ci[, c("lwr.ci", "upr.ci")]) | 
| 475 | ) | |
| 476 | } | |
| 477 | ||
| 478 | #' @describeIn h_prop_diff Anderson-Hauck confidence interval. | |
| 479 | #' | |
| 480 | #' @examples | |
| 481 | #' # Anderson-Hauck confidence interval | |
| 482 | #' ## "Mid" case: 3/4 respond in group A, 1/2 respond in group B. | |
| 483 | #' rsp <- c(TRUE, FALSE, FALSE, TRUE, TRUE, TRUE) | |
| 484 | #' grp <- factor(c("A", "B", "A", "B", "A", "A"), levels = c("B", "A")) | |
| 485 | #' | |
| 486 | #' prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.90) | |
| 487 | #' | |
| 488 | #' ## Edge case: Same proportion of response in A and B. | |
| 489 | #' rsp <- c(TRUE, FALSE, TRUE, FALSE) | |
| 490 | #' grp <- factor(c("A", "A", "B", "B"), levels = c("A", "B")) | |
| 491 | #' | |
| 492 | #' prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.6) | |
| 493 | #' | |
| 494 | #' @export | |
| 495 | prop_diff_ha <- function(rsp, | |
| 496 | grp, | |
| 497 |                          conf_level) { | |
| 498 | 4x | grp <- as_factor_keep_attributes(grp) | 
| 499 | 4x | check_diff_prop_ci(rsp = rsp, grp = grp, conf_level = conf_level) | 
| 500 | ||
| 501 | 4x | tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE))) | 
| 502 | # x1 and n1 are non-reference groups. | |
| 503 | 4x | ci <- desctools_binom( | 
| 504 | 4x | x1 = tbl[2], n1 = sum(tbl[2], tbl[4]), | 
| 505 | 4x | x2 = tbl[1], n2 = sum(tbl[1], tbl[3]), | 
| 506 | 4x | conf.level = conf_level, | 
| 507 | 4x | method = "ha" | 
| 508 | ) | |
| 509 | 4x | list( | 
| 510 | 4x | "diff" = unname(ci[, "est"]), | 
| 511 | 4x |     "diff_ci" = unname(ci[, c("lwr.ci", "upr.ci")]) | 
| 512 | ) | |
| 513 | } | |
| 514 | ||
| 515 | #' @describeIn h_prop_diff Newcombe confidence interval. It is based on | |
| 516 | #' the Wilson score confidence interval for a single binomial proportion. | |
| 517 | #' | |
| 518 | #' @examples | |
| 519 | #' # Newcombe confidence interval | |
| 520 | #' | |
| 521 | #' set.seed(1) | |
| 522 | #' rsp <- c( | |
| 523 | #' sample(c(TRUE, FALSE), size = 40, prob = c(3 / 4, 1 / 4), replace = TRUE), | |
| 524 | #' sample(c(TRUE, FALSE), size = 40, prob = c(1 / 2, 1 / 2), replace = TRUE) | |
| 525 | #' ) | |
| 526 | #' grp <- factor(rep(c("A", "B"), each = 40), levels = c("B", "A")) | |
| 527 | #' table(rsp, grp) | |
| 528 | #' | |
| 529 | #' prop_diff_nc(rsp = rsp, grp = grp, conf_level = 0.9) | |
| 530 | #' | |
| 531 | #' @export | |
| 532 | prop_diff_nc <- function(rsp, | |
| 533 | grp, | |
| 534 | conf_level, | |
| 535 |                          correct = FALSE) { | |
| 536 | 2x |   if (isTRUE(correct)) { | 
| 537 | ! | mthd <- "scorecc" | 
| 538 |   } else { | |
| 539 | 2x | mthd <- "score" | 
| 540 | } | |
| 541 | 2x | grp <- as_factor_keep_attributes(grp) | 
| 542 | 2x | check_diff_prop_ci(rsp = rsp, grp = grp, conf_level = conf_level) | 
| 543 | ||
| 544 | 2x | p_grp <- tapply(rsp, grp, mean) | 
| 545 | 2x | diff_p <- unname(diff(p_grp)) | 
| 546 | 2x | tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE))) | 
| 547 | 2x | ci <- desctools_binom( | 
| 548 | # x1 and n1 are non-reference groups. | |
| 549 | 2x | x1 = tbl[2], n1 = sum(tbl[2], tbl[4]), | 
| 550 | 2x | x2 = tbl[1], n2 = sum(tbl[1], tbl[3]), | 
| 551 | 2x | conf.level = conf_level, | 
| 552 | 2x | method = mthd | 
| 553 | ) | |
| 554 | 2x | list( | 
| 555 | 2x | "diff" = unname(ci[, "est"]), | 
| 556 | 2x |     "diff_ci" = unname(ci[, c("lwr.ci", "upr.ci")]) | 
| 557 | ) | |
| 558 | } | |
| 559 | ||
| 560 | #' @describeIn h_prop_diff Calculates the weighted difference. This is defined as the difference in | |
| 561 | #' response rates between the experimental treatment group and the control treatment group, adjusted | |
| 562 | #' for stratification factors by applying Cochran-Mantel-Haenszel (CMH) weights. For the CMH chi-squared | |
| 563 | #' test, use [stats::mantelhaen.test()]. | |
| 564 | #' | |
| 565 | #' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`. | |
| 566 | #' | |
| 567 | #' @examples | |
| 568 | #' # Cochran-Mantel-Haenszel confidence interval | |
| 569 | #' | |
| 570 | #' set.seed(2) | |
| 571 | #' rsp <- sample(c(TRUE, FALSE), 100, TRUE) | |
| 572 | #' grp <- sample(c("Placebo", "Treatment"), 100, TRUE) | |
| 573 | #' grp <- factor(grp, levels = c("Placebo", "Treatment")) | |
| 574 | #' strata_data <- data.frame( | |
| 575 | #'   "f1" = sample(c("a", "b"), 100, TRUE), | |
| 576 | #'   "f2" = sample(c("x", "y", "z"), 100, TRUE), | |
| 577 | #' stringsAsFactors = TRUE | |
| 578 | #' ) | |
| 579 | #' | |
| 580 | #' prop_diff_cmh( | |
| 581 | #' rsp = rsp, grp = grp, strata = interaction(strata_data), | |
| 582 | #' conf_level = 0.90 | |
| 583 | #' ) | |
| 584 | #' | |
| 585 | #' @export | |
| 586 | prop_diff_cmh <- function(rsp, | |
| 587 | grp, | |
| 588 | strata, | |
| 589 |                           conf_level = 0.95) { | |
| 590 | 8x | grp <- as_factor_keep_attributes(grp) | 
| 591 | 8x | strata <- as_factor_keep_attributes(strata) | 
| 592 | 8x | check_diff_prop_ci( | 
| 593 | 8x | rsp = rsp, grp = grp, conf_level = conf_level, strata = strata | 
| 594 | ) | |
| 595 | ||
| 596 | 8x |   if (any(tapply(rsp, strata, length) < 5)) { | 
| 597 | 1x |     warning("Less than 5 observations in some strata.") | 
| 598 | } | |
| 599 | ||
| 600 | # first dimension: FALSE, TRUE | |
| 601 | # 2nd dimension: CONTROL, TX | |
| 602 | # 3rd dimension: levels of strata | |
| 603 | # rsp as factor rsp to handle edge case of no FALSE (or TRUE) rsp records | |
| 604 | 8x | t_tbl <- table( | 
| 605 | 8x |     factor(rsp, levels = c("FALSE", "TRUE")), | 
| 606 | 8x | grp, | 
| 607 | 8x | strata | 
| 608 | ) | |
| 609 | 8x | n1 <- colSums(t_tbl[1:2, 1, ]) | 
| 610 | 8x | n2 <- colSums(t_tbl[1:2, 2, ]) | 
| 611 | 8x | p1 <- t_tbl[2, 1, ] / n1 | 
| 612 | 8x | p2 <- t_tbl[2, 2, ] / n2 | 
| 613 | # CMH weights | |
| 614 | 8x | use_stratum <- (n1 > 0) & (n2 > 0) | 
| 615 | 8x | n1 <- n1[use_stratum] | 
| 616 | 8x | n2 <- n2[use_stratum] | 
| 617 | 8x | p1 <- p1[use_stratum] | 
| 618 | 8x | p2 <- p2[use_stratum] | 
| 619 | 8x | wt <- (n1 * n2 / (n1 + n2)) | 
| 620 | 8x | wt_normalized <- wt / sum(wt) | 
| 621 | 8x | est1 <- sum(wt_normalized * p1) | 
| 622 | 8x | est2 <- sum(wt_normalized * p2) | 
| 623 | 8x | estimate <- c(est1, est2) | 
| 624 | 8x | names(estimate) <- levels(grp) | 
| 625 | 8x | se1 <- sqrt(sum(wt_normalized^2 * p1 * (1 - p1) / n1)) | 
| 626 | 8x | se2 <- sqrt(sum(wt_normalized^2 * p2 * (1 - p2) / n2)) | 
| 627 | 8x | z <- stats::qnorm((1 + conf_level) / 2) | 
| 628 | 8x | err1 <- z * se1 | 
| 629 | 8x | err2 <- z * se2 | 
| 630 | 8x | ci1 <- c((est1 - err1), (est1 + err1)) | 
| 631 | 8x | ci2 <- c((est2 - err2), (est2 + err2)) | 
| 632 | 8x | estimate_ci <- list(ci1, ci2) | 
| 633 | 8x | names(estimate_ci) <- levels(grp) | 
| 634 | 8x | diff_est <- est2 - est1 | 
| 635 | 8x | se_diff <- sqrt(sum(((p1 * (1 - p1) / n1) + (p2 * (1 - p2) / n2)) * wt_normalized^2)) | 
| 636 | 8x | diff_ci <- c(diff_est - z * se_diff, diff_est + z * se_diff) | 
| 637 | ||
| 638 | 8x | list( | 
| 639 | 8x | prop = estimate, | 
| 640 | 8x | prop_ci = estimate_ci, | 
| 641 | 8x | diff = diff_est, | 
| 642 | 8x | diff_ci = diff_ci, | 
| 643 | 8x | weights = wt_normalized, | 
| 644 | 8x | n1 = n1, | 
| 645 | 8x | n2 = n2 | 
| 646 | ) | |
| 647 | } | |
| 648 | ||
| 649 | #' @describeIn h_prop_diff Calculates the stratified Newcombe confidence interval and difference in response | |
| 650 | #' rates between the experimental treatment group and the control treatment group, adjusted for stratification | |
| 651 | #'   factors. This implementation follows closely the one proposed by \insertCite{Yan2010-jt;textual}{tern}. | |
| 652 | #' Weights can be estimated from the heuristic proposed in [prop_strat_wilson()] or from CMH-derived weights | |
| 653 | #' (see [prop_diff_cmh()]). | |
| 654 | #' | |
| 655 | #' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`. | |
| 656 | #' @param weights_method (`string`)\cr weights method. Can be either `"cmh"` or `"heuristic"` | |
| 657 | #' and directs the way weights are estimated. | |
| 658 | #' | |
| 659 | #' @references | |
| 660 | #' \insertRef{Yan2010-jt}{tern} | |
| 661 | #' | |
| 662 | #' @examples | |
| 663 | #' # Stratified Newcombe confidence interval | |
| 664 | #' | |
| 665 | #' set.seed(2) | |
| 666 | #' data_set <- data.frame( | |
| 667 | #' "rsp" = sample(c(TRUE, FALSE), 100, TRUE), | |
| 668 | #'   "f1" = sample(c("a", "b"), 100, TRUE), | |
| 669 | #'   "f2" = sample(c("x", "y", "z"), 100, TRUE), | |
| 670 | #'   "grp" = sample(c("Placebo", "Treatment"), 100, TRUE), | |
| 671 | #' stringsAsFactors = TRUE | |
| 672 | #' ) | |
| 673 | #' | |
| 674 | #' prop_diff_strat_nc( | |
| 675 | #' rsp = data_set$rsp, grp = data_set$grp, strata = interaction(data_set[2:3]), | |
| 676 | #' weights_method = "cmh", | |
| 677 | #' conf_level = 0.90 | |
| 678 | #' ) | |
| 679 | #' | |
| 680 | #' prop_diff_strat_nc( | |
| 681 | #' rsp = data_set$rsp, grp = data_set$grp, strata = interaction(data_set[2:3]), | |
| 682 | #' weights_method = "wilson_h", | |
| 683 | #' conf_level = 0.90 | |
| 684 | #' ) | |
| 685 | #' | |
| 686 | #' @export | |
| 687 | prop_diff_strat_nc <- function(rsp, | |
| 688 | grp, | |
| 689 | strata, | |
| 690 |                                weights_method = c("cmh", "wilson_h"), | |
| 691 | conf_level = 0.95, | |
| 692 |                                correct = FALSE) { | |
| 693 | 4x | weights_method <- match.arg(weights_method) | 
| 694 | 4x | grp <- as_factor_keep_attributes(grp) | 
| 695 | 4x | strata <- as_factor_keep_attributes(strata) | 
| 696 | 4x | check_diff_prop_ci( | 
| 697 | 4x | rsp = rsp, grp = grp, conf_level = conf_level, strata = strata | 
| 698 | ) | |
| 699 | 4x | checkmate::assert_number(conf_level, lower = 0, upper = 1) | 
| 700 | 4x | checkmate::assert_flag(correct) | 
| 701 | 4x |   if (any(tapply(rsp, strata, length) < 5)) { | 
| 702 | ! |     warning("Less than 5 observations in some strata.") | 
| 703 | } | |
| 704 | ||
| 705 | 4x | rsp_by_grp <- split(rsp, f = grp) | 
| 706 | 4x | strata_by_grp <- split(strata, f = grp) | 
| 707 | ||
| 708 | # Finding the weights | |
| 709 | 4x |   weights <- if (identical(weights_method, "cmh")) { | 
| 710 | 3x | prop_diff_cmh(rsp = rsp, grp = grp, strata = strata)$weights | 
| 711 | 4x |   } else if (identical(weights_method, "wilson_h")) { | 
| 712 | 1x | prop_strat_wilson(rsp, strata, conf_level = conf_level, correct = correct)$weights | 
| 713 | } | |
| 714 | 4x | weights[levels(strata)[!levels(strata) %in% names(weights)]] <- 0 | 
| 715 | ||
| 716 | # Calculating lower (`l`) and upper (`u`) confidence bounds per group. | |
| 717 | 4x | strat_wilson_by_grp <- Map( | 
| 718 | 4x | prop_strat_wilson, | 
| 719 | 4x | rsp = rsp_by_grp, | 
| 720 | 4x | strata = strata_by_grp, | 
| 721 | 4x | weights = list(weights, weights), | 
| 722 | 4x | conf_level = conf_level, | 
| 723 | 4x | correct = correct | 
| 724 | ) | |
| 725 | ||
| 726 | 4x | ci_ref <- strat_wilson_by_grp[[1]] | 
| 727 | 4x | ci_trt <- strat_wilson_by_grp[[2]] | 
| 728 | 4x | l_ref <- as.numeric(ci_ref$conf_int[1]) | 
| 729 | 4x | u_ref <- as.numeric(ci_ref$conf_int[2]) | 
| 730 | 4x | l_trt <- as.numeric(ci_trt$conf_int[1]) | 
| 731 | 4x | u_trt <- as.numeric(ci_trt$conf_int[2]) | 
| 732 | ||
| 733 | # Estimating the diff and n_ref, n_trt (it allows different weights to be used) | |
| 734 | 4x | t_tbl <- table( | 
| 735 | 4x |     factor(rsp, levels = c("FALSE", "TRUE")), | 
| 736 | 4x | grp, | 
| 737 | 4x | strata | 
| 738 | ) | |
| 739 | 4x | n_ref <- colSums(t_tbl[1:2, 1, ]) | 
| 740 | 4x | n_trt <- colSums(t_tbl[1:2, 2, ]) | 
| 741 | 4x | use_stratum <- (n_ref > 0) & (n_trt > 0) | 
| 742 | 4x | n_ref <- n_ref[use_stratum] | 
| 743 | 4x | n_trt <- n_trt[use_stratum] | 
| 744 | 4x | p_ref <- t_tbl[2, 1, use_stratum] / n_ref | 
| 745 | 4x | p_trt <- t_tbl[2, 2, use_stratum] / n_trt | 
| 746 | 4x | est1 <- sum(weights * p_ref) | 
| 747 | 4x | est2 <- sum(weights * p_trt) | 
| 748 | 4x | diff_est <- est2 - est1 | 
| 749 | ||
| 750 | 4x | lambda1 <- sum(weights^2 / n_ref) | 
| 751 | 4x | lambda2 <- sum(weights^2 / n_trt) | 
| 752 | 4x | z <- stats::qnorm((1 + conf_level) / 2) | 
| 753 | ||
| 754 | 4x | lower <- diff_est - z * sqrt(lambda2 * l_trt * (1 - l_trt) + lambda1 * u_ref * (1 - u_ref)) | 
| 755 | 4x | upper <- diff_est + z * sqrt(lambda1 * l_ref * (1 - l_ref) + lambda2 * u_trt * (1 - u_trt)) | 
| 756 | ||
| 757 | 4x | list( | 
| 758 | 4x | "diff" = diff_est, | 
| 759 | 4x |     "diff_ci" = c("lower" = lower, "upper" = upper) | 
| 760 | ) | |
| 761 | } | 
| 1 | #' Survival time analysis | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [surv_time()] creates a layout element to analyze survival time by calculating survival time | |
| 6 | #' median, median confidence interval, quantiles, and range (for all, censored, or event patients). The primary | |
| 7 | #' analysis variable `vars` is the time variable and the secondary analysis variable `is_event` indicates whether or | |
| 8 | #' not an event has occurred. | |
| 9 | #' | |
| 10 | #' @inheritParams argument_convention | |
| 11 | #' @param control (`list`)\cr parameters for comparison details, specified by using the helper function | |
| 12 | #' [control_surv_time()]. Some possible parameter options are: | |
| 13 | #' * `conf_level` (`proportion`)\cr confidence level of the interval for survival time. | |
| 14 | #' * `conf_type` (`string`)\cr confidence interval type. Options are "plain" (default), "log", or "log-log", | |
| 15 | #' see more in [survival::survfit()]. Note option "none" is not supported. | |
| 16 | #' * `quantiles` (`numeric`)\cr vector of length two to specify the quantiles of survival time. | |
| 17 | #' @param ref_fn_censor (`flag`)\cr whether referential footnotes indicating censored observations should be printed | |
| 18 | #' when the `range` statistic is included. | |
| 19 | #' @param .indent_mods (named `integer`)\cr indent modifiers for the labels. Each element of the vector | |
| 20 | #' should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation | |
| 21 | #' for that statistic's row label. | |
| 22 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 23 | #' | |
| 24 | #'   Options are: ``r shQuote(get_stats("surv_time"), type = "sh")`` | |
| 25 | #' | |
| 26 | #' @examples | |
| 27 | #' library(dplyr) | |
| 28 | #' | |
| 29 | #' adtte_f <- tern_ex_adtte %>% | |
| 30 | #' filter(PARAMCD == "OS") %>% | |
| 31 | #' mutate( | |
| 32 | #' AVAL = day2month(AVAL), | |
| 33 | #' is_event = CNSR == 0 | |
| 34 | #' ) | |
| 35 | #' df <- adtte_f %>% filter(ARMCD == "ARM A") | |
| 36 | #' | |
| 37 | #' @name survival_time | |
| 38 | #' @order 1 | |
| 39 | NULL | |
| 40 | ||
| 41 | #' @describeIn survival_time Statistics function which analyzes survival times. | |
| 42 | #' | |
| 43 | #' @return | |
| 44 | #' * `s_surv_time()` returns the statistics: | |
| 45 | #' * `median`: Median survival time. | |
| 46 | #' * `median_ci`: Confidence interval for median time. | |
| 47 | #' * `median_ci_3d`: Median with confidence interval for median time. | |
| 48 | #' * `quantiles`: Survival time for two specified quantiles. | |
| 49 | #' * `quantiles_lower`: quantile with confidence interval for the first specified quantile. | |
| 50 | #' * `quantiles_upper`: quantile with confidence interval for the second specified quantile. | |
| 51 | #' * `range_censor`: Survival time range for censored observations. | |
| 52 | #' * `range_event`: Survival time range for observations with events. | |
| 53 | #' * `range`: Survival time range for all observations. | |
| 54 | #' | |
| 55 | #' @keywords internal | |
| 56 | s_surv_time <- function(df, | |
| 57 | .var, | |
| 58 | ..., | |
| 59 | is_event, | |
| 60 |                         control = control_surv_time()) { | |
| 61 | 232x | checkmate::assert_string(.var) | 
| 62 | 232x | assert_df_with_variables(df, list(tte = .var, is_event = is_event)) | 
| 63 | 232x | checkmate::assert_numeric(df[[.var]], min.len = 1, any.missing = FALSE) | 
| 64 | 232x | checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE) | 
| 65 | ||
| 66 | 232x | conf_type <- control$conf_type | 
| 67 | 232x | conf_level <- control$conf_level | 
| 68 | 232x | quantiles <- control$quantiles | 
| 69 | ||
| 70 | 232x |   formula <- stats::as.formula(paste0("survival::Surv(", .var, ", ", is_event, ") ~ 1")) | 
| 71 | 232x | srv_fit <- survival::survfit( | 
| 72 | 232x | formula = formula, | 
| 73 | 232x | data = df, | 
| 74 | 232x | conf.int = conf_level, | 
| 75 | 232x | conf.type = conf_type | 
| 76 | ) | |
| 77 | 232x | srv_tab <- summary(srv_fit, extend = TRUE)$table | 
| 78 | 232x | srv_qt_tab_pre <- stats::quantile(srv_fit, probs = quantiles) | 
| 79 | 232x | srv_qt_tab <- srv_qt_tab_pre$quantile | 
| 80 | 232x | range_censor <- range_noinf(df[[.var]][!df[[is_event]]], na.rm = TRUE) | 
| 81 | 232x | range_event <- range_noinf(df[[.var]][df[[is_event]]], na.rm = TRUE) | 
| 82 | 232x | range <- range_noinf(df[[.var]], na.rm = TRUE) | 
| 83 | ||
| 84 | 232x | names(quantiles) <- as.character(100 * quantiles) | 
| 85 | 232x | srv_qt_tab_pre <- unlist(srv_qt_tab_pre) | 
| 86 | 232x |   srv_qt_ci <- lapply(quantiles, function(x) { | 
| 87 | 464x | name <- as.character(100 * x) | 
| 88 | ||
| 89 | 464x | c( | 
| 90 | 464x |       srv_qt_tab_pre[[paste0("quantile.", name)]], | 
| 91 | 464x |       srv_qt_tab_pre[[paste0("lower.", name)]], | 
| 92 | 464x |       srv_qt_tab_pre[[paste0("upper.", name)]] | 
| 93 | ) | |
| 94 | }) | |
| 95 | ||
| 96 | 232x | list( | 
| 97 | 232x | median = formatters::with_label(unname(srv_tab["median"]), "Median"), | 
| 98 | 232x | median_ci = formatters::with_label( | 
| 99 | 232x |       unname(srv_tab[paste0(srv_fit$conf.int, c("LCL", "UCL"))]), f_conf_level(conf_level) | 
| 100 | ), | |
| 101 | 232x | quantiles = formatters::with_label( | 
| 102 | 232x | unname(srv_qt_tab), paste0(quantiles[1] * 100, "% and ", quantiles[2] * 100, "%-ile") | 
| 103 | ), | |
| 104 | 232x | range_censor = formatters::with_label(range_censor, "Range (censored)"), | 
| 105 | 232x | range_event = formatters::with_label(range_event, "Range (event)"), | 
| 106 | 232x | range = formatters::with_label(range, "Range"), | 
| 107 | 232x | median_ci_3d = formatters::with_label( | 
| 108 | 232x | c( | 
| 109 | 232x | unname(srv_tab["median"]), | 
| 110 | 232x |         unname(srv_tab[paste0(srv_fit$conf.int, c("LCL", "UCL"))]) | 
| 111 | ), | |
| 112 | 232x |       paste0("Median (", f_conf_level(conf_level), ")") | 
| 113 | ), | |
| 114 | 232x | quantiles_lower = formatters::with_label( | 
| 115 | 232x |       unname(srv_qt_ci[[1]]), paste0(quantiles[1] * 100, "%-ile (", f_conf_level(conf_level), ")") | 
| 116 | ), | |
| 117 | 232x | quantiles_upper = formatters::with_label( | 
| 118 | 232x |       unname(srv_qt_ci[[2]]), paste0(quantiles[2] * 100, "%-ile (", f_conf_level(conf_level), ")") | 
| 119 | ) | |
| 120 | ) | |
| 121 | } | |
| 122 | ||
| 123 | #' @describeIn survival_time Formatted analysis function which is used as `afun` in `surv_time()`. | |
| 124 | #' | |
| 125 | #' @return | |
| 126 | #' * `a_surv_time()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 127 | #' | |
| 128 | #' @examples | |
| 129 | #' a_surv_time( | |
| 130 | #' df, | |
| 131 | #' .df_row = df, | |
| 132 | #' .var = "AVAL", | |
| 133 | #' is_event = "is_event" | |
| 134 | #' ) | |
| 135 | #' | |
| 136 | #' @export | |
| 137 | a_surv_time <- function(df, | |
| 138 | labelstr = "", | |
| 139 | ..., | |
| 140 | .stats = NULL, | |
| 141 | .stat_names = NULL, | |
| 142 | .formats = NULL, | |
| 143 | .labels = NULL, | |
| 144 |                         .indent_mods = NULL) { | |
| 145 | # Check for additional parameters to the statistics function | |
| 146 | 14x | dots_extra_args <- list(...) | 
| 147 | 14x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 148 | 14x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 149 | ||
| 150 | # Check for user-defined functions | |
| 151 | 14x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 152 | 14x | .stats <- default_and_custom_stats_list$all_stats | 
| 153 | 14x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 154 | ||
| 155 | # Main statistic calculations | |
| 156 | 14x | x_stats <- .apply_stat_functions( | 
| 157 | 14x | default_stat_fnc = s_surv_time, | 
| 158 | 14x | custom_stat_fnc_list = custom_stat_functions, | 
| 159 | 14x | args_list = c( | 
| 160 | 14x | df = list(df), | 
| 161 | 14x | labelstr = list(labelstr), | 
| 162 | 14x | extra_afun_params, | 
| 163 | 14x | dots_extra_args | 
| 164 | ) | |
| 165 | ) | |
| 166 | ||
| 167 | 14x | rng_censor_lwr <- x_stats[["range_censor"]][1] | 
| 168 | 14x | rng_censor_upr <- x_stats[["range_censor"]][2] | 
| 169 | ||
| 170 | # Fill in formatting defaults | |
| 171 | 14x |   .stats <- get_stats("surv_time", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) | 
| 172 | 14x | .formats <- get_formats_from_stats(.stats, .formats) | 
| 173 | 14x | .labels <- get_labels_from_stats(.stats, .labels) %>% labels_use_control(dots_extra_args$control) | 
| 174 | 14x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods) | 
| 175 | ||
| 176 | 14x | x_stats <- x_stats[.stats] | 
| 177 | ||
| 178 | # Auto format handling | |
| 179 | 14x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 180 | ||
| 181 | # Get and check statistical names | |
| 182 | 14x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 183 | ||
| 184 | # Get cell footnotes | |
| 185 | 14x |   cell_fns <- stats::setNames(vector("list", length = length(x_stats)), .labels) | 
| 186 | 14x |   if ("range" %in% names(x_stats) && "ref_fn_censor" %in% names(dots_extra_args) && dots_extra_args$ref_fn_censor) { | 
| 187 | 14x |     if (identical(x_stats[["range"]][1], rng_censor_lwr) && identical(x_stats[["range"]][2], rng_censor_upr)) { | 
| 188 | 2x | cell_fns[[.labels[["range"]]]] <- "Censored observations: range minimum & maximum" | 
| 189 | 12x |     } else if (identical(x_stats[["range"]][1], rng_censor_lwr)) { | 
| 190 | 2x | cell_fns[[.labels[["range"]]]] <- "Censored observation: range minimum" | 
| 191 | 10x |     } else if (identical(x_stats[["range"]][2], rng_censor_upr)) { | 
| 192 | 1x | cell_fns[[.labels[["range"]]]] <- "Censored observation: range maximum" | 
| 193 | } | |
| 194 | } | |
| 195 | ||
| 196 | 14x | in_rows( | 
| 197 | 14x | .list = x_stats, | 
| 198 | 14x | .formats = .formats, | 
| 199 | 14x | .names = names(.labels), | 
| 200 | 14x | .stat_names = .stat_names, | 
| 201 | 14x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 202 | 14x | .indent_mods = .indent_mods %>% .unlist_keep_nulls(), | 
| 203 | 14x | .cell_footnotes = cell_fns | 
| 204 | ) | |
| 205 | } | |
| 206 | ||
| 207 | #' @describeIn survival_time Layout-creating function which can take statistics function arguments | |
| 208 | #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 209 | #' | |
| 210 | #' @return | |
| 211 | #' * `surv_time()` returns a layout object suitable for passing to further layouting functions, | |
| 212 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 213 | #' the statistics from `s_surv_time()` to the table layout. | |
| 214 | #' | |
| 215 | #' @examples | |
| 216 | #' basic_table() %>% | |
| 217 | #' split_cols_by(var = "ARMCD") %>% | |
| 218 | #' add_colcounts() %>% | |
| 219 | #' surv_time( | |
| 220 | #' vars = "AVAL", | |
| 221 | #' var_labels = "Survival Time (Months)", | |
| 222 | #' is_event = "is_event", | |
| 223 | #' control = control_surv_time(conf_level = 0.9, conf_type = "log-log") | |
| 224 | #' ) %>% | |
| 225 | #' build_table(df = adtte_f) | |
| 226 | #' | |
| 227 | #' @export | |
| 228 | #' @order 2 | |
| 229 | surv_time <- function(lyt, | |
| 230 | vars, | |
| 231 | is_event, | |
| 232 | control = control_surv_time(), | |
| 233 | ref_fn_censor = TRUE, | |
| 234 | na_str = default_na_str(), | |
| 235 | nested = TRUE, | |
| 236 | ..., | |
| 237 | var_labels = "Time to Event", | |
| 238 | show_labels = "visible", | |
| 239 | table_names = vars, | |
| 240 |                       .stats = c("median", "median_ci", "quantiles", "range"), | |
| 241 | .stat_names = NULL, | |
| 242 | .formats = list( | |
| 243 | median_ci = "(xx.x, xx.x)", quantiles = "xx.x, xx.x", range = "xx.x to xx.x", | |
| 244 | quantiles_lower = "xx.x (xx.x - xx.x)", quantiles_upper = "xx.x (xx.x - xx.x)", | |
| 245 | median_ci_3d = "xx.x (xx.x - xx.x)" | |
| 246 | ), | |
| 247 | .labels = list(median_ci = "95% CI", range = "Range"), | |
| 248 |                       .indent_mods = list(median_ci = 1L)) { | |
| 249 | # Process standard extra arguments | |
| 250 | 3x |   extra_args <- list(".stats" = .stats) | 
| 251 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 252 | 3x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 253 | 3x | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 254 | 3x | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 255 | ||
| 256 | # Process additional arguments to the statistic function | |
| 257 | 3x | extra_args <- c( | 
| 258 | 3x | extra_args, | 
| 259 | 3x | is_event = is_event, control = list(control), ref_fn_censor = ref_fn_censor, | 
| 260 | ... | |
| 261 | ) | |
| 262 | ||
| 263 | # Adding additional info from layout to analysis function | |
| 264 | 3x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 265 | 3x | formals(a_surv_time) <- c(formals(a_surv_time), extra_args[[".additional_fun_parameters"]]) | 
| 266 | ||
| 267 | 3x | analyze( | 
| 268 | 3x | lyt = lyt, | 
| 269 | 3x | vars = vars, | 
| 270 | 3x | afun = a_surv_time, | 
| 271 | 3x | var_labels = var_labels, | 
| 272 | 3x | show_labels = show_labels, | 
| 273 | 3x | table_names = table_names, | 
| 274 | 3x | na_str = na_str, | 
| 275 | 3x | nested = nested, | 
| 276 | 3x | extra_args = extra_args | 
| 277 | ) | |
| 278 | } | 
| 1 | #' Count patients with marked laboratory abnormalities | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [count_abnormal_by_marked()] creates a layout element to count patients with marked laboratory | |
| 6 | #' abnormalities for each direction of abnormality, categorized by parameter value. | |
| 7 | #' | |
| 8 | #' This function analyzes primary analysis variable `var` which indicates whether a single, replicated, | |
| 9 | #' or last marked laboratory abnormality was observed. Levels of `var` to include for each marked lab | |
| 10 | #' abnormality (`single` and `last_replicated`) can be supplied via the `category` parameter. Additional | |
| 11 | #' analysis variables that can be supplied as a list via the `variables` parameter are `id` (defaults | |
| 12 | #' to `USUBJID`), a variable to indicate unique subject identifiers, `param` (defaults to `PARAM`), a | |
| 13 | #' variable to indicate parameter values, and `direction` (defaults to `abn_dir`), a variable to indicate | |
| 14 | #' abnormality directions. | |
| 15 | #' | |
| 16 | #' For each combination of `param` and `direction` levels, marked lab abnormality counts are calculated | |
| 17 | #' as follows: | |
| 18 | #' * `Single, not last` & `Last or replicated`: The number of patients with `Single, not last` | |
| 19 | #' and `Last or replicated` values, respectively. | |
| 20 | #' * `Any`: The number of patients with either single or replicated marked abnormalities. | |
| 21 | #' | |
| 22 | #' Fractions are calculated by dividing the above counts by the number of patients with at least one | |
| 23 | #' valid measurement recorded during the analysis. | |
| 24 | #' | |
| 25 | #' Prior to using this function in your table layout you must use [rtables::split_rows_by()] to create two | |
| 26 | #' row splits, one on variable `param` and one on variable `direction`. | |
| 27 | #' | |
| 28 | #' @inheritParams argument_convention | |
| 29 | #' @param category (`list`)\cr a list with different marked category names for single | |
| 30 | #' and last or replicated. | |
| 31 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 32 | #' | |
| 33 | #'   Options are: ``r shQuote(get_stats("abnormal_by_marked"), type = "sh")`` | |
| 34 | #' | |
| 35 | #' @note `Single, not last` and `Last or replicated` levels are mutually exclusive. If a patient has | |
| 36 | #' abnormalities that meet both the `Single, not last` and `Last or replicated` criteria, then the | |
| 37 | #' patient will be counted only under the `Last or replicated` category. | |
| 38 | #' | |
| 39 | #' @name abnormal_by_marked | |
| 40 | #' @order 1 | |
| 41 | NULL | |
| 42 | ||
| 43 | #' @describeIn abnormal_by_marked Statistics function for patients with marked lab abnormalities. | |
| 44 | #' | |
| 45 | #' @return | |
| 46 | #' * `s_count_abnormal_by_marked()` returns statistic `count_fraction` with `Single, not last`, | |
| 47 | #' `Last or replicated`, and `Any` results. | |
| 48 | #' | |
| 49 | #' @keywords internal | |
| 50 | s_count_abnormal_by_marked <- function(df, | |
| 51 | .var = "AVALCAT1", | |
| 52 | .spl_context, | |
| 53 |                                        category = list(single = "SINGLE", last_replicated = c("LAST", "REPLICATED")), | |
| 54 | variables = list(id = "USUBJID", param = "PARAM", direction = "abn_dir"), | |
| 55 |                                        ...) { | |
| 56 | 9x | checkmate::assert_string(.var) | 
| 57 | 9x | checkmate::assert_list(variables) | 
| 58 | 9x | checkmate::assert_list(category) | 
| 59 | 9x |   checkmate::assert_subset(names(category), c("single", "last_replicated")) | 
| 60 | 9x |   checkmate::assert_subset(names(variables), c("id", "param", "direction")) | 
| 61 | 9x | checkmate::assert_vector(unique(df[[variables$direction]]), max.len = 1) | 
| 62 | ||
| 63 | 8x | assert_df_with_variables(df, c(aval = .var, variables)) | 
| 64 | 8x |   checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character")) | 
| 65 | 8x |   checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character")) | 
| 66 | ||
| 67 | ||
| 68 | 8x | first_row <- .spl_context[.spl_context$split == variables[["param"]], ] | 
| 69 | # Patients in the denominator have at least one post-baseline visit. | |
| 70 | 8x | subj <- first_row$full_parent_df[[1]][[variables[["id"]]]] | 
| 71 | 8x | subj_cur_col <- subj[first_row$cur_col_subset[[1]]] | 
| 72 | # Some subjects may have a record for high and low directions but | |
| 73 | # should be counted only once. | |
| 74 | 8x | denom <- length(unique(subj_cur_col)) | 
| 75 | ||
| 76 | 8x |   if (denom != 0) { | 
| 77 | 8x | subjects_last_replicated <- unique( | 
| 78 | 8x | df[df[[.var]] %in% category[["last_replicated"]], variables$id, drop = TRUE] | 
| 79 | ) | |
| 80 | 8x | subjects_single <- unique( | 
| 81 | 8x | df[df[[.var]] %in% category[["single"]], variables$id, drop = TRUE] | 
| 82 | ) | |
| 83 | # Subjects who have both single and last/replicated abnormalities are counted in only the last/replicated group. | |
| 84 | 8x | subjects_single <- setdiff(subjects_single, subjects_last_replicated) | 
| 85 | 8x | n_single <- length(subjects_single) | 
| 86 | 8x | n_last_replicated <- length(subjects_last_replicated) | 
| 87 | 8x | n_any <- n_single + n_last_replicated | 
| 88 | 8x | result <- list(count_fraction = list( | 
| 89 | 8x | "Single, not last" = c(n_single, n_single / denom), | 
| 90 | 8x | "Last or replicated" = c(n_last_replicated, n_last_replicated / denom), | 
| 91 | 8x | "Any Abnormality" = c(n_any, n_any / denom) | 
| 92 | )) | |
| 93 |   } else { | |
| 94 | ! | result <- list(count_fraction = list( | 
| 95 | ! | "Single, not last" = c(0, 0), | 
| 96 | ! | "Last or replicated" = c(0, 0), | 
| 97 | ! | "Any Abnormality" = c(0, 0) | 
| 98 | )) | |
| 99 | } | |
| 100 | ||
| 101 | 8x | result | 
| 102 | } | |
| 103 | ||
| 104 | #' @describeIn abnormal_by_marked Formatted analysis function which is used as `afun` | |
| 105 | #' in `count_abnormal_by_marked()`. | |
| 106 | #' | |
| 107 | #' @return | |
| 108 | #' * `a_count_abnormal_by_marked()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 109 | #' | |
| 110 | #' @keywords internal | |
| 111 | a_count_abnormal_by_marked <- function(df, | |
| 112 | ..., | |
| 113 | .stats = NULL, | |
| 114 | .stat_names = NULL, | |
| 115 | .formats = NULL, | |
| 116 | .labels = NULL, | |
| 117 |                                        .indent_mods = NULL) { | |
| 118 | # Check for additional parameters to the statistics function | |
| 119 | 6x | dots_extra_args <- list(...) | 
| 120 | 6x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 121 | 6x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 122 | ||
| 123 | # Check for user-defined functions | |
| 124 | 6x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 125 | 6x | .stats <- default_and_custom_stats_list$all_stats | 
| 126 | 6x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 127 | ||
| 128 | # Apply statistics function | |
| 129 | 6x | x_stats <- .apply_stat_functions( | 
| 130 | 6x | default_stat_fnc = s_count_abnormal_by_marked, | 
| 131 | 6x | custom_stat_fnc_list = custom_stat_functions, | 
| 132 | 6x | args_list = c( | 
| 133 | 6x | df = list(df), | 
| 134 | 6x | extra_afun_params, | 
| 135 | 6x | dots_extra_args | 
| 136 | ) | |
| 137 | ) | |
| 138 | ||
| 139 | # Fill in formatting defaults | |
| 140 | 6x |   .stats <- get_stats("abnormal_by_marked", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) | 
| 141 | 6x | levels_per_stats <- lapply(x_stats, names) | 
| 142 | 6x | .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) | 
| 143 | 6x | .labels <- get_labels_from_stats(.stats, .labels, levels_per_stats) | 
| 144 | 6x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) | 
| 145 | ||
| 146 | 6x | x_stats <- x_stats[.stats] %>% | 
| 147 | 6x | .unlist_keep_nulls() %>% | 
| 148 | 6x | setNames(names(.formats)) | 
| 149 | ||
| 150 | # Auto format handling | |
| 151 | 6x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 152 | ||
| 153 | # Get and check statistical names | |
| 154 | 6x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 155 | ||
| 156 | 6x | in_rows( | 
| 157 | 6x | .list = x_stats, | 
| 158 | 6x | .formats = .formats, | 
| 159 | 6x | .names = .labels %>% .unlist_keep_nulls(), | 
| 160 | 6x | .stat_names = .stat_names, | 
| 161 | 6x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 162 | 6x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 163 | ) | |
| 164 | } | |
| 165 | ||
| 166 | #' @describeIn abnormal_by_marked Layout-creating function which can take statistics function arguments | |
| 167 | #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 168 | #' | |
| 169 | #' @return | |
| 170 | #' * `count_abnormal_by_marked()` returns a layout object suitable for passing to further layouting functions, | |
| 171 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 172 | #' the statistics from `s_count_abnormal_by_marked()` to the table layout. | |
| 173 | #' | |
| 174 | #' @examples | |
| 175 | #' library(dplyr) | |
| 176 | #' | |
| 177 | #' df <- data.frame( | |
| 178 | #' USUBJID = as.character(c(rep(1, 5), rep(2, 5), rep(1, 5), rep(2, 5))), | |
| 179 | #'   ARMCD = factor(c(rep("ARM A", 5), rep("ARM B", 5), rep("ARM A", 5), rep("ARM B", 5))), | |
| 180 | #' ANRIND = factor(c( | |
| 181 | #' "NORMAL", "HIGH", "HIGH", "HIGH HIGH", "HIGH", | |
| 182 | #' "HIGH", "HIGH", "HIGH HIGH", "NORMAL", "HIGH HIGH", "NORMAL", "LOW", "LOW", "LOW LOW", "LOW", | |
| 183 | #' "LOW", "LOW", "LOW LOW", "NORMAL", "LOW LOW" | |
| 184 | #' )), | |
| 185 | #'   ONTRTFL = rep(c("", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"), 2), | |
| 186 | #'   PARAMCD = factor(c(rep("CRP", 10), rep("ALT", 10))), | |
| 187 | #'   AVALCAT1 = factor(rep(c("", "", "", "SINGLE", "REPLICATED", "", "", "LAST", "", "SINGLE"), 2)), | |
| 188 | #' stringsAsFactors = FALSE | |
| 189 | #' ) | |
| 190 | #' | |
| 191 | #' df <- df %>% | |
| 192 | #' mutate(abn_dir = factor( | |
| 193 | #' case_when( | |
| 194 | #' ANRIND == "LOW LOW" ~ "Low", | |
| 195 | #' ANRIND == "HIGH HIGH" ~ "High", | |
| 196 | #' TRUE ~ "" | |
| 197 | #' ), | |
| 198 | #'     levels = c("Low", "High") | |
| 199 | #' )) | |
| 200 | #' | |
| 201 | #' # Select only post-baseline records. | |
| 202 | #' df <- df %>% filter(ONTRTFL == "Y") | |
| 203 | #' df_crp <- df %>% | |
| 204 | #' filter(PARAMCD == "CRP") %>% | |
| 205 | #' droplevels() | |
| 206 | #' full_parent_df <- list(df_crp, "not_needed") | |
| 207 | #' cur_col_subset <- list(rep(TRUE, nrow(df_crp)), "not_needed") | |
| 208 | #' spl_context <- data.frame( | |
| 209 | #'   split = c("PARAMCD", "GRADE_DIR"), | |
| 210 | #' full_parent_df = I(full_parent_df), | |
| 211 | #' cur_col_subset = I(cur_col_subset) | |
| 212 | #' ) | |
| 213 | #' | |
| 214 | #' map <- unique( | |
| 215 | #'   df[df$abn_dir %in% c("Low", "High") & df$AVALCAT1 != "", c("PARAMCD", "abn_dir")] | |
| 216 | #' ) %>% | |
| 217 | #' lapply(as.character) %>% | |
| 218 | #' as.data.frame() %>% | |
| 219 | #' arrange(PARAMCD, abn_dir) | |
| 220 | #' | |
| 221 | #' basic_table() %>% | |
| 222 | #'   split_cols_by("ARMCD") %>% | |
| 223 | #'   split_rows_by("PARAMCD") %>% | |
| 224 | #' summarize_num_patients( | |
| 225 | #' var = "USUBJID", | |
| 226 | #' .stats = "unique_count" | |
| 227 | #' ) %>% | |
| 228 | #' split_rows_by( | |
| 229 | #' "abn_dir", | |
| 230 | #' split_fun = trim_levels_to_map(map) | |
| 231 | #' ) %>% | |
| 232 | #' count_abnormal_by_marked( | |
| 233 | #' var = "AVALCAT1", | |
| 234 | #' variables = list( | |
| 235 | #' id = "USUBJID", | |
| 236 | #' param = "PARAMCD", | |
| 237 | #' direction = "abn_dir" | |
| 238 | #' ) | |
| 239 | #' ) %>% | |
| 240 | #' build_table(df = df) | |
| 241 | #' | |
| 242 | #' basic_table() %>% | |
| 243 | #'   split_cols_by("ARMCD") %>% | |
| 244 | #'   split_rows_by("PARAMCD") %>% | |
| 245 | #' summarize_num_patients( | |
| 246 | #' var = "USUBJID", | |
| 247 | #' .stats = "unique_count" | |
| 248 | #' ) %>% | |
| 249 | #' split_rows_by( | |
| 250 | #' "abn_dir", | |
| 251 | #'     split_fun = trim_levels_in_group("abn_dir") | |
| 252 | #' ) %>% | |
| 253 | #' count_abnormal_by_marked( | |
| 254 | #' var = "AVALCAT1", | |
| 255 | #' variables = list( | |
| 256 | #' id = "USUBJID", | |
| 257 | #' param = "PARAMCD", | |
| 258 | #' direction = "abn_dir" | |
| 259 | #' ) | |
| 260 | #' ) %>% | |
| 261 | #' build_table(df = df) | |
| 262 | #' | |
| 263 | #' @export | |
| 264 | #' @order 2 | |
| 265 | count_abnormal_by_marked <- function(lyt, | |
| 266 | var, | |
| 267 |                                      category = list(single = "SINGLE", last_replicated = c("LAST", "REPLICATED")), | |
| 268 | variables = list(id = "USUBJID", param = "PARAM", direction = "abn_dir"), | |
| 269 | na_str = default_na_str(), | |
| 270 | nested = TRUE, | |
| 271 | ..., | |
| 272 | .stats = "count_fraction", | |
| 273 | .stat_names = NULL, | |
| 274 | .formats = list(count_fraction = format_count_fraction), | |
| 275 | .labels = NULL, | |
| 276 |                                      .indent_mods = NULL) { | |
| 277 | 1x | checkmate::assert_string(var) | 
| 278 | ||
| 279 | # Process standard extra arguments | |
| 280 | 1x |   extra_args <- list(".stats" = .stats) | 
| 281 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 282 | 1x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 283 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 284 | ! | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 285 | ||
| 286 | # Process additional arguments to the statistic function | |
| 287 | 1x | extra_args <- c(extra_args, "category" = list(category), "variables" = list(variables), ...) | 
| 288 | ||
| 289 | # Append additional info from layout to the analysis function | |
| 290 | 1x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 291 | 1x | formals(a_count_abnormal_by_marked) <- c( | 
| 292 | 1x | formals(a_count_abnormal_by_marked), extra_args[[".additional_fun_parameters"]] | 
| 293 | ) | |
| 294 | ||
| 295 | 1x | analyze( | 
| 296 | 1x | lyt = lyt, | 
| 297 | 1x | vars = var, | 
| 298 | 1x | afun = a_count_abnormal_by_marked, | 
| 299 | 1x | na_str = na_str, | 
| 300 | 1x | nested = nested, | 
| 301 | 1x | extra_args = extra_args, | 
| 302 | 1x | show_labels = "hidden" | 
| 303 | ) | |
| 304 | } | 
| 1 | #' Tabulate biomarker effects on binary response by subgroup | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The [tabulate_rsp_biomarkers()] function creates a layout element to tabulate the estimated biomarker effects on a | |
| 6 | #' binary response endpoint across subgroups, returning statistics including response rate and odds ratio for each | |
| 7 | #' population subgroup. The table is created from `df`, a list of data frames returned by [extract_rsp_biomarkers()], | |
| 8 | #' with the statistics to include specified via the `vars` parameter. | |
| 9 | #' | |
| 10 | #' A forest plot can be created from the resulting table using the [g_forest()] function. | |
| 11 | #' | |
| 12 | #' @inheritParams argument_convention | |
| 13 | #' @param df (`data.frame`)\cr containing all analysis variables, as returned by | |
| 14 | #' [extract_rsp_biomarkers()]. | |
| 15 | #' @param vars (`character`)\cr the names of statistics to be reported among: | |
| 16 | #' * `n_tot`: Total number of patients per group. | |
| 17 | #' * `n_rsp`: Total number of responses per group. | |
| 18 | #' * `prop`: Total response proportion per group. | |
| 19 | #' * `or`: Odds ratio. | |
| 20 | #' * `ci`: Confidence interval of odds ratio. | |
| 21 | #' * `pval`: p-value of the effect. | |
| 22 | #' Note, the statistics `n_tot`, `or` and `ci` are required. | |
| 23 | #' | |
| 24 | #' @return An `rtables` table summarizing biomarker effects on binary response by subgroup. | |
| 25 | #' | |
| 26 | #' @details These functions create a layout starting from a data frame which contains | |
| 27 | #' the required statistics. The tables are then typically used as input for forest plots. | |
| 28 | #' | |
| 29 | #' @note In contrast to [tabulate_rsp_subgroups()] this tabulation function does | |
| 30 | #' not start from an input layout `lyt`. This is because internally the table is | |
| 31 | #' created by combining multiple subtables. | |
| 32 | #' | |
| 33 | #' @seealso [extract_rsp_biomarkers()] | |
| 34 | #' | |
| 35 | #' @examples | |
| 36 | #' library(dplyr) | |
| 37 | #' library(forcats) | |
| 38 | #' | |
| 39 | #' adrs <- tern_ex_adrs | |
| 40 | #' adrs_labels <- formatters::var_labels(adrs) | |
| 41 | #' | |
| 42 | #' adrs_f <- adrs %>% | |
| 43 | #' filter(PARAMCD == "BESRSPI") %>% | |
| 44 | #' mutate(rsp = AVALC == "CR") | |
| 45 | #' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response") | |
| 46 | #' | |
| 47 | #' df <- extract_rsp_biomarkers( | |
| 48 | #' variables = list( | |
| 49 | #' rsp = "rsp", | |
| 50 | #'     biomarkers = c("BMRKR1", "AGE"), | |
| 51 | #' covariates = "SEX", | |
| 52 | #' subgroups = "BMRKR2" | |
| 53 | #' ), | |
| 54 | #' data = adrs_f | |
| 55 | #' ) | |
| 56 | #' | |
| 57 | #' \donttest{ | |
| 58 | #' ## Table with default columns. | |
| 59 | #' tabulate_rsp_biomarkers(df) | |
| 60 | #' | |
| 61 | #' ## Table with a manually chosen set of columns: leave out "pval", reorder. | |
| 62 | #' tab <- tabulate_rsp_biomarkers( | |
| 63 | #' df = df, | |
| 64 | #'   vars = c("n_rsp", "ci", "n_tot", "prop", "or") | |
| 65 | #' ) | |
| 66 | #' | |
| 67 | #' ## Finally produce the forest plot. | |
| 68 | #' g_forest(tab, xlim = c(0.7, 1.4)) | |
| 69 | #' } | |
| 70 | #' | |
| 71 | #' @export | |
| 72 | #' @name response_biomarkers_subgroups | |
| 73 | tabulate_rsp_biomarkers <- function(df, | |
| 74 |                                     vars = c("n_tot", "n_rsp", "prop", "or", "ci", "pval"), | |
| 75 | na_str = default_na_str(), | |
| 76 | ..., | |
| 77 | .stat_names = NULL, | |
| 78 | .formats = NULL, | |
| 79 | .labels = NULL, | |
| 80 |                                     .indent_mods = NULL) { | |
| 81 | 4x | checkmate::assert_data_frame(df) | 
| 82 | 4x | checkmate::assert_character(df$biomarker) | 
| 83 | 4x | checkmate::assert_character(df$biomarker_label) | 
| 84 | 4x |   checkmate::assert_subset(vars, get_stats("tabulate_rsp_biomarkers")) | 
| 85 | ||
| 86 | # Process standard extra arguments | |
| 87 | 4x |   extra_args <- list(".stats" = vars) | 
| 88 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 89 | ! | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 90 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 91 | ! | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 92 | ||
| 93 | 4x | colvars <- d_rsp_subgroups_colvars( | 
| 94 | 4x | vars, | 
| 95 | 4x | conf_level = df$conf_level[1], | 
| 96 | 4x | method = df$pval_label[1] | 
| 97 | ) | |
| 98 | ||
| 99 | # Process additional arguments to the statistic function | |
| 100 | 4x | extra_args <- c(extra_args, biomarker = TRUE, ...) | 
| 101 | ||
| 102 | # Adding additional info from layout to analysis function | |
| 103 | 4x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 104 | 4x | formals(a_response_subgroups) <- c(formals(a_response_subgroups), extra_args[[".additional_fun_parameters"]]) | 
| 105 | ||
| 106 | # Create "ci" column from "lcl" and "ucl" | |
| 107 | 4x | df$ci <- combine_vectors(df$lcl, df$ucl) | 
| 108 | ||
| 109 | 4x | df_subs <- split(df, f = df$biomarker) | 
| 110 | 4x | tbls <- lapply( | 
| 111 | 4x | df_subs, | 
| 112 | 4x |     function(df) { | 
| 113 | 7x | lyt <- basic_table() | 
| 114 | ||
| 115 | # Split cols by the multiple variables to populate into columns. | |
| 116 | 7x | lyt <- split_cols_by_multivar( | 
| 117 | 7x | lyt = lyt, | 
| 118 | 7x | vars = colvars$vars, | 
| 119 | 7x | varlabels = colvars$labels | 
| 120 | ) | |
| 121 | ||
| 122 | # Row split by biomarker | |
| 123 | 7x | lyt <- split_rows_by( | 
| 124 | 7x | lyt = lyt, | 
| 125 | 7x | var = "biomarker_label", | 
| 126 | 7x | nested = FALSE | 
| 127 | ) | |
| 128 | ||
| 129 | # Add "All Patients" row | |
| 130 | 7x | lyt <- split_rows_by( | 
| 131 | 7x | lyt = lyt, | 
| 132 | 7x | var = "row_type", | 
| 133 | 7x |         split_fun = keep_split_levels("content"), | 
| 134 | 7x | nested = TRUE, | 
| 135 | 7x | child_labels = "hidden" | 
| 136 | ) | |
| 137 | 7x | lyt <- analyze_colvars( | 
| 138 | 7x | lyt = lyt, | 
| 139 | 7x | afun = a_response_subgroups, | 
| 140 | 7x | na_str = na_str, | 
| 141 | 7x | extra_args = c(extra_args, overall = TRUE) | 
| 142 | ) | |
| 143 | ||
| 144 | # Add analysis rows | |
| 145 | 7x |       if ("analysis" %in% df$row_type) { | 
| 146 | 4x | lyt <- split_rows_by( | 
| 147 | 4x | lyt = lyt, | 
| 148 | 4x | var = "row_type", | 
| 149 | 4x |           split_fun = keep_split_levels("analysis"), | 
| 150 | 4x | nested = TRUE, | 
| 151 | 4x | child_labels = "hidden" | 
| 152 | ) | |
| 153 | 4x | lyt <- split_rows_by( | 
| 154 | 4x | lyt = lyt, | 
| 155 | 4x | var = "var_label", | 
| 156 | 4x | nested = TRUE, | 
| 157 | 4x | indent_mod = 1L | 
| 158 | ) | |
| 159 | 4x | lyt <- analyze_colvars( | 
| 160 | 4x | lyt = lyt, | 
| 161 | 4x | afun = a_response_subgroups, | 
| 162 | 4x | na_str = na_str, | 
| 163 | 4x | inclNAs = TRUE, | 
| 164 | 4x | extra_args = extra_args | 
| 165 | ) | |
| 166 | } | |
| 167 | 7x | build_table(lyt, df = df) | 
| 168 | } | |
| 169 | ) | |
| 170 | ||
| 171 | 4x | result <- do.call(rbind, tbls) | 
| 172 | ||
| 173 | 4x |   n_id <- grep("n_tot", vars) | 
| 174 | 4x |   or_id <- match("or", vars) | 
| 175 | 4x |   ci_id <- match("ci", vars) | 
| 176 | 4x | structure( | 
| 177 | 4x | result, | 
| 178 | 4x |     forest_header = paste0(c("Lower", "Higher"), "\nBetter"), | 
| 179 | 4x | col_x = or_id, | 
| 180 | 4x | col_ci = ci_id, | 
| 181 | 4x | col_symbol_size = n_id | 
| 182 | ) | |
| 183 | } | |
| 184 | ||
| 185 | #' Prepare response data estimates for multiple biomarkers in a single data frame | |
| 186 | #' | |
| 187 | #' @description `r lifecycle::badge("stable")` | |
| 188 | #' | |
| 189 | #' Prepares estimates for number of responses, patients and overall response rate, | |
| 190 | #' as well as odds ratio estimates, confidence intervals and p-values, | |
| 191 | #' for multiple biomarkers across population subgroups in a single data frame. | |
| 192 | #' `variables` corresponds to the names of variables found in `data`, passed as a | |
| 193 | #' named list and requires elements `rsp` and `biomarkers` (vector of continuous | |
| 194 | #' biomarker variables) and optionally `covariates`, `subgroups` and `strata`. | |
| 195 | #' `groups_lists` optionally specifies groupings for `subgroups` variables. | |
| 196 | #' | |
| 197 | #' @inheritParams argument_convention | |
| 198 | #' @inheritParams response_subgroups | |
| 199 | #' @param control (named `list`)\cr controls for the response definition and the | |
| 200 | #' confidence level produced by [control_logistic()]. | |
| 201 | #' | |
| 202 | #' @return A `data.frame` with columns `biomarker`, `biomarker_label`, `n_tot`, `n_rsp`, | |
| 203 | #' `prop`, `or`, `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`, | |
| 204 | #' `var_label`, and `row_type`. | |
| 205 | #' | |
| 206 | #' @note You can also specify a continuous variable in `rsp` and then use the | |
| 207 | #' `response_definition` control to convert that internally to a logical | |
| 208 | #' variable reflecting binary response. | |
| 209 | #' | |
| 210 | #' @seealso [h_logistic_mult_cont_df()] which is used internally. | |
| 211 | #' | |
| 212 | #' @examples | |
| 213 | #' library(dplyr) | |
| 214 | #' library(forcats) | |
| 215 | #' | |
| 216 | #' adrs <- tern_ex_adrs | |
| 217 | #' adrs_labels <- formatters::var_labels(adrs) | |
| 218 | #' | |
| 219 | #' adrs_f <- adrs %>% | |
| 220 | #' filter(PARAMCD == "BESRSPI") %>% | |
| 221 | #' mutate(rsp = AVALC == "CR") | |
| 222 | #' | |
| 223 | #' # Typical analysis of two continuous biomarkers `BMRKR1` and `AGE`, | |
| 224 | #' # in logistic regression models with one covariate `RACE`. The subgroups | |
| 225 | #' # are defined by the levels of `BMRKR2`. | |
| 226 | #' df <- extract_rsp_biomarkers( | |
| 227 | #' variables = list( | |
| 228 | #' rsp = "rsp", | |
| 229 | #'     biomarkers = c("BMRKR1", "AGE"), | |
| 230 | #' covariates = "SEX", | |
| 231 | #' subgroups = "BMRKR2" | |
| 232 | #' ), | |
| 233 | #' data = adrs_f | |
| 234 | #' ) | |
| 235 | #' df | |
| 236 | #' | |
| 237 | #' # Here we group the levels of `BMRKR2` manually, and we add a stratification | |
| 238 | #' # variable `STRATA1`. We also here use a continuous variable `EOSDY` | |
| 239 | #' # which is then binarized internally (response is defined as this variable | |
| 240 | #' # being larger than 750). | |
| 241 | #' df_grouped <- extract_rsp_biomarkers( | |
| 242 | #' variables = list( | |
| 243 | #' rsp = "EOSDY", | |
| 244 | #'     biomarkers = c("BMRKR1", "AGE"), | |
| 245 | #' covariates = "SEX", | |
| 246 | #' subgroups = "BMRKR2", | |
| 247 | #' strata = "STRATA1" | |
| 248 | #' ), | |
| 249 | #' data = adrs_f, | |
| 250 | #' groups_lists = list( | |
| 251 | #' BMRKR2 = list( | |
| 252 | #' "low" = "LOW", | |
| 253 | #'       "low/medium" = c("LOW", "MEDIUM"), | |
| 254 | #'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH") | |
| 255 | #' ) | |
| 256 | #' ), | |
| 257 | #' control = control_logistic( | |
| 258 | #' response_definition = "I(response > 750)" | |
| 259 | #' ) | |
| 260 | #' ) | |
| 261 | #' df_grouped | |
| 262 | #' | |
| 263 | #' @export | |
| 264 | extract_rsp_biomarkers <- function(variables, | |
| 265 | data, | |
| 266 | groups_lists = list(), | |
| 267 | control = control_logistic(), | |
| 268 |                                    label_all = "All Patients") { | |
| 269 | 5x |   if ("strat" %in% names(variables)) { | 
| 270 | ! | warning( | 
| 271 | ! | "Warning: the `strat` element name of the `variables` list argument to `extract_rsp_biomarkers() ", | 
| 272 | ! | "was deprecated in tern 0.9.4.\n ", | 
| 273 | ! | "Please use the name `strata` instead of `strat` in the `variables` argument." | 
| 274 | ) | |
| 275 | ! | variables[["strata"]] <- variables[["strat"]] | 
| 276 | } | |
| 277 | ||
| 278 | 5x | assert_list_of_variables(variables) | 
| 279 | 5x | checkmate::assert_string(variables$rsp) | 
| 280 | 5x | checkmate::assert_character(variables$subgroups, null.ok = TRUE) | 
| 281 | 5x | checkmate::assert_string(label_all) | 
| 282 | ||
| 283 | # Start with all patients. | |
| 284 | 5x | result_all <- h_logistic_mult_cont_df( | 
| 285 | 5x | variables = variables, | 
| 286 | 5x | data = data, | 
| 287 | 5x | control = control | 
| 288 | ) | |
| 289 | 5x | result_all$subgroup <- label_all | 
| 290 | 5x | result_all$var <- "ALL" | 
| 291 | 5x | result_all$var_label <- label_all | 
| 292 | 5x | result_all$row_type <- "content" | 
| 293 | 5x |   if (is.null(variables$subgroups)) { | 
| 294 | # Only return result for all patients. | |
| 295 | 1x | result_all | 
| 296 |   } else { | |
| 297 | # Add subgroups results. | |
| 298 | 4x | l_data <- h_split_by_subgroups( | 
| 299 | 4x | data, | 
| 300 | 4x | variables$subgroups, | 
| 301 | 4x | groups_lists = groups_lists | 
| 302 | ) | |
| 303 | 4x |     l_result <- lapply(l_data, function(grp) { | 
| 304 | 20x | result <- h_logistic_mult_cont_df( | 
| 305 | 20x | variables = variables, | 
| 306 | 20x | data = grp$df, | 
| 307 | 20x | control = control | 
| 308 | ) | |
| 309 | 20x | result_labels <- grp$df_labels[rep(1, times = nrow(result)), ] | 
| 310 | 20x | cbind(result, result_labels) | 
| 311 | }) | |
| 312 | 4x | result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE)) | 
| 313 | 4x | result_subgroups$row_type <- "analysis" | 
| 314 | 4x | rbind( | 
| 315 | 4x | result_all, | 
| 316 | 4x | result_subgroups | 
| 317 | ) | |
| 318 | } | |
| 319 | } | 
| 1 | #' Horizontal waterfall plot | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' This basic waterfall plot visualizes a quantity `height` ordered by value with some markup. | |
| 6 | #' | |
| 7 | #' @param height (`numeric`)\cr vector containing values to be plotted as the waterfall bars. | |
| 8 | #' @param id (`character`)\cr vector containing identifiers to use as the x-axis label for the waterfall bars. | |
| 9 | #' @param col (`character`)\cr color(s). | |
| 10 | #' @param col_var (`factor`, `character`, or `NULL`)\cr categorical variable for bar coloring. `NULL` by default. | |
| 11 | #' @param xlab (`string`)\cr x label. Default is `"ID"`. | |
| 12 | #' @param ylab (`string`)\cr y label. Default is `"Value"`. | |
| 13 | #' @param title (`string`)\cr text to be displayed as plot title. | |
| 14 | #' @param col_legend_title (`string`)\cr text to be displayed as legend title. | |
| 15 | #' | |
| 16 | #' @return A `ggplot` waterfall plot. | |
| 17 | #' | |
| 18 | #' @examples | |
| 19 | #' library(dplyr) | |
| 20 | #' | |
| 21 | #' g_waterfall(height = c(3, 5, -1), id = letters[1:3]) | |
| 22 | #' | |
| 23 | #' g_waterfall( | |
| 24 | #' height = c(3, 5, -1), | |
| 25 | #' id = letters[1:3], | |
| 26 | #' col_var = letters[1:3] | |
| 27 | #' ) | |
| 28 | #' | |
| 29 | #' adsl_f <- tern_ex_adsl %>% | |
| 30 | #' select(USUBJID, STUDYID, ARM, ARMCD, SEX) | |
| 31 | #' | |
| 32 | #' adrs_f <- tern_ex_adrs %>% | |
| 33 | #' filter(PARAMCD == "OVRINV") %>% | |
| 34 | #' mutate(pchg = rnorm(n(), 10, 50)) | |
| 35 | #' | |
| 36 | #' adrs_f <- head(adrs_f, 30) | |
| 37 | #' adrs_f <- adrs_f[!duplicated(adrs_f$USUBJID), ] | |
| 38 | #' head(adrs_f) | |
| 39 | #' | |
| 40 | #' g_waterfall( | |
| 41 | #' height = adrs_f$pchg, | |
| 42 | #' id = adrs_f$USUBJID, | |
| 43 | #' col_var = adrs_f$AVALC | |
| 44 | #' ) | |
| 45 | #' | |
| 46 | #' g_waterfall( | |
| 47 | #' height = adrs_f$pchg, | |
| 48 | #'   id = paste("asdfdsfdsfsd", adrs_f$USUBJID), | |
| 49 | #' col_var = adrs_f$SEX | |
| 50 | #' ) | |
| 51 | #' | |
| 52 | #' g_waterfall( | |
| 53 | #' height = adrs_f$pchg, | |
| 54 | #'   id = paste("asdfdsfdsfsd", adrs_f$USUBJID), | |
| 55 | #' xlab = "ID", | |
| 56 | #' ylab = "Percentage Change", | |
| 57 | #' title = "Waterfall plot" | |
| 58 | #' ) | |
| 59 | #' | |
| 60 | #' @export | |
| 61 | g_waterfall <- function(height, | |
| 62 | id, | |
| 63 | col_var = NULL, | |
| 64 |                         col = getOption("ggplot2.discrete.colour"), | |
| 65 | xlab = NULL, | |
| 66 | ylab = NULL, | |
| 67 | col_legend_title = NULL, | |
| 68 |                         title = NULL) { | |
| 69 | 2x |   if (!is.null(col_var)) { | 
| 70 | 1x | check_same_n(height = height, id = id, col_var = col_var) | 
| 71 |   } else { | |
| 72 | 1x | check_same_n(height = height, id = id) | 
| 73 | } | |
| 74 | ||
| 75 | 2x |   checkmate::assert_multi_class(col_var, c("character", "factor"), null.ok = TRUE) | 
| 76 | 2x | checkmate::assert_character(col, null.ok = TRUE) | 
| 77 | ||
| 78 | 2x | xlabel <- deparse(substitute(id)) | 
| 79 | 2x | ylabel <- deparse(substitute(height)) | 
| 80 | ||
| 81 | 2x |   col_label <- if (!missing(col_var)) { | 
| 82 | 1x | deparse(substitute(col_var)) | 
| 83 | } | |
| 84 | ||
| 85 | 2x | xlab <- if (is.null(xlab)) xlabel else xlab | 
| 86 | 2x | ylab <- if (is.null(ylab)) ylabel else ylab | 
| 87 | 2x | col_legend_title <- if (is.null(col_legend_title)) col_label else col_legend_title | 
| 88 | ||
| 89 | 2x | plot_data <- data.frame( | 
| 90 | 2x | height = height, | 
| 91 | 2x | id = as.character(id), | 
| 92 | 2x | col_var = if (is.null(col_var)) "x" else to_n(col_var, length(height)), | 
| 93 | 2x | stringsAsFactors = FALSE | 
| 94 | ) | |
| 95 | ||
| 96 | 2x | plot_data_ord <- plot_data[order(plot_data$height, decreasing = TRUE), ] | 
| 97 | ||
| 98 | 2x | p <- ggplot2::ggplot(plot_data_ord, ggplot2::aes(x = factor(id, levels = id), y = height)) + | 
| 99 | 2x | ggplot2::geom_col() + | 
| 100 | 2x | ggplot2::geom_text( | 
| 101 | 2x | label = format(plot_data_ord$height, digits = 2), | 
| 102 | 2x | vjust = ifelse(plot_data_ord$height >= 0, -0.5, 1.5) | 
| 103 | ) + | |
| 104 | 2x | ggplot2::xlab(xlab) + | 
| 105 | 2x | ggplot2::ylab(ylab) + | 
| 106 | 2x | ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, hjust = 0, vjust = .5)) | 
| 107 | ||
| 108 | 2x |   if (!is.null(col_var)) { | 
| 109 | 1x | p <- p + | 
| 110 | 1x | ggplot2::aes(fill = col_var) + | 
| 111 | 1x | ggplot2::labs(fill = col_legend_title) + | 
| 112 | 1x | ggplot2::theme( | 
| 113 | 1x | legend.position = "bottom", | 
| 114 | 1x | legend.background = ggplot2::element_blank(), | 
| 115 | 1x | legend.title = ggplot2::element_text(face = "bold"), | 
| 116 | 1x | legend.box.background = ggplot2::element_rect(colour = "black") | 
| 117 | ) | |
| 118 | } | |
| 119 | ||
| 120 | 2x |   if (!is.null(col)) { | 
| 121 | 1x | p <- p + | 
| 122 | 1x | ggplot2::scale_fill_manual(values = col) | 
| 123 | } | |
| 124 | ||
| 125 | 2x |   if (!is.null(title)) { | 
| 126 | 1x | p <- p + | 
| 127 | 1x | ggplot2::labs(title = title) + | 
| 128 | 1x | ggplot2::theme(plot.title = ggplot2::element_text(face = "bold")) | 
| 129 | } | |
| 130 | ||
| 131 | 2x | p | 
| 132 | } | 
| 1 | #' Individual patient plots | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Line plot(s) displaying trend in patients' parameter values over time is rendered. | |
| 6 | #' Patients' individual baseline values can be added to the plot(s) as reference. | |
| 7 | #' | |
| 8 | #' @inheritParams argument_convention | |
| 9 | #' @param xvar (`string`)\cr time point variable to be plotted on x-axis. | |
| 10 | #' @param yvar (`string`)\cr continuous analysis variable to be plotted on y-axis. | |
| 11 | #' @param xlab (`string`)\cr plot label for x-axis. | |
| 12 | #' @param ylab (`string`)\cr plot label for y-axis. | |
| 13 | #' @param id_var (`string`)\cr variable used as patient identifier. | |
| 14 | #' @param title (`string`)\cr title for plot. | |
| 15 | #' @param subtitle (`string`)\cr subtitle for plot. | |
| 16 | #' @param add_baseline_hline (`flag`)\cr adds horizontal line at baseline y-value on | |
| 17 | #' plot when `TRUE`. | |
| 18 | #' @param yvar_baseline (`string`)\cr variable with baseline values only. | |
| 19 | #' Ignored when `add_baseline_hline` is `FALSE`. | |
| 20 | #' @param ggtheme (`theme`)\cr optional graphical theme function as provided | |
| 21 | #' by `ggplot2` to control outlook of plot. Use `ggplot2::theme()` to tweak the display. | |
| 22 | #' @param plotting_choices (`string`)\cr specifies options for displaying | |
| 23 | #' plots. Must be one of `"all_in_one"`, `"split_by_max_obs"`, or `"separate_by_obs"`. | |
| 24 | #' @param max_obs_per_plot (`integer(1)`)\cr number of observations to be plotted on one | |
| 25 | #' plot. Ignored if `plotting_choices` is not `"separate_by_obs"`. | |
| 26 | #' @param caption (`string`)\cr optional caption below the plot. | |
| 27 | #' @param col (`character`)\cr line colors. | |
| 28 | #' | |
| 29 | #' @seealso Relevant helper function [h_g_ipp()]. | |
| 30 | #' | |
| 31 | #' @name g_ipp | |
| 32 | #' @aliases individual_patient_plot | |
| 33 | NULL | |
| 34 | ||
| 35 | #' Helper function to create simple line plot over time | |
| 36 | #' | |
| 37 | #' @description `r lifecycle::badge("stable")` | |
| 38 | #' | |
| 39 | #' Function that generates a simple line plot displaying parameter trends over time. | |
| 40 | #' | |
| 41 | #' @inheritParams argument_convention | |
| 42 | #' @inheritParams g_ipp | |
| 43 | #' | |
| 44 | #' @return A `ggplot` line plot. | |
| 45 | #' | |
| 46 | #' @seealso [g_ipp()] which uses this function. | |
| 47 | #' | |
| 48 | #' @examples | |
| 49 | #' library(dplyr) | |
| 50 | #' | |
| 51 | #' # Select a small sample of data to plot. | |
| 52 | #' adlb <- tern_ex_adlb %>% | |
| 53 | #'   filter(PARAMCD == "ALT", !(AVISIT %in% c("SCREENING", "BASELINE"))) %>% | |
| 54 | #' slice(1:36) | |
| 55 | #' | |
| 56 | #' p <- h_g_ipp( | |
| 57 | #' df = adlb, | |
| 58 | #' xvar = "AVISIT", | |
| 59 | #' yvar = "AVAL", | |
| 60 | #' xlab = "Visit", | |
| 61 | #' id_var = "USUBJID", | |
| 62 | #' ylab = "SGOT/ALT (U/L)", | |
| 63 | #' add_baseline_hline = TRUE | |
| 64 | #' ) | |
| 65 | #' p | |
| 66 | #' | |
| 67 | #' @export | |
| 68 | h_g_ipp <- function(df, | |
| 69 | xvar, | |
| 70 | yvar, | |
| 71 | xlab, | |
| 72 | ylab, | |
| 73 | id_var, | |
| 74 | title = "Individual Patient Plots", | |
| 75 | subtitle = "", | |
| 76 | caption = NULL, | |
| 77 | add_baseline_hline = FALSE, | |
| 78 | yvar_baseline = "BASE", | |
| 79 | ggtheme = nestcolor::theme_nest(), | |
| 80 |                     col = NULL) { | |
| 81 | 13x | checkmate::assert_string(xvar) | 
| 82 | 13x | checkmate::assert_string(yvar) | 
| 83 | 13x | checkmate::assert_string(yvar_baseline) | 
| 84 | 13x | checkmate::assert_string(id_var) | 
| 85 | 13x | checkmate::assert_string(xlab) | 
| 86 | 13x | checkmate::assert_string(ylab) | 
| 87 | 13x | checkmate::assert_string(title) | 
| 88 | 13x | checkmate::assert_string(subtitle) | 
| 89 | 13x | checkmate::assert_subset(c(xvar, yvar, yvar_baseline, id_var), colnames(df)) | 
| 90 | 13x | checkmate::assert_data_frame(df) | 
| 91 | 13x | checkmate::assert_flag(add_baseline_hline) | 
| 92 | 13x | checkmate::assert_character(col, null.ok = TRUE) | 
| 93 | ||
| 94 | 13x | p <- ggplot2::ggplot( | 
| 95 | 13x | data = df, | 
| 96 | 13x | mapping = ggplot2::aes( | 
| 97 | 13x | x = .data[[xvar]], | 
| 98 | 13x | y = .data[[yvar]], | 
| 99 | 13x | group = .data[[id_var]], | 
| 100 | 13x | colour = .data[[id_var]] | 
| 101 | ) | |
| 102 | ) + | |
| 103 | 13x | ggplot2::geom_line(linewidth = 0.4) + | 
| 104 | 13x | ggplot2::geom_point(size = 2) + | 
| 105 | 13x | ggplot2::labs( | 
| 106 | 13x | x = xlab, | 
| 107 | 13x | y = ylab, | 
| 108 | 13x | title = title, | 
| 109 | 13x | subtitle = subtitle, | 
| 110 | 13x | caption = caption | 
| 111 | ) + | |
| 112 | 13x | ggtheme | 
| 113 | ||
| 114 | 13x |   if (add_baseline_hline) { | 
| 115 | 12x | baseline_df <- df[, c(id_var, yvar_baseline)] | 
| 116 | 12x | baseline_df <- unique(baseline_df) | 
| 117 | ||
| 118 | 12x | p <- p + | 
| 119 | 12x | ggplot2::geom_hline( | 
| 120 | 12x | data = baseline_df, | 
| 121 | 12x | mapping = ggplot2::aes( | 
| 122 | 12x | yintercept = .data[[yvar_baseline]], | 
| 123 | 12x | colour = .data[[id_var]] | 
| 124 | ), | |
| 125 | 12x | linetype = "dotdash", | 
| 126 | 12x | linewidth = 0.4 | 
| 127 | ) + | |
| 128 | 12x | ggplot2::geom_text( | 
| 129 | 12x | data = baseline_df, | 
| 130 | 12x | mapping = ggplot2::aes( | 
| 131 | 12x | x = 1, | 
| 132 | 12x | y = .data[[yvar_baseline]], | 
| 133 | 12x | label = .data[[id_var]], | 
| 134 | 12x | colour = .data[[id_var]] | 
| 135 | ), | |
| 136 | 12x | nudge_y = 0.025 * (max(df[, yvar], na.rm = TRUE) - min(df[, yvar], na.rm = TRUE)), | 
| 137 | 12x | vjust = "right", | 
| 138 | 12x | size = 2 | 
| 139 | ) | |
| 140 | ||
| 141 | 12x |     if (!is.null(col)) { | 
| 142 | 1x | p <- p + | 
| 143 | 1x | ggplot2::scale_color_manual(values = col) | 
| 144 | } | |
| 145 | } | |
| 146 | 13x | p | 
| 147 | } | |
| 148 | ||
| 149 | #' @describeIn g_ipp Plotting function for individual patient plots which, depending on user | |
| 150 | #' preference, renders a single graphic or compiles a list of graphics that show trends in individual's parameter | |
| 151 | #' values over time. | |
| 152 | #' | |
| 153 | #' @return A `ggplot` object or a list of `ggplot` objects. | |
| 154 | #' | |
| 155 | #' @examples | |
| 156 | #' library(dplyr) | |
| 157 | #' | |
| 158 | #' # Select a small sample of data to plot. | |
| 159 | #' adlb <- tern_ex_adlb %>% | |
| 160 | #'   filter(PARAMCD == "ALT", !(AVISIT %in% c("SCREENING", "BASELINE"))) %>% | |
| 161 | #' slice(1:36) | |
| 162 | #' | |
| 163 | #' plot_list <- g_ipp( | |
| 164 | #' df = adlb, | |
| 165 | #' xvar = "AVISIT", | |
| 166 | #' yvar = "AVAL", | |
| 167 | #' xlab = "Visit", | |
| 168 | #' ylab = "SGOT/ALT (U/L)", | |
| 169 | #' title = "Individual Patient Plots", | |
| 170 | #' add_baseline_hline = TRUE, | |
| 171 | #' plotting_choices = "split_by_max_obs", | |
| 172 | #' max_obs_per_plot = 5 | |
| 173 | #' ) | |
| 174 | #' plot_list | |
| 175 | #' | |
| 176 | #' @export | |
| 177 | g_ipp <- function(df, | |
| 178 | xvar, | |
| 179 | yvar, | |
| 180 | xlab, | |
| 181 | ylab, | |
| 182 | id_var = "USUBJID", | |
| 183 | title = "Individual Patient Plots", | |
| 184 | subtitle = "", | |
| 185 | caption = NULL, | |
| 186 | add_baseline_hline = FALSE, | |
| 187 | yvar_baseline = "BASE", | |
| 188 | ggtheme = nestcolor::theme_nest(), | |
| 189 |                   plotting_choices = c("all_in_one", "split_by_max_obs", "separate_by_obs"), | |
| 190 | max_obs_per_plot = 4, | |
| 191 |                   col = NULL) { | |
| 192 | 3x | checkmate::assert_count(max_obs_per_plot) | 
| 193 | 3x |   checkmate::assert_subset(plotting_choices, c("all_in_one", "split_by_max_obs", "separate_by_obs")) | 
| 194 | 3x | checkmate::assert_character(col, null.ok = TRUE) | 
| 195 | ||
| 196 | 3x | plotting_choices <- match.arg(plotting_choices) | 
| 197 | ||
| 198 | 3x |   if (plotting_choices == "all_in_one") { | 
| 199 | 1x | p <- h_g_ipp( | 
| 200 | 1x | df = df, | 
| 201 | 1x | xvar = xvar, | 
| 202 | 1x | yvar = yvar, | 
| 203 | 1x | xlab = xlab, | 
| 204 | 1x | ylab = ylab, | 
| 205 | 1x | id_var = id_var, | 
| 206 | 1x | title = title, | 
| 207 | 1x | subtitle = subtitle, | 
| 208 | 1x | caption = caption, | 
| 209 | 1x | add_baseline_hline = add_baseline_hline, | 
| 210 | 1x | yvar_baseline = yvar_baseline, | 
| 211 | 1x | ggtheme = ggtheme, | 
| 212 | 1x | col = col | 
| 213 | ) | |
| 214 | ||
| 215 | 1x | return(p) | 
| 216 | 2x |   } else if (plotting_choices == "split_by_max_obs") { | 
| 217 | 1x | id_vec <- unique(df[[id_var]]) | 
| 218 | 1x | id_list <- split( | 
| 219 | 1x | id_vec, | 
| 220 | 1x | rep(1:ceiling(length(id_vec) / max_obs_per_plot), | 
| 221 | 1x | each = max_obs_per_plot, | 
| 222 | 1x | length.out = length(id_vec) | 
| 223 | ) | |
| 224 | ) | |
| 225 | ||
| 226 | 1x | df_list <- list() | 
| 227 | 1x | plot_list <- list() | 
| 228 | ||
| 229 | 1x |     for (i in seq_along(id_list)) { | 
| 230 | 2x | df_list[[i]] <- df[df[[id_var]] %in% id_list[[i]], ] | 
| 231 | ||
| 232 | 2x | plots <- h_g_ipp( | 
| 233 | 2x | df = df_list[[i]], | 
| 234 | 2x | xvar = xvar, | 
| 235 | 2x | yvar = yvar, | 
| 236 | 2x | xlab = xlab, | 
| 237 | 2x | ylab = ylab, | 
| 238 | 2x | id_var = id_var, | 
| 239 | 2x | title = title, | 
| 240 | 2x | subtitle = subtitle, | 
| 241 | 2x | caption = caption, | 
| 242 | 2x | add_baseline_hline = add_baseline_hline, | 
| 243 | 2x | yvar_baseline = yvar_baseline, | 
| 244 | 2x | ggtheme = ggtheme, | 
| 245 | 2x | col = col | 
| 246 | ) | |
| 247 | ||
| 248 | 2x | plot_list[[i]] <- plots | 
| 249 | } | |
| 250 | 1x | return(plot_list) | 
| 251 |   } else { | |
| 252 | 1x | ind_df <- split(df, df[[id_var]]) | 
| 253 | 1x | plot_list <- lapply( | 
| 254 | 1x | ind_df, | 
| 255 | 1x |       function(x) { | 
| 256 | 8x | h_g_ipp( | 
| 257 | 8x | df = x, | 
| 258 | 8x | xvar = xvar, | 
| 259 | 8x | yvar = yvar, | 
| 260 | 8x | xlab = xlab, | 
| 261 | 8x | ylab = ylab, | 
| 262 | 8x | id_var = id_var, | 
| 263 | 8x | title = title, | 
| 264 | 8x | subtitle = subtitle, | 
| 265 | 8x | caption = caption, | 
| 266 | 8x | add_baseline_hline = add_baseline_hline, | 
| 267 | 8x | yvar_baseline = yvar_baseline, | 
| 268 | 8x | ggtheme = ggtheme, | 
| 269 | 8x | col = col | 
| 270 | ) | |
| 271 | } | |
| 272 | ) | |
| 273 | ||
| 274 | 1x | return(plot_list) | 
| 275 | } | |
| 276 | } | 
| 1 | #' Helper functions for Cox proportional hazards regression | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Helper functions used in [fit_coxreg_univar()] and [fit_coxreg_multivar()]. | |
| 6 | #' | |
| 7 | #' @inheritParams argument_convention | |
| 8 | #' @inheritParams h_coxreg_univar_extract | |
| 9 | #' @inheritParams cox_regression_inter | |
| 10 | #' @inheritParams control_coxreg | |
| 11 | #' | |
| 12 | #' @seealso [cox_regression] | |
| 13 | #' | |
| 14 | #' @name h_cox_regression | |
| 15 | NULL | |
| 16 | ||
| 17 | #' @describeIn h_cox_regression Helper for Cox regression formula. Creates a list of formulas. It is used | |
| 18 | #' internally by [fit_coxreg_univar()] for the comparison of univariate Cox regression models. | |
| 19 | #' | |
| 20 | #' @return | |
| 21 | #' * `h_coxreg_univar_formulas()` returns a `character` vector coercible into formulas (e.g [stats::as.formula()]). | |
| 22 | #' | |
| 23 | #' @examples | |
| 24 | #' # `h_coxreg_univar_formulas` | |
| 25 | #' | |
| 26 | #' ## Simple formulas. | |
| 27 | #' h_coxreg_univar_formulas( | |
| 28 | #' variables = list( | |
| 29 | #'     time = "time", event = "status", arm = "armcd", covariates = c("X", "y") | |
| 30 | #' ) | |
| 31 | #' ) | |
| 32 | #' | |
| 33 | #' ## Addition of an optional strata. | |
| 34 | #' h_coxreg_univar_formulas( | |
| 35 | #' variables = list( | |
| 36 | #'     time = "time", event = "status", arm = "armcd", covariates = c("X", "y"), | |
| 37 | #' strata = "SITE" | |
| 38 | #' ) | |
| 39 | #' ) | |
| 40 | #' | |
| 41 | #' ## Inclusion of the interaction term. | |
| 42 | #' h_coxreg_univar_formulas( | |
| 43 | #' variables = list( | |
| 44 | #'     time = "time", event = "status", arm = "armcd", covariates = c("X", "y"), | |
| 45 | #' strata = "SITE" | |
| 46 | #' ), | |
| 47 | #' interaction = TRUE | |
| 48 | #' ) | |
| 49 | #' | |
| 50 | #' ## Only covariates fitted in separate models. | |
| 51 | #' h_coxreg_univar_formulas( | |
| 52 | #' variables = list( | |
| 53 | #'     time = "time", event = "status", covariates = c("X", "y") | |
| 54 | #' ) | |
| 55 | #' ) | |
| 56 | #' | |
| 57 | #' @export | |
| 58 | h_coxreg_univar_formulas <- function(variables, | |
| 59 |                                      interaction = FALSE) { | |
| 60 | 50x | checkmate::assert_list(variables, names = "named") | 
| 61 | 50x | has_arm <- "arm" %in% names(variables) | 
| 62 | 50x | arm_name <- if (has_arm) "arm" else NULL | 
| 63 | ||
| 64 | 50x | checkmate::assert_character(variables$covariates, null.ok = TRUE) | 
| 65 | ||
| 66 | 50x | checkmate::assert_flag(interaction) | 
| 67 | ||
| 68 | 50x |   if (!has_arm || is.null(variables$covariates)) { | 
| 69 | 10x | checkmate::assert_false(interaction) | 
| 70 | } | |
| 71 | ||
| 72 | 48x | assert_list_of_variables(variables[c(arm_name, "event", "time")]) | 
| 73 | ||
| 74 | 48x |   if (!is.null(variables$covariates)) { | 
| 75 | 47x | forms <- paste0( | 
| 76 | 47x |       "survival::Surv(", variables$time, ", ", variables$event, ") ~ ", | 
| 77 | 47x | ifelse(has_arm, variables$arm, "1"), | 
| 78 | 47x | ifelse(interaction, " * ", " + "), | 
| 79 | 47x | variables$covariates, | 
| 80 | 47x | ifelse( | 
| 81 | 47x | !is.null(variables$strata), | 
| 82 | 47x |         paste0(" + strata(", paste0(variables$strata, collapse = ", "), ")"), | 
| 83 | "" | |
| 84 | ) | |
| 85 | ) | |
| 86 |   } else { | |
| 87 | 1x | forms <- NULL | 
| 88 | } | |
| 89 | 48x | nams <- variables$covariates | 
| 90 | 48x |   if (has_arm) { | 
| 91 | 41x | ref <- paste0( | 
| 92 | 41x |       "survival::Surv(", variables$time, ", ", variables$event, ") ~ ", | 
| 93 | 41x | variables$arm, | 
| 94 | 41x | ifelse( | 
| 95 | 41x | !is.null(variables$strata), | 
| 96 | 41x | paste0( | 
| 97 | 41x |           " + strata(", paste0(variables$strata, collapse = ", "), ")" | 
| 98 | ), | |
| 99 | "" | |
| 100 | ) | |
| 101 | ) | |
| 102 | 41x | forms <- c(ref, forms) | 
| 103 | 41x |     nams <- c("ref", nams) | 
| 104 | } | |
| 105 | 48x | stats::setNames(forms, nams) | 
| 106 | } | |
| 107 | ||
| 108 | #' @describeIn h_cox_regression Helper for multivariate Cox regression formula. Creates a formulas | |
| 109 | #' string. It is used internally by [fit_coxreg_multivar()] for the comparison of multivariate Cox | |
| 110 | #' regression models. Interactions will not be included in multivariate Cox regression model. | |
| 111 | #' | |
| 112 | #' @return | |
| 113 | #' * `h_coxreg_multivar_formula()` returns a `string` coercible into a formula (e.g [stats::as.formula()]). | |
| 114 | #' | |
| 115 | #' @examples | |
| 116 | #' # `h_coxreg_multivar_formula` | |
| 117 | #' | |
| 118 | #' h_coxreg_multivar_formula( | |
| 119 | #' variables = list( | |
| 120 | #'     time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE") | |
| 121 | #' ) | |
| 122 | #' ) | |
| 123 | #' | |
| 124 | #' # Addition of an optional strata. | |
| 125 | #' h_coxreg_multivar_formula( | |
| 126 | #' variables = list( | |
| 127 | #'     time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE"), | |
| 128 | #' strata = "SITE" | |
| 129 | #' ) | |
| 130 | #' ) | |
| 131 | #' | |
| 132 | #' # Example without treatment arm. | |
| 133 | #' h_coxreg_multivar_formula( | |
| 134 | #' variables = list( | |
| 135 | #'     time = "AVAL", event = "event", covariates = c("RACE", "AGE"), | |
| 136 | #' strata = "SITE" | |
| 137 | #' ) | |
| 138 | #' ) | |
| 139 | #' | |
| 140 | #' @export | |
| 141 | h_coxreg_multivar_formula <- function(variables) { | |
| 142 | 89x | checkmate::assert_list(variables, names = "named") | 
| 143 | 89x | has_arm <- "arm" %in% names(variables) | 
| 144 | 89x | arm_name <- if (has_arm) "arm" else NULL | 
| 145 | ||
| 146 | 89x | checkmate::assert_character(variables$covariates, null.ok = TRUE) | 
| 147 | ||
| 148 | 89x | assert_list_of_variables(variables[c(arm_name, "event", "time")]) | 
| 149 | ||
| 150 | 89x | y <- paste0( | 
| 151 | 89x |     "survival::Surv(", variables$time, ", ", variables$event, ") ~ ", | 
| 152 | 89x | ifelse(has_arm, variables$arm, "1") | 
| 153 | ) | |
| 154 | 89x |   if (length(variables$covariates) > 0) { | 
| 155 | 26x | y <- paste(y, paste(variables$covariates, collapse = " + "), sep = " + ") | 
| 156 | } | |
| 157 | 89x |   if (!is.null(variables$strata)) { | 
| 158 | 5x |     y <- paste0(y, " + strata(", paste0(variables$strata, collapse = ", "), ")") | 
| 159 | } | |
| 160 | 89x | y | 
| 161 | } | |
| 162 | ||
| 163 | #' @describeIn h_cox_regression Utility function to help tabulate the result of | |
| 164 | #' a univariate Cox regression model. | |
| 165 | #' | |
| 166 | #' @param effect (`string`)\cr the treatment variable. | |
| 167 | #' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()]. | |
| 168 | #' | |
| 169 | #' @return | |
| 170 | #' * `h_coxreg_univar_extract()` returns a `data.frame` with variables `effect`, `term`, `term_label`, `level`, | |
| 171 | #' `n`, `hr`, `lcl`, `ucl`, and `pval`. | |
| 172 | #' | |
| 173 | #' @examples | |
| 174 | #' library(survival) | |
| 175 | #' | |
| 176 | #' dta_simple <- data.frame( | |
| 177 | #' time = c(5, 5, 10, 10, 5, 5, 10, 10), | |
| 178 | #' status = c(0, 0, 1, 0, 0, 1, 1, 1), | |
| 179 | #'   armcd = factor(LETTERS[c(1, 1, 1, 1, 2, 2, 2, 2)], levels = c("A", "B")), | |
| 180 | #' var1 = c(45, 55, 65, 75, 55, 65, 85, 75), | |
| 181 | #'   var2 = c("F", "M", "F", "M", "F", "M", "F", "U") | |
| 182 | #' ) | |
| 183 | #' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple) | |
| 184 | #' result <- h_coxreg_univar_extract( | |
| 185 | #' effect = "armcd", covar = "armcd", mod = mod, data = dta_simple | |
| 186 | #' ) | |
| 187 | #' result | |
| 188 | #' | |
| 189 | #' @export | |
| 190 | h_coxreg_univar_extract <- function(effect, | |
| 191 | covar, | |
| 192 | data, | |
| 193 | mod, | |
| 194 |                                     control = control_coxreg()) { | |
| 195 | 66x | checkmate::assert_string(covar) | 
| 196 | 66x | checkmate::assert_string(effect) | 
| 197 | 66x | checkmate::assert_class(mod, "coxph") | 
| 198 | 66x | test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method] | 
| 199 | ||
| 200 | 66x | mod_aov <- muffled_car_anova(mod, test_statistic) | 
| 201 | 66x | msum <- summary(mod, conf.int = control$conf_level) | 
| 202 | 66x | sum_cox <- broom::tidy(msum) | 
| 203 | ||
| 204 | # Combine results together. | |
| 205 | 66x | effect_aov <- mod_aov[effect, , drop = TRUE] | 
| 206 | 66x | pval <- effect_aov[[grep(pattern = "Pr", x = names(effect_aov)), drop = TRUE]] | 
| 207 | 66x | sum_main <- sum_cox[grepl(effect, sum_cox$level), ] | 
| 208 | ||
| 209 | 66x |   term_label <- if (effect == covar) { | 
| 210 | 34x | paste0( | 
| 211 | 34x | levels(data[[covar]])[2], | 
| 212 | 34x |       " vs control (", | 
| 213 | 34x | levels(data[[covar]])[1], | 
| 214 | ")" | |
| 215 | ) | |
| 216 |   } else { | |
| 217 | 32x | unname(labels_or_names(data[covar])) | 
| 218 | } | |
| 219 | 66x | data.frame( | 
| 220 | 66x | effect = ifelse(covar == effect, "Treatment:", "Covariate:"), | 
| 221 | 66x | term = covar, | 
| 222 | 66x | term_label = term_label, | 
| 223 | 66x | level = levels(data[[effect]])[2], | 
| 224 | 66x | n = mod[["n"]], | 
| 225 | 66x | hr = unname(sum_main["exp(coef)"]), | 
| 226 | 66x |     lcl = unname(sum_main[grep("lower", names(sum_main))]), | 
| 227 | 66x |     ucl = unname(sum_main[grep("upper", names(sum_main))]), | 
| 228 | 66x | pval = pval, | 
| 229 | 66x | stringsAsFactors = FALSE | 
| 230 | ) | |
| 231 | } | |
| 232 | ||
| 233 | #' @describeIn h_cox_regression Tabulation of multivariate Cox regressions. Utility function to help | |
| 234 | #' tabulate the result of a multivariate Cox regression model for a treatment/covariate variable. | |
| 235 | #' | |
| 236 | #' @return | |
| 237 | #' * `h_coxreg_multivar_extract()` returns a `data.frame` with variables `pval`, `hr`, `lcl`, `ucl`, `level`, | |
| 238 | #' `n`, `term`, and `term_label`. | |
| 239 | #' | |
| 240 | #' @examples | |
| 241 | #' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple) | |
| 242 | #' result <- h_coxreg_multivar_extract( | |
| 243 | #' var = "var1", mod = mod, data = dta_simple | |
| 244 | #' ) | |
| 245 | #' result | |
| 246 | #' | |
| 247 | #' @export | |
| 248 | h_coxreg_multivar_extract <- function(var, | |
| 249 | data, | |
| 250 | mod, | |
| 251 |                                       control = control_coxreg()) { | |
| 252 | 132x | test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method] | 
| 253 | 132x | mod_aov <- muffled_car_anova(mod, test_statistic) | 
| 254 | ||
| 255 | 132x | msum <- summary(mod, conf.int = control$conf_level) | 
| 256 | 132x | sum_anova <- broom::tidy(mod_aov) | 
| 257 | 132x | sum_cox <- broom::tidy(msum) | 
| 258 | ||
| 259 | 132x |   ret_anova <- sum_anova[sum_anova$term == var, c("term", "p.value")] | 
| 260 | 132x | names(ret_anova)[2] <- "pval" | 
| 261 | 132x |   if (is.factor(data[[var]])) { | 
| 262 | 53x | ret_cox <- sum_cox[startsWith(prefix = var, x = sum_cox$level), !(names(sum_cox) %in% "exp(-coef)")] | 
| 263 |   } else { | |
| 264 | 79x | ret_cox <- sum_cox[(var == sum_cox$level), !(names(sum_cox) %in% "exp(-coef)")] | 
| 265 | } | |
| 266 | 132x |   names(ret_cox)[1:4] <- c("pval", "hr", "lcl", "ucl") | 
| 267 | 132x | varlab <- unname(labels_or_names(data[var])) | 
| 268 | 132x | ret_cox$term <- varlab | 
| 269 | ||
| 270 | 132x |   if (is.numeric(data[[var]])) { | 
| 271 | 79x | ret <- ret_cox | 
| 272 | 79x | ret$term_label <- ret$term | 
| 273 | 53x |   } else if (length(levels(data[[var]])) <= 2) { | 
| 274 | 34x | ret_anova$pval <- NA | 
| 275 | 34x | ret_anova$term_label <- paste0(varlab, " (reference = ", levels(data[[var]])[1], ")") | 
| 276 | 34x | ret_cox$level <- gsub(var, "", ret_cox$level) | 
| 277 | 34x | ret_cox$term_label <- ret_cox$level | 
| 278 | 34x | ret <- dplyr::bind_rows(ret_anova, ret_cox) | 
| 279 |   } else { | |
| 280 | 19x | ret_anova$term_label <- paste0(varlab, " (reference = ", levels(data[[var]])[1], ")") | 
| 281 | 19x | ret_cox$level <- gsub(var, "", ret_cox$level) | 
| 282 | 19x | ret_cox$term_label <- ret_cox$level | 
| 283 | 19x | ret <- dplyr::bind_rows(ret_anova, ret_cox) | 
| 284 | } | |
| 285 | ||
| 286 | 132x | as.data.frame(ret) | 
| 287 | } | 
| 1 | #' Helper functions for tabulating binary response by subgroup | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Helper functions that tabulate in a data frame statistics such as response rate | |
| 6 | #' and odds ratio for population subgroups. | |
| 7 | #' | |
| 8 | #' @inheritParams argument_convention | |
| 9 | #' @inheritParams response_subgroups | |
| 10 | #' @param arm (`factor`)\cr the treatment group variable. | |
| 11 | #' | |
| 12 | #' @details Main functionality is to prepare data for use in a layout-creating function. | |
| 13 | #' | |
| 14 | #' @examples | |
| 15 | #' library(dplyr) | |
| 16 | #' library(forcats) | |
| 17 | #' | |
| 18 | #' adrs <- tern_ex_adrs | |
| 19 | #' adrs_labels <- formatters::var_labels(adrs) | |
| 20 | #' | |
| 21 | #' adrs_f <- adrs %>% | |
| 22 | #' filter(PARAMCD == "BESRSPI") %>% | |
| 23 | #'   filter(ARM %in% c("A: Drug X", "B: Placebo")) %>% | |
| 24 | #' droplevels() %>% | |
| 25 | #' mutate( | |
| 26 | #' # Reorder levels of factor to make the placebo group the reference arm. | |
| 27 | #' ARM = fct_relevel(ARM, "B: Placebo"), | |
| 28 | #' rsp = AVALC == "CR" | |
| 29 | #' ) | |
| 30 | #' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response") | |
| 31 | #' | |
| 32 | #' @name h_response_subgroups | |
| 33 | NULL | |
| 34 | ||
| 35 | #' @describeIn h_response_subgroups Helper to prepare a data frame of binary responses by arm. | |
| 36 | #' | |
| 37 | #' @return | |
| 38 | #' * `h_proportion_df()` returns a `data.frame` with columns `arm`, `n`, `n_rsp`, and `prop`. | |
| 39 | #' | |
| 40 | #' @examples | |
| 41 | #' h_proportion_df( | |
| 42 | #' c(TRUE, FALSE, FALSE), | |
| 43 | #'   arm = factor(c("A", "A", "B"), levels = c("A", "B")) | |
| 44 | #' ) | |
| 45 | #' | |
| 46 | #' @export | |
| 47 | h_proportion_df <- function(rsp, arm) { | |
| 48 | 79x | checkmate::assert_logical(rsp) | 
| 49 | 78x | assert_valid_factor(arm, len = length(rsp)) | 
| 50 | 78x | non_missing_rsp <- !is.na(rsp) | 
| 51 | 78x | rsp <- rsp[non_missing_rsp] | 
| 52 | 78x | arm <- arm[non_missing_rsp] | 
| 53 | ||
| 54 | 78x | lst_rsp <- split(rsp, arm) | 
| 55 | 78x |   lst_results <- Map(function(x, arm) { | 
| 56 | 156x |     if (length(x) > 0) { | 
| 57 | 154x | s_prop <- s_proportion(df = x) | 
| 58 | 154x | data.frame( | 
| 59 | 154x | arm = arm, | 
| 60 | 154x | n = length(x), | 
| 61 | 154x | n_rsp = unname(s_prop$n_prop[1]), | 
| 62 | 154x | prop = unname(s_prop$n_prop[2]), | 
| 63 | 154x | stringsAsFactors = FALSE | 
| 64 | ) | |
| 65 |     } else { | |
| 66 | 2x | data.frame( | 
| 67 | 2x | arm = arm, | 
| 68 | 2x | n = 0L, | 
| 69 | 2x | n_rsp = NA, | 
| 70 | 2x | prop = NA, | 
| 71 | 2x | stringsAsFactors = FALSE | 
| 72 | ) | |
| 73 | } | |
| 74 | 78x | }, lst_rsp, names(lst_rsp)) | 
| 75 | ||
| 76 | 78x | df <- do.call(rbind, args = c(lst_results, make.row.names = FALSE)) | 
| 77 | 78x | df$arm <- factor(df$arm, levels = levels(arm)) | 
| 78 | 78x | df | 
| 79 | } | |
| 80 | ||
| 81 | #' @describeIn h_response_subgroups Summarizes proportion of binary responses by arm and across subgroups | |
| 82 | #' in a data frame. `variables` corresponds to the names of variables found in `data`, passed as a named list and | |
| 83 | #' requires elements `rsp`, `arm` and optionally `subgroups`. `groups_lists` optionally specifies | |
| 84 | #' groupings for `subgroups` variables. | |
| 85 | #' | |
| 86 | #' @return | |
| 87 | #' * `h_proportion_subgroups_df()` returns a `data.frame` with columns `arm`, `n`, `n_rsp`, `prop`, `subgroup`, | |
| 88 | #' `var`, `var_label`, and `row_type`. | |
| 89 | #' | |
| 90 | #' @examples | |
| 91 | #' h_proportion_subgroups_df( | |
| 92 | #'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")), | |
| 93 | #' data = adrs_f | |
| 94 | #' ) | |
| 95 | #' | |
| 96 | #' # Define groupings for BMRKR2 levels. | |
| 97 | #' h_proportion_subgroups_df( | |
| 98 | #'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")), | |
| 99 | #' data = adrs_f, | |
| 100 | #' groups_lists = list( | |
| 101 | #' BMRKR2 = list( | |
| 102 | #' "low" = "LOW", | |
| 103 | #'       "low/medium" = c("LOW", "MEDIUM"), | |
| 104 | #'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH") | |
| 105 | #' ) | |
| 106 | #' ) | |
| 107 | #' ) | |
| 108 | #' | |
| 109 | #' @export | |
| 110 | h_proportion_subgroups_df <- function(variables, | |
| 111 | data, | |
| 112 | groups_lists = list(), | |
| 113 |                                       label_all = "All Patients") { | |
| 114 | 17x | checkmate::assert_character(variables$rsp) | 
| 115 | 17x | checkmate::assert_character(variables$arm) | 
| 116 | 17x | checkmate::assert_character(variables$subgroups, null.ok = TRUE) | 
| 117 | 17x | assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2) | 
| 118 | 17x | assert_df_with_variables(data, variables) | 
| 119 | 17x | checkmate::assert_string(label_all) | 
| 120 | ||
| 121 | # Add All Patients. | |
| 122 | 17x | result_all <- h_proportion_df(data[[variables$rsp]], data[[variables$arm]]) | 
| 123 | 17x | result_all$subgroup <- label_all | 
| 124 | 17x | result_all$var <- "ALL" | 
| 125 | 17x | result_all$var_label <- label_all | 
| 126 | 17x | result_all$row_type <- "content" | 
| 127 | ||
| 128 | # Add Subgroups. | |
| 129 | 17x |   if (is.null(variables$subgroups)) { | 
| 130 | 3x | result_all | 
| 131 |   } else { | |
| 132 | 14x | l_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists) | 
| 133 | ||
| 134 | 14x |     l_result <- lapply(l_data, function(grp) { | 
| 135 | 58x | result <- h_proportion_df(grp$df[[variables$rsp]], grp$df[[variables$arm]]) | 
| 136 | 58x | result_labels <- grp$df_labels[rep(1, times = nrow(result)), ] | 
| 137 | 58x | cbind(result, result_labels) | 
| 138 | }) | |
| 139 | 14x | result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE)) | 
| 140 | 14x | result_subgroups$row_type <- "analysis" | 
| 141 | ||
| 142 | 14x | rbind( | 
| 143 | 14x | result_all, | 
| 144 | 14x | result_subgroups | 
| 145 | ) | |
| 146 | } | |
| 147 | } | |
| 148 | ||
| 149 | #' @describeIn h_response_subgroups Helper to prepare a data frame with estimates of | |
| 150 | #' the odds ratio between a treatment and a control arm. | |
| 151 | #' | |
| 152 | #' @inheritParams response_subgroups | |
| 153 | #' @param strata_data (`factor`, `data.frame`, or `NULL`)\cr required if stratified analysis is performed. | |
| 154 | #' | |
| 155 | #' @return | |
| 156 | #' * `h_odds_ratio_df()` returns a `data.frame` with columns `arm`, `n_tot`, `or`, `lcl`, `ucl`, `conf_level`, and | |
| 157 | #' optionally `pval` and `pval_label`. | |
| 158 | #' | |
| 159 | #' @examples | |
| 160 | #' # Unstratatified analysis. | |
| 161 | #' h_odds_ratio_df( | |
| 162 | #' c(TRUE, FALSE, FALSE, TRUE), | |
| 163 | #'   arm = factor(c("A", "A", "B", "B"), levels = c("A", "B")) | |
| 164 | #' ) | |
| 165 | #' | |
| 166 | #' # Include p-value. | |
| 167 | #' h_odds_ratio_df(adrs_f$rsp, adrs_f$ARM, method = "chisq") | |
| 168 | #' | |
| 169 | #' # Stratatified analysis. | |
| 170 | #' h_odds_ratio_df( | |
| 171 | #' rsp = adrs_f$rsp, | |
| 172 | #' arm = adrs_f$ARM, | |
| 173 | #'   strata_data = adrs_f[, c("STRATA1", "STRATA2")], | |
| 174 | #' method = "cmh" | |
| 175 | #' ) | |
| 176 | #' | |
| 177 | #' @export | |
| 178 | h_odds_ratio_df <- function(rsp, arm, strata_data = NULL, conf_level = 0.95, method = NULL) { | |
| 179 | 84x | assert_valid_factor(arm, n.levels = 2, len = length(rsp)) | 
| 180 | ||
| 181 | 84x | df_rsp <- data.frame( | 
| 182 | 84x | rsp = rsp, | 
| 183 | 84x | arm = arm | 
| 184 | ) | |
| 185 | ||
| 186 | 84x |   if (!is.null(strata_data)) { | 
| 187 | 11x | strata_var <- interaction(strata_data, drop = TRUE) | 
| 188 | 11x | strata_name <- "strata" | 
| 189 | ||
| 190 | 11x | assert_valid_factor(strata_var, len = nrow(df_rsp)) | 
| 191 | ||
| 192 | 11x | df_rsp[[strata_name]] <- strata_var | 
| 193 |   } else { | |
| 194 | 73x | strata_name <- NULL | 
| 195 | } | |
| 196 | ||
| 197 | 84x | l_df <- split(df_rsp, arm) | 
| 198 | ||
| 199 | 84x |   if (nrow(l_df[[1]]) > 0 && nrow(l_df[[2]]) > 0) { | 
| 200 | # Odds ratio and CI. | |
| 201 | 82x | result_odds_ratio <- s_odds_ratio( | 
| 202 | 82x | df = l_df[[2]], | 
| 203 | 82x | .var = "rsp", | 
| 204 | 82x | .ref_group = l_df[[1]], | 
| 205 | 82x | .in_ref_col = FALSE, | 
| 206 | 82x | .df_row = df_rsp, | 
| 207 | 82x | variables = list(arm = "arm", strata = strata_name), | 
| 208 | 82x | conf_level = conf_level | 
| 209 | ) | |
| 210 | ||
| 211 | 82x | df <- data.frame( | 
| 212 | # Dummy column needed downstream to create a nested header. | |
| 213 | 82x | arm = " ", | 
| 214 | 82x | n_tot = unname(result_odds_ratio$n_tot["n_tot"]), | 
| 215 | 82x | or = unname(result_odds_ratio$or_ci["est"]), | 
| 216 | 82x | lcl = unname(result_odds_ratio$or_ci["lcl"]), | 
| 217 | 82x | ucl = unname(result_odds_ratio$or_ci["ucl"]), | 
| 218 | 82x | conf_level = conf_level, | 
| 219 | 82x | stringsAsFactors = FALSE | 
| 220 | ) | |
| 221 | ||
| 222 | 82x |     if (!is.null(method)) { | 
| 223 | # Test for difference. | |
| 224 | 44x | result_test <- s_test_proportion_diff( | 
| 225 | 44x | df = l_df[[2]], | 
| 226 | 44x | .var = "rsp", | 
| 227 | 44x | .ref_group = l_df[[1]], | 
| 228 | 44x | .in_ref_col = FALSE, | 
| 229 | 44x | variables = list(strata = strata_name), | 
| 230 | 44x | method = method | 
| 231 | ) | |
| 232 | ||
| 233 | 44x | df$pval <- as.numeric(result_test$pval) | 
| 234 | 44x | df$pval_label <- obj_label(result_test$pval) | 
| 235 | } | |
| 236 | ||
| 237 | # In those cases cannot go through the model so will obtain n_tot from data. | |
| 238 | } else if ( | |
| 239 | 2x | (nrow(l_df[[1]]) == 0 && nrow(l_df[[2]]) > 0) || | 
| 240 | 2x | (nrow(l_df[[1]]) > 0 && nrow(l_df[[2]]) == 0) | 
| 241 |   ) { | |
| 242 | 2x | df <- data.frame( | 
| 243 | # Dummy column needed downstream to create a nested header. | |
| 244 | 2x | arm = " ", | 
| 245 | 2x | n_tot = sum(stats::complete.cases(df_rsp)), | 
| 246 | 2x | or = NA, | 
| 247 | 2x | lcl = NA, | 
| 248 | 2x | ucl = NA, | 
| 249 | 2x | conf_level = conf_level, | 
| 250 | 2x | stringsAsFactors = FALSE | 
| 251 | ) | |
| 252 | 2x |     if (!is.null(method)) { | 
| 253 | 2x | df$pval <- NA | 
| 254 | 2x | df$pval_label <- NA | 
| 255 | } | |
| 256 |   } else { | |
| 257 | ! | df <- data.frame( | 
| 258 | # Dummy column needed downstream to create a nested header. | |
| 259 | ! | arm = " ", | 
| 260 | ! | n_tot = 0L, | 
| 261 | ! | or = NA, | 
| 262 | ! | lcl = NA, | 
| 263 | ! | ucl = NA, | 
| 264 | ! | conf_level = conf_level, | 
| 265 | ! | stringsAsFactors = FALSE | 
| 266 | ) | |
| 267 | ||
| 268 | ! |     if (!is.null(method)) { | 
| 269 | ! | df$pval <- NA | 
| 270 | ! | df$pval_label <- NA | 
| 271 | } | |
| 272 | } | |
| 273 | ||
| 274 | 84x | df | 
| 275 | } | |
| 276 | ||
| 277 | #' @describeIn h_response_subgroups Summarizes estimates of the odds ratio between a treatment and a control | |
| 278 | #' arm across subgroups in a data frame. `variables` corresponds to the names of variables found in | |
| 279 | #' `data`, passed as a named list and requires elements `rsp`, `arm` and optionally `subgroups` | |
| 280 | #' and `strata`. `groups_lists` optionally specifies groupings for `subgroups` variables. | |
| 281 | #' | |
| 282 | #' @return | |
| 283 | #' * `h_odds_ratio_subgroups_df()` returns a `data.frame` with columns `arm`, `n_tot`, `or`, `lcl`, `ucl`, | |
| 284 | #' `conf_level`, `subgroup`, `var`, `var_label`, and `row_type`. | |
| 285 | #' | |
| 286 | #' @examples | |
| 287 | #' # Unstratified analysis. | |
| 288 | #' h_odds_ratio_subgroups_df( | |
| 289 | #'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")), | |
| 290 | #' data = adrs_f | |
| 291 | #' ) | |
| 292 | #' | |
| 293 | #' # Stratified analysis. | |
| 294 | #' h_odds_ratio_subgroups_df( | |
| 295 | #' variables = list( | |
| 296 | #' rsp = "rsp", | |
| 297 | #' arm = "ARM", | |
| 298 | #'     subgroups = c("SEX", "BMRKR2"), | |
| 299 | #'     strata = c("STRATA1", "STRATA2") | |
| 300 | #' ), | |
| 301 | #' data = adrs_f | |
| 302 | #' ) | |
| 303 | #' | |
| 304 | #' # Define groupings of BMRKR2 levels. | |
| 305 | #' h_odds_ratio_subgroups_df( | |
| 306 | #' variables = list( | |
| 307 | #' rsp = "rsp", | |
| 308 | #' arm = "ARM", | |
| 309 | #'     subgroups = c("SEX", "BMRKR2") | |
| 310 | #' ), | |
| 311 | #' data = adrs_f, | |
| 312 | #' groups_lists = list( | |
| 313 | #' BMRKR2 = list( | |
| 314 | #' "low" = "LOW", | |
| 315 | #'       "low/medium" = c("LOW", "MEDIUM"), | |
| 316 | #'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH") | |
| 317 | #' ) | |
| 318 | #' ) | |
| 319 | #' ) | |
| 320 | #' | |
| 321 | #' @export | |
| 322 | h_odds_ratio_subgroups_df <- function(variables, | |
| 323 | data, | |
| 324 | groups_lists = list(), | |
| 325 | conf_level = 0.95, | |
| 326 | method = NULL, | |
| 327 |                                       label_all = "All Patients") { | |
| 328 | 18x |   if ("strat" %in% names(variables)) { | 
| 329 | ! | warning( | 
| 330 | ! | "Warning: the `strat` element name of the `variables` list argument to `h_odds_ratio_subgroups_df() ", | 
| 331 | ! | "was deprecated in tern 0.9.4.\n ", | 
| 332 | ! | "Please use the name `strata` instead of `strat` in the `variables` argument." | 
| 333 | ) | |
| 334 | ! | variables[["strata"]] <- variables[["strat"]] | 
| 335 | } | |
| 336 | ||
| 337 | 18x | checkmate::assert_character(variables$rsp) | 
| 338 | 18x | checkmate::assert_character(variables$arm) | 
| 339 | 18x | checkmate::assert_character(variables$subgroups, null.ok = TRUE) | 
| 340 | 18x | checkmate::assert_character(variables$strata, null.ok = TRUE) | 
| 341 | 18x | assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2) | 
| 342 | 18x | assert_df_with_variables(data, variables) | 
| 343 | 18x | checkmate::assert_string(label_all) | 
| 344 | ||
| 345 | 18x |   strata_data <- if (is.null(variables$strata)) { | 
| 346 | 16x | NULL | 
| 347 |   } else { | |
| 348 | 2x | data[, variables$strata, drop = FALSE] | 
| 349 | } | |
| 350 | ||
| 351 | # Add All Patients. | |
| 352 | 18x | result_all <- h_odds_ratio_df( | 
| 353 | 18x | rsp = data[[variables$rsp]], | 
| 354 | 18x | arm = data[[variables$arm]], | 
| 355 | 18x | strata_data = strata_data, | 
| 356 | 18x | conf_level = conf_level, | 
| 357 | 18x | method = method | 
| 358 | ) | |
| 359 | 18x | result_all$subgroup <- label_all | 
| 360 | 18x | result_all$var <- "ALL" | 
| 361 | 18x | result_all$var_label <- label_all | 
| 362 | 18x | result_all$row_type <- "content" | 
| 363 | ||
| 364 | 18x |   if (is.null(variables$subgroups)) { | 
| 365 | 3x | result_all | 
| 366 |   } else { | |
| 367 | 15x | l_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists) | 
| 368 | ||
| 369 | 15x |     l_result <- lapply(l_data, function(grp) { | 
| 370 | 62x |       grp_strata_data <- if (is.null(variables$strata)) { | 
| 371 | 54x | NULL | 
| 372 |       } else { | |
| 373 | 8x | grp$df[, variables$strata, drop = FALSE] | 
| 374 | } | |
| 375 | ||
| 376 | 62x | result <- h_odds_ratio_df( | 
| 377 | 62x | rsp = grp$df[[variables$rsp]], | 
| 378 | 62x | arm = grp$df[[variables$arm]], | 
| 379 | 62x | strata_data = grp_strata_data, | 
| 380 | 62x | conf_level = conf_level, | 
| 381 | 62x | method = method | 
| 382 | ) | |
| 383 | 62x | result_labels <- grp$df_labels[rep(1, times = nrow(result)), ] | 
| 384 | 62x | cbind(result, result_labels) | 
| 385 | }) | |
| 386 | ||
| 387 | 15x | result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE)) | 
| 388 | 15x | result_subgroups$row_type <- "analysis" | 
| 389 | ||
| 390 | 15x | rbind( | 
| 391 | 15x | result_all, | 
| 392 | 15x | result_subgroups | 
| 393 | ) | |
| 394 | } | |
| 395 | } | 
| 1 | #' Count number of patients | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [analyze_num_patients()] creates a layout element to count total numbers of unique or | |
| 6 | #' non-unique patients. The primary analysis variable `vars` is used to uniquely identify patients. | |
| 7 | #' | |
| 8 | #' The `count_by` variable can be used to identify non-unique patients such that the number of patients with a unique | |
| 9 | #' combination of values in `vars` and `count_by` will be returned instead as the `nonunique` statistic. The `required` | |
| 10 | #' variable can be used to specify a variable required to be non-missing for the record to be included in the counts. | |
| 11 | #' | |
| 12 | #' The summarize function [summarize_num_patients()] performs the same function as [analyze_num_patients()] except it | |
| 13 | #' creates content rows, not data rows, to summarize the current table row/column context and operates on the level of | |
| 14 | #' the latest row split or the root of the table if no row splits have occurred. | |
| 15 | #' | |
| 16 | #' @inheritParams argument_convention | |
| 17 | #' @param required (`character` or `NULL`)\cr name of a variable that is required to be non-missing. | |
| 18 | #' @param count_by (`character` or `NULL`)\cr name of a variable to be combined with `vars` when counting | |
| 19 | #' `nonunique` records. | |
| 20 | #' @param unique_count_suffix (`flag`)\cr whether the `"(n)"` suffix should be added to `unique_count` labels. | |
| 21 | #' Defaults to `TRUE`. | |
| 22 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 23 | #' | |
| 24 | #'   Options are: ``r shQuote(get_stats("summarize_num_patients"), type = "sh")`` | |
| 25 | #' | |
| 26 | #' @name summarize_num_patients | |
| 27 | #' @order 1 | |
| 28 | NULL | |
| 29 | ||
| 30 | #' @describeIn summarize_num_patients Statistics function which counts the number of | |
| 31 | #' unique patients, the corresponding percentage taken with respect to the | |
| 32 | #' total number of patients, and the number of non-unique patients. | |
| 33 | #' | |
| 34 | #' @param x (`character` or `factor`)\cr vector of patient IDs. | |
| 35 | #' | |
| 36 | #' @return | |
| 37 | #' * `s_num_patients()` returns a named `list` of 3 statistics: | |
| 38 | #' * `unique`: Vector of counts and percentages. | |
| 39 | #' * `nonunique`: Vector of counts. | |
| 40 | #' * `unique_count`: Counts. | |
| 41 | #' | |
| 42 | #' @examples | |
| 43 | #' # Use the statistics function to count number of unique and nonunique patients. | |
| 44 | #' s_num_patients(x = as.character(c(1, 1, 1, 2, 4, NA)), labelstr = "", .N_col = 6L) | |
| 45 | #' s_num_patients( | |
| 46 | #' x = as.character(c(1, 1, 1, 2, 4, NA)), | |
| 47 | #' labelstr = "", | |
| 48 | #' .N_col = 6L, | |
| 49 | #' count_by = c(1, 1, 2, 1, 1, 1) | |
| 50 | #' ) | |
| 51 | #' | |
| 52 | #' @export | |
| 53 | s_num_patients <- function(x, | |
| 54 | labelstr, | |
| 55 | .N_col, # nolint | |
| 56 | ..., | |
| 57 | count_by = NULL, | |
| 58 |                            unique_count_suffix = TRUE) { | |
| 59 | 181x | checkmate::assert_string(labelstr) | 
| 60 | 181x | checkmate::assert_count(.N_col) | 
| 61 | 181x |   checkmate::assert_multi_class(x, classes = c("factor", "character")) | 
| 62 | 181x | checkmate::assert_flag(unique_count_suffix) | 
| 63 | ||
| 64 | 181x | count1 <- n_available(unique(x)) | 
| 65 | 181x | count2 <- n_available(x) | 
| 66 | ||
| 67 | 181x |   if (!is.null(count_by)) { | 
| 68 | 16x | checkmate::assert_vector(count_by, len = length(x)) | 
| 69 | 16x | count2 <- n_available(unique(interaction(x, count_by))) | 
| 70 | } | |
| 71 | ||
| 72 | 181x | out <- list( | 
| 73 | 181x | unique = formatters::with_label(c(count1, ifelse(count1 == 0 && .N_col == 0, 0, count1 / .N_col)), labelstr), | 
| 74 | 181x | nonunique = formatters::with_label(count2, labelstr), | 
| 75 | 181x | unique_count = formatters::with_label( | 
| 76 | 181x | count1, ifelse(unique_count_suffix, paste0(labelstr, if (nzchar(labelstr)) " ", "(n)"), labelstr) | 
| 77 | ) | |
| 78 | ) | |
| 79 | ||
| 80 | 181x | out | 
| 81 | } | |
| 82 | ||
| 83 | #' @describeIn summarize_num_patients Statistics function which counts the number of unique patients | |
| 84 | #' in a column (variable), the corresponding percentage taken with respect to the total number of | |
| 85 | #' patients, and the number of non-unique patients in the column. | |
| 86 | #' | |
| 87 | #' @return | |
| 88 | #' * `s_num_patients_content()` returns the same values as `s_num_patients()`. | |
| 89 | #' | |
| 90 | #' @examples | |
| 91 | #' # Count number of unique and non-unique patients. | |
| 92 | #' | |
| 93 | #' df <- data.frame( | |
| 94 | #' USUBJID = as.character(c(1, 2, 1, 4, NA)), | |
| 95 | #' EVENT = as.character(c(10, 15, 10, 17, 8)) | |
| 96 | #' ) | |
| 97 | #' s_num_patients_content(df, .N_col = 5, .var = "USUBJID") | |
| 98 | #' | |
| 99 | #' df_by_event <- data.frame( | |
| 100 | #' USUBJID = as.character(c(1, 2, 1, 4, NA)), | |
| 101 | #' EVENT = c(10, 15, 10, 17, 8) | |
| 102 | #' ) | |
| 103 | #' s_num_patients_content(df_by_event, .N_col = 5, .var = "USUBJID", count_by = "EVENT") | |
| 104 | #' | |
| 105 | #' @export | |
| 106 | s_num_patients_content <- function(df, | |
| 107 | labelstr = "", | |
| 108 | .N_col, # nolint | |
| 109 | .var, | |
| 110 | ..., | |
| 111 | required = NULL, | |
| 112 | count_by = NULL, | |
| 113 |                                    unique_count_suffix = TRUE) { | |
| 114 | 175x | checkmate::assert_string(.var) | 
| 115 | 175x | checkmate::assert_data_frame(df) | 
| 116 | 175x |   if (is.null(count_by)) { | 
| 117 | 162x | assert_df_with_variables(df, list(id = .var)) | 
| 118 |   } else { | |
| 119 | 13x | assert_df_with_variables(df, list(id = .var, count_by = count_by)) | 
| 120 | } | |
| 121 | 175x |   if (!is.null(required)) { | 
| 122 | ! | checkmate::assert_string(required) | 
| 123 | ! | assert_df_with_variables(df, list(required = required)) | 
| 124 | ! | df <- df[!is.na(df[[required]]), , drop = FALSE] | 
| 125 | } | |
| 126 | ||
| 127 | 175x | x <- df[[.var]] | 
| 128 | 175x | y <- if (is.null(count_by)) NULL else df[[count_by]] | 
| 129 | ||
| 130 | 175x | s_num_patients( | 
| 131 | 175x | x = x, | 
| 132 | 175x | labelstr = labelstr, | 
| 133 | 175x | .N_col = .N_col, | 
| 134 | 175x | count_by = y, | 
| 135 | 175x | unique_count_suffix = unique_count_suffix | 
| 136 | ) | |
| 137 | } | |
| 138 | ||
| 139 | #' @describeIn summarize_num_patients Formatted analysis function which is used as `afun` | |
| 140 | #' in `analyze_num_patients()` and as `cfun` in `summarize_num_patients()`. | |
| 141 | #' | |
| 142 | #' @return | |
| 143 | #' * `a_num_patients()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 144 | #' | |
| 145 | #' @keywords internal | |
| 146 | a_num_patients <- function(df, | |
| 147 | labelstr = "", | |
| 148 | ..., | |
| 149 | .stats = NULL, | |
| 150 | .stat_names = NULL, | |
| 151 | .formats = NULL, | |
| 152 | .labels = NULL, | |
| 153 |                            .indent_mods = NULL) { | |
| 154 | # Check for additional parameters to the statistics function | |
| 155 | 86x | dots_extra_args <- list(...) | 
| 156 | 86x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 157 | 86x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 158 | ||
| 159 | # Check for user-defined functions | |
| 160 | 86x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 161 | 86x | .stats <- default_and_custom_stats_list$all_stats | 
| 162 | 86x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 163 | ||
| 164 | # Apply statistics function | |
| 165 | 86x | x_stats <- .apply_stat_functions( | 
| 166 | 86x | default_stat_fnc = s_num_patients_content, | 
| 167 | 86x | custom_stat_fnc_list = custom_stat_functions, | 
| 168 | 86x | args_list = c( | 
| 169 | 86x | df = list(df), | 
| 170 | 86x | labelstr = list(labelstr), | 
| 171 | 86x | extra_afun_params, | 
| 172 | 86x | dots_extra_args | 
| 173 | ) | |
| 174 | ) | |
| 175 | ||
| 176 | # Fill in formatting defaults | |
| 177 | 86x |   .stats <- get_stats("summarize_num_patients", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) | 
| 178 | 86x | .formats <- get_formats_from_stats(.stats, .formats) | 
| 179 | 86x | .labels <- get_labels_from_stats( | 
| 180 | 86x | .stats, .labels, | 
| 181 | 86x | tern_defaults = c(lapply(x_stats, attr, "label")[nchar(lapply(x_stats, attr, "label")) > 0], tern_default_labels) | 
| 182 | ) | |
| 183 | 86x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods) | 
| 184 | ||
| 185 | 86x | x_stats <- x_stats[.stats] | 
| 186 | ||
| 187 | # Auto format handling | |
| 188 | 86x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 189 | ||
| 190 | # Get and check statistical names | |
| 191 | 86x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 192 | ||
| 193 | 86x | in_rows( | 
| 194 | 86x | .list = x_stats, | 
| 195 | 86x | .formats = .formats, | 
| 196 | 86x | .names = .labels %>% .unlist_keep_nulls(), | 
| 197 | 86x | .stat_names = .stat_names, | 
| 198 | 86x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 199 | 86x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 200 | ) | |
| 201 | } | |
| 202 | ||
| 203 | #' @describeIn summarize_num_patients Layout-creating function which can take statistics function arguments | |
| 204 | #' and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()]. | |
| 205 | #' | |
| 206 | #' @return | |
| 207 | #' * `summarize_num_patients()` returns a layout object suitable for passing to further layouting functions, | |
| 208 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 209 | #' the statistics from `s_num_patients_content()` to the table layout. | |
| 210 | #' | |
| 211 | #' @examples | |
| 212 | #' # summarize_num_patients | |
| 213 | #' tbl <- basic_table() %>% | |
| 214 | #'   split_cols_by("ARM") %>% | |
| 215 | #'   split_rows_by("SEX") %>% | |
| 216 | #'   summarize_num_patients("USUBJID", .stats = "unique_count") %>% | |
| 217 | #' build_table(df) | |
| 218 | #' | |
| 219 | #' tbl | |
| 220 | #' | |
| 221 | #' @export | |
| 222 | #' @order 3 | |
| 223 | summarize_num_patients <- function(lyt, | |
| 224 | var, | |
| 225 | required = NULL, | |
| 226 | count_by = NULL, | |
| 227 | unique_count_suffix = TRUE, | |
| 228 | na_str = default_na_str(), | |
| 229 | riskdiff = FALSE, | |
| 230 | ..., | |
| 231 |                                    .stats = c("unique", "nonunique", "unique_count"), | |
| 232 | .stat_names = NULL, | |
| 233 | .formats = NULL, | |
| 234 | .labels = list( | |
| 235 | unique = "Number of patients with at least one event", | |
| 236 | nonunique = "Number of events" | |
| 237 | ), | |
| 238 |                                    .indent_mods = 0L) { | |
| 239 | 17x | checkmate::assert_flag(riskdiff) | 
| 240 | 17x | afun <- if (isFALSE(riskdiff)) a_num_patients else afun_riskdiff | 
| 241 | ||
| 242 | # Process standard extra arguments | |
| 243 | 17x |   extra_args <- list(".stats" = .stats) | 
| 244 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 245 | 1x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 246 | 17x | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 247 | 17x |   if (is.null(.indent_mods)) { | 
| 248 | ! | indent_mod <- 0L | 
| 249 | 17x |   } else if (length(.indent_mods) == 1) { | 
| 250 | 17x | indent_mod <- .indent_mods | 
| 251 |   } else { | |
| 252 | ! | indent_mod <- 0L | 
| 253 | ! | extra_args[[".indent_mods"]] <- .indent_mods | 
| 254 | } | |
| 255 | ||
| 256 | # Process additional arguments to the statistic function | |
| 257 | 17x | extra_args <- c( | 
| 258 | 17x | extra_args, | 
| 259 | 17x | required = required, count_by = count_by, unique_count_suffix = unique_count_suffix, | 
| 260 | 17x |     if (!isFALSE(riskdiff)) list(afun = list("s_num_patients_content" = a_num_patients)), | 
| 261 | ... | |
| 262 | ) | |
| 263 | ||
| 264 | # Append additional info from layout to the analysis function | |
| 265 | 17x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 266 | 17x | formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]]) | 
| 267 | ||
| 268 | 17x | summarize_row_groups( | 
| 269 | 17x | lyt = lyt, | 
| 270 | 17x | var = var, | 
| 271 | 17x | cfun = afun, | 
| 272 | 17x | na_str = na_str, | 
| 273 | 17x | extra_args = extra_args, | 
| 274 | 17x | indent_mod = indent_mod | 
| 275 | ) | |
| 276 | } | |
| 277 | ||
| 278 | #' @describeIn summarize_num_patients Layout-creating function which can take statistics function arguments | |
| 279 | #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 280 | #' | |
| 281 | #' @return | |
| 282 | #' * `analyze_num_patients()` returns a layout object suitable for passing to further layouting functions, | |
| 283 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 284 | #' the statistics from `s_num_patients_content()` to the table layout. | |
| 285 | #' | |
| 286 | #' @details In general, functions that starts with `analyze*` are expected to | |
| 287 | #' work like [rtables::analyze()], while functions that starts with `summarize*` | |
| 288 | #' are based upon [rtables::summarize_row_groups()]. The latter provides a | |
| 289 | #' value for each dividing split in the row and column space, but, being it | |
| 290 | #' bound to the fundamental splits, it is repeated by design in every page | |
| 291 | #' when pagination is involved. | |
| 292 | #' | |
| 293 | #' @note As opposed to [summarize_num_patients()], this function does not repeat the produced rows. | |
| 294 | #' | |
| 295 | #' @examples | |
| 296 | #' df <- data.frame( | |
| 297 | #' USUBJID = as.character(c(1, 2, 1, 4, NA, 6, 6, 8, 9)), | |
| 298 | #'   ARM = c("A", "A", "A", "A", "A", "B", "B", "B", "B"), | |
| 299 | #' AGE = c(10, 15, 10, 17, 8, 11, 11, 19, 17), | |
| 300 | #'   SEX = c("M", "M", "M", "F", "F", "F", "M", "F", "M") | |
| 301 | #' ) | |
| 302 | #' | |
| 303 | #' # analyze_num_patients | |
| 304 | #' tbl <- basic_table() %>% | |
| 305 | #'   split_cols_by("ARM") %>% | |
| 306 | #' add_colcounts() %>% | |
| 307 | #'   analyze_num_patients("USUBJID", .stats = c("unique")) %>% | |
| 308 | #' build_table(df) | |
| 309 | #' | |
| 310 | #' tbl | |
| 311 | #' | |
| 312 | #' @export | |
| 313 | #' @order 2 | |
| 314 | analyze_num_patients <- function(lyt, | |
| 315 | vars, | |
| 316 | required = NULL, | |
| 317 | count_by = NULL, | |
| 318 | unique_count_suffix = TRUE, | |
| 319 | na_str = default_na_str(), | |
| 320 | nested = TRUE, | |
| 321 |                                  show_labels = c("default", "visible", "hidden"), | |
| 322 | riskdiff = FALSE, | |
| 323 | ..., | |
| 324 |                                  .stats = c("unique", "nonunique", "unique_count"), | |
| 325 | .stat_names = NULL, | |
| 326 | .formats = NULL, | |
| 327 | .labels = list( | |
| 328 | unique = "Number of patients with at least one event", | |
| 329 | nonunique = "Number of events" | |
| 330 | ), | |
| 331 |                                  .indent_mods = NULL) { | |
| 332 | 4x | checkmate::assert_flag(riskdiff) | 
| 333 | 4x | afun <- if (isFALSE(riskdiff)) a_num_patients else afun_riskdiff | 
| 334 | ||
| 335 | # Process standard extra arguments | |
| 336 | 4x |   extra_args <- list(".stats" = .stats) | 
| 337 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 338 | ! | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 339 | 4x | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 340 | ! | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 341 | ||
| 342 | # Process additional arguments to the statistic function | |
| 343 | 4x | extra_args <- c( | 
| 344 | 4x | extra_args, | 
| 345 | 4x | required = required, count_by = count_by, unique_count_suffix = unique_count_suffix, | 
| 346 | 4x |     if (!isFALSE(riskdiff)) list(afun = list("s_num_patients_content" = a_num_patients)), | 
| 347 | ... | |
| 348 | ) | |
| 349 | ||
| 350 | # Append additional info from layout to the analysis function | |
| 351 | 4x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 352 | 4x | formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]]) | 
| 353 | ||
| 354 | 4x | analyze( | 
| 355 | 4x | lyt = lyt, | 
| 356 | 4x | vars = vars, | 
| 357 | 4x | afun = afun, | 
| 358 | 4x | na_str = na_str, | 
| 359 | 4x | nested = nested, | 
| 360 | 4x | extra_args = extra_args, | 
| 361 | 4x | show_labels = show_labels | 
| 362 | ) | |
| 363 | } | 
| 1 | #' Count the number of patients with a particular event | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [count_patients_with_event()] creates a layout element to calculate patient counts for a | |
| 6 | #' user-specified set of events. | |
| 7 | #' | |
| 8 | #' This function analyzes primary analysis variable `vars` which indicates unique subject identifiers. Events | |
| 9 | #' are defined by the user as a named vector via the `filters` argument, where each name corresponds to a | |
| 10 | #' variable and each value is the value(s) that that variable takes for the event. | |
| 11 | #' | |
| 12 | #' If there are multiple records with the same event recorded for a patient, only one occurrence is counted. | |
| 13 | #' | |
| 14 | #' @inheritParams argument_convention | |
| 15 | #' @param filters (`character`)\cr a character vector specifying the column names and flag variables | |
| 16 | #' to be used for counting the number of unique identifiers satisfying such conditions. | |
| 17 | #' Multiple column names and flags are accepted in this format | |
| 18 | #'   `c("column_name1" = "flag1", "column_name2" = "flag2")`. | |
| 19 | #' Note that only equality is being accepted as condition. | |
| 20 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 21 | #' | |
| 22 | #'   Options are: ``r shQuote(get_stats("count_patients_with_event"), type = "sh")`` | |
| 23 | #' | |
| 24 | #' @seealso [count_patients_with_flags()] | |
| 25 | #' | |
| 26 | #' @name count_patients_with_event | |
| 27 | #' @order 1 | |
| 28 | NULL | |
| 29 | ||
| 30 | #' @describeIn count_patients_with_event Statistics function which counts the number of patients for which | |
| 31 | #' the defined event has occurred. | |
| 32 | #' | |
| 33 | #' @inheritParams analyze_variables | |
| 34 | #' @param .var (`string`)\cr name of the column that contains the unique identifier. | |
| 35 | #' | |
| 36 | #' @return | |
| 37 | #' * `s_count_patients_with_event()` returns the count and fraction of unique identifiers with the defined event. | |
| 38 | #' | |
| 39 | #' @examples | |
| 40 | #' s_count_patients_with_event( | |
| 41 | #' tern_ex_adae, | |
| 42 | #' .var = "SUBJID", | |
| 43 | #'   filters = c("TRTEMFL" = "Y"), | |
| 44 | #' ) | |
| 45 | #' | |
| 46 | #' s_count_patients_with_event( | |
| 47 | #' tern_ex_adae, | |
| 48 | #' .var = "SUBJID", | |
| 49 | #'   filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL") | |
| 50 | #' ) | |
| 51 | #' | |
| 52 | #' s_count_patients_with_event( | |
| 53 | #' tern_ex_adae, | |
| 54 | #' .var = "SUBJID", | |
| 55 | #'   filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL"), | |
| 56 | #' denom = "N_col", | |
| 57 | #' .N_col = 456 | |
| 58 | #' ) | |
| 59 | #' | |
| 60 | #' @export | |
| 61 | s_count_patients_with_event <- function(df, | |
| 62 | .var, | |
| 63 | .N_col = ncol(df), # nolint | |
| 64 | .N_row = nrow(df), # nolint | |
| 65 | ..., | |
| 66 | filters, | |
| 67 |                                         denom = c("n", "N_col", "N_row")) { | |
| 68 | 51x | col_names <- names(filters) | 
| 69 | 51x | filter_values <- filters | 
| 70 | ||
| 71 | 51x | checkmate::assert_subset(col_names, colnames(df)) | 
| 72 | ||
| 73 | 51x | temp <- Map( | 
| 74 | 51x | function(x, y) which(df[[x]] == y), | 
| 75 | 51x | col_names, | 
| 76 | 51x | filter_values | 
| 77 | ) | |
| 78 | 51x | position_satisfy_filters <- Reduce(intersect, temp) | 
| 79 | 51x | id_satisfy_filters <- as.character(unique(df[position_satisfy_filters, ][[.var]])) | 
| 80 | 51x | result <- s_count_values( | 
| 81 | 51x | as.character(unique(df[[.var]])), | 
| 82 | 51x | id_satisfy_filters, | 
| 83 | 51x | denom = denom, | 
| 84 | 51x | .N_col = .N_col, | 
| 85 | 51x | .N_row = .N_row | 
| 86 | ) | |
| 87 | 51x | result | 
| 88 | } | |
| 89 | ||
| 90 | #' @describeIn count_patients_with_event Formatted analysis function which is used as `afun` | |
| 91 | #' in `count_patients_with_event()`. | |
| 92 | #' | |
| 93 | #' @return | |
| 94 | #' * `a_count_patients_with_event()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 95 | #' | |
| 96 | #' @examples | |
| 97 | #' a_count_patients_with_event( | |
| 98 | #' tern_ex_adae, | |
| 99 | #' .var = "SUBJID", | |
| 100 | #'   filters = c("TRTEMFL" = "Y"), | |
| 101 | #' .N_col = 100, | |
| 102 | #' .N_row = 100 | |
| 103 | #' ) | |
| 104 | #' | |
| 105 | #' @export | |
| 106 | a_count_patients_with_event <- function(df, | |
| 107 | labelstr = "", | |
| 108 | ..., | |
| 109 | .stats = NULL, | |
| 110 | .stat_names = NULL, | |
| 111 | .formats = NULL, | |
| 112 | .labels = NULL, | |
| 113 |                                         .indent_mods = NULL) { | |
| 114 | # Check for additional parameters to the statistics function | |
| 115 | 19x | dots_extra_args <- list(...) | 
| 116 | 19x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 117 | 19x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 118 | ||
| 119 | # Check for user-defined functions | |
| 120 | 19x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 121 | 19x | .stats <- default_and_custom_stats_list$all_stats | 
| 122 | 19x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 123 | ||
| 124 | # Apply statistics function | |
| 125 | 19x | x_stats <- .apply_stat_functions( | 
| 126 | 19x | default_stat_fnc = s_count_patients_with_event, | 
| 127 | 19x | custom_stat_fnc_list = custom_stat_functions, | 
| 128 | 19x | args_list = c( | 
| 129 | 19x | df = list(df), | 
| 130 | 19x | extra_afun_params, | 
| 131 | 19x | dots_extra_args | 
| 132 | ) | |
| 133 | ) | |
| 134 | ||
| 135 | # Fill in formatting defaults | |
| 136 | 19x |   .stats <- get_stats("count_patients_with_event", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) | 
| 137 | 19x | .formats <- get_formats_from_stats(.stats, .formats) | 
| 138 | 19x | .labels <- get_labels_from_stats(.stats, .labels) | 
| 139 | 19x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods) | 
| 140 | ||
| 141 | 19x | x_stats <- x_stats[.stats] | 
| 142 | ||
| 143 | # Auto format handling | |
| 144 | 19x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 145 | ||
| 146 | # Get and check statistical names | |
| 147 | 19x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 148 | ||
| 149 | 19x | in_rows( | 
| 150 | 19x | .list = x_stats, | 
| 151 | 19x | .formats = .formats, | 
| 152 | 19x | .names = names(.labels), | 
| 153 | 19x | .stat_names = .stat_names, | 
| 154 | 19x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 155 | 19x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 156 | ) | |
| 157 | } | |
| 158 | ||
| 159 | #' @describeIn count_patients_with_event Layout-creating function which can take statistics function | |
| 160 | #' arguments and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 161 | #' | |
| 162 | #' @return | |
| 163 | #' * `count_patients_with_event()` returns a layout object suitable for passing to further layouting functions, | |
| 164 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 165 | #' the statistics from `s_count_patients_with_event()` to the table layout. | |
| 166 | #' | |
| 167 | #' @examples | |
| 168 | #' lyt <- basic_table() %>% | |
| 169 | #'   split_cols_by("ARM") %>% | |
| 170 | #' add_colcounts() %>% | |
| 171 | #' count_values( | |
| 172 | #' "STUDYID", | |
| 173 | #' values = "AB12345", | |
| 174 | #' .stats = "count", | |
| 175 | #' .labels = c(count = "Total AEs") | |
| 176 | #' ) %>% | |
| 177 | #' count_patients_with_event( | |
| 178 | #' "SUBJID", | |
| 179 | #'     filters = c("TRTEMFL" = "Y"), | |
| 180 | #' .labels = c(count_fraction = "Total number of patients with at least one adverse event"), | |
| 181 | #' table_names = "tbl_all" | |
| 182 | #' ) %>% | |
| 183 | #' count_patients_with_event( | |
| 184 | #' "SUBJID", | |
| 185 | #'     filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL"), | |
| 186 | #' .labels = c(count_fraction = "Total number of patients with fatal AEs"), | |
| 187 | #' table_names = "tbl_fatal" | |
| 188 | #' ) %>% | |
| 189 | #' count_patients_with_event( | |
| 190 | #' "SUBJID", | |
| 191 | #'     filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL", "AEREL" = "Y"), | |
| 192 | #' .labels = c(count_fraction = "Total number of patients with related fatal AEs"), | |
| 193 | #' .indent_mods = c(count_fraction = 2L), | |
| 194 | #' table_names = "tbl_rel_fatal" | |
| 195 | #' ) | |
| 196 | #' | |
| 197 | #' build_table(lyt, tern_ex_adae, alt_counts_df = tern_ex_adsl) | |
| 198 | #' | |
| 199 | #' @export | |
| 200 | #' @order 2 | |
| 201 | count_patients_with_event <- function(lyt, | |
| 202 | vars, | |
| 203 | filters, | |
| 204 | riskdiff = FALSE, | |
| 205 | na_str = default_na_str(), | |
| 206 | nested = TRUE, | |
| 207 | show_labels = ifelse(length(vars) > 1, "visible", "hidden"), | |
| 208 | ..., | |
| 209 | table_names = vars, | |
| 210 | .stats = "count_fraction", | |
| 211 | .stat_names = NULL, | |
| 212 | .formats = list(count_fraction = format_count_fraction_fixed_dp), | |
| 213 | .labels = NULL, | |
| 214 |                                       .indent_mods = NULL) { | |
| 215 | 7x | checkmate::assert_flag(riskdiff) | 
| 216 | 7x | afun <- if (isFALSE(riskdiff)) a_count_patients_with_event else afun_riskdiff | 
| 217 | ||
| 218 | # Process standard extra arguments | |
| 219 | 7x |   extra_args <- list(".stats" = .stats) | 
| 220 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 221 | 7x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 222 | 6x | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 223 | ! | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 224 | ||
| 225 | # Process additional arguments to the statistic function | |
| 226 | 7x | extra_args <- c( | 
| 227 | 7x | extra_args, | 
| 228 | 7x | filters = list(filters), | 
| 229 | 7x |     if (!isFALSE(riskdiff)) list(afun = list("s_count_patients_with_event" = a_count_patients_with_event)), | 
| 230 | ... | |
| 231 | ) | |
| 232 | ||
| 233 | # Append additional info from layout to the analysis function | |
| 234 | 7x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 235 | 7x | formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]]) | 
| 236 | ||
| 237 | 7x | analyze( | 
| 238 | 7x | lyt = lyt, | 
| 239 | 7x | vars = vars, | 
| 240 | 7x | afun = afun, | 
| 241 | 7x | na_str = na_str, | 
| 242 | 7x | nested = nested, | 
| 243 | 7x | extra_args = extra_args, | 
| 244 | 7x | show_labels = show_labels, | 
| 245 | 7x | table_names = table_names | 
| 246 | ) | |
| 247 | } | 
| 1 | #' Analyze numeric variables in columns | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("experimental")` | |
| 4 | #' | |
| 5 | #' The layout-creating function [analyze_vars_in_cols()] creates a layout element to generate a column-wise | |
| 6 | #' analysis table. | |
| 7 | #' | |
| 8 | #' This function sets the analysis methods as column labels and is a wrapper for [rtables::analyze_colvars()]. | |
| 9 | #' It was designed principally for PK tables. | |
| 10 | #' | |
| 11 | #' @inheritParams argument_convention | |
| 12 | #' @inheritParams rtables::analyze_colvars | |
| 13 | #' @param imp_rule (`string` or `NULL`)\cr imputation rule setting. Defaults to `NULL` for no imputation rule. Can | |
| 14 | #' also be `"1/3"` to implement 1/3 imputation rule or `"1/2"` to implement 1/2 imputation rule. In order | |
| 15 | #' to use an imputation rule, the `avalcat_var` argument must be specified. See [imputation_rule()] | |
| 16 | #' for more details on imputation. | |
| 17 | #' @param avalcat_var (`string`)\cr if `imp_rule` is not `NULL`, name of variable that indicates whether a | |
| 18 | #' row in the data corresponds to an analysis value in category `"BLQ"`, `"LTR"`, `"<PCLLOQ"`, or none of | |
| 19 | #' the above (defaults to `"AVALCAT1"`). Variable must be present in the data and should match the variable | |
| 20 | #' used to calculate the `n_blq` statistic (if included in `.stats`). | |
| 21 | #' @param cache (`flag`)\cr whether to store computed values in a temporary caching environment. This will | |
| 22 | #' speed up calculations in large tables, but should be set to `FALSE` if the same `rtable` layout is | |
| 23 | #' used for multiple tables with different data. Defaults to `FALSE`. | |
| 24 | #' @param row_labels (`character`)\cr as this function works in columns space, usually `.labels` | |
| 25 | #' character vector applies on the column space. You can change the row labels by defining this | |
| 26 | #' parameter to a named character vector with names corresponding to the split values. It defaults | |
| 27 | #' to `NULL` and if it contains only one `string`, it will duplicate that as a row label. | |
| 28 | #' @param do_summarize_row_groups (`flag`)\cr defaults to `FALSE` and applies the analysis to the current | |
| 29 | #' label rows. This is a wrapper of [rtables::summarize_row_groups()] and it can accept `labelstr` | |
| 30 | #' to define row labels. This behavior is not supported as we never need to overload row labels. | |
| 31 | #' @param split_col_vars (`flag`)\cr defaults to `TRUE` and puts the analysis results onto the columns. | |
| 32 | #' This option allows you to add multiple instances of this functions, also in a nested fashion, | |
| 33 | #' without adding more splits. This split must happen only one time on a single layout. | |
| 34 | #' | |
| 35 | #' @return | |
| 36 | #' A layout object suitable for passing to further layouting functions, or to [rtables::build_table()]. | |
| 37 | #' Adding this function to an `rtable` layout will summarize the given variables, arrange the output | |
| 38 | #' in columns, and add it to the table layout. | |
| 39 | #' | |
| 40 | #' @note | |
| 41 | #' * This is an experimental implementation of [rtables::summarize_row_groups()] and [rtables::analyze_colvars()] | |
| 42 | #' that may be subjected to changes as `rtables` extends its support to more complex analysis pipelines in the | |
| 43 | #' column space. We encourage users to read the examples carefully and file issues for different use cases. | |
| 44 | #' * In this function, `labelstr` behaves atypically. If `labelstr = NULL` (the default), row labels are assigned | |
| 45 | #' automatically as the split values if `do_summarize_row_groups = FALSE` (the default), and as the group label | |
| 46 | #' if `do_summarize_row_groups = TRUE`. | |
| 47 | #' | |
| 48 | #' @seealso [analyze_vars()], [rtables::analyze_colvars()]. | |
| 49 | #' | |
| 50 | #' @examples | |
| 51 | #' library(dplyr) | |
| 52 | #' | |
| 53 | #' # Data preparation | |
| 54 | #' adpp <- tern_ex_adpp %>% h_pkparam_sort() | |
| 55 | #' | |
| 56 | #' lyt <- basic_table() %>% | |
| 57 | #' split_rows_by(var = "STRATA1", label_pos = "topleft") %>% | |
| 58 | #' split_rows_by( | |
| 59 | #' var = "SEX", | |
| 60 | #' label_pos = "topleft", | |
| 61 | #' child_labels = "hidden" | |
| 62 | #' ) %>% # Removes duplicated labels | |
| 63 | #' analyze_vars_in_cols(vars = "AGE") | |
| 64 | #' result <- build_table(lyt = lyt, df = adpp) | |
| 65 | #' result | |
| 66 | #' | |
| 67 | #' # By selecting just some statistics and ad-hoc labels | |
| 68 | #' lyt <- basic_table() %>% | |
| 69 | #' split_rows_by(var = "ARM", label_pos = "topleft") %>% | |
| 70 | #' split_rows_by( | |
| 71 | #' var = "SEX", | |
| 72 | #' label_pos = "topleft", | |
| 73 | #' child_labels = "hidden", | |
| 74 | #' split_fun = drop_split_levels | |
| 75 | #' ) %>% | |
| 76 | #' analyze_vars_in_cols( | |
| 77 | #' vars = "AGE", | |
| 78 | #'     .stats = c("n", "cv", "geom_mean"), | |
| 79 | #' .labels = c( | |
| 80 | #' n = "aN", | |
| 81 | #' cv = "aCV", | |
| 82 | #' geom_mean = "aGeomMean" | |
| 83 | #' ) | |
| 84 | #' ) | |
| 85 | #' result <- build_table(lyt = lyt, df = adpp) | |
| 86 | #' result | |
| 87 | #' | |
| 88 | #' # Changing row labels | |
| 89 | #' lyt <- basic_table() %>% | |
| 90 | #' analyze_vars_in_cols( | |
| 91 | #' vars = "AGE", | |
| 92 | #' row_labels = "some custom label" | |
| 93 | #' ) | |
| 94 | #' result <- build_table(lyt, df = adpp) | |
| 95 | #' result | |
| 96 | #' | |
| 97 | #' # Pharmacokinetic parameters | |
| 98 | #' lyt <- basic_table() %>% | |
| 99 | #' split_rows_by( | |
| 100 | #' var = "TLG_DISPLAY", | |
| 101 | #' split_label = "PK Parameter", | |
| 102 | #' label_pos = "topleft", | |
| 103 | #' child_labels = "hidden" | |
| 104 | #' ) %>% | |
| 105 | #' analyze_vars_in_cols( | |
| 106 | #' vars = "AVAL" | |
| 107 | #' ) | |
| 108 | #' result <- build_table(lyt, df = adpp) | |
| 109 | #' result | |
| 110 | #' | |
| 111 | #' # Multiple calls (summarize label and analyze underneath) | |
| 112 | #' lyt <- basic_table() %>% | |
| 113 | #' split_rows_by( | |
| 114 | #' var = "TLG_DISPLAY", | |
| 115 | #' split_label = "PK Parameter", | |
| 116 | #' label_pos = "topleft" | |
| 117 | #' ) %>% | |
| 118 | #' analyze_vars_in_cols( | |
| 119 | #' vars = "AVAL", | |
| 120 | #' do_summarize_row_groups = TRUE # does a summarize level | |
| 121 | #' ) %>% | |
| 122 | #'   split_rows_by("SEX", | |
| 123 | #' child_labels = "hidden", | |
| 124 | #' label_pos = "topleft" | |
| 125 | #' ) %>% | |
| 126 | #' analyze_vars_in_cols( | |
| 127 | #' vars = "AVAL", | |
| 128 | #' split_col_vars = FALSE # avoids re-splitting the columns | |
| 129 | #' ) | |
| 130 | #' result <- build_table(lyt, df = adpp) | |
| 131 | #' result | |
| 132 | #' | |
| 133 | #' @export | |
| 134 | analyze_vars_in_cols <- function(lyt, | |
| 135 | vars, | |
| 136 | ..., | |
| 137 | .stats = c( | |
| 138 | "n", | |
| 139 | "mean", | |
| 140 | "sd", | |
| 141 | "se", | |
| 142 | "cv", | |
| 143 | "geom_cv" | |
| 144 | ), | |
| 145 | .labels = c( | |
| 146 | n = "n", | |
| 147 | mean = "Mean", | |
| 148 | sd = "SD", | |
| 149 | se = "SE", | |
| 150 | cv = "CV (%)", | |
| 151 | geom_cv = "CV % Geometric Mean" | |
| 152 | ), | |
| 153 | row_labels = NULL, | |
| 154 | do_summarize_row_groups = FALSE, | |
| 155 | split_col_vars = TRUE, | |
| 156 | imp_rule = NULL, | |
| 157 | avalcat_var = "AVALCAT1", | |
| 158 | cache = FALSE, | |
| 159 | .indent_mods = NULL, | |
| 160 | na_str = default_na_str(), | |
| 161 | nested = TRUE, | |
| 162 | .formats = NULL, | |
| 163 |                                  .aligns = NULL) { | |
| 164 | 26x | extra_args <- list(...) | 
| 165 | ||
| 166 | 26x | checkmate::assert_string(na_str, na.ok = TRUE, null.ok = TRUE) | 
| 167 | 26x | checkmate::assert_character(row_labels, null.ok = TRUE) | 
| 168 | 26x | checkmate::assert_int(.indent_mods, null.ok = TRUE) | 
| 169 | 26x | checkmate::assert_flag(nested) | 
| 170 | 26x | checkmate::assert_flag(split_col_vars) | 
| 171 | 26x | checkmate::assert_flag(do_summarize_row_groups) | 
| 172 | ||
| 173 | # Filtering | |
| 174 | 26x |   met_grps <- paste0("analyze_vars", c("_numeric", "_counts")) | 
| 175 | 26x | .stats <- get_stats(met_grps, stats_in = .stats) | 
| 176 | 26x | formats_v <- get_formats_from_stats(stats = .stats, formats_in = .formats) | 
| 177 | 26x | labels_v <- get_labels_from_stats(stats = .stats, labels_in = .labels) %>% .unlist_keep_nulls() | 
| 178 | ! |   if ("control" %in% names(extra_args)) labels_v <- labels_v %>% labels_use_control(extra_args[["control"]], .labels) | 
| 179 | ||
| 180 | # Check for vars in the case that one or more are used | |
| 181 | 26x |   if (length(vars) == 1) { | 
| 182 | 21x | vars <- rep(vars, length(.stats)) | 
| 183 | 5x |   } else if (length(vars) != length(.stats)) { | 
| 184 | 1x | stop( | 
| 185 | 1x | "Analyzed variables (vars) does not have the same ", | 
| 186 | 1x | "number of elements of specified statistics (.stats)." | 
| 187 | ) | |
| 188 | } | |
| 189 | ||
| 190 | 25x |   if (split_col_vars) { | 
| 191 | # Checking there is not a previous identical column split | |
| 192 | 21x | clyt <- tail(clayout(lyt), 1)[[1]] | 
| 193 | ||
| 194 | 21x | dummy_lyt <- split_cols_by_multivar( | 
| 195 | 21x | lyt = basic_table(), | 
| 196 | 21x | vars = vars, | 
| 197 | 21x | varlabels = labels_v | 
| 198 | ) | |
| 199 | ||
| 200 | 21x |     if (any(sapply(clyt, identical, y = get_last_col_split(dummy_lyt)))) { | 
| 201 | 2x | stop( | 
| 202 | 2x | "Column split called again with the same values. ", | 
| 203 | 2x | "This can create many unwanted columns. Please consider adding ", | 
| 204 | 2x | "split_col_vars = FALSE to the last call of ", | 
| 205 | 2x | deparse(sys.calls()[[sys.nframe() - 1]]), "." | 
| 206 | ) | |
| 207 | } | |
| 208 | ||
| 209 | # Main col split | |
| 210 | 19x | lyt <- split_cols_by_multivar( | 
| 211 | 19x | lyt = lyt, | 
| 212 | 19x | vars = vars, | 
| 213 | 19x | varlabels = labels_v | 
| 214 | ) | |
| 215 | } | |
| 216 | ||
| 217 | 23x | env <- new.env() # create caching environment | 
| 218 | ||
| 219 | 23x |   if (do_summarize_row_groups) { | 
| 220 | 8x |     if (length(unique(vars)) > 1) { | 
| 221 | ! |       stop("When using do_summarize_row_groups only one label level var should be inserted.") | 
| 222 | } | |
| 223 | ||
| 224 | # Function list for do_summarize_row_groups. Slightly different handling of labels | |
| 225 | 8x | cfun_list <- Map( | 
| 226 | 8x |       function(stat, use_cache, cache_env) { | 
| 227 | 48x |         function(u, .spl_context, labelstr, .df_row, ...) { | 
| 228 | # Statistic | |
| 229 | 152x | var_row_val <- paste( | 
| 230 | 152x |             gsub("\\._\\[\\[[0-9]+\\]\\]_\\.", "", paste(tail(.spl_context$cur_col_split_val, 1)[[1]], collapse = "_")), | 
| 231 | 152x | paste(.spl_context$value, collapse = "_"), | 
| 232 | 152x | sep = "_" | 
| 233 | ) | |
| 234 | 152x |           if (use_cache) { | 
| 235 | ! |             if (is.null(cache_env[[var_row_val]])) { | 
| 236 | ! | cache_env[[var_row_val]] <- s_summary(u, ...) | 
| 237 | } | |
| 238 | ! | x_stats <- cache_env[[var_row_val]] | 
| 239 |           } else { | |
| 240 | 152x | x_stats <- s_summary(u, ...) | 
| 241 | } | |
| 242 | ||
| 243 | 152x |           if (is.null(imp_rule) || !stat %in% c("mean", "sd", "cv", "geom_mean", "geom_cv", "median", "min", "max")) { | 
| 244 | 152x | res <- x_stats[[stat]] | 
| 245 |           } else { | |
| 246 | ! |             timept <- as.numeric(gsub(".*?([0-9\\.]+).*", "\\1", tail(.spl_context$value, 1))) | 
| 247 | ! | res_imp <- imputation_rule( | 
| 248 | ! | .df_row, x_stats, stat, | 
| 249 | ! | imp_rule = imp_rule, | 
| 250 | ! |               post = grepl("Predose", tail(.spl_context$value, 1)) || timept > 0, | 
| 251 | ! | avalcat_var = avalcat_var | 
| 252 | ) | |
| 253 | ! | res <- res_imp[["val"]] | 
| 254 | ! | na_str <- res_imp[["na_str"]] | 
| 255 | } | |
| 256 | ||
| 257 | # Label check and replacement | |
| 258 | 152x |           if (length(row_labels) > 1) { | 
| 259 | 32x |             if (!(labelstr %in% names(row_labels))) { | 
| 260 | 2x | stop( | 
| 261 | 2x | "Replacing the labels in do_summarize_row_groups needs a named vector", | 
| 262 | 2x | "that contains the split values. In the current split variable ", | 
| 263 | 2x | .spl_context$split[nrow(.spl_context)], | 
| 264 | 2x | " the labelstr value (split value by default) ", labelstr, " is not in", | 
| 265 | 2x | " row_labels names: ", names(row_labels) | 
| 266 | ) | |
| 267 | } | |
| 268 | 30x | lbl <- unlist(row_labels[labelstr]) | 
| 269 |           } else { | |
| 270 | 120x | lbl <- labelstr | 
| 271 | } | |
| 272 | ||
| 273 | # Cell creation | |
| 274 | 150x | rcell(res, | 
| 275 | 150x | label = lbl, | 
| 276 | 150x | format = formats_v[names(formats_v) == stat][[1]], | 
| 277 | 150x | format_na_str = na_str, | 
| 278 | 150x | indent_mod = ifelse(is.null(.indent_mods), 0L, .indent_mods), | 
| 279 | 150x | align = .aligns | 
| 280 | ) | |
| 281 | } | |
| 282 | }, | |
| 283 | 8x | stat = .stats, | 
| 284 | 8x | use_cache = cache, | 
| 285 | 8x | cache_env = replicate(length(.stats), env) | 
| 286 | ) | |
| 287 | ||
| 288 | # Main call to rtables | |
| 289 | 8x | summarize_row_groups( | 
| 290 | 8x | lyt = lyt, | 
| 291 | 8x | var = unique(vars), | 
| 292 | 8x | cfun = cfun_list, | 
| 293 | 8x | na_str = na_str, | 
| 294 | 8x | extra_args = extra_args | 
| 295 | ) | |
| 296 |   } else { | |
| 297 | # Function list for analyze_colvars | |
| 298 | 15x | afun_list <- Map( | 
| 299 | 15x |       function(stat, use_cache, cache_env) { | 
| 300 | 76x |         function(u, .spl_context, .df_row, ...) { | 
| 301 | # Main statistics | |
| 302 | 468x | var_row_val <- paste( | 
| 303 | 468x |             gsub("\\._\\[\\[[0-9]+\\]\\]_\\.", "", paste(tail(.spl_context$cur_col_split_val, 1)[[1]], collapse = "_")), | 
| 304 | 468x | paste(.spl_context$value, collapse = "_"), | 
| 305 | 468x | sep = "_" | 
| 306 | ) | |
| 307 | 468x |           if (use_cache) { | 
| 308 | 16x | if (is.null(cache_env[[var_row_val]])) cache_env[[var_row_val]] <- s_summary(u, ...) | 
| 309 | 56x | x_stats <- cache_env[[var_row_val]] | 
| 310 |           } else { | |
| 311 | 412x | x_stats <- s_summary(u, ...) | 
| 312 | } | |
| 313 | ||
| 314 | 468x |           if (is.null(imp_rule) || !stat %in% c("mean", "sd", "cv", "geom_mean", "geom_cv", "median", "min", "max")) { | 
| 315 | 348x | res <- x_stats[[stat]] | 
| 316 |           } else { | |
| 317 | 120x |             timept <- as.numeric(gsub(".*?([0-9\\.]+).*", "\\1", tail(.spl_context$value, 1))) | 
| 318 | 120x | res_imp <- imputation_rule( | 
| 319 | 120x | .df_row, x_stats, stat, | 
| 320 | 120x | imp_rule = imp_rule, | 
| 321 | 120x |               post = grepl("Predose", tail(.spl_context$value, 1)) || timept > 0, | 
| 322 | 120x | avalcat_var = avalcat_var | 
| 323 | ) | |
| 324 | 120x | res <- res_imp[["val"]] | 
| 325 | 120x | na_str <- res_imp[["na_str"]] | 
| 326 | } | |
| 327 | ||
| 328 | 468x |           if (is.list(res)) { | 
| 329 | 52x |             if (length(res) > 1) { | 
| 330 | 1x |               stop("The analyzed column produced more than one category of results.") | 
| 331 |             } else { | |
| 332 | 51x | res <- unlist(res) | 
| 333 | } | |
| 334 | } | |
| 335 | ||
| 336 | # Label from context | |
| 337 | 467x | label_from_context <- .spl_context$value[nrow(.spl_context)] | 
| 338 | ||
| 339 | # Label switcher | |
| 340 | 467x |           if (is.null(row_labels)) { | 
| 341 | 387x | lbl <- label_from_context | 
| 342 |           } else { | |
| 343 | 80x |             if (length(row_labels) > 1) { | 
| 344 | 68x |               if (!(label_from_context %in% names(row_labels))) { | 
| 345 | 2x | stop( | 
| 346 | 2x | "Replacing the labels in do_summarize_row_groups needs a named vector", | 
| 347 | 2x | "that contains the split values. In the current split variable ", | 
| 348 | 2x | .spl_context$split[nrow(.spl_context)], | 
| 349 | 2x | " the split value ", label_from_context, " is not in", | 
| 350 | 2x | " row_labels names: ", names(row_labels) | 
| 351 | ) | |
| 352 | } | |
| 353 | 66x | lbl <- unlist(row_labels[label_from_context]) | 
| 354 |             } else { | |
| 355 | 12x | lbl <- row_labels | 
| 356 | } | |
| 357 | } | |
| 358 | ||
| 359 | # Cell creation | |
| 360 | 465x | rcell(res, | 
| 361 | 465x | label = lbl, | 
| 362 | 465x | format = formats_v[names(formats_v) == stat][[1]], | 
| 363 | 465x | format_na_str = na_str, | 
| 364 | 465x | indent_mod = ifelse(is.null(.indent_mods), 0L, .indent_mods), | 
| 365 | 465x | align = .aligns | 
| 366 | ) | |
| 367 | } | |
| 368 | }, | |
| 369 | 15x | stat = .stats, | 
| 370 | 15x | use_cache = cache, | 
| 371 | 15x | cache_env = replicate(length(.stats), env) | 
| 372 | ) | |
| 373 | ||
| 374 | # Main call to rtables | |
| 375 | 15x | analyze_colvars(lyt, | 
| 376 | 15x | afun = afun_list, | 
| 377 | 15x | na_str = na_str, | 
| 378 | 15x | nested = nested, | 
| 379 | 15x | extra_args = extra_args | 
| 380 | ) | |
| 381 | } | |
| 382 | } | |
| 383 | ||
| 384 | # Helper function | |
| 385 | get_last_col_split <- function(lyt) { | |
| 386 | 3x | tail(tail(clayout(lyt), 1)[[1]], 1)[[1]] | 
| 387 | } | 
| 1 | #' Apply 1/3 or 1/2 imputation rule to data | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' @inheritParams argument_convention | |
| 6 | #' @param x_stats (named `list`)\cr a named list of statistics, typically the results of [s_summary()]. | |
| 7 | #' @param stat (`string`)\cr statistic to return the value/NA level of according to the imputation | |
| 8 | #' rule applied. | |
| 9 | #' @param imp_rule (`string`)\cr imputation rule setting. Set to `"1/3"` to implement 1/3 imputation | |
| 10 | #' rule or `"1/2"` to implement 1/2 imputation rule. | |
| 11 | #' @param post (`flag`)\cr whether the data corresponds to a post-dose time-point (defaults to `FALSE`). | |
| 12 | #' This parameter is only used when `imp_rule` is set to `"1/3"`. | |
| 13 | #' @param avalcat_var (`string`)\cr name of variable that indicates whether a row in `df` corresponds | |
| 14 | #' to an analysis value in category `"BLQ"`, `"LTR"`, `"<PCLLOQ"`, or none of the above | |
| 15 | #' (defaults to `"AVALCAT1"`). Variable `avalcat_var` must be present in `df`. | |
| 16 | #' | |
| 17 | #' @return A `list` containing statistic value (`val`) and NA level (`na_str`) that should be displayed | |
| 18 | #' according to the specified imputation rule. | |
| 19 | #' | |
| 20 | #' @seealso [analyze_vars_in_cols()] where this function can be implemented by setting the `imp_rule` | |
| 21 | #' argument. | |
| 22 | #' | |
| 23 | #' @examples | |
| 24 | #' set.seed(1) | |
| 25 | #' df <- data.frame( | |
| 26 | #' AVAL = runif(50, 0, 1), | |
| 27 | #' AVALCAT1 = sample(c(1, "BLQ"), 50, replace = TRUE) | |
| 28 | #' ) | |
| 29 | #' x_stats <- s_summary(df$AVAL) | |
| 30 | #' imputation_rule(df, x_stats, "max", "1/3") | |
| 31 | #' imputation_rule(df, x_stats, "geom_mean", "1/3") | |
| 32 | #' imputation_rule(df, x_stats, "mean", "1/2") | |
| 33 | #' | |
| 34 | #' @export | |
| 35 | imputation_rule <- function(df, x_stats, stat, imp_rule, post = FALSE, avalcat_var = "AVALCAT1") { | |
| 36 | 128x | checkmate::assert_choice(avalcat_var, names(df)) | 
| 37 | 128x |   checkmate::assert_choice(imp_rule, c("1/3", "1/2")) | 
| 38 | 128x |   n_blq <- sum(grepl("BLQ|LTR|<[1-9]|<PCLLOQ", df[[avalcat_var]])) | 
| 39 | 128x | ltr_blq_ratio <- n_blq / max(1, nrow(df)) | 
| 40 | ||
| 41 | # defaults | |
| 42 | 128x | val <- x_stats[[stat]] | 
| 43 | 128x | na_str <- "NE" | 
| 44 | ||
| 45 | 128x |   if (imp_rule == "1/3") { | 
| 46 | 2x | if (!post && stat == "geom_mean") val <- NA # 1/3_pre_LT, 1/3_pre_GT | 
| 47 | 84x |     if (ltr_blq_ratio > 1 / 3) { | 
| 48 | 63x | if (stat != "geom_mean") na_str <- "ND" # 1/3_pre_GT, 1/3_post_GT | 
| 49 | 9x |       if (!post && !stat %in% c("median", "max")) val <- NA # 1/3_pre_GT | 
| 50 | 39x |       if (post && !stat %in% c("median", "max", "geom_mean")) val <- NA # 1/3_post_GT | 
| 51 | } | |
| 52 | 44x |   } else if (imp_rule == "1/2") { | 
| 53 | 44x |     if (ltr_blq_ratio > 1 / 2 && !stat == "max") { | 
| 54 | 12x | val <- NA # 1/2_GT | 
| 55 | 12x | na_str <- "ND" # 1/2_GT | 
| 56 | } | |
| 57 | } | |
| 58 | ||
| 59 | 128x | list(val = val, na_str = na_str) | 
| 60 | } | 
| 1 | #' Helper functions for tabulating survival duration by subgroup | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Helper functions that tabulate in a data frame statistics such as median survival | |
| 6 | #' time and hazard ratio for population subgroups. | |
| 7 | #' | |
| 8 | #' @inheritParams argument_convention | |
| 9 | #' @inheritParams survival_coxph_pairwise | |
| 10 | #' @inheritParams survival_duration_subgroups | |
| 11 | #' @param arm (`factor`)\cr the treatment group variable. | |
| 12 | #' | |
| 13 | #' @details Main functionality is to prepare data for use in a layout-creating function. | |
| 14 | #' | |
| 15 | #' @examples | |
| 16 | #' library(dplyr) | |
| 17 | #' library(forcats) | |
| 18 | #' | |
| 19 | #' adtte <- tern_ex_adtte | |
| 20 | #' | |
| 21 | #' # Save variable labels before data processing steps. | |
| 22 | #' adtte_labels <- formatters::var_labels(adtte) | |
| 23 | #' | |
| 24 | #' adtte_f <- adtte %>% | |
| 25 | #' filter( | |
| 26 | #' PARAMCD == "OS", | |
| 27 | #'     ARM %in% c("B: Placebo", "A: Drug X"), | |
| 28 | #'     SEX %in% c("M", "F") | |
| 29 | #' ) %>% | |
| 30 | #' mutate( | |
| 31 | #' # Reorder levels of ARM to display reference arm before treatment arm. | |
| 32 | #' ARM = droplevels(fct_relevel(ARM, "B: Placebo")), | |
| 33 | #' SEX = droplevels(SEX), | |
| 34 | #' is_event = CNSR == 0 | |
| 35 | #' ) | |
| 36 | #' labels <- c("ARM" = adtte_labels[["ARM"]], "SEX" = adtte_labels[["SEX"]], "is_event" = "Event Flag") | |
| 37 | #' formatters::var_labels(adtte_f)[names(labels)] <- labels | |
| 38 | #' | |
| 39 | #' @name h_survival_duration_subgroups | |
| 40 | NULL | |
| 41 | ||
| 42 | #' @describeIn h_survival_duration_subgroups Helper to prepare a data frame of median survival times by arm. | |
| 43 | #' | |
| 44 | #' @return | |
| 45 | #' * `h_survtime_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, and `median`. | |
| 46 | #' | |
| 47 | #' @examples | |
| 48 | #' # Extract median survival time for one group. | |
| 49 | #' h_survtime_df( | |
| 50 | #' tte = adtte_f$AVAL, | |
| 51 | #' is_event = adtte_f$is_event, | |
| 52 | #' arm = adtte_f$ARM | |
| 53 | #' ) | |
| 54 | #' | |
| 55 | #' @export | |
| 56 | h_survtime_df <- function(tte, is_event, arm) { | |
| 57 | 79x | checkmate::assert_numeric(tte) | 
| 58 | 78x | checkmate::assert_logical(is_event, len = length(tte)) | 
| 59 | 78x | assert_valid_factor(arm, len = length(tte)) | 
| 60 | ||
| 61 | 78x | df_tte <- data.frame( | 
| 62 | 78x | tte = tte, | 
| 63 | 78x | is_event = is_event, | 
| 64 | 78x | stringsAsFactors = FALSE | 
| 65 | ) | |
| 66 | ||
| 67 | # Delete NAs | |
| 68 | 78x | non_missing_rows <- stats::complete.cases(df_tte) | 
| 69 | 78x | df_tte <- df_tte[non_missing_rows, ] | 
| 70 | 78x | arm <- arm[non_missing_rows] | 
| 71 | ||
| 72 | 78x | lst_tte <- split(df_tte, arm) | 
| 73 | 78x |   lst_results <- Map(function(x, arm) { | 
| 74 | 156x |     if (nrow(x) > 0) { | 
| 75 | 152x | s_surv <- s_surv_time(x, .var = "tte", is_event = "is_event") | 
| 76 | 152x | median_est <- unname(as.numeric(s_surv$median)) | 
| 77 | 152x | n_events <- sum(x$is_event) | 
| 78 |     } else { | |
| 79 | 4x | median_est <- NA | 
| 80 | 4x | n_events <- NA | 
| 81 | } | |
| 82 | ||
| 83 | 156x | data.frame( | 
| 84 | 156x | arm = arm, | 
| 85 | 156x | n = nrow(x), | 
| 86 | 156x | n_events = n_events, | 
| 87 | 156x | median = median_est, | 
| 88 | 156x | stringsAsFactors = FALSE | 
| 89 | ) | |
| 90 | 78x | }, lst_tte, names(lst_tte)) | 
| 91 | ||
| 92 | 78x | df <- do.call(rbind, args = c(lst_results, make.row.names = FALSE)) | 
| 93 | 78x | df$arm <- factor(df$arm, levels = levels(arm)) | 
| 94 | 78x | df | 
| 95 | } | |
| 96 | ||
| 97 | #' @describeIn h_survival_duration_subgroups Summarizes median survival times by arm and across subgroups | |
| 98 | #' in a data frame. `variables` corresponds to the names of variables found in `data`, passed as a named list and | |
| 99 | #' requires elements `tte`, `is_event`, `arm` and optionally `subgroups`. `groups_lists` optionally specifies | |
| 100 | #' groupings for `subgroups` variables. | |
| 101 | #' | |
| 102 | #' @return | |
| 103 | #' * `h_survtime_subgroups_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, `median`, `subgroup`, | |
| 104 | #' `var`, `var_label`, and `row_type`. | |
| 105 | #' | |
| 106 | #' @examples | |
| 107 | #' # Extract median survival time for multiple groups. | |
| 108 | #' h_survtime_subgroups_df( | |
| 109 | #' variables = list( | |
| 110 | #' tte = "AVAL", | |
| 111 | #' is_event = "is_event", | |
| 112 | #' arm = "ARM", | |
| 113 | #'     subgroups = c("SEX", "BMRKR2") | |
| 114 | #' ), | |
| 115 | #' data = adtte_f | |
| 116 | #' ) | |
| 117 | #' | |
| 118 | #' # Define groupings for BMRKR2 levels. | |
| 119 | #' h_survtime_subgroups_df( | |
| 120 | #' variables = list( | |
| 121 | #' tte = "AVAL", | |
| 122 | #' is_event = "is_event", | |
| 123 | #' arm = "ARM", | |
| 124 | #'     subgroups = c("SEX", "BMRKR2") | |
| 125 | #' ), | |
| 126 | #' data = adtte_f, | |
| 127 | #' groups_lists = list( | |
| 128 | #' BMRKR2 = list( | |
| 129 | #' "low" = "LOW", | |
| 130 | #'       "low/medium" = c("LOW", "MEDIUM"), | |
| 131 | #'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH") | |
| 132 | #' ) | |
| 133 | #' ) | |
| 134 | #' ) | |
| 135 | #' | |
| 136 | #' @export | |
| 137 | h_survtime_subgroups_df <- function(variables, | |
| 138 | data, | |
| 139 | groups_lists = list(), | |
| 140 |                                     label_all = "All Patients") { | |
| 141 | 15x | checkmate::assert_character(variables$tte) | 
| 142 | 15x | checkmate::assert_character(variables$is_event) | 
| 143 | 15x | checkmate::assert_character(variables$arm) | 
| 144 | 15x | checkmate::assert_character(variables$subgroups, null.ok = TRUE) | 
| 145 | ||
| 146 | 15x | assert_df_with_variables(data, variables) | 
| 147 | ||
| 148 | 15x | checkmate::assert_string(label_all) | 
| 149 | ||
| 150 | # Add All Patients. | |
| 151 | 15x | result_all <- h_survtime_df(data[[variables$tte]], data[[variables$is_event]], data[[variables$arm]]) | 
| 152 | 15x | result_all$subgroup <- label_all | 
| 153 | 15x | result_all$var <- "ALL" | 
| 154 | 15x | result_all$var_label <- label_all | 
| 155 | 15x | result_all$row_type <- "content" | 
| 156 | ||
| 157 | # Add Subgroups. | |
| 158 | 15x |   if (is.null(variables$subgroups)) { | 
| 159 | 3x | result_all | 
| 160 |   } else { | |
| 161 | 12x | l_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists) | 
| 162 | 12x |     l_result <- lapply(l_data, function(grp) { | 
| 163 | 60x | result <- h_survtime_df(grp$df[[variables$tte]], grp$df[[variables$is_event]], grp$df[[variables$arm]]) | 
| 164 | 60x | result_labels <- grp$df_labels[rep(1, times = nrow(result)), ] | 
| 165 | 60x | cbind(result, result_labels) | 
| 166 | }) | |
| 167 | 12x | result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE)) | 
| 168 | 12x | result_subgroups$row_type <- "analysis" | 
| 169 | 12x | rbind( | 
| 170 | 12x | result_all, | 
| 171 | 12x | result_subgroups | 
| 172 | ) | |
| 173 | } | |
| 174 | } | |
| 175 | ||
| 176 | #' @describeIn h_survival_duration_subgroups Helper to prepare a data frame with estimates of | |
| 177 | #' treatment hazard ratio. | |
| 178 | #' | |
| 179 | #' @param strata_data (`factor`, `data.frame`, or `NULL`)\cr required if stratified analysis is performed. | |
| 180 | #' | |
| 181 | #' @return | |
| 182 | #' * `h_coxph_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`, `lcl`, `ucl`, | |
| 183 | #' `conf_level`, `pval` and `pval_label`. | |
| 184 | #' | |
| 185 | #' @examples | |
| 186 | #' # Extract hazard ratio for one group. | |
| 187 | #' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM) | |
| 188 | #' | |
| 189 | #' # Extract hazard ratio for one group with stratification factor. | |
| 190 | #' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM, strata_data = adtte_f$STRATA1) | |
| 191 | #' | |
| 192 | #' @export | |
| 193 | h_coxph_df <- function(tte, is_event, arm, strata_data = NULL, control = control_coxph()) { | |
| 194 | 85x | checkmate::assert_numeric(tte) | 
| 195 | 85x | checkmate::assert_logical(is_event, len = length(tte)) | 
| 196 | 85x | assert_valid_factor(arm, n.levels = 2, len = length(tte)) | 
| 197 | ||
| 198 | 85x | df_tte <- data.frame(tte = tte, is_event = is_event) | 
| 199 | 85x | strata_vars <- NULL | 
| 200 | ||
| 201 | 85x |   if (!is.null(strata_data)) { | 
| 202 | 5x |     if (is.data.frame(strata_data)) { | 
| 203 | 4x | strata_vars <- names(strata_data) | 
| 204 | 4x | checkmate::assert_data_frame(strata_data, nrows = nrow(df_tte)) | 
| 205 | 4x | assert_df_with_factors(strata_data, as.list(stats::setNames(strata_vars, strata_vars))) | 
| 206 |     } else { | |
| 207 | 1x | assert_valid_factor(strata_data, len = nrow(df_tte)) | 
| 208 | 1x | strata_vars <- "strata_data" | 
| 209 | } | |
| 210 | 5x | df_tte[strata_vars] <- strata_data | 
| 211 | } | |
| 212 | ||
| 213 | 85x | l_df <- split(df_tte, arm) | 
| 214 | ||
| 215 | 85x |   if (nrow(l_df[[1]]) > 0 && nrow(l_df[[2]]) > 0) { | 
| 216 | # Hazard ratio and CI. | |
| 217 | 79x | result <- s_coxph_pairwise( | 
| 218 | 79x | df = l_df[[2]], | 
| 219 | 79x | .ref_group = l_df[[1]], | 
| 220 | 79x | .in_ref_col = FALSE, | 
| 221 | 79x | .var = "tte", | 
| 222 | 79x | is_event = "is_event", | 
| 223 | 79x | strata = strata_vars, | 
| 224 | 79x | control = control | 
| 225 | ) | |
| 226 | ||
| 227 | 79x | df <- data.frame( | 
| 228 | # Dummy column needed downstream to create a nested header. | |
| 229 | 79x | arm = " ", | 
| 230 | 79x | n_tot = unname(as.numeric(result$n_tot)), | 
| 231 | 79x | n_tot_events = unname(as.numeric(result$n_tot_events)), | 
| 232 | 79x | hr = unname(as.numeric(result$hr)), | 
| 233 | 79x | lcl = unname(result$hr_ci[1]), | 
| 234 | 79x | ucl = unname(result$hr_ci[2]), | 
| 235 | 79x | conf_level = control[["conf_level"]], | 
| 236 | 79x | pval = as.numeric(result$pvalue), | 
| 237 | 79x | pval_label = obj_label(result$pvalue), | 
| 238 | 79x | stringsAsFactors = FALSE | 
| 239 | ) | |
| 240 | } else if ( | |
| 241 | 6x | (nrow(l_df[[1]]) == 0 && nrow(l_df[[2]]) > 0) || | 
| 242 | 6x | (nrow(l_df[[1]]) > 0 && nrow(l_df[[2]]) == 0) | 
| 243 |   ) { | |
| 244 | 6x | df_tte_complete <- df_tte[stats::complete.cases(df_tte), ] | 
| 245 | 6x | df <- data.frame( | 
| 246 | # Dummy column needed downstream to create a nested header. | |
| 247 | 6x | arm = " ", | 
| 248 | 6x | n_tot = nrow(df_tte_complete), | 
| 249 | 6x | n_tot_events = sum(df_tte_complete$is_event), | 
| 250 | 6x | hr = NA, | 
| 251 | 6x | lcl = NA, | 
| 252 | 6x | ucl = NA, | 
| 253 | 6x | conf_level = control[["conf_level"]], | 
| 254 | 6x | pval = NA, | 
| 255 | 6x | pval_label = NA, | 
| 256 | 6x | stringsAsFactors = FALSE | 
| 257 | ) | |
| 258 |   } else { | |
| 259 | ! | df <- data.frame( | 
| 260 | # Dummy column needed downstream to create a nested header. | |
| 261 | ! | arm = " ", | 
| 262 | ! | n_tot = 0L, | 
| 263 | ! | n_tot_events = 0L, | 
| 264 | ! | hr = NA, | 
| 265 | ! | lcl = NA, | 
| 266 | ! | ucl = NA, | 
| 267 | ! | conf_level = control[["conf_level"]], | 
| 268 | ! | pval = NA, | 
| 269 | ! | pval_label = NA, | 
| 270 | ! | stringsAsFactors = FALSE | 
| 271 | ) | |
| 272 | } | |
| 273 | ||
| 274 | 85x | df | 
| 275 | } | |
| 276 | ||
| 277 | #' @describeIn h_survival_duration_subgroups Summarizes estimates of the treatment hazard ratio | |
| 278 | #' across subgroups in a data frame. `variables` corresponds to the names of variables found in | |
| 279 | #' `data`, passed as a named list and requires elements `tte`, `is_event`, `arm` and | |
| 280 | #' optionally `subgroups` and `strata`. `groups_lists` optionally specifies | |
| 281 | #' groupings for `subgroups` variables. | |
| 282 | #' | |
| 283 | #' @return | |
| 284 | #' * `h_coxph_subgroups_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`, | |
| 285 | #' `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`, `var_label`, and `row_type`. | |
| 286 | #' | |
| 287 | #' @examples | |
| 288 | #' # Extract hazard ratio for multiple groups. | |
| 289 | #' h_coxph_subgroups_df( | |
| 290 | #' variables = list( | |
| 291 | #' tte = "AVAL", | |
| 292 | #' is_event = "is_event", | |
| 293 | #' arm = "ARM", | |
| 294 | #'     subgroups = c("SEX", "BMRKR2") | |
| 295 | #' ), | |
| 296 | #' data = adtte_f | |
| 297 | #' ) | |
| 298 | #' | |
| 299 | #' # Define groupings of BMRKR2 levels. | |
| 300 | #' h_coxph_subgroups_df( | |
| 301 | #' variables = list( | |
| 302 | #' tte = "AVAL", | |
| 303 | #' is_event = "is_event", | |
| 304 | #' arm = "ARM", | |
| 305 | #'     subgroups = c("SEX", "BMRKR2") | |
| 306 | #' ), | |
| 307 | #' data = adtte_f, | |
| 308 | #' groups_lists = list( | |
| 309 | #' BMRKR2 = list( | |
| 310 | #' "low" = "LOW", | |
| 311 | #'       "low/medium" = c("LOW", "MEDIUM"), | |
| 312 | #'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH") | |
| 313 | #' ) | |
| 314 | #' ) | |
| 315 | #' ) | |
| 316 | #' | |
| 317 | #' # Extract hazard ratio for multiple groups with stratification factors. | |
| 318 | #' h_coxph_subgroups_df( | |
| 319 | #' variables = list( | |
| 320 | #' tte = "AVAL", | |
| 321 | #' is_event = "is_event", | |
| 322 | #' arm = "ARM", | |
| 323 | #'     subgroups = c("SEX", "BMRKR2"), | |
| 324 | #'     strata = c("STRATA1", "STRATA2") | |
| 325 | #' ), | |
| 326 | #' data = adtte_f | |
| 327 | #' ) | |
| 328 | #' | |
| 329 | #' @export | |
| 330 | h_coxph_subgroups_df <- function(variables, | |
| 331 | data, | |
| 332 | groups_lists = list(), | |
| 333 | control = control_coxph(), | |
| 334 |                                  label_all = "All Patients") { | |
| 335 | 17x |   if ("strat" %in% names(variables)) { | 
| 336 | ! | warning( | 
| 337 | ! | "Warning: the `strat` element name of the `variables` list argument to `h_coxph_subgroups_df() ", | 
| 338 | ! | "was deprecated in tern 0.9.4.\n ", | 
| 339 | ! | "Please use the name `strata` instead of `strat` in the `variables` argument." | 
| 340 | ) | |
| 341 | ! | variables[["strata"]] <- variables[["strat"]] | 
| 342 | } | |
| 343 | ||
| 344 | 17x | checkmate::assert_character(variables$tte) | 
| 345 | 17x | checkmate::assert_character(variables$is_event) | 
| 346 | 17x | checkmate::assert_character(variables$arm) | 
| 347 | 17x | checkmate::assert_character(variables$subgroups, null.ok = TRUE) | 
| 348 | 17x | checkmate::assert_character(variables$strata, null.ok = TRUE) | 
| 349 | 17x | assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2) | 
| 350 | 17x | assert_df_with_variables(data, variables) | 
| 351 | 17x | checkmate::assert_string(label_all) | 
| 352 | ||
| 353 | # Add All Patients. | |
| 354 | 17x | result_all <- h_coxph_df( | 
| 355 | 17x | tte = data[[variables$tte]], | 
| 356 | 17x | is_event = data[[variables$is_event]], | 
| 357 | 17x | arm = data[[variables$arm]], | 
| 358 | 17x | strata_data = if (is.null(variables$strata)) NULL else data[variables$strata], | 
| 359 | 17x | control = control | 
| 360 | ) | |
| 361 | 17x | result_all$subgroup <- label_all | 
| 362 | 17x | result_all$var <- "ALL" | 
| 363 | 17x | result_all$var_label <- label_all | 
| 364 | 17x | result_all$row_type <- "content" | 
| 365 | ||
| 366 | # Add Subgroups. | |
| 367 | 17x |   if (is.null(variables$subgroups)) { | 
| 368 | 3x | result_all | 
| 369 |   } else { | |
| 370 | 14x | l_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists) | 
| 371 | ||
| 372 | 14x |     l_result <- lapply(l_data, function(grp) { | 
| 373 | 64x | result <- h_coxph_df( | 
| 374 | 64x | tte = grp$df[[variables$tte]], | 
| 375 | 64x | is_event = grp$df[[variables$is_event]], | 
| 376 | 64x | arm = grp$df[[variables$arm]], | 
| 377 | 64x | strata_data = if (is.null(variables$strata)) NULL else grp$df[variables$strata], | 
| 378 | 64x | control = control | 
| 379 | ) | |
| 380 | 64x | result_labels <- grp$df_labels[rep(1, times = nrow(result)), ] | 
| 381 | 64x | cbind(result, result_labels) | 
| 382 | }) | |
| 383 | ||
| 384 | 14x | result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE)) | 
| 385 | 14x | result_subgroups$row_type <- "analysis" | 
| 386 | ||
| 387 | 14x | rbind( | 
| 388 | 14x | result_all, | 
| 389 | 14x | result_subgroups | 
| 390 | ) | |
| 391 | } | |
| 392 | } | |
| 393 | ||
| 394 | #' Split data frame by subgroups | |
| 395 | #' | |
| 396 | #' @description `r lifecycle::badge("stable")` | |
| 397 | #' | |
| 398 | #' Split a data frame into a non-nested list of subsets. | |
| 399 | #' | |
| 400 | #' @inheritParams argument_convention | |
| 401 | #' @inheritParams survival_duration_subgroups | |
| 402 | #' @param data (`data.frame`)\cr dataset to split. | |
| 403 | #' @param subgroups (`character`)\cr names of factor variables from `data` used to create subsets. | |
| 404 | #' Unused levels not present in `data` are dropped. Note that the order in this vector | |
| 405 | #' determines the order in the downstream table. | |
| 406 | #' | |
| 407 | #' @return A list with subset data (`df`) and metadata about the subset (`df_labels`). | |
| 408 | #' | |
| 409 | #' @details Main functionality is to prepare data for use in forest plot layouts. | |
| 410 | #' | |
| 411 | #' @examples | |
| 412 | #' df <- data.frame( | |
| 413 | #' x = c(1:5), | |
| 414 | #'   y = factor(c("A", "B", "A", "B", "A"), levels = c("A", "B", "C")), | |
| 415 | #'   z = factor(c("C", "C", "D", "D", "D"), levels = c("D", "C")) | |
| 416 | #' ) | |
| 417 | #' formatters::var_labels(df) <- paste("label for", names(df)) | |
| 418 | #' | |
| 419 | #' h_split_by_subgroups( | |
| 420 | #' data = df, | |
| 421 | #'   subgroups = c("y", "z") | |
| 422 | #' ) | |
| 423 | #' | |
| 424 | #' h_split_by_subgroups( | |
| 425 | #' data = df, | |
| 426 | #'   subgroups = c("y", "z"), | |
| 427 | #' groups_lists = list( | |
| 428 | #'     y = list("AB" = c("A", "B"), "C" = "C") | |
| 429 | #' ) | |
| 430 | #' ) | |
| 431 | #' | |
| 432 | #' @export | |
| 433 | h_split_by_subgroups <- function(data, | |
| 434 | subgroups, | |
| 435 |                                  groups_lists = list()) { | |
| 436 | 66x | checkmate::assert_character(subgroups, min.len = 1, any.missing = FALSE) | 
| 437 | 66x | checkmate::assert_list(groups_lists, names = "named") | 
| 438 | 66x | checkmate::assert_subset(names(groups_lists), subgroups) | 
| 439 | 66x | assert_df_with_factors(data, as.list(stats::setNames(subgroups, subgroups))) | 
| 440 | ||
| 441 | 66x | data_labels <- unname(formatters::var_labels(data)) | 
| 442 | 66x | df_subgroups <- data[, subgroups, drop = FALSE] | 
| 443 | 66x | subgroup_labels <- formatters::var_labels(df_subgroups, fill = TRUE) | 
| 444 | ||
| 445 | 66x |   l_labels <- Map(function(grp_i, name_i) { | 
| 446 | 120x | existing_levels <- levels(droplevels(grp_i)) | 
| 447 | 120x |     grp_levels <- if (name_i %in% names(groups_lists)) { | 
| 448 | # For this variable groupings are defined. We check which groups are contained in the data. | |
| 449 | 11x | group_list_i <- groups_lists[[name_i]] | 
| 450 | 11x | group_has_levels <- vapply(group_list_i, function(lvls) any(lvls %in% existing_levels), TRUE) | 
| 451 | 11x | names(which(group_has_levels)) | 
| 452 |     } else { | |
| 453 | 109x | existing_levels | 
| 454 | } | |
| 455 | 120x | df_labels <- data.frame( | 
| 456 | 120x | subgroup = grp_levels, | 
| 457 | 120x | var = name_i, | 
| 458 | 120x | var_label = unname(subgroup_labels[name_i]), | 
| 459 | 120x | stringsAsFactors = FALSE # Rationale is that subgroups may not be unique. | 
| 460 | ) | |
| 461 | 66x | }, df_subgroups, names(df_subgroups)) | 
| 462 | ||
| 463 | # Create a data frame with one row per subgroup. | |
| 464 | 66x | df_labels <- do.call(rbind, args = c(l_labels, make.row.names = FALSE)) | 
| 465 | 66x | row_label <- paste0(df_labels$var, ".", df_labels$subgroup) | 
| 466 | 66x | row_split_var <- factor(row_label, levels = row_label) | 
| 467 | ||
| 468 | # Create a list of data subsets. | |
| 469 | 66x |   lapply(split(df_labels, row_split_var), function(row_i) { | 
| 470 | 294x |     which_row <- if (row_i$var %in% names(groups_lists)) { | 
| 471 | 31x | data[[row_i$var]] %in% groups_lists[[row_i$var]][[row_i$subgroup]] | 
| 472 |     } else { | |
| 473 | 263x | data[[row_i$var]] == row_i$subgroup | 
| 474 | } | |
| 475 | 294x | df <- data[which_row, ] | 
| 476 | 294x | rownames(df) <- NULL | 
| 477 | 294x | formatters::var_labels(df) <- data_labels | 
| 478 | ||
| 479 | 294x | list( | 
| 480 | 294x | df = df, | 
| 481 | 294x | df_labels = data.frame(row_i, row.names = NULL) | 
| 482 | ) | |
| 483 | }) | |
| 484 | } | 
| 1 | #' Re-implemented `range()` default S3 method for numerical objects | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' This function returns `c(NA, NA)` instead of `c(-Inf, Inf)` for zero-length data | |
| 6 | #' without any warnings. | |
| 7 | #' | |
| 8 | #' @param x (`numeric`)\cr a sequence of numbers for which the range is computed. | |
| 9 | #' @param na.rm (`flag`)\cr flag indicating if `NA` should be omitted. | |
| 10 | #' @param finite (`flag`)\cr flag indicating if non-finite elements should be removed. | |
| 11 | #' | |
| 12 | #' @return A 2-element vector of class `numeric`. | |
| 13 | #' | |
| 14 | #' @examples | |
| 15 | #' x <- rnorm(20, 1) | |
| 16 | #' range_noinf(x, na.rm = TRUE) | |
| 17 | #' range_noinf(rep(NA, 20), na.rm = TRUE) | |
| 18 | #' range(rep(NA, 20), na.rm = TRUE) | |
| 19 | #' | |
| 20 | #' @export | |
| 21 | range_noinf <- function(x, na.rm = FALSE, finite = FALSE) { # nolint | |
| 22 | ||
| 23 | 1923x | checkmate::assert_numeric(x) | 
| 24 | ||
| 25 | 1923x |   if (finite) { | 
| 26 | 24x | x <- x[is.finite(x)] # removes NAs too | 
| 27 | 1899x |   } else if (na.rm) { | 
| 28 | 708x | x <- x[!is.na(x)] | 
| 29 | } | |
| 30 | ||
| 31 | 1923x |   if (length(x) == 0) { | 
| 32 | 111x | rval <- c(NA, NA) | 
| 33 | 111x | mode(rval) <- typeof(x) | 
| 34 |   } else { | |
| 35 | 1812x | rval <- c(min(x, na.rm = FALSE), max(x, na.rm = FALSE)) | 
| 36 | } | |
| 37 | ||
| 38 | 1923x | return(rval) | 
| 39 | } | |
| 40 | ||
| 41 | #' Utility function to create label for confidence interval | |
| 42 | #' | |
| 43 | #' @description `r lifecycle::badge("stable")` | |
| 44 | #' | |
| 45 | #' @inheritParams argument_convention | |
| 46 | #' | |
| 47 | #' @return A `string`. | |
| 48 | #' | |
| 49 | #' @export | |
| 50 | f_conf_level <- function(conf_level) { | |
| 51 | 8518x | assert_proportion_value(conf_level) | 
| 52 | 8516x | paste0(conf_level * 100, "% CI") | 
| 53 | } | |
| 54 | ||
| 55 | #' Utility function to create label for p-value | |
| 56 | #' | |
| 57 | #' @description `r lifecycle::badge("stable")` | |
| 58 | #' | |
| 59 | #' @param test_mean (`numeric(1)`)\cr mean value to test under the null hypothesis. | |
| 60 | #' | |
| 61 | #' @return A `string`. | |
| 62 | #' | |
| 63 | #' @export | |
| 64 | f_pval <- function(test_mean) { | |
| 65 | 1184x | checkmate::assert_numeric(test_mean, len = 1) | 
| 66 | 1182x |   paste0("p-value (H0: mean = ", test_mean, ")") | 
| 67 | } | |
| 68 | ||
| 69 | #' Utility function to return a named list of covariate names | |
| 70 | #' | |
| 71 | #' @description `r lifecycle::badge("stable")` | |
| 72 | #' | |
| 73 | #' @param covariates (`character`)\cr a vector that can contain single variable names (such as | |
| 74 | #' `"X1"`), and/or interaction terms indicated by `"X1 * X2"`. | |
| 75 | #' | |
| 76 | #' @return A named `list` of `character` vector. | |
| 77 | #' | |
| 78 | #' @examples | |
| 79 | #' get_covariates(c("a * b", "c")) | |
| 80 | #' | |
| 81 | #' @export | |
| 82 | get_covariates <- function(covariates) { | |
| 83 | 14x | checkmate::assert_character(covariates) | 
| 84 | 12x | cov_vars <- unique(trimws(unlist(strsplit(covariates, "\\*")))) | 
| 85 | 12x | stats::setNames(as.list(cov_vars), cov_vars) | 
| 86 | } | |
| 87 | ||
| 88 | #' Replicate entries of a vector if required | |
| 89 | #' | |
| 90 | #' @description `r lifecycle::badge("stable")` | |
| 91 | #' | |
| 92 | #' Replicate entries of a vector if required. | |
| 93 | #' | |
| 94 | #' @inheritParams argument_convention | |
| 95 | #' @param n (`integer(1)`)\cr number of entries that are needed. | |
| 96 | #' | |
| 97 | #' @return `x` if it has the required length already or is `NULL`, | |
| 98 | #' otherwise if it is scalar the replicated version of it with `n` entries. | |
| 99 | #' | |
| 100 | #' @note This function will fail if `x` is not of length `n` and/or is not a scalar. | |
| 101 | #' | |
| 102 | #' @export | |
| 103 | to_n <- function(x, n) { | |
| 104 | 5x |   if (is.null(x)) { | 
| 105 | 1x | NULL | 
| 106 | 4x |   } else if (length(x) == 1) { | 
| 107 | 1x | rep(x, n) | 
| 108 | 3x |   } else if (length(x) == n) { | 
| 109 | 2x | x | 
| 110 |   } else { | |
| 111 | 1x |     stop("dimension mismatch") | 
| 112 | } | |
| 113 | } | |
| 114 | ||
| 115 | #' Check element dimension | |
| 116 | #' | |
| 117 | #' Checks if the elements in `...` have the same dimension. | |
| 118 | #' | |
| 119 | #' @param ... (`data.frame` or `vector`)\cr any data frames or vectors. | |
| 120 | #' @param omit_null (`flag`)\cr whether `NULL` elements in `...` should be omitted from the check. | |
| 121 | #' | |
| 122 | #' @return A `logical` value. | |
| 123 | #' | |
| 124 | #' @keywords internal | |
| 125 | check_same_n <- function(..., omit_null = TRUE) { | |
| 126 | 2x | dots <- list(...) | 
| 127 | ||
| 128 | 2x | n_list <- Map( | 
| 129 | 2x |     function(x, name) { | 
| 130 | 5x |       if (is.null(x)) { | 
| 131 | ! |         if (omit_null) { | 
| 132 | 2x | NA_integer_ | 
| 133 |         } else { | |
| 134 | ! |           stop("arg", name, "is not supposed to be NULL") | 
| 135 | } | |
| 136 | 5x |       } else if (is.data.frame(x)) { | 
| 137 | ! | nrow(x) | 
| 138 | 5x |       } else if (is.atomic(x)) { | 
| 139 | 5x | length(x) | 
| 140 |       } else { | |
| 141 | ! |         stop("data structure for ", name, "is currently not supported") | 
| 142 | } | |
| 143 | }, | |
| 144 | 2x | dots, names(dots) | 
| 145 | ) | |
| 146 | ||
| 147 | 2x | n <- stats::na.omit(unlist(n_list)) | 
| 148 | ||
| 149 | 2x |   if (length(unique(n)) > 1) { | 
| 150 | ! | sel <- which(n != n[1]) | 
| 151 | ! |     stop("Dimension mismatch:", paste(names(n)[sel], collapse = ", "), " do not have N=", n[1]) | 
| 152 | } | |
| 153 | ||
| 154 | 2x | TRUE | 
| 155 | } | |
| 156 | ||
| 157 | #' Utility function to check if a float value is equal to another float value | |
| 158 | #' | |
| 159 | #' Uses `.Machine$double.eps` as the tolerance for the comparison. | |
| 160 | #' | |
| 161 | #' @param x (`numeric(1)`)\cr a float number. | |
| 162 | #' @param y (`numeric(1)`)\cr a float number. | |
| 163 | #' | |
| 164 | #' @return `TRUE` if identical, otherwise `FALSE`. | |
| 165 | #' | |
| 166 | #' @keywords internal | |
| 167 | .is_equal_float <- function(x, y) { | |
| 168 | 2990x | checkmate::assert_number(x) | 
| 169 | 2990x | checkmate::assert_number(y) | 
| 170 | ||
| 171 | # Define a tolerance | |
| 172 | 2990x | tolerance <- .Machine$double.eps | 
| 173 | ||
| 174 | # Check if x is close enough to y | |
| 175 | 2990x | abs(x - y) < tolerance | 
| 176 | } | |
| 177 | ||
| 178 | #' Make names without dots | |
| 179 | #' | |
| 180 | #' @param nams (`character`)\cr vector of original names. | |
| 181 | #' | |
| 182 | #' @return A `character` `vector` of proper names, which does not use dots in contrast to [make.names()]. | |
| 183 | #' | |
| 184 | #' @keywords internal | |
| 185 | make_names <- function(nams) { | |
| 186 | 6x | orig <- make.names(nams) | 
| 187 | 6x |   gsub(".", "", x = orig, fixed = TRUE) | 
| 188 | } | |
| 189 | ||
| 190 | #' Conversion of months to days | |
| 191 | #' | |
| 192 | #' @description `r lifecycle::badge("stable")` | |
| 193 | #' | |
| 194 | #' Conversion of months to days. This is an approximative calculation because it | |
| 195 | #' considers each month as having an average of 30.4375 days. | |
| 196 | #' | |
| 197 | #' @param x (`numeric(1)`)\cr time in months. | |
| 198 | #' | |
| 199 | #' @return A `numeric` vector with the time in days. | |
| 200 | #' | |
| 201 | #' @examples | |
| 202 | #' x <- c(13.25, 8.15, 1, 2.834) | |
| 203 | #' month2day(x) | |
| 204 | #' | |
| 205 | #' @export | |
| 206 | month2day <- function(x) { | |
| 207 | 1x | checkmate::assert_numeric(x) | 
| 208 | 1x | x * 30.4375 | 
| 209 | } | |
| 210 | ||
| 211 | #' Conversion of days to months | |
| 212 | #' | |
| 213 | #' @param x (`numeric(1)`)\cr time in days. | |
| 214 | #' | |
| 215 | #' @return A `numeric` vector with the time in months. | |
| 216 | #' | |
| 217 | #' @examples | |
| 218 | #' x <- c(403, 248, 30, 86) | |
| 219 | #' day2month(x) | |
| 220 | #' | |
| 221 | #' @export | |
| 222 | day2month <- function(x) { | |
| 223 | 19x | checkmate::assert_numeric(x) | 
| 224 | 19x | x / 30.4375 | 
| 225 | } | |
| 226 | ||
| 227 | #' Return an empty numeric if all elements are `NA`. | |
| 228 | #' | |
| 229 | #' @param x (`numeric`)\cr vector. | |
| 230 | #' | |
| 231 | #' @return An empty `numeric` if all elements of `x` are `NA`, otherwise `x`. | |
| 232 | #' | |
| 233 | #' @examples | |
| 234 | #' x <- c(NA, NA, NA) | |
| 235 | #' # Internal function - empty_vector_if_na | |
| 236 | #' @keywords internal | |
| 237 | empty_vector_if_na <- function(x) { | |
| 238 | 1017x |   if (all(is.na(x))) { | 
| 239 | 310x | numeric() | 
| 240 |   } else { | |
| 241 | 707x | x | 
| 242 | } | |
| 243 | } | |
| 244 | ||
| 245 | #' Element-wise combination of two vectors | |
| 246 | #' | |
| 247 | #' @param x (`vector`)\cr first vector to combine. | |
| 248 | #' @param y (`vector`)\cr second vector to combine. | |
| 249 | #' | |
| 250 | #' @return A `list` where each element combines corresponding elements of `x` and `y`. | |
| 251 | #' | |
| 252 | #' @examples | |
| 253 | #' combine_vectors(1:3, 4:6) | |
| 254 | #' | |
| 255 | #' @export | |
| 256 | combine_vectors <- function(x, y) { | |
| 257 | 42x | checkmate::assert_vector(x) | 
| 258 | 42x | checkmate::assert_vector(y, len = length(x)) | 
| 259 | ||
| 260 | 42x | result <- lapply(as.data.frame(rbind(x, y)), `c`) | 
| 261 | 42x | names(result) <- NULL | 
| 262 | 42x | result | 
| 263 | } | |
| 264 | ||
| 265 | #' Extract elements by name | |
| 266 | #' | |
| 267 | #' This utility function extracts elements from a vector `x` by `names`. | |
| 268 | #' Differences to the standard `[` function are: | |
| 269 | #' | |
| 270 | #' - If `x` is `NULL`, then still always `NULL` is returned (same as in base function). | |
| 271 | #' - If `x` is not `NULL`, then the intersection of its names is made with `names` and those | |
| 272 | #' elements are returned. That is, `names` which don't appear in `x` are not returned as `NA`s. | |
| 273 | #' | |
| 274 | #' @param x (named `vector`)\cr where to extract named elements from. | |
| 275 | #' @param names (`character`)\cr vector of names to extract. | |
| 276 | #' | |
| 277 | #' @return `NULL` if `x` is `NULL`, otherwise the extracted elements from `x`. | |
| 278 | #' | |
| 279 | #' @keywords internal | |
| 280 | extract_by_name <- function(x, names) { | |
| 281 | 3x |   if (is.null(x)) { | 
| 282 | 1x | return(NULL) | 
| 283 | } | |
| 284 | 2x | checkmate::assert_named(x) | 
| 285 | 2x | checkmate::assert_character(names) | 
| 286 | 2x | which_extract <- intersect(names(x), names) | 
| 287 | 2x |   if (length(which_extract) > 0) { | 
| 288 | 1x | x[which_extract] | 
| 289 |   } else { | |
| 290 | 1x | NULL | 
| 291 | } | |
| 292 | } | |
| 293 | ||
| 294 | #' Labels for adverse event baskets | |
| 295 | #' | |
| 296 | #' @description `r lifecycle::badge("stable")` | |
| 297 | #' | |
| 298 | #' @param aesi (`character`)\cr vector with standardized MedDRA query name (e.g. `SMQxxNAM`) or customized query | |
| 299 | #' name (e.g. `CQxxNAM`). | |
| 300 | #' @param scope (`character`)\cr vector with scope of query (e.g. `SMQxxSC`). | |
| 301 | #' | |
| 302 | #' @return A `string` with the standard label for the AE basket. | |
| 303 | #' | |
| 304 | #' @examples | |
| 305 | #' adae <- tern_ex_adae | |
| 306 | #' | |
| 307 | #' # Standardized query label includes scope. | |
| 308 | #' aesi_label(adae$SMQ01NAM, scope = adae$SMQ01SC) | |
| 309 | #' | |
| 310 | #' # Customized query label. | |
| 311 | #' aesi_label(adae$CQ01NAM) | |
| 312 | #' | |
| 313 | #' @export | |
| 314 | aesi_label <- function(aesi, scope = NULL) { | |
| 315 | 4x | checkmate::assert_character(aesi) | 
| 316 | 4x | checkmate::assert_character(scope, null.ok = TRUE) | 
| 317 | 4x | aesi_label <- obj_label(aesi) | 
| 318 | 4x | aesi <- sas_na(aesi) | 
| 319 | 4x | aesi <- unique(aesi)[!is.na(unique(aesi))] | 
| 320 | ||
| 321 | 4x |   lbl <- if (length(aesi) == 1 && !is.null(scope)) { | 
| 322 | 1x | scope <- sas_na(scope) | 
| 323 | 1x | scope <- unique(scope)[!is.na(unique(scope))] | 
| 324 | 1x | checkmate::assert_string(scope) | 
| 325 | 1x |     paste0(aesi, " (", scope, ")") | 
| 326 | 4x |   } else if (length(aesi) == 1 && is.null(scope)) { | 
| 327 | 1x | aesi | 
| 328 |   } else { | |
| 329 | 2x | aesi_label | 
| 330 | } | |
| 331 | ||
| 332 | 4x | lbl | 
| 333 | } | |
| 334 | ||
| 335 | #' Indicate study arm variable in formula | |
| 336 | #' | |
| 337 | #' We use `study_arm` to indicate the study arm variable in `tern` formulas. | |
| 338 | #' | |
| 339 | #' @param x arm information | |
| 340 | #' | |
| 341 | #' @return `x` | |
| 342 | #' | |
| 343 | #' @keywords internal | |
| 344 | study_arm <- function(x) { | |
| 345 | ! | structure(x, varname = deparse(substitute(x))) | 
| 346 | } | |
| 347 | ||
| 348 | #' Smooth function with optional grouping | |
| 349 | #' | |
| 350 | #' @description `r lifecycle::badge("stable")` | |
| 351 | #' | |
| 352 | #' This produces `loess` smoothed estimates of `y` with Student confidence intervals. | |
| 353 | #' | |
| 354 | #' @param df (`data.frame`)\cr data set containing all analysis variables. | |
| 355 | #' @param x (`string`)\cr x column name. | |
| 356 | #' @param y (`string`)\cr y column name. | |
| 357 | #' @param groups (`character` or `NULL`)\cr vector with optional grouping variables names. | |
| 358 | #' @param level (`proportion`)\cr level of confidence interval to use (0.95 by default). | |
| 359 | #' | |
| 360 | #' @return A `data.frame` with original `x`, smoothed `y`, `ylow`, and `yhigh`, and | |
| 361 | #' optional `groups` variables formatted as `factor` type. | |
| 362 | #' | |
| 363 | #' @export | |
| 364 | get_smooths <- function(df, x, y, groups = NULL, level = 0.95) { | |
| 365 | 5x | checkmate::assert_data_frame(df) | 
| 366 | 5x | df_cols <- colnames(df) | 
| 367 | 5x | checkmate::assert_string(x) | 
| 368 | 5x | checkmate::assert_subset(x, df_cols) | 
| 369 | 5x | checkmate::assert_numeric(df[[x]]) | 
| 370 | 5x | checkmate::assert_string(y) | 
| 371 | 5x | checkmate::assert_subset(y, df_cols) | 
| 372 | 5x | checkmate::assert_numeric(df[[y]]) | 
| 373 | ||
| 374 | 5x |   if (!is.null(groups)) { | 
| 375 | 4x | checkmate::assert_character(groups) | 
| 376 | 4x | checkmate::assert_subset(groups, df_cols) | 
| 377 | } | |
| 378 | ||
| 379 | 5x |   smooths <- function(x, y) { | 
| 380 | 18x | stats::predict(stats::loess(y ~ x), se = TRUE) | 
| 381 | } | |
| 382 | ||
| 383 | 5x |   if (!is.null(groups)) { | 
| 384 | 4x | cc <- stats::complete.cases(df[c(x, y, groups)]) | 
| 385 | 4x | df_c <- df[cc, c(x, y, groups)] | 
| 386 | 4x |     df_c_ordered <- df_c[do.call("order", as.list(df_c[, groups, drop = FALSE])), , drop = FALSE] | 
| 387 | 4x | df_c_g <- data.frame(Map(as.factor, df_c_ordered[groups])) | 
| 388 | ||
| 389 | 4x | df_smooth_raw <- | 
| 390 | 4x |       by(df_c_ordered, df_c_g, function(d) { | 
| 391 | 17x | plx <- smooths(d[[x]], d[[y]]) | 
| 392 | 17x | data.frame( | 
| 393 | 17x | x = d[[x]], | 
| 394 | 17x | y = plx$fit, | 
| 395 | 17x | ylow = plx$fit - stats::qt(level, plx$df) * plx$se.fit, | 
| 396 | 17x | yhigh = plx$fit + stats::qt(level, plx$df) * plx$se.fit | 
| 397 | ) | |
| 398 | }) | |
| 399 | ||
| 400 | 4x | df_smooth <- do.call(rbind, df_smooth_raw) | 
| 401 | 4x | df_smooth[groups] <- df_c_g | 
| 402 | ||
| 403 | 4x | df_smooth | 
| 404 |   } else { | |
| 405 | 1x | cc <- stats::complete.cases(df[c(x, y)]) | 
| 406 | 1x | df_c <- df[cc, ] | 
| 407 | 1x | plx <- smooths(df_c[[x]], df_c[[y]]) | 
| 408 | ||
| 409 | 1x | df_smooth <- data.frame( | 
| 410 | 1x | x = df_c[[x]], | 
| 411 | 1x | y = plx$fit, | 
| 412 | 1x | ylow = plx$fit - stats::qt(level, plx$df) * plx$se.fit, | 
| 413 | 1x | yhigh = plx$fit + stats::qt(level, plx$df) * plx$se.fit | 
| 414 | ) | |
| 415 | ||
| 416 | 1x | df_smooth | 
| 417 | } | |
| 418 | } | |
| 419 | ||
| 420 | #' Number of available (non-missing entries) in a vector | |
| 421 | #' | |
| 422 | #' Small utility function for better readability. | |
| 423 | #' | |
| 424 | #' @param x (`vector`)\cr vector in which to count non-missing values. | |
| 425 | #' | |
| 426 | #' @return Number of non-missing values. | |
| 427 | #' | |
| 428 | #' @keywords internal | |
| 429 | n_available <- function(x) { | |
| 430 | 423x | sum(!is.na(x)) | 
| 431 | } | |
| 432 | ||
| 433 | #' Reapply variable labels | |
| 434 | #' | |
| 435 | #' This is a helper function that is used in tests. | |
| 436 | #' | |
| 437 | #' @param x (`vector`)\cr vector of elements that needs new labels. | |
| 438 | #' @param varlabels (`character`)\cr vector of labels for `x`. | |
| 439 | #' @param ... further parameters to be added to the list. | |
| 440 | #' | |
| 441 | #' @return `x` with variable labels reapplied. | |
| 442 | #' | |
| 443 | #' @export | |
| 444 | reapply_varlabels <- function(x, varlabels, ...) { | |
| 445 | 11x | named_labels <- c(as.list(varlabels), list(...)) | 
| 446 | 11x | formatters::var_labels(x)[names(named_labels)] <- as.character(named_labels) | 
| 447 | 11x | x | 
| 448 | } | |
| 449 | ||
| 450 | #' Wrapper function of survival::clogit | |
| 451 | #' | |
| 452 | #' When model fitting failed, a more useful message would show. | |
| 453 | #' | |
| 454 | #' @param formula Model formula. | |
| 455 | #' @param data data frame. | |
| 456 | #' @param ... further parameters to be added to survival::clogit. | |
| 457 | #' | |
| 458 | #' @return When model fitting is successful, an object of class "clogit".\cr | |
| 459 | #' When model fitting failed, an error message is shown. | |
| 460 | #' | |
| 461 | #' @examples | |
| 462 | #' \dontrun{ | |
| 463 | #' library(dplyr) | |
| 464 | #' adrs_local <- tern_ex_adrs %>% | |
| 465 | #'   dplyr::filter(ARMCD %in% c("ARM A", "ARM B")) %>% | |
| 466 | #' dplyr::mutate( | |
| 467 | #'     RSP = dplyr::case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0), | |
| 468 | #' ARMBIN = droplevels(ARMCD) | |
| 469 | #' ) | |
| 470 | #' dta <- adrs_local | |
| 471 | #' dta <- dta[sample(nrow(dta)), ] | |
| 472 | #' mod <- clogit_with_tryCatch(formula = RSP ~ ARMBIN * AGE + strata(STRATA1), data = dta) | |
| 473 | #' } | |
| 474 | #' | |
| 475 | #' @export | |
| 476 | clogit_with_tryCatch <- function(formula, data, ...) { # nolint | |
| 477 | 37x | tryCatch( | 
| 478 | 37x | survival::clogit(formula = formula, data = data, ...), | 
| 479 | 37x |     error = function(e) stop("model not built successfully with survival::clogit") | 
| 480 | ) | |
| 481 | } | 
| 1 | #' Count the number of patients with particular flags | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [count_patients_with_flags()] creates a layout element to calculate counts of patients for | |
| 6 | #' which user-specified flags are present. | |
| 7 | #' | |
| 8 | #' This function analyzes primary analysis variable `var` which indicates unique subject identifiers. Flags | |
| 9 | #' variables to analyze are specified by the user via the `flag_variables` argument, and must either take value | |
| 10 | #' `TRUE` (flag present) or `FALSE` (flag absent) for each record. | |
| 11 | #' | |
| 12 | #' If there are multiple records with the same flag present for a patient, only one occurrence is counted. | |
| 13 | #' | |
| 14 | #' @inheritParams argument_convention | |
| 15 | #' @param flag_variables (`character`)\cr a vector specifying the names of `logical` variables from analysis dataset | |
| 16 | #' used for counting the number of unique identifiers. | |
| 17 | #' @param flag_labels (`character`)\cr vector of labels to use for flag variables. If any labels are also specified via | |
| 18 | #' the `.labels` parameter, the `.labels` values will take precedence and replace these labels. | |
| 19 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 20 | #' | |
| 21 | #'   Options are: ``r shQuote(get_stats("count_patients_with_flags"), type = "sh")`` | |
| 22 | #' | |
| 23 | #' @seealso [count_patients_with_event] | |
| 24 | #' | |
| 25 | #' @name count_patients_with_flags | |
| 26 | #' @order 1 | |
| 27 | NULL | |
| 28 | ||
| 29 | #' @describeIn count_patients_with_flags Statistics function which counts the number of patients for which | |
| 30 | #' a particular flag variable is `TRUE`. | |
| 31 | #' | |
| 32 | #' @inheritParams analyze_variables | |
| 33 | #' @param .var (`string`)\cr name of the column that contains the unique identifier. | |
| 34 | #' | |
| 35 | #' @note If `flag_labels` is not specified, variables labels will be extracted from `df`. If variables are not | |
| 36 | #' labeled, variable names will be used instead. Alternatively, a named `vector` can be supplied to | |
| 37 | #' `flag_variables` such that within each name-value pair the name corresponds to the variable name and the value is | |
| 38 | #' the label to use for this variable. | |
| 39 | #' | |
| 40 | #' @return | |
| 41 | #' * `s_count_patients_with_flags()` returns the count and the fraction of unique identifiers with each particular | |
| 42 | #' flag as a list of statistics `n`, `count`, `count_fraction`, and `n_blq`, with one element per flag. | |
| 43 | #' | |
| 44 | #' @examples | |
| 45 | #' # `s_count_patients_with_flags()` | |
| 46 | #' | |
| 47 | #' s_count_patients_with_flags( | |
| 48 | #' adae, | |
| 49 | #' "SUBJID", | |
| 50 | #'   flag_variables = c("fl1", "fl2", "fl3", "fl4"), | |
| 51 | #' denom = "N_col", | |
| 52 | #' .N_col = 1000 | |
| 53 | #' ) | |
| 54 | #' | |
| 55 | #' @export | |
| 56 | s_count_patients_with_flags <- function(df, | |
| 57 | .var, | |
| 58 | .N_col = ncol(df), # nolint | |
| 59 | .N_row = nrow(df), # nolint | |
| 60 | ..., | |
| 61 | flag_variables, | |
| 62 | flag_labels = NULL, | |
| 63 |                                         denom = c("n", "N_col", "N_row")) { | |
| 64 | 41x | checkmate::assert_character(flag_variables) | 
| 65 | 41x |   if (!is.null(flag_labels)) { | 
| 66 | 6x | checkmate::assert_character(flag_labels, len = length(flag_variables), any.missing = FALSE) | 
| 67 | 6x | flag_names <- flag_labels | 
| 68 |   } else { | |
| 69 | 35x |     if (is.null(names(flag_variables))) { | 
| 70 | 20x | flag_names <- formatters::var_labels(df[flag_variables], fill = TRUE) | 
| 71 |     } else { | |
| 72 | 15x | flag_names <- unname(flag_variables) | 
| 73 | 15x | flag_variables <- names(flag_variables) | 
| 74 | } | |
| 75 | } | |
| 76 | 41x | checkmate::assert_subset(flag_variables, colnames(df)) | 
| 77 | ||
| 78 | 41x |   temp <- sapply(flag_variables, function(x) { | 
| 79 | 123x | tmp <- Map(function(y) which(df[[y]]), x) | 
| 80 | 123x | position_satisfy_flags <- Reduce(intersect, tmp) | 
| 81 | 123x | id_satisfy_flags <- as.character(unique(df[position_satisfy_flags, ][[.var]])) | 
| 82 | 123x | s_count_values( | 
| 83 | 123x | x = as.character(unique(df[[.var]])), | 
| 84 | 123x | values = id_satisfy_flags, | 
| 85 | 123x | denom = denom, | 
| 86 | 123x | .N_col = .N_col, | 
| 87 | 123x | .N_row = .N_row | 
| 88 | ) | |
| 89 | }) | |
| 90 | 41x | colnames(temp) <- flag_names | 
| 91 | 41x | temp <- data.frame(t(temp)) | 
| 92 | 41x | result <- as.list(temp) | 
| 93 | 41x |   if (length(flag_variables) == 1) { | 
| 94 | 1x | for (i in seq(3)) names(result[[i]]) <- flag_names[1] | 
| 95 | } | |
| 96 | 41x | result | 
| 97 | } | |
| 98 | ||
| 99 | #' @describeIn count_patients_with_flags Formatted analysis function which is used as `afun` | |
| 100 | #' in `count_patients_with_flags()`. | |
| 101 | #' | |
| 102 | #' @return | |
| 103 | #' * `a_count_patients_with_flags()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 104 | #' | |
| 105 | #' @examples | |
| 106 | #' a_count_patients_with_flags( | |
| 107 | #' adae, | |
| 108 | #' .N_col = 10L, | |
| 109 | #' .N_row = 10L, | |
| 110 | #' .var = "USUBJID", | |
| 111 | #'   flag_variables = c("fl1", "fl2", "fl3", "fl4") | |
| 112 | #' ) | |
| 113 | #' | |
| 114 | #' @export | |
| 115 | a_count_patients_with_flags <- function(df, | |
| 116 | labelstr = "", | |
| 117 | ..., | |
| 118 | .stats = NULL, | |
| 119 | .stat_names = NULL, | |
| 120 | .formats = NULL, | |
| 121 | .labels = NULL, | |
| 122 |                                         .indent_mods = NULL) { | |
| 123 | # Check for additional parameters to the statistics function | |
| 124 | 31x | dots_extra_args <- list(...) | 
| 125 | 31x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 126 | 31x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 127 | 31x | flag_variables <- dots_extra_args[["flag_variables"]] | 
| 128 | 31x | flag_labels <- dots_extra_args[["flag_labels"]] | 
| 129 | ||
| 130 | 17x | if (is.null(names(flag_variables))) flag_variables <- formatters::var_labels(df, fill = TRUE)[flag_variables] | 
| 131 | 26x | if (is.null(flag_labels)) flag_labels <- flag_variables | 
| 132 | ||
| 133 | # Check for user-defined functions | |
| 134 | 31x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 135 | 31x | .stats <- default_and_custom_stats_list$all_stats | 
| 136 | 31x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 137 | ||
| 138 | # Apply statistics function | |
| 139 | 31x | x_stats <- .apply_stat_functions( | 
| 140 | 31x | default_stat_fnc = s_count_patients_with_flags, | 
| 141 | 31x | custom_stat_fnc_list = custom_stat_functions, | 
| 142 | 31x | args_list = c( | 
| 143 | 31x | df = list(df), | 
| 144 | 31x | extra_afun_params, | 
| 145 | 31x | dots_extra_args | 
| 146 | ) | |
| 147 | ) | |
| 148 | ||
| 149 | # Fill in formatting defaults | |
| 150 | 31x |   .stats <- get_stats("count_patients_with_flags", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) | 
| 151 | 31x | levels_per_stats <- rep(list(names(flag_variables)), length(.stats)) %>% stats::setNames(.stats) | 
| 152 | 31x | .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) | 
| 153 | 31x | .labels <- get_labels_from_stats( | 
| 154 | 31x | .stats, .labels, levels_per_stats, | 
| 155 | 31x | tern_defaults = flag_labels %>% stats::setNames(names(flag_variables)) | 
| 156 | ) | |
| 157 | 31x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) | 
| 158 | ||
| 159 | 31x | x_stats <- x_stats[.stats] %>% | 
| 160 | 31x | .unlist_keep_nulls() %>% | 
| 161 | 31x | setNames(names(.formats)) | 
| 162 | ||
| 163 | # Auto format handling | |
| 164 | 31x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 165 | ||
| 166 | # Get and check statistical names | |
| 167 | 31x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 168 | ||
| 169 | 31x | in_rows( | 
| 170 | 31x | .list = x_stats, | 
| 171 | 31x | .formats = .formats, | 
| 172 | 31x | .names = names(.labels), | 
| 173 | 31x | .stat_names = .stat_names, | 
| 174 | 31x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 175 | 31x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 176 | ) | |
| 177 | } | |
| 178 | ||
| 179 | #' @describeIn count_patients_with_flags Layout-creating function which can take statistics function | |
| 180 | #' arguments and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 181 | #' | |
| 182 | #' @return | |
| 183 | #' * `count_patients_with_flags()` returns a layout object suitable for passing to further layouting functions, | |
| 184 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 185 | #' the statistics from `s_count_patients_with_flags()` to the table layout. | |
| 186 | #' | |
| 187 | #' @examples | |
| 188 | #' # Add labelled flag variables to analysis dataset. | |
| 189 | #' adae <- tern_ex_adae %>% | |
| 190 | #' dplyr::mutate( | |
| 191 | #'     fl1 = TRUE %>% with_label("Total AEs"), | |
| 192 | #' fl2 = (TRTEMFL == "Y") %>% | |
| 193 | #'       with_label("Total number of patients with at least one adverse event"), | |
| 194 | #' fl3 = (TRTEMFL == "Y" & AEOUT == "FATAL") %>% | |
| 195 | #'       with_label("Total number of patients with fatal AEs"), | |
| 196 | #' fl4 = (TRTEMFL == "Y" & AEOUT == "FATAL" & AEREL == "Y") %>% | |
| 197 | #'       with_label("Total number of patients with related fatal AEs") | |
| 198 | #' ) | |
| 199 | #' | |
| 200 | #' lyt <- basic_table() %>% | |
| 201 | #'   split_cols_by("ARM") %>% | |
| 202 | #' add_colcounts() %>% | |
| 203 | #' count_patients_with_flags( | |
| 204 | #' "SUBJID", | |
| 205 | #'     flag_variables = c("fl1", "fl2", "fl3", "fl4"), | |
| 206 | #' denom = "N_col" | |
| 207 | #' ) | |
| 208 | #' | |
| 209 | #' build_table(lyt, adae, alt_counts_df = tern_ex_adsl) | |
| 210 | #' | |
| 211 | #' @export | |
| 212 | #' @order 2 | |
| 213 | count_patients_with_flags <- function(lyt, | |
| 214 | var, | |
| 215 | flag_variables, | |
| 216 | flag_labels = NULL, | |
| 217 | var_labels = var, | |
| 218 | show_labels = "hidden", | |
| 219 | riskdiff = FALSE, | |
| 220 | na_str = default_na_str(), | |
| 221 | nested = TRUE, | |
| 222 | ..., | |
| 223 |                                       table_names = paste0("tbl_flags_", var), | |
| 224 | .stats = "count_fraction", | |
| 225 | .stat_names = NULL, | |
| 226 | .formats = list(count_fraction = format_count_fraction_fixed_dp), | |
| 227 | .indent_mods = NULL, | |
| 228 |                                       .labels = NULL) { | |
| 229 | 11x | checkmate::assert_flag(riskdiff) | 
| 230 | 11x | afun <- if (isFALSE(riskdiff)) a_count_patients_with_flags else afun_riskdiff | 
| 231 | ||
| 232 | # Process standard extra arguments | |
| 233 | 11x |   extra_args <- list(".stats" = .stats) | 
| 234 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 235 | 11x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 236 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 237 | 1x | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 238 | ||
| 239 | # Process additional arguments to the statistic function | |
| 240 | 11x | extra_args <- c( | 
| 241 | 11x | extra_args, | 
| 242 | 11x | flag_variables = list(flag_variables), flag_labels = list(flag_labels), | 
| 243 | 11x |     if (!isFALSE(riskdiff)) list(afun = list("s_count_patients_with_flags" = a_count_patients_with_flags)), | 
| 244 | ... | |
| 245 | ) | |
| 246 | ||
| 247 | # Append additional info from layout to the analysis function | |
| 248 | 11x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 249 | 11x | formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]]) | 
| 250 | ||
| 251 | 11x | analyze( | 
| 252 | 11x | lyt = lyt, | 
| 253 | 11x | vars = var, | 
| 254 | 11x | afun = afun, | 
| 255 | 11x | na_str = na_str, | 
| 256 | 11x | nested = nested, | 
| 257 | 11x | extra_args = extra_args, | 
| 258 | 11x | var_labels = var_labels, | 
| 259 | 11x | show_labels = show_labels, | 
| 260 | 11x | table_names = table_names | 
| 261 | ) | |
| 262 | } | 
| 1 | #' Convert list of groups to a data frame | |
| 2 | #' | |
| 3 | #' This converts a list of group levels into a data frame format which is expected by [rtables::add_combo_levels()]. | |
| 4 | #' | |
| 5 | #' @param groups_list (named `list` of `character`)\cr specifies the new group levels via the names and the | |
| 6 | #' levels that belong to it in the character vectors that are elements of the list. | |
| 7 | #' | |
| 8 | #' @return A `tibble` in the required format. | |
| 9 | #' | |
| 10 | #' @examples | |
| 11 | #' grade_groups <- list( | |
| 12 | #'   "Any Grade (%)" = c("1", "2", "3", "4", "5"), | |
| 13 | #'   "Grade 3-4 (%)" = c("3", "4"), | |
| 14 | #' "Grade 5 (%)" = "5" | |
| 15 | #' ) | |
| 16 | #' groups_list_to_df(grade_groups) | |
| 17 | #' | |
| 18 | #' @export | |
| 19 | groups_list_to_df <- function(groups_list) { | |
| 20 | 5x | checkmate::assert_list(groups_list, names = "named") | 
| 21 | 5x | lapply(groups_list, checkmate::assert_character) | 
| 22 | 5x | tibble::tibble( | 
| 23 | 5x | valname = make_names(names(groups_list)), | 
| 24 | 5x | label = names(groups_list), | 
| 25 | 5x | levelcombo = unname(groups_list), | 
| 26 | 5x | exargs = replicate(length(groups_list), list()) | 
| 27 | ) | |
| 28 | } | |
| 29 | ||
| 30 | #' Reference and treatment group combination | |
| 31 | #' | |
| 32 | #' @description `r lifecycle::badge("stable")` | |
| 33 | #' | |
| 34 | #' Facilitate the re-combination of groups divided as reference and treatment groups; it helps in arranging groups of | |
| 35 | #' columns in the `rtables` framework and teal modules. | |
| 36 | #' | |
| 37 | #' @param fct (`factor`)\cr the variable with levels which needs to be grouped. | |
| 38 | #' @param ref (`character`)\cr the reference level(s). | |
| 39 | #' @param collapse (`string`)\cr a character string to separate `fct` and `ref`. | |
| 40 | #' | |
| 41 | #' @return A `list` with first item `ref` (reference) and second item `trt` (treatment). | |
| 42 | #' | |
| 43 | #' @examples | |
| 44 | #' groups <- combine_groups( | |
| 45 | #' fct = DM$ARM, | |
| 46 | #'   ref = c("B: Placebo") | |
| 47 | #' ) | |
| 48 | #' | |
| 49 | #' basic_table() %>% | |
| 50 | #'   split_cols_by_groups("ARM", groups) %>% | |
| 51 | #' add_colcounts() %>% | |
| 52 | #'   analyze_vars("AGE") %>% | |
| 53 | #' build_table(DM) | |
| 54 | #' | |
| 55 | #' @export | |
| 56 | combine_groups <- function(fct, | |
| 57 | ref = NULL, | |
| 58 |                            collapse = "/") { | |
| 59 | 10x | checkmate::assert_string(collapse) | 
| 60 | 10x | checkmate::assert_character(ref, min.chars = 1, any.missing = FALSE, null.ok = TRUE) | 
| 61 | 10x |   checkmate::assert_multi_class(fct, classes = c("factor", "character")) | 
| 62 | ||
| 63 | 10x | fct <- as_factor_keep_attributes(fct) | 
| 64 | ||
| 65 | 10x | group_levels <- levels(fct) | 
| 66 | 10x |   if (is.null(ref)) { | 
| 67 | 6x | ref <- group_levels[1] | 
| 68 |   } else { | |
| 69 | 4x | checkmate::assert_subset(ref, group_levels) | 
| 70 | } | |
| 71 | ||
| 72 | 10x | groups <- list( | 
| 73 | 10x | ref = group_levels[group_levels %in% ref], | 
| 74 | 10x | trt = group_levels[!group_levels %in% ref] | 
| 75 | ) | |
| 76 | 10x | stats::setNames(groups, nm = lapply(groups, paste, collapse = collapse)) | 
| 77 | } | |
| 78 | ||
| 79 | #' Split columns by groups of levels | |
| 80 | #' | |
| 81 | #' @description `r lifecycle::badge("stable")` | |
| 82 | #' | |
| 83 | #' @inheritParams argument_convention | |
| 84 | #' @inheritParams groups_list_to_df | |
| 85 | #' @param ... additional arguments to [rtables::split_cols_by()] in order. For instance, to | |
| 86 | #' control formats (`format`), add a joint column for all groups (`incl_all`). | |
| 87 | #' | |
| 88 | #' @return A layout object suitable for passing to further layouting functions. Adding | |
| 89 | #' this function to an `rtable` layout will add a column split including the given | |
| 90 | #' groups to the table layout. | |
| 91 | #' | |
| 92 | #' @seealso [rtables::split_cols_by()] | |
| 93 | #' | |
| 94 | #' @examples | |
| 95 | #' # 1 - Basic use | |
| 96 | #' | |
| 97 | #' # Without group combination `split_cols_by_groups` is | |
| 98 | #' # equivalent to [rtables::split_cols_by()]. | |
| 99 | #' basic_table() %>% | |
| 100 | #'   split_cols_by_groups("ARM") %>% | |
| 101 | #' add_colcounts() %>% | |
| 102 | #'   analyze("AGE") %>% | |
| 103 | #' build_table(DM) | |
| 104 | #' | |
| 105 | #' # Add a reference column. | |
| 106 | #' basic_table() %>% | |
| 107 | #'   split_cols_by_groups("ARM", ref_group = "B: Placebo") %>% | |
| 108 | #' add_colcounts() %>% | |
| 109 | #' analyze( | |
| 110 | #' "AGE", | |
| 111 | #'     afun = function(x, .ref_group, .in_ref_col) { | |
| 112 | #'       if (.in_ref_col) { | |
| 113 | #'         in_rows("Diff Mean" = rcell(NULL)) | |
| 114 | #'       } else { | |
| 115 | #'         in_rows("Diff Mean" = rcell(mean(x) - mean(.ref_group), format = "xx.xx")) | |
| 116 | #' } | |
| 117 | #' } | |
| 118 | #' ) %>% | |
| 119 | #' build_table(DM) | |
| 120 | #' | |
| 121 | #' # 2 - Adding group specification | |
| 122 | #' | |
| 123 | #' # Manual preparation of the groups. | |
| 124 | #' groups <- list( | |
| 125 | #'   "Arms A+B" = c("A: Drug X", "B: Placebo"), | |
| 126 | #'   "Arms A+C" = c("A: Drug X", "C: Combination") | |
| 127 | #' ) | |
| 128 | #' | |
| 129 | #' # Use of split_cols_by_groups without reference column. | |
| 130 | #' basic_table() %>% | |
| 131 | #'   split_cols_by_groups("ARM", groups) %>% | |
| 132 | #' add_colcounts() %>% | |
| 133 | #'   analyze("AGE") %>% | |
| 134 | #' build_table(DM) | |
| 135 | #' | |
| 136 | #' # Including differentiated output in the reference column. | |
| 137 | #' basic_table() %>% | |
| 138 | #'   split_cols_by_groups("ARM", groups_list = groups, ref_group = "Arms A+B") %>% | |
| 139 | #' analyze( | |
| 140 | #' "AGE", | |
| 141 | #'     afun = function(x, .ref_group, .in_ref_col) { | |
| 142 | #'       if (.in_ref_col) { | |
| 143 | #'         in_rows("Diff. of Averages" = rcell(NULL)) | |
| 144 | #'       } else { | |
| 145 | #'         in_rows("Diff. of Averages" = rcell(mean(x) - mean(.ref_group), format = "xx.xx")) | |
| 146 | #' } | |
| 147 | #' } | |
| 148 | #' ) %>% | |
| 149 | #' build_table(DM) | |
| 150 | #' | |
| 151 | #' # 3 - Binary list dividing factor levels into reference and treatment | |
| 152 | #' | |
| 153 | #' # `combine_groups` defines reference and treatment. | |
| 154 | #' groups <- combine_groups( | |
| 155 | #' fct = DM$ARM, | |
| 156 | #'   ref = c("A: Drug X", "B: Placebo") | |
| 157 | #' ) | |
| 158 | #' groups | |
| 159 | #' | |
| 160 | #' # Use group definition without reference column. | |
| 161 | #' basic_table() %>% | |
| 162 | #'   split_cols_by_groups("ARM", groups_list = groups) %>% | |
| 163 | #' add_colcounts() %>% | |
| 164 | #'   analyze("AGE") %>% | |
| 165 | #' build_table(DM) | |
| 166 | #' | |
| 167 | #' # Use group definition with reference column (first item of groups). | |
| 168 | #' basic_table() %>% | |
| 169 | #'   split_cols_by_groups("ARM", groups, ref_group = names(groups)[1]) %>% | |
| 170 | #' add_colcounts() %>% | |
| 171 | #' analyze( | |
| 172 | #' "AGE", | |
| 173 | #'     afun = function(x, .ref_group, .in_ref_col) { | |
| 174 | #'       if (.in_ref_col) { | |
| 175 | #'         in_rows("Diff Mean" = rcell(NULL)) | |
| 176 | #'       } else { | |
| 177 | #'         in_rows("Diff Mean" = rcell(mean(x) - mean(.ref_group), format = "xx.xx")) | |
| 178 | #' } | |
| 179 | #' } | |
| 180 | #' ) %>% | |
| 181 | #' build_table(DM) | |
| 182 | #' | |
| 183 | #' @export | |
| 184 | split_cols_by_groups <- function(lyt, | |
| 185 | var, | |
| 186 | groups_list = NULL, | |
| 187 | ref_group = NULL, | |
| 188 |                                  ...) { | |
| 189 | 6x |   if (is.null(groups_list)) { | 
| 190 | 2x | split_cols_by( | 
| 191 | 2x | lyt = lyt, | 
| 192 | 2x | var = var, | 
| 193 | 2x | ref_group = ref_group, | 
| 194 | ... | |
| 195 | ) | |
| 196 |   } else { | |
| 197 | 4x | groups_df <- groups_list_to_df(groups_list) | 
| 198 | 4x |     if (!is.null(ref_group)) { | 
| 199 | 3x | ref_group <- groups_df$valname[groups_df$label == ref_group] | 
| 200 | } | |
| 201 | 4x | split_cols_by( | 
| 202 | 4x | lyt = lyt, | 
| 203 | 4x | var = var, | 
| 204 | 4x | split_fun = add_combo_levels(groups_df, keep_levels = groups_df$valname), | 
| 205 | 4x | ref_group = ref_group, | 
| 206 | ... | |
| 207 | ) | |
| 208 | } | |
| 209 | } | |
| 210 | ||
| 211 | #' Combine counts | |
| 212 | #' | |
| 213 | #' Simplifies the estimation of column counts, especially when group combination is required. | |
| 214 | #' | |
| 215 | #' @inheritParams combine_groups | |
| 216 | #' @inheritParams groups_list_to_df | |
| 217 | #' | |
| 218 | #' @return A `vector` of column counts. | |
| 219 | #' | |
| 220 | #' @seealso [combine_groups()] | |
| 221 | #' | |
| 222 | #' @examples | |
| 223 | #' ref <- c("A: Drug X", "B: Placebo") | |
| 224 | #' groups <- combine_groups(fct = DM$ARM, ref = ref) | |
| 225 | #' | |
| 226 | #' col_counts <- combine_counts( | |
| 227 | #' fct = DM$ARM, | |
| 228 | #' groups_list = groups | |
| 229 | #' ) | |
| 230 | #' | |
| 231 | #' basic_table() %>% | |
| 232 | #'   split_cols_by_groups("ARM", groups) %>% | |
| 233 | #' add_colcounts() %>% | |
| 234 | #'   analyze_vars("AGE") %>% | |
| 235 | #' build_table(DM, col_counts = col_counts) | |
| 236 | #' | |
| 237 | #' ref <- "A: Drug X" | |
| 238 | #' groups <- combine_groups(fct = DM$ARM, ref = ref) | |
| 239 | #' col_counts <- combine_counts( | |
| 240 | #' fct = DM$ARM, | |
| 241 | #' groups_list = groups | |
| 242 | #' ) | |
| 243 | #' | |
| 244 | #' basic_table() %>% | |
| 245 | #'   split_cols_by_groups("ARM", groups) %>% | |
| 246 | #' add_colcounts() %>% | |
| 247 | #'   analyze_vars("AGE") %>% | |
| 248 | #' build_table(DM, col_counts = col_counts) | |
| 249 | #' | |
| 250 | #' @export | |
| 251 | combine_counts <- function(fct, groups_list = NULL) { | |
| 252 | 4x |   checkmate::assert_multi_class(fct, classes = c("factor", "character")) | 
| 253 | ||
| 254 | 4x | fct <- as_factor_keep_attributes(fct) | 
| 255 | ||
| 256 | 4x |   if (is.null(groups_list)) { | 
| 257 | 1x | y <- table(fct) | 
| 258 | 1x | y <- stats::setNames(as.numeric(y), nm = dimnames(y)[[1]]) | 
| 259 |   } else { | |
| 260 | 3x | y <- vapply( | 
| 261 | 3x | X = groups_list, | 
| 262 | 3x | FUN = function(x) sum(table(fct)[x]), | 
| 263 | 3x | FUN.VALUE = 1 | 
| 264 | ) | |
| 265 | } | |
| 266 | 4x | y | 
| 267 | } | 
| 1 | #' Summarize variables in columns | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [summarize_colvars()] uses the statistics function [s_summary()] to analyze variables that are | |
| 6 | #' arranged in columns. The variables to analyze should be specified in the table layout via column splits (see | |
| 7 | #' [rtables::split_cols_by()] and [rtables::split_cols_by_multivar()]) prior to using [summarize_colvars()]. | |
| 8 | #' | |
| 9 | #' The function is a minimal wrapper for [rtables::analyze_colvars()], a function typically used to apply different | |
| 10 | #' analysis methods in rows for each column variable. To use the analysis methods as column labels, please refer to | |
| 11 | #' the [analyze_vars_in_cols()] function. | |
| 12 | #' | |
| 13 | #' @inheritParams argument_convention | |
| 14 | #' @param ... arguments passed to [s_summary()]. | |
| 15 | #' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector | |
| 16 | #' should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation | |
| 17 | #' for that statistic's row label. | |
| 18 | #' | |
| 19 | #' @return | |
| 20 | #' A layout object suitable for passing to further layouting functions, or to [rtables::build_table()]. | |
| 21 | #' Adding this function to an `rtable` layout will summarize the given variables, arrange the output | |
| 22 | #' in columns, and add it to the table layout. | |
| 23 | #' | |
| 24 | #' @seealso [rtables::split_cols_by_multivar()] and [`analyze_colvars_functions`]. | |
| 25 | #' | |
| 26 | #' @examples | |
| 27 | #' dta_test <- data.frame( | |
| 28 | #' USUBJID = rep(1:6, each = 3), | |
| 29 | #'   PARAMCD = rep("lab", 6 * 3), | |
| 30 | #'   AVISIT = rep(paste0("V", 1:3), 6), | |
| 31 | #' ARM = rep(LETTERS[1:3], rep(6, 3)), | |
| 32 | #' AVAL = c(9:1, rep(NA, 9)), | |
| 33 | #' CHG = c(1:9, rep(NA, 9)) | |
| 34 | #' ) | |
| 35 | #' | |
| 36 | #' ## Default output within a `rtables` pipeline. | |
| 37 | #' basic_table() %>% | |
| 38 | #'   split_cols_by("ARM") %>% | |
| 39 | #'   split_rows_by("AVISIT") %>% | |
| 40 | #'   split_cols_by_multivar(vars = c("AVAL", "CHG")) %>% | |
| 41 | #' summarize_colvars() %>% | |
| 42 | #' build_table(dta_test) | |
| 43 | #' | |
| 44 | #' ## Selection of statistics, formats and labels also work. | |
| 45 | #' basic_table() %>% | |
| 46 | #'   split_cols_by("ARM") %>% | |
| 47 | #'   split_rows_by("AVISIT") %>% | |
| 48 | #'   split_cols_by_multivar(vars = c("AVAL", "CHG")) %>% | |
| 49 | #' summarize_colvars( | |
| 50 | #'     .stats = c("n", "mean_sd"), | |
| 51 | #'     .formats = c("mean_sd" = "xx.x, xx.x"), | |
| 52 | #' .labels = c(n = "n", mean_sd = "Mean, SD") | |
| 53 | #' ) %>% | |
| 54 | #' build_table(dta_test) | |
| 55 | #' | |
| 56 | #' ## Use arguments interpreted by `s_summary`. | |
| 57 | #' basic_table() %>% | |
| 58 | #'   split_cols_by("ARM") %>% | |
| 59 | #'   split_rows_by("AVISIT") %>% | |
| 60 | #'   split_cols_by_multivar(vars = c("AVAL", "CHG")) %>% | |
| 61 | #' summarize_colvars(na.rm = FALSE) %>% | |
| 62 | #' build_table(dta_test) | |
| 63 | #' | |
| 64 | #' @export | |
| 65 | summarize_colvars <- function(lyt, | |
| 66 | na_str = default_na_str(), | |
| 67 | ..., | |
| 68 |                               .stats = c("n", "mean_sd", "median", "range", "count_fraction"), | |
| 69 | .stat_names = NULL, | |
| 70 | .formats = NULL, | |
| 71 | .labels = NULL, | |
| 72 |                               .indent_mods = NULL) { | |
| 73 | # Process standard extra arguments | |
| 74 | 3x |   extra_args <- list(".stats" = .stats) | 
| 75 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 76 | 1x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 77 | 1x | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 78 | 1x | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 79 | ||
| 80 | # Adding additional info from layout to analysis function | |
| 81 | 3x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 82 | 3x | formals(a_summary) <- c(formals(a_summary), extra_args[[".additional_fun_parameters"]]) | 
| 83 | ||
| 84 | 3x | analyze_colvars( | 
| 85 | 3x | lyt, | 
| 86 | 3x | afun = a_summary, | 
| 87 | 3x | na_str = na_str, | 
| 88 | 3x | extra_args = extra_args | 
| 89 | ) | |
| 90 | } | 
| 1 | #' Additional assertions to use with `checkmate` | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Additional assertion functions which can be used together with the `checkmate` package. | |
| 6 | #' | |
| 7 | #' @inheritParams checkmate::assert_factor | |
| 8 | #' @param x (`any`)\cr object to test. | |
| 9 | #' @param df (`data.frame`)\cr data set to test. | |
| 10 | #' @param variables (named `list` of `character`)\cr list of variables to test. | |
| 11 | #' @param include_boundaries (`flag`)\cr whether to include boundaries when testing | |
| 12 | #' for proportions. | |
| 13 | #' @param na_level (`string`)\cr the string you have been using to represent NA or | |
| 14 | #' missing data. For `NA` values please consider using directly [is.na()] or | |
| 15 | #' similar approaches. | |
| 16 | #' | |
| 17 | #' @return Nothing if assertion passes, otherwise prints the error message. | |
| 18 | #' | |
| 19 | #' @name assertions | |
| 20 | NULL | |
| 21 | ||
| 22 | check_list_of_variables <- function(x) { | |
| 23 | # drop NULL elements in list | |
| 24 | 2999x | x <- Filter(Negate(is.null), x) | 
| 25 | ||
| 26 | 2999x | res <- checkmate::check_list(x, | 
| 27 | 2999x | names = "named", | 
| 28 | 2999x | min.len = 1, | 
| 29 | 2999x | any.missing = FALSE, | 
| 30 | 2999x | types = "character" | 
| 31 | ) | |
| 32 | # no empty strings allowed | |
| 33 | 2999x |   if (isTRUE(res)) { | 
| 34 | 2994x | res <- checkmate::check_character(unlist(x), min.chars = 1) | 
| 35 | } | |
| 36 | 2999x | res | 
| 37 | } | |
| 38 | #' @describeIn assertions Checks whether `x` is a valid list of variable names. | |
| 39 | #' `NULL` elements of the list `x` are dropped with `Filter(Negate(is.null), x)`. | |
| 40 | #' | |
| 41 | #' @keywords internal | |
| 42 | assert_list_of_variables <- checkmate::makeAssertionFunction(check_list_of_variables) | |
| 43 | ||
| 44 | check_df_with_variables <- function(df, variables, na_level = NULL) { | |
| 45 | 2682x | checkmate::assert_data_frame(df) | 
| 46 | 2680x | assert_list_of_variables(variables) | 
| 47 | ||
| 48 | # flag for equal variables and column names | |
| 49 | 2678x | err_flag <- all(unlist(variables) %in% colnames(df)) | 
| 50 | 2678x | checkmate::assert_flag(err_flag) | 
| 51 | ||
| 52 | 2678x |   if (isFALSE(err_flag)) { | 
| 53 | 5x | vars <- setdiff(unlist(variables), colnames(df)) | 
| 54 | 5x | return(paste( | 
| 55 | 5x | deparse(substitute(df)), | 
| 56 | 5x | "does not contain all specified variables as column names. Missing from data frame:", | 
| 57 | 5x | paste(vars, collapse = ", ") | 
| 58 | )) | |
| 59 | } | |
| 60 | # checking if na_level is present and in which column | |
| 61 | 2673x |   if (!is.null(na_level)) { | 
| 62 | 9x | checkmate::assert_string(na_level) | 
| 63 | 9x | res <- unlist(lapply(as.list(df)[unlist(variables)], function(x) any(x == na_level))) | 
| 64 | 9x |     if (any(res)) { | 
| 65 | 1x | return(paste0( | 
| 66 | 1x |         deparse(substitute(df)), " contains explicit na_level (", na_level, | 
| 67 | 1x | ") in the following columns: ", paste0(unlist(variables)[res], | 
| 68 | 1x | collapse = ", " | 
| 69 | ) | |
| 70 | )) | |
| 71 | } | |
| 72 | } | |
| 73 | 2672x | return(TRUE) | 
| 74 | } | |
| 75 | #' @describeIn assertions Check whether `df` is a data frame with the analysis `variables`. | |
| 76 | #' Please notice how this produces an error when not all variables are present in the | |
| 77 | #' data.frame while the opposite is not required. | |
| 78 | #' | |
| 79 | #' @examples | |
| 80 | #' x <- data.frame( | |
| 81 | #' a = 1:10, | |
| 82 | #' b = rnorm(10) | |
| 83 | #' ) | |
| 84 | #' assert_df_with_variables(x, variables = list(a = "a", b = "b")) | |
| 85 | #' | |
| 86 | #' x <- ex_adsl | |
| 87 | #' assert_df_with_variables(x, list(a = "ARM", b = "USUBJID")) | |
| 88 | #' | |
| 89 | #' @export | |
| 90 | assert_df_with_variables <- checkmate::makeAssertionFunction(check_df_with_variables) | |
| 91 | ||
| 92 | check_valid_factor <- function(x, | |
| 93 | min.levels = 1, # nolint | |
| 94 | max.levels = NULL, # nolint | |
| 95 | null.ok = TRUE, # nolint | |
| 96 | any.missing = TRUE, # nolint | |
| 97 | n.levels = NULL, # nolint | |
| 98 |                                len = NULL) { | |
| 99 | # checks on levels insertion | |
| 100 | 1113x | checkmate::assert_int(min.levels, lower = 1) | 
| 101 | ||
| 102 | # main factor check | |
| 103 | 1113x | res <- checkmate::check_factor(x, | 
| 104 | 1113x | min.levels = min.levels, | 
| 105 | 1113x | null.ok = null.ok, | 
| 106 | 1113x | max.levels = max.levels, | 
| 107 | 1113x | any.missing = any.missing, | 
| 108 | 1113x | n.levels = n.levels | 
| 109 | ) | |
| 110 | ||
| 111 | # no empty strings allowed | |
| 112 | 1113x |   if (isTRUE(res)) { | 
| 113 | 1099x | res <- checkmate::check_character(levels(x), min.chars = 1) | 
| 114 | } | |
| 115 | ||
| 116 | 1113x | return(res) | 
| 117 | } | |
| 118 | #' @describeIn assertions Check whether `x` is a valid factor (i.e. has levels and no empty | |
| 119 | #' string levels). Note that `NULL` and `NA` elements are allowed. | |
| 120 | #' | |
| 121 | #' @keywords internal | |
| 122 | assert_valid_factor <- checkmate::makeAssertionFunction(check_valid_factor) | |
| 123 | ||
| 124 | check_df_with_factors <- function(df, | |
| 125 | variables, | |
| 126 | min.levels = 1, # nolint | |
| 127 | max.levels = NULL, # nolint | |
| 128 | any.missing = TRUE, # nolint | |
| 129 |                                   na_level = NULL) { | |
| 130 | 254x | res <- check_df_with_variables(df, variables, na_level) | 
| 131 | # checking if all the columns specified by variables are valid factors | |
| 132 | 253x |   if (isTRUE(res)) { | 
| 133 | # searching the data.frame with selected columns (variables) as a list | |
| 134 | 251x | res <- lapply( | 
| 135 | 251x | X = as.list(df)[unlist(variables)], | 
| 136 | 251x | FUN = check_valid_factor, | 
| 137 | 251x | min.levels = min.levels, | 
| 138 | 251x | max.levels = max.levels, | 
| 139 | 251x | any.missing = any.missing | 
| 140 | ) | |
| 141 | 251x | res_lo <- unlist(vapply(res, Negate(isTRUE), logical(1))) | 
| 142 | 251x |     if (any(res_lo)) { | 
| 143 | 6x | return(paste0( | 
| 144 | 6x | deparse(substitute(df)), " does not contain only factor variables among:", | 
| 145 | 6x | "\n* Column `", paste0(unlist(variables)[res_lo], | 
| 146 | 6x | "` of the data.frame -> ", res[res_lo], | 
| 147 | 6x | collapse = "\n* " | 
| 148 | ) | |
| 149 | )) | |
| 150 |     } else { | |
| 151 | 245x | res <- TRUE | 
| 152 | } | |
| 153 | } | |
| 154 | 247x | return(res) | 
| 155 | } | |
| 156 | ||
| 157 | #' @describeIn assertions Check whether `df` is a data frame where the analysis `variables` | |
| 158 | #' are all factors. Note that the creation of `NA` by direct call of `factor()` will | |
| 159 | #' trim `NA` levels out of the vector list itself. | |
| 160 | #' | |
| 161 | #' @examples | |
| 162 | #' x <- ex_adsl | |
| 163 | #' assert_df_with_factors(x, list(a = "ARM")) | |
| 164 | #' | |
| 165 | #' @export | |
| 166 | assert_df_with_factors <- checkmate::makeAssertionFunction(check_df_with_factors) | |
| 167 | ||
| 168 | #' @describeIn assertions Check whether `x` is a proportion: number between 0 and 1. | |
| 169 | #' | |
| 170 | #' @examples | |
| 171 | #' assert_proportion_value(0.95) | |
| 172 | #' assert_proportion_value(1.0, include_boundaries = TRUE) | |
| 173 | #' | |
| 174 | #' @export | |
| 175 | assert_proportion_value <- function(x, include_boundaries = FALSE) { | |
| 176 | 19234x | checkmate::assert_number(x, lower = 0, upper = 1) | 
| 177 | 19222x | checkmate::assert_flag(include_boundaries) | 
| 178 | 19222x |   if (isFALSE(include_boundaries)) { | 
| 179 | 13294x | checkmate::assert_true(x > 0) | 
| 180 | 13292x | checkmate::assert_true(x < 1) | 
| 181 | } | |
| 182 | } | 
| 1 | #' Helper functions for tabulating biomarker effects on binary response by subgroup | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Helper functions which are documented here separately to not confuse the user | |
| 6 | #' when reading about the user-facing functions. | |
| 7 | #' | |
| 8 | #' @inheritParams response_biomarkers_subgroups | |
| 9 | #' @inheritParams extract_rsp_biomarkers | |
| 10 | #' @inheritParams argument_convention | |
| 11 | #' | |
| 12 | #' @examples | |
| 13 | #' library(dplyr) | |
| 14 | #' library(forcats) | |
| 15 | #' | |
| 16 | #' adrs <- tern_ex_adrs | |
| 17 | #' adrs_labels <- formatters::var_labels(adrs) | |
| 18 | #' | |
| 19 | #' adrs_f <- adrs %>% | |
| 20 | #' filter(PARAMCD == "BESRSPI") %>% | |
| 21 | #' mutate(rsp = AVALC == "CR") | |
| 22 | #' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response") | |
| 23 | #' | |
| 24 | #' @name h_response_biomarkers_subgroups | |
| 25 | NULL | |
| 26 | ||
| 27 | #' @describeIn h_response_biomarkers_subgroups helps with converting the "response" function variable list | |
| 28 | #' to the "logistic regression" variable list. The reason is that currently there is an | |
| 29 | #' inconsistency between the variable names accepted by `extract_rsp_subgroups()` and `fit_logistic()`. | |
| 30 | #' | |
| 31 | #' @param biomarker (`string`)\cr the name of the biomarker variable. | |
| 32 | #' | |
| 33 | #' @return | |
| 34 | #' * `h_rsp_to_logistic_variables()` returns a named `list` of elements `response`, `arm`, `covariates`, and `strata`. | |
| 35 | #' | |
| 36 | #' @examples | |
| 37 | #' # This is how the variable list is converted internally. | |
| 38 | #' h_rsp_to_logistic_variables( | |
| 39 | #' variables = list( | |
| 40 | #' rsp = "RSP", | |
| 41 | #'     covariates = c("A", "B"), | |
| 42 | #' strata = "D" | |
| 43 | #' ), | |
| 44 | #' biomarker = "AGE" | |
| 45 | #' ) | |
| 46 | #' | |
| 47 | #' @export | |
| 48 | h_rsp_to_logistic_variables <- function(variables, biomarker) { | |
| 49 | 49x |   if ("strat" %in% names(variables)) { | 
| 50 | ! | warning( | 
| 51 | ! | "Warning: the `strat` element name of the `variables` list argument to `h_rsp_to_logistic_variables() ", | 
| 52 | ! | "was deprecated in tern 0.9.4.\n ", | 
| 53 | ! | "Please use the name `strata` instead of `strat` in the `variables` argument." | 
| 54 | ) | |
| 55 | ! | variables[["strata"]] <- variables[["strat"]] | 
| 56 | } | |
| 57 | 49x | checkmate::assert_list(variables) | 
| 58 | 49x | checkmate::assert_string(variables$rsp) | 
| 59 | 49x | checkmate::assert_string(biomarker) | 
| 60 | 49x | list( | 
| 61 | 49x | response = variables$rsp, | 
| 62 | 49x | arm = biomarker, | 
| 63 | 49x | covariates = variables$covariates, | 
| 64 | 49x | strata = variables$strata | 
| 65 | ) | |
| 66 | } | |
| 67 | ||
| 68 | #' @describeIn h_response_biomarkers_subgroups prepares estimates for number of responses, patients and | |
| 69 | #' overall response rate, as well as odds ratio estimates, confidence intervals and p-values, for multiple | |
| 70 | #' biomarkers in a given single data set. | |
| 71 | #' `variables` corresponds to names of variables found in `data`, passed as a named list and requires elements | |
| 72 | #' `rsp` and `biomarkers` (vector of continuous biomarker variables) and optionally `covariates` | |
| 73 | #' and `strata`. | |
| 74 | #' | |
| 75 | #' @return | |
| 76 | #' * `h_logistic_mult_cont_df()` returns a `data.frame` containing estimates and statistics for the selected biomarkers. | |
| 77 | #' | |
| 78 | #' @examples | |
| 79 | #' # For a single population, estimate separately the effects | |
| 80 | #' # of two biomarkers. | |
| 81 | #' df <- h_logistic_mult_cont_df( | |
| 82 | #' variables = list( | |
| 83 | #' rsp = "rsp", | |
| 84 | #'     biomarkers = c("BMRKR1", "AGE"), | |
| 85 | #' covariates = "SEX" | |
| 86 | #' ), | |
| 87 | #' data = adrs_f | |
| 88 | #' ) | |
| 89 | #' df | |
| 90 | #' | |
| 91 | #' # If the data set is empty, still the corresponding rows with missings are returned. | |
| 92 | #' h_coxreg_mult_cont_df( | |
| 93 | #' variables = list( | |
| 94 | #' rsp = "rsp", | |
| 95 | #'     biomarkers = c("BMRKR1", "AGE"), | |
| 96 | #' covariates = "SEX", | |
| 97 | #' strata = "STRATA1" | |
| 98 | #' ), | |
| 99 | #' data = adrs_f[NULL, ] | |
| 100 | #' ) | |
| 101 | #' | |
| 102 | #' @export | |
| 103 | h_logistic_mult_cont_df <- function(variables, | |
| 104 | data, | |
| 105 |                                     control = control_logistic()) { | |
| 106 | 28x |   if ("strat" %in% names(variables)) { | 
| 107 | ! | warning( | 
| 108 | ! | "Warning: the `strat` element name of the `variables` list argument to `h_logistic_mult_cont_df() ", | 
| 109 | ! | "was deprecated in tern 0.9.4.\n ", | 
| 110 | ! | "Please use the name `strata` instead of `strat` in the `variables` argument." | 
| 111 | ) | |
| 112 | ! | variables[["strata"]] <- variables[["strat"]] | 
| 113 | } | |
| 114 | 28x | assert_df_with_variables(data, variables) | 
| 115 | ||
| 116 | 28x | checkmate::assert_character(variables$biomarkers, min.len = 1, any.missing = FALSE) | 
| 117 | 28x | checkmate::assert_list(control, names = "named") | 
| 118 | ||
| 119 | 28x | conf_level <- control[["conf_level"]] | 
| 120 | 28x | pval_label <- "p-value (Wald)" | 
| 121 | ||
| 122 | # If there is any data, run model, otherwise return empty results. | |
| 123 | 28x |   if (nrow(data) > 0) { | 
| 124 | 27x | bm_cols <- match(variables$biomarkers, names(data)) | 
| 125 | 27x |     l_result <- lapply(variables$biomarkers, function(bm) { | 
| 126 | 48x | model_fit <- fit_logistic( | 
| 127 | 48x | variables = h_rsp_to_logistic_variables(variables, bm), | 
| 128 | 48x | data = data, | 
| 129 | 48x | response_definition = control$response_definition | 
| 130 | ) | |
| 131 | 48x | result <- h_logistic_simple_terms( | 
| 132 | 48x | x = bm, | 
| 133 | 48x | fit_glm = model_fit, | 
| 134 | 48x | conf_level = control$conf_level | 
| 135 | ) | |
| 136 | 48x |       resp_vector <- if (inherits(model_fit, "glm")) { | 
| 137 | 38x | model_fit$model[[variables$rsp]] | 
| 138 |       } else { | |
| 139 | 10x | as.logical(as.matrix(model_fit$y)[, "status"]) | 
| 140 | } | |
| 141 | 48x | data.frame( | 
| 142 | # Dummy column needed downstream to create a nested header. | |
| 143 | 48x | biomarker = bm, | 
| 144 | 48x | biomarker_label = formatters::var_labels(data[bm], fill = TRUE), | 
| 145 | 48x | n_tot = length(resp_vector), | 
| 146 | 48x | n_rsp = sum(resp_vector), | 
| 147 | 48x | prop = mean(resp_vector), | 
| 148 | 48x | or = as.numeric(result[1L, "odds_ratio"]), | 
| 149 | 48x | lcl = as.numeric(result[1L, "lcl"]), | 
| 150 | 48x | ucl = as.numeric(result[1L, "ucl"]), | 
| 151 | 48x | conf_level = conf_level, | 
| 152 | 48x | pval = as.numeric(result[1L, "pvalue"]), | 
| 153 | 48x | pval_label = pval_label, | 
| 154 | 48x | stringsAsFactors = FALSE | 
| 155 | ) | |
| 156 | }) | |
| 157 | 27x | do.call(rbind, args = c(l_result, make.row.names = FALSE)) | 
| 158 |   } else { | |
| 159 | 1x | data.frame( | 
| 160 | 1x | biomarker = variables$biomarkers, | 
| 161 | 1x | biomarker_label = formatters::var_labels(data[variables$biomarkers], fill = TRUE), | 
| 162 | 1x | n_tot = 0L, | 
| 163 | 1x | n_rsp = 0L, | 
| 164 | 1x | prop = NA, | 
| 165 | 1x | or = NA, | 
| 166 | 1x | lcl = NA, | 
| 167 | 1x | ucl = NA, | 
| 168 | 1x | conf_level = conf_level, | 
| 169 | 1x | pval = NA, | 
| 170 | 1x | pval_label = pval_label, | 
| 171 | 1x | row.names = seq_along(variables$biomarkers), | 
| 172 | 1x | stringsAsFactors = FALSE | 
| 173 | ) | |
| 174 | } | |
| 175 | } | 
| 1 | #' Summarize change from baseline values or absolute baseline values | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [summarize_change()] creates a layout element to summarize the change from baseline or absolute | |
| 6 | #' baseline values. The primary analysis variable `vars` indicates the numerical change from baseline results. | |
| 7 | #' | |
| 8 | #' Required secondary analysis variables `value` and `baseline_flag` can be supplied to the function via | |
| 9 | #' the `variables` argument. The `value` element should be the name of the analysis value variable, and the | |
| 10 | #' `baseline_flag` element should be the name of the flag variable that indicates whether or not records contain | |
| 11 | #' baseline values. Depending on the baseline flag given, either the absolute baseline values (at baseline) | |
| 12 | #' or the change from baseline values (post-baseline) are then summarized. | |
| 13 | #' | |
| 14 | #' @inheritParams argument_convention | |
| 15 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 16 | #' | |
| 17 | #'   Options are: ``r shQuote(get_stats("analyze_vars_numeric"), type = "sh")`` | |
| 18 | #' | |
| 19 | #' @name summarize_change | |
| 20 | #' @order 1 | |
| 21 | NULL | |
| 22 | ||
| 23 | #' @describeIn summarize_change Statistics function that summarizes baseline or post-baseline visits. | |
| 24 | #' | |
| 25 | #' @return | |
| 26 | #' * `s_change_from_baseline()` returns the same values returned by [s_summary.numeric()]. | |
| 27 | #' | |
| 28 | #' @note The data in `df` must be either all be from baseline or post-baseline visits. Otherwise | |
| 29 | #' an error will be thrown. | |
| 30 | #' | |
| 31 | #' @keywords internal | |
| 32 | s_change_from_baseline <- function(df, ...) { | |
| 33 | 10x | args_list <- list(...) | 
| 34 | 10x | .var <- args_list[[".var"]] | 
| 35 | 10x | variables <- args_list[["variables"]] | 
| 36 | ||
| 37 | 10x | checkmate::assert_numeric(df[[variables$value]]) | 
| 38 | 10x | checkmate::assert_numeric(df[[.var]]) | 
| 39 | 10x | checkmate::assert_logical(df[[variables$baseline_flag]]) | 
| 40 | 10x | checkmate::assert_vector(unique(df[[variables$baseline_flag]]), max.len = 1) | 
| 41 | 10x | assert_df_with_variables(df, c(variables, list(chg = .var))) | 
| 42 | ||
| 43 | 10x | combined <- ifelse( | 
| 44 | 10x | df[[variables$baseline_flag]], | 
| 45 | 10x | df[[variables$value]], | 
| 46 | 10x | df[[.var]] | 
| 47 | ) | |
| 48 | 10x |   if (is.logical(combined) && identical(length(combined), 0L)) { | 
| 49 | 1x | combined <- numeric(0) | 
| 50 | } | |
| 51 | 10x | s_summary(combined, ...) | 
| 52 | } | |
| 53 | ||
| 54 | #' @describeIn summarize_change Formatted analysis function which is used as `afun` in `summarize_change()`. | |
| 55 | #' | |
| 56 | #' @return | |
| 57 | #' * `a_change_from_baseline()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 58 | #' | |
| 59 | #' @keywords internal | |
| 60 | a_change_from_baseline <- function(df, | |
| 61 | ..., | |
| 62 | .stats = NULL, | |
| 63 | .stat_names = NULL, | |
| 64 | .formats = NULL, | |
| 65 | .labels = NULL, | |
| 66 |                                    .indent_mods = NULL) { | |
| 67 | # Check for additional parameters to the statistics function | |
| 68 | 8x | dots_extra_args <- list(...) | 
| 69 | 8x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 70 | 8x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 71 | ||
| 72 | # Check for user-defined functions | |
| 73 | 8x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 74 | 8x | .stats <- default_and_custom_stats_list$all_stats | 
| 75 | 8x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 76 | ||
| 77 | # Apply statistics function | |
| 78 | 8x | x_stats <- .apply_stat_functions( | 
| 79 | 8x | default_stat_fnc = s_change_from_baseline, | 
| 80 | 8x | custom_stat_fnc_list = custom_stat_functions, | 
| 81 | 8x | args_list = c( | 
| 82 | 8x | df = list(df), | 
| 83 | 8x | extra_afun_params, | 
| 84 | 8x | dots_extra_args | 
| 85 | ) | |
| 86 | ) | |
| 87 | ||
| 88 | # Fill in with formatting defaults | |
| 89 | 6x |   .stats <- get_stats("analyze_vars_numeric", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) | 
| 90 | 6x | .formats <- get_formats_from_stats(.stats, .formats) | 
| 91 | 6x | .labels <- get_labels_from_stats(.stats, .labels) | 
| 92 | 6x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods) | 
| 93 | ||
| 94 | 6x | x_stats <- x_stats[.stats] | 
| 95 | ||
| 96 | # Auto format handling | |
| 97 | 6x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 98 | ||
| 99 | # Get and check statistical names | |
| 100 | 6x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 101 | ||
| 102 | 6x | in_rows( | 
| 103 | 6x | .list = x_stats, | 
| 104 | 6x | .formats = .formats, | 
| 105 | 6x | .names = names(.labels), | 
| 106 | 6x | .stat_names = .stat_names, | 
| 107 | 6x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 108 | 6x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 109 | ) | |
| 110 | } | |
| 111 | ||
| 112 | #' @describeIn summarize_change Layout-creating function which can take statistics function arguments | |
| 113 | #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 114 | #' | |
| 115 | #' @return | |
| 116 | #' * `summarize_change()` returns a layout object suitable for passing to further layouting functions, | |
| 117 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 118 | #' the statistics from `s_change_from_baseline()` to the table layout. | |
| 119 | #' | |
| 120 | #' @note To be used after a split on visits in the layout, such that each data subset only contains | |
| 121 | #' either baseline or post-baseline data. | |
| 122 | #' | |
| 123 | #' @examples | |
| 124 | #' library(dplyr) | |
| 125 | #' | |
| 126 | #' # Fabricate dataset | |
| 127 | #' dta_test <- data.frame( | |
| 128 | #' USUBJID = rep(1:6, each = 3), | |
| 129 | #'   AVISIT = rep(paste0("V", 1:3), 6), | |
| 130 | #' ARM = rep(LETTERS[1:3], rep(6, 3)), | |
| 131 | #' AVAL = c(9:1, rep(NA, 9)) | |
| 132 | #' ) %>% | |
| 133 | #' mutate(ABLFLL = AVISIT == "V1") %>% | |
| 134 | #' group_by(USUBJID) %>% | |
| 135 | #' mutate( | |
| 136 | #' BLVAL = AVAL[ABLFLL], | |
| 137 | #' CHG = AVAL - BLVAL | |
| 138 | #' ) %>% | |
| 139 | #' ungroup() | |
| 140 | #' | |
| 141 | #' results <- basic_table() %>% | |
| 142 | #'   split_cols_by("ARM") %>% | |
| 143 | #'   split_rows_by("AVISIT") %>% | |
| 144 | #'   summarize_change("CHG", variables = list(value = "AVAL", baseline_flag = "ABLFLL")) %>% | |
| 145 | #' build_table(dta_test) | |
| 146 | #' | |
| 147 | #' results | |
| 148 | #' | |
| 149 | #' @export | |
| 150 | #' @order 2 | |
| 151 | summarize_change <- function(lyt, | |
| 152 | vars, | |
| 153 | variables, | |
| 154 | var_labels = vars, | |
| 155 | na_str = default_na_str(), | |
| 156 | na_rm = TRUE, | |
| 157 | nested = TRUE, | |
| 158 | show_labels = "default", | |
| 159 | table_names = vars, | |
| 160 | section_div = NA_character_, | |
| 161 | ..., | |
| 162 |                              .stats = c("n", "mean_sd", "median", "range"), | |
| 163 | .stat_names = NULL, | |
| 164 | .formats = c( | |
| 165 | mean_sd = "xx.xx (xx.xx)", | |
| 166 | mean_se = "xx.xx (xx.xx)", | |
| 167 | median = "xx.xx", | |
| 168 | range = "xx.xx - xx.xx", | |
| 169 | mean_pval = "xx.xx" | |
| 170 | ), | |
| 171 | .labels = NULL, | |
| 172 |                              .indent_mods = NULL) { | |
| 173 | # Process standard extra arguments | |
| 174 | 4x |   extra_args <- list(".stats" = .stats) | 
| 175 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 176 | 4x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 177 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 178 | ! | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 179 | ||
| 180 | # Process additional arguments to the statistic function | |
| 181 | 4x | extra_args <- c( | 
| 182 | 4x | extra_args, | 
| 183 | 4x | variables = list(variables), | 
| 184 | 4x | na_rm = na_rm, | 
| 185 | ... | |
| 186 | ) | |
| 187 | ||
| 188 | # Append additional info from layout to the analysis function | |
| 189 | 4x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 190 | 4x | formals(a_change_from_baseline) <- c(formals(a_change_from_baseline), extra_args[[".additional_fun_parameters"]]) | 
| 191 | ||
| 192 | 4x | analyze( | 
| 193 | 4x | lyt = lyt, | 
| 194 | 4x | vars = vars, | 
| 195 | 4x | afun = a_change_from_baseline, | 
| 196 | 4x | na_str = na_str, | 
| 197 | 4x | nested = nested, | 
| 198 | 4x | extra_args = extra_args, | 
| 199 | 4x | var_labels = var_labels, | 
| 200 | 4x | show_labels = show_labels, | 
| 201 | 4x | table_names = table_names, | 
| 202 | 4x | inclNAs = !na_rm, | 
| 203 | 4x | section_div = section_div | 
| 204 | ) | |
| 205 | } | 
| 1 | #' Occurrence table sorting | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Functions to score occurrence table subtables and rows which can be used in the | |
| 6 | #' sorting of occurrence tables. | |
| 7 | #' | |
| 8 | #' @name score_occurrences | |
| 9 | NULL | |
| 10 | ||
| 11 | #' @describeIn score_occurrences Scoring function which sums the counts across all | |
| 12 | #' columns. It will fail if anything else but counts are used. | |
| 13 | #' | |
| 14 | #' @inheritParams rtables_access | |
| 15 | #' | |
| 16 | #' @return | |
| 17 | #' * `score_occurrences()` returns the sum of counts across all columns of a table row. | |
| 18 | #' | |
| 19 | #' @seealso [h_row_first_values()] | |
| 20 | #' | |
| 21 | #' @examples | |
| 22 | #' lyt <- basic_table() %>% | |
| 23 | #'   split_cols_by("ARM") %>% | |
| 24 | #' add_colcounts() %>% | |
| 25 | #' analyze_num_patients( | |
| 26 | #' vars = "USUBJID", | |
| 27 | #'     .stats = c("unique"), | |
| 28 | #'     .labels = c("Total number of patients with at least one event") | |
| 29 | #' ) %>% | |
| 30 | #'   split_rows_by("AEBODSYS", child_labels = "visible", nested = FALSE) %>% | |
| 31 | #' summarize_num_patients( | |
| 32 | #' var = "USUBJID", | |
| 33 | #'     .stats = c("unique", "nonunique"), | |
| 34 | #' .labels = c( | |
| 35 | #' "Total number of patients with at least one event", | |
| 36 | #' "Total number of events" | |
| 37 | #' ) | |
| 38 | #' ) %>% | |
| 39 | #' count_occurrences(vars = "AEDECOD") | |
| 40 | #' | |
| 41 | #' tbl <- build_table(lyt, tern_ex_adae, alt_counts_df = tern_ex_adsl) %>% | |
| 42 | #' prune_table() | |
| 43 | #' | |
| 44 | #' tbl_sorted <- tbl %>% | |
| 45 | #'   sort_at_path(path = c("AEBODSYS", "*", "AEDECOD"), scorefun = score_occurrences) | |
| 46 | #' | |
| 47 | #' tbl_sorted | |
| 48 | #' | |
| 49 | #' @export | |
| 50 | score_occurrences <- function(table_row) { | |
| 51 | 37x | row_counts <- h_row_counts(table_row) | 
| 52 | 37x | sum(row_counts) | 
| 53 | } | |
| 54 | ||
| 55 | #' @describeIn score_occurrences Scoring functions can be produced by this constructor to only include | |
| 56 | #' specific columns in the scoring. See [h_row_counts()] for further information. | |
| 57 | #' | |
| 58 | #' @inheritParams has_count_in_cols | |
| 59 | #' | |
| 60 | #' @return | |
| 61 | #' * `score_occurrences_cols()` returns a function that sums counts across all specified columns | |
| 62 | #' of a table row. | |
| 63 | #' | |
| 64 | #' @seealso [h_row_counts()] | |
| 65 | #' | |
| 66 | #' @examples | |
| 67 | #' score_cols_a_and_b <- score_occurrences_cols(col_names = c("A: Drug X", "B: Placebo")) | |
| 68 | #' | |
| 69 | #' # Note that this here just sorts the AEDECOD inside the AEBODSYS. The AEBODSYS are not sorted. | |
| 70 | #' # That would require a second pass of `sort_at_path`. | |
| 71 | #' tbl_sorted <- tbl %>% | |
| 72 | #'   sort_at_path(path = c("AEBODSYS", "*", "AEDECOD"), scorefun = score_cols_a_and_b) | |
| 73 | #' | |
| 74 | #' tbl_sorted | |
| 75 | #' | |
| 76 | #' @export | |
| 77 | score_occurrences_cols <- function(...) { | |
| 78 | 4x |   function(table_row) { | 
| 79 | 20x | row_counts <- h_row_counts(table_row, ...) | 
| 80 | 20x | sum(row_counts) | 
| 81 | } | |
| 82 | } | |
| 83 | ||
| 84 | #' @describeIn score_occurrences Scoring functions produced by this constructor can be used on | |
| 85 | #' subtables: They sum up all specified column counts in the subtable. This is useful when | |
| 86 | #' there is no available content row summing up these counts. | |
| 87 | #' | |
| 88 | #' @return | |
| 89 | #' * `score_occurrences_subtable()` returns a function that sums counts in each subtable | |
| 90 | #' across all specified columns. | |
| 91 | #' | |
| 92 | #' @examples | |
| 93 | #' score_subtable_all <- score_occurrences_subtable(col_names = names(tbl)) | |
| 94 | #' | |
| 95 | #' # Note that this code just sorts the AEBODSYS, not the AEDECOD within AEBODSYS. That | |
| 96 | #' # would require a second pass of `sort_at_path`. | |
| 97 | #' tbl_sorted <- tbl %>% | |
| 98 | #'   sort_at_path(path = c("AEBODSYS"), scorefun = score_subtable_all, decreasing = FALSE) | |
| 99 | #' | |
| 100 | #' tbl_sorted | |
| 101 | #' | |
| 102 | #' @export | |
| 103 | score_occurrences_subtable <- function(...) { | |
| 104 | 1x | score_table_row <- score_occurrences_cols(...) | 
| 105 | 1x |   function(table_tree) { | 
| 106 | 2x | table_rows <- collect_leaves(table_tree) | 
| 107 | 2x | counts <- vapply(table_rows, score_table_row, numeric(1)) | 
| 108 | 2x | sum(counts) | 
| 109 | } | |
| 110 | } | |
| 111 | ||
| 112 | #' @describeIn score_occurrences Produces a score function for sorting table by summing the first content row in | |
| 113 | #' specified columns. Note that this is extending [rtables::cont_n_onecol()] and [rtables::cont_n_allcols()]. | |
| 114 | #' | |
| 115 | #' @return | |
| 116 | #' * `score_occurrences_cont_cols()` returns a function that sums counts in the first content row in | |
| 117 | #' specified columns. | |
| 118 | #' | |
| 119 | #' @export | |
| 120 | score_occurrences_cont_cols <- function(...) { | |
| 121 | 1x | score_table_row <- score_occurrences_cols(...) | 
| 122 | 1x |   function(table_tree) { | 
| 123 | 2x |     if (inherits(table_tree, "ContentRow")) { | 
| 124 | ! | return(NA) | 
| 125 | } | |
| 126 | 2x | content_row <- h_content_first_row(table_tree) | 
| 127 | 2x | score_table_row(content_row) | 
| 128 | } | |
| 129 | } | 
| 1 | #' Count occurrences | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [count_occurrences()] creates a layout element to calculate occurrence counts for patients. | |
| 6 | #' | |
| 7 | #' This function analyzes the variable(s) supplied to `vars` and returns a table of occurrence counts for | |
| 8 | #' each unique value (or level) of the variable(s). This variable (or variables) must be | |
| 9 | #' non-numeric. The `id` variable is used to indicate unique subject identifiers (defaults to `USUBJID`). | |
| 10 | #' | |
| 11 | #' If there are multiple occurrences of the same value recorded for a patient, the value is only counted once. | |
| 12 | #' | |
| 13 | #' The summarize function [summarize_occurrences()] performs the same function as [count_occurrences()] except it | |
| 14 | #' creates content rows, not data rows, to summarize the current table row/column context and operates on the level of | |
| 15 | #' the latest row split or the root of the table if no row splits have occurred. | |
| 16 | #' | |
| 17 | #' @inheritParams argument_convention | |
| 18 | #' @param drop (`flag`)\cr whether non-appearing occurrence levels should be dropped from the resulting table. | |
| 19 | #' Note that in that case the remaining occurrence levels in the table are sorted alphabetically. | |
| 20 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 21 | #' | |
| 22 | #'   Options are: ``r shQuote(get_stats("count_occurrences"), type = "sh")`` | |
| 23 | #' | |
| 24 | #' @note By default, occurrences which don't appear in a given row split are dropped from the table and | |
| 25 | #' the occurrences in the table are sorted alphabetically per row split. Therefore, the corresponding layout | |
| 26 | #' needs to use `split_fun = drop_split_levels` in the `split_rows_by` calls. Use `drop = FALSE` if you would | |
| 27 | #' like to show all occurrences. | |
| 28 | #' | |
| 29 | #' @examples | |
| 30 | #' library(dplyr) | |
| 31 | #' df <- data.frame( | |
| 32 | #' USUBJID = as.character(c( | |
| 33 | #' 1, 1, 2, 4, 4, 4, | |
| 34 | #' 6, 6, 6, 7, 7, 8 | |
| 35 | #' )), | |
| 36 | #' MHDECOD = c( | |
| 37 | #' "MH1", "MH2", "MH1", "MH1", "MH1", "MH3", | |
| 38 | #' "MH2", "MH2", "MH3", "MH1", "MH2", "MH4" | |
| 39 | #' ), | |
| 40 | #'   ARM = rep(c("A", "B"), each = 6), | |
| 41 | #'   SEX = c("F", "F", "M", "M", "M", "M", "F", "F", "F", "M", "M", "F") | |
| 42 | #' ) | |
| 43 | #' df_adsl <- df %>% | |
| 44 | #' select(USUBJID, ARM) %>% | |
| 45 | #' unique() | |
| 46 | #' | |
| 47 | #' @name count_occurrences | |
| 48 | #' @order 1 | |
| 49 | NULL | |
| 50 | ||
| 51 | #' @describeIn count_occurrences Statistics function which counts number of patients that report an | |
| 52 | #' occurrence. | |
| 53 | #' | |
| 54 | #' @param denom (`string`)\cr choice of denominator for proportion. Options are: | |
| 55 | #' * `N_col`: total number of patients in this column across rows. | |
| 56 | #' * `n`: number of patients with any occurrences. | |
| 57 | #' * `N_row`: total number of patients in this row across columns. | |
| 58 | #' | |
| 59 | #' @return | |
| 60 | #' * `s_count_occurrences()` returns a list with: | |
| 61 | #' * `count`: list of counts with one element per occurrence. | |
| 62 | #' * `count_fraction`: list of counts and fractions with one element per occurrence. | |
| 63 | #' * `fraction`: list of numerators and denominators with one element per occurrence. | |
| 64 | #' | |
| 65 | #' @examples | |
| 66 | #' # Count unique occurrences per subject. | |
| 67 | #' s_count_occurrences( | |
| 68 | #' df, | |
| 69 | #' .N_col = 4L, | |
| 70 | #' .N_row = 4L, | |
| 71 | #' .df_row = df, | |
| 72 | #' .var = "MHDECOD", | |
| 73 | #' id = "USUBJID" | |
| 74 | #' ) | |
| 75 | #' | |
| 76 | #' @export | |
| 77 | s_count_occurrences <- function(df, | |
| 78 | .var = "MHDECOD", | |
| 79 | .N_col, # nolint | |
| 80 | .N_row, # nolint | |
| 81 | .df_row, | |
| 82 | ..., | |
| 83 | drop = TRUE, | |
| 84 | id = "USUBJID", | |
| 85 |                                 denom = c("N_col", "n", "N_row")) { | |
| 86 | 126x | checkmate::assert_flag(drop) | 
| 87 | 126x | assert_df_with_variables(df, list(range = .var, id = id)) | 
| 88 | 126x | checkmate::assert_count(.N_col) | 
| 89 | 126x |   checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character")) | 
| 90 | 126x |   checkmate::assert_multi_class(df[[id]], classes = c("factor", "character")) | 
| 91 | ||
| 92 | 126x |   occurrences <- if (drop) { | 
| 93 | # Note that we don't try to preserve original level order here since a) that would required | |
| 94 | # more time to look up in large original levels and b) that would fail for character input variable. | |
| 95 | 115x | occurrence_levels <- sort(unique(.df_row[[.var]])) | 
| 96 | 115x |     if (length(occurrence_levels) == 0) { | 
| 97 | 1x | stop( | 
| 98 | 1x | "no empty `.df_row` input allowed when `drop = TRUE`,", | 
| 99 | 1x | " please use `split_fun = drop_split_levels` in the `rtables` `split_rows_by` calls" | 
| 100 | ) | |
| 101 | } | |
| 102 | 114x | factor(df[[.var]], levels = occurrence_levels) | 
| 103 |   } else { | |
| 104 | 11x | df[[.var]] | 
| 105 | } | |
| 106 | 125x | ids <- factor(df[[id]]) | 
| 107 | 125x | denom <- match.arg(denom) %>% | 
| 108 | 125x | switch( | 
| 109 | 125x | n = nlevels(ids), | 
| 110 | 125x | N_row = .N_row, | 
| 111 | 125x | N_col = .N_col | 
| 112 | ) | |
| 113 | 125x | has_occurrence_per_id <- table(occurrences, ids) > 0 | 
| 114 | 125x | n_ids_per_occurrence <- as.list(rowSums(has_occurrence_per_id)) | 
| 115 | 125x | cur_count_fraction <- lapply( | 
| 116 | 125x | n_ids_per_occurrence, | 
| 117 | 125x |     function(i, denom) { | 
| 118 | 514x |       if (i == 0 && denom == 0) { | 
| 119 | ! | c(0, 0) | 
| 120 |       } else { | |
| 121 | 514x | c(i, i / denom) | 
| 122 | } | |
| 123 | }, | |
| 124 | 125x | denom = denom | 
| 125 | ) | |
| 126 | ||
| 127 | 125x | list( | 
| 128 | 125x | count = n_ids_per_occurrence, | 
| 129 | 125x | count_fraction = cur_count_fraction, | 
| 130 | 125x | count_fraction_fixed_dp = cur_count_fraction, | 
| 131 | 125x | fraction = lapply( | 
| 132 | 125x | n_ids_per_occurrence, | 
| 133 | 125x |       function(i, denom) c("num" = i, "denom" = denom), | 
| 134 | 125x | denom = denom | 
| 135 | ) | |
| 136 | ) | |
| 137 | } | |
| 138 | ||
| 139 | #' @describeIn count_occurrences Formatted analysis function which is used as `afun` | |
| 140 | #' in `count_occurrences()`. | |
| 141 | #' | |
| 142 | #' @return | |
| 143 | #' * `a_count_occurrences()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 144 | #' | |
| 145 | #' @examples | |
| 146 | #' a_count_occurrences( | |
| 147 | #' df, | |
| 148 | #' .N_col = 4L, | |
| 149 | #' .df_row = df, | |
| 150 | #' .var = "MHDECOD", | |
| 151 | #' id = "USUBJID" | |
| 152 | #' ) | |
| 153 | #' | |
| 154 | #' @export | |
| 155 | a_count_occurrences <- function(df, | |
| 156 | labelstr = "", | |
| 157 | ..., | |
| 158 | .stats = NULL, | |
| 159 | .stat_names = NULL, | |
| 160 | .formats = NULL, | |
| 161 | .labels = NULL, | |
| 162 |                                 .indent_mods = NULL) { | |
| 163 | # Check for additional parameters to the statistics function | |
| 164 | 85x | dots_extra_args <- list(...) | 
| 165 | 85x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 166 | 85x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 167 | ||
| 168 | # Check for user-defined functions | |
| 169 | 85x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 170 | 85x | .stats <- default_and_custom_stats_list$all_stats | 
| 171 | 85x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 172 | ||
| 173 | # Apply statistics function | |
| 174 | 85x | x_stats <- .apply_stat_functions( | 
| 175 | 85x | default_stat_fnc = s_count_occurrences, | 
| 176 | 85x | custom_stat_fnc_list = custom_stat_functions, | 
| 177 | 85x | args_list = c( | 
| 178 | 85x | df = list(df), | 
| 179 | 85x | extra_afun_params, | 
| 180 | 85x | dots_extra_args | 
| 181 | ) | |
| 182 | ) | |
| 183 | ||
| 184 | # if empty, return NA | |
| 185 | 85x |   if (is.null(unlist(x_stats))) { | 
| 186 | 3x | return(in_rows(.list = as.list(rep(NA, length(.stats))) %>% stats::setNames(.stats))) | 
| 187 | } | |
| 188 | ||
| 189 | # Fill in formatting defaults | |
| 190 | 82x |   .stats <- get_stats("count_occurrences", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) | 
| 191 | 82x | x_stats <- x_stats[.stats] | 
| 192 | 82x | levels_per_stats <- lapply(x_stats, names) | 
| 193 | 82x | .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) | 
| 194 | 82x | .labels <- get_labels_from_stats(.stats, .labels, levels_per_stats) | 
| 195 | 82x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) | 
| 196 | ||
| 197 | 82x | x_stats <- x_stats[.stats] %>% | 
| 198 | 82x | .unlist_keep_nulls() %>% | 
| 199 | 82x | setNames(names(.formats)) | 
| 200 | ||
| 201 | # Auto format handling | |
| 202 | 82x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 203 | ||
| 204 | # Get and check statistical names | |
| 205 | 82x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 206 | ||
| 207 | 82x | in_rows( | 
| 208 | 82x | .list = x_stats, | 
| 209 | 82x | .formats = .formats, | 
| 210 | 82x | .names = .labels %>% .unlist_keep_nulls(), | 
| 211 | 82x | .stat_names = .stat_names, | 
| 212 | 82x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 213 | 82x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 214 | ) | |
| 215 | } | |
| 216 | ||
| 217 | #' @describeIn count_occurrences Layout-creating function which can take statistics function arguments | |
| 218 | #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 219 | #' | |
| 220 | #' @return | |
| 221 | #' * `count_occurrences()` returns a layout object suitable for passing to further layouting functions, | |
| 222 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 223 | #' the statistics from `s_count_occurrences()` to the table layout. | |
| 224 | #' | |
| 225 | #' @examples | |
| 226 | #' # Create table layout | |
| 227 | #' lyt <- basic_table() %>% | |
| 228 | #'   split_cols_by("ARM") %>% | |
| 229 | #' add_colcounts() %>% | |
| 230 | #'   count_occurrences(vars = "MHDECOD", .stats = c("count_fraction")) | |
| 231 | #' | |
| 232 | #' # Apply table layout to data and produce `rtable` object | |
| 233 | #' tbl <- lyt %>% | |
| 234 | #' build_table(df, alt_counts_df = df_adsl) %>% | |
| 235 | #' prune_table() | |
| 236 | #' | |
| 237 | #' tbl | |
| 238 | #' | |
| 239 | #' @export | |
| 240 | #' @order 2 | |
| 241 | count_occurrences <- function(lyt, | |
| 242 | vars, | |
| 243 | id = "USUBJID", | |
| 244 | drop = TRUE, | |
| 245 | var_labels = vars, | |
| 246 | show_labels = "hidden", | |
| 247 | riskdiff = FALSE, | |
| 248 | na_str = default_na_str(), | |
| 249 | nested = TRUE, | |
| 250 | ..., | |
| 251 | table_names = vars, | |
| 252 | .stats = "count_fraction_fixed_dp", | |
| 253 | .stat_names = NULL, | |
| 254 | .formats = NULL, | |
| 255 | .labels = NULL, | |
| 256 |                               .indent_mods = NULL) { | |
| 257 | 9x | checkmate::assert_flag(riskdiff) | 
| 258 | 9x | afun <- if (isFALSE(riskdiff)) a_count_occurrences else afun_riskdiff | 
| 259 | ||
| 260 | # Process standard extra arguments | |
| 261 | 9x |   extra_args <- list(".stats" = .stats) | 
| 262 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 263 | ! | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 264 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 265 | 1x | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 266 | ||
| 267 | # Process additional arguments to the statistic function | |
| 268 | 9x | extra_args <- c( | 
| 269 | 9x | extra_args, | 
| 270 | 9x | id = id, drop = drop, | 
| 271 | 9x |     if (!isFALSE(riskdiff)) list(afun = list("s_count_occurrences" = a_count_occurrences)), | 
| 272 | ... | |
| 273 | ) | |
| 274 | ||
| 275 | # Append additional info from layout to the analysis function | |
| 276 | 9x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 277 | 9x | formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]]) | 
| 278 | ||
| 279 | 9x | analyze( | 
| 280 | 9x | lyt = lyt, | 
| 281 | 9x | vars = vars, | 
| 282 | 9x | afun = afun, | 
| 283 | 9x | na_str = na_str, | 
| 284 | 9x | nested = nested, | 
| 285 | 9x | extra_args = extra_args, | 
| 286 | 9x | var_labels = var_labels, | 
| 287 | 9x | show_labels = show_labels, | 
| 288 | 9x | table_names = table_names | 
| 289 | ) | |
| 290 | } | |
| 291 | ||
| 292 | #' @describeIn count_occurrences Layout-creating function which can take content function arguments | |
| 293 | #' and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()]. | |
| 294 | #' | |
| 295 | #' @return | |
| 296 | #' * `summarize_occurrences()` returns a layout object suitable for passing to further layouting functions, | |
| 297 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted content rows | |
| 298 | #' containing the statistics from `s_count_occurrences()` to the table layout. | |
| 299 | #' | |
| 300 | #' @examples | |
| 301 | #' # Layout creating function with custom format. | |
| 302 | #' basic_table() %>% | |
| 303 | #' add_colcounts() %>% | |
| 304 | #'   split_rows_by("SEX", child_labels = "visible") %>% | |
| 305 | #' summarize_occurrences( | |
| 306 | #' var = "MHDECOD", | |
| 307 | #'     .formats = c("count_fraction" = "xx.xx (xx.xx%)") | |
| 308 | #' ) %>% | |
| 309 | #' build_table(df, alt_counts_df = df_adsl) | |
| 310 | #' | |
| 311 | #' @export | |
| 312 | #' @order 3 | |
| 313 | summarize_occurrences <- function(lyt, | |
| 314 | var, | |
| 315 | id = "USUBJID", | |
| 316 | drop = TRUE, | |
| 317 | riskdiff = FALSE, | |
| 318 | na_str = default_na_str(), | |
| 319 | ..., | |
| 320 | .stats = "count_fraction_fixed_dp", | |
| 321 | .stat_names = NULL, | |
| 322 | .formats = NULL, | |
| 323 | .indent_mods = 0L, | |
| 324 |                                   .labels = NULL) { | |
| 325 | 5x | checkmate::assert_flag(riskdiff) | 
| 326 | 5x | afun <- if (isFALSE(riskdiff)) a_count_occurrences else afun_riskdiff | 
| 327 | ||
| 328 | # Process standard extra arguments | |
| 329 | 5x |   extra_args <- list(".stats" = .stats) | 
| 330 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 331 | ! | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 332 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 333 | 5x |   if (is.null(.indent_mods)) { | 
| 334 | ! | indent_mod <- 0L | 
| 335 | 5x |   } else if (length(.indent_mods) == 1) { | 
| 336 | 5x | indent_mod <- .indent_mods | 
| 337 |   } else { | |
| 338 | ! | indent_mod <- 0L | 
| 339 | ! | extra_args[[".indent_mods"]] <- .indent_mods | 
| 340 | } | |
| 341 | ||
| 342 | # Process additional arguments to the statistic function | |
| 343 | 5x | extra_args <- c( | 
| 344 | 5x | extra_args, | 
| 345 | 5x | id = id, drop = drop, | 
| 346 | 5x |     if (!isFALSE(riskdiff)) list(afun = list("s_count_occurrences" = a_count_occurrences)), | 
| 347 | ... | |
| 348 | ) | |
| 349 | ||
| 350 | # Append additional info from layout to the analysis function | |
| 351 | 5x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 352 | 5x | formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]]) | 
| 353 | ||
| 354 | 5x | summarize_row_groups( | 
| 355 | 5x | lyt = lyt, | 
| 356 | 5x | var = var, | 
| 357 | 5x | cfun = afun, | 
| 358 | 5x | na_str = na_str, | 
| 359 | 5x | extra_args = extra_args, | 
| 360 | 5x | indent_mod = indent_mod | 
| 361 | ) | |
| 362 | } | 
| 1 | #' Difference test for two proportions | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [test_proportion_diff()] creates a layout element to test the difference between two | |
| 6 | #' proportions. The primary analysis variable, `vars`, indicates whether a response has occurred for each record. See | |
| 7 | #' the `method` parameter for options of methods to use to calculate the p-value. Additionally, a stratification | |
| 8 | #' variable can be supplied via the `strata` element of the `variables` argument. | |
| 9 | #' | |
| 10 | #' @inheritParams argument_convention | |
| 11 | #' @param method (`string`)\cr one of `chisq`, `cmh`, `fisher`, or `schouten`; specifies the test used | |
| 12 | #' to calculate the p-value. | |
| 13 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 14 | #' | |
| 15 | #'   Options are: ``r shQuote(get_stats("test_proportion_diff"), type = "sh")`` | |
| 16 | #' | |
| 17 | #' @seealso [h_prop_diff_test] | |
| 18 | #' | |
| 19 | #' @name prop_diff_test | |
| 20 | #' @order 1 | |
| 21 | NULL | |
| 22 | ||
| 23 | #' @describeIn prop_diff_test Statistics function which tests the difference between two proportions. | |
| 24 | #' | |
| 25 | #' @return | |
| 26 | #' * `s_test_proportion_diff()` returns a named `list` with a single item `pval` with an attribute `label` | |
| 27 | #' describing the method used. The p-value tests the null hypothesis that proportions in two groups are the same. | |
| 28 | #' | |
| 29 | #' @examples | |
| 30 | #' | |
| 31 | #' ## "Mid" case: 4/4 respond in group A, 1/2 respond in group B. | |
| 32 | #' nex <- 100 # Number of example rows | |
| 33 | #' dta <- data.frame( | |
| 34 | #' "rsp" = sample(c(TRUE, FALSE), nex, TRUE), | |
| 35 | #'   "grp" = sample(c("A", "B"), nex, TRUE), | |
| 36 | #'   "f1" = sample(c("a1", "a2"), nex, TRUE), | |
| 37 | #'   "f2" = sample(c("x", "y", "z"), nex, TRUE), | |
| 38 | #' stringsAsFactors = TRUE | |
| 39 | #' ) | |
| 40 | #' s_test_proportion_diff( | |
| 41 | #' df = subset(dta, grp == "A"), | |
| 42 | #' .var = "rsp", | |
| 43 | #' .ref_group = subset(dta, grp == "B"), | |
| 44 | #' .in_ref_col = FALSE, | |
| 45 | #' variables = NULL, | |
| 46 | #' method = "chisq" | |
| 47 | #' ) | |
| 48 | #' | |
| 49 | #' @export | |
| 50 | s_test_proportion_diff <- function(df, | |
| 51 | .var, | |
| 52 | .ref_group, | |
| 53 | .in_ref_col, | |
| 54 | variables = list(strata = NULL), | |
| 55 |                                    method = c("chisq", "schouten", "fisher", "cmh"), | |
| 56 |                                    ...) { | |
| 57 | 58x | method <- match.arg(method) | 
| 58 | 58x | y <- list(pval = numeric()) | 
| 59 | ||
| 60 | 58x |   if (!.in_ref_col) { | 
| 61 | 52x | assert_df_with_variables(df, list(rsp = .var)) | 
| 62 | 52x | assert_df_with_variables(.ref_group, list(rsp = .var)) | 
| 63 | 52x | rsp <- factor( | 
| 64 | 52x | c(.ref_group[[.var]], df[[.var]]), | 
| 65 | 52x |       levels = c("TRUE", "FALSE") | 
| 66 | ) | |
| 67 | 52x | grp <- factor( | 
| 68 | 52x |       rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))), | 
| 69 | 52x |       levels = c("ref", "Not-ref") | 
| 70 | ) | |
| 71 | ||
| 72 | 52x |     if (!is.null(variables$strata) || method == "cmh") { | 
| 73 | 14x | strata <- variables$strata | 
| 74 | 14x | checkmate::assert_false(is.null(strata)) | 
| 75 | 14x | strata_vars <- stats::setNames(as.list(strata), strata) | 
| 76 | 14x | assert_df_with_variables(df, strata_vars) | 
| 77 | 14x | assert_df_with_variables(.ref_group, strata_vars) | 
| 78 | 14x | strata <- c(interaction(.ref_group[strata]), interaction(df[strata])) | 
| 79 | } | |
| 80 | ||
| 81 | 52x | tbl <- switch(method, | 
| 82 | 52x | cmh = table(grp, rsp, strata), | 
| 83 | 52x | table(grp, rsp) | 
| 84 | ) | |
| 85 | ||
| 86 | 52x | y$pval <- switch(method, | 
| 87 | 52x | chisq = prop_chisq(tbl), | 
| 88 | 52x | cmh = prop_cmh(tbl), | 
| 89 | 52x | fisher = prop_fisher(tbl), | 
| 90 | 52x | schouten = prop_schouten(tbl) | 
| 91 | ) | |
| 92 | } | |
| 93 | ||
| 94 | 58x | y$pval <- formatters::with_label(y$pval, d_test_proportion_diff(method)) | 
| 95 | 58x | y | 
| 96 | } | |
| 97 | ||
| 98 | #' Description of the difference test between two proportions | |
| 99 | #' | |
| 100 | #' @description `r lifecycle::badge("stable")` | |
| 101 | #' | |
| 102 | #' This is an auxiliary function that describes the analysis in `s_test_proportion_diff`. | |
| 103 | #' | |
| 104 | #' @inheritParams s_test_proportion_diff | |
| 105 | #' | |
| 106 | #' @return A `string` describing the test from which the p-value is derived. | |
| 107 | #' | |
| 108 | #' @export | |
| 109 | d_test_proportion_diff <- function(method) { | |
| 110 | 59x | checkmate::assert_string(method) | 
| 111 | 59x | meth_part <- switch(method, | 
| 112 | 59x | "schouten" = "Chi-Squared Test with Schouten Correction", | 
| 113 | 59x | "chisq" = "Chi-Squared Test", | 
| 114 | 59x | "cmh" = "Cochran-Mantel-Haenszel Test", | 
| 115 | 59x | "fisher" = "Fisher's Exact Test", | 
| 116 | 59x | stop(paste(method, "does not have a description")) | 
| 117 | ) | |
| 118 | 59x |   paste0("p-value (", meth_part, ")") | 
| 119 | } | |
| 120 | ||
| 121 | #' @describeIn prop_diff_test Formatted analysis function which is used as `afun` in `test_proportion_diff()`. | |
| 122 | #' | |
| 123 | #' @return | |
| 124 | #' * `a_test_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 125 | #' | |
| 126 | #' @keywords internal | |
| 127 | a_test_proportion_diff <- function(df, | |
| 128 | ..., | |
| 129 | .stats = NULL, | |
| 130 | .stat_names = NULL, | |
| 131 | .formats = NULL, | |
| 132 | .labels = NULL, | |
| 133 |                                    .indent_mods = NULL) { | |
| 134 | 13x | dots_extra_args <- list(...) | 
| 135 | ||
| 136 | # Check if there are user-defined functions | |
| 137 | 13x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 138 | 13x | .stats <- default_and_custom_stats_list$all_stats | 
| 139 | 13x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 140 | ||
| 141 | # Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params) | |
| 142 | 13x | extra_afun_params <- retrieve_extra_afun_params( | 
| 143 | 13x | names(dots_extra_args$.additional_fun_parameters) | 
| 144 | ) | |
| 145 | 13x | dots_extra_args$.additional_fun_parameters <- NULL # After extraction we do not need them anymore | 
| 146 | ||
| 147 | # Main statistical functions application | |
| 148 | 13x | x_stats <- .apply_stat_functions( | 
| 149 | 13x | default_stat_fnc = s_test_proportion_diff, | 
| 150 | 13x | custom_stat_fnc_list = custom_stat_functions, | 
| 151 | 13x | args_list = c( | 
| 152 | 13x | df = list(df), | 
| 153 | 13x | extra_afun_params, | 
| 154 | 13x | dots_extra_args | 
| 155 | ) | |
| 156 | ) | |
| 157 | ||
| 158 | # Fill in with stats defaults if needed | |
| 159 | 13x |   .stats <- get_stats("test_proportion_diff", | 
| 160 | 13x | stats_in = .stats, | 
| 161 | 13x | custom_stats_in = names(custom_stat_functions) | 
| 162 | ) | |
| 163 | ||
| 164 | 13x | x_stats <- x_stats[.stats] | 
| 165 | ||
| 166 | # Fill in formats/indents/labels with custom input and defaults | |
| 167 | 13x | .formats <- get_formats_from_stats(.stats, .formats) | 
| 168 | 13x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods) | 
| 169 | 13x |   if (is.null(.labels)) { | 
| 170 | 13x | .labels <- sapply(x_stats, attr, "label") | 
| 171 | 13x | .labels <- .labels[nzchar(.labels) & !sapply(.labels, is.null) & !is.na(.labels)] | 
| 172 | } | |
| 173 | 13x | .labels <- get_labels_from_stats(.stats, .labels) | 
| 174 | ||
| 175 | # Auto format handling | |
| 176 | 13x | .formats <- apply_auto_formatting( | 
| 177 | 13x | .formats, | 
| 178 | 13x | x_stats, | 
| 179 | 13x | extra_afun_params$.df_row, | 
| 180 | 13x | extra_afun_params$.var | 
| 181 | ) | |
| 182 | ||
| 183 | # Get and check statistical names from defaults | |
| 184 | 13x | .stat_names <- get_stat_names(x_stats, .stat_names) # note is x_stats | 
| 185 | ||
| 186 | 13x | in_rows( | 
| 187 | 13x | .list = x_stats, | 
| 188 | 13x | .formats = .formats, | 
| 189 | 13x | .names = names(.labels), | 
| 190 | 13x | .stat_names = .stat_names, | 
| 191 | 13x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 192 | 13x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 193 | ) | |
| 194 | } | |
| 195 | ||
| 196 | #' @describeIn prop_diff_test Layout-creating function which can take statistics function arguments | |
| 197 | #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 198 | #' | |
| 199 | #' @return | |
| 200 | #' * `test_proportion_diff()` returns a layout object suitable for passing to further layouting functions, | |
| 201 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 202 | #' the statistics from `s_test_proportion_diff()` to the table layout. | |
| 203 | #' | |
| 204 | #' @examples | |
| 205 | #' dta <- data.frame( | |
| 206 | #' rsp = sample(c(TRUE, FALSE), 100, TRUE), | |
| 207 | #'   grp = factor(rep(c("A", "B"), each = 50)), | |
| 208 | #'   strata = factor(rep(c("V", "W", "X", "Y", "Z"), each = 20)) | |
| 209 | #' ) | |
| 210 | #' | |
| 211 | #' # With `rtables` pipelines. | |
| 212 | #' l <- basic_table() %>% | |
| 213 | #' split_cols_by(var = "grp", ref_group = "B") %>% | |
| 214 | #' test_proportion_diff( | |
| 215 | #' vars = "rsp", | |
| 216 | #' method = "cmh", variables = list(strata = "strata") | |
| 217 | #' ) | |
| 218 | #' | |
| 219 | #' build_table(l, df = dta) | |
| 220 | #' | |
| 221 | #' @export | |
| 222 | #' @order 2 | |
| 223 | test_proportion_diff <- function(lyt, | |
| 224 | vars, | |
| 225 | variables = list(strata = NULL), | |
| 226 |                                  method = c("chisq", "schouten", "fisher", "cmh"), | |
| 227 | var_labels = vars, | |
| 228 | na_str = default_na_str(), | |
| 229 | nested = TRUE, | |
| 230 | show_labels = "hidden", | |
| 231 | table_names = vars, | |
| 232 | section_div = NA_character_, | |
| 233 | ..., | |
| 234 | na_rm = TRUE, | |
| 235 |                                  .stats = c("pval"), | |
| 236 | .stat_names = NULL, | |
| 237 | .formats = c(pval = "x.xxxx | (<0.0001)"), | |
| 238 | .labels = NULL, | |
| 239 |                                  .indent_mods = c(pval = 1L)) { | |
| 240 | # Depending on main functions | |
| 241 | 6x | extra_args <- list( | 
| 242 | 6x | "na_rm" = na_rm, | 
| 243 | 6x | "variables" = variables, | 
| 244 | 6x | "method" = method, | 
| 245 | ... | |
| 246 | ) | |
| 247 | ||
| 248 | # Needed defaults | |
| 249 | 6x | if (!is.null(.stats)) extra_args[[".stats"]] <- .stats | 
| 250 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 251 | 6x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 252 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 253 | 6x | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 254 | ||
| 255 | # Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params) | |
| 256 | 6x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 257 | 6x | formals(a_test_proportion_diff) <- c( | 
| 258 | 6x | formals(a_test_proportion_diff), | 
| 259 | 6x | extra_args[[".additional_fun_parameters"]] | 
| 260 | ) | |
| 261 | ||
| 262 |   # Main {rtables} structural call | |
| 263 | 6x | analyze( | 
| 264 | 6x | lyt = lyt, | 
| 265 | 6x | vars = vars, | 
| 266 | 6x | var_labels = var_labels, | 
| 267 | 6x | afun = a_test_proportion_diff, | 
| 268 | 6x | na_str = na_str, | 
| 269 | 6x | inclNAs = !na_rm, | 
| 270 | 6x | nested = nested, | 
| 271 | 6x | extra_args = extra_args, | 
| 272 | 6x | show_labels = show_labels, | 
| 273 | 6x | table_names = table_names, | 
| 274 | 6x | section_div = section_div | 
| 275 | ) | |
| 276 | } | |
| 277 | ||
| 278 | #' Helper functions to test proportion differences | |
| 279 | #' | |
| 280 | #' Helper functions to implement various tests on the difference between two proportions. | |
| 281 | #' | |
| 282 | #' @param tbl (`matrix`)\cr matrix with two groups in rows and the binary response (`TRUE`/`FALSE`) in columns. | |
| 283 | #' | |
| 284 | #' @return A p-value. | |
| 285 | #' | |
| 286 | #' @seealso [prop_diff_test()] for implementation of these helper functions. | |
| 287 | #' | |
| 288 | #' @name h_prop_diff_test | |
| 289 | NULL | |
| 290 | ||
| 291 | #' @describeIn h_prop_diff_test Performs Chi-Squared test. Internally calls [stats::prop.test()]. | |
| 292 | #' | |
| 293 | #' @keywords internal | |
| 294 | prop_chisq <- function(tbl) { | |
| 295 | 41x | checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2) | 
| 296 | 41x |   tbl <- tbl[, c("TRUE", "FALSE")] | 
| 297 | 41x |   if (any(colSums(tbl) == 0)) { | 
| 298 | 2x | return(1) | 
| 299 | } | |
| 300 | 39x | stats::prop.test(tbl, correct = FALSE)$p.value | 
| 301 | } | |
| 302 | ||
| 303 | #' @describeIn h_prop_diff_test Performs stratified Cochran-Mantel-Haenszel test. Internally calls | |
| 304 | #' [stats::mantelhaen.test()]. Note that strata with less than two observations are automatically discarded. | |
| 305 | #' | |
| 306 | #' @param ary (`array`, 3 dimensions)\cr array with two groups in rows, the binary response | |
| 307 | #' (`TRUE`/`FALSE`) in columns, and the strata in the third dimension. | |
| 308 | #' | |
| 309 | #' @keywords internal | |
| 310 | prop_cmh <- function(ary) { | |
| 311 | 16x | checkmate::assert_array(ary) | 
| 312 | 16x | checkmate::assert_integer(c(ncol(ary), nrow(ary)), lower = 2, upper = 2) | 
| 313 | 16x | checkmate::assert_integer(length(dim(ary)), lower = 3, upper = 3) | 
| 314 | 16x | strata_sizes <- apply(ary, MARGIN = 3, sum) | 
| 315 | 16x |   if (any(strata_sizes < 5)) { | 
| 316 | 1x |     warning("<5 data points in some strata. CMH test may be incorrect.") | 
| 317 | 1x | ary <- ary[, , strata_sizes > 1] | 
| 318 | } | |
| 319 | ||
| 320 | 16x | stats::mantelhaen.test(ary, correct = FALSE)$p.value | 
| 321 | } | |
| 322 | ||
| 323 | #' @describeIn h_prop_diff_test Performs the Chi-Squared test with Schouten correction. | |
| 324 | #' | |
| 325 | #' @seealso Schouten correction is based upon \insertCite{Schouten1980-kd;textual}{tern}. | |
| 326 | #' | |
| 327 | #' @keywords internal | |
| 328 | prop_schouten <- function(tbl) { | |
| 329 | 100x | checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2) | 
| 330 | 100x |   tbl <- tbl[, c("TRUE", "FALSE")] | 
| 331 | 100x |   if (any(colSums(tbl) == 0)) { | 
| 332 | 1x | return(1) | 
| 333 | } | |
| 334 | ||
| 335 | 99x | n <- sum(tbl) | 
| 336 | 99x | n1 <- sum(tbl[1, ]) | 
| 337 | 99x | n2 <- sum(tbl[2, ]) | 
| 338 | ||
| 339 | 99x | ad <- diag(tbl) | 
| 340 | 99x | bc <- diag(apply(tbl, 2, rev)) | 
| 341 | 99x | ac <- tbl[, 1] | 
| 342 | 99x | bd <- tbl[, 2] | 
| 343 | ||
| 344 | 99x | t_schouten <- (n - 1) * | 
| 345 | 99x | (abs(prod(ad) - prod(bc)) - 0.5 * min(n1, n2))^2 / | 
| 346 | 99x | (n1 * n2 * sum(ac) * sum(bd)) | 
| 347 | ||
| 348 | 99x | 1 - stats::pchisq(t_schouten, df = 1) | 
| 349 | } | |
| 350 | ||
| 351 | #' @describeIn h_prop_diff_test Performs the Fisher's exact test. Internally calls [stats::fisher.test()]. | |
| 352 | #' | |
| 353 | #' @keywords internal | |
| 354 | prop_fisher <- function(tbl) { | |
| 355 | 2x | checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2) | 
| 356 | 2x |   tbl <- tbl[, c("TRUE", "FALSE")] | 
| 357 | 2x | stats::fisher.test(tbl)$p.value | 
| 358 | } | 
| 1 | #' Analyze a pairwise Cox-PH model | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [coxph_pairwise()] creates a layout element to analyze a pairwise Cox-PH model. | |
| 6 | #' | |
| 7 | #' This function can return statistics including p-value, hazard ratio (HR), and HR confidence intervals from both | |
| 8 | #' stratified and unstratified Cox-PH models. The variable(s) to be analyzed is specified via the `vars` argument and | |
| 9 | #' any stratification factors via the `strata` argument. | |
| 10 | #' | |
| 11 | #' @inheritParams argument_convention | |
| 12 | #' @inheritParams s_surv_time | |
| 13 | #' @param strata (`character` or `NULL`)\cr variable names indicating stratification factors. | |
| 14 | #' @param strat `r lifecycle::badge("deprecated")` Please use the `strata` argument instead. | |
| 15 | #' @param control (`list`)\cr parameters for comparison details, specified by using the helper function | |
| 16 | #' [control_coxph()]. Some possible parameter options are: | |
| 17 | #' * `pval_method` (`string`)\cr p-value method for testing the null hypothesis that hazard ratio = 1. Default | |
| 18 | #' method is `"log-rank"` which comes from [survival::survdiff()], can also be set to `"wald"` or `"likelihood"` | |
| 19 | #' (from [survival::coxph()]). | |
| 20 | #' * `ties` (`string`)\cr specifying the method for tie handling. Default is `"efron"`, | |
| 21 | #' can also be set to `"breslow"` or `"exact"`. See more in [survival::coxph()]. | |
| 22 | #' * `conf_level` (`proportion`)\cr confidence level of the interval for HR. | |
| 23 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 24 | #' | |
| 25 | #'   Options are: ``r shQuote(get_stats("coxph_pairwise"), type = "sh")`` | |
| 26 | #' | |
| 27 | #' @name survival_coxph_pairwise | |
| 28 | #' @order 1 | |
| 29 | NULL | |
| 30 | ||
| 31 | #' @describeIn survival_coxph_pairwise Statistics function which analyzes HR, CIs of HR, and p-value of a Cox-PH model. | |
| 32 | #' | |
| 33 | #' @return | |
| 34 | #' * `s_coxph_pairwise()` returns the statistics: | |
| 35 | #' * `pvalue`: p-value to test the null hypothesis that hazard ratio = 1. | |
| 36 | #' * `hr`: Hazard ratio. | |
| 37 | #' * `hr_ci`: Confidence interval for hazard ratio. | |
| 38 | #' * `n_tot`: Total number of observations. | |
| 39 | #' * `n_tot_events`: Total number of events. | |
| 40 | #' | |
| 41 | #' @keywords internal | |
| 42 | s_coxph_pairwise <- function(df, | |
| 43 | .ref_group, | |
| 44 | .in_ref_col, | |
| 45 | .var, | |
| 46 | is_event, | |
| 47 | strata = NULL, | |
| 48 | strat = lifecycle::deprecated(), | |
| 49 | control = control_coxph(), | |
| 50 |                              ...) { | |
| 51 | 110x |   if (lifecycle::is_present(strat)) { | 
| 52 | ! |     lifecycle::deprecate_warn("0.9.4", "s_coxph_pairwise(strat)", "s_coxph_pairwise(strata)") | 
| 53 | ! | strata <- strat | 
| 54 | } | |
| 55 | ||
| 56 | 110x | checkmate::assert_string(.var) | 
| 57 | 110x | checkmate::assert_numeric(df[[.var]]) | 
| 58 | 110x | checkmate::assert_logical(df[[is_event]]) | 
| 59 | 110x | assert_df_with_variables(df, list(tte = .var, is_event = is_event)) | 
| 60 | 110x | pval_method <- control$pval_method | 
| 61 | 110x | ties <- control$ties | 
| 62 | 110x | conf_level <- control$conf_level | 
| 63 | ||
| 64 | 110x |   if (.in_ref_col) { | 
| 65 | 6x | return( | 
| 66 | 6x | list( | 
| 67 | 6x |         pvalue = formatters::with_label(numeric(), paste0("p-value (", pval_method, ")")), | 
| 68 | 6x | hr = formatters::with_label(numeric(), "Hazard Ratio"), | 
| 69 | 6x | hr_ci = formatters::with_label(numeric(), f_conf_level(conf_level)), | 
| 70 | 6x |         hr_ci_3d = formatters::with_label(numeric(), paste0("Hazard Ratio (", f_conf_level(conf_level), ")")), | 
| 71 | 6x | n_tot = formatters::with_label(numeric(), "Total n"), | 
| 72 | 6x | n_tot_events = formatters::with_label(numeric(), "Total events") | 
| 73 | ) | |
| 74 | ) | |
| 75 | } | |
| 76 | 104x | data <- rbind(.ref_group, df) | 
| 77 | 104x |   group <- factor(rep(c("ref", "x"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "x")) | 
| 78 | ||
| 79 | 104x | df_cox <- data.frame( | 
| 80 | 104x | tte = data[[.var]], | 
| 81 | 104x | is_event = data[[is_event]], | 
| 82 | 104x | arm = group | 
| 83 | ) | |
| 84 | 104x |   if (is.null(strata)) { | 
| 85 | 91x | formula_cox <- survival::Surv(tte, is_event) ~ arm | 
| 86 |   } else { | |
| 87 | 13x | formula_cox <- stats::as.formula( | 
| 88 | 13x | paste0( | 
| 89 | 13x |         "survival::Surv(tte, is_event) ~ arm + strata(", | 
| 90 | 13x | paste(strata, collapse = ","), | 
| 91 | ")" | |
| 92 | ) | |
| 93 | ) | |
| 94 | 13x | df_cox <- cbind(df_cox, data[strata]) | 
| 95 | } | |
| 96 | 104x | cox_fit <- survival::coxph( | 
| 97 | 104x | formula = formula_cox, | 
| 98 | 104x | data = df_cox, | 
| 99 | 104x | ties = ties | 
| 100 | ) | |
| 101 | 104x | sum_cox <- summary(cox_fit, conf.int = conf_level, extend = TRUE) | 
| 102 | 104x | orginal_survdiff <- survival::survdiff( | 
| 103 | 104x | formula_cox, | 
| 104 | 104x | data = df_cox | 
| 105 | ) | |
| 106 | 104x | log_rank_pvalue <- 1 - pchisq(orginal_survdiff$chisq, length(orginal_survdiff$n) - 1) | 
| 107 | ||
| 108 | 104x | pval <- switch(pval_method, | 
| 109 | 104x | "wald" = sum_cox$waldtest["pvalue"], | 
| 110 | 104x | "log-rank" = log_rank_pvalue, # pvalue from original log-rank test survival::survdiff() | 
| 111 | 104x | "likelihood" = sum_cox$logtest["pvalue"] | 
| 112 | ) | |
| 113 | 104x | list( | 
| 114 | 104x |     pvalue = formatters::with_label(unname(pval), paste0("p-value (", pval_method, ")")), | 
| 115 | 104x | hr = formatters::with_label(sum_cox$conf.int[1, 1], "Hazard Ratio"), | 
| 116 | 104x | hr_ci = formatters::with_label(unname(sum_cox$conf.int[1, 3:4]), f_conf_level(conf_level)), | 
| 117 | 104x | hr_ci_3d = formatters::with_label( | 
| 118 | 104x | c(sum_cox$conf.int[1, 1], unname(sum_cox$conf.int[1, 3:4])), | 
| 119 | 104x |       paste0("Hazard Ratio (", f_conf_level(conf_level), ")") | 
| 120 | ), | |
| 121 | 104x | n_tot = formatters::with_label(sum_cox$n, "Total n"), | 
| 122 | 104x | n_tot_events = formatters::with_label(sum_cox$nevent, "Total events") | 
| 123 | ) | |
| 124 | } | |
| 125 | ||
| 126 | #' @describeIn survival_coxph_pairwise Formatted analysis function which is used as `afun` in `coxph_pairwise()`. | |
| 127 | #' | |
| 128 | #' @return | |
| 129 | #' * `a_coxph_pairwise()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 130 | #' | |
| 131 | #' @keywords internal | |
| 132 | a_coxph_pairwise <- function(df, | |
| 133 | ..., | |
| 134 | .stats = NULL, | |
| 135 | .stat_names = NULL, | |
| 136 | .formats = NULL, | |
| 137 | .labels = NULL, | |
| 138 |                              .indent_mods = NULL) { | |
| 139 | # Check for additional parameters to the statistics function | |
| 140 | 18x | dots_extra_args <- list(...) | 
| 141 | 18x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 142 | 18x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 143 | ||
| 144 | # Check for user-defined functions | |
| 145 | 18x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 146 | 18x | .stats <- default_and_custom_stats_list$all_stats | 
| 147 | 18x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 148 | ||
| 149 | # Apply statistics function | |
| 150 | 18x | x_stats <- .apply_stat_functions( | 
| 151 | 18x | default_stat_fnc = s_coxph_pairwise, | 
| 152 | 18x | custom_stat_fnc_list = custom_stat_functions, | 
| 153 | 18x | args_list = c( | 
| 154 | 18x | df = list(df), | 
| 155 | 18x | extra_afun_params, | 
| 156 | 18x | dots_extra_args | 
| 157 | ) | |
| 158 | ) | |
| 159 | ||
| 160 | # Fill in formatting defaults | |
| 161 | 18x |   .stats <- get_stats("coxph_pairwise", | 
| 162 | 18x | stats_in = .stats, | 
| 163 | 18x | custom_stats_in = names(custom_stat_functions) | 
| 164 | ) | |
| 165 | 18x | x_stats <- x_stats[.stats] | 
| 166 | 18x | .formats <- get_formats_from_stats(.stats, .formats) | 
| 167 | 18x | .labels <- get_labels_from_stats( | 
| 168 | 18x | .stats, .labels, | 
| 169 | 18x | tern_defaults = c(lapply(x_stats, attr, "label"), tern_default_labels) | 
| 170 | ) | |
| 171 | 18x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods) | 
| 172 | ||
| 173 | # Auto format handling | |
| 174 | 18x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 175 | ||
| 176 | # Get and check statistical names | |
| 177 | 18x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 178 | ||
| 179 | 18x | in_rows( | 
| 180 | 18x | .list = x_stats, | 
| 181 | 18x | .formats = .formats, | 
| 182 | 18x | .names = .labels %>% .unlist_keep_nulls(), | 
| 183 | 18x | .stat_names = .stat_names, | 
| 184 | 18x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 185 | 18x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 186 | ) | |
| 187 | } | |
| 188 | ||
| 189 | #' @describeIn survival_coxph_pairwise Layout-creating function which can take statistics function arguments | |
| 190 | #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 191 | #' | |
| 192 | #' @return | |
| 193 | #' * `coxph_pairwise()` returns a layout object suitable for passing to further layouting functions, | |
| 194 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 195 | #' the statistics from `s_coxph_pairwise()` to the table layout. | |
| 196 | #' | |
| 197 | #' @examples | |
| 198 | #' library(dplyr) | |
| 199 | #' | |
| 200 | #' adtte_f <- tern_ex_adtte %>% | |
| 201 | #' filter(PARAMCD == "OS") %>% | |
| 202 | #' mutate(is_event = CNSR == 0) | |
| 203 | #' | |
| 204 | #' df <- adtte_f %>% filter(ARMCD == "ARM A") | |
| 205 | #' df_ref_group <- adtte_f %>% filter(ARMCD == "ARM B") | |
| 206 | #' | |
| 207 | #' basic_table() %>% | |
| 208 | #' split_cols_by(var = "ARMCD", ref_group = "ARM A") %>% | |
| 209 | #' add_colcounts() %>% | |
| 210 | #' coxph_pairwise( | |
| 211 | #' vars = "AVAL", | |
| 212 | #' is_event = "is_event", | |
| 213 | #' var_labels = "Unstratified Analysis" | |
| 214 | #' ) %>% | |
| 215 | #' build_table(df = adtte_f) | |
| 216 | #' | |
| 217 | #' basic_table() %>% | |
| 218 | #' split_cols_by(var = "ARMCD", ref_group = "ARM A") %>% | |
| 219 | #' add_colcounts() %>% | |
| 220 | #' coxph_pairwise( | |
| 221 | #' vars = "AVAL", | |
| 222 | #' is_event = "is_event", | |
| 223 | #' var_labels = "Stratified Analysis", | |
| 224 | #' strata = "SEX", | |
| 225 | #' control = control_coxph(pval_method = "wald") | |
| 226 | #' ) %>% | |
| 227 | #' build_table(df = adtte_f) | |
| 228 | #' | |
| 229 | #' @export | |
| 230 | #' @order 2 | |
| 231 | coxph_pairwise <- function(lyt, | |
| 232 | vars, | |
| 233 | strata = NULL, | |
| 234 | control = control_coxph(), | |
| 235 | na_str = default_na_str(), | |
| 236 | nested = TRUE, | |
| 237 | ..., | |
| 238 | var_labels = "CoxPH", | |
| 239 | show_labels = "visible", | |
| 240 | table_names = vars, | |
| 241 |                            .stats = c("pvalue", "hr", "hr_ci"), | |
| 242 | .stat_names = NULL, | |
| 243 | .formats = NULL, | |
| 244 | .labels = NULL, | |
| 245 |                            .indent_mods = NULL) { | |
| 246 | # Process standard extra arguments | |
| 247 | 6x |   extra_args <- list(".stats" = .stats) | 
| 248 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 249 | 1x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 250 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 251 | ! | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 252 | ||
| 253 | # Process additional arguments to the statistic function | |
| 254 | 6x | extra_args <- c( | 
| 255 | 6x | extra_args, | 
| 256 | 6x | strata = list(strata), control = list(control), | 
| 257 | ... | |
| 258 | ) | |
| 259 | ||
| 260 | # Append additional info from layout to the analysis function | |
| 261 | 6x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 262 | 6x | formals(a_coxph_pairwise) <- c(formals(a_coxph_pairwise), extra_args[[".additional_fun_parameters"]]) | 
| 263 | ||
| 264 | 6x | analyze( | 
| 265 | 6x | lyt = lyt, | 
| 266 | 6x | vars = vars, | 
| 267 | 6x | afun = a_coxph_pairwise, | 
| 268 | 6x | na_str = na_str, | 
| 269 | 6x | nested = nested, | 
| 270 | 6x | extra_args = extra_args, | 
| 271 | 6x | var_labels = var_labels, | 
| 272 | 6x | show_labels = show_labels, | 
| 273 | 6x | table_names = table_names | 
| 274 | ) | |
| 275 | } | 
| 1 | #' Control function for Cox-PH model | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' This is an auxiliary function for controlling arguments for Cox-PH model, typically used internally to specify | |
| 6 | #' details of Cox-PH model for [s_coxph_pairwise()]. `conf_level` refers to Hazard Ratio estimation. | |
| 7 | #' | |
| 8 | #' @inheritParams argument_convention | |
| 9 | #' @param pval_method (`string`)\cr p-value method for testing hazard ratio = 1. | |
| 10 | #' Default method is `"log-rank"`, can also be set to `"wald"` or `"likelihood"`. | |
| 11 | #' @param ties (`string`)\cr string specifying the method for tie handling. Default is `"efron"`, | |
| 12 | #' can also be set to `"breslow"` or `"exact"`. See more in [survival::coxph()]. | |
| 13 | #' | |
| 14 | #' @return A list of components with the same names as the arguments. | |
| 15 | #' | |
| 16 | #' @export | |
| 17 | control_coxph <- function(pval_method = c("log-rank", "wald", "likelihood"), | |
| 18 |                           ties = c("efron", "breslow", "exact"), | |
| 19 |                           conf_level = 0.95) { | |
| 20 | 53x | pval_method <- match.arg(pval_method) | 
| 21 | 52x | ties <- match.arg(ties) | 
| 22 | 52x | assert_proportion_value(conf_level) | 
| 23 | ||
| 24 | 51x | list(pval_method = pval_method, ties = ties, conf_level = conf_level) | 
| 25 | } | |
| 26 | ||
| 27 | #' Control function for `survfit` models for survival time | |
| 28 | #' | |
| 29 | #' @description `r lifecycle::badge("stable")` | |
| 30 | #' | |
| 31 | #' This is an auxiliary function for controlling arguments for `survfit` model, typically used internally to specify | |
| 32 | #' details of `survfit` model for [s_surv_time()]. `conf_level` refers to survival time estimation. | |
| 33 | #' | |
| 34 | #' @inheritParams argument_convention | |
| 35 | #' @param conf_type (`string`)\cr confidence interval type. Options are "plain" (default), "log", "log-log", | |
| 36 | #' see more in [survival::survfit()]. Note option "none" is no longer supported. | |
| 37 | #' @param quantiles (`numeric(2)`)\cr vector of length two specifying the quantiles of survival time. | |
| 38 | #' | |
| 39 | #' @return A list of components with the same names as the arguments. | |
| 40 | #' | |
| 41 | #' @export | |
| 42 | control_surv_time <- function(conf_level = 0.95, | |
| 43 |                               conf_type = c("plain", "log", "log-log"), | |
| 44 |                               quantiles = c(0.25, 0.75)) { | |
| 45 | 229x | conf_type <- match.arg(conf_type) | 
| 46 | 228x | checkmate::assert_numeric(quantiles, lower = 0, upper = 1, len = 2, unique = TRUE, sorted = TRUE) | 
| 47 | 227x | nullo <- lapply(quantiles, assert_proportion_value) | 
| 48 | 227x | assert_proportion_value(conf_level) | 
| 49 | 226x | list(conf_level = conf_level, conf_type = conf_type, quantiles = quantiles) | 
| 50 | } | |
| 51 | ||
| 52 | #' Control function for `survfit` models for patients' survival rate at time points | |
| 53 | #' | |
| 54 | #' @description `r lifecycle::badge("stable")` | |
| 55 | #' | |
| 56 | #' This is an auxiliary function for controlling arguments for `survfit` model, typically used internally to specify | |
| 57 | #' details of `survfit` model for [s_surv_timepoint()]. `conf_level` refers to patient risk estimation at a time point. | |
| 58 | #' | |
| 59 | #' @inheritParams argument_convention | |
| 60 | #' @inheritParams control_surv_time | |
| 61 | #' | |
| 62 | #' @return A list of components with the same names as the arguments. | |
| 63 | #' | |
| 64 | #' @export | |
| 65 | control_surv_timepoint <- function(conf_level = 0.95, | |
| 66 |                                    conf_type = c("plain", "log", "log-log")) { | |
| 67 | 24x | conf_type <- match.arg(conf_type) | 
| 68 | 23x | assert_proportion_value(conf_level) | 
| 69 | 22x | list( | 
| 70 | 22x | conf_level = conf_level, | 
| 71 | 22x | conf_type = conf_type | 
| 72 | ) | |
| 73 | } | 
| 1 | #' Helper functions for incidence rate | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' @param control (`list`)\cr parameters for estimation details, specified by using | |
| 6 | #' the helper function [control_incidence_rate()]. Possible parameter options are: | |
| 7 | #' * `conf_level`: (`proportion`)\cr confidence level for the estimated incidence rate. | |
| 8 | #' * `conf_type`: (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar` | |
| 9 | #' for confidence interval type. | |
| 10 | #' * `input_time_unit`: (`string`)\cr `day`, `week`, `month`, or `year` (default) | |
| 11 | #' indicating time unit for data input. | |
| 12 | #' * `num_pt_year`: (`numeric`)\cr time unit for desired output (in person-years). | |
| 13 | #' @param person_years (`numeric(1)`)\cr total person-years at risk. | |
| 14 | #' @param alpha (`numeric(1)`)\cr two-sided alpha-level for confidence interval. | |
| 15 | #' @param n_events (`integer(1)`)\cr number of events observed. | |
| 16 | #' | |
| 17 | #' @return Estimated incidence rate, `rate`, and associated confidence interval, `rate_ci`. | |
| 18 | #' | |
| 19 | #' @seealso [incidence_rate] | |
| 20 | #' | |
| 21 | #' @name h_incidence_rate | |
| 22 | NULL | |
| 23 | ||
| 24 | #' @describeIn h_incidence_rate Helper function to estimate the incidence rate and | |
| 25 | #' associated confidence interval. | |
| 26 | #' | |
| 27 | #' @keywords internal | |
| 28 | h_incidence_rate <- function(person_years, | |
| 29 | n_events, | |
| 30 |                              control = control_incidence_rate()) { | |
| 31 | 18x | alpha <- 1 - control$conf_level | 
| 32 | 18x | est <- switch(control$conf_type, | 
| 33 | 18x | normal = h_incidence_rate_normal(person_years, n_events, alpha), | 
| 34 | 18x | normal_log = h_incidence_rate_normal_log(person_years, n_events, alpha), | 
| 35 | 18x | exact = h_incidence_rate_exact(person_years, n_events, alpha), | 
| 36 | 18x | byar = h_incidence_rate_byar(person_years, n_events, alpha) | 
| 37 | ) | |
| 38 | ||
| 39 | 18x | num_pt_year <- control$num_pt_year | 
| 40 | 18x | list( | 
| 41 | 18x | rate = est$rate * num_pt_year, | 
| 42 | 18x | rate_ci = est$rate_ci * num_pt_year | 
| 43 | ) | |
| 44 | } | |
| 45 | ||
| 46 | #' @describeIn h_incidence_rate Helper function to estimate the incidence rate and | |
| 47 | #' associated confidence interval based on the normal approximation for the | |
| 48 | #' incidence rate. Unit is one person-year. | |
| 49 | #' | |
| 50 | #' @examples | |
| 51 | #' h_incidence_rate_normal(200, 2) | |
| 52 | #' | |
| 53 | #' @export | |
| 54 | h_incidence_rate_normal <- function(person_years, | |
| 55 | n_events, | |
| 56 |                                     alpha = 0.05) { | |
| 57 | 14x | checkmate::assert_number(person_years) | 
| 58 | 14x | checkmate::assert_number(n_events) | 
| 59 | 14x | assert_proportion_value(alpha) | 
| 60 | ||
| 61 | 14x | est <- n_events / person_years | 
| 62 | 14x | se <- sqrt(est / person_years) | 
| 63 | 14x | ci <- est + c(-1, 1) * stats::qnorm(1 - alpha / 2) * se | 
| 64 | ||
| 65 | 14x | list(rate = est, rate_ci = ci) | 
| 66 | } | |
| 67 | ||
| 68 | #' @describeIn h_incidence_rate Helper function to estimate the incidence rate and | |
| 69 | #' associated confidence interval based on the normal approximation for the | |
| 70 | #' logarithm of the incidence rate. Unit is one person-year. | |
| 71 | #' | |
| 72 | #' @examples | |
| 73 | #' h_incidence_rate_normal_log(200, 2) | |
| 74 | #' | |
| 75 | #' @export | |
| 76 | h_incidence_rate_normal_log <- function(person_years, | |
| 77 | n_events, | |
| 78 |                                         alpha = 0.05) { | |
| 79 | 6x | checkmate::assert_number(person_years) | 
| 80 | 6x | checkmate::assert_number(n_events) | 
| 81 | 6x | assert_proportion_value(alpha) | 
| 82 | ||
| 83 | 6x | rate_est <- n_events / person_years | 
| 84 | 6x | rate_se <- sqrt(rate_est / person_years) | 
| 85 | 6x | lrate_est <- log(rate_est) | 
| 86 | 6x | lrate_se <- rate_se / rate_est | 
| 87 | 6x | ci <- exp(lrate_est + c(-1, 1) * stats::qnorm(1 - alpha / 2) * lrate_se) | 
| 88 | ||
| 89 | 6x | list(rate = rate_est, rate_ci = ci) | 
| 90 | } | |
| 91 | ||
| 92 | #' @describeIn h_incidence_rate Helper function to estimate the incidence rate and | |
| 93 | #' associated exact confidence interval. Unit is one person-year. | |
| 94 | #' | |
| 95 | #' @examples | |
| 96 | #' h_incidence_rate_exact(200, 2) | |
| 97 | #' | |
| 98 | #' @export | |
| 99 | h_incidence_rate_exact <- function(person_years, | |
| 100 | n_events, | |
| 101 |                                    alpha = 0.05) { | |
| 102 | 1x | checkmate::assert_number(person_years) | 
| 103 | 1x | checkmate::assert_number(n_events) | 
| 104 | 1x | assert_proportion_value(alpha) | 
| 105 | ||
| 106 | 1x | est <- n_events / person_years | 
| 107 | 1x | lcl <- stats::qchisq(p = (alpha) / 2, df = 2 * n_events) / (2 * person_years) | 
| 108 | 1x | ucl <- stats::qchisq(p = 1 - (alpha) / 2, df = 2 * n_events + 2) / (2 * person_years) | 
| 109 | ||
| 110 | 1x | list(rate = est, rate_ci = c(lcl, ucl)) | 
| 111 | } | |
| 112 | ||
| 113 | #' @describeIn h_incidence_rate Helper function to estimate the incidence rate and | |
| 114 | #' associated Byar's confidence interval. Unit is one person-year. | |
| 115 | #' | |
| 116 | #' @examples | |
| 117 | #' h_incidence_rate_byar(200, 2) | |
| 118 | #' | |
| 119 | #' @export | |
| 120 | h_incidence_rate_byar <- function(person_years, | |
| 121 | n_events, | |
| 122 |                                   alpha = 0.05) { | |
| 123 | 1x | checkmate::assert_number(person_years) | 
| 124 | 1x | checkmate::assert_number(n_events) | 
| 125 | 1x | assert_proportion_value(alpha) | 
| 126 | ||
| 127 | 1x | est <- n_events / person_years | 
| 128 | 1x | seg_1 <- n_events + 0.5 | 
| 129 | 1x | seg_2 <- 1 - 1 / (9 * (n_events + 0.5)) | 
| 130 | 1x | seg_3 <- stats::qnorm(1 - alpha / 2) * sqrt(1 / (n_events + 0.5)) / 3 | 
| 131 | 1x | lcl <- seg_1 * ((seg_2 - seg_3)^3) / person_years | 
| 132 | 1x | ucl <- seg_1 * ((seg_2 + seg_3)^3) / person_years | 
| 133 | ||
| 134 | 1x | list(rate = est, rate_ci = c(lcl, ucl)) | 
| 135 | } | 
| 1 | #' Factor utilities | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' A collection of utility functions for factors. | |
| 6 | #' | |
| 7 | #' @param x (`factor`)\cr factor variable or object to convert (for `as_factor_keep_attributes`). | |
| 8 | #' | |
| 9 | #' @seealso [cut_quantile_bins()] for splitting numeric vectors into quantile bins. | |
| 10 | #' | |
| 11 | #' @name factor_utils | |
| 12 | NULL | |
| 13 | ||
| 14 | #' @describeIn factor_utils Combine specified old factor Levels in a single new level. | |
| 15 | #' | |
| 16 | #' @param levels (`character`)\cr level names to be combined. | |
| 17 | #' @param new_level (`string`)\cr name of new level. | |
| 18 | #' | |
| 19 | #' @return | |
| 20 | #' * `combine_levels`: A `factor` with the new levels. | |
| 21 | #' | |
| 22 | #' @examples | |
| 23 | #' x <- factor(letters[1:5], levels = letters[5:1]) | |
| 24 | #' combine_levels(x, levels = c("a", "b")) | |
| 25 | #' | |
| 26 | #' combine_levels(x, c("e", "b")) | |
| 27 | #' | |
| 28 | #' @export | |
| 29 | combine_levels <- function(x, levels, new_level = paste(levels, collapse = "/")) { | |
| 30 | 4x | checkmate::assert_factor(x) | 
| 31 | 4x | checkmate::assert_subset(levels, levels(x)) | 
| 32 | ||
| 33 | 4x | lvls <- levels(x) | 
| 34 | ||
| 35 | 4x | lvls[lvls %in% levels] <- new_level | 
| 36 | ||
| 37 | 4x | levels(x) <- lvls | 
| 38 | ||
| 39 | 4x | x | 
| 40 | } | |
| 41 | ||
| 42 | #' Conversion of a vector to a factor | |
| 43 | #' | |
| 44 | #' @describeIn factor_utils Converts `x` to a factor and keeps its attributes. Warns appropriately such that the user | |
| 45 | #' can decide whether they prefer converting to factor manually (e.g. for full control of | |
| 46 | #' factor levels). | |
| 47 | #' | |
| 48 | #' @param x_name (`string`)\cr name of `x`. | |
| 49 | #' @param na_level (`string`)\cr the explicit missing level which should be used when converting a character vector. | |
| 50 | #' @param verbose (`flag`)\cr defaults to `TRUE`. It prints out warnings and messages. | |
| 51 | #' | |
| 52 | #' @return | |
| 53 | #' * `as_factor_keep_attributes`: A `factor` with same attributes (except class) as `x`. | |
| 54 | #' Does not modify `x` if already a `factor`. | |
| 55 | #' | |
| 56 | #' @examples | |
| 57 | #' a_chr_with_labels <- c("a", "b", NA) | |
| 58 | #' attr(a_chr_with_labels, "label") <- "A character vector with labels" | |
| 59 | #' as_factor_keep_attributes(a_chr_with_labels) | |
| 60 | #' | |
| 61 | #' @export | |
| 62 | as_factor_keep_attributes <- function(x, | |
| 63 | x_name = deparse(substitute(x)), | |
| 64 | na_level = "<Missing>", | |
| 65 |                                       verbose = TRUE) { | |
| 66 | 221x | checkmate::assert_atomic(x) | 
| 67 | 221x | checkmate::assert_string(x_name) | 
| 68 | 221x | checkmate::assert_string(na_level) | 
| 69 | 221x | checkmate::assert_flag(verbose) | 
| 70 | 221x |   if (is.factor(x)) { | 
| 71 | 200x | return(x) | 
| 72 | } | |
| 73 | 21x | x_class <- class(x)[1] | 
| 74 | 21x |   if (verbose) { | 
| 75 | 15x | warning(paste( | 
| 76 | 15x | "automatically converting", x_class, "variable", x_name, | 
| 77 | 15x | "to factor, better manually convert to factor to avoid failures" | 
| 78 | )) | |
| 79 | } | |
| 80 | 21x |   if (identical(length(x), 0L)) { | 
| 81 | 1x | warning(paste( | 
| 82 | 1x | x_name, "has length 0, this can lead to tabulation failures, better convert to factor" | 
| 83 | )) | |
| 84 | } | |
| 85 | 21x |   if (is.character(x)) { | 
| 86 | 21x | x_no_na <- explicit_na(sas_na(x), label = na_level) | 
| 87 | 21x |     if (any(na_level %in% x_no_na)) { | 
| 88 | 3x | do.call( | 
| 89 | 3x | structure, | 
| 90 | 3x | c( | 
| 91 | 3x | list(.Data = forcats::fct_relevel(x_no_na, na_level, after = Inf)), | 
| 92 | 3x | attributes(x) | 
| 93 | ) | |
| 94 | ) | |
| 95 |     } else { | |
| 96 | 18x | do.call(structure, c(list(.Data = as.factor(x)), attributes(x))) | 
| 97 | } | |
| 98 |   } else { | |
| 99 | ! | do.call(structure, c(list(.Data = as.factor(x)), attributes(x))) | 
| 100 | } | |
| 101 | } | |
| 102 | ||
| 103 | #' Labels for bins in percent | |
| 104 | #' | |
| 105 | #' This creates labels for quantile based bins in percent. This assumes the right-closed | |
| 106 | #' intervals as produced by [cut_quantile_bins()]. | |
| 107 | #' | |
| 108 | #' @param probs (`numeric`)\cr the probabilities identifying the quantiles. | |
| 109 | #' This is a sorted vector of unique `proportion` values, i.e. between 0 and 1, where | |
| 110 | #' the boundaries 0 and 1 must not be included. | |
| 111 | #' @param digits (`integer(1)`)\cr number of decimal places to round the percent numbers. | |
| 112 | #' | |
| 113 | #' @return A `character` vector with labels in the format `[0%,20%]`, `(20%,50%]`, etc. | |
| 114 | #' | |
| 115 | #' @keywords internal | |
| 116 | bins_percent_labels <- function(probs, | |
| 117 |                                 digits = 0) { | |
| 118 | 3x | if (isFALSE(0 %in% probs)) probs <- c(0, probs) | 
| 119 | 3x | if (isFALSE(1 %in% probs)) probs <- c(probs, 1) | 
| 120 | 10x | checkmate::assert_numeric(probs, lower = 0, upper = 1, unique = TRUE, sorted = TRUE) | 
| 121 | 10x | percent <- round(probs * 100, digits = digits) | 
| 122 | 10x | left <- paste0(utils::head(percent, -1), "%") | 
| 123 | 10x | right <- paste0(utils::tail(percent, -1), "%") | 
| 124 | 10x | without_left_bracket <- paste0(left, ",", right, "]") | 
| 125 | 10x |   with_left_bracket <- paste0("[", utils::head(without_left_bracket, 1)) | 
| 126 | 10x |   if (length(without_left_bracket) > 1) { | 
| 127 | 7x | with_left_bracket <- c( | 
| 128 | 7x | with_left_bracket, | 
| 129 | 7x |       paste0("(", utils::tail(without_left_bracket, -1)) | 
| 130 | ) | |
| 131 | } | |
| 132 | 10x | with_left_bracket | 
| 133 | } | |
| 134 | ||
| 135 | #' Cut numeric vector into empirical quantile bins | |
| 136 | #' | |
| 137 | #' @description `r lifecycle::badge("stable")` | |
| 138 | #' | |
| 139 | #' This cuts a numeric vector into sample quantile bins. | |
| 140 | #' | |
| 141 | #' @inheritParams bins_percent_labels | |
| 142 | #' @param x (`numeric`)\cr the continuous variable values which should be cut into | |
| 143 | #' quantile bins. This may contain `NA` values, which are then | |
| 144 | #' not used for the quantile calculations, but included in the return vector. | |
| 145 | #' @param labels (`character`)\cr the unique labels for the quantile bins. When there are `n` | |
| 146 | #' probabilities in `probs`, then this must be `n + 1` long. | |
| 147 | #' @param type (`integer(1)`)\cr type of quantiles to use, see [stats::quantile()] for details. | |
| 148 | #' @param ordered (`flag`)\cr should the result be an ordered factor. | |
| 149 | #' | |
| 150 | #' @return | |
| 151 | #' * `cut_quantile_bins`: A `factor` variable with appropriately-labeled bins as levels. | |
| 152 | #' | |
| 153 | #' @note Intervals are closed on the right side. That is, the first bin is the interval | |
| 154 | #' `[-Inf, q1]` where `q1` is the first quantile, the second bin is then `(q1, q2]`, etc., | |
| 155 | #' and the last bin is `(qn, +Inf]` where `qn` is the last quantile. | |
| 156 | #' | |
| 157 | #' @examples | |
| 158 | #' # Default is to cut into quartile bins. | |
| 159 | #' cut_quantile_bins(cars$speed) | |
| 160 | #' | |
| 161 | #' # Use custom quantiles. | |
| 162 | #' cut_quantile_bins(cars$speed, probs = c(0.1, 0.2, 0.6, 0.88)) | |
| 163 | #' | |
| 164 | #' # Use custom labels. | |
| 165 | #' cut_quantile_bins(cars$speed, labels = paste0("Q", 1:4)) | |
| 166 | #' | |
| 167 | #' # NAs are preserved in result factor. | |
| 168 | #' ozone_binned <- cut_quantile_bins(airquality$Ozone) | |
| 169 | #' which(is.na(ozone_binned)) | |
| 170 | #' # So you might want to make these explicit. | |
| 171 | #' explicit_na(ozone_binned) | |
| 172 | #' | |
| 173 | #' @export | |
| 174 | cut_quantile_bins <- function(x, | |
| 175 | probs = c(0.25, 0.5, 0.75), | |
| 176 | labels = NULL, | |
| 177 | type = 7, | |
| 178 |                               ordered = TRUE) { | |
| 179 | 8x | checkmate::assert_flag(ordered) | 
| 180 | 8x | checkmate::assert_numeric(x) | 
| 181 | 7x | if (isFALSE(0 %in% probs)) probs <- c(0, probs) | 
| 182 | 7x | if (isFALSE(1 %in% probs)) probs <- c(probs, 1) | 
| 183 | 8x | checkmate::assert_numeric(probs, lower = 0, upper = 1, unique = TRUE, sorted = TRUE) | 
| 184 | 7x | if (is.null(labels)) labels <- bins_percent_labels(probs) | 
| 185 | 8x | checkmate::assert_character(labels, len = length(probs) - 1, any.missing = FALSE, unique = TRUE) | 
| 186 | ||
| 187 | 8x |   if (all(is.na(x))) { | 
| 188 | # Early return if there are only NAs in input. | |
| 189 | 1x | return(factor(x, ordered = ordered, levels = labels)) | 
| 190 | } | |
| 191 | ||
| 192 | 7x | quantiles <- stats::quantile( | 
| 193 | 7x | x, | 
| 194 | 7x | probs = probs, | 
| 195 | 7x | type = type, | 
| 196 | 7x | na.rm = TRUE | 
| 197 | ) | |
| 198 | ||
| 199 | 7x | checkmate::assert_numeric(quantiles, unique = TRUE) | 
| 200 | ||
| 201 | 6x | cut( | 
| 202 | 6x | x, | 
| 203 | 6x | breaks = quantiles, | 
| 204 | 6x | labels = labels, | 
| 205 | 6x | ordered_result = ordered, | 
| 206 | 6x | include.lowest = TRUE, | 
| 207 | 6x | right = TRUE | 
| 208 | ) | |
| 209 | } | |
| 210 | ||
| 211 | #' @describeIn factor_utils This discards the observations as well as the levels specified from a factor. | |
| 212 | #' | |
| 213 | #' @param discard (`character`)\cr levels to discard. | |
| 214 | #' | |
| 215 | #' @return | |
| 216 | #' * `fct_discard`: A modified `factor` with observations as well as levels from `discard` dropped. | |
| 217 | #' | |
| 218 | #' @examples | |
| 219 | #' fct_discard(factor(c("a", "b", "c")), "c") | |
| 220 | #' | |
| 221 | #' @export | |
| 222 | fct_discard <- function(x, discard) { | |
| 223 | 321x | checkmate::assert_factor(x) | 
| 224 | 321x | checkmate::assert_character(discard, any.missing = FALSE) | 
| 225 | 321x | new_obs <- x[!(x %in% discard)] | 
| 226 | 321x | new_levels <- setdiff(levels(x), discard) | 
| 227 | 321x | factor(new_obs, levels = new_levels) | 
| 228 | } | |
| 229 | ||
| 230 | #' @describeIn factor_utils This inserts explicit missing values in a factor based on a condition. Additionally, | |
| 231 | #' existing `NA` values will be explicitly converted to given `na_level`. | |
| 232 | #' | |
| 233 | #' @param condition (`logical`)\cr positions at which to insert missing values. | |
| 234 | #' @param na_level (`string`)\cr which level to use for missing values. | |
| 235 | #' | |
| 236 | #' @return | |
| 237 | #' * `fct_explicit_na_if`: A modified `factor` with inserted and existing `NA` converted to `na_level`. | |
| 238 | #' | |
| 239 | #' @seealso [forcats::fct_na_value_to_level()] which is used internally. | |
| 240 | #' | |
| 241 | #' @examples | |
| 242 | #' fct_explicit_na_if(factor(c("a", "b", NA)), c(TRUE, FALSE, FALSE)) | |
| 243 | #' | |
| 244 | #' @export | |
| 245 | fct_explicit_na_if <- function(x, condition, na_level = "<Missing>") { | |
| 246 | 1x | checkmate::assert_factor(x, len = length(condition)) | 
| 247 | 1x | checkmate::assert_logical(condition) | 
| 248 | 1x | x[condition] <- NA | 
| 249 | 1x | x <- forcats::fct_na_value_to_level(x, level = na_level) | 
| 250 | 1x | forcats::fct_drop(x, only = na_level) | 
| 251 | } | |
| 252 | ||
| 253 | #' @describeIn factor_utils This collapses levels and only keeps those new group levels, in the order provided. | |
| 254 | #' The returned factor has levels in the order given, with the possible missing level last (this will | |
| 255 | #' only be included if there are missing values). | |
| 256 | #' | |
| 257 | #' @param .f (`factor` or `character`)\cr original vector. | |
| 258 | #' @param ... (named `character`)\cr levels in each vector provided will be collapsed into | |
| 259 | #' the new level given by the respective name. | |
| 260 | #' @param .na_level (`string`)\cr which level to use for other levels, which should be missing in the | |
| 261 | #' new factor. Note that this level must not be contained in the new levels specified in `...`. | |
| 262 | #' | |
| 263 | #' @return | |
| 264 | #' * `fct_collapse_only`: A modified `factor` with collapsed levels. Values and levels which are not included | |
| 265 | #' in the given `character` vector input will be set to the missing level `.na_level`. | |
| 266 | #' | |
| 267 | #' @note Any existing `NA`s in the input vector will not be replaced by the missing level. If needed, | |
| 268 | #' [explicit_na()] can be called separately on the result. | |
| 269 | #' | |
| 270 | #' @seealso [forcats::fct_collapse()], [forcats::fct_relevel()] which are used internally. | |
| 271 | #' | |
| 272 | #' @examples | |
| 273 | #' fct_collapse_only(factor(c("a", "b", "c", "d")), TRT = "b", CTRL = c("c", "d")) | |
| 274 | #' | |
| 275 | #' @export | |
| 276 | fct_collapse_only <- function(.f, ..., .na_level = "<Missing>") { | |
| 277 | 4x | new_lvls <- names(list(...)) | 
| 278 | 4x |   if (checkmate::test_subset(.na_level, new_lvls)) { | 
| 279 | 1x |     stop(paste0(".na_level currently set to '", .na_level, "' must not be contained in the new levels")) | 
| 280 | } | |
| 281 | 3x | x <- forcats::fct_collapse(.f, ..., other_level = .na_level) | 
| 282 | 3x | do.call(forcats::fct_relevel, args = c(list(.f = x), as.list(new_lvls))) | 
| 283 | } | 
| 1 | #' Summarize analysis of covariance (ANCOVA) results | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [summarize_ancova()] creates a layout element to summarize ANCOVA results. | |
| 6 | #' | |
| 7 | #' This function can be used to analyze multiple endpoints and/or multiple timepoints within the response variable(s) | |
| 8 | #' specified as `vars`. | |
| 9 | #' | |
| 10 | #' Additional variables for the analysis, namely an arm (grouping) variable and covariate variables, can be defined | |
| 11 | #' via the `variables` argument. See below for more details on how to specify `variables`. An interaction term can | |
| 12 | #' be implemented in the model if needed. The interaction variable that should interact with the arm variable is | |
| 13 | #' specified via the `interaction_term` parameter, and the specific value of `interaction_term` for which to extract | |
| 14 | #' the ANCOVA results via the `interaction_y` parameter. | |
| 15 | #' | |
| 16 | #' @inheritParams h_ancova | |
| 17 | #' @inheritParams argument_convention | |
| 18 | #' @param interaction_y (`string` or `flag`)\cr a selected item inside of the `interaction_item` variable which will be | |
| 19 | #' used to select the specific ANCOVA results. if the interaction is not needed, the default option is `FALSE`. | |
| 20 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 21 | #' | |
| 22 | #'   Options are: ``r shQuote(get_stats("summarize_ancova"), type = "sh")`` | |
| 23 | #' | |
| 24 | #' @name summarize_ancova | |
| 25 | #' @order 1 | |
| 26 | NULL | |
| 27 | ||
| 28 | #' Helper function to return results of a linear model | |
| 29 | #' | |
| 30 | #' @description `r lifecycle::badge("stable")` | |
| 31 | #' | |
| 32 | #' @inheritParams argument_convention | |
| 33 | #' @param .df_row (`data.frame`)\cr data set that includes all the variables that are called in `.var` and `variables`. | |
| 34 | #' @param variables (named `list` of `string`)\cr list of additional analysis variables, with expected elements: | |
| 35 | #' * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple groups will be | |
| 36 | #' summarized. Specifically, the first level of `arm` variable is taken as the reference group. | |
| 37 | #' * `covariates` (`character`)\cr a vector that can contain single variable names (such as `"X1"`), and/or | |
| 38 | #' interaction terms indicated by `"X1 * X2"`. | |
| 39 | #' @param interaction_item (`string` or `NULL`)\cr name of the variable that should have interactions | |
| 40 | #' with arm. if the interaction is not needed, the default option is `NULL`. | |
| 41 | #' @param weights_emmeans (`string` or `NULL`)\cr argument from [emmeans::emmeans()] | |
| 42 | #' | |
| 43 | #' @return The summary of a linear model. | |
| 44 | #' | |
| 45 | #' @examples | |
| 46 | #' h_ancova( | |
| 47 | #' .var = "Sepal.Length", | |
| 48 | #' .df_row = iris, | |
| 49 | #'   variables = list(arm = "Species", covariates = c("Petal.Length * Petal.Width", "Sepal.Width")) | |
| 50 | #' ) | |
| 51 | #' | |
| 52 | #' @export | |
| 53 | h_ancova <- function(.var, | |
| 54 | .df_row, | |
| 55 | variables, | |
| 56 | interaction_item = NULL, | |
| 57 |                      weights_emmeans = NULL) { | |
| 58 | 27x | checkmate::assert_string(.var) | 
| 59 | 27x | checkmate::assert_list(variables) | 
| 60 | 27x |   checkmate::assert_subset(names(variables), c("arm", "covariates")) | 
| 61 | 27x | assert_df_with_variables(.df_row, list(rsp = .var)) | 
| 62 | ||
| 63 | 26x | arm <- variables$arm | 
| 64 | 26x | covariates <- variables$covariates | 
| 65 | 26x |   if (!is.null(covariates) && length(covariates) > 0) { | 
| 66 | # Get all covariate variable names in the model. | |
| 67 | 11x | var_list <- get_covariates(covariates) | 
| 68 | 11x | assert_df_with_variables(.df_row, var_list) | 
| 69 | } | |
| 70 | ||
| 71 | 25x | covariates_part <- paste(covariates, collapse = " + ") | 
| 72 | 25x |   if (covariates_part != "") { | 
| 73 | 10x | formula <- stats::as.formula(paste0(.var, " ~ ", covariates_part, " + ", arm)) | 
| 74 |   } else { | |
| 75 | 15x | formula <- stats::as.formula(paste0(.var, " ~ ", arm)) | 
| 76 | } | |
| 77 | ||
| 78 | 25x |   if (is.null(interaction_item)) { | 
| 79 | 21x | specs <- arm | 
| 80 |   } else { | |
| 81 | 4x | specs <- c(arm, interaction_item) | 
| 82 | } | |
| 83 | ||
| 84 | 25x | lm_fit <- stats::lm( | 
| 85 | 25x | formula = formula, | 
| 86 | 25x | data = .df_row | 
| 87 | ) | |
| 88 | 25x | emmeans_fit <- emmeans::emmeans( | 
| 89 | 25x | lm_fit, | 
| 90 | # Specify here the group variable over which EMM are desired. | |
| 91 | 25x | specs = specs, | 
| 92 | # Pass the data again so that the factor levels of the arm variable can be inferred. | |
| 93 | 25x | data = .df_row, | 
| 94 | 25x | weights = weights_emmeans | 
| 95 | ) | |
| 96 | ||
| 97 | 25x | emmeans_fit | 
| 98 | } | |
| 99 | ||
| 100 | #' @describeIn summarize_ancova Statistics function that produces a named list of results | |
| 101 | #' of the investigated linear model. | |
| 102 | #' | |
| 103 | #' @return | |
| 104 | #' * `s_ancova()` returns a named list of 5 statistics: | |
| 105 | #' * `n`: Count of complete sample size for the group. | |
| 106 | #' * `lsmean`: Estimated marginal means in the group. | |
| 107 | #' * `lsmean_diff`: Difference in estimated marginal means in comparison to the reference group. | |
| 108 | #' If working with the reference group, this will be empty. | |
| 109 | #' * `lsmean_diff_ci`: Confidence level for difference in estimated marginal means in comparison | |
| 110 | #' to the reference group. | |
| 111 | #' * `pval`: p-value (not adjusted for multiple comparisons). | |
| 112 | #' | |
| 113 | #' @keywords internal | |
| 114 | s_ancova <- function(df, | |
| 115 | .var, | |
| 116 | .df_row, | |
| 117 | .ref_group, | |
| 118 | .in_ref_col, | |
| 119 | variables, | |
| 120 | conf_level, | |
| 121 | interaction_y = FALSE, | |
| 122 | interaction_item = NULL, | |
| 123 | weights_emmeans = NULL, | |
| 124 |                      ...) { | |
| 125 | 24x | emmeans_fit <- h_ancova( | 
| 126 | 24x | .var = .var, | 
| 127 | 24x | variables = variables, | 
| 128 | 24x | .df_row = .df_row, | 
| 129 | 24x | interaction_item = interaction_item, | 
| 130 | 24x | weights_emmeans = weights_emmeans | 
| 131 | ) | |
| 132 | ||
| 133 | 24x | sum_fit <- summary( | 
| 134 | 24x | emmeans_fit, | 
| 135 | 24x | level = conf_level | 
| 136 | ) | |
| 137 | ||
| 138 | 24x | arm <- variables$arm | 
| 139 | ||
| 140 | 24x | sum_level <- as.character(unique(df[[arm]])) | 
| 141 | ||
| 142 | # Ensure that there is only one element in sum_level. | |
| 143 | 24x | checkmate::assert_scalar(sum_level) | 
| 144 | ||
| 145 | 23x | sum_fit_level <- sum_fit[sum_fit[[arm]] == sum_level, ] | 
| 146 | ||
| 147 | # Get the index of the ref arm | |
| 148 | 23x |   if (interaction_y != FALSE) { | 
| 149 | 4x | y <- unlist(df[(df[[interaction_item]] == interaction_y), .var]) | 
| 150 | # convert characters selected in interaction_y into the numeric order | |
| 151 | 4x | interaction_y <- which(sum_fit_level[[interaction_item]] == interaction_y) | 
| 152 | 4x | sum_fit_level <- sum_fit_level[interaction_y, ] | 
| 153 | # if interaction is called, reset the index | |
| 154 | 4x | ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])]) | 
| 155 | 4x | ref_key <- tail(ref_key, n = 1) | 
| 156 | 4x | ref_key <- (interaction_y - 1) * length(unique(.df_row[[arm]])) + ref_key | 
| 157 |   } else { | |
| 158 | 19x | y <- df[[.var]] | 
| 159 | # Get the index of the ref arm when interaction is not called | |
| 160 | 19x | ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])]) | 
| 161 | 19x | ref_key <- tail(ref_key, n = 1) | 
| 162 | } | |
| 163 | ||
| 164 | 23x |   if (.in_ref_col) { | 
| 165 | 8x | list( | 
| 166 | 8x | n = length(y[!is.na(y)]), | 
| 167 | 8x | lsmean = formatters::with_label(sum_fit_level$emmean, "Adjusted Mean"), | 
| 168 | 8x | lsmean_diff = formatters::with_label(numeric(), "Difference in Adjusted Means"), | 
| 169 | 8x | lsmean_diff_ci = formatters::with_label(numeric(), f_conf_level(conf_level)), | 
| 170 | 8x | pval = formatters::with_label(numeric(), "p-value") | 
| 171 | ) | |
| 172 |   } else { | |
| 173 | # Estimate the differences between the marginal means. | |
| 174 | 15x | emmeans_contrasts <- emmeans::contrast( | 
| 175 | 15x | emmeans_fit, | 
| 176 | # Compare all arms versus the control arm. | |
| 177 | 15x | method = "trt.vs.ctrl", | 
| 178 | # Take the arm factor from .ref_group as the control arm. | |
| 179 | 15x | ref = ref_key, | 
| 180 | 15x | level = conf_level | 
| 181 | ) | |
| 182 | 15x | sum_contrasts <- summary( | 
| 183 | 15x | emmeans_contrasts, | 
| 184 | # Derive confidence intervals, t-tests and p-values. | |
| 185 | 15x | infer = TRUE, | 
| 186 | # Do not adjust the p-values for multiplicity. | |
| 187 | 15x | adjust = "none" | 
| 188 | ) | |
| 189 | ||
| 190 | 15x | contrast_lvls <- gsub( | 
| 191 | 15x |       "^\\(|\\)$", "", gsub(paste0(" - \\(*", .ref_group[[arm]][1], ".*"), "", sum_contrasts$contrast) | 
| 192 | ) | |
| 193 | 15x |     if (!is.null(interaction_item)) { | 
| 194 | 2x | sum_contrasts_level <- sum_contrasts[grepl(sum_level, contrast_lvls, fixed = TRUE), ] | 
| 195 |     } else { | |
| 196 | 13x | sum_contrasts_level <- sum_contrasts[sum_level == contrast_lvls, ] | 
| 197 | } | |
| 198 | 15x |     if (interaction_y != FALSE) { | 
| 199 | 2x | sum_contrasts_level <- sum_contrasts_level[interaction_y, ] | 
| 200 | } | |
| 201 | ||
| 202 | 15x | list( | 
| 203 | 15x | n = length(y[!is.na(y)]), | 
| 204 | 15x | lsmean = formatters::with_label(sum_fit_level$emmean, "Adjusted Mean"), | 
| 205 | 15x | lsmean_diff = formatters::with_label(sum_contrasts_level$estimate, "Difference in Adjusted Means"), | 
| 206 | 15x | lsmean_diff_ci = formatters::with_label( | 
| 207 | 15x | c(sum_contrasts_level$lower.CL, sum_contrasts_level$upper.CL), | 
| 208 | 15x | f_conf_level(conf_level) | 
| 209 | ), | |
| 210 | 15x | pval = formatters::with_label(sum_contrasts_level$p.value, "p-value") | 
| 211 | ) | |
| 212 | } | |
| 213 | } | |
| 214 | ||
| 215 | #' @describeIn summarize_ancova Formatted analysis function which is used as `afun` in `summarize_ancova()`. | |
| 216 | #' | |
| 217 | #' @return | |
| 218 | #' * `a_ancova()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 219 | #' | |
| 220 | #' @keywords internal | |
| 221 | a_ancova <- function(df, | |
| 222 | ..., | |
| 223 | .stats = NULL, | |
| 224 | .stat_names = NULL, | |
| 225 | .formats = NULL, | |
| 226 | .labels = NULL, | |
| 227 |                      .indent_mods = NULL) { | |
| 228 | # Check for additional parameters to the statistics function | |
| 229 | 21x | dots_extra_args <- list(...) | 
| 230 | 21x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 231 | 21x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 232 | ||
| 233 | # Check for user-defined functions | |
| 234 | 21x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 235 | 21x | .stats <- default_and_custom_stats_list$all_stats | 
| 236 | 21x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 237 | ||
| 238 | # Apply statistics function | |
| 239 | 21x | x_stats <- .apply_stat_functions( | 
| 240 | 21x | default_stat_fnc = s_ancova, | 
| 241 | 21x | custom_stat_fnc_list = custom_stat_functions, | 
| 242 | 21x | args_list = c( | 
| 243 | 21x | df = list(df), | 
| 244 | 21x | extra_afun_params, | 
| 245 | 21x | dots_extra_args | 
| 246 | ) | |
| 247 | ) | |
| 248 | ||
| 249 | # Fill in formatting defaults | |
| 250 | 21x |   .stats <- get_stats("summarize_ancova", | 
| 251 | 21x | stats_in = .stats, | 
| 252 | 21x | custom_stats_in = names(custom_stat_functions) | 
| 253 | ) | |
| 254 | 21x | x_stats <- x_stats[.stats] | 
| 255 | 21x | .formats <- get_formats_from_stats(.stats, .formats) | 
| 256 | 21x | .labels <- get_labels_from_stats( | 
| 257 | 21x | .stats, .labels, | 
| 258 | 21x | tern_defaults = c(lapply(x_stats[names(x_stats) != "n"], attr, "label"), tern_default_labels) | 
| 259 | ) | |
| 260 | 21x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods) | 
| 261 | ||
| 262 | # Auto format handling | |
| 263 | 21x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 264 | ||
| 265 | # Get and check statistical names | |
| 266 | 21x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 267 | ||
| 268 | 21x | in_rows( | 
| 269 | 21x | .list = x_stats, | 
| 270 | 21x | .formats = .formats, | 
| 271 | 21x | .names = .labels %>% .unlist_keep_nulls(), | 
| 272 | 21x | .stat_names = .stat_names, | 
| 273 | 21x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 274 | 21x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 275 | ) | |
| 276 | } | |
| 277 | ||
| 278 | #' @describeIn summarize_ancova Layout-creating function which can take statistics function arguments | |
| 279 | #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 280 | #' | |
| 281 | #' @return | |
| 282 | #' * `summarize_ancova()` returns a layout object suitable for passing to further layouting functions, | |
| 283 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 284 | #' the statistics from `s_ancova()` to the table layout. | |
| 285 | #' | |
| 286 | #' @examples | |
| 287 | #' basic_table() %>% | |
| 288 | #'   split_cols_by("Species", ref_group = "setosa") %>% | |
| 289 | #' add_colcounts() %>% | |
| 290 | #' summarize_ancova( | |
| 291 | #' vars = "Petal.Length", | |
| 292 | #' variables = list(arm = "Species", covariates = NULL), | |
| 293 | #' table_names = "unadj", | |
| 294 | #' conf_level = 0.95, var_labels = "Unadjusted comparison", | |
| 295 | #' .labels = c(lsmean = "Mean", lsmean_diff = "Difference in Means") | |
| 296 | #' ) %>% | |
| 297 | #' summarize_ancova( | |
| 298 | #' vars = "Petal.Length", | |
| 299 | #'     variables = list(arm = "Species", covariates = c("Sepal.Length", "Sepal.Width")), | |
| 300 | #' table_names = "adj", | |
| 301 | #' conf_level = 0.95, var_labels = "Adjusted comparison (covariates: Sepal.Length and Sepal.Width)" | |
| 302 | #' ) %>% | |
| 303 | #' build_table(iris) | |
| 304 | #' | |
| 305 | #' @export | |
| 306 | #' @order 2 | |
| 307 | summarize_ancova <- function(lyt, | |
| 308 | vars, | |
| 309 | variables, | |
| 310 | conf_level, | |
| 311 | interaction_y = FALSE, | |
| 312 | interaction_item = NULL, | |
| 313 | weights_emmeans = NULL, | |
| 314 | var_labels, | |
| 315 | na_str = default_na_str(), | |
| 316 | nested = TRUE, | |
| 317 | ..., | |
| 318 | show_labels = "visible", | |
| 319 | table_names = vars, | |
| 320 |                              .stats = c("n", "lsmean", "lsmean_diff", "lsmean_diff_ci", "pval"), | |
| 321 | .stat_names = NULL, | |
| 322 | .formats = NULL, | |
| 323 | .labels = NULL, | |
| 324 |                              .indent_mods = list("lsmean_diff_ci" = 1L, "pval" = 1L)) { | |
| 325 | # Process standard extra arguments | |
| 326 | 7x |   extra_args <- list(".stats" = .stats) | 
| 327 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 328 | ! | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 329 | 3x | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 330 | 7x | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 331 | ||
| 332 | # Process additional arguments to the statistic function | |
| 333 | 7x | extra_args <- c( | 
| 334 | 7x | extra_args, | 
| 335 | 7x | variables = list(variables), conf_level = list(conf_level), interaction_y = list(interaction_y), | 
| 336 | 7x | interaction_item = list(interaction_item), | 
| 337 | 7x | weights_emmeans = weights_emmeans, | 
| 338 | ... | |
| 339 | ) | |
| 340 | ||
| 341 | # Append additional info from layout to the analysis function | |
| 342 | 7x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 343 | 7x | formals(a_ancova) <- c(formals(a_ancova), extra_args[[".additional_fun_parameters"]]) | 
| 344 | ||
| 345 | 7x | analyze( | 
| 346 | 7x | lyt = lyt, | 
| 347 | 7x | vars = vars, | 
| 348 | 7x | afun = a_ancova, | 
| 349 | 7x | na_str = na_str, | 
| 350 | 7x | nested = nested, | 
| 351 | 7x | extra_args = extra_args, | 
| 352 | 7x | var_labels = var_labels, | 
| 353 | 7x | show_labels = show_labels, | 
| 354 | 7x | table_names = table_names | 
| 355 | ) | |
| 356 | } | 
| 1 | #' Helper functions for tabulating biomarker effects on survival by subgroup | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Helper functions which are documented here separately to not confuse the user | |
| 6 | #' when reading about the user-facing functions. | |
| 7 | #' | |
| 8 | #' @inheritParams survival_biomarkers_subgroups | |
| 9 | #' @inheritParams argument_convention | |
| 10 | #' @inheritParams fit_coxreg_multivar | |
| 11 | #' | |
| 12 | #' @examples | |
| 13 | #' library(dplyr) | |
| 14 | #' library(forcats) | |
| 15 | #' | |
| 16 | #' adtte <- tern_ex_adtte | |
| 17 | #' | |
| 18 | #' # Save variable labels before data processing steps. | |
| 19 | #' adtte_labels <- formatters::var_labels(adtte, fill = FALSE) | |
| 20 | #' | |
| 21 | #' adtte_f <- adtte %>% | |
| 22 | #' filter(PARAMCD == "OS") %>% | |
| 23 | #' mutate( | |
| 24 | #' AVALU = as.character(AVALU), | |
| 25 | #' is_event = CNSR == 0 | |
| 26 | #' ) | |
| 27 | #' labels <- c("AVALU" = adtte_labels[["AVALU"]], "is_event" = "Event Flag") | |
| 28 | #' formatters::var_labels(adtte_f)[names(labels)] <- labels | |
| 29 | #' | |
| 30 | #' @name h_survival_biomarkers_subgroups | |
| 31 | NULL | |
| 32 | ||
| 33 | #' @describeIn h_survival_biomarkers_subgroups Helps with converting the "survival" function variable list | |
| 34 | #' to the "Cox regression" variable list. The reason is that currently there is an inconsistency between the variable | |
| 35 | #' names accepted by `extract_survival_subgroups()` and `fit_coxreg_multivar()`. | |
| 36 | #' | |
| 37 | #' @param biomarker (`string`)\cr the name of the biomarker variable. | |
| 38 | #' | |
| 39 | #' @return | |
| 40 | #' * `h_surv_to_coxreg_variables()` returns a named `list` of elements `time`, `event`, `arm`, | |
| 41 | #' `covariates`, and `strata`. | |
| 42 | #' | |
| 43 | #' @examples | |
| 44 | #' # This is how the variable list is converted internally. | |
| 45 | #' h_surv_to_coxreg_variables( | |
| 46 | #' variables = list( | |
| 47 | #' tte = "AVAL", | |
| 48 | #' is_event = "EVNT", | |
| 49 | #'     covariates = c("A", "B"), | |
| 50 | #' strata = "D" | |
| 51 | #' ), | |
| 52 | #' biomarker = "AGE" | |
| 53 | #' ) | |
| 54 | #' | |
| 55 | #' @export | |
| 56 | h_surv_to_coxreg_variables <- function(variables, biomarker) { | |
| 57 | 65x | checkmate::assert_list(variables) | 
| 58 | 65x | checkmate::assert_string(variables$tte) | 
| 59 | 65x | checkmate::assert_string(variables$is_event) | 
| 60 | 65x | checkmate::assert_string(biomarker) | 
| 61 | 65x | list( | 
| 62 | 65x | time = variables$tte, | 
| 63 | 65x | event = variables$is_event, | 
| 64 | 65x | arm = biomarker, | 
| 65 | 65x | covariates = variables$covariates, | 
| 66 | 65x | strata = variables$strata | 
| 67 | ) | |
| 68 | } | |
| 69 | ||
| 70 | #' @describeIn h_survival_biomarkers_subgroups Prepares estimates for number of events, patients and median survival | |
| 71 | #' times, as well as hazard ratio estimates, confidence intervals and p-values, for multiple biomarkers | |
| 72 | #' in a given single data set. | |
| 73 | #' `variables` corresponds to names of variables found in `data`, passed as a named list and requires elements | |
| 74 | #' `tte`, `is_event`, `biomarkers` (vector of continuous biomarker variables) and optionally `subgroups` and `strata`. | |
| 75 | #' | |
| 76 | #' @return | |
| 77 | #' * `h_coxreg_mult_cont_df()` returns a `data.frame` containing estimates and statistics for the selected biomarkers. | |
| 78 | #' | |
| 79 | #' @examples | |
| 80 | #' # For a single population, estimate separately the effects | |
| 81 | #' # of two biomarkers. | |
| 82 | #' df <- h_coxreg_mult_cont_df( | |
| 83 | #' variables = list( | |
| 84 | #' tte = "AVAL", | |
| 85 | #' is_event = "is_event", | |
| 86 | #'     biomarkers = c("BMRKR1", "AGE"), | |
| 87 | #' covariates = "SEX", | |
| 88 | #'     strata = c("STRATA1", "STRATA2") | |
| 89 | #' ), | |
| 90 | #' data = adtte_f | |
| 91 | #' ) | |
| 92 | #' df | |
| 93 | #' | |
| 94 | #' # If the data set is empty, still the corresponding rows with missings are returned. | |
| 95 | #' h_coxreg_mult_cont_df( | |
| 96 | #' variables = list( | |
| 97 | #' tte = "AVAL", | |
| 98 | #' is_event = "is_event", | |
| 99 | #'     biomarkers = c("BMRKR1", "AGE"), | |
| 100 | #' covariates = "REGION1", | |
| 101 | #'     strata = c("STRATA1", "STRATA2") | |
| 102 | #' ), | |
| 103 | #' data = adtte_f[NULL, ] | |
| 104 | #' ) | |
| 105 | #' | |
| 106 | #' @export | |
| 107 | h_coxreg_mult_cont_df <- function(variables, | |
| 108 | data, | |
| 109 |                                   control = control_coxreg()) { | |
| 110 | 33x |   if ("strat" %in% names(variables)) { | 
| 111 | ! | warning( | 
| 112 | ! | "Warning: the `strat` element name of the `variables` list argument to `h_coxreg_mult_cont_df() ", | 
| 113 | ! | "was deprecated in tern 0.9.4.\n ", | 
| 114 | ! | "Please use the name `strata` instead of `strat` in the `variables` argument." | 
| 115 | ) | |
| 116 | ! | variables[["strata"]] <- variables[["strat"]] | 
| 117 | } | |
| 118 | ||
| 119 | 33x | assert_df_with_variables(data, variables) | 
| 120 | 33x | checkmate::assert_list(control, names = "named") | 
| 121 | 33x | checkmate::assert_character(variables$biomarkers, min.len = 1, any.missing = FALSE) | 
| 122 | 33x | conf_level <- control[["conf_level"]] | 
| 123 | 33x | pval_label <- paste0( | 
| 124 | # the regex capitalizes the first letter of the string / senetence. | |
| 125 | 33x |     "p-value (", gsub("(^[a-z])", "\\U\\1", trimws(control[["pval_method"]]), perl = TRUE), ")" | 
| 126 | ) | |
| 127 | # If there is any data, run model, otherwise return empty results. | |
| 128 | 33x |   if (nrow(data) > 0) { | 
| 129 | 32x | bm_cols <- match(variables$biomarkers, names(data)) | 
| 130 | 32x |     l_result <- lapply(variables$biomarkers, function(bm) { | 
| 131 | 64x | coxreg_list <- fit_coxreg_multivar( | 
| 132 | 64x | variables = h_surv_to_coxreg_variables(variables, bm), | 
| 133 | 64x | data = data, | 
| 134 | 64x | control = control | 
| 135 | ) | |
| 136 | 64x | result <- do.call( | 
| 137 | 64x | h_coxreg_multivar_extract, | 
| 138 | 64x |         c(list(var = bm), coxreg_list[c("mod", "data", "control")]) | 
| 139 | ) | |
| 140 | 64x | data_fit <- as.data.frame(as.matrix(coxreg_list$mod$y)) | 
| 141 | 64x | data_fit$status <- as.logical(data_fit$status) | 
| 142 | 64x | median <- s_surv_time( | 
| 143 | 64x | df = data_fit, | 
| 144 | 64x | .var = "time", | 
| 145 | 64x | is_event = "status" | 
| 146 | 64x | )$median | 
| 147 | 64x | data.frame( | 
| 148 | # Dummy column needed downstream to create a nested header. | |
| 149 | 64x | biomarker = bm, | 
| 150 | 64x | biomarker_label = formatters::var_labels(data[bm], fill = TRUE), | 
| 151 | 64x | n_tot = coxreg_list$mod$n, | 
| 152 | 64x | n_tot_events = coxreg_list$mod$nevent, | 
| 153 | 64x | median = as.numeric(median), | 
| 154 | 64x |         result[1L, c("hr", "lcl", "ucl")], | 
| 155 | 64x | conf_level = conf_level, | 
| 156 | 64x | pval = result[1L, "pval"], | 
| 157 | 64x | pval_label = pval_label, | 
| 158 | 64x | stringsAsFactors = FALSE | 
| 159 | ) | |
| 160 | }) | |
| 161 | 32x | do.call(rbind, args = c(l_result, make.row.names = FALSE)) | 
| 162 |   } else { | |
| 163 | 1x | data.frame( | 
| 164 | 1x | biomarker = variables$biomarkers, | 
| 165 | 1x | biomarker_label = formatters::var_labels(data[variables$biomarkers], fill = TRUE), | 
| 166 | 1x | n_tot = 0L, | 
| 167 | 1x | n_tot_events = 0L, | 
| 168 | 1x | median = NA, | 
| 169 | 1x | hr = NA, | 
| 170 | 1x | lcl = NA, | 
| 171 | 1x | ucl = NA, | 
| 172 | 1x | conf_level = conf_level, | 
| 173 | 1x | pval = NA, | 
| 174 | 1x | pval_label = pval_label, | 
| 175 | 1x | row.names = seq_along(variables$biomarkers), | 
| 176 | 1x | stringsAsFactors = FALSE | 
| 177 | ) | |
| 178 | } | |
| 179 | } | 
| 1 | #' Count patient events in columns | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The summarize function [summarize_patients_events_in_cols()] creates a layout element to summarize patient | |
| 6 | #' event counts in columns. | |
| 7 | #' | |
| 8 | #' This function analyzes the elements (events) supplied via the `filters_list` parameter and returns a row | |
| 9 | #' with counts of number of patients for each event as well as the total numbers of patients and events. | |
| 10 | #' The `id` variable is used to indicate unique subject identifiers (defaults to `USUBJID`). | |
| 11 | #' | |
| 12 | #' If there are multiple occurrences of the same event recorded for a patient, the event is only counted once. | |
| 13 | #' | |
| 14 | #' @inheritParams argument_convention | |
| 15 | #' @param filters_list (named `list` of `character`)\cr list where each element in this list describes one | |
| 16 | #' type of event describe by filters, in the same format as [s_count_patients_with_event()]. | |
| 17 | #' If it has a label, then this will be used for the column title. | |
| 18 | #' @param empty_stats (`character`)\cr optional names of the statistics that should be returned empty such | |
| 19 | #' that corresponding table cells will stay blank. | |
| 20 | #' @param custom_label (`string` or `NULL`)\cr if provided and `labelstr` is empty then this will | |
| 21 | #' be used as label. | |
| 22 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 23 | #' | |
| 24 | #' In addition to any statistics added using `filters_list`, statistic options are: | |
| 25 | #'   ``r shQuote(get_stats("summarize_patients_events_in_cols"), type = "sh")`` | |
| 26 | #' | |
| 27 | #' @name count_patients_events_in_cols | |
| 28 | #' @order 1 | |
| 29 | NULL | |
| 30 | ||
| 31 | #' @describeIn count_patients_events_in_cols Statistics function which counts numbers of patients and multiple | |
| 32 | #' events defined by filters. Used as analysis function `afun` in `summarize_patients_events_in_cols()`. | |
| 33 | #' | |
| 34 | #' @return | |
| 35 | #' * `s_count_patients_and_multiple_events()` returns a list with the statistics: | |
| 36 | #' - `unique`: number of unique patients in `df`. | |
| 37 | #' - `all`: number of rows in `df`. | |
| 38 | #' - one element with the same name as in `filters_list`: number of rows in `df`, | |
| 39 | #' i.e. events, fulfilling the filter condition. | |
| 40 | #' | |
| 41 | #' @keywords internal | |
| 42 | s_count_patients_and_multiple_events <- function(df, | |
| 43 | id, | |
| 44 | filters_list, | |
| 45 | empty_stats = character(), | |
| 46 | labelstr = "", | |
| 47 |                                                  custom_label = NULL) { | |
| 48 | 9x | checkmate::assert_list(filters_list, names = "named") | 
| 49 | 9x | checkmate::assert_data_frame(df) | 
| 50 | 9x | checkmate::assert_string(id) | 
| 51 | 9x |   checkmate::assert_disjunct(c("unique", "all"), names(filters_list)) | 
| 52 | 9x | checkmate::assert_character(empty_stats) | 
| 53 | 9x | checkmate::assert_string(labelstr) | 
| 54 | 9x | checkmate::assert_string(custom_label, null.ok = TRUE) | 
| 55 | ||
| 56 | # Below we want to count each row in `df` once, therefore introducing this helper index column. | |
| 57 | 9x | df$.row_index <- as.character(seq_len(nrow(df))) | 
| 58 | 9x | y <- list() | 
| 59 | 9x |   row_label <- if (labelstr != "") { | 
| 60 | ! | labelstr | 
| 61 | 9x |   } else if (!is.null(custom_label)) { | 
| 62 | 2x | custom_label | 
| 63 |   } else { | |
| 64 | 7x | "counts" | 
| 65 | } | |
| 66 | 9x | y$unique <- formatters::with_label( | 
| 67 | 9x | s_num_patients_content(df = df, .N_col = 1, .var = id, required = NULL)$unique[1L], | 
| 68 | 9x | row_label | 
| 69 | ) | |
| 70 | 9x | y$all <- formatters::with_label( | 
| 71 | 9x | nrow(df), | 
| 72 | 9x | row_label | 
| 73 | ) | |
| 74 | 9x | events <- Map( | 
| 75 | 9x |     function(filters) { | 
| 76 | 25x | formatters::with_label( | 
| 77 | 25x | s_count_patients_with_event(df = df, .var = ".row_index", filters = filters, .N_col = 1, .N_row = 1)$count, | 
| 78 | 25x | row_label | 
| 79 | ) | |
| 80 | }, | |
| 81 | 9x | filters = filters_list | 
| 82 | ) | |
| 83 | 9x | y_complete <- c(y, events) | 
| 84 | 9x |   y <- if (length(empty_stats) > 0) { | 
| 85 | 3x | y_reduced <- y_complete | 
| 86 | 3x |     for (stat in intersect(names(y_complete), empty_stats)) { | 
| 87 | 4x | y_reduced[[stat]] <- formatters::with_label(character(), obj_label(y_reduced[[stat]])) | 
| 88 | } | |
| 89 | 3x | y_reduced | 
| 90 |   } else { | |
| 91 | 6x | y_complete | 
| 92 | } | |
| 93 | 9x | y | 
| 94 | } | |
| 95 | ||
| 96 | #' @describeIn count_patients_events_in_cols Layout-creating function which can take statistics function | |
| 97 | #' arguments and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()]. | |
| 98 | #' | |
| 99 | #' @param col_split (`flag`)\cr whether the columns should be split. | |
| 100 | #' Set to `FALSE` when the required column split has been done already earlier in the layout pipe. | |
| 101 | #' | |
| 102 | #' @return | |
| 103 | #' * `summarize_patients_events_in_cols()` returns a layout object suitable for passing to further layouting functions, | |
| 104 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted content rows | |
| 105 | #' containing the statistics from `s_count_patients_and_multiple_events()` to the table layout. | |
| 106 | #' | |
| 107 | #' @examples | |
| 108 | #' df <- data.frame( | |
| 109 | #'   USUBJID = rep(c("id1", "id2", "id3", "id4"), c(2, 3, 1, 1)), | |
| 110 | #'   ARM = c("A", "A", "B", "B", "B", "B", "A"), | |
| 111 | #'   AESER = rep("Y", 7), | |
| 112 | #'   AESDTH = c("Y", "Y", "N", "Y", "Y", "N", "N"), | |
| 113 | #'   AEREL = c("Y", "Y", "N", "Y", "Y", "N", "Y"), | |
| 114 | #'   AEDECOD = c("A", "A", "A", "B", "B", "C", "D"), | |
| 115 | #'   AEBODSYS = rep(c("SOC1", "SOC2", "SOC3"), c(3, 3, 1)) | |
| 116 | #' ) | |
| 117 | #' | |
| 118 | #' # `summarize_patients_events_in_cols()` | |
| 119 | #' basic_table() %>% | |
| 120 | #' summarize_patients_events_in_cols( | |
| 121 | #' filters_list = list( | |
| 122 | #' related = formatters::with_label(c(AEREL = "Y"), "Events (Related)"), | |
| 123 | #' fatal = c(AESDTH = "Y"), | |
| 124 | #' fatal_related = c(AEREL = "Y", AESDTH = "Y") | |
| 125 | #' ), | |
| 126 | #' custom_label = "%s Total number of patients and events" | |
| 127 | #' ) %>% | |
| 128 | #' build_table(df) | |
| 129 | #' | |
| 130 | #' @export | |
| 131 | #' @order 2 | |
| 132 | summarize_patients_events_in_cols <- function(lyt, | |
| 133 | id = "USUBJID", | |
| 134 | filters_list = list(), | |
| 135 | empty_stats = character(), | |
| 136 | na_str = default_na_str(), | |
| 137 | ..., | |
| 138 | .stats = c( | |
| 139 | "unique", | |
| 140 | "all", | |
| 141 | names(filters_list) | |
| 142 | ), | |
| 143 | .labels = c( | |
| 144 | unique = "Patients (All)", | |
| 145 | all = "Events (All)", | |
| 146 | labels_or_names(filters_list) | |
| 147 | ), | |
| 148 |                                               col_split = TRUE) { | |
| 149 | 2x | extra_args <- list(id = id, filters_list = filters_list, empty_stats = empty_stats, ...) | 
| 150 | ||
| 151 | 2x | afun_list <- Map( | 
| 152 | 2x |     function(stat) { | 
| 153 | 7x | make_afun( | 
| 154 | 7x | s_count_patients_and_multiple_events, | 
| 155 | 7x | .stats = stat, | 
| 156 | 7x | .formats = "xx." | 
| 157 | ) | |
| 158 | }, | |
| 159 | 2x | stat = .stats | 
| 160 | ) | |
| 161 | 2x |   if (col_split) { | 
| 162 | 2x | lyt <- split_cols_by_multivar( | 
| 163 | 2x | lyt = lyt, | 
| 164 | 2x | vars = rep(id, length(.stats)), | 
| 165 | 2x | varlabels = .labels[.stats] | 
| 166 | ) | |
| 167 | } | |
| 168 | 2x | summarize_row_groups( | 
| 169 | 2x | lyt = lyt, | 
| 170 | 2x | cfun = afun_list, | 
| 171 | 2x | na_str = na_str, | 
| 172 | 2x | extra_args = extra_args | 
| 173 | ) | |
| 174 | } | 
| 1 | #' Estimate proportions of each level of a variable | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze & summarize function [estimate_multinomial_response()] creates a layout element to estimate the | |
| 6 | #' proportion and proportion confidence interval for each level of a factor variable. The primary analysis variable, | |
| 7 | #' `var`, should be a factor variable, the values of which will be used as labels within the output table. | |
| 8 | #' | |
| 9 | #' @inheritParams argument_convention | |
| 10 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 11 | #' | |
| 12 | #'   Options are: ``r shQuote(get_stats("estimate_multinomial_response"), type = "sh")`` | |
| 13 | #' | |
| 14 | #' @seealso Relevant description function [d_onco_rsp_label()]. | |
| 15 | #' | |
| 16 | #' @name estimate_multinomial_rsp | |
| 17 | #' @order 1 | |
| 18 | NULL | |
| 19 | ||
| 20 | #' Description of standard oncology response | |
| 21 | #' | |
| 22 | #' @description `r lifecycle::badge("stable")` | |
| 23 | #' | |
| 24 | #' Describe the oncology response in a standard way. | |
| 25 | #' | |
| 26 | #' @param x (`character`)\cr the standard oncology codes to be described. | |
| 27 | #' | |
| 28 | #' @return Response labels. | |
| 29 | #' | |
| 30 | #' @seealso [estimate_multinomial_rsp()] | |
| 31 | #' | |
| 32 | #' @examples | |
| 33 | #' d_onco_rsp_label( | |
| 34 | #'   c("CR", "PR", "SD", "NON CR/PD", "PD", "NE", "Missing", "<Missing>", "NE/Missing") | |
| 35 | #' ) | |
| 36 | #' | |
| 37 | #' # Adding some values not considered in d_onco_rsp_label | |
| 38 | #' | |
| 39 | #' d_onco_rsp_label( | |
| 40 | #'   c("CR", "PR", "hello", "hi") | |
| 41 | #' ) | |
| 42 | #' | |
| 43 | #' @export | |
| 44 | d_onco_rsp_label <- function(x) { | |
| 45 | 2x | x <- as.character(x) | 
| 46 | 2x | desc <- c( | 
| 47 | 2x | CR = "Complete Response (CR)", | 
| 48 | 2x | PR = "Partial Response (PR)", | 
| 49 | 2x | MR = "Minimal/Minor Response (MR)", | 
| 50 | 2x | MRD = "Minimal Residual Disease (MRD)", | 
| 51 | 2x | SD = "Stable Disease (SD)", | 
| 52 | 2x | PD = "Progressive Disease (PD)", | 
| 53 | 2x | `NON CR/PD` = "Non-CR or Non-PD (NON CR/PD)", | 
| 54 | 2x | NE = "Not Evaluable (NE)", | 
| 55 | 2x | `NE/Missing` = "Missing or unevaluable", | 
| 56 | 2x | Missing = "Missing", | 
| 57 | 2x | `NA` = "Not Applicable (NA)", | 
| 58 | 2x | ND = "Not Done (ND)" | 
| 59 | ) | |
| 60 | ||
| 61 | 2x | values_label <- vapply( | 
| 62 | 2x | X = x, | 
| 63 | 2x | FUN.VALUE = character(1), | 
| 64 | 2x |     function(val) { | 
| 65 | ! | if (val %in% names(desc)) desc[val] else val | 
| 66 | } | |
| 67 | ) | |
| 68 | ||
| 69 | 2x | factor(values_label, levels = c(intersect(desc, values_label), setdiff(values_label, desc))) | 
| 70 | } | |
| 71 | ||
| 72 | #' @describeIn estimate_multinomial_rsp Statistics function which feeds the length of `x` as number | |
| 73 | #' of successes, and `.N_col` as total number of successes and failures into [s_proportion()]. | |
| 74 | #' | |
| 75 | #' @return | |
| 76 | #' * `s_length_proportion()` returns statistics from [s_proportion()]. | |
| 77 | #' | |
| 78 | #' @examples | |
| 79 | #' s_length_proportion(rep("CR", 10), .N_col = 100) | |
| 80 | #' s_length_proportion(factor(character(0)), .N_col = 100) | |
| 81 | #' | |
| 82 | #' @export | |
| 83 | s_length_proportion <- function(x, | |
| 84 | ..., | |
| 85 |                                 .N_col) { # nolint | |
| 86 | 10x |   checkmate::assert_multi_class(x, classes = c("factor", "character")) | 
| 87 | 9x | checkmate::assert_vector(x, min.len = 0, max.len = .N_col) | 
| 88 | 7x | checkmate::assert_vector(unique(x), min.len = 0, max.len = 1) | 
| 89 | ||
| 90 | 7x | n_true <- length(x) | 
| 91 | 7x | n_false <- .N_col - n_true | 
| 92 | 7x | x_logical <- rep(c(TRUE, FALSE), c(n_true, n_false)) | 
| 93 | 7x | s_proportion(df = x_logical, ...) | 
| 94 | } | |
| 95 | ||
| 96 | #' @describeIn estimate_multinomial_rsp Formatted analysis function which is used as `afun` | |
| 97 | #' in `estimate_multinomial_response()`. | |
| 98 | #' | |
| 99 | #' @return | |
| 100 | #' * `a_length_proportion()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 101 | #' | |
| 102 | #' @examples | |
| 103 | #' a_length_proportion(rep("CR", 10), .N_col = 100) | |
| 104 | #' a_length_proportion(factor(character(0)), .N_col = 100) | |
| 105 | #' | |
| 106 | #' @export | |
| 107 | a_length_proportion <- function(x, | |
| 108 | ..., | |
| 109 | .stats = NULL, | |
| 110 | .stat_names = NULL, | |
| 111 | .formats = NULL, | |
| 112 | .labels = NULL, | |
| 113 |                                 .indent_mods = NULL) { | |
| 114 | # Check for additional parameters to the statistics function | |
| 115 | 6x | dots_extra_args <- list(...) | 
| 116 | 6x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 117 | 6x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 118 | ||
| 119 | # Check for user-defined functions | |
| 120 | 6x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 121 | 6x | .stats <- default_and_custom_stats_list$all_stats | 
| 122 | 6x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 123 | ||
| 124 | # Apply statistics function | |
| 125 | 6x | x_stats <- .apply_stat_functions( | 
| 126 | 6x | default_stat_fnc = s_length_proportion, | 
| 127 | 6x | custom_stat_fnc_list = custom_stat_functions, | 
| 128 | 6x | args_list = c( | 
| 129 | 6x | x = list(x), | 
| 130 | 6x | extra_afun_params, | 
| 131 | 6x | dots_extra_args | 
| 132 | ) | |
| 133 | ) | |
| 134 | ||
| 135 | # Fill in formatting defaults | |
| 136 | 6x |   .stats <- get_stats("estimate_multinomial_response", | 
| 137 | 6x | stats_in = .stats, | 
| 138 | 6x | custom_stats_in = names(custom_stat_functions) | 
| 139 | ) | |
| 140 | 6x | x_stats <- x_stats[.stats] | 
| 141 | 6x | .formats <- get_formats_from_stats(.stats, .formats) | 
| 142 | 6x | .labels <- get_labels_from_stats( | 
| 143 | 6x | .stats, .labels, | 
| 144 | 6x | tern_defaults = c(lapply(x_stats, attr, "label"), tern_default_labels) | 
| 145 | ) | |
| 146 | 6x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods) | 
| 147 | ||
| 148 | # Auto format handling | |
| 149 | 6x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 150 | ||
| 151 | # Get and check statistical names | |
| 152 | 6x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 153 | ||
| 154 | 6x | in_rows( | 
| 155 | 6x | .list = x_stats, | 
| 156 | 6x | .formats = .formats, | 
| 157 | 6x | .names = .labels %>% .unlist_keep_nulls(), | 
| 158 | 6x | .stat_names = .stat_names, | 
| 159 | 6x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 160 | 6x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 161 | ) | |
| 162 | } | |
| 163 | ||
| 164 | #' @describeIn estimate_multinomial_rsp Layout-creating function which can take statistics function arguments | |
| 165 | #' and additional format arguments. This function is a wrapper for [rtables::analyze()] and | |
| 166 | #' [rtables::summarize_row_groups()]. | |
| 167 | #' | |
| 168 | #' @return | |
| 169 | #' * `estimate_multinomial_response()` returns a layout object suitable for passing to further layouting functions, | |
| 170 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 171 | #' the statistics from `s_length_proportion()` to the table layout. | |
| 172 | #' | |
| 173 | #' @examples | |
| 174 | #' library(dplyr) | |
| 175 | #' | |
| 176 | #' # Use of the layout creating function. | |
| 177 | #' dta_test <- data.frame( | |
| 178 | #'   USUBJID = paste0("S", 1:12), | |
| 179 | #' ARM = factor(rep(LETTERS[1:3], each = 4)), | |
| 180 | #' AVAL = c(A = c(1, 1, 1, 1), B = c(0, 0, 1, 1), C = c(0, 0, 0, 0)) | |
| 181 | #' ) %>% mutate( | |
| 182 | #' AVALC = factor(AVAL, | |
| 183 | #' levels = c(0, 1), | |
| 184 | #'     labels = c("Complete Response (CR)", "Partial Response (PR)") | |
| 185 | #' ) | |
| 186 | #' ) | |
| 187 | #' | |
| 188 | #' lyt <- basic_table() %>% | |
| 189 | #'   split_cols_by("ARM") %>% | |
| 190 | #' estimate_multinomial_response(var = "AVALC") | |
| 191 | #' | |
| 192 | #' tbl <- build_table(lyt, dta_test) | |
| 193 | #' | |
| 194 | #' tbl | |
| 195 | #' | |
| 196 | #' @export | |
| 197 | #' @order 2 | |
| 198 | estimate_multinomial_response <- function(lyt, | |
| 199 | var, | |
| 200 | na_str = default_na_str(), | |
| 201 | nested = TRUE, | |
| 202 | ..., | |
| 203 | show_labels = "hidden", | |
| 204 | table_names = var, | |
| 205 | .stats = "prop_ci", | |
| 206 | .stat_names = NULL, | |
| 207 | .formats = list(prop_ci = "(xx.xx, xx.xx)"), | |
| 208 | .labels = NULL, | |
| 209 |                                           .indent_mods = NULL) { | |
| 210 | # Process standard extra arguments | |
| 211 | 1x |   extra_args <- list(".stats" = .stats) | 
| 212 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 213 | 1x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 214 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 215 | ! | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 216 | ||
| 217 | # Process additional arguments to the statistic function | |
| 218 | 1x | extra_args <- c(extra_args, ...) | 
| 219 | ||
| 220 | # Append additional info from layout to the analysis function | |
| 221 | 1x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 222 | 1x | formals(a_length_proportion) <- c(formals(a_length_proportion), extra_args[[".additional_fun_parameters"]]) | 
| 223 | ||
| 224 | 1x | lyt <- split_rows_by(lyt, var = var) | 
| 225 | 1x | lyt <- summarize_row_groups(lyt, na_str = na_str) | 
| 226 | ||
| 227 | 1x | analyze( | 
| 228 | 1x | lyt = lyt, | 
| 229 | 1x | vars = var, | 
| 230 | 1x | afun = a_length_proportion, | 
| 231 | 1x | na_str = na_str, | 
| 232 | 1x | nested = nested, | 
| 233 | 1x | extra_args = extra_args, | 
| 234 | 1x | show_labels = show_labels, | 
| 235 | 1x | table_names = table_names | 
| 236 | ) | |
| 237 | } | 
| 1 | #' Incidence rate estimation | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [estimate_incidence_rate()] creates a layout element to estimate an event rate adjusted for | |
| 6 | #' person-years at risk, otherwise known as incidence rate. The primary analysis variable specified via `vars` is | |
| 7 | #' the person-years at risk. In addition to this variable, the `n_events` variable for number of events observed (where | |
| 8 | #' a value of 1 means an event was observed and 0 means that no event was observed) must also be specified. | |
| 9 | #' | |
| 10 | #' @inheritParams argument_convention | |
| 11 | #' @param control (`list`)\cr parameters for estimation details, specified by using | |
| 12 | #' the helper function [control_incidence_rate()]. Possible parameter options are: | |
| 13 | #' * `conf_level` (`proportion`)\cr confidence level for the estimated incidence rate. | |
| 14 | #' * `conf_type` (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar` | |
| 15 | #' for confidence interval type. | |
| 16 | #' * `input_time_unit` (`string`)\cr `day`, `week`, `month`, or `year` (default) | |
| 17 | #' indicating time unit for data input. | |
| 18 | #' * `num_pt_year` (`numeric`)\cr time unit for desired output (in person-years). | |
| 19 | #' @param n_events (`string`)\cr name of integer variable indicating whether an event has been observed (1) or not (0). | |
| 20 | #' @param id_var (`string`)\cr name of variable used as patient identifier if `"n_unique"` is included in `.stats`. | |
| 21 | #' Defaults to `"USUBJID"`. | |
| 22 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 23 | #' | |
| 24 | #'   Options are: ``r shQuote(get_stats("estimate_incidence_rate"), type = "sh")`` | |
| 25 | #' @param summarize (`flag`)\cr whether the function should act as an analyze function (`summarize = FALSE`), or a | |
| 26 | #' summarize function (`summarize = TRUE`). Defaults to `FALSE`. | |
| 27 | #' @param label_fmt (`string`)\cr how labels should be formatted after a row split occurs if `summarize = TRUE`. The | |
| 28 | #' string should use `"%s"` to represent row split levels, and `"%.labels"` to represent labels supplied to the | |
| 29 | #' `.labels` argument. Defaults to `"%s - %.labels"`. | |
| 30 | #' | |
| 31 | #' @seealso [control_incidence_rate()] and helper functions [h_incidence_rate]. | |
| 32 | #' | |
| 33 | #' @examples | |
| 34 | #' df <- data.frame( | |
| 35 | #' USUBJID = as.character(seq(6)), | |
| 36 | #' CNSR = c(0, 1, 1, 0, 0, 0), | |
| 37 | #' AVAL = c(10.1, 20.4, 15.3, 20.8, 18.7, 23.4), | |
| 38 | #'   ARM = factor(c("A", "A", "A", "B", "B", "B")), | |
| 39 | #'   STRATA1 = factor(c("X", "Y", "Y", "X", "X", "Y")) | |
| 40 | #' ) | |
| 41 | #' df$n_events <- 1 - df$CNSR | |
| 42 | #' | |
| 43 | #' @name incidence_rate | |
| 44 | #' @order 1 | |
| 45 | NULL | |
| 46 | ||
| 47 | #' @describeIn incidence_rate Statistics function which estimates the incidence rate and the | |
| 48 | #' associated confidence interval. | |
| 49 | #' | |
| 50 | #' @return | |
| 51 | #' * `s_incidence_rate()` returns the following statistics: | |
| 52 | #' - `person_years`: Total person-years at risk. | |
| 53 | #' - `n_events`: Total number of events observed. | |
| 54 | #' - `rate`: Estimated incidence rate. | |
| 55 | #' - `rate_ci`: Confidence interval for the incidence rate. | |
| 56 | #' - `n_unique`: Total number of patients with at least one event observed. | |
| 57 | #' - `n_rate`: Total number of events observed & estimated incidence rate. | |
| 58 | #' | |
| 59 | #' @keywords internal | |
| 60 | s_incidence_rate <- function(df, | |
| 61 | .var, | |
| 62 | ..., | |
| 63 | n_events, | |
| 64 | is_event = lifecycle::deprecated(), | |
| 65 | id_var = "USUBJID", | |
| 66 |                              control = control_incidence_rate()) { | |
| 67 | 17x |   if (lifecycle::is_present(is_event)) { | 
| 68 | ! | checkmate::assert_string(is_event) | 
| 69 | ! | lifecycle::deprecate_warn( | 
| 70 | ! | "0.9.6", "s_incidence_rate(is_event)", "s_incidence_rate(n_events)" | 
| 71 | ) | |
| 72 | ! | n_events <- is_event | 
| 73 | ! | df[[n_events]] <- as.numeric(df[[is_event]]) | 
| 74 | } | |
| 75 | ||
| 76 | 17x | assert_df_with_variables(df, list(tte = .var, n_events = n_events)) | 
| 77 | 17x | checkmate::assert_string(.var) | 
| 78 | 17x | checkmate::assert_string(n_events) | 
| 79 | 17x | checkmate::assert_string(id_var) | 
| 80 | 17x | checkmate::assert_numeric(df[[.var]], any.missing = FALSE) | 
| 81 | 17x | checkmate::assert_integerish(df[[n_events]], any.missing = FALSE) | 
| 82 | ||
| 83 | 17x | n_unique <- n_available(unique(df[[id_var]][df[[n_events]] == 1])) | 
| 84 | 17x | input_time_unit <- control$input_time_unit | 
| 85 | 17x | num_pt_year <- control$num_pt_year | 
| 86 | 17x | conf_level <- control$conf_level | 
| 87 | 17x | person_years <- sum(df[[.var]], na.rm = TRUE) * ( | 
| 88 | 17x | 1 * (input_time_unit == "year") + | 
| 89 | 17x | 1 / 12 * (input_time_unit == "month") + | 
| 90 | 17x | 1 / 52.14 * (input_time_unit == "week") + | 
| 91 | 17x | 1 / 365.24 * (input_time_unit == "day") | 
| 92 | ) | |
| 93 | 17x | n_events <- sum(df[[n_events]], na.rm = TRUE) | 
| 94 | ||
| 95 | 17x | result <- h_incidence_rate( | 
| 96 | 17x | person_years, | 
| 97 | 17x | n_events, | 
| 98 | 17x | control | 
| 99 | ) | |
| 100 | 17x | list( | 
| 101 | 17x | person_years = formatters::with_label(person_years, "Total patient-years at risk"), | 
| 102 | 17x | n_events = formatters::with_label(n_events, "Number of adverse events observed"), | 
| 103 | 17x |     rate = formatters::with_label(result$rate, paste("AE rate per", num_pt_year, "patient-years")), | 
| 104 | 17x | rate_ci = formatters::with_label(result$rate_ci, f_conf_level(conf_level)), | 
| 105 | 17x | n_unique = formatters::with_label(n_unique, "Total number of patients with at least one adverse event"), | 
| 106 | 17x | n_rate = formatters::with_label( | 
| 107 | 17x | c(n_events, result$rate), | 
| 108 | 17x |       paste("Number of adverse events observed (AE rate per", num_pt_year, "patient-years)") | 
| 109 | ) | |
| 110 | ) | |
| 111 | } | |
| 112 | ||
| 113 | #' @describeIn incidence_rate Formatted analysis function which is used as `afun` in `estimate_incidence_rate()`. | |
| 114 | #' | |
| 115 | #' @return | |
| 116 | #' * `a_incidence_rate()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 117 | #' | |
| 118 | #' @examples | |
| 119 | #' a_incidence_rate( | |
| 120 | #' df, | |
| 121 | #' .var = "AVAL", | |
| 122 | #' .df_row = df, | |
| 123 | #' n_events = "n_events" | |
| 124 | #' ) | |
| 125 | #' | |
| 126 | #' @export | |
| 127 | a_incidence_rate <- function(df, | |
| 128 | labelstr = "", | |
| 129 | label_fmt = "%s - %.labels", | |
| 130 | ..., | |
| 131 | .stats = NULL, | |
| 132 | .stat_names = NULL, | |
| 133 | .formats = NULL, | |
| 134 | .labels = NULL, | |
| 135 |                              .indent_mods = NULL) { | |
| 136 | 16x | checkmate::assert_string(label_fmt) | 
| 137 | ||
| 138 | # Check for additional parameters to the statistics function | |
| 139 | 16x | dots_extra_args <- list(...) | 
| 140 | 16x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 141 | 16x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 142 | ||
| 143 | # Check for user-defined functions | |
| 144 | 16x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 145 | 16x | .stats <- default_and_custom_stats_list$all_stats | 
| 146 | 16x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 147 | ||
| 148 | # Main statistic calculations | |
| 149 | 16x | x_stats <- .apply_stat_functions( | 
| 150 | 16x | default_stat_fnc = s_incidence_rate, | 
| 151 | 16x | custom_stat_fnc_list = custom_stat_functions, | 
| 152 | 16x | args_list = c( | 
| 153 | 16x | df = list(df), | 
| 154 | 16x | extra_afun_params, | 
| 155 | 16x | dots_extra_args | 
| 156 | ) | |
| 157 | ) | |
| 158 | ||
| 159 | # Fill in formatting defaults | |
| 160 | 16x |   .stats <- get_stats("estimate_incidence_rate", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) | 
| 161 | 16x | x_stats <- x_stats[.stats] | 
| 162 | 16x | .formats <- get_formats_from_stats(.stats, .formats) | 
| 163 | 16x | .labels <- get_labels_from_stats(.stats, .labels, tern_defaults = lapply(x_stats, attr, "label")) | 
| 164 | 16x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods) | 
| 165 | ||
| 166 | # Apply label format | |
| 167 | 16x |   if (nzchar(labelstr) > 0) { | 
| 168 | 8x |     .labels <- sapply(.labels, function(x) gsub("%.labels", x, gsub("%s", labelstr, label_fmt))) | 
| 169 | } | |
| 170 | ||
| 171 | # Auto format handling | |
| 172 | 16x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 173 | ||
| 174 | # Get and check statistical names | |
| 175 | 16x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 176 | ||
| 177 | 16x | in_rows( | 
| 178 | 16x | .list = x_stats, | 
| 179 | 16x | .formats = .formats, | 
| 180 | 16x | .names = names(.labels), | 
| 181 | 16x | .stat_names = .stat_names, | 
| 182 | 16x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 183 | 16x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 184 | ) | |
| 185 | } | |
| 186 | ||
| 187 | #' @describeIn incidence_rate Layout-creating function which can take statistics function arguments | |
| 188 | #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 189 | #' | |
| 190 | #' @return | |
| 191 | #' * `estimate_incidence_rate()` returns a layout object suitable for passing to further layouting functions, | |
| 192 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 193 | #' the statistics from `s_incidence_rate()` to the table layout. | |
| 194 | #' | |
| 195 | #' @examples | |
| 196 | #' basic_table(show_colcounts = TRUE) %>% | |
| 197 | #'   split_cols_by("ARM") %>% | |
| 198 | #' estimate_incidence_rate( | |
| 199 | #' vars = "AVAL", | |
| 200 | #' n_events = "n_events", | |
| 201 | #' control = control_incidence_rate( | |
| 202 | #' input_time_unit = "month", | |
| 203 | #' num_pt_year = 100 | |
| 204 | #' ) | |
| 205 | #' ) %>% | |
| 206 | #' build_table(df) | |
| 207 | #' | |
| 208 | #' # summarize = TRUE | |
| 209 | #' basic_table(show_colcounts = TRUE) %>% | |
| 210 | #'   split_cols_by("ARM") %>% | |
| 211 | #'   split_rows_by("STRATA1", child_labels = "visible") %>% | |
| 212 | #' estimate_incidence_rate( | |
| 213 | #' vars = "AVAL", | |
| 214 | #' n_events = "n_events", | |
| 215 | #'     .stats = c("n_unique", "n_rate"), | |
| 216 | #' summarize = TRUE, | |
| 217 | #' label_fmt = "%.labels" | |
| 218 | #' ) %>% | |
| 219 | #' build_table(df) | |
| 220 | #' | |
| 221 | #' @export | |
| 222 | #' @order 2 | |
| 223 | estimate_incidence_rate <- function(lyt, | |
| 224 | vars, | |
| 225 | n_events, | |
| 226 | id_var = "USUBJID", | |
| 227 | control = control_incidence_rate(), | |
| 228 | na_str = default_na_str(), | |
| 229 | nested = TRUE, | |
| 230 | summarize = FALSE, | |
| 231 | label_fmt = "%s - %.labels", | |
| 232 | ..., | |
| 233 | show_labels = "hidden", | |
| 234 | table_names = vars, | |
| 235 |                                     .stats = c("person_years", "n_events", "rate", "rate_ci"), | |
| 236 | .stat_names = NULL, | |
| 237 | .formats = list(rate = "xx.xx", rate_ci = "(xx.xx, xx.xx)"), | |
| 238 | .labels = NULL, | |
| 239 |                                     .indent_mods = NULL) { | |
| 240 | # Process standard extra arguments | |
| 241 | 5x |   extra_args <- list(".stats" = .stats) | 
| 242 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 243 | 5x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 244 | 1x | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 245 | 1x | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 246 | ||
| 247 | # Process additional arguments to the statistic function | |
| 248 | 5x | extra_args <- c( | 
| 249 | 5x | extra_args, | 
| 250 | 5x | n_events = n_events, id_var = id_var, control = list(control), label_fmt = label_fmt, | 
| 251 | ... | |
| 252 | ) | |
| 253 | ||
| 254 | # Adding additional info from layout to analysis function | |
| 255 | 5x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 256 | 5x | formals(a_incidence_rate) <- c(formals(a_incidence_rate), extra_args[[".additional_fun_parameters"]]) | 
| 257 | ||
| 258 | 5x |   if (!summarize) { | 
| 259 | 3x | analyze( | 
| 260 | 3x | lyt = lyt, | 
| 261 | 3x | vars = vars, | 
| 262 | 3x | afun = a_incidence_rate, | 
| 263 | 3x | na_str = na_str, | 
| 264 | 3x | nested = nested, | 
| 265 | 3x | extra_args = extra_args, | 
| 266 | 3x | show_labels = show_labels, | 
| 267 | 3x | table_names = table_names | 
| 268 | ) | |
| 269 |   } else { | |
| 270 | 2x | summarize_row_groups( | 
| 271 | 2x | lyt = lyt, | 
| 272 | 2x | var = vars, | 
| 273 | 2x | cfun = a_incidence_rate, | 
| 274 | 2x | na_str = na_str, | 
| 275 | 2x | extra_args = extra_args | 
| 276 | ) | |
| 277 | } | |
| 278 | } | 
| 1 | #' Control function for subgroup treatment effect pattern (STEP) calculations | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' This is an auxiliary function for controlling arguments for STEP calculations. | |
| 6 | #' | |
| 7 | #' @param biomarker (`numeric` or `NULL`)\cr optional provision of the numeric biomarker variable, which | |
| 8 | #' could be used to infer `bandwidth`, see below. | |
| 9 | #' @param use_percentile (`flag`)\cr if `TRUE`, the running windows are created according to | |
| 10 | #' quantiles rather than actual values, i.e. the bandwidth refers to the percentage of data | |
| 11 | #' covered in each window. Suggest `TRUE` if the biomarker variable is not uniformly | |
| 12 | #' distributed. | |
| 13 | #' @param bandwidth (`numeric(1)` or `NULL`)\cr indicating the bandwidth of each window. | |
| 14 | #' Depending on the argument `use_percentile`, it can be either the length of actual-value | |
| 15 | #' windows on the real biomarker scale, or percentage windows. | |
| 16 | #' If `use_percentile = TRUE`, it should be a number between 0 and 1. | |
| 17 | #' If `NULL`, treat the bandwidth to be infinity, which means only one global model will be fitted. | |
| 18 | #' By default, `0.25` is used for percentage windows and one quarter of the range of the `biomarker` | |
| 19 | #' variable for actual-value windows. | |
| 20 | #' @param degree (`integer(1)`)\cr the degree of polynomial function of the biomarker as an interaction term | |
| 21 | #' with the treatment arm fitted at each window. If 0 (default), then the biomarker variable | |
| 22 | #' is not included in the model fitted in each biomarker window. | |
| 23 | #' @param num_points (`integer(1)`)\cr the number of points at which the hazard ratios are estimated. The | |
| 24 | #' smallest number is 2. | |
| 25 | #' | |
| 26 | #' @return A list of components with the same names as the arguments, except `biomarker` which is | |
| 27 | #' just used to calculate the `bandwidth` in case that actual biomarker windows are requested. | |
| 28 | #' | |
| 29 | #' @examples | |
| 30 | #' # Provide biomarker values and request actual values to be used, | |
| 31 | #' # so that bandwidth is chosen from range. | |
| 32 | #' control_step(biomarker = 1:10, use_percentile = FALSE) | |
| 33 | #' | |
| 34 | #' # Use a global model with quadratic biomarker interaction term. | |
| 35 | #' control_step(bandwidth = NULL, degree = 2) | |
| 36 | #' | |
| 37 | #' # Reduce number of points to be used. | |
| 38 | #' control_step(num_points = 10) | |
| 39 | #' | |
| 40 | #' @export | |
| 41 | control_step <- function(biomarker = NULL, | |
| 42 | use_percentile = TRUE, | |
| 43 | bandwidth, | |
| 44 | degree = 0L, | |
| 45 |                          num_points = 39L) { | |
| 46 | 31x | checkmate::assert_numeric(biomarker, null.ok = TRUE) | 
| 47 | 30x | checkmate::assert_flag(use_percentile) | 
| 48 | 30x | checkmate::assert_int(num_points, lower = 2) | 
| 49 | 29x | checkmate::assert_count(degree) | 
| 50 | ||
| 51 | 29x |   if (missing(bandwidth)) { | 
| 52 | # Infer bandwidth | |
| 53 | 21x |     bandwidth <- if (use_percentile) { | 
| 54 | 18x | 0.25 | 
| 55 | 21x |     } else if (!is.null(biomarker)) { | 
| 56 | 3x | diff(range(biomarker, na.rm = TRUE)) / 4 | 
| 57 |     } else { | |
| 58 | ! | NULL | 
| 59 | } | |
| 60 |   } else { | |
| 61 | # Check bandwidth | |
| 62 | 8x |     if (!is.null(bandwidth)) { | 
| 63 | 5x |       if (use_percentile) { | 
| 64 | 4x | assert_proportion_value(bandwidth) | 
| 65 |       } else { | |
| 66 | 1x | checkmate::assert_scalar(bandwidth) | 
| 67 | 1x | checkmate::assert_true(bandwidth > 0) | 
| 68 | } | |
| 69 | } | |
| 70 | } | |
| 71 | 28x | list( | 
| 72 | 28x | use_percentile = use_percentile, | 
| 73 | 28x | bandwidth = bandwidth, | 
| 74 | 28x | degree = as.integer(degree), | 
| 75 | 28x | num_points = as.integer(num_points) | 
| 76 | ) | |
| 77 | } | 
| 1 | #' Count number of patients with missed doses by thresholds | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function creates a layout element to calculate cumulative counts of patients with number of missed | |
| 6 | #' doses at least equal to user-specified threshold values. | |
| 7 | #' | |
| 8 | #' This function analyzes numeric variable `vars`, a variable with numbers of missed doses, | |
| 9 | #' against the threshold values supplied to the `thresholds` argument as a numeric vector. This function | |
| 10 | #' assumes that every row of the given data frame corresponds to a unique patient. | |
| 11 | #' | |
| 12 | #' @inheritParams s_count_cumulative | |
| 13 | #' @inheritParams argument_convention | |
| 14 | #' @param thresholds (`numeric`)\cr minimum number of missed doses the patients had. | |
| 15 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 16 | #' | |
| 17 | #'   Options are: ``r shQuote(get_stats("count_missed_doses"), type = "sh")`` | |
| 18 | #' | |
| 19 | #' @seealso | |
| 20 | #' * Relevant description function [d_count_missed_doses()] which generates labels for [count_missed_doses()]. | |
| 21 | #' * Similar analyze function [count_cumulative()] which more generally counts cumulative values and has more | |
| 22 | #' options for threshold handling, but uses different labels. | |
| 23 | #' | |
| 24 | #' @name count_missed_doses | |
| 25 | #' @order 1 | |
| 26 | NULL | |
| 27 | ||
| 28 | #' Description function that calculates labels for `s_count_missed_doses()` | |
| 29 | #' | |
| 30 | #' @description `r lifecycle::badge("stable")` | |
| 31 | #' | |
| 32 | #' @inheritParams s_count_missed_doses | |
| 33 | #' | |
| 34 | #' @return [d_count_missed_doses()] returns a named `character` vector with the labels. | |
| 35 | #' | |
| 36 | #' @seealso [s_count_missed_doses()] | |
| 37 | #' | |
| 38 | #' @export | |
| 39 | d_count_missed_doses <- function(thresholds) { | |
| 40 | 8x |   paste0("At least ", thresholds, " missed dose", ifelse(thresholds > 1, "s", "")) | 
| 41 | } | |
| 42 | ||
| 43 | #' @describeIn count_missed_doses Statistics function to count patients with missed doses. | |
| 44 | #' | |
| 45 | #' @return | |
| 46 | #' * `s_count_missed_doses()` returns the statistics `n` and `count_fraction` with one element for each threshold. | |
| 47 | #' | |
| 48 | #' @keywords internal | |
| 49 | s_count_missed_doses <- function(x, | |
| 50 | thresholds, | |
| 51 | .N_col, # nolint | |
| 52 | .N_row, # nolint | |
| 53 |                                  denom = c("N_col", "n", "N_row"), | |
| 54 |                                  ...) { | |
| 55 | 7x | stat <- s_count_cumulative( | 
| 56 | 7x | x = x, | 
| 57 | 7x | thresholds = thresholds, | 
| 58 | 7x | lower_tail = FALSE, | 
| 59 | 7x | include_eq = TRUE, | 
| 60 | 7x | .N_col = .N_col, | 
| 61 | 7x | .N_row = .N_row, | 
| 62 | 7x | denom = denom, | 
| 63 | ... | |
| 64 | ) | |
| 65 | 7x | labels <- d_count_missed_doses(thresholds) | 
| 66 | 7x |   for (i in seq_along(stat$count_fraction)) { | 
| 67 | 14x | stat$count_fraction[[i]] <- formatters::with_label(stat$count_fraction[[i]], label = labels[i]) | 
| 68 | } | |
| 69 | ||
| 70 | 7x | c(list(n = n_available(x)), stat) | 
| 71 | } | |
| 72 | ||
| 73 | #' @describeIn count_missed_doses Formatted analysis function which is used as `afun` | |
| 74 | #' in `count_missed_doses()`. | |
| 75 | #' | |
| 76 | #' @return | |
| 77 | #' * `a_count_missed_doses()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 78 | #' | |
| 79 | #' @keywords internal | |
| 80 | a_count_missed_doses <- function(x, | |
| 81 | ..., | |
| 82 | .stats = NULL, | |
| 83 | .stat_names = NULL, | |
| 84 | .formats = NULL, | |
| 85 | .labels = NULL, | |
| 86 |                                  .indent_mods = NULL) { | |
| 87 | 6x | dots_extra_args <- list(...) | 
| 88 | ||
| 89 | # Check if there are user-defined functions | |
| 90 | 6x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 91 | 6x | .stats <- default_and_custom_stats_list$all_stats | 
| 92 | 6x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 93 | ||
| 94 | # Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params) | |
| 95 | 6x | extra_afun_params <- retrieve_extra_afun_params( | 
| 96 | 6x | names(dots_extra_args$.additional_fun_parameters) | 
| 97 | ) | |
| 98 | 6x | dots_extra_args$.additional_fun_parameters <- NULL # After extraction we do not need them anymore | 
| 99 | ||
| 100 | # Main statistical functions application | |
| 101 | 6x | x_stats <- .apply_stat_functions( | 
| 102 | 6x | default_stat_fnc = s_count_missed_doses, | 
| 103 | 6x | custom_stat_fnc_list = custom_stat_functions, | 
| 104 | 6x | args_list = c( | 
| 105 | 6x | x = list(x), | 
| 106 | 6x | extra_afun_params, | 
| 107 | 6x | dots_extra_args | 
| 108 | ) | |
| 109 | ) | |
| 110 | ||
| 111 | # Fill in with stats defaults if needed | |
| 112 | 6x |   .stats <- get_stats("count_missed_doses", | 
| 113 | 6x | stats_in = .stats, | 
| 114 | 6x | custom_stats_in = names(custom_stat_functions) | 
| 115 | ) | |
| 116 | ||
| 117 | 6x | x_stats <- x_stats[.stats] | 
| 118 | 6x | levels_per_stats <- lapply(x_stats, names) | 
| 119 | ||
| 120 | # Fill in formats/indents/labels with custom input and defaults | |
| 121 | 6x | .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) | 
| 122 | 6x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) | 
| 123 | 6x | .labels <- get_labels_from_stats( | 
| 124 | 6x | .stats, .labels, levels_per_stats, | 
| 125 | 6x | label_attr_from_stats = sapply(.unlist_keep_nulls(x_stats), attr, "label") | 
| 126 | ) | |
| 127 | ||
| 128 | # Unlist stats | |
| 129 | 6x | x_stats <- x_stats %>% | 
| 130 | 6x | .unlist_keep_nulls() %>% | 
| 131 | 6x | setNames(names(.formats)) | 
| 132 | ||
| 133 | # Auto format handling | |
| 134 | 6x | .formats <- apply_auto_formatting( | 
| 135 | 6x | .formats, | 
| 136 | 6x | x_stats, | 
| 137 | 6x | extra_afun_params$.df_row, | 
| 138 | 6x | extra_afun_params$.var | 
| 139 | ) | |
| 140 | ||
| 141 | # Get and check statistical names from defaults | |
| 142 | 6x | .stat_names <- get_stat_names(x_stats, .stat_names) # note is x_stats | 
| 143 | ||
| 144 | 6x | in_rows( | 
| 145 | 6x | .list = x_stats, | 
| 146 | 6x | .formats = .formats, | 
| 147 | 6x | .names = names(.labels), | 
| 148 | 6x | .stat_names = .stat_names, | 
| 149 | 6x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 150 | 6x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 151 | ) | |
| 152 | } | |
| 153 | ||
| 154 | #' @describeIn count_missed_doses Layout-creating function which can take statistics function arguments | |
| 155 | #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 156 | #' | |
| 157 | #' @return | |
| 158 | #' * `count_missed_doses()` returns a layout object suitable for passing to further layouting functions, | |
| 159 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 160 | #' the statistics from `s_count_missed_doses()` to the table layout. | |
| 161 | #' | |
| 162 | #' @examples | |
| 163 | #' library(dplyr) | |
| 164 | #' | |
| 165 | #' anl <- tern_ex_adsl %>% | |
| 166 | #' distinct(STUDYID, USUBJID, ARM) %>% | |
| 167 | #' mutate( | |
| 168 | #' PARAMCD = "TNDOSMIS", | |
| 169 | #' PARAM = "Total number of missed doses during study", | |
| 170 | #' AVAL = sample(0:20, size = nrow(tern_ex_adsl), replace = TRUE), | |
| 171 | #' AVALC = "" | |
| 172 | #' ) | |
| 173 | #' | |
| 174 | #' basic_table() %>% | |
| 175 | #'   split_cols_by("ARM") %>% | |
| 176 | #' add_colcounts() %>% | |
| 177 | #'   count_missed_doses("AVAL", thresholds = c(1, 5, 10, 15), var_labels = "Missed Doses") %>% | |
| 178 | #' build_table(anl, alt_counts_df = tern_ex_adsl) | |
| 179 | #' | |
| 180 | #' @export | |
| 181 | #' @order 2 | |
| 182 | count_missed_doses <- function(lyt, | |
| 183 | vars, | |
| 184 | thresholds, | |
| 185 | var_labels = vars, | |
| 186 | show_labels = "visible", | |
| 187 | na_str = default_na_str(), | |
| 188 | nested = TRUE, | |
| 189 | table_names = vars, | |
| 190 | ..., | |
| 191 | na_rm = TRUE, | |
| 192 |                                .stats = c("n", "count_fraction"), | |
| 193 | .stat_names = NULL, | |
| 194 | .formats = NULL, | |
| 195 | .labels = NULL, | |
| 196 |                                .indent_mods = NULL) { | |
| 197 | # Depending on main functions | |
| 198 | 2x | extra_args <- list( | 
| 199 | 2x | "na_rm" = na_rm, | 
| 200 | 2x | "thresholds" = thresholds, | 
| 201 | ... | |
| 202 | ) | |
| 203 | ||
| 204 | # Needed defaults | |
| 205 | 2x | if (!is.null(.stats)) extra_args[[".stats"]] <- .stats | 
| 206 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 207 | ! | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 208 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 209 | ! | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 210 | ||
| 211 | # Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params) | |
| 212 | 2x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 213 | 2x | formals(a_count_missed_doses) <- c( | 
| 214 | 2x | formals(a_count_missed_doses), | 
| 215 | 2x | extra_args[[".additional_fun_parameters"]] | 
| 216 | ) | |
| 217 | ||
| 218 |   # Main {rtables} structural call | |
| 219 | 2x | analyze( | 
| 220 | 2x | lyt, | 
| 221 | 2x | vars, | 
| 222 | 2x | afun = a_count_missed_doses, | 
| 223 | 2x | na_str = na_str, | 
| 224 | 2x | inclNAs = !na_rm, | 
| 225 | 2x | table_names = table_names, | 
| 226 | 2x | var_labels = var_labels, | 
| 227 | 2x | show_labels = show_labels, | 
| 228 | 2x | nested = nested, | 
| 229 | 2x | extra_args = extra_args | 
| 230 | ) | |
| 231 | } | 
| 1 | #' Count patients with abnormal range values | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [count_abnormal()] creates a layout element to count patients with abnormal analysis range | |
| 6 | #' values in each direction. | |
| 7 | #' | |
| 8 | #' This function analyzes primary analysis variable `var` which indicates abnormal range results. | |
| 9 | #' Additional analysis variables that can be supplied as a list via the `variables` parameter are | |
| 10 | #' `id` (defaults to `USUBJID`), a variable to indicate unique subject identifiers, and `baseline` | |
| 11 | #' (defaults to `BNRIND`), a variable to indicate baseline reference ranges. | |
| 12 | #' | |
| 13 | #' For each direction specified via the `abnormal` parameter (e.g. High or Low), a fraction of | |
| 14 | #' patient counts is returned, with numerator and denominator calculated as follows: | |
| 15 | #' * `num`: The number of patients with this abnormality recorded while on treatment. | |
| 16 | #' * `denom`: The total number of patients with at least one post-baseline assessment. | |
| 17 | #' | |
| 18 | #' This function assumes that `df` has been filtered to only include post-baseline records. | |
| 19 | #' | |
| 20 | #' @inheritParams argument_convention | |
| 21 | #' @param abnormal (named `list`)\cr list identifying the abnormal range level(s) in `var`. Defaults to | |
| 22 | #' `list(Low = "LOW", High = "HIGH")` but you can also group different levels into the named list, | |
| 23 | #'   for example, `abnormal = list(Low = c("LOW", "LOW LOW"), High = c("HIGH", "HIGH HIGH"))`. | |
| 24 | #' @param exclude_base_abn (`flag`)\cr whether to exclude subjects with baseline abnormality | |
| 25 | #' from numerator and denominator. | |
| 26 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 27 | #' | |
| 28 | #'   Options are: ``r shQuote(get_stats("abnormal"), type = "sh")`` | |
| 29 | #' | |
| 30 | #' @note | |
| 31 | #' * `count_abnormal()` only considers a single variable that contains multiple abnormal levels. | |
| 32 | #' * `df` should be filtered to only include post-baseline records. | |
| 33 | #' * The denominator includes patients that may have other abnormal levels at baseline, | |
| 34 | #' and patients missing baseline records. Patients with these abnormalities at | |
| 35 | #' baseline can be optionally excluded from numerator and denominator via the | |
| 36 | #' `exclude_base_abn` parameter. | |
| 37 | #' | |
| 38 | #' @name abnormal | |
| 39 | #' @include formatting_functions.R | |
| 40 | #' @order 1 | |
| 41 | NULL | |
| 42 | ||
| 43 | #' @describeIn abnormal Statistics function which counts patients with abnormal range values | |
| 44 | #' for a single `abnormal` level. | |
| 45 | #' | |
| 46 | #' @return | |
| 47 | #' * `s_count_abnormal()` returns the statistic `fraction` which is a vector with `num` and `denom` counts of patients. | |
| 48 | #' | |
| 49 | #' @keywords internal | |
| 50 | s_count_abnormal <- function(df, | |
| 51 | .var, | |
| 52 | abnormal = list(Low = "LOW", High = "HIGH"), | |
| 53 | variables = list(id = "USUBJID", baseline = "BNRIND"), | |
| 54 | exclude_base_abn = FALSE, | |
| 55 |                              ...) { | |
| 56 | 8x | checkmate::assert_list(abnormal, types = "character", names = "named", len = 2, any.missing = FALSE) | 
| 57 | 8x | checkmate::assert_true(any(unlist(abnormal) %in% levels(df[[.var]]))) | 
| 58 | 8x | checkmate::assert_factor(df[[.var]]) | 
| 59 | 8x | checkmate::assert_flag(exclude_base_abn) | 
| 60 | 8x | assert_df_with_variables(df, c(range = .var, variables)) | 
| 61 | 8x |   checkmate::assert_multi_class(df[[variables$baseline]], classes = c("factor", "character")) | 
| 62 | 8x |   checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character")) | 
| 63 | ||
| 64 | 8x |   count_abnormal_single <- function(abn_name, abn) { | 
| 65 | # Patients in the denominator fulfill: | |
| 66 | # - have at least one post-baseline visit | |
| 67 | # - their baseline must not be abnormal if `exclude_base_abn`. | |
| 68 | 16x |     if (exclude_base_abn) { | 
| 69 | 8x | denom_select <- !(df[[variables$baseline]] %in% abn) | 
| 70 |     } else { | |
| 71 | 8x | denom_select <- TRUE | 
| 72 | } | |
| 73 | 16x | denom <- length(unique(df[denom_select, variables$id, drop = TRUE])) | 
| 74 | ||
| 75 | # Patients in the numerator fulfill: | |
| 76 | # - have at least one post-baseline visit with the required abnormality level | |
| 77 | # - are part of the denominator patients. | |
| 78 | 16x | num_select <- (df[[.var]] %in% abn) & denom_select | 
| 79 | 16x | num <- length(unique(df[num_select, variables$id, drop = TRUE])) | 
| 80 | ||
| 81 | 16x | formatters::with_label(c(num = num, denom = denom), abn_name) | 
| 82 | } | |
| 83 | ||
| 84 | # This will define the abnormal levels theoretically possible for a specific lab parameter | |
| 85 | # within a split level of a layout. | |
| 86 | 8x | abnormal_lev <- lapply(abnormal, intersect, levels(df[[.var]])) | 
| 87 | 8x | abnormal_lev <- abnormal_lev[vapply(abnormal_lev, function(x) length(x) > 0, logical(1))] | 
| 88 | ||
| 89 | 8x | result <- sapply(names(abnormal_lev), function(i) count_abnormal_single(i, abnormal_lev[[i]]), simplify = FALSE) | 
| 90 | 8x | result <- list(fraction = result) | 
| 91 | 8x | result | 
| 92 | } | |
| 93 | ||
| 94 | #' @describeIn abnormal Formatted analysis function which is used as `afun` in `count_abnormal()`. | |
| 95 | #' | |
| 96 | #' @return | |
| 97 | #' * `a_count_abnormal()` returns the corresponding list with formatted [rtables::CellValue()]. | |
| 98 | #' | |
| 99 | #' @keywords internal | |
| 100 | a_count_abnormal <- function(df, | |
| 101 | ..., | |
| 102 | .stats = NULL, | |
| 103 | .stat_names = NULL, | |
| 104 | .formats = NULL, | |
| 105 | .labels = NULL, | |
| 106 |                              .indent_mods = NULL) { | |
| 107 | # Check for additional parameters to the statistics function | |
| 108 | 4x | dots_extra_args <- list(...) | 
| 109 | 4x | extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) | 
| 110 | 4x | dots_extra_args$.additional_fun_parameters <- NULL | 
| 111 | ||
| 112 | # Check for user-defined functions | |
| 113 | 4x | default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) | 
| 114 | 4x | .stats <- default_and_custom_stats_list$all_stats | 
| 115 | 4x | custom_stat_functions <- default_and_custom_stats_list$custom_stats | 
| 116 | ||
| 117 | # Apply statistics function | |
| 118 | 4x | x_stats <- .apply_stat_functions( | 
| 119 | 4x | default_stat_fnc = s_count_abnormal, | 
| 120 | 4x | custom_stat_fnc_list = custom_stat_functions, | 
| 121 | 4x | args_list = c( | 
| 122 | 4x | df = list(df), | 
| 123 | 4x | extra_afun_params, | 
| 124 | 4x | dots_extra_args | 
| 125 | ) | |
| 126 | ) | |
| 127 | ||
| 128 | # Fill in formatting defaults | |
| 129 | 4x |   .stats <- get_stats("abnormal", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) | 
| 130 | 4x | levels_per_stats <- lapply(x_stats, names) | 
| 131 | 4x | .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) | 
| 132 | 4x | .labels <- get_labels_from_stats(.stats, .labels, levels_per_stats) | 
| 133 | 4x | .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) | 
| 134 | ||
| 135 | 4x | x_stats <- x_stats[.stats] %>% | 
| 136 | 4x | .unlist_keep_nulls() %>% | 
| 137 | 4x | setNames(names(.formats)) | 
| 138 | ||
| 139 | # Auto format handling | |
| 140 | 4x | .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) | 
| 141 | ||
| 142 | # Get and check statistical names | |
| 143 | 4x | .stat_names <- get_stat_names(x_stats, .stat_names) | 
| 144 | ||
| 145 | 4x | in_rows( | 
| 146 | 4x | .list = x_stats, | 
| 147 | 4x | .formats = .formats, | 
| 148 | 4x | .names = .labels %>% .unlist_keep_nulls(), | 
| 149 | 4x | .stat_names = .stat_names, | 
| 150 | 4x | .labels = .labels %>% .unlist_keep_nulls(), | 
| 151 | 4x | .indent_mods = .indent_mods %>% .unlist_keep_nulls() | 
| 152 | ) | |
| 153 | } | |
| 154 | ||
| 155 | #' @describeIn abnormal Layout-creating function which can take statistics function arguments | |
| 156 | #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 157 | #' | |
| 158 | #' @return | |
| 159 | #' * `count_abnormal()` returns a layout object suitable for passing to further layouting functions, | |
| 160 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 161 | #' the statistics from `s_count_abnormal()` to the table layout. | |
| 162 | #' | |
| 163 | #' @examples | |
| 164 | #' library(dplyr) | |
| 165 | #' | |
| 166 | #' df <- data.frame( | |
| 167 | #' USUBJID = as.character(c(1, 1, 2, 2)), | |
| 168 | #'   ANRIND = factor(c("NORMAL", "LOW", "HIGH", "HIGH")), | |
| 169 | #'   BNRIND = factor(c("NORMAL", "NORMAL", "HIGH", "HIGH")), | |
| 170 | #'   ONTRTFL = c("", "Y", "", "Y"), | |
| 171 | #' stringsAsFactors = FALSE | |
| 172 | #' ) | |
| 173 | #' | |
| 174 | #' # Select only post-baseline records. | |
| 175 | #' df <- df %>% | |
| 176 | #' filter(ONTRTFL == "Y") | |
| 177 | #' | |
| 178 | #' # Layout creating function. | |
| 179 | #' basic_table() %>% | |
| 180 | #' count_abnormal(var = "ANRIND", abnormal = list(high = "HIGH", low = "LOW")) %>% | |
| 181 | #' build_table(df) | |
| 182 | #' | |
| 183 | #' # Passing of statistics function and formatting arguments. | |
| 184 | #' df2 <- data.frame( | |
| 185 | #' ID = as.character(c(1, 1, 2, 2)), | |
| 186 | #'   RANGE = factor(c("NORMAL", "LOW", "HIGH", "HIGH")), | |
| 187 | #'   BL_RANGE = factor(c("NORMAL", "NORMAL", "HIGH", "HIGH")), | |
| 188 | #'   ONTRTFL = c("", "Y", "", "Y"), | |
| 189 | #' stringsAsFactors = FALSE | |
| 190 | #' ) | |
| 191 | #' | |
| 192 | #' # Select only post-baseline records. | |
| 193 | #' df2 <- df2 %>% | |
| 194 | #' filter(ONTRTFL == "Y") | |
| 195 | #' | |
| 196 | #' basic_table() %>% | |
| 197 | #' count_abnormal( | |
| 198 | #' var = "RANGE", | |
| 199 | #' abnormal = list(low = "LOW", high = "HIGH"), | |
| 200 | #' variables = list(id = "ID", baseline = "BL_RANGE") | |
| 201 | #' ) %>% | |
| 202 | #' build_table(df2) | |
| 203 | #' | |
| 204 | #' @export | |
| 205 | #' @order 2 | |
| 206 | count_abnormal <- function(lyt, | |
| 207 | var, | |
| 208 | abnormal = list(Low = "LOW", High = "HIGH"), | |
| 209 | variables = list(id = "USUBJID", baseline = "BNRIND"), | |
| 210 | exclude_base_abn = FALSE, | |
| 211 | na_str = default_na_str(), | |
| 212 | nested = TRUE, | |
| 213 | ..., | |
| 214 | table_names = var, | |
| 215 | .stats = "fraction", | |
| 216 | .stat_names = NULL, | |
| 217 | .formats = list(fraction = format_fraction), | |
| 218 | .labels = NULL, | |
| 219 |                            .indent_mods = NULL) { | |
| 220 | # Process standard extra arguments | |
| 221 | 3x |   extra_args <- list(".stats" = .stats) | 
| 222 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 223 | 3x | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 224 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 225 | 1x | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 226 | ||
| 227 | # Process additional arguments to the statistic function | |
| 228 | 3x | extra_args <- c( | 
| 229 | 3x | extra_args, | 
| 230 | 3x | "abnormal" = list(abnormal), "variables" = list(variables), "exclude_base_abn" = exclude_base_abn, | 
| 231 | ... | |
| 232 | ) | |
| 233 | ||
| 234 | # Append additional info from layout to the analysis function | |
| 235 | 3x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 236 | 3x | formals(a_count_abnormal) <- c(formals(a_count_abnormal), extra_args[[".additional_fun_parameters"]]) | 
| 237 | ||
| 238 | 3x | analyze( | 
| 239 | 3x | lyt = lyt, | 
| 240 | 3x | vars = var, | 
| 241 | 3x | afun = a_count_abnormal, | 
| 242 | 3x | na_str = na_str, | 
| 243 | 3x | nested = nested, | 
| 244 | 3x | extra_args = extra_args, | 
| 245 | 3x | show_labels = "hidden", | 
| 246 | 3x | table_names = table_names | 
| 247 | ) | |
| 248 | } | 
| 1 | #' Helper function to create a map data frame for `trim_levels_to_map()` | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Helper function to create a map data frame from the input dataset, which can be used as an argument in the | |
| 6 | #' `trim_levels_to_map` split function. Based on different method, the map is constructed differently. | |
| 7 | #' | |
| 8 | #' @inheritParams argument_convention | |
| 9 | #' @param abnormal (named `list`)\cr identifying the abnormal range level(s) in `df`. Based on the levels of | |
| 10 | #' abnormality of the input dataset, it can be something like `list(Low = "LOW LOW", High = "HIGH HIGH")` or | |
| 11 | #' `abnormal = list(Low = "LOW", High = "HIGH"))` | |
| 12 | #' @param method (`string`)\cr indicates how the returned map will be constructed. Can be `"default"` or `"range"`. | |
| 13 | #' | |
| 14 | #' @return A map `data.frame`. | |
| 15 | #' | |
| 16 | #' @note If method is `"default"`, the returned map will only have the abnormal directions that are observed in the | |
| 17 | #' `df`, and records with all normal values will be excluded to avoid error in creating layout. If method is | |
| 18 | #' `"range"`, the returned map will be based on the rule that at least one observation with low range > 0 | |
| 19 | #' for low direction and at least one observation with high range is not missing for high direction. | |
| 20 | #' | |
| 21 | #' @examples | |
| 22 | #' adlb <- df_explicit_na(tern_ex_adlb) | |
| 23 | #' | |
| 24 | #' h_map_for_count_abnormal( | |
| 25 | #' df = adlb, | |
| 26 | #'   variables = list(anl = "ANRIND", split_rows = c("LBCAT", "PARAM")), | |
| 27 | #'   abnormal = list(low = c("LOW"), high = c("HIGH")), | |
| 28 | #' method = "default", | |
| 29 | #' na_str = "<Missing>" | |
| 30 | #' ) | |
| 31 | #' | |
| 32 | #' df <- data.frame( | |
| 33 | #'   USUBJID = c(rep("1", 4), rep("2", 4), rep("3", 4)), | |
| 34 | #' AVISIT = c( | |
| 35 | #'     rep("WEEK 1", 2), | |
| 36 | #'     rep("WEEK 2", 2), | |
| 37 | #'     rep("WEEK 1", 2), | |
| 38 | #'     rep("WEEK 2", 2), | |
| 39 | #'     rep("WEEK 1", 2), | |
| 40 | #'     rep("WEEK 2", 2) | |
| 41 | #' ), | |
| 42 | #'   PARAM = rep(c("ALT", "CPR"), 6), | |
| 43 | #' ANRIND = c( | |
| 44 | #' "NORMAL", "NORMAL", "LOW", | |
| 45 | #'     "HIGH", "LOW", "LOW", "HIGH", "HIGH", rep("NORMAL", 4) | |
| 46 | #' ), | |
| 47 | #' ANRLO = rep(5, 12), | |
| 48 | #' ANRHI = rep(20, 12) | |
| 49 | #' ) | |
| 50 | #' df$ANRIND <- factor(df$ANRIND, levels = c("LOW", "HIGH", "NORMAL")) | |
| 51 | #' h_map_for_count_abnormal( | |
| 52 | #' df = df, | |
| 53 | #' variables = list( | |
| 54 | #' anl = "ANRIND", | |
| 55 | #'     split_rows = c("PARAM"), | |
| 56 | #' range_low = "ANRLO", | |
| 57 | #' range_high = "ANRHI" | |
| 58 | #' ), | |
| 59 | #'   abnormal = list(low = c("LOW"), high = c("HIGH")), | |
| 60 | #' method = "range", | |
| 61 | #' na_str = "<Missing>" | |
| 62 | #' ) | |
| 63 | #' | |
| 64 | #' @export | |
| 65 | h_map_for_count_abnormal <- function(df, | |
| 66 | variables = list( | |
| 67 | anl = "ANRIND", | |
| 68 |                                        split_rows = c("PARAM"), | |
| 69 | range_low = "ANRLO", | |
| 70 | range_high = "ANRHI" | |
| 71 | ), | |
| 72 |                                      abnormal = list(low = c("LOW", "LOW LOW"), high = c("HIGH", "HIGH HIGH")), | |
| 73 |                                      method = c("default", "range"), | |
| 74 |                                      na_str = "<Missing>") { | |
| 75 | 7x | method <- match.arg(method) | 
| 76 | 7x |   checkmate::assert_subset(c("anl", "split_rows"), names(variables)) | 
| 77 | 7x | checkmate::assert_false(anyNA(df[variables$split_rows])) | 
| 78 | 7x | assert_df_with_variables(df, | 
| 79 | 7x | variables = list(anl = variables$anl, split_rows = variables$split_rows), | 
| 80 | 7x | na_level = na_str | 
| 81 | ) | |
| 82 | 7x | assert_df_with_factors(df, list(val = variables$anl)) | 
| 83 | 7x | assert_valid_factor(df[[variables$anl]], any.missing = FALSE) | 
| 84 | 7x | assert_list_of_variables(variables) | 
| 85 | 7x | checkmate::assert_list(abnormal, types = "character", len = 2) | 
| 86 | ||
| 87 | # Drop usued levels from df as they are not supposed to be in the final map | |
| 88 | 7x | df <- droplevels(df) | 
| 89 | ||
| 90 | 7x | normal_value <- setdiff(levels(df[[variables$anl]]), unlist(abnormal)) | 
| 91 | ||
| 92 | # Based on the understanding of clinical data, there should only be one level of normal which is "NORMAL" | |
| 93 | 7x | checkmate::assert_vector(normal_value, len = 1) | 
| 94 | ||
| 95 | # Default method will only have what is observed in the df, and records with all normal values will be excluded to | |
| 96 | # avoid error in layout building. | |
| 97 | 7x |   if (method == "default") { | 
| 98 | 3x | df_abnormal <- subset(df, df[[variables$anl]] %in% unlist(abnormal)) | 
| 99 | 3x | map <- unique(df_abnormal[c(variables$split_rows, variables$anl)]) | 
| 100 | 3x | map_normal <- unique(subset(map, select = variables$split_rows)) | 
| 101 | 3x | map_normal[[variables$anl]] <- normal_value | 
| 102 | 3x | map <- rbind(map, map_normal) | 
| 103 | 4x |   } else if (method == "range") { | 
| 104 | # range method follows the rule that at least one observation with ANRLO > 0 for low | |
| 105 | # direction and at least one observation with ANRHI is not missing for high direction. | |
| 106 | 4x |     checkmate::assert_subset(c("range_low", "range_high"), names(variables)) | 
| 107 | 4x |     checkmate::assert_subset(c("LOW", "HIGH"), toupper(names(abnormal))) | 
| 108 | ||
| 109 | 4x | assert_df_with_variables(df, | 
| 110 | 4x | variables = list( | 
| 111 | 4x | range_low = variables$range_low, | 
| 112 | 4x | range_high = variables$range_high | 
| 113 | ) | |
| 114 | ) | |
| 115 | ||
| 116 | # Define low direction of map | |
| 117 | 4x | df_low <- subset(df, df[[variables$range_low]] > 0) | 
| 118 | 4x | map_low <- unique(df_low[variables$split_rows]) | 
| 119 | 4x | low_levels <- unname(unlist(abnormal[toupper(names(abnormal)) == "LOW"])) | 
| 120 | 4x | low_levels_df <- as.data.frame(low_levels) | 
| 121 | 4x | colnames(low_levels_df) <- variables$anl | 
| 122 | 4x |     low_levels_df <- do.call("rbind", replicate(nrow(map_low), low_levels_df, simplify = FALSE)) | 
| 123 | 4x | rownames(map_low) <- NULL # Just to avoid strange row index in case upstream functions changed | 
| 124 | 4x | map_low <- map_low[rep(seq_len(nrow(map_low)), each = length(low_levels)), , drop = FALSE] | 
| 125 | 4x | map_low <- cbind(map_low, low_levels_df) | 
| 126 | ||
| 127 | # Define high direction of map | |
| 128 | 4x | df_high <- subset(df, df[[variables$range_high]] != na_str | !is.na(df[[variables$range_high]])) | 
| 129 | 4x | map_high <- unique(df_high[variables$split_rows]) | 
| 130 | 4x | high_levels <- unname(unlist(abnormal[toupper(names(abnormal)) == "HIGH"])) | 
| 131 | 4x | high_levels_df <- as.data.frame(high_levels) | 
| 132 | 4x | colnames(high_levels_df) <- variables$anl | 
| 133 | 4x |     high_levels_df <- do.call("rbind", replicate(nrow(map_high), high_levels_df, simplify = FALSE)) | 
| 134 | 4x | rownames(map_high) <- NULL | 
| 135 | 4x | map_high <- map_high[rep(seq_len(nrow(map_high)), each = length(high_levels)), , drop = FALSE] | 
| 136 | 4x | map_high <- cbind(map_high, high_levels_df) | 
| 137 | ||
| 138 | # Define normal of map | |
| 139 | 4x | map_normal <- unique(rbind(map_low, map_high)[variables$split_rows]) | 
| 140 | 4x | map_normal[variables$anl] <- normal_value | 
| 141 | ||
| 142 | 4x | map <- rbind(map_low, map_high, map_normal) | 
| 143 | } | |
| 144 | ||
| 145 | # map should be all characters | |
| 146 | 7x | map <- data.frame(lapply(map, as.character), stringsAsFactors = FALSE) | 
| 147 | ||
| 148 | # sort the map final output by split_rows variables | |
| 149 | 7x |   for (i in rev(seq_len(length(variables$split_rows)))) { | 
| 150 | 7x | map <- map[order(map[[i]]), ] | 
| 151 | } | |
| 152 | 7x | map | 
| 153 | } | 
| 1 | #' Add titles, footnotes, page Number, and a bounding box to a grid grob | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' This function is useful to label grid grobs (also `ggplot2`, and `lattice` plots) | |
| 6 | #' with title, footnote, and page numbers. | |
| 7 | #' | |
| 8 | #' @inheritParams grid::grob | |
| 9 | #' @param grob (`grob`)\cr a grid grob object, optionally `NULL` if only a `grob` with the decoration should be shown. | |
| 10 | #' @param titles (`character`)\cr titles given as a vector of strings that are each separated by a newline and wrapped | |
| 11 | #' according to the page width. | |
| 12 | #' @param footnotes (`character`)\cr footnotes. Uses the same formatting rules as `titles`. | |
| 13 | #' @param page (`string` or `NULL`)\cr page numeration. If `NULL` then no page number is displayed. | |
| 14 | #' @param width_titles (`grid::unit`)\cr width of titles. Usually defined as all the available space | |
| 15 | #' `grid::unit(1, "npc")`, it is affected by the parameter `outer_margins`. Right margins (`outer_margins[4]`) | |
| 16 | #' need to be subtracted to the allowed width. | |
| 17 | #' @param width_footnotes (`grid::unit`)\cr width of footnotes. Same default and margin correction as `width_titles`. | |
| 18 | #' @param border (`flag`)\cr whether a border should be drawn around the plot or not. | |
| 19 | #' @param padding (`grid::unit`)\cr padding. A unit object of length 4. Innermost margin between the plot (`grob`) | |
| 20 | #' and, possibly, the border of the plot. Usually expressed in 4 identical values (usually `"lines"`). It defaults | |
| 21 | #' to `grid::unit(rep(1, 4), "lines")`. | |
| 22 | #' @param margins (`grid::unit`)\cr margins. A unit object of length 4. Margins between the plot and the other | |
| 23 | #' elements in the list (e.g. titles, plot, and footers). This is usually expressed in 4 `"lines"`, where the | |
| 24 | #' lateral ones are 0s, while top and bottom are 1s. It defaults to `grid::unit(c(1, 0, 1, 0), "lines")`. | |
| 25 | #' @param outer_margins (`grid::unit`)\cr outer margins. A unit object of length 4. It defines the general margin of | |
| 26 | #' the plot, considering also decorations like titles, footnotes, and page numbers. It defaults to | |
| 27 | #' `grid::unit(c(2, 1.5, 3, 1.5), "cm")`. | |
| 28 | #' @param gp_titles (`gpar`)\cr a `gpar` object. Mainly used to set different `"fontsize"`. | |
| 29 | #' @param gp_footnotes (`gpar`)\cr a `gpar` object. Mainly used to set different `"fontsize"`. | |
| 30 | #' | |
| 31 | #' @return A grid grob (`gTree`). | |
| 32 | #' | |
| 33 | #' @details The titles and footnotes will be ragged, i.e. each title will be wrapped individually. | |
| 34 | #' | |
| 35 | #' @examples | |
| 36 | #' library(grid) | |
| 37 | #' | |
| 38 | #' titles <- c( | |
| 39 | #' "Edgar Anderson's Iris Data", | |
| 40 | #' paste( | |
| 41 | #' "This famous (Fisher's or Anderson's) iris data set gives the measurements", | |
| 42 | #' "in centimeters of the variables sepal length and width and petal length", | |
| 43 | #' "and width, respectively, for 50 flowers from each of 3 species of iris." | |
| 44 | #' ) | |
| 45 | #' ) | |
| 46 | #' | |
| 47 | #' footnotes <- c( | |
| 48 | #' "The species are Iris setosa, versicolor, and virginica.", | |
| 49 | #' paste( | |
| 50 | #' "iris is a data frame with 150 cases (rows) and 5 variables (columns) named", | |
| 51 | #' "Sepal.Length, Sepal.Width, Petal.Length, Petal.Width, and Species." | |
| 52 | #' ) | |
| 53 | #' ) | |
| 54 | #' | |
| 55 | #' ## empty plot | |
| 56 | #' grid.newpage() | |
| 57 | #' | |
| 58 | #' grid.draw( | |
| 59 | #' decorate_grob( | |
| 60 | #' NULL, | |
| 61 | #' titles = titles, | |
| 62 | #' footnotes = footnotes, | |
| 63 | #' page = "Page 4 of 10" | |
| 64 | #' ) | |
| 65 | #' ) | |
| 66 | #' | |
| 67 | #' # grid | |
| 68 | #' p <- gTree( | |
| 69 | #' children = gList( | |
| 70 | #' rectGrob(), | |
| 71 | #' xaxisGrob(), | |
| 72 | #' yaxisGrob(), | |
| 73 | #'     textGrob("Sepal.Length", y = unit(-4, "lines")), | |
| 74 | #'     textGrob("Petal.Length", x = unit(-3.5, "lines"), rot = 90), | |
| 75 | #' pointsGrob(iris$Sepal.Length, iris$Petal.Length, gp = gpar(col = iris$Species), pch = 16) | |
| 76 | #' ), | |
| 77 | #' vp = vpStack(plotViewport(), dataViewport(xData = iris$Sepal.Length, yData = iris$Petal.Length)) | |
| 78 | #' ) | |
| 79 | #' grid.newpage() | |
| 80 | #' grid.draw(p) | |
| 81 | #' | |
| 82 | #' grid.newpage() | |
| 83 | #' grid.draw( | |
| 84 | #' decorate_grob( | |
| 85 | #' grob = p, | |
| 86 | #' titles = titles, | |
| 87 | #' footnotes = footnotes, | |
| 88 | #' page = "Page 6 of 129" | |
| 89 | #' ) | |
| 90 | #' ) | |
| 91 | #' | |
| 92 | #' ## with ggplot2 | |
| 93 | #' library(ggplot2) | |
| 94 | #' | |
| 95 | #' p_gg <- ggplot2::ggplot(iris, aes(Sepal.Length, Sepal.Width, col = Species)) + | |
| 96 | #' ggplot2::geom_point() | |
| 97 | #' p_gg | |
| 98 | #' p <- ggplotGrob(p_gg) | |
| 99 | #' grid.newpage() | |
| 100 | #' grid.draw( | |
| 101 | #' decorate_grob( | |
| 102 | #' grob = p, | |
| 103 | #' titles = titles, | |
| 104 | #' footnotes = footnotes, | |
| 105 | #' page = "Page 6 of 129" | |
| 106 | #' ) | |
| 107 | #' ) | |
| 108 | #' | |
| 109 | #' ## with lattice | |
| 110 | #' library(lattice) | |
| 111 | #' | |
| 112 | #' xyplot(Sepal.Length ~ Petal.Length, data = iris, col = iris$Species) | |
| 113 | #' p <- grid.grab() | |
| 114 | #' grid.newpage() | |
| 115 | #' grid.draw( | |
| 116 | #' decorate_grob( | |
| 117 | #' grob = p, | |
| 118 | #' titles = titles, | |
| 119 | #' footnotes = footnotes, | |
| 120 | #' page = "Page 6 of 129" | |
| 121 | #' ) | |
| 122 | #' ) | |
| 123 | #' | |
| 124 | #' # with gridExtra - no borders | |
| 125 | #' library(gridExtra) | |
| 126 | #' grid.newpage() | |
| 127 | #' grid.draw( | |
| 128 | #' decorate_grob( | |
| 129 | #' tableGrob( | |
| 130 | #' head(mtcars) | |
| 131 | #' ), | |
| 132 | #' titles = "title", | |
| 133 | #' footnotes = "footnote", | |
| 134 | #' border = FALSE | |
| 135 | #' ) | |
| 136 | #' ) | |
| 137 | #' | |
| 138 | #' @export | |
| 139 | decorate_grob <- function(grob, | |
| 140 | titles, | |
| 141 | footnotes, | |
| 142 | page = "", | |
| 143 | width_titles = grid::unit(1, "npc"), | |
| 144 | width_footnotes = grid::unit(1, "npc"), | |
| 145 | border = TRUE, | |
| 146 | padding = grid::unit(rep(1, 4), "lines"), | |
| 147 | margins = grid::unit(c(1, 0, 1, 0), "lines"), | |
| 148 | outer_margins = grid::unit(c(2, 1.5, 3, 1.5), "cm"), | |
| 149 | gp_titles = grid::gpar(), | |
| 150 | gp_footnotes = grid::gpar(fontsize = 8), | |
| 151 | name = NULL, | |
| 152 | gp = grid::gpar(), | |
| 153 |                           vp = NULL) { | |
| 154 | # External margins need to be taken into account when defining the width of titles and footers | |
| 155 | # because the text is split in advance depending on only the width of the viewport. | |
| 156 | 9x |   if (any(as.numeric(outer_margins) > 0)) { | 
| 157 | 9x | width_titles <- width_titles - outer_margins[4] | 
| 158 | 9x | width_footnotes <- width_footnotes - outer_margins[4] | 
| 159 | } | |
| 160 | ||
| 161 | 9x | st_titles <- split_text_grob( | 
| 162 | 9x | titles, | 
| 163 | 9x | x = 0, y = 1, | 
| 164 | 9x |     just = c("left", "top"), | 
| 165 | 9x | width = width_titles, | 
| 166 | 9x | vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 1), | 
| 167 | 9x | gp = gp_titles | 
| 168 | ) | |
| 169 | ||
| 170 | 9x | st_footnotes <- split_text_grob( | 
| 171 | 9x | footnotes, | 
| 172 | 9x | x = 0, y = 1, | 
| 173 | 9x |     just = c("left", "top"), | 
| 174 | 9x | width = width_footnotes, | 
| 175 | 9x | vp = grid::viewport(layout.pos.row = 3, layout.pos.col = 1), | 
| 176 | 9x | gp = gp_footnotes | 
| 177 | ) | |
| 178 | ||
| 179 | 9x | pg_footnote <- grid::textGrob( | 
| 180 | 9x |     paste("\n", page), | 
| 181 | 9x | x = 1, y = 0, | 
| 182 | 9x |     just = c("right", "bottom"), | 
| 183 | 9x | vp = grid::viewport(layout.pos.row = 4, layout.pos.col = 1), | 
| 184 | 9x | gp = gp_footnotes | 
| 185 | ) | |
| 186 | ||
| 187 | # Initial decoration of the grob -> border, paddings, and margins are used here | |
| 188 | 9x | main_plot <- grid::gTree( | 
| 189 | 9x | children = grid::gList( | 
| 190 | 9x | if (border) grid::rectGrob(), | 
| 191 | 9x | grid::gTree( | 
| 192 | 9x | children = grid::gList( | 
| 193 | 9x | grob | 
| 194 | ), | |
| 195 | 9x | vp = grid::plotViewport(margins = padding) # innermost margins of the grob plot | 
| 196 | ) | |
| 197 | ), | |
| 198 | 9x | vp = grid::vpStack( | 
| 199 | 9x | grid::viewport(layout.pos.row = 2, layout.pos.col = 1), | 
| 200 | 9x | grid::plotViewport(margins = margins) # margins around the border plot | 
| 201 | ) | |
| 202 | ) | |
| 203 | ||
| 204 | 9x | grid::gTree( | 
| 205 | 9x | grob = grob, | 
| 206 | 9x | titles = titles, | 
| 207 | 9x | footnotes = footnotes, | 
| 208 | 9x | page = page, | 
| 209 | 9x | width_titles = width_titles, | 
| 210 | 9x | width_footnotes = width_footnotes, | 
| 211 | 9x | outer_margins = outer_margins, | 
| 212 | 9x | gp_titles = gp_titles, | 
| 213 | 9x | gp_footnotes = gp_footnotes, | 
| 214 | 9x | children = grid::gList( | 
| 215 | 9x | grid::gTree( | 
| 216 | 9x | children = grid::gList( | 
| 217 | 9x | st_titles, | 
| 218 | 9x | main_plot, # main plot with border, padding, and margins | 
| 219 | 9x | st_footnotes, | 
| 220 | 9x | pg_footnote | 
| 221 | ), | |
| 222 | 9x | childrenvp = NULL, | 
| 223 | 9x | name = "titles_grob_footnotes", | 
| 224 | 9x | vp = grid::vpStack( | 
| 225 | 9x | grid::plotViewport(margins = outer_margins), # Main external margins | 
| 226 | 9x | grid::viewport( | 
| 227 | 9x | layout = grid::grid.layout( | 
| 228 | 9x | nrow = 4, ncol = 1, | 
| 229 | 9x | heights = grid::unit.c( | 
| 230 | 9x | grid::grobHeight(st_titles), | 
| 231 | 9x | grid::unit(1, "null"), | 
| 232 | 9x | grid::grobHeight(st_footnotes), | 
| 233 | 9x | grid::grobHeight(pg_footnote) | 
| 234 | ) | |
| 235 | ) | |
| 236 | ) | |
| 237 | ) | |
| 238 | ) | |
| 239 | ), | |
| 240 | 9x | name = name, | 
| 241 | 9x | gp = gp, | 
| 242 | 9x | vp = vp, | 
| 243 | 9x | cl = "decoratedGrob" | 
| 244 | ) | |
| 245 | } | |
| 246 | ||
| 247 | # nocov start | |
| 248 | #' @importFrom grid validDetails | |
| 249 | #' @noRd | |
| 250 | validDetails.decoratedGrob <- function(x) { | |
| 251 | checkmate::assert_character(x$titles) | |
| 252 | checkmate::assert_character(x$footnotes) | |
| 253 | ||
| 254 |   if (!is.null(x$grob)) { | |
| 255 | checkmate::assert_true(grid::is.grob(x$grob)) | |
| 256 | } | |
| 257 |   if (length(x$page) == 1) { | |
| 258 | checkmate::assert_character(x$page) | |
| 259 | } | |
| 260 |   if (!grid::is.unit(x$outer_margins)) { | |
| 261 | checkmate::assert_vector(x$outer_margins, len = 4) | |
| 262 | } | |
| 263 |   if (!grid::is.unit(x$margins)) { | |
| 264 | checkmate::assert_vector(x$margins, len = 4) | |
| 265 | } | |
| 266 |   if (!grid::is.unit(x$padding)) { | |
| 267 | checkmate::assert_vector(x$padding, len = 4) | |
| 268 | } | |
| 269 | ||
| 270 | x | |
| 271 | } | |
| 272 | ||
| 273 | #' @importFrom grid widthDetails | |
| 274 | #' @noRd | |
| 275 | widthDetails.decoratedGrob <- function(x) { | |
| 276 | grid::unit(1, "null") | |
| 277 | } | |
| 278 | ||
| 279 | #' @importFrom grid heightDetails | |
| 280 | #' @noRd | |
| 281 | heightDetails.decoratedGrob <- function(x) { | |
| 282 | grid::unit(1, "null") | |
| 283 | } | |
| 284 | ||
| 285 | #' Split text according to available text width | |
| 286 | #' | |
| 287 | #' Dynamically wrap text. | |
| 288 | #' | |
| 289 | #' @inheritParams grid::grid.text | |
| 290 | #' @param text (`string`)\cr the text to wrap. | |
| 291 | #' @param width (`grid::unit`)\cr a unit object specifying maximum width of text. | |
| 292 | #' | |
| 293 | #' @return A text `grob`. | |
| 294 | #' | |
| 295 | #' @details This code is taken from `R Graphics by Paul Murell, 2nd edition` | |
| 296 | #' | |
| 297 | #' @keywords internal | |
| 298 | split_text_grob <- function(text, | |
| 299 | x = grid::unit(0.5, "npc"), | |
| 300 | y = grid::unit(0.5, "npc"), | |
| 301 | width = grid::unit(1, "npc"), | |
| 302 | just = "centre", | |
| 303 | hjust = NULL, | |
| 304 | vjust = NULL, | |
| 305 | default.units = "npc", # nolint | |
| 306 | name = NULL, | |
| 307 | gp = grid::gpar(), | |
| 308 |                             vp = NULL) { | |
| 309 |   text <- gsub("\\\\n", "\n", text) # fixing cases of mixed behavior (\n and \\n) | |
| 310 | ||
| 311 | if (!grid::is.unit(x)) x <- grid::unit(x, default.units) | |
| 312 | if (!grid::is.unit(y)) y <- grid::unit(y, default.units) | |
| 313 | if (!grid::is.unit(width)) width <- grid::unit(width, default.units) | |
| 314 |   if (grid::unitType(x) %in% c("sum", "min", "max")) x <- grid::convertUnit(x, default.units) | |
| 315 |   if (grid::unitType(y) %in% c("sum", "min", "max")) y <- grid::convertUnit(y, default.units) | |
| 316 |   if (grid::unitType(width) %in% c("sum", "min", "max")) width <- grid::convertUnit(width, default.units) | |
| 317 | ||
| 318 |   if (length(gp) > 0) { # account for effect of gp on text width -> it was bugging when text was empty | |
| 319 | horizontal_npc_width_no_gp <- grid::convertWidth( | |
| 320 | grid::grobWidth( | |
| 321 | grid::textGrob( | |
| 322 | paste0(text, collapse = "\n") | |
| 323 | ) | |
| 324 | ), "npc", | |
| 325 | valueOnly = TRUE | |
| 326 | ) | |
| 327 | horizontal_npc_width_with_gp <- grid::convertWidth(grid::grobWidth( | |
| 328 | grid::textGrob( | |
| 329 | paste0(text, collapse = "\n"), | |
| 330 | gp = gp | |
| 331 | ) | |
| 332 | ), "npc", valueOnly = TRUE) | |
| 333 | ||
| 334 | # Adapting width to the input gpar (it is normalized so does not matter what is text) | |
| 335 | width <- width * horizontal_npc_width_no_gp / horizontal_npc_width_with_gp | |
| 336 | } | |
| 337 | ||
| 338 | ## if it is a fixed unit then we do not need to recalculate when viewport resized | |
| 339 | if (!inherits(width, "unit.arithmetic") && !is.null(attr(width, "unit")) && | |
| 340 |     attr(width, "unit") %in% c("cm", "inches", "mm", "points", "picas", "bigpts", "dida", "cicero", "scaledpts")) { # nolint | |
| 341 | attr(text, "fixed_text") <- paste(vapply(text, split_string, character(1), width = width), collapse = "\n") | |
| 342 | } | |
| 343 | ||
| 344 | # Fix for split_string in case of residual \n (otherwise is counted as character) | |
| 345 | text2 <- unlist( | |
| 346 | strsplit( | |
| 347 | paste0(text, collapse = "\n"), # for "" cases | |
| 348 | "\n" | |
| 349 | ) | |
| 350 | ) | |
| 351 | ||
| 352 | # Final grid text with cat-friendly split_string | |
| 353 | grid::grid.text( | |
| 354 | label = split_string(text2, width), | |
| 355 | x = x, y = y, | |
| 356 | just = just, | |
| 357 | hjust = hjust, | |
| 358 | vjust = vjust, | |
| 359 | rot = 0, | |
| 360 | check.overlap = FALSE, | |
| 361 | name = name, | |
| 362 | gp = gp, | |
| 363 | vp = vp, | |
| 364 | draw = FALSE | |
| 365 | ) | |
| 366 | } | |
| 367 | ||
| 368 | #' @importFrom grid validDetails | |
| 369 | #' @noRd | |
| 370 | validDetails.dynamicSplitText <- function(x) { | |
| 371 | checkmate::assert_character(x$text) | |
| 372 | checkmate::assert_true(grid::is.unit(x$width)) | |
| 373 | checkmate::assert_vector(x$width, len = 1) | |
| 374 | x | |
| 375 | } | |
| 376 | ||
| 377 | #' @importFrom grid heightDetails | |
| 378 | #' @noRd | |
| 379 | heightDetails.dynamicSplitText <- function(x) { | |
| 380 |   txt <- if (!is.null(attr(x$text, "fixed_text"))) { | |
| 381 | attr(x$text, "fixed_text") | |
| 382 |   } else { | |
| 383 | paste(vapply(x$text, split_string, character(1), width = x$width), collapse = "\n") | |
| 384 | } | |
| 385 | grid::stringHeight(txt) | |
| 386 | } | |
| 387 | ||
| 388 | #' @importFrom grid widthDetails | |
| 389 | #' @noRd | |
| 390 | widthDetails.dynamicSplitText <- function(x) { | |
| 391 | x$width | |
| 392 | } | |
| 393 | ||
| 394 | #' @importFrom grid drawDetails | |
| 395 | #' @noRd | |
| 396 | drawDetails.dynamicSplitText <- function(x, recording) { | |
| 397 |   txt <- if (!is.null(attr(x$text, "fixed_text"))) { | |
| 398 | attr(x$text, "fixed_text") | |
| 399 |   } else { | |
| 400 | paste(vapply(x$text, split_string, character(1), width = x$width), collapse = "\n") | |
| 401 | } | |
| 402 | ||
| 403 | x$width <- NULL | |
| 404 | x$label <- txt | |
| 405 | x$text <- NULL | |
| 406 |   class(x) <- c("text", class(x)[-1]) | |
| 407 | ||
| 408 | grid::grid.draw(x) | |
| 409 | } | |
| 410 | # nocov end | |
| 411 | ||
| 412 | # Adapted from Paul Murell R Graphics 2nd Edition | |
| 413 | # https://www.stat.auckland.ac.nz/~paul/RG2e/interactgrid-splittext.R | |
| 414 | split_string <- function(text, width) { | |
| 415 | 26x | strings <- strsplit(text, " ") | 
| 416 | 26x | out_string <- NA | 
| 417 | 26x |   for (string_i in seq_along(strings)) { | 
| 418 | 48x | newline_str <- strings[[string_i]] | 
| 419 | 6x | if (length(newline_str) == 0) newline_str <- "" | 
| 420 | 48x |     if (is.na(out_string[string_i])) { | 
| 421 | 48x | out_string[string_i] <- newline_str[[1]][[1]] | 
| 422 | 48x | linewidth <- grid::stringWidth(out_string[string_i]) | 
| 423 | } | |
| 424 | 48x |     gapwidth <- grid::stringWidth(" ") | 
| 425 | 48x | availwidth <- as.numeric(width) | 
| 426 | 48x |     if (length(newline_str) > 1) { | 
| 427 | 12x |       for (i in seq(2, length(newline_str))) { | 
| 428 | 184x | width_i <- grid::stringWidth(newline_str[i]) | 
| 429 | # Main conversion of allowed text width -> npc units are 0<npc<1. External viewport is used for conversion | |
| 430 | 184x |         if (grid::convertWidth(linewidth + gapwidth + width_i, grid::unitType(width), valueOnly = TRUE) < availwidth) { | 
| 431 | 177x | sep <- " " | 
| 432 | 177x | linewidth <- linewidth + gapwidth + width_i | 
| 433 |         } else { | |
| 434 | 7x | sep <- "\n" | 
| 435 | 7x | linewidth <- width_i | 
| 436 | } | |
| 437 | 184x | out_string[string_i] <- paste(out_string[string_i], newline_str[i], sep = sep) | 
| 438 | } | |
| 439 | } | |
| 440 | } | |
| 441 | 26x | paste(out_string, collapse = "\n") | 
| 442 | } | |
| 443 | ||
| 444 | #' Update page number | |
| 445 | #' | |
| 446 | #' Automatically updates page number. | |
| 447 | #' | |
| 448 | #' @param npages (`numeric(1)`)\cr total number of pages. | |
| 449 | #' @param ... arguments passed on to [decorate_grob()]. | |
| 450 | #' | |
| 451 | #' @return Closure that increments the page number. | |
| 452 | #' | |
| 453 | #' @keywords internal | |
| 454 | decorate_grob_factory <- function(npages, ...) { | |
| 455 | 2x | current_page <- 0 | 
| 456 | 2x |   function(grob) { | 
| 457 | 7x | current_page <<- current_page + 1 | 
| 458 | 7x |     if (current_page > npages) { | 
| 459 | 1x |       stop(paste("current page is", current_page, "but max.", npages, "specified.")) | 
| 460 | } | |
| 461 | 6x |     decorate_grob(grob = grob, page = paste("Page", current_page, "of", npages), ...) | 
| 462 | } | |
| 463 | } | |
| 464 | ||
| 465 | #' Decorate set of `grob`s and add page numbering | |
| 466 | #' | |
| 467 | #' @description `r lifecycle::badge("stable")` | |
| 468 | #' | |
| 469 | #' Note that this uses the [decorate_grob_factory()] function. | |
| 470 | #' | |
| 471 | #' @param grobs (`list` of `grob`)\cr a list of grid grobs. | |
| 472 | #' @param ... arguments passed on to [decorate_grob()]. | |
| 473 | #' | |
| 474 | #' @return A decorated grob. | |
| 475 | #' | |
| 476 | #' @examples | |
| 477 | #' library(ggplot2) | |
| 478 | #' library(grid) | |
| 479 | #' g <- with(data = iris, { | |
| 480 | #' list( | |
| 481 | #' ggplot2::ggplotGrob( | |
| 482 | #' ggplot2::ggplot(mapping = aes(Sepal.Length, Sepal.Width, col = Species)) + | |
| 483 | #' ggplot2::geom_point() | |
| 484 | #' ), | |
| 485 | #' ggplot2::ggplotGrob( | |
| 486 | #' ggplot2::ggplot(mapping = aes(Sepal.Length, Petal.Length, col = Species)) + | |
| 487 | #' ggplot2::geom_point() | |
| 488 | #' ), | |
| 489 | #' ggplot2::ggplotGrob( | |
| 490 | #' ggplot2::ggplot(mapping = aes(Sepal.Length, Petal.Width, col = Species)) + | |
| 491 | #' ggplot2::geom_point() | |
| 492 | #' ), | |
| 493 | #' ggplot2::ggplotGrob( | |
| 494 | #' ggplot2::ggplot(mapping = aes(Sepal.Width, Petal.Length, col = Species)) + | |
| 495 | #' ggplot2::geom_point() | |
| 496 | #' ), | |
| 497 | #' ggplot2::ggplotGrob( | |
| 498 | #' ggplot2::ggplot(mapping = aes(Sepal.Width, Petal.Width, col = Species)) + | |
| 499 | #' ggplot2::geom_point() | |
| 500 | #' ), | |
| 501 | #' ggplot2::ggplotGrob( | |
| 502 | #' ggplot2::ggplot(mapping = aes(Petal.Length, Petal.Width, col = Species)) + | |
| 503 | #' ggplot2::geom_point() | |
| 504 | #' ) | |
| 505 | #' ) | |
| 506 | #' }) | |
| 507 | #' lg <- decorate_grob_set(grobs = g, titles = "Hello\nOne\nTwo\nThree", footnotes = "") | |
| 508 | #' | |
| 509 | #' draw_grob(lg[[1]]) | |
| 510 | #' draw_grob(lg[[2]]) | |
| 511 | #' draw_grob(lg[[6]]) | |
| 512 | #' | |
| 513 | #' @export | |
| 514 | decorate_grob_set <- function(grobs, ...) { | |
| 515 | 1x | n <- length(grobs) | 
| 516 | 1x | lgf <- decorate_grob_factory(npages = n, ...) | 
| 517 | 1x | lapply(grobs, lgf) | 
| 518 | } | 
| 1 | #' Custom split functions | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Collection of useful functions that are expanding on the core list of functions | |
| 6 | #' provided by `rtables`. See [rtables::custom_split_funs] and [rtables::make_split_fun()] | |
| 7 | #' for more information on how to make a custom split function. All these functions | |
| 8 | #' work with [rtables::split_rows_by()] argument `split_fun` to modify the way the split | |
| 9 | #' happens. For other split functions, consider consulting [`rtables::split_funcs`]. | |
| 10 | #' | |
| 11 | #' @seealso [rtables::make_split_fun()] | |
| 12 | #' | |
| 13 | #' @name utils_split_funs | |
| 14 | NULL | |
| 15 | ||
| 16 | #' @describeIn utils_split_funs Split function to place reference group facet at a specific position | |
| 17 | #' during post-processing stage. | |
| 18 | #' | |
| 19 | #' @param position (`string` or `integer`)\cr position to use for the reference group facet. Can be `"first"`, | |
| 20 | #' `"last"`, or a specific position. | |
| 21 | #' | |
| 22 | #' @return | |
| 23 | #' * `ref_group_position()` returns an utility function that puts the reference group | |
| 24 | #' as first, last or at a certain position and needs to be assigned to `split_fun`. | |
| 25 | #' | |
| 26 | #' @examples | |
| 27 | #' library(dplyr) | |
| 28 | #' | |
| 29 | #' dat <- data.frame( | |
| 30 | #' x = factor(letters[1:5], levels = letters[5:1]), | |
| 31 | #' y = 1:5 | |
| 32 | #' ) | |
| 33 | #' | |
| 34 | #' # With rtables layout functions | |
| 35 | #' basic_table() %>% | |
| 36 | #'   split_cols_by("x", ref_group = "c", split_fun = ref_group_position("last")) %>% | |
| 37 | #'   analyze("y") %>% | |
| 38 | #' build_table(dat) | |
| 39 | #' | |
| 40 | #' # With tern layout funcitons | |
| 41 | #' adtte_f <- tern_ex_adtte %>% | |
| 42 | #' filter(PARAMCD == "OS") %>% | |
| 43 | #' mutate( | |
| 44 | #' AVAL = day2month(AVAL), | |
| 45 | #' is_event = CNSR == 0 | |
| 46 | #' ) | |
| 47 | #' | |
| 48 | #' basic_table() %>% | |
| 49 | #'   split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position("first")) %>% | |
| 50 | #' add_colcounts() %>% | |
| 51 | #' surv_time( | |
| 52 | #' vars = "AVAL", | |
| 53 | #' var_labels = "Survival Time (Months)", | |
| 54 | #' is_event = "is_event", | |
| 55 | #' ) %>% | |
| 56 | #' build_table(df = adtte_f) | |
| 57 | #' | |
| 58 | #' basic_table() %>% | |
| 59 | #' split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position(2)) %>% | |
| 60 | #' add_colcounts() %>% | |
| 61 | #' surv_time( | |
| 62 | #' vars = "AVAL", | |
| 63 | #' var_labels = "Survival Time (Months)", | |
| 64 | #' is_event = "is_event", | |
| 65 | #' ) %>% | |
| 66 | #' build_table(df = adtte_f) | |
| 67 | #' | |
| 68 | #' @export | |
| 69 | ref_group_position <- function(position = "first") { | |
| 70 | 20x | make_split_fun( | 
| 71 | 20x | post = list( | 
| 72 | 20x |       function(splret, spl, fulldf) { | 
| 73 | 57x |         if (!"ref_group_value" %in% methods::slotNames(spl)) { | 
| 74 | 1x |           stop("Reference group is undefined.") | 
| 75 | } | |
| 76 | ||
| 77 | 56x | spl_var <- rtables:::spl_payload(spl) | 
| 78 | 56x | fulldf[[spl_var]] <- factor(fulldf[[spl_var]]) | 
| 79 | 56x | init_lvls <- levels(fulldf[[spl_var]]) | 
| 80 | ||
| 81 | 56x |         if (!all(names(splret$values) %in% init_lvls)) { | 
| 82 | ! |           stop("This split function does not work with combination facets.") | 
| 83 | } | |
| 84 | ||
| 85 | 56x | ref_group_pos <- which(init_lvls == rtables:::spl_ref_group(spl)) | 
| 86 | 56x |         pos_choices <- c("first", "last") | 
| 87 | 56x |         if (checkmate::test_choice(position, pos_choices) && position == "first") { | 
| 88 | 41x | pos <- 0 | 
| 89 | 15x |         } else if (checkmate::test_choice(position, pos_choices) && position == "last") { | 
| 90 | 12x | pos <- length(init_lvls) | 
| 91 | 3x |         } else if (checkmate::test_int(position, lower = 1, upper = length(init_lvls))) { | 
| 92 | 3x | pos <- position - 1 | 
| 93 |         } else { | |
| 94 | ! |           stop("Wrong input for ref group position. It must be 'first', 'last', or a integer.") | 
| 95 | } | |
| 96 | ||
| 97 | 56x | reord_lvls <- append(init_lvls[-ref_group_pos], init_lvls[ref_group_pos], after = pos) | 
| 98 | 56x | ord <- match(reord_lvls, names(splret$values)) | 
| 99 | ||
| 100 | 56x | make_split_result( | 
| 101 | 56x | splret$values[ord], | 
| 102 | 56x | splret$datasplit[ord], | 
| 103 | 56x | splret$labels[ord] | 
| 104 | ) | |
| 105 | } | |
| 106 | ) | |
| 107 | ) | |
| 108 | } | |
| 109 | ||
| 110 | #' @describeIn utils_split_funs Split function to change level order based on an `integer` | |
| 111 | #' vector or a `character` vector that represent the split variable's factor levels. | |
| 112 | #' | |
| 113 | #' @param order (`character` or `numeric`)\cr vector of ordering indices for the split facets. | |
| 114 | #' | |
| 115 | #' @return | |
| 116 | #' * `level_order()` returns an utility function that changes the original levels' order, | |
| 117 | #' depending on input `order` and split levels. | |
| 118 | #' | |
| 119 | #' @examples | |
| 120 | #' # level_order -------- | |
| 121 | #' # Even if default would bring ref_group first, the original order puts it last | |
| 122 | #' basic_table() %>% | |
| 123 | #'   split_cols_by("Species", split_fun = level_order(c(1, 3, 2))) %>% | |
| 124 | #'   analyze("Sepal.Length") %>% | |
| 125 | #' build_table(iris) | |
| 126 | #' | |
| 127 | #' # character vector | |
| 128 | #' new_order <- level_order(levels(iris$Species)[c(1, 3, 2)]) | |
| 129 | #' basic_table() %>% | |
| 130 | #'   split_cols_by("Species", ref_group = "virginica", split_fun = new_order) %>% | |
| 131 | #'   analyze("Sepal.Length") %>% | |
| 132 | #' build_table(iris) | |
| 133 | #' | |
| 134 | #' @export | |
| 135 | level_order <- function(order) { | |
| 136 | 2x | make_split_fun( | 
| 137 | 2x | post = list( | 
| 138 | 2x |       function(splret, spl, fulldf) { | 
| 139 | 4x |         if (checkmate::test_integerish(order)) { | 
| 140 | 1x | checkmate::assert_integerish(order, lower = 1, upper = length(splret$values)) | 
| 141 | 1x | ord <- order | 
| 142 |         } else { | |
| 143 | 3x | checkmate::assert_character(order, len = length(splret$values)) | 
| 144 | 3x | checkmate::assert_set_equal(order, names(splret$values), ordered = FALSE) | 
| 145 | 3x | ord <- match(order, names(splret$values)) | 
| 146 | } | |
| 147 | 4x | make_split_result( | 
| 148 | 4x | splret$values[ord], | 
| 149 | 4x | splret$datasplit[ord], | 
| 150 | 4x | splret$labels[ord] | 
| 151 | ) | |
| 152 | } | |
| 153 | ) | |
| 154 | ) | |
| 155 | } | 
| 1 | #' Helper function to create a new SMQ variable in ADAE by stacking SMQ and/or CQ records. | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Helper function to create a new SMQ variable in ADAE that consists of all adverse events belonging to | |
| 6 | #' selected Standardized/Customized queries. The new dataset will only contain records of the adverse events | |
| 7 | #' belonging to any of the selected baskets. Remember that `na_str` must match the needed pre-processing | |
| 8 | #' done with [df_explicit_na()] to have the desired output. | |
| 9 | #' | |
| 10 | #' @inheritParams argument_convention | |
| 11 | #' @param baskets (`character`)\cr variable names of the selected Standardized/Customized queries. | |
| 12 | #' @param smq_varlabel (`string`)\cr a label for the new variable created. | |
| 13 | #' @param keys (`character`)\cr names of the key variables to be returned along with the new variable created. | |
| 14 | #' @param aag_summary (`data.frame`)\cr containing the SMQ baskets and the levels of interest for the final SMQ | |
| 15 | #' variable. This is useful when there are some levels of interest that are not observed in the `df` dataset. | |
| 16 | #' The two columns of this dataset should be named `basket` and `basket_name`. | |
| 17 | #' | |
| 18 | #' @return A `data.frame` with variables in `keys` taken from `df` and new variable SMQ containing | |
| 19 | #' records belonging to the baskets selected via the `baskets` argument. | |
| 20 | #' | |
| 21 | #' @examples | |
| 22 | #' adae <- tern_ex_adae[1:20, ] %>% df_explicit_na() | |
| 23 | #' h_stack_by_baskets(df = adae) | |
| 24 | #' | |
| 25 | #' aag <- data.frame( | |
| 26 | #'   NAMVAR = c("CQ01NAM", "CQ02NAM", "SMQ01NAM", "SMQ02NAM"), | |
| 27 | #' REFNAME = c( | |
| 28 | #' "D.2.1.5.3/A.1.1.1.1 aesi", "X.9.9.9.9/Y.8.8.8.8 aesi", | |
| 29 | #' "C.1.1.1.3/B.2.2.3.1 aesi", "C.1.1.1.3/B.3.3.3.3 aesi" | |
| 30 | #' ), | |
| 31 | #'   SCOPE = c("", "", "BROAD", "BROAD"), | |
| 32 | #' stringsAsFactors = FALSE | |
| 33 | #' ) | |
| 34 | #' | |
| 35 | #' basket_name <- character(nrow(aag)) | |
| 36 | #' cq_pos <- grep("^(CQ).+NAM$", aag$NAMVAR) | |
| 37 | #' smq_pos <- grep("^(SMQ).+NAM$", aag$NAMVAR) | |
| 38 | #' basket_name[cq_pos] <- aag$REFNAME[cq_pos] | |
| 39 | #' basket_name[smq_pos] <- paste0( | |
| 40 | #'   aag$REFNAME[smq_pos], "(", aag$SCOPE[smq_pos], ")" | |
| 41 | #' ) | |
| 42 | #' | |
| 43 | #' aag_summary <- data.frame( | |
| 44 | #' basket = aag$NAMVAR, | |
| 45 | #' basket_name = basket_name, | |
| 46 | #' stringsAsFactors = TRUE | |
| 47 | #' ) | |
| 48 | #' | |
| 49 | #' result <- h_stack_by_baskets(df = adae, aag_summary = aag_summary) | |
| 50 | #' all(levels(aag_summary$basket_name) %in% levels(result$SMQ)) | |
| 51 | #' | |
| 52 | #' h_stack_by_baskets( | |
| 53 | #' df = adae, | |
| 54 | #' aag_summary = NULL, | |
| 55 | #'   keys = c("STUDYID", "USUBJID", "AEDECOD", "ARM"), | |
| 56 | #' baskets = "SMQ01NAM" | |
| 57 | #' ) | |
| 58 | #' | |
| 59 | #' @export | |
| 60 | h_stack_by_baskets <- function(df, | |
| 61 |                                baskets = grep("^(SMQ|CQ).+NAM$", names(df), value = TRUE), | |
| 62 | smq_varlabel = "Standardized MedDRA Query", | |
| 63 |                                keys = c("STUDYID", "USUBJID", "ASTDTM", "AEDECOD", "AESEQ"), | |
| 64 | aag_summary = NULL, | |
| 65 |                                na_str = "<Missing>") { | |
| 66 | 5x | smq_nam <- baskets[startsWith(baskets, "SMQ")] | 
| 67 | # SC corresponding to NAM | |
| 68 | 5x | smq_sc <- gsub(pattern = "NAM", replacement = "SC", x = smq_nam, fixed = TRUE) | 
| 69 | 5x | smq <- stats::setNames(smq_sc, smq_nam) | 
| 70 | ||
| 71 | 5x | checkmate::assert_character(baskets) | 
| 72 | 5x | checkmate::assert_string(smq_varlabel) | 
| 73 | 5x | checkmate::assert_data_frame(df) | 
| 74 | 5x | checkmate::assert_true(all(startsWith(baskets, "SMQ") | startsWith(baskets, "CQ"))) | 
| 75 | 4x | checkmate::assert_true(all(endsWith(baskets, "NAM"))) | 
| 76 | 3x | checkmate::assert_subset(baskets, names(df)) | 
| 77 | 3x | checkmate::assert_subset(keys, names(df)) | 
| 78 | 3x | checkmate::assert_subset(smq_sc, names(df)) | 
| 79 | 3x | checkmate::assert_string(na_str) | 
| 80 | ||
| 81 | 3x |   if (!is.null(aag_summary)) { | 
| 82 | 1x | assert_df_with_variables( | 
| 83 | 1x | df = aag_summary, | 
| 84 | 1x |       variables = list(val = c("basket", "basket_name")) | 
| 85 | ) | |
| 86 | # Warning in case there is no match between `aag_summary$basket` and `baskets` argument. | |
| 87 | # Honestly, I think those should completely match. Target baskets should be the same. | |
| 88 | 1x |     if (length(intersect(baskets, unique(aag_summary$basket))) == 0) { | 
| 89 | ! |       warning("There are 0 baskets in common between aag_summary$basket and `baskets` argument.") | 
| 90 | } | |
| 91 | } | |
| 92 | ||
| 93 | 3x | var_labels <- c(formatters::var_labels(df[, keys]), "SMQ" = smq_varlabel) | 
| 94 | ||
| 95 | # convert `na_str` records from baskets to NA for the later loop and from wide to long steps | |
| 96 | 3x | df[, c(baskets, smq_sc)][df[, c(baskets, smq_sc)] == na_str] <- NA | 
| 97 | ||
| 98 | 3x |   if (all(is.na(df[, baskets]))) { # in case there is no level for the target baskets | 
| 99 | 1x | df_long <- df[-seq_len(nrow(df)), keys] # we just need an empty data frame keeping all factor levels | 
| 100 |   } else { | |
| 101 | # Concatenate SMQxxxNAM with corresponding SMQxxxSC | |
| 102 | 2x | df_cnct <- df[, c(keys, baskets[startsWith(baskets, "CQ")])] | 
| 103 | ||
| 104 | 2x |     for (nam in names(smq)) { | 
| 105 | 4x | sc <- smq[nam] # SMQxxxSC corresponding to SMQxxxNAM | 
| 106 | 4x | nam_notna <- !is.na(df[[nam]]) | 
| 107 | 4x | new_colname <- paste(nam, sc, sep = "_") | 
| 108 | 4x |       df_cnct[nam_notna, new_colname] <- paste0(df[[nam]], "(", df[[sc]], ")")[nam_notna] | 
| 109 | } | |
| 110 | ||
| 111 | 2x | df_cnct$unique_id <- seq(1, nrow(df_cnct)) | 
| 112 | 2x | var_cols <- names(df_cnct)[!(names(df_cnct) %in% c(keys, "unique_id"))] | 
| 113 | # have to convert df_cnct from tibble to data frame | |
| 114 | # as it throws a warning otherwise about rownames. | |
| 115 | # tibble do not support rownames and reshape creates rownames | |
| 116 | ||
| 117 | 2x | df_long <- stats::reshape( | 
| 118 | 2x | data = as.data.frame(df_cnct), | 
| 119 | 2x | varying = var_cols, | 
| 120 | 2x | v.names = "SMQ", | 
| 121 | 2x | idvar = names(df_cnct)[names(df_cnct) %in% c(keys, "unique_id")], | 
| 122 | 2x | direction = "long", | 
| 123 | 2x | new.row.names = seq(prod(length(var_cols), nrow(df_cnct))) | 
| 124 | ) | |
| 125 | ||
| 126 | 2x |     df_long <- df_long[!is.na(df_long[, "SMQ"]), !(names(df_long) %in% c("time", "unique_id"))] | 
| 127 | 2x | df_long$SMQ <- as.factor(df_long$SMQ) | 
| 128 | } | |
| 129 | ||
| 130 | 3x | smq_levels <- setdiff(levels(df_long[["SMQ"]]), na_str) | 
| 131 | ||
| 132 | 3x |   if (!is.null(aag_summary)) { | 
| 133 | # A warning in case there is no match between df and aag_summary records | |
| 134 | 1x |     if (length(intersect(smq_levels, unique(aag_summary$basket_name))) == 0) { | 
| 135 | 1x |       warning("There are 0 basket levels in common between aag_summary$basket_name and df.") | 
| 136 | } | |
| 137 | 1x | df_long[["SMQ"]] <- factor( | 
| 138 | 1x | df_long[["SMQ"]], | 
| 139 | 1x | levels = sort( | 
| 140 | 1x | c( | 
| 141 | 1x | smq_levels, | 
| 142 | 1x | setdiff(unique(aag_summary$basket_name), smq_levels) | 
| 143 | ) | |
| 144 | ) | |
| 145 | ) | |
| 146 |   } else { | |
| 147 | 2x |     all_na_basket_flag <- vapply(df[, baskets], function(x) { | 
| 148 | 6x | all(is.na(x)) | 
| 149 | 2x | }, FUN.VALUE = logical(1)) | 
| 150 | 2x | all_na_basket <- baskets[all_na_basket_flag] | 
| 151 | ||
| 152 | 2x | df_long[["SMQ"]] <- factor( | 
| 153 | 2x | df_long[["SMQ"]], | 
| 154 | 2x | levels = sort(c(smq_levels, all_na_basket)) | 
| 155 | ) | |
| 156 | } | |
| 157 | 3x | formatters::var_labels(df_long) <- var_labels | 
| 158 | 3x | tibble::tibble(df_long) | 
| 159 | } | 
| 1 | ## Deprecated ------------------------------------------------------------ | |
| 2 | ||
| 3 | #' Helper functions for tabulation of a single biomarker result | |
| 4 | #' | |
| 5 | #' @description `r lifecycle::badge("deprecated")` | |
| 6 | #' | |
| 7 | #' @inheritParams argument_convention | |
| 8 | #' @inheritParams survival_duration_subgroups | |
| 9 | #' @param df (`data.frame`)\cr results for a single biomarker. For `h_tab_rsp_one_biomarker()`, the results returned by | |
| 10 | #' [extract_rsp_biomarkers()]. For `h_tab_surv_one_biomarker()`, the results returned by | |
| 11 | #' [extract_survival_biomarkers()]. | |
| 12 | #' @param afuns (named `list` of `function`)\cr analysis functions. | |
| 13 | #' @param colvars (named `list`)\cr named list with elements `vars` (variables to tabulate) and `labels` (their labels). | |
| 14 | #' | |
| 15 | #' @return An `rtables` table object with statistics in columns. | |
| 16 | #' | |
| 17 | #' @name h_biomarkers_subgroups | |
| 18 | NULL | |
| 19 | ||
| 20 | #' @describeIn h_biomarkers_subgroups Helper function to calculate statistics in columns for one biomarker. | |
| 21 | #' | |
| 22 | #' @export | |
| 23 | h_tab_one_biomarker <- function(df, | |
| 24 | afuns, | |
| 25 | colvars, | |
| 26 | na_str = default_na_str(), | |
| 27 | ..., | |
| 28 | .stats = NULL, | |
| 29 | .stat_names = NULL, | |
| 30 | .formats = NULL, | |
| 31 | .labels = NULL, | |
| 32 |                                 .indent_mods = NULL) { | |
| 33 | 2x | lifecycle::deprecate_warn( | 
| 34 | 2x | "0.9.8", "h_tab_one_biomarker()", | 
| 35 | 2x | details = "This function is no longer used within `tern`." | 
| 36 | ) | |
| 37 | ||
| 38 | # Process standard extra arguments | |
| 39 | 2x |   extra_args <- list(".stats" = .stats) | 
| 40 | ! | if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names | 
| 41 | ! | if (!is.null(.formats)) extra_args[[".formats"]] <- .formats | 
| 42 | ! | if (!is.null(.labels)) extra_args[[".labels"]] <- .labels | 
| 43 | 2x | if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods | 
| 44 | ||
| 45 | # Process additional arguments to the statistic function | |
| 46 | 2x | extra_args <- c(extra_args, biomarker = TRUE, ...) | 
| 47 | ||
| 48 | # Adding additional info from layout to analysis function | |
| 49 | 2x | extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) | 
| 50 | 2x | formals(afuns) <- c(formals(afuns), extra_args[[".additional_fun_parameters"]]) | 
| 51 | ||
| 52 | # Create "ci" column from "lcl" and "ucl" | |
| 53 | 2x | df$ci <- combine_vectors(df$lcl, df$ucl) | 
| 54 | ||
| 55 | 2x | colvars$vars <- intersect(colvars$vars, names(df)) | 
| 56 | 2x | colvars$labels <- colvars$labels[colvars$vars] | 
| 57 | ||
| 58 | 2x | lyt <- basic_table() | 
| 59 | ||
| 60 | # Split cols by the multiple variables to populate into columns. | |
| 61 | 2x | lyt <- split_cols_by_multivar( | 
| 62 | 2x | lyt = lyt, | 
| 63 | 2x | vars = colvars$vars, | 
| 64 | 2x | varlabels = colvars$labels | 
| 65 | ) | |
| 66 | ||
| 67 | # Add "All Patients" row | |
| 68 | 2x | lyt <- split_rows_by( | 
| 69 | 2x | lyt = lyt, | 
| 70 | 2x | var = "row_type", | 
| 71 | 2x |     split_fun = keep_split_levels("content"), | 
| 72 | 2x | nested = TRUE, | 
| 73 | 2x | child_labels = "hidden" | 
| 74 | ) | |
| 75 | 2x | lyt <- analyze_colvars( | 
| 76 | 2x | lyt = lyt, | 
| 77 | 2x | afun = afuns, | 
| 78 | 2x | na_str = na_str, | 
| 79 | 2x | extra_args = c(extra_args) | 
| 80 | ) | |
| 81 | ||
| 82 | # Add analysis rows | |
| 83 | 2x |   if ("analysis" %in% df$row_type) { | 
| 84 | ! | lyt <- split_rows_by( | 
| 85 | ! | lyt = lyt, | 
| 86 | ! | var = "row_type", | 
| 87 | ! |       split_fun = keep_split_levels("analysis"), | 
| 88 | ! | nested = TRUE, | 
| 89 | ! | child_labels = "hidden" | 
| 90 | ) | |
| 91 | ! | lyt <- split_rows_by( | 
| 92 | ! | lyt = lyt, | 
| 93 | ! | var = "var_label", | 
| 94 | ! | nested = TRUE, | 
| 95 | ! | indent_mod = 1L | 
| 96 | ) | |
| 97 | ! | lyt <- analyze_colvars( | 
| 98 | ! | lyt = lyt, | 
| 99 | ! | afun = afuns, | 
| 100 | ! | na_str = na_str, | 
| 101 | ! | inclNAs = TRUE, | 
| 102 | ! | extra_args = extra_args | 
| 103 | ) | |
| 104 | } | |
| 105 | ||
| 106 | 2x | build_table(lyt, df = df) | 
| 107 | } | |
| 108 | ||
| 109 | #' @describeIn h_biomarkers_subgroups Helper function that prepares a single response sub-table given the results for a | |
| 110 | #' single biomarker. | |
| 111 | #' | |
| 112 | #' @examples | |
| 113 | #' library(dplyr) | |
| 114 | #' library(forcats) | |
| 115 | #' | |
| 116 | #' adrs <- tern_ex_adrs | |
| 117 | #' adrs_labels <- formatters::var_labels(adrs) | |
| 118 | #' | |
| 119 | #' adrs_f <- adrs %>% | |
| 120 | #' filter(PARAMCD == "BESRSPI") %>% | |
| 121 | #' mutate(rsp = AVALC == "CR") | |
| 122 | #' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response") | |
| 123 | #' | |
| 124 | #' # For a single population, separately estimate the effects of two biomarkers. | |
| 125 | #' df <- h_logistic_mult_cont_df( | |
| 126 | #' variables = list( | |
| 127 | #' rsp = "rsp", | |
| 128 | #'     biomarkers = c("BMRKR1", "AGE"), | |
| 129 | #' covariates = "SEX" | |
| 130 | #' ), | |
| 131 | #' data = adrs_f | |
| 132 | #' ) | |
| 133 | #' | |
| 134 | #' # Starting from above `df`, zoom in on one biomarker and add required columns. | |
| 135 | #' df1 <- df[1, ] | |
| 136 | #' df1$subgroup <- "All patients" | |
| 137 | #' df1$row_type <- "content" | |
| 138 | #' df1$var <- "ALL" | |
| 139 | #' df1$var_label <- "All patients" | |
| 140 | #' | |
| 141 | #' h_tab_rsp_one_biomarker( | |
| 142 | #' df1, | |
| 143 | #'   vars = c("n_tot", "n_rsp", "prop", "or", "ci", "pval") | |
| 144 | #' ) | |
| 145 | #' | |
| 146 | #' @export | |
| 147 | h_tab_rsp_one_biomarker <- function(df, | |
| 148 | vars, | |
| 149 | na_str = default_na_str(), | |
| 150 | .indent_mods = 0L, | |
| 151 |                                     ...) { | |
| 152 | 1x | lifecycle::deprecate_warn( | 
| 153 | 1x | "0.9.8", "h_tab_rsp_one_biomarker()", | 
| 154 | 1x | details = "This function is no longer used within `tern`." | 
| 155 | ) | |
| 156 | ||
| 157 | 1x | colvars <- d_rsp_subgroups_colvars( | 
| 158 | 1x | vars, | 
| 159 | 1x | conf_level = df$conf_level[1], | 
| 160 | 1x | method = df$pval_label[1] | 
| 161 | ) | |
| 162 | ||
| 163 | 1x | h_tab_one_biomarker( | 
| 164 | 1x | df = df, | 
| 165 | 1x | afuns = a_response_subgroups, | 
| 166 | 1x | colvars = colvars, | 
| 167 | 1x | na_str = na_str, | 
| 168 | 1x | .indent_mods = .indent_mods, | 
| 169 | ... | |
| 170 | ) | |
| 171 | } | |
| 172 | ||
| 173 | #' @describeIn h_biomarkers_subgroups Helper function that prepares a single survival sub-table given the results for a | |
| 174 | #' single biomarker. | |
| 175 | #' | |
| 176 | #' @examples | |
| 177 | #' adtte <- tern_ex_adtte | |
| 178 | #' | |
| 179 | #' # Save variable labels before data processing steps. | |
| 180 | #' adtte_labels <- formatters::var_labels(adtte, fill = FALSE) | |
| 181 | #' | |
| 182 | #' adtte_f <- adtte %>% | |
| 183 | #' filter(PARAMCD == "OS") %>% | |
| 184 | #' mutate( | |
| 185 | #' AVALU = as.character(AVALU), | |
| 186 | #' is_event = CNSR == 0 | |
| 187 | #' ) | |
| 188 | #' labels <- c("AVALU" = adtte_labels[["AVALU"]], "is_event" = "Event Flag") | |
| 189 | #' formatters::var_labels(adtte_f)[names(labels)] <- labels | |
| 190 | #' | |
| 191 | #' # For a single population, separately estimate the effects of two biomarkers. | |
| 192 | #' df <- h_coxreg_mult_cont_df( | |
| 193 | #' variables = list( | |
| 194 | #' tte = "AVAL", | |
| 195 | #' is_event = "is_event", | |
| 196 | #'     biomarkers = c("BMRKR1", "AGE"), | |
| 197 | #' covariates = "SEX", | |
| 198 | #'     strata = c("STRATA1", "STRATA2") | |
| 199 | #' ), | |
| 200 | #' data = adtte_f | |
| 201 | #' ) | |
| 202 | #' | |
| 203 | #' # Starting from above `df`, zoom in on one biomarker and add required columns. | |
| 204 | #' df1 <- df[1, ] | |
| 205 | #' df1$subgroup <- "All patients" | |
| 206 | #' df1$row_type <- "content" | |
| 207 | #' df1$var <- "ALL" | |
| 208 | #' df1$var_label <- "All patients" | |
| 209 | #' h_tab_surv_one_biomarker( | |
| 210 | #' df1, | |
| 211 | #'   vars = c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"), | |
| 212 | #' time_unit = "days" | |
| 213 | #' ) | |
| 214 | #' | |
| 215 | #' @export | |
| 216 | h_tab_surv_one_biomarker <- function(df, | |
| 217 | vars, | |
| 218 | time_unit, | |
| 219 | na_str = default_na_str(), | |
| 220 | .indent_mods = 0L, | |
| 221 |                                      ...) { | |
| 222 | 1x | lifecycle::deprecate_warn( | 
| 223 | 1x | "0.9.8", "h_tab_surv_one_biomarker()", | 
| 224 | 1x | details = "This function is no longer used within `tern`." | 
| 225 | ) | |
| 226 | ||
| 227 | 1x | colvars <- d_survival_subgroups_colvars( | 
| 228 | 1x | vars, | 
| 229 | 1x | conf_level = df$conf_level[1], | 
| 230 | 1x | method = df$pval_label[1], | 
| 231 | 1x | time_unit = time_unit | 
| 232 | ) | |
| 233 | ||
| 234 | 1x | h_tab_one_biomarker( | 
| 235 | 1x | df = df, | 
| 236 | 1x | afuns = a_survival_subgroups, | 
| 237 | 1x | colvars = colvars, | 
| 238 | 1x | na_str = na_str, | 
| 239 | 1x | .indent_mods = .indent_mods, | 
| 240 | ... | |
| 241 | ) | |
| 242 | } | 
| 1 | #' Encode categorical missing values in a data frame | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' This is a helper function to encode missing entries across groups of categorical | |
| 6 | #' variables in a data frame. | |
| 7 | #' | |
| 8 | #' @details Missing entries are those with `NA` or empty strings and will | |
| 9 | #' be replaced with a specified value. If factor variables include missing | |
| 10 | #' values, the missing value will be inserted as the last level. | |
| 11 | #' Similarly, in case character or logical variables should be converted to factors | |
| 12 | #' with the `char_as_factor` or `logical_as_factor` options, the missing values will | |
| 13 | #' be set as the last level. | |
| 14 | #' | |
| 15 | #' @param data (`data.frame`)\cr data set. | |
| 16 | #' @param omit_columns (`character`)\cr names of variables from `data` that should | |
| 17 | #' not be modified by this function. | |
| 18 | #' @param char_as_factor (`flag`)\cr whether to convert character variables | |
| 19 | #' in `data` to factors. | |
| 20 | #' @param logical_as_factor (`flag`)\cr whether to convert logical variables | |
| 21 | #' in `data` to factors. | |
| 22 | #' @param na_level (`string`)\cr string used to replace all `NA` or empty | |
| 23 | #' values inside non-`omit_columns` columns. | |
| 24 | #' | |
| 25 | #' @return A `data.frame` with the chosen modifications applied. | |
| 26 | #' | |
| 27 | #' @seealso [sas_na()] and [explicit_na()] for other missing data helper functions. | |
| 28 | #' | |
| 29 | #' @examples | |
| 30 | #' my_data <- data.frame( | |
| 31 | #' u = c(TRUE, FALSE, NA, TRUE), | |
| 32 | #'   v = factor(c("A", NA, NA, NA), levels = c("Z", "A")), | |
| 33 | #'   w = c("A", "B", NA, "C"), | |
| 34 | #'   x = c("D", "E", "F", NA), | |
| 35 | #'   y = c("G", "H", "I", ""), | |
| 36 | #' z = c(1, 2, 3, 4), | |
| 37 | #' stringsAsFactors = FALSE | |
| 38 | #' ) | |
| 39 | #' | |
| 40 | #' # Example 1 | |
| 41 | #' # Encode missing values in all character or factor columns. | |
| 42 | #' df_explicit_na(my_data) | |
| 43 | #' # Also convert logical columns to factor columns. | |
| 44 | #' df_explicit_na(my_data, logical_as_factor = TRUE) | |
| 45 | #' # Encode missing values in a subset of columns. | |
| 46 | #' df_explicit_na(my_data, omit_columns = c("x", "y")) | |
| 47 | #' | |
| 48 | #' # Example 2 | |
| 49 | #' # Here we purposefully convert all `M` values to `NA` in the `SEX` variable. | |
| 50 | #' # After running `df_explicit_na` the `NA` values are encoded as `<Missing>` but they are not | |
| 51 | #' # included when generating `rtables`. | |
| 52 | #' adsl <- tern_ex_adsl | |
| 53 | #' adsl$SEX[adsl$SEX == "M"] <- NA | |
| 54 | #' adsl <- df_explicit_na(adsl) | |
| 55 | #' | |
| 56 | #' # If you want the `Na` values to be displayed in the table use the `na_level` argument. | |
| 57 | #' adsl <- tern_ex_adsl | |
| 58 | #' adsl$SEX[adsl$SEX == "M"] <- NA | |
| 59 | #' adsl <- df_explicit_na(adsl, na_level = "Missing Values") | |
| 60 | #' | |
| 61 | #' # Example 3 | |
| 62 | #' # Numeric variables that have missing values are not altered. This means that any `NA` value in | |
| 63 | #' # a numeric variable will not be included in the summary statistics, nor will they be included | |
| 64 | #' # in the denominator value for calculating the percent values. | |
| 65 | #' adsl <- tern_ex_adsl | |
| 66 | #' adsl$AGE[adsl$AGE < 30] <- NA | |
| 67 | #' adsl <- df_explicit_na(adsl) | |
| 68 | #' | |
| 69 | #' @export | |
| 70 | df_explicit_na <- function(data, | |
| 71 | omit_columns = NULL, | |
| 72 | char_as_factor = TRUE, | |
| 73 | logical_as_factor = FALSE, | |
| 74 |                            na_level = "<Missing>") { | |
| 75 | 24x | checkmate::assert_character(omit_columns, null.ok = TRUE, min.len = 1, any.missing = FALSE) | 
| 76 | 23x | checkmate::assert_data_frame(data) | 
| 77 | 22x | checkmate::assert_flag(char_as_factor) | 
| 78 | 21x | checkmate::assert_flag(logical_as_factor) | 
| 79 | 21x | checkmate::assert_string(na_level) | 
| 80 | ||
| 81 | 19x |   target_vars <- if (is.null(omit_columns)) { | 
| 82 | 17x | names(data) | 
| 83 |   } else { | |
| 84 | 2x | setdiff(names(data), omit_columns) # May have duplicates. | 
| 85 | } | |
| 86 | 19x |   if (length(target_vars) == 0) { | 
| 87 | 1x | return(data) | 
| 88 | } | |
| 89 | ||
| 90 | 18x | l_target_vars <- split(target_vars, target_vars) | 
| 91 | ||
| 92 | # Makes sure target_vars exist in data and names are not duplicated. | |
| 93 | 18x | assert_df_with_variables(data, l_target_vars) | 
| 94 | ||
| 95 | 18x |   for (x in target_vars) { | 
| 96 | 306x | xi <- data[[x]] | 
| 97 | 306x | xi_label <- obj_label(xi) | 
| 98 | ||
| 99 | # Determine whether to convert character or logical input. | |
| 100 | 306x | do_char_conversion <- is.character(xi) && char_as_factor | 
| 101 | 306x | do_logical_conversion <- is.logical(xi) && logical_as_factor | 
| 102 | ||
| 103 | # Pre-convert logical to character to deal correctly with replacing NA | |
| 104 | # values below. | |
| 105 | 306x |     if (do_logical_conversion) { | 
| 106 | 2x | xi <- as.character(xi) | 
| 107 | } | |
| 108 | ||
| 109 | 306x |     if (is.factor(xi) || is.character(xi)) { | 
| 110 | # Handle empty strings and NA values. | |
| 111 | 219x | xi <- explicit_na(sas_na(xi), label = na_level) | 
| 112 | ||
| 113 | # Convert to factors if requested for the original type, | |
| 114 | # set na_level as the last value. | |
| 115 | 219x |       if (do_char_conversion || do_logical_conversion) { | 
| 116 | 78x | levels_xi <- setdiff(sort(unique(xi)), na_level) | 
| 117 | 78x |         if (na_level %in% unique(xi)) { | 
| 118 | 18x | levels_xi <- c(levels_xi, na_level) | 
| 119 | } | |
| 120 | ||
| 121 | 78x | xi <- factor(xi, levels = levels_xi) | 
| 122 | } | |
| 123 | ||
| 124 | 219x | data[, x] <- formatters::with_label(xi, label = xi_label) | 
| 125 | } | |
| 126 | } | |
| 127 | 18x | return(data) | 
| 128 | } | 
| 1 | #' Subgroup treatment effect pattern (STEP) fit for survival outcome | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' This fits the subgroup treatment effect pattern (STEP) models for a survival outcome. The treatment arm | |
| 6 | #' variable must have exactly 2 levels, where the first one is taken as reference and the estimated | |
| 7 | #' hazard ratios are for the comparison of the second level vs. the first one. | |
| 8 | #' | |
| 9 | #' The model which is fit is: | |
| 10 | #' | |
| 11 | #' `Surv(time, event) ~ arm * poly(biomarker, degree) + covariates + strata(strata)` | |
| 12 | #' | |
| 13 | #' where `degree` is specified by `control_step()`. | |
| 14 | #' | |
| 15 | #' @inheritParams argument_convention | |
| 16 | #' @param variables (named `list` of `character`)\cr list of analysis variables: needs `time`, `event`, | |
| 17 | #' `arm`, `biomarker`, and optional `covariates` and `strata`. | |
| 18 | #' @param control (named `list`)\cr combined control list from [control_step()] and [control_coxph()]. | |
| 19 | #' | |
| 20 | #' @return A matrix of class `step`. The first part of the columns describe the subgroup intervals used | |
| 21 | #' for the biomarker variable, including where the center of the intervals are and their bounds. The | |
| 22 | #' second part of the columns contain the estimates for the treatment arm comparison. | |
| 23 | #' | |
| 24 | #' @note For the default degree 0 the `biomarker` variable is not included in the model. | |
| 25 | #' | |
| 26 | #' @seealso [control_step()] and [control_coxph()] for the available customization options. | |
| 27 | #' | |
| 28 | #' @examples | |
| 29 | #' # Testing dataset with just two treatment arms. | |
| 30 | #' library(dplyr) | |
| 31 | #' | |
| 32 | #' adtte_f <- tern_ex_adtte %>% | |
| 33 | #' filter( | |
| 34 | #' PARAMCD == "OS", | |
| 35 | #'     ARM %in% c("B: Placebo", "A: Drug X") | |
| 36 | #' ) %>% | |
| 37 | #' mutate( | |
| 38 | #' # Reorder levels of ARM to display reference arm before treatment arm. | |
| 39 | #' ARM = droplevels(forcats::fct_relevel(ARM, "B: Placebo")), | |
| 40 | #' is_event = CNSR == 0 | |
| 41 | #' ) | |
| 42 | #' labels <- c("ARM" = "Treatment Arm", "is_event" = "Event Flag") | |
| 43 | #' formatters::var_labels(adtte_f)[names(labels)] <- labels | |
| 44 | #' | |
| 45 | #' variables <- list( | |
| 46 | #' arm = "ARM", | |
| 47 | #' biomarker = "BMRKR1", | |
| 48 | #'   covariates = c("AGE", "BMRKR2"), | |
| 49 | #' event = "is_event", | |
| 50 | #' time = "AVAL" | |
| 51 | #' ) | |
| 52 | #' | |
| 53 | #' # Fit default STEP models: Here a constant treatment effect is estimated in each subgroup. | |
| 54 | #' step_matrix <- fit_survival_step( | |
| 55 | #' variables = variables, | |
| 56 | #' data = adtte_f | |
| 57 | #' ) | |
| 58 | #' dim(step_matrix) | |
| 59 | #' head(step_matrix) | |
| 60 | #' | |
| 61 | #' # Specify different polynomial degree for the biomarker interaction to use more flexible local | |
| 62 | #' # models. Or specify different Cox regression options. | |
| 63 | #' step_matrix2 <- fit_survival_step( | |
| 64 | #' variables = variables, | |
| 65 | #' data = adtte_f, | |
| 66 | #' control = c(control_coxph(conf_level = 0.9), control_step(degree = 2)) | |
| 67 | #' ) | |
| 68 | #' | |
| 69 | #' # Use a global model with cubic interaction and only 5 points. | |
| 70 | #' step_matrix3 <- fit_survival_step( | |
| 71 | #' variables = variables, | |
| 72 | #' data = adtte_f, | |
| 73 | #' control = c(control_coxph(), control_step(bandwidth = NULL, degree = 3, num_points = 5L)) | |
| 74 | #' ) | |
| 75 | #' | |
| 76 | #' @export | |
| 77 | fit_survival_step <- function(variables, | |
| 78 | data, | |
| 79 |                               control = c(control_step(), control_coxph())) { | |
| 80 | 4x | checkmate::assert_list(control) | 
| 81 | 4x | assert_df_with_variables(data, variables) | 
| 82 | 4x | data <- data[!is.na(data[[variables$biomarker]]), ] | 
| 83 | 4x | window_sel <- h_step_window(x = data[[variables$biomarker]], control = control) | 
| 84 | 4x | interval_center <- window_sel$interval[, "Interval Center"] | 
| 85 | 4x | form <- h_step_survival_formula(variables = variables, control = control) | 
| 86 | 4x |   estimates <- if (is.null(control$bandwidth)) { | 
| 87 | 1x | h_step_survival_est( | 
| 88 | 1x | formula = form, | 
| 89 | 1x | data = data, | 
| 90 | 1x | variables = variables, | 
| 91 | 1x | x = interval_center, | 
| 92 | 1x | control = control | 
| 93 | ) | |
| 94 |   } else { | |
| 95 | 3x | tmp <- mapply( | 
| 96 | 3x | FUN = h_step_survival_est, | 
| 97 | 3x | x = interval_center, | 
| 98 | 3x | subset = as.list(as.data.frame(window_sel$sel)), | 
| 99 | 3x | MoreArgs = list( | 
| 100 | 3x | formula = form, | 
| 101 | 3x | data = data, | 
| 102 | 3x | variables = variables, | 
| 103 | 3x | control = control | 
| 104 | ) | |
| 105 | ) | |
| 106 | # Maybe we find a more elegant solution than this. | |
| 107 | 3x |     rownames(tmp) <- c("n", "events", "loghr", "se", "ci_lower", "ci_upper") | 
| 108 | 3x | t(tmp) | 
| 109 | } | |
| 110 | 4x | result <- cbind(window_sel$interval, estimates) | 
| 111 | 4x | structure( | 
| 112 | 4x | result, | 
| 113 | 4x |     class = c("step", "matrix"), | 
| 114 | 4x | variables = variables, | 
| 115 | 4x | control = control | 
| 116 | ) | |
| 117 | } | 
| 1 | #' Create a STEP graph | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' Based on the STEP results, creates a `ggplot` graph showing the estimated HR or OR | |
| 6 | #' along the continuous biomarker value subgroups. | |
| 7 | #' | |
| 8 | #' @param df (`tibble`)\cr result of [tidy.step()]. | |
| 9 | #' @param use_percentile (`flag`)\cr whether to use percentiles for the x axis or actual | |
| 10 | #' biomarker values. | |
| 11 | #' @param est (named `list`)\cr `col` and `lty` settings for estimate line. | |
| 12 | #' @param ci_ribbon (named `list` or `NULL`)\cr `fill` and `alpha` settings for the confidence interval | |
| 13 | #' ribbon area, or `NULL` to not plot a CI ribbon. | |
| 14 | #' @param col (`character`)\cr color(s). | |
| 15 | #' | |
| 16 | #' @return A `ggplot` STEP graph. | |
| 17 | #' | |
| 18 | #' @seealso Custom tidy method [tidy.step()]. | |
| 19 | #' | |
| 20 | #' @examples | |
| 21 | #' library(survival) | |
| 22 | #' lung$sex <- factor(lung$sex) | |
| 23 | #' | |
| 24 | #' # Survival example. | |
| 25 | #' vars <- list( | |
| 26 | #' time = "time", | |
| 27 | #' event = "status", | |
| 28 | #' arm = "sex", | |
| 29 | #' biomarker = "age" | |
| 30 | #' ) | |
| 31 | #' | |
| 32 | #' step_matrix <- fit_survival_step( | |
| 33 | #' variables = vars, | |
| 34 | #' data = lung, | |
| 35 | #' control = c(control_coxph(), control_step(num_points = 10, degree = 2)) | |
| 36 | #' ) | |
| 37 | #' step_data <- broom::tidy(step_matrix) | |
| 38 | #' | |
| 39 | #' # Default plot. | |
| 40 | #' g_step(step_data) | |
| 41 | #' | |
| 42 | #' # Add the reference 1 horizontal line. | |
| 43 | #' library(ggplot2) | |
| 44 | #' g_step(step_data) + | |
| 45 | #' ggplot2::geom_hline(ggplot2::aes(yintercept = 1), linetype = 2) | |
| 46 | #' | |
| 47 | #' # Use actual values instead of percentiles, different color for estimate and no CI, | |
| 48 | #' # use log scale for y axis. | |
| 49 | #' g_step( | |
| 50 | #' step_data, | |
| 51 | #' use_percentile = FALSE, | |
| 52 | #' est = list(col = "blue", lty = 1), | |
| 53 | #' ci_ribbon = NULL | |
| 54 | #' ) + scale_y_log10() | |
| 55 | #' | |
| 56 | #' # Adding another curve based on additional column. | |
| 57 | #' step_data$extra <- exp(step_data$`Percentile Center`) | |
| 58 | #' g_step(step_data) + | |
| 59 | #' ggplot2::geom_line(ggplot2::aes(y = extra), linetype = 2, color = "green") | |
| 60 | #' | |
| 61 | #' # Response example. | |
| 62 | #' vars <- list( | |
| 63 | #' response = "status", | |
| 64 | #' arm = "sex", | |
| 65 | #' biomarker = "age" | |
| 66 | #' ) | |
| 67 | #' | |
| 68 | #' step_matrix <- fit_rsp_step( | |
| 69 | #' variables = vars, | |
| 70 | #' data = lung, | |
| 71 | #' control = c( | |
| 72 | #' control_logistic(response_definition = "I(response == 2)"), | |
| 73 | #' control_step() | |
| 74 | #' ) | |
| 75 | #' ) | |
| 76 | #' step_data <- broom::tidy(step_matrix) | |
| 77 | #' g_step(step_data) | |
| 78 | #' | |
| 79 | #' @export | |
| 80 | g_step <- function(df, | |
| 81 | use_percentile = "Percentile Center" %in% names(df), | |
| 82 | est = list(col = "blue", lty = 1), | |
| 83 |                    ci_ribbon = list(fill = getOption("ggplot2.discrete.colour")[1], alpha = 0.5), | |
| 84 |                    col = getOption("ggplot2.discrete.colour")) { | |
| 85 | 2x | checkmate::assert_tibble(df) | 
| 86 | 2x | checkmate::assert_flag(use_percentile) | 
| 87 | 2x | checkmate::assert_character(col, null.ok = TRUE) | 
| 88 | 2x | checkmate::assert_list(est, names = "named") | 
| 89 | 2x | checkmate::assert_list(ci_ribbon, names = "named", null.ok = TRUE) | 
| 90 | ||
| 91 | 2x | x_var <- ifelse(use_percentile, "Percentile Center", "Interval Center") | 
| 92 | 2x | df$x <- df[[x_var]] | 
| 93 | 2x | attrs <- attributes(df) | 
| 94 | 2x | df$y <- df[[attrs$estimate]] | 
| 95 | ||
| 96 | # Set legend names. To be modified also at call level | |
| 97 | 2x |   legend_names <- c("Estimate", "CI 95%") | 
| 98 | ||
| 99 | 2x | p <- ggplot2::ggplot(df, ggplot2::aes(x = .data[["x"]], y = .data[["y"]])) | 
| 100 | ||
| 101 | 2x |   if (!is.null(col)) { | 
| 102 | 2x | p <- p + | 
| 103 | 2x | ggplot2::scale_color_manual(values = col) | 
| 104 | } | |
| 105 | ||
| 106 | 2x |   if (!is.null(ci_ribbon)) { | 
| 107 | 1x |     if (is.null(ci_ribbon$fill)) { | 
| 108 | ! | ci_ribbon$fill <- "lightblue" | 
| 109 | } | |
| 110 | 1x | p <- p + ggplot2::geom_ribbon( | 
| 111 | 1x | ggplot2::aes( | 
| 112 | 1x | ymin = .data[["ci_lower"]], ymax = .data[["ci_upper"]], | 
| 113 | 1x | fill = legend_names[2] | 
| 114 | ), | |
| 115 | 1x | alpha = ci_ribbon$alpha | 
| 116 | ) + | |
| 117 | 1x | scale_fill_manual( | 
| 118 | 1x |         name = "", values = c("CI 95%" = ci_ribbon$fill) | 
| 119 | ) | |
| 120 | } | |
| 121 | 2x | suppressMessages(p <- p + | 
| 122 | 2x | ggplot2::geom_line( | 
| 123 | 2x | ggplot2::aes(y = .data[["y"]], color = legend_names[1]), | 
| 124 | 2x | linetype = est$lty | 
| 125 | ) + | |
| 126 | 2x | scale_colour_manual( | 
| 127 | 2x |       name = "", values = c("Estimate" = "blue") | 
| 128 | )) | |
| 129 | ||
| 130 | 2x | p <- p + ggplot2::labs(x = attrs$biomarker, y = attrs$estimate) | 
| 131 | 2x |   if (use_percentile) { | 
| 132 | 1x | p <- p + ggplot2::scale_x_continuous(labels = scales::percent) | 
| 133 | } | |
| 134 | 2x | p | 
| 135 | } | |
| 136 | ||
| 137 | #' Custom tidy method for STEP results | |
| 138 | #' | |
| 139 | #' @description `r lifecycle::badge("stable")` | |
| 140 | #' | |
| 141 | #' Tidy the STEP results into a `tibble` format ready for plotting. | |
| 142 | #' | |
| 143 | #' @param x (`matrix`)\cr results from [fit_survival_step()]. | |
| 144 | #' @param ... not used. | |
| 145 | #' | |
| 146 | #' @return A `tibble` with one row per STEP subgroup. The estimates and CIs are on the HR or OR scale, | |
| 147 | #' respectively. Additional attributes carry metadata also used for plotting. | |
| 148 | #' | |
| 149 | #' @seealso [g_step()] which consumes the result from this function. | |
| 150 | #' | |
| 151 | #' @method tidy step | |
| 152 | #' | |
| 153 | #' @examples | |
| 154 | #' library(survival) | |
| 155 | #' lung$sex <- factor(lung$sex) | |
| 156 | #' vars <- list( | |
| 157 | #' time = "time", | |
| 158 | #' event = "status", | |
| 159 | #' arm = "sex", | |
| 160 | #' biomarker = "age" | |
| 161 | #' ) | |
| 162 | #' step_matrix <- fit_survival_step( | |
| 163 | #' variables = vars, | |
| 164 | #' data = lung, | |
| 165 | #' control = c(control_coxph(), control_step(num_points = 10, degree = 2)) | |
| 166 | #' ) | |
| 167 | #' broom::tidy(step_matrix) | |
| 168 | #' | |
| 169 | #' @export | |
| 170 | tidy.step <- function(x, ...) { # nolint | |
| 171 | 7x | checkmate::assert_class(x, "step") | 
| 172 | 7x | dat <- as.data.frame(x) | 
| 173 | 7x | nams <- names(dat) | 
| 174 | 7x | is_surv <- "loghr" %in% names(dat) | 
| 175 | 7x | est_var <- ifelse(is_surv, "loghr", "logor") | 
| 176 | 7x | new_est_var <- ifelse(is_surv, "Hazard Ratio", "Odds Ratio") | 
| 177 | 7x |   new_y_vars <- c(new_est_var, c("ci_lower", "ci_upper")) | 
| 178 | 7x | names(dat)[match(est_var, nams)] <- new_est_var | 
| 179 | 7x | dat[, new_y_vars] <- exp(dat[, new_y_vars]) | 
| 180 | 7x | any_is_na <- any(is.na(dat[, new_y_vars])) | 
| 181 | 7x | any_is_very_large <- any(abs(dat[, new_y_vars]) > 1e10, na.rm = TRUE) | 
| 182 | 7x |   if (any_is_na) { | 
| 183 | 2x | warning(paste( | 
| 184 | 2x | "Missing values in the point estimate or CI columns,", | 
| 185 | 2x | "this will lead to holes in the `g_step()` plot" | 
| 186 | )) | |
| 187 | } | |
| 188 | 7x |   if (any_is_very_large) { | 
| 189 | 2x | warning(paste( | 
| 190 | 2x | "Very large absolute values in the point estimate or CI columns,", | 
| 191 | 2x | "consider adding `scale_y_log10()` to the `g_step()` result for plotting" | 
| 192 | )) | |
| 193 | } | |
| 194 | 7x |   if (any_is_na || any_is_very_large) { | 
| 195 | 4x |     warning("Consider using larger `bandwidth`, less `num_points` in `control_step()` settings for fitting") | 
| 196 | } | |
| 197 | 7x | structure( | 
| 198 | 7x | tibble::as_tibble(dat), | 
| 199 | 7x | estimate = new_est_var, | 
| 200 | 7x | biomarker = attr(x, "variables")$biomarker, | 
| 201 | 7x | ci = f_conf_level(attr(x, "control")$conf_level) | 
| 202 | ) | |
| 203 | } | 
| 1 | #' Control function for incidence rate | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' This is an auxiliary function for controlling arguments for the incidence rate, used | |
| 6 | #' internally to specify details in `s_incidence_rate()`. | |
| 7 | #' | |
| 8 | #' @inheritParams argument_convention | |
| 9 | #' @param conf_type (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar` | |
| 10 | #' for confidence interval type. | |
| 11 | #' @param input_time_unit (`string`)\cr `day`, `week`, `month`, or `year` (default) | |
| 12 | #' indicating time unit for data input. | |
| 13 | #' @param num_pt_year (`numeric(1)`)\cr number of patient-years to use when calculating adverse event rates. | |
| 14 | #' | |
| 15 | #' @return A list of components with the same names as the arguments. | |
| 16 | #' | |
| 17 | #' @seealso [incidence_rate] | |
| 18 | #' | |
| 19 | #' @examples | |
| 20 | #' control_incidence_rate(0.9, "exact", "month", 100) | |
| 21 | #' | |
| 22 | #' @export | |
| 23 | control_incidence_rate <- function(conf_level = 0.95, | |
| 24 |                                    conf_type = c("normal", "normal_log", "exact", "byar"), | |
| 25 |                                    input_time_unit = c("year", "day", "week", "month"), | |
| 26 |                                    num_pt_year = 100) { | |
| 27 | 14x | conf_type <- match.arg(conf_type) | 
| 28 | 13x | input_time_unit <- match.arg(input_time_unit) | 
| 29 | 12x | checkmate::assert_number(num_pt_year) | 
| 30 | 11x | assert_proportion_value(conf_level) | 
| 31 | ||
| 32 | 10x | list( | 
| 33 | 10x | conf_level = conf_level, | 
| 34 | 10x | conf_type = conf_type, | 
| 35 | 10x | input_time_unit = input_time_unit, | 
| 36 | 10x | num_pt_year = num_pt_year | 
| 37 | ) | |
| 38 | } | 
| 1 | #' Helper functions for accessing information from `rtables` | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' These are a couple of functions that help with accessing the data in `rtables` objects. | |
| 6 | #' Currently these work for occurrence tables, which are defined as having a count as the first | |
| 7 | #' element and a fraction as the second element in each cell. | |
| 8 | #' | |
| 9 | #' @seealso [prune_occurrences] for usage of these functions. | |
| 10 | #' | |
| 11 | #' @name rtables_access | |
| 12 | NULL | |
| 13 | ||
| 14 | #' @describeIn rtables_access Helper function to extract the first values from each content | |
| 15 | #' cell and from specified columns in a `TableRow`. Defaults to all columns. | |
| 16 | #' | |
| 17 | #' @param table_row (`TableRow`)\cr an analysis row in a occurrence table. | |
| 18 | #' @param col_names (`character`)\cr the names of the columns to extract from. | |
| 19 | #' @param col_indices (`integer`)\cr the indices of the columns to extract from. If `col_names` are provided, | |
| 20 | #' then these are inferred from the names of `table_row`. Note that this currently only works well with a single | |
| 21 | #' column split. | |
| 22 | #' | |
| 23 | #' @return | |
| 24 | #' * `h_row_first_values()` returns a `vector` of numeric values. | |
| 25 | #' | |
| 26 | #' @examples | |
| 27 | #' tbl <- basic_table() %>% | |
| 28 | #'   split_cols_by("ARM") %>% | |
| 29 | #'   split_rows_by("RACE") %>% | |
| 30 | #'   analyze("AGE", function(x) { | |
| 31 | #' list( | |
| 32 | #' "mean (sd)" = rcell(c(mean(x), sd(x)), format = "xx.x (xx.x)"), | |
| 33 | #' "n" = length(x), | |
| 34 | #' "frac" = rcell(c(0.1, 0.1), format = "xx (xx)") | |
| 35 | #' ) | |
| 36 | #' }) %>% | |
| 37 | #' build_table(tern_ex_adsl) %>% | |
| 38 | #' prune_table() | |
| 39 | #' tree_row_elem <- collect_leaves(tbl[2, ])[[1]] | |
| 40 | #' result <- max(h_row_first_values(tree_row_elem)) | |
| 41 | #' result | |
| 42 | #' | |
| 43 | #' @export | |
| 44 | h_row_first_values <- function(table_row, | |
| 45 | col_names = NULL, | |
| 46 |                                col_indices = NULL) { | |
| 47 | 745x | col_indices <- check_names_indices(table_row, col_names, col_indices) | 
| 48 | 744x | checkmate::assert_integerish(col_indices) | 
| 49 | 744x | checkmate::assert_subset(col_indices, seq_len(ncol(table_row))) | 
| 50 | ||
| 51 | # Main values are extracted | |
| 52 | 744x | row_vals <- row_values(table_row)[col_indices] | 
| 53 | ||
| 54 | # Main return | |
| 55 | 744x |   vapply(row_vals, function(rv) { | 
| 56 | 2096x |     if (is.null(rv)) { | 
| 57 | 744x | NA_real_ | 
| 58 |     } else { | |
| 59 | 2093x | rv[1L] | 
| 60 | } | |
| 61 | 744x | }, FUN.VALUE = numeric(1)) | 
| 62 | } | |
| 63 | ||
| 64 | #' @describeIn rtables_access Helper function that extracts row values and checks if they are | |
| 65 | #' convertible to integers (`integerish` values). | |
| 66 | #' | |
| 67 | #' @return | |
| 68 | #' * `h_row_counts()` returns a `vector` of numeric values. | |
| 69 | #' | |
| 70 | #' @examples | |
| 71 | #' # Row counts (integer values) | |
| 72 | #' # h_row_counts(tree_row_elem) # Fails because there are no integers | |
| 73 | #' # Using values with integers | |
| 74 | #' tree_row_elem <- collect_leaves(tbl[3, ])[[1]] | |
| 75 | #' result <- h_row_counts(tree_row_elem) | |
| 76 | #' # result | |
| 77 | #' | |
| 78 | #' @export | |
| 79 | h_row_counts <- function(table_row, | |
| 80 | col_names = NULL, | |
| 81 |                          col_indices = NULL) { | |
| 82 | 741x | counts <- h_row_first_values(table_row, col_names, col_indices) | 
| 83 | 741x | checkmate::assert_integerish(counts) | 
| 84 | 741x | counts | 
| 85 | } | |
| 86 | ||
| 87 | #' @describeIn rtables_access Helper function to extract fractions from specified columns in a `TableRow`. | |
| 88 | #' More specifically it extracts the second values from each content cell and checks it is a fraction. | |
| 89 | #' | |
| 90 | #' @return | |
| 91 | #' * `h_row_fractions()` returns a `vector` of proportions. | |
| 92 | #' | |
| 93 | #' @examples | |
| 94 | #' # Row fractions | |
| 95 | #' tree_row_elem <- collect_leaves(tbl[4, ])[[1]] | |
| 96 | #' h_row_fractions(tree_row_elem) | |
| 97 | #' | |
| 98 | #' @export | |
| 99 | h_row_fractions <- function(table_row, | |
| 100 | col_names = NULL, | |
| 101 |                             col_indices = NULL) { | |
| 102 | 250x | col_indices <- check_names_indices(table_row, col_names, col_indices) | 
| 103 | 250x | row_vals <- row_values(table_row)[col_indices] | 
| 104 | 250x | fractions <- sapply(row_vals, "[", 2L) | 
| 105 | 250x | checkmate::assert_numeric(fractions, lower = 0, upper = 1) | 
| 106 | 250x | fractions | 
| 107 | } | |
| 108 | ||
| 109 | #' @describeIn rtables_access Helper function to extract column counts from specified columns in a table. | |
| 110 | #' | |
| 111 | #' @param table (`VTableNodeInfo`)\cr an occurrence table or row. | |
| 112 | #' | |
| 113 | #' @return | |
| 114 | #' * `h_col_counts()` returns a `vector` of column counts. | |
| 115 | #' | |
| 116 | #' @export | |
| 117 | h_col_counts <- function(table, | |
| 118 | col_names = NULL, | |
| 119 |                          col_indices = NULL) { | |
| 120 | 307x | col_indices <- check_names_indices(table, col_names, col_indices) | 
| 121 | 307x | counts <- col_counts(table)[col_indices] | 
| 122 | 307x | stats::setNames(counts, col_names) | 
| 123 | } | |
| 124 | ||
| 125 | #' @describeIn rtables_access Helper function to get first row of content table of current table. | |
| 126 | #' | |
| 127 | #' @return | |
| 128 | #' * `h_content_first_row()` returns a row from an `rtables` table. | |
| 129 | #' | |
| 130 | #' @export | |
| 131 | h_content_first_row <- function(table) { | |
| 132 | 27x | ct <- content_table(table) | 
| 133 | 27x | tree_children(ct)[[1]] | 
| 134 | } | |
| 135 | ||
| 136 | #' @describeIn rtables_access Helper function which says whether current table is a leaf in the tree. | |
| 137 | #' | |
| 138 | #' @return | |
| 139 | #' * `is_leaf_table()` returns a `logical` value indicating whether current table is a leaf. | |
| 140 | #' | |
| 141 | #' @keywords internal | |
| 142 | is_leaf_table <- function(table) { | |
| 143 | 168x | children <- tree_children(table) | 
| 144 | 168x | child_classes <- unique(sapply(children, class)) | 
| 145 | 168x | identical(child_classes, "ElementaryTable") | 
| 146 | } | |
| 147 | ||
| 148 | #' @describeIn rtables_access Internal helper function that tests standard inputs for column indices. | |
| 149 | #' | |
| 150 | #' @return | |
| 151 | #' * `check_names_indices` returns column indices. | |
| 152 | #' | |
| 153 | #' @keywords internal | |
| 154 | check_names_indices <- function(table_row, | |
| 155 | col_names = NULL, | |
| 156 |                                 col_indices = NULL) { | |
| 157 | 1302x |   if (!is.null(col_names)) { | 
| 158 | 1256x |     if (!is.null(col_indices)) { | 
| 159 | 1x | stop( | 
| 160 | 1x | "Inserted both col_names and col_indices when selecting row values. ", | 
| 161 | 1x | "Please choose one." | 
| 162 | ) | |
| 163 | } | |
| 164 | 1255x | col_indices <- h_col_indices(table_row, col_names) | 
| 165 | } | |
| 166 | 1301x |   if (is.null(col_indices)) { | 
| 167 | 39x | ll <- ifelse(is.null(ncol(table_row)), length(table_row), ncol(table_row)) | 
| 168 | 39x | col_indices <- seq_len(ll) | 
| 169 | } | |
| 170 | ||
| 171 | 1301x | return(col_indices) | 
| 172 | } | 
| 1 | #' Control function for logistic regression model fitting | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' This is an auxiliary function for controlling arguments for logistic regression models. | |
| 6 | #' `conf_level` refers to the confidence level used for the Odds Ratio CIs. | |
| 7 | #' | |
| 8 | #' @inheritParams argument_convention | |
| 9 | #' @param response_definition (`string`)\cr the definition of what an event is in terms of `response`. | |
| 10 | #' This will be used when fitting the logistic regression model on the left hand side of the formula. | |
| 11 | #' Note that the evaluated expression should result in either a logical vector or a factor with 2 | |
| 12 | #' levels. By default this is just `"response"` such that the original response variable is used | |
| 13 | #' and not modified further. | |
| 14 | #' | |
| 15 | #' @return A list of components with the same names as the arguments. | |
| 16 | #' | |
| 17 | #' @examples | |
| 18 | #' # Standard options. | |
| 19 | #' control_logistic() | |
| 20 | #' | |
| 21 | #' # Modify confidence level. | |
| 22 | #' control_logistic(conf_level = 0.9) | |
| 23 | #' | |
| 24 | #' # Use a different response definition. | |
| 25 | #' control_logistic(response_definition = "I(response %in% c('CR', 'PR'))") | |
| 26 | #' | |
| 27 | #' @export | |
| 28 | control_logistic <- function(response_definition = "response", | |
| 29 |                              conf_level = 0.95) { | |
| 30 | 29x |   checkmate::assert_true(grepl("response", response_definition)) | 
| 31 | 28x | checkmate::assert_string(response_definition) | 
| 32 | 28x | assert_proportion_value(conf_level) | 
| 33 | 27x | list( | 
| 34 | 27x | response_definition = response_definition, | 
| 35 | 27x | conf_level = conf_level | 
| 36 | ) | |
| 37 | } | 
| 1 | #' Sort pharmacokinetic data by `PARAM` variable | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' @param pk_data (`data.frame`)\cr pharmacokinetic data frame. | |
| 6 | #' @param key_var (`string`)\cr key variable used to merge pk_data and metadata created by [d_pkparam()]. | |
| 7 | #' | |
| 8 | #' @return A pharmacokinetic `data.frame` sorted by a `PARAM` variable. | |
| 9 | #' | |
| 10 | #' @examples | |
| 11 | #' library(dplyr) | |
| 12 | #' | |
| 13 | #' adpp <- tern_ex_adpp %>% mutate(PKPARAM = factor(paste0(PARAM, " (", AVALU, ")"))) | |
| 14 | #' pk_ordered_data <- h_pkparam_sort(adpp) | |
| 15 | #' | |
| 16 | #' @export | |
| 17 | h_pkparam_sort <- function(pk_data, key_var = "PARAMCD") { | |
| 18 | 4x | assert_df_with_variables(pk_data, list(key_var = key_var)) | 
| 19 | 4x | pk_data$PARAMCD <- pk_data[[key_var]] | 
| 20 | ||
| 21 | 4x | ordered_pk_data <- d_pkparam() | 
| 22 | ||
| 23 | # Add the numeric values from ordered_pk_data to pk_data | |
| 24 | 4x |   joined_data <- merge(pk_data, ordered_pk_data, by = "PARAMCD", suffixes = c("", ".y")) | 
| 25 | ||
| 26 | 4x |   joined_data <- joined_data[, -grep(".*.y$", colnames(joined_data))] | 
| 27 | ||
| 28 | 4x | joined_data$TLG_ORDER <- as.numeric(joined_data$TLG_ORDER) | 
| 29 | ||
| 30 | # Then order PARAM based on this column | |
| 31 | 4x | joined_data$PARAM <- factor(joined_data$PARAM, | 
| 32 | 4x | levels = unique(joined_data$PARAM[order(joined_data$TLG_ORDER)]), | 
| 33 | 4x | ordered = TRUE | 
| 34 | ) | |
| 35 | ||
| 36 | 4x | joined_data$TLG_DISPLAY <- factor(joined_data$TLG_DISPLAY, | 
| 37 | 4x | levels = unique(joined_data$TLG_DISPLAY[order(joined_data$TLG_ORDER)]), | 
| 38 | 4x | ordered = TRUE | 
| 39 | ) | |
| 40 | ||
| 41 | 4x | joined_data | 
| 42 | } | 
| 1 | #' Subgroup treatment effect pattern (STEP) fit for binary (response) outcome | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' This fits the Subgroup Treatment Effect Pattern logistic regression models for a binary | |
| 6 | #' (response) outcome. The treatment arm variable must have exactly 2 levels, | |
| 7 | #' where the first one is taken as reference and the estimated odds ratios are | |
| 8 | #' for the comparison of the second level vs. the first one. | |
| 9 | #' | |
| 10 | #' The (conditional) logistic regression model which is fit is: | |
| 11 | #' | |
| 12 | #' `response ~ arm * poly(biomarker, degree) + covariates + strata(strata)` | |
| 13 | #' | |
| 14 | #' where `degree` is specified by `control_step()`. | |
| 15 | #' | |
| 16 | #' @inheritParams argument_convention | |
| 17 | #' @param variables (named `list` of `character`)\cr list of analysis variables: | |
| 18 | #' needs `response`, `arm`, `biomarker`, and optional `covariates` and `strata`. | |
| 19 | #' @param control (named `list`)\cr combined control list from [control_step()] | |
| 20 | #' and [control_logistic()]. | |
| 21 | #' | |
| 22 | #' @return A matrix of class `step`. The first part of the columns describe the | |
| 23 | #' subgroup intervals used for the biomarker variable, including where the | |
| 24 | #' center of the intervals are and their bounds. The second part of the | |
| 25 | #' columns contain the estimates for the treatment arm comparison. | |
| 26 | #' | |
| 27 | #' @note For the default degree 0 the `biomarker` variable is not included in the model. | |
| 28 | #' | |
| 29 | #' @seealso [control_step()] and [control_logistic()] for the available | |
| 30 | #' customization options. | |
| 31 | #' | |
| 32 | #' @examples | |
| 33 | #' # Testing dataset with just two treatment arms. | |
| 34 | #' library(survival) | |
| 35 | #' library(dplyr) | |
| 36 | #' | |
| 37 | #' adrs_f <- tern_ex_adrs %>% | |
| 38 | #' filter( | |
| 39 | #' PARAMCD == "BESRSPI", | |
| 40 | #'     ARM %in% c("B: Placebo", "A: Drug X") | |
| 41 | #' ) %>% | |
| 42 | #' mutate( | |
| 43 | #' # Reorder levels of ARM to have Placebo as reference arm for Odds Ratio calculations. | |
| 44 | #' ARM = droplevels(forcats::fct_relevel(ARM, "B: Placebo")), | |
| 45 | #'     RSP = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0), | |
| 46 | #' SEX = factor(SEX) | |
| 47 | #' ) | |
| 48 | #' | |
| 49 | #' variables <- list( | |
| 50 | #' arm = "ARM", | |
| 51 | #' biomarker = "BMRKR1", | |
| 52 | #' covariates = "AGE", | |
| 53 | #' response = "RSP" | |
| 54 | #' ) | |
| 55 | #' | |
| 56 | #' # Fit default STEP models: Here a constant treatment effect is estimated in each subgroup. | |
| 57 | #' # We use a large enough bandwidth to avoid too small subgroups and linear separation in those. | |
| 58 | #' step_matrix <- fit_rsp_step( | |
| 59 | #' variables = variables, | |
| 60 | #' data = adrs_f, | |
| 61 | #' control = c(control_logistic(), control_step(bandwidth = 0.9)) | |
| 62 | #' ) | |
| 63 | #' dim(step_matrix) | |
| 64 | #' head(step_matrix) | |
| 65 | #' | |
| 66 | #' # Specify different polynomial degree for the biomarker interaction to use more flexible local | |
| 67 | #' # models. Or specify different logistic regression options, including confidence level. | |
| 68 | #' step_matrix2 <- fit_rsp_step( | |
| 69 | #' variables = variables, | |
| 70 | #' data = adrs_f, | |
| 71 | #' control = c(control_logistic(conf_level = 0.9), control_step(bandwidth = NULL, degree = 1)) | |
| 72 | #' ) | |
| 73 | #' | |
| 74 | #' # Use a global constant model. This is helpful as a reference for the subgroup models. | |
| 75 | #' step_matrix3 <- fit_rsp_step( | |
| 76 | #' variables = variables, | |
| 77 | #' data = adrs_f, | |
| 78 | #' control = c(control_logistic(), control_step(bandwidth = NULL, num_points = 2L)) | |
| 79 | #' ) | |
| 80 | #' | |
| 81 | #' # It is also possible to use strata, i.e. use conditional logistic regression models. | |
| 82 | #' variables2 <- list( | |
| 83 | #' arm = "ARM", | |
| 84 | #' biomarker = "BMRKR1", | |
| 85 | #' covariates = "AGE", | |
| 86 | #' response = "RSP", | |
| 87 | #'   strata = c("STRATA1", "STRATA2") | |
| 88 | #' ) | |
| 89 | #' | |
| 90 | #' step_matrix4 <- fit_rsp_step( | |
| 91 | #' variables = variables2, | |
| 92 | #' data = adrs_f, | |
| 93 | #' control = c(control_logistic(), control_step(bandwidth = NULL)) | |
| 94 | #' ) | |
| 95 | #' | |
| 96 | #' @export | |
| 97 | fit_rsp_step <- function(variables, | |
| 98 | data, | |
| 99 |                          control = c(control_step(), control_logistic())) { | |
| 100 | 5x | assert_df_with_variables(data, variables) | 
| 101 | 5x | checkmate::assert_list(control, names = "named") | 
| 102 | 5x | data <- data[!is.na(data[[variables$biomarker]]), ] | 
| 103 | 5x | window_sel <- h_step_window(x = data[[variables$biomarker]], control = control) | 
| 104 | 5x | interval_center <- window_sel$interval[, "Interval Center"] | 
| 105 | 5x | form <- h_step_rsp_formula(variables = variables, control = control) | 
| 106 | 5x |   estimates <- if (is.null(control$bandwidth)) { | 
| 107 | 1x | h_step_rsp_est( | 
| 108 | 1x | formula = form, | 
| 109 | 1x | data = data, | 
| 110 | 1x | variables = variables, | 
| 111 | 1x | x = interval_center, | 
| 112 | 1x | control = control | 
| 113 | ) | |
| 114 |   } else { | |
| 115 | 4x | tmp <- mapply( | 
| 116 | 4x | FUN = h_step_rsp_est, | 
| 117 | 4x | x = interval_center, | 
| 118 | 4x | subset = as.list(as.data.frame(window_sel$sel)), | 
| 119 | 4x | MoreArgs = list( | 
| 120 | 4x | formula = form, | 
| 121 | 4x | data = data, | 
| 122 | 4x | variables = variables, | 
| 123 | 4x | control = control | 
| 124 | ) | |
| 125 | ) | |
| 126 | # Maybe we find a more elegant solution than this. | |
| 127 | 4x |     rownames(tmp) <- c("n", "logor", "se", "ci_lower", "ci_upper") | 
| 128 | 4x | t(tmp) | 
| 129 | } | |
| 130 | 5x | result <- cbind(window_sel$interval, estimates) | 
| 131 | 5x | structure( | 
| 132 | 5x | result, | 
| 133 | 5x |     class = c("step", "matrix"), | 
| 134 | 5x | variables = variables, | 
| 135 | 5x | control = control | 
| 136 | ) | |
| 137 | } | 
| 1 | #' Class for `CombinationFunction` | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' `CombinationFunction` is an S4 class which extends standard functions. These are special functions that | |
| 6 | #' can be combined and negated with the logical operators. | |
| 7 | #' | |
| 8 | #' @param e1 (`CombinationFunction`)\cr left hand side of logical operator. | |
| 9 | #' @param e2 (`CombinationFunction`)\cr right hand side of logical operator. | |
| 10 | #' @param x (`CombinationFunction`)\cr the function which should be negated. | |
| 11 | #' | |
| 12 | #' @return A logical value indicating whether the left hand side of the equation equals the right hand side. | |
| 13 | #' | |
| 14 | #' @examples | |
| 15 | #' higher <- function(a) { | |
| 16 | #' force(a) | |
| 17 | #' CombinationFunction( | |
| 18 | #'     function(x) { | |
| 19 | #' x > a | |
| 20 | #' } | |
| 21 | #' ) | |
| 22 | #' } | |
| 23 | #' | |
| 24 | #' lower <- function(b) { | |
| 25 | #' force(b) | |
| 26 | #' CombinationFunction( | |
| 27 | #'     function(x) { | |
| 28 | #' x < b | |
| 29 | #' } | |
| 30 | #' ) | |
| 31 | #' } | |
| 32 | #' | |
| 33 | #' c1 <- higher(5) | |
| 34 | #' c2 <- lower(10) | |
| 35 | #' c3 <- higher(5) & lower(10) | |
| 36 | #' c3(7) | |
| 37 | #' | |
| 38 | #' @name combination_function | |
| 39 | #' @aliases CombinationFunction-class | |
| 40 | #' @exportClass CombinationFunction | |
| 41 | #' @export CombinationFunction | |
| 42 | CombinationFunction <- methods::setClass("CombinationFunction", contains = "function") # nolint | |
| 43 | ||
| 44 | #' @describeIn combination_function Logical "AND" combination of `CombinationFunction` functions. | |
| 45 | #' The resulting object is of the same class, and evaluates the two argument functions. The result | |
| 46 | #' is then the "AND" of the two individual results. | |
| 47 | #' | |
| 48 | #' @export | |
| 49 | methods::setMethod( | |
| 50 | "&", | |
| 51 | signature = c(e1 = "CombinationFunction", e2 = "CombinationFunction"), | |
| 52 |   definition = function(e1, e2) { | |
| 53 | 4x |     CombinationFunction(function(...) { | 
| 54 | 490x | e1(...) && e2(...) | 
| 55 | }) | |
| 56 | } | |
| 57 | ) | |
| 58 | ||
| 59 | #' @describeIn combination_function Logical "OR" combination of `CombinationFunction` functions. | |
| 60 | #' The resulting object is of the same class, and evaluates the two argument functions. The result | |
| 61 | #' is then the "OR" of the two individual results. | |
| 62 | #' | |
| 63 | #' @export | |
| 64 | methods::setMethod( | |
| 65 | "|", | |
| 66 | signature = c(e1 = "CombinationFunction", e2 = "CombinationFunction"), | |
| 67 |   definition = function(e1, e2) { | |
| 68 | 2x |     CombinationFunction(function(...) { | 
| 69 | 4x | e1(...) || e2(...) | 
| 70 | }) | |
| 71 | } | |
| 72 | ) | |
| 73 | ||
| 74 | #' @describeIn combination_function Logical negation of `CombinationFunction` functions. | |
| 75 | #' The resulting object is of the same class, and evaluates the original function. The result | |
| 76 | #' is then the opposite of this results. | |
| 77 | #' | |
| 78 | #' @export | |
| 79 | methods::setMethod( | |
| 80 | "!", | |
| 81 | signature = c(x = "CombinationFunction"), | |
| 82 |   definition = function(x) { | |
| 83 | 2x |     CombinationFunction(function(...) { | 
| 84 | 305x | !x(...) | 
| 85 | }) | |
| 86 | } | |
| 87 | ) | 
| 1 | #' Compare variables between groups | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' The analyze function [compare_vars()] creates a layout element to summarize and compare one or more variables, using | |
| 6 | #' the S3 generic function [s_summary()] to calculate a list of summary statistics. A list of all available statistics | |
| 7 | #' for numeric variables can be viewed by running `get_stats("analyze_vars_numeric", add_pval = TRUE)` and for | |
| 8 | #' non-numeric variables by running `get_stats("analyze_vars_counts", add_pval = TRUE)`. Use the `.stats` parameter to | |
| 9 | #' specify the statistics to include in your output summary table. | |
| 10 | #' | |
| 11 | #' Prior to using this function in your table layout you must use [rtables::split_cols_by()] to create a column | |
| 12 | #' split on the variable to be used in comparisons, and specify a reference group via the `ref_group` parameter. | |
| 13 | #' Comparisons can be performed for each group (column) against the specified reference group by including the p-value | |
| 14 | #' statistic. | |
| 15 | #' | |
| 16 | #' @inheritParams argument_convention | |
| 17 | #' @param .stats (`character`)\cr statistics to select for the table. | |
| 18 | #' | |
| 19 | #'  Options for numeric variables are: ``r shQuote(get_stats("analyze_vars_numeric", add_pval = TRUE), type = "sh")`` | |
| 20 | #' | |
| 21 | #'  Options for non-numeric variables are: ``r shQuote(get_stats("analyze_vars_counts", add_pval = TRUE), type = "sh")`` | |
| 22 | #' | |
| 23 | #' @note | |
| 24 | #' * For factor variables, `denom` for factor proportions can only be `n` since the purpose is to compare proportions | |
| 25 | #' between columns, therefore a row-based proportion would not make sense. Proportion based on `N_col` would | |
| 26 | #' be difficult since we use counts for the chi-squared test statistic, therefore missing values should be accounted | |
| 27 | #' for as explicit factor levels. | |
| 28 | #' * If factor variables contain `NA`, these `NA` values are excluded by default. To include `NA` values | |
| 29 | #' set `na.rm = FALSE` and missing values will be displayed as an `NA` level. Alternatively, an explicit | |
| 30 | #' factor level can be defined for `NA` values during pre-processing via [df_explicit_na()] - the | |
| 31 | #' default `na_level` (`"<Missing>"`) will also be excluded when `na.rm` is set to `TRUE`. | |
| 32 | #' * For character variables, automatic conversion to factor does not guarantee that the table | |
| 33 | #' will be generated correctly. In particular for sparse tables this very likely can fail. | |
| 34 | #' Therefore it is always better to manually convert character variables to factors during pre-processing. | |
| 35 | #' * For `compare_vars()`, the column split must define a reference group via `ref_group` so that the comparison | |
| 36 | #' is well defined. | |
| 37 | #' | |
| 38 | #' @seealso [s_summary()] which is used internally to compute a summary within `s_compare()`, and [a_summary()] | |
| 39 | #' which is used (with `compare = TRUE`) as the analysis function for `compare_vars()`. | |
| 40 | #' | |
| 41 | #' @name compare_variables | |
| 42 | #' @include analyze_variables.R | |
| 43 | #' @order 1 | |
| 44 | NULL | |
| 45 | ||
| 46 | #' @describeIn compare_variables S3 generic function to produce a comparison summary. | |
| 47 | #' | |
| 48 | #' @return | |
| 49 | #' * `s_compare()` returns output of [s_summary()] and comparisons versus the reference group in the form of p-values. | |
| 50 | #' | |
| 51 | #' @export | |
| 52 | s_compare <- function(x, | |
| 53 |                       ...) { | |
| 54 | 9x |   UseMethod("s_compare", x) | 
| 55 | } | |
| 56 | ||
| 57 | #' @describeIn compare_variables Method for `numeric` class. This uses the standard t-test | |
| 58 | #' to calculate the p-value. | |
| 59 | #' | |
| 60 | #' @method s_compare numeric | |
| 61 | #' | |
| 62 | #' @examples | |
| 63 | #' # `s_compare.numeric` | |
| 64 | #' | |
| 65 | #' ## Usual case where both this and the reference group vector have more than 1 value. | |
| 66 | #' s_compare(rnorm(10, 5, 1), .ref_group = rnorm(5, -5, 1), .in_ref_col = FALSE) | |
| 67 | #' | |
| 68 | #' ## If one group has not more than 1 value, then p-value is not calculated. | |
| 69 | #' s_compare(rnorm(10, 5, 1), .ref_group = 1, .in_ref_col = FALSE) | |
| 70 | #' | |
| 71 | #' ## Empty numeric does not fail, it returns NA-filled items and no p-value. | |
| 72 | #' s_compare(numeric(), .ref_group = numeric(), .in_ref_col = FALSE) | |
| 73 | #' | |
| 74 | #' @export | |
| 75 | s_compare.numeric <- function(x, ...) { | |
| 76 | 2x | s_summary.numeric(x = x, compare_with_ref_group = TRUE, ...) | 
| 77 | } | |
| 78 | ||
| 79 | #' @describeIn compare_variables Method for `factor` class. This uses the chi-squared test | |
| 80 | #' to calculate the p-value. | |
| 81 | #' | |
| 82 | #' @method s_compare factor | |
| 83 | #' | |
| 84 | #' @examples | |
| 85 | #' # `s_compare.factor` | |
| 86 | #' | |
| 87 | #' ## Basic usage: | |
| 88 | #' x <- factor(c("a", "a", "b", "c", "a")) | |
| 89 | #' y <- factor(c("a", "b", "c")) | |
| 90 | #' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE) | |
| 91 | #' | |
| 92 | #' ## Management of NA values. | |
| 93 | #' x <- explicit_na(factor(c("a", "a", "b", "c", "a", NA, NA))) | |
| 94 | #' y <- explicit_na(factor(c("a", "b", "c", NA))) | |
| 95 | #' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE, na_rm = TRUE) | |
| 96 | #' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE, na_rm = FALSE) | |
| 97 | #' | |
| 98 | #' @export | |
| 99 | s_compare.factor <- function(x, ...) { | |
| 100 | 3x | s_summary.factor( | 
| 101 | 3x | x = x, | 
| 102 | 3x | compare_with_ref_group = TRUE, | 
| 103 | ... | |
| 104 | ) | |
| 105 | } | |
| 106 | ||
| 107 | #' @describeIn compare_variables Method for `character` class. This makes an automatic | |
| 108 | #' conversion to `factor` (with a warning) and then forwards to the method for factors. | |
| 109 | #' | |
| 110 | #' @method s_compare character | |
| 111 | #' | |
| 112 | #' @examples | |
| 113 | #' # `s_compare.character` | |
| 114 | #' | |
| 115 | #' ## Basic usage: | |
| 116 | #' x <- c("a", "a", "b", "c", "a") | |
| 117 | #' y <- c("a", "b", "c") | |
| 118 | #' s_compare(x, .ref_group = y, .in_ref_col = FALSE, .var = "x", verbose = FALSE) | |
| 119 | #' | |
| 120 | #' ## Note that missing values handling can make a large difference: | |
| 121 | #' x <- c("a", "a", "b", "c", "a", NA) | |
| 122 | #' y <- c("a", "b", "c", rep(NA, 20)) | |
| 123 | #' s_compare(x, | |
| 124 | #' .ref_group = y, .in_ref_col = FALSE, | |
| 125 | #' .var = "x", verbose = FALSE | |
| 126 | #' ) | |
| 127 | #' s_compare(x, | |
| 128 | #' .ref_group = y, .in_ref_col = FALSE, .var = "x", | |
| 129 | #' na.rm = FALSE, verbose = FALSE | |
| 130 | #' ) | |
| 131 | #' | |
| 132 | #' @export | |
| 133 | s_compare.character <- function(x, ...) { | |
| 134 | 1x | s_summary.character( | 
| 135 | 1x | x, | 
| 136 | 1x | compare_with_ref_group = TRUE, | 
| 137 | ... | |
| 138 | ) | |
| 139 | } | |
| 140 | ||
| 141 | #' @describeIn compare_variables Method for `logical` class. A chi-squared test | |
| 142 | #' is used. If missing values are not removed, then they are counted as `FALSE`. | |
| 143 | #' | |
| 144 | #' @method s_compare logical | |
| 145 | #' | |
| 146 | #' @examples | |
| 147 | #' # `s_compare.logical` | |
| 148 | #' | |
| 149 | #' ## Basic usage: | |
| 150 | #' x <- c(TRUE, FALSE, TRUE, TRUE) | |
| 151 | #' y <- c(FALSE, FALSE, TRUE) | |
| 152 | #' s_compare(x, .ref_group = y, .in_ref_col = FALSE) | |
| 153 | #' | |
| 154 | #' ## Management of NA values. | |
| 155 | #' x <- c(NA, TRUE, FALSE) | |
| 156 | #' y <- c(NA, NA, NA, NA, FALSE) | |
| 157 | #' s_compare(x, .ref_group = y, .in_ref_col = FALSE, na_rm = TRUE) | |
| 158 | #' s_compare(x, .ref_group = y, .in_ref_col = FALSE, na_rm = FALSE) | |
| 159 | #' | |
| 160 | #' @export | |
| 161 | s_compare.logical <- function(x, ...) { | |
| 162 | 3x | s_summary.logical( | 
| 163 | 3x | x = x, | 
| 164 | 3x | compare_with_ref_group = TRUE, | 
| 165 | ... | |
| 166 | ) | |
| 167 | } | |
| 168 | ||
| 169 | #' @describeIn compare_variables Layout-creating function which can take statistics function arguments | |
| 170 | #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. | |
| 171 | #' | |
| 172 | #' @param ... additional arguments passed to `s_compare()`, including: | |
| 173 | #'   * `denom`: (`string`) choice of denominator. Options are `c("n", "N_col", "N_row")`. For factor variables, can | |
| 174 | #' only be `"n"` (number of values in this row and column intersection). | |
| 175 | #' * `.N_row`: (`numeric(1)`) Row-wise N (row group count) for the group of observations being analyzed (i.e. with no | |
| 176 | #' column-based subsetting). | |
| 177 | #' * `.N_col`: (`numeric(1)`) Column-wise N (column count) for the full column being tabulated within. | |
| 178 | #' * `verbose`: (`flag`) Whether additional warnings and messages should be printed. Mainly used to print out | |
| 179 | #' information about factor casting. Defaults to `TRUE`. Used for `character`/`factor` variables only. | |
| 180 | #' @param .indent_mods (named `integer`)\cr indent modifiers for the labels. Each element of the vector | |
| 181 | #' should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation | |
| 182 | #' for that statistic's row label. | |
| 183 | #' | |
| 184 | #' @return | |
| 185 | #' * `compare_vars()` returns a layout object suitable for passing to further layouting functions, | |
| 186 | #' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing | |
| 187 | #' the statistics from `s_compare()` to the table layout. | |
| 188 | #' | |
| 189 | #' @examples | |
| 190 | #' # `compare_vars()` in `rtables` pipelines | |
| 191 | #' | |
| 192 | #' ## Default output within a `rtables` pipeline. | |
| 193 | #' lyt <- basic_table() %>% | |
| 194 | #'   split_cols_by("ARMCD", ref_group = "ARM B") %>% | |
| 195 | #'   compare_vars(c("AGE", "SEX")) | |
| 196 | #' build_table(lyt, tern_ex_adsl) | |
| 197 | #' | |
| 198 | #' ## Select and format statistics output. | |
| 199 | #' lyt <- basic_table() %>% | |
| 200 | #'   split_cols_by("ARMCD", ref_group = "ARM C") %>% | |
| 201 | #' compare_vars( | |
| 202 | #' vars = "AGE", | |
| 203 | #'     .stats = c("mean_sd", "pval"), | |
| 204 | #' .formats = c(mean_sd = "xx.x, xx.x"), | |
| 205 | #' .labels = c(mean_sd = "Mean, SD") | |
| 206 | #' ) | |
| 207 | #' build_table(lyt, df = tern_ex_adsl) | |
| 208 | #' | |
| 209 | #' @export | |
| 210 | #' @order 2 | |
| 211 | compare_vars <- function(lyt, | |
| 212 | vars, | |
| 213 | var_labels = vars, | |
| 214 | na_str = default_na_str(), | |
| 215 | nested = TRUE, | |
| 216 | ..., | |
| 217 | na_rm = TRUE, | |
| 218 | show_labels = "default", | |
| 219 | table_names = vars, | |
| 220 | section_div = NA_character_, | |
| 221 |                          .stats = c("n", "mean_sd", "count_fraction", "pval"), | |
| 222 | .stat_names = NULL, | |
| 223 | .formats = NULL, | |
| 224 | .labels = NULL, | |
| 225 |                          .indent_mods = NULL) { | |
| 226 | 4x | analyze_vars( | 
| 227 | 4x | lyt = lyt, | 
| 228 | 4x | compare_with_ref_group = TRUE, | 
| 229 | 4x | vars = vars, | 
| 230 | 4x | var_labels = var_labels, | 
| 231 | 4x | na_str = na_str, | 
| 232 | 4x | nested = nested, | 
| 233 | 4x | na_rm = na_rm, | 
| 234 | 4x | show_labels = show_labels, | 
| 235 | 4x | table_names = table_names, | 
| 236 | 4x | section_div = section_div, | 
| 237 | 4x | .stats = .stats, | 
| 238 | 4x | .stat_names = .stat_names, | 
| 239 | 4x | .formats = .formats, | 
| 240 | 4x | .labels = .labels, | 
| 241 | 4x | .indent_mods = .indent_mods, | 
| 242 | ... | |
| 243 | ) | |
| 244 | } | 
| 1 | #' Generate PK reference dataset | |
| 2 | #' | |
| 3 | #' @description `r lifecycle::badge("stable")` | |
| 4 | #' | |
| 5 | #' @return A `data.frame` of PK parameters. | |
| 6 | #' | |
| 7 | #' @examples | |
| 8 | #' pk_reference_dataset <- d_pkparam() | |
| 9 | #' | |
| 10 | #' @export | |
| 11 | d_pkparam <- function() { | |
| 12 | 4x | pk_dataset <- as.data.frame(matrix( | 
| 13 | 4x | c( | 
| 14 | 4x | "TMAX", "Time of CMAX", "Tmax", "Plasma/Blood/Serum", "1", | 
| 15 | 4x | "CMAX", "Max Conc", "Cmax", "Plasma/Blood/Serum", "2", | 
| 16 | 4x | "CMAXD", "Max Conc Norm by Dose", "Cmax/D", "Plasma/Blood/Serum", "3", | 
| 17 | 4x | "AUCIFO", "AUC Infinity Obs", "AUCinf obs", "Plasma/Blood/Serum", "4", | 
| 18 | 4x | "AUCIFP", "AUC Infinity Pred", "AUCinf pred", "Plasma/Blood/Serum", "5", | 
| 19 | 4x | "AUCIFOD", "AUC Infinity Obs Norm by Dose", "AUCinf/D obs", "Plasma/Blood/Serum", "6", | 
| 20 | 4x | "AUCIFD", "AUC Infinity Pred Norm by Dose", "AUCinf/D pred", "Plasma/Blood/Serum", "7", | 
| 21 | 4x | "AUCPEO", "AUC %Extrapolation Obs", "AUCinf extrap obs", "Plasma/Blood/Serum", "8", | 
| 22 | 4x | "AUCPEP", "AUC %Extrapolation Pred", "AUCinf extrap pred", "Plasma/Blood/Serum", "9", | 
| 23 | 4x | "AUCINT", "AUC from T1 to T2", "AUCupper-lower ", "Plasma/Blood/Serum", "10", | 
| 24 | 4x | "AUCTAU", "AUC Over Dosing Interval", "AUCtau", "Plasma/Blood/Serum", "11", | 
| 25 | 4x | "AUCLST", "AUC to Last Nonzero Conc", "AUClast", "Plasma/Blood/Serum", "12", | 
| 26 | 4x | "AUCALL", "AUC All", "AUCall", "Plasma/Blood/Serum", "13", | 
| 27 | 4x | "AUMCIFO", "AUMC Infinity Obs", "AUMCinf obs", "Plasma/Blood/Serum", "14", | 
| 28 | 4x | "AUMCIFP", "AUMC Infinity Pred", "AUMCinf pred", "Plasma/Blood/Serum", "15", | 
| 29 | 4x | "AUMCPEO", "AUMC % Extrapolation Obs", "AUMC extrap obs", "Plasma/Blood/Serum", "16", | 
| 30 | 4x | "AUMCPEP", "AUMC % Extrapolation Pred", "AUMC extrap pred", "Plasma/Blood/Serum", "17", | 
| 31 | 4x | "AUMCTAU", "AUMC Over Dosing Interval", "AUMCtau", "Plasma/Blood/Serum", "18", | 
| 32 | 4x | "AUMCLST", "AUMC to Last Nonzero Conc", "AUMClast", "Plasma/Blood/Serum", "19", | 
| 33 | 4x | "AURCIFO", "AURC Infinity Obs", "AURCinf obs", "Plasma/Blood/Serum", "20", | 
| 34 | 4x | "AURCIFP", "AURC Infinity Pred", "AURCinf pred", "Plasma/Blood/Serum", "21", | 
| 35 | 4x | "AURCPEO", "AURC % Extrapolation Obs", "AURC extrap obs", "Plasma/Blood/Serum", "22", | 
| 36 | 4x | "AURCPEP", "AURC % Extrapolation Pred", "AURC extrap pred", "Plasma/Blood/Serum", "23", | 
| 37 | 4x | "AURCLST", "AURC Dosing to Last Conc", "AURClast", "Plasma/Blood/Serum", "24", | 
| 38 | 4x | "AURCALL", "AURC All", "AURCall", "Plasma/Blood/Serum", "25", | 
| 39 | 4x | "TLST", "Time of Last Nonzero Conc", "Tlast", "Plasma/Blood/Serum", "26", | 
| 40 | 4x | "CO", "Initial Conc", "CO", "Plasma/Blood/Serum", "27", | 
| 41 | 4x | "C0", "Initial Conc", "C0", "Plasma/Blood/Serum", "28", | 
| 42 | 4x | "CAVG", "Average Conc", "Cavg", "Plasma/Blood/Serum", "29", | 
| 43 | 4x | "CLST", "Last Nonzero Conc", "Clast", "Plasma/Blood/Serum", "30", | 
| 44 | 4x | "CMIN", "Min Conc", "Cmin", "Plasma/Blood/Serum", "31", | 
| 45 | 4x | "LAMZHL", "Half-Life Lambda z", "t1/2", "Plasma/Blood/Serum", "32", | 
| 46 | 4x | "CLFO", "Total CL Obs by F", "CL/F obs", "Plasma/Blood/Serum", "33", | 
| 47 | 4x | "CLFP", "Total CL Pred by F", "CL/F pred", "Plasma/Blood/Serum", "34", | 
| 48 | 4x | "CLO", "Total CL Obs", "CL obs", "Plasma/Blood/Serum", "35", | 
| 49 | 4x | "CLP", "Total CL Pred", "CL pred", "Plasma/Blood/Serum", "36", | 
| 50 | 4x | "CLSS", "Total CL Steady State Pred", "CLss", "Plasma/Blood/Serum", "37", | 
| 51 | 4x | "CLSSF", "Total CL Steady State Pred by F", "CLss/F", "Plasma/Blood/Serum", "38", | 
| 52 | 4x | "VZFO", "Vz Obs by F", "Vz/F obs", "Plasma/Blood/Serum", "39", | 
| 53 | 4x | "VZFP", "Vz Pred by F", "Vz/F pred", "Plasma/Blood/Serum", "40", | 
| 54 | 4x | "VZO", "Vz Obs", "Vz obs", "Plasma/Blood/Serum", "41", | 
| 55 | 4x | "VZP", "Vz Pred", "Vz pred", "Plasma/Blood/Serum", "42", | 
| 56 | 4x | "VSSO", "Vol Dist Steady State Obs", "Vss obs", "Plasma/Blood/Serum", "43", | 
| 57 | 4x | "VSSP", "Vol Dist Steady State Pred", "Vss pred", "Plasma/Blood/Serum", "44", | 
| 58 | 4x | "LAMZ", "Lambda z", "Lambda z", "Plasma/Blood/Serum", "45", | 
| 59 | 4x | "LAMZLL", "Lambda z Lower Limit", "Lambda z lower", "Plasma/Blood/Serum", "46", | 
| 60 | 4x | "LAMZUL", "Lambda z Upper Limit", "Lambda z upper", "Plasma/Blood/Serum", "47", | 
| 61 | 4x | "LAMZNPT", "Number of Points for Lambda z", "No points Lambda z", "Plasma/Blood/Serum", "48", | 
| 62 | 4x | "MRTIFO", "MRT Infinity Obs", "MRTinf obs", "Plasma/Blood/Serum", "49", | 
| 63 | 4x | "MRTIFP", "MRT Infinity Pred", "MRTinf pred", "Plasma/Blood/Serum", "50", | 
| 64 | 4x | "MRTLST", "MRT to Last Nonzero Conc", "MRTlast", "Plasma/Blood/Serum", "51", | 
| 65 | 4x | "R2", "R Squared", "Rsq", "Plasma/Blood/Serum", "52", | 
| 66 | 4x | "R2ADJ", "R Squared Adjusted", "Rsq adjusted", "Plasma/Blood/Serum", "53", | 
| 67 | 4x | "TLAG", "Time Until First Nonzero Conc", "TIag", "Plasma/Blood/Serum", "54", | 
| 68 | 4x | "TMIN", "Time of CMIN Observation", "Tmin", "Plasma/Blood/Serum", "55", | 
| 69 | 4x | "ACCI", "Accumulation Index", "Accumulation Index", "Plasma/Blood/Serum/Urine", "56", | 
| 70 | 4x | "FLUCP", "Fluctuation%", "Fluctuation", "Plasma/Blood/Serum", "57", | 
| 71 | 4x | "CORRXY", "Correlation Between TimeX and Log ConcY", "Corr xy", "Plasma/Blood/Serum", "58", | 
| 72 | 4x | "RCAMINT", "Amt Rec from T1 to T2", "Ae", "Urine", "59", | 
| 73 | 4x | "RCPCINT", "Pct Rec from T1 to T2", "Fe", "Urine", "60", | 
| 74 | 4x | "VOLPK", "Sum of Urine Vol", "Urine volume", "Urine", "61", | 
| 75 | 4x | "RENALCL", "Renal CL", "CLR", "Plasma/Blood/Serum/Urine", "62", | 
| 76 | 4x | "ERTMAX", "Time of Max Excretion Rate", "Tmax Rate", "Urine", "63", | 
| 77 | 4x | "RMAX", "Time of Maximum Response", "Rmax", "Matrix of PD", "64", | 
| 78 | 4x | "RMIN", "Time of Minimum Response", "Rmin", "Matrix of PD", "65", | 
| 79 | 4x | "ERMAX", "Max Excretion Rate", "Max excretion rate", "Urine", "66", | 
| 80 | 4x | "MIDPTLST", "Midpoint of Collection Interval", "Midpoint last", "Urine", "67", | 
| 81 | 4x | "ERLST", "Last Meas Excretion Rate", "Rate last", "Urine", "68", | 
| 82 | 4x | "TON", "Time to Onset", "Tonset", "Matrix of PD", "69", | 
| 83 | 4x | "TOFF", "Time to Offset", "Toffset", "Matrix of PD", "70", | 
| 84 | 4x | "TBBLP", "Time Below Baseline %", "Time %Below Baseline", "Matrix of PD", "71", | 
| 85 | 4x | "TBTP", "Time Below Threshold %", "Time %Below Threshold", "Matrix of PD", "72", | 
| 86 | 4x | "TABL", "Time Above Baseline", "Time Above Baseline", "Matrix of PD", "73", | 
| 87 | 4x | "TAT", "Time Above Threshold", "Time Above Threshold", "Matrix of PD", "74", | 
| 88 | 4x | "TBT", "Time Below Threshold", "Time Below Threshold", "Matrix of PD", "75", | 
| 89 | 4x | "TBLT", "Time Between Baseline and Threshold", "Time Between Baseline Threshold", "Matrix of PD", "76", | 
| 90 | 4x | "BLRSP", "Baseline Response", "Baseline", "Matrix of PD", "77", | 
| 91 | 4x | "TSHDRSP", "Response Threshold", "Threshold", "Matrix of PD", "78", | 
| 92 | 4x | "AUCABL", "AUC Above Baseline", "AUC above baseline", "Matrix of PD", "79", | 
| 93 | 4x | "AUCAT", "AUC Above Threshold", "AUC above threshold", "Matrix of PD", "80", | 
| 94 | 4x | "AUCBBL", "AUC Below Baseline", "AUC below baseline", "Matrix of PD", "81", | 
| 95 | 4x | "AUCBT", "AUC Below Threshold", "AUC below threshold", "Matrix of PD", "82", | 
| 96 | 4x | "AUCBLDIF", "Diff AUC Above Base and AUC Below Base", "AUC diff baseline", "Matrix of PD", "83", | 
| 97 | 4x | "AUCTDIF", "Diff AUC Above Thr and AUC Below Thr", "AUCnet threshold", "Matrix of PD", "84", | 
| 98 | 4x | "TDIFF", "Diff Time to Offset and Time to Onset", "Diff toffset-tonset", "Matrix of PD", "85", | 
| 99 | 4x | "AUCPBEO", "AUC %Back Extrapolation Obs", "AUC%Back extrap obs", "Plasma/Blood/Serum", "86", | 
| 100 | 4x | "AUCPBEP", "AUC %Back Extrapolation Pred", "AUC%Back extrap pred", "Plasma/Blood/Serum", "87", | 
| 101 | 4x | "TSLP1L", "Lower Time Limit Slope 1st", "Slope1 lower", "Matrix of PD", "88", | 
| 102 | 4x | "TSLP1U", "Upper Time Limit Slope 1st Segment", "Slope1 upper", "Matrix of PD", "89", | 
| 103 | 4x | "TSLP2L", "Lower Time Limit Slope 2nd Segment", "Slope2 lower", "Matrix of PD", "90", | 
| 104 | 4x | "TSLP2U", "Upper Time Limit Slope 2nd Segment", "Slope2 upper", "Matrix of PD", "91", | 
| 105 | 4x | "SLP1", "Slope, 1st Segment", "Slope1", "Matrix of PD", "92", | 
| 106 | 4x | "SLP2", "Slope, 2nd Segment", "Slope2", "Matrix of PD", "93", | 
| 107 | 4x | "SLP1PT", "Number of Points for Slope 1st Segment", "No points slope1", "Matrix of PD", "94", | 
| 108 | 4x | "SLP2PT", "Number of Points for Slope 2nd Segment", "No points slope2", "Matrix of PD", "95", | 
| 109 | 4x | "R2ADJS1", "R-Squared Adjusted Slope, 1st Segment", "Rsq adjusted slope1", "Matrix of PD", "96", | 
| 110 | 4x | "R2ADJS2", "R-Squared Adjusted Slope, 2nd Segment", "Rsq adjusted slope2", "Matrix of PD", "97", | 
| 111 | 4x | "R2SLP1", "R Squared, Slope, 1st Segment", "Rsq slope1", "Matrix of PD", "98", | 
| 112 | 4x | "R2SLP2", "R Squared, Slope, 2nd Segment", "Rsq slope2", "Matrix of PD", "99", | 
| 113 | 4x | "CORRXYS1", "Corr Btw TimeX and Log ConcY, Slope 1st", "Corr xy slope1", "Plasma/Blood/Serum", "100", | 
| 114 | 4x | "CORRXYS2", "Corr Btw TimeX and Log ConcY, Slope 1st Slope 2nd", "Corr xy slope2", "Plasma/Blood/Serum", "101", | 
| 115 | 4x | "AILAMZ", "Accumulation Index using Lambda z", "AILAMZ", "Plasma/Blood/Serum", "102", | 
| 116 | 4x | "ARAUC", "Accumulation Ratio AUCTAU", "ARAUC", "Plasma/Blood/Serum", "103", | 
| 117 | 4x | "ARAUCD", "Accum Ratio AUCTAU norm by dose", "ARAUCD", "Plasma/Blood/Serum", "104", | 
| 118 | 4x | "ARAUCIFO", "Accum Ratio AUC Infinity Obs", "ARAUCIFO", "Plasma/Blood/Serum", "105", | 
| 119 | 4x | "ARAUCIFP", "Accum Ratio AUC Infinity Pred", "ARAUCIFP", "Plasma/Blood/Serum", "106", | 
| 120 | 4x | "ARAUCIND", "Accum Ratio AUC T1 to T2 norm by dose", "ARAUCIND_T1_T2_UNIT", "Plasma/Blood/Serum", "107", | 
| 121 | 4x | "ARAUCINT", "Accumulation Ratio AUC from T1 to T2", "ARAUCINT_T1_T2_UNIT", "Plasma/Blood/Serum", "108", | 
| 122 | 4x | "ARAUCIOD", "Accum Ratio AUCIFO Norm by Dose", "ARAUCIOD", "Plasma/Blood/Serum", "109", | 
| 123 | 4x | "ARAUCIPD", "Accum Ratio AUCIFP Norm by Dose", "ARAUCIPD", "Plasma/Blood/Serum", "110", | 
| 124 | 4x | "ARAUCLST", "Accum Ratio AUC to Last Nonzero Conc", "ARAUCLST", "Plasma/Blood/Serum", "111", | 
| 125 | 4x | "ARCMAX", "Accumulation Ratio Cmax", "ARCMAX", "Plasma/Blood/Serum", "112", | 
| 126 | 4x | "ARCMAXD", "Accum Ratio Cmax norm by dose", "ARCMAXD", "Plasma/Blood/Serum", "113", | 
| 127 | 4x | "ARCMIN", "Accumulation Ratio Cmin", "ARCMIN", "Plasma/Blood/Serum", "114", | 
| 128 | 4x | "ARCMIND", "Accum Ratio Cmin norm by dose", "ARCMIND", "Plasma/Blood/Serum", "115", | 
| 129 | 4x | "ARCTROUD", "Accum Ratio Ctrough norm by dose", "ARCTROUD", "Plasma/Blood/Serum", "116", | 
| 130 | 4x | "ARCTROUG", "Accumulation Ratio Ctrough", "ARCTROUG", "Plasma/Blood/Serum", "117", | 
| 131 | 4x | "AUCALLB", "AUC All Norm by BMI", "AUCall_B", "Plasma/Blood/Serum", "118", | 
| 132 | 4x | "AUCALLD", "AUC All Norm by Dose", "AUCall_D", "Plasma/Blood/Serum", "119", | 
| 133 | 4x | "AUCALLS", "AUC All Norm by SA", "AUCall_S", "Plasma/Blood/Serum", "120", | 
| 134 | 4x | "AUCALLW", "AUC All Norm by WT", "AUCall_W", "Plasma/Blood/Serum", "121", | 
| 135 | 4x | "AUCIFOB", "AUC Infinity Obs Norm by BMI", "AUCINF_obs_B", "Plasma/Blood/Serum", "122", | 
| 136 | 4x | "AUCIFOLN", "AUC Infinity Obs LN Transformed", "AUCIFOLN", "Plasma/Blood/Serum", "123", | 
| 137 | 4x | "AUCIFOS", "AUC Infinity Obs Norm by SA", "AUCINF_obs_S", "Plasma/Blood/Serum", "124", | 
| 138 | 4x | "AUCIFOUB", "AUC Infinity Obs, Unbound Drug", "AUCIFOUB", "Plasma/Blood/Serum", "125", | 
| 139 | 4x | "AUCIFOW", "AUC Infinity Obs Norm by WT", "AUCINF_obs_W", "Plasma/Blood/Serum", "126", | 
| 140 | 4x | "AUCIFPB", "AUC Infinity Pred Norm by BMI", "AUCINF_pred_B", "Plasma/Blood/Serum", "127", | 
| 141 | 4x | "AUCIFPD", "AUC Infinity Pred Norm by Dose", "AUCINF_pred_D", "Plasma/Blood/Serum", "128", | 
| 142 | 4x | "AUCIFPS", "AUC Infinity Pred Norm by SA", "AUCINF_pred_S", "Plasma/Blood/Serum", "129", | 
| 143 | 4x | "AUCIFPUB", "AUC Infinity Pred, Unbound Drug", "AUCIFPUB", "Plasma/Blood/Serum", "130", | 
| 144 | 4x | "AUCIFPW", "AUC Infinity Pred Norm by WT", "AUCINF_pred_W", "Plasma/Blood/Serum", "131", | 
| 145 | 4x | "AUCINTB", "AUC from T1 to T2 Norm by BMI", "AUC_B_T1_T2_UNIT", "Plasma/Blood/Serum", "132", | 
| 146 | 4x | "AUCINTD", "AUC from T1 to T2 Norm by Dose", "AUC_D_T1_T2_UNIT", "Plasma/Blood/Serum", "133", | 
| 147 | 4x | "AUCINTS", "AUC from T1 to T2 Norm by SA", "AUC_S_T1_T2_UNIT", "Plasma/Blood/Serum", "134", | 
| 148 | 4x | "AUCINTW", "AUC from T1 to T2 Norm by WT", "AUC_W_T1_T2_UNIT", "Plasma/Blood/Serum", "135", | 
| 149 | 4x | "AUCLSTB", "AUC to Last Nonzero Conc Norm by BMI", "AUClast_B", "Plasma/Blood/Serum", "136", | 
| 150 | 4x | "AUCLSTD", "AUC to Last Nonzero Conc Norm by Dose", "AUClast_D", "Plasma/Blood/Serum", "137", | 
| 151 | 4x | "AUCLSTLN", "AUC to Last Nonzero Conc LN Transformed", "AUCLSTLN", "Plasma/Blood/Serum", "138", | 
| 152 | 4x | "AUCLSTS", "AUC to Last Nonzero Conc Norm by SA", "AUClast_S", "Plasma/Blood/Serum", "139", | 
| 153 | 4x | "AUCLSTUB", "AUC to Last Nonzero Conc, Unbound Drug", "AUCLSTUB", "Plasma/Blood/Serum", "140", | 
| 154 | 4x | "AUCLSTW", "AUC to Last Nonzero Conc Norm by WT", "AUClast_W", "Plasma/Blood/Serum", "141", | 
| 155 | 4x | "AUCTAUB", "AUC Over Dosing Interval Norm by BMI", "AUC_TAU_B", "Plasma/Blood/Serum", "142", | 
| 156 | 4x | "AUCTAUD", "AUC Over Dosing Interval Norm by Dose", "AUC_TAU_D", "Plasma/Blood/Serum", "143", | 
| 157 | 4x | "AUCTAUS", "AUC Over Dosing Interval Norm by SA", "AUC_TAU_S", "Plasma/Blood/Serum", "144", | 
| 158 | 4x | "AUCTAUW", "AUC Over Dosing Interval Norm by WT", "AUC_TAU_W", "Plasma/Blood/Serum", "145", | 
| 159 | 4x | "AUMCIFOB", "AUMC Infinity Obs Norm by BMI", "AUMCINF_obs_B", "Plasma/Blood/Serum", "146", | 
| 160 | 4x | "AUMCIFOD", "AUMC Infinity Obs Norm by Dose", "AUMCINF_obs_D", "Plasma/Blood/Serum", "147", | 
| 161 | 4x | "AUMCIFOS", "AUMC Infinity Obs Norm by SA", "AUMCINF_obs_S", "Plasma/Blood/Serum", "148", | 
| 162 | 4x | "AUMCIFOW", "AUMC Infinity Obs Norm by WT", "AUMCINF_obs_W", "Plasma/Blood/Serum", "149", | 
| 163 | 4x | "AUMCIFPB", "AUMC Infinity Pred Norm by BMI", "AUMCINF_pred_B", "Plasma/Blood/Serum", "150", | 
| 164 | 4x | "AUMCIFPD", "AUMC Infinity Pred Norm by Dose", "AUMCINF_pred_D", "Plasma/Blood/Serum", "151", | 
| 165 | 4x | "AUMCIFPS", "AUMC Infinity Pred Norm by SA", "AUMCINF_pred_S", "Plasma/Blood/Serum", "152", | 
| 166 | 4x | "AUMCIFPW", "AUMC Infinity Pred Norm by WT", "AUMCINF_pred_W", "Plasma/Blood/Serum", "153", | 
| 167 | 4x | "AUMCLSTB", "AUMC to Last Nonzero Conc Norm by BMI", "AUMClast_B", "Plasma/Blood/Serum", "154", | 
| 168 | 4x | "AUMCLSTD", "AUMC to Last Nonzero Conc Norm by Dose", "AUMClast_D", "Plasma/Blood/Serum", "155", | 
| 169 | 4x | "AUMCLSTS", "AUMC to Last Nonzero Conc Norm by SA", "AUMClast_S", "Plasma/Blood/Serum", "156", | 
| 170 | 4x | "AUMCLSTW", "AUMC to Last Nonzero Conc Norm by WT", "AUMClast_W", "Plasma/Blood/Serum", "157", | 
| 171 | 4x | "AUMCTAUB", "AUMC Over Dosing Interval Norm by BMI", "AUMCTAUB", "Plasma/Blood/Serum", "158", | 
| 172 | 4x | "AUMCTAUD", "AUMC Over Dosing Interval Norm by Dose", "AUMCTAUD", "Plasma/Blood/Serum", "159", | 
| 173 | 4x | "AUMCTAUS", "AUMC Over Dosing Interval Norm by SA", "AUMCTAUS", "Plasma/Blood/Serum", "160", | 
| 174 | 4x | "AUMCTAUW", "AUMC Over Dosing Interval Norm by WT", "AUMCTAUW", "Plasma/Blood/Serum", "161", | 
| 175 | 4x | "AURCALLB", "AURC All Norm by BMI", "AURCALLB", "Plasma/Blood/Serum", "162", | 
| 176 | 4x | "AURCALLD", "AURC All Norm by Dose", "AURCALLD", "Plasma/Blood/Serum", "163", | 
| 177 | 4x | "AURCALLS", "AURC All Norm by SA", "AURCALLS", "Plasma/Blood/Serum", "164", | 
| 178 | 4x | "AURCALLW", "AURC All Norm by WT", "AURCALLW", "Plasma/Blood/Serum", "165", | 
| 179 | 4x | "AURCIFOB", "AURC Infinity Obs Norm by BMI", "AURCIFOB", "Plasma/Blood/Serum", "166", | 
| 180 | 4x | "AURCIFOD", "AURC Infinity Obs Norm by Dose", "AURCIFOD", "Plasma/Blood/Serum", "167", | 
| 181 | 4x | "AURCIFOS", "AURC Infinity Obs Norm by SA", "AURCIFOS", "Plasma/Blood/Serum", "168", | 
| 182 | 4x | "AURCIFOW", "AURC Infinity Obs Norm by WT", "AURCIFOW", "Plasma/Blood/Serum", "169", | 
| 183 | 4x | "AURCIFPB", "AURC Infinity Pred Norm by BMI", "AURCIFPB", "Plasma/Blood/Serum", "170", | 
| 184 | 4x | "AURCIFPD", "AURC Infinity Pred Norm by Dose", "AURCIFPD", "Plasma/Blood/Serum", "171", | 
| 185 | 4x | "AURCIFPS", "AURC Infinity Pred Norm by SA", "AURCIFPS", "Plasma/Blood/Serum", "172", | 
| 186 | 4x | "AURCIFPW", "AURC Infinity Pred Norm by WT", "AURCIFPW", "Plasma/Blood/Serum", "173", | 
| 187 | 4x | "AURCINT", "AURC from T1 to T2", "AURCINT_T1_T2_UNIT", "Plasma/Blood/Serum", "174", | 
| 188 | 4x | "AURCINTB", "AURC from T1 to T2 Norm by BMI", "AURCINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "175", | 
| 189 | 4x | "AURCINTD", "AURC from T1 to T2 Norm by Dose", "AURCINTD_T1_T2_UNIT", "Plasma/Blood/Serum", "176", | 
| 190 | 4x | "AURCINTS", "AURC from T1 to T2 Norm by SA", "AURCINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "177", | 
| 191 | 4x | "AURCINTW", "AURC from T1 to T2 Norm by WT", "AURCINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "178", | 
| 192 | 4x | "AURCLSTB", "AURC to Last Nonzero Rate Norm by BMI", "AURCLSTB", "Plasma/Blood/Serum", "179", | 
| 193 | 4x | "AURCLSTD", "AURC to Last Nonzero Rate Norm by Dose", "AURCLSTD", "Plasma/Blood/Serum", "180", | 
| 194 | 4x | "AURCLSTS", "AURC to Last Nonzero Rate Norm by SA", "AURCLSTS", "Plasma/Blood/Serum", "181", | 
| 195 | 4x | "AURCLSTW", "AURC to Last Nonzero Rate Norm by WT", "AURCLSTW", "Plasma/Blood/Serum", "182", | 
| 196 | 4x | "C0B", "Initial Conc Norm by BMI", "C0B", "Plasma/Blood/Serum", "183", | 
| 197 | 4x | "C0D", "Initial Conc Norm by Dose", "C0D", "Plasma/Blood/Serum", "184", | 
| 198 | 4x | "C0S", "Initial Conc Norm by SA", "C0S", "Plasma/Blood/Serum", "185", | 
| 199 | 4x | "C0W", "Initial Conc Norm by WT", "C0W", "Plasma/Blood/Serum", "186", | 
| 200 | 4x | "CAVGB", "Average Conc Norm by BMI", "CAVGB", "Plasma/Blood/Serum", "187", | 
| 201 | 4x | "CAVGD", "Average Conc Norm by Dose", "CAVGD", "Plasma/Blood/Serum", "188", | 
| 202 | 4x | "CAVGINT", "Average Conc from T1 to T2", "CAVGINT_T1_T2_UNIT", "Plasma/Blood/Serum", "189", | 
| 203 | 4x | "CAVGINTB", "Average Conc from T1 to T2 Norm by BMI", "CAVGINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "190", | 
| 204 | 4x | "CAVGINTD", "Average Conc from T1 to T2 Norm by Dose", "CAVGINTD_T1_T2_UNIT", "Plasma/Blood/Serum", "191", | 
| 205 | 4x | "CAVGINTS", "Average Conc from T1 to T2 Norm by SA", "CAVGINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "192", | 
| 206 | 4x | "CAVGINTW", "Average Conc from T1 to T2 Norm by WT", "CAVGINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "193", | 
| 207 | 4x | "CAVGS", "Average Conc Norm by SA", "CAVGS", "Plasma/Blood/Serum", "194", | 
| 208 | 4x | "CAVGW", "Average Conc Norm by WT", "CAVGW", "Plasma/Blood/Serum", "195", | 
| 209 | 4x | "CHTMAX", "Concentration at Half Tmax", "CHTMAX", "Plasma/Blood/Serum", "196", | 
| 210 | 4x | "CLFOB", "Total CL Obs by F Norm by BMI", "CLFOB", "Plasma/Blood/Serum", "197", | 
| 211 | 4x | "CLFOD", "Total CL Obs by F Norm by Dose", "CLFOD", "Plasma/Blood/Serum", "198", | 
| 212 | 4x | "CLFOS", "Total CL Obs by F Norm by SA", "CLFOS", "Plasma/Blood/Serum", "199", | 
| 213 | 4x | "CLFOW", "Total CL Obs by F Norm by WT", "CLFOW", "Plasma/Blood/Serum", "200", | 
| 214 | 4x | "CLFPB", "Total CL Pred by F Norm by BMI", "CLFPB", "Plasma/Blood/Serum", "201", | 
| 215 | 4x | "CLFPD", "Total CL Pred by F Norm by Dose", "CLFPD", "Plasma/Blood/Serum", "202", | 
| 216 | 4x | "CLFPS", "Total CL Pred by F Norm by SA", "CLFPS", "Plasma/Blood/Serum", "203", | 
| 217 | 4x | "CLFPW", "Total CL Pred by F Norm by WT", "CLFPW", "Plasma/Blood/Serum", "204", | 
| 218 | 4x | "CLFTAU", "Total CL by F for Dose Int", "CLFTAU", "Plasma/Blood/Serum", "205", | 
| 219 | 4x | "CLFTAUB", "Total CL by F for Dose Int Norm by BMI", "CLFTAUB", "Plasma/Blood/Serum", "206", | 
| 220 | 4x | "CLFTAUD", "Total CL by F for Dose Int Norm by Dose", "CLFTAUD", "Plasma/Blood/Serum", "207", | 
| 221 | 4x | "CLFTAUS", "Total CL by F for Dose Int Norm by SA", "CLFTAUS", "Plasma/Blood/Serum", "208", | 
| 222 | 4x | "CLFTAUW", "Total CL by F for Dose Int Norm by WT", "CLFTAUW", "Plasma/Blood/Serum", "209", | 
| 223 | 4x | "CLFUB", "Apparent CL for Unbound Drug", "CLFUB", "Plasma/Blood/Serum", "210", | 
| 224 | 4x | "CLOB", "Total CL Obs Norm by BMI", "CLOB", "Plasma/Blood/Serum", "211", | 
| 225 | 4x | "CLOD", "Total CL Obs Norm by Dose", "CLOD", "Plasma/Blood/Serum", "212", | 
| 226 | 4x | "CLOS", "Total CL Obs Norm by SA", "CLOS", "Plasma/Blood/Serum", "213", | 
| 227 | 4x | "CLOUB", "Total CL Obs for Unbound Drug", "CLOUB", "Plasma/Blood/Serum", "214", | 
| 228 | 4x | "CLOW", "Total CL Obs Norm by WT", "CLOW", "Plasma/Blood/Serum", "215", | 
| 229 | 4x | "CLPB", "Total CL Pred Norm by BMI", "CLPB", "Plasma/Blood/Serum", "216", | 
| 230 | 4x | "CLPD", "Total CL Pred Norm by Dose", "CLPD", "Plasma/Blood/Serum", "217", | 
| 231 | 4x | "CLPS", "Total CL Pred Norm by SA", "CLPS", "Plasma/Blood/Serum", "218", | 
| 232 | 4x | "CLPUB", "Total CL Pred for Unbound Drug", "CLPUB", "Plasma/Blood/Serum", "219", | 
| 233 | 4x | "CLPW", "Total CL Pred Norm by WT", "CLPW", "Plasma/Blood/Serum", "220", | 
| 234 | 4x | "CLRPCLEV", "Renal CL as Pct CL EV", "CLRPCLEV", "Urine", "221", | 
| 235 | 4x | "CLRPCLIV", "Renal CL as Pct CL IV", "CLRPCLIV", "Urine", "222", | 
| 236 | 4x | "CLSTB", "Last Nonzero Conc Norm by BMI", "CLSTB", "Plasma/Blood/Serum", "223", | 
| 237 | 4x | "CLSTD", "Last Nonzero Conc Norm by Dose", "CLSTD", "Plasma/Blood/Serum", "224", | 
| 238 | 4x | "CLSTS", "Last Nonzero Conc Norm by SA", "CLSTS", "Plasma/Blood/Serum", "225", | 
| 239 | 4x | "CLSTW", "Last Nonzero Conc Norm by WT", "CLSTW", "Plasma/Blood/Serum", "226", | 
| 240 | 4x | "CLTAU", "Total CL for Dose Int", "CLTAU", "Plasma/Blood/Serum", "227", | 
| 241 | 4x | "CLTAUB", "Total CL for Dose Int Norm by BMI", "CLTAUB", "Plasma/Blood/Serum", "228", | 
| 242 | 4x | "CLTAUD", "Total CL for Dose Int Norm by Dose", "CLTAUD", "Plasma/Blood/Serum", "229", | 
| 243 | 4x | "CLTAUS", "Total CL for Dose Int Norm by SA", "CLTAUS", "Plasma/Blood/Serum", "230", | 
| 244 | 4x | "CLTAUW", "Total CL for Dose Int Norm by WT", "CLTAUW", "Plasma/Blood/Serum", "231", | 
| 245 | 4x | "CMAXB", "Max Conc Norm by BMI", "CMAX_B", "Plasma/Blood/Serum", "232", | 
| 246 | 4x | "CMAXLN", "Max Conc LN Transformed", "CMAXLN", "Plasma/Blood/Serum", "233", | 
| 247 | 4x | "CMAXS", "Max Conc Norm by SA", "CMAXS", "Plasma/Blood/Serum", "234", | 
| 248 | 4x | "CMAXUB", "Max Conc, Unbound Drug", "CMAXUB", "Plasma/Blood/Serum", "235", | 
| 249 | 4x | "CMAXW", "Max Conc Norm by WT", "CMAXW", "Plasma/Blood/Serum", "236", | 
| 250 | 4x | "CMINB", "Min Conc Norm by BMI", "CMINB", "Plasma/Blood/Serum", "237", | 
| 251 | 4x | "CMIND", "Min Conc Norm by Dose", "CMIND", "Plasma/Blood/Serum", "238", | 
| 252 | 4x | "CMINS", "Min Conc Norm by SA", "CMINS", "Plasma/Blood/Serum", "239", | 
| 253 | 4x | "CMINW", "Min Conc Norm by WT", "CMINW", "Plasma/Blood/Serum", "240", | 
| 254 | 4x | "CONC", "Concentration", "CONC", "Plasma/Blood/Serum", "241", | 
| 255 | 4x | "CONCB", "Conc by BMI", "CONCB", "Plasma/Blood/Serum", "242", | 
| 256 | 4x | "CONCD", "Conc by Dose", "CONCD", "Plasma/Blood/Serum", "243", | 
| 257 | 4x | "CONCS", "Conc by SA", "CONCS", "Plasma/Blood/Serum", "244", | 
| 258 | 4x | "CONCW", "Conc by WT", "CONCW", "Plasma/Blood/Serum", "245", | 
| 259 | 4x | "CTROUGH", "Conc Trough", "CTROUGH", "Plasma/Blood/Serum", "246", | 
| 260 | 4x | "CTROUGHB", "Conc Trough by BMI", "CTROUGHB", "Plasma/Blood/Serum", "247", | 
| 261 | 4x | "CTROUGHD", "Conc Trough by Dose", "CTROUGHD", "Plasma/Blood/Serum", "248", | 
| 262 | 4x | "CTROUGHS", "Conc Trough by SA", "CTROUGHS", "Plasma/Blood/Serum", "249", | 
| 263 | 4x | "CTROUGHW", "Conc Trough by WT", "CTROUGHW", "Plasma/Blood/Serum", "250", | 
| 264 | 4x | "EFFHL", "Effective Half-Life", "EFFHL", "Plasma/Blood/Serum", "251", | 
| 265 | 4x | "ERINT", "Excret Rate from T1 to T2", "ERINT_T1_T2_UNIT", "Plasma/Blood/Serum", "252", | 
| 266 | 4x | "ERINTB", "Excret Rate from T1 to T2 Norm by BMI", "ERINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "253", | 
| 267 | 4x | "ERINTD", "Excret Rate from T1 to T2 Norm by Dose", "ERINTD_T1_T2_UNIT", "Plasma/Blood/Serum", "254", | 
| 268 | 4x | "ERINTS", "Excret Rate from T1 to T2 Norm by SA", "ERINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "255", | 
| 269 | 4x | "ERINTW", "Excret Rate from T1 to T2 Norm by WT", "ERINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "256", | 
| 270 | 4x | "ERLSTB", "Last Meas Excretion Rate Norm by BMI", "ERLSTB", "Plasma/Blood/Serum", "257", | 
| 271 | 4x | "ERLSTD", "Last Meas Excretion Rate Norm by Dose", "ERLSTD", "Plasma/Blood/Serum", "258", | 
| 272 | 4x | "ERLSTS", "Last Meas Excretion Rate Norm by SA", "ERLSTS", "Plasma/Blood/Serum", "259", | 
| 273 | 4x | "ERLSTW", "Last Meas Excretion Rate Norm by WT", "ERLSTW", "Plasma/Blood/Serum", "260", | 
| 274 | 4x | "ERMAXB", "Max Excretion Rate Norm by BMI", "ERMAXB", "Plasma/Blood/Serum", "261", | 
| 275 | 4x | "ERMAXD", "Max Excretion Rate Norm by Dose", "ERMAXD", "Plasma/Blood/Serum", "262", | 
| 276 | 4x | "ERMAXS", "Max Excretion Rate Norm by SA", "ERMAXS", "Plasma/Blood/Serum", "263", | 
| 277 | 4x | "ERMAXW", "Max Excretion Rate Norm by WT", "ERMAXW", "Plasma/Blood/Serum", "264", | 
| 278 | 4x | "ERTLST", "Midpoint of Interval of Last Nonzero ER", "ERTLST", "Plasma/Blood/Serum", "265", | 
| 279 | 4x | "FABS", "Absolute Bioavailability", "FABS", "Plasma/Blood/Serum", "266", | 
| 280 | 4x | "FB", "Fraction Bound", "FB", "Plasma/Blood/Serum", "267", | 
| 281 | 4x | "FREL", "Relative Bioavailability", "FREL", "Plasma/Blood/Serum", "268", | 
| 282 | 4x | "FREXINT", "Fract Excr from T1 to T2", "FREXINT_T1_T2_UNIT", "Plasma/Blood/Serum", "269", | 
| 283 | 4x | "FU", "Fraction Unbound", "FU", "Plasma/Blood/Serum", "270", | 
| 284 | 4x | "HDCL", "Hemodialysis Clearance", "HDCL", "Plasma/Blood/Serum", "271", | 
| 285 | 4x | "HDER", "Hemodialysis Extraction Ratio", "HDER", "Plasma/Blood/Serum", "272", | 
| 286 | 4x | "HTMAX", "Half Tmax", "HTMAX", "Plasma/Blood/Serum", "273", | 
| 287 | 4x | "LAMZLTAU", "Lambda z Lower Limit TAU", "LAMZLTAU", "Plasma/Blood/Serum", "274", | 
| 288 | 4x | "LAMZNTAU", "Number of Points for Lambda z TAU", "LAMZNTAU", "Plasma/Blood/Serum", "275", | 
| 289 | 4x | "LAMZSPN", "Lambda z Span", "LAMZSPN", "Plasma/Blood/Serum", "276", | 
| 290 | 4x | "LAMZTAU", "Lambda z TAU", "LAMZTAU", "Plasma/Blood/Serum", "277", | 
| 291 | 4x | "LAMZUTAU", "Lambda z Upper Limit TAU", "LAMZUTAU", "Plasma/Blood/Serum", "278", | 
| 292 | 4x | "MAT", "Mean Absorption Time", "MAT", "Plasma/Blood/Serum", "279", | 
| 293 | 4x | "MRAUCIFO", "Metabolite Ratio for AUC Infinity Obs", "MRAUCIFO", "Plasma/Blood/Serum", "280", | 
| 294 | 4x | "MRAUCIFP", "Metabolite Ratio for AUC Infinity Pred", "MRAUCIFP", "Plasma/Blood/Serum", "281", | 
| 295 | 4x | "MRAUCINT", "Metabolite Ratio AUC from T1 to T2", "MRAUCINT_T1_T2_UNIT", "Plasma/Blood/Serum", "282", | 
| 296 | 4x | "MRAUCLST", "Metabolite Ratio AUC Last Nonzero Conc", "MRAUCLST", "Plasma/Blood/Serum", "283", | 
| 297 | 4x | "MRAUCTAU", "Metabolite Ratio for AUC Dosing Interval", "MRAUCTAU", "Plasma/Blood/Serum", "284", | 
| 298 | 4x | "MRCMAX", "Metabolite Ratio for Max Conc", "MRCMAX", "Plasma/Blood/Serum", "285", | 
| 299 | 4x | "MRTEVIFO", "MRT Extravasc Infinity Obs", "MRTEVIFO", "Plasma/Blood/Serum", "286", | 
| 300 | 4x | "MRTEVIFP", "MRT Extravasc Infinity Pred", "MRTEVIFP", "Plasma/Blood/Serum", "287", | 
| 301 | 4x | "MRTEVLST", "MRT Extravasc to Last Nonzero Conc", "MRTEVLST", "Plasma/Blood/Serum", "288", | 
| 302 | 4x | "MRTIVIFO", "MRT Intravasc Infinity Obs", "MRTIVIFO", "Plasma/Blood/Serum", "289", | 
| 303 | 4x | "MRTIVIFP", "MRT Intravasc Infinity Pred", "MRTIVIFP", "Plasma/Blood/Serum", "290", | 
| 304 | 4x | "MRTIVLST", "MRT Intravasc to Last Nonzero Conc", "MRTIVLST", "Plasma/Blood/Serum", "291", | 
| 305 | 4x | "NRENALCL", "Nonrenal CL", "NRENALCL", "Urine", "292", | 
| 306 | 4x | "NRENLCLB", "Nonrenal CL Norm by BMI", "NRENLCLB", "Urine", "293", | 
| 307 | 4x | "NRENLCLD", "Nonrenal CL Norm by Dose", "NRENLCLD", "Urine", "294", | 
| 308 | 4x | "NRENLCLS", "Nonrenal CL Norm by SA", "NRENLCLS", "Urine", "295", | 
| 309 | 4x | "NRENLCLW", "Nonrenal CL Norm by WT", "NRENLCLW", "Urine", "296", | 
| 310 | 4x | "PTROUGHR", "Peak Trough Ratio", "PTROUGHR", "Plasma/Blood/Serum", "297", | 
| 311 | 4x | "RAAUC", "Ratio AUC", "RAAUC", "Plasma/Blood/Serum", "298", | 
| 312 | 4x | "RAAUCIFO", "Ratio AUC Infinity Obs", "RAAUCIFO", "Plasma/Blood/Serum", "299", | 
| 313 | 4x | "RAAUCIFP", "Ratio AUC Infinity Pred", "RAAUCIFP", "Plasma/Blood/Serum", "300", | 
| 314 | 4x | "RACMAX", "Ratio CMAX", "RACMAX", "Plasma/Blood/Serum", "301", | 
| 315 | 4x | "RAMAXMIN", "Ratio of CMAX to CMIN", "RAMAXMIN", "Plasma/Blood/Serum", "302", | 
| 316 | 4x | "RCAMIFO", "Amt Rec Infinity Obs", "RCAMIFO", "Plasma/Blood/Serum", "303", | 
| 317 | 4x | "RCAMIFOB", "Amt Rec Infinity Obs Norm by BMI", "RCAMIFOB", "Plasma/Blood/Serum", "304", | 
| 318 | 4x | "RCAMIFOS", "Amt Rec Infinity Obs Norm by SA", "RCAMIFOS", "Plasma/Blood/Serum", "305", | 
| 319 | 4x | "RCAMIFOW", "Amt Rec Infinity Obs Norm by WT", "RCAMIFOW", "Plasma/Blood/Serum", "306", | 
| 320 | 4x | "RCAMIFP", "Amt Rec Infinity Pred", "RCAMIFP", "Plasma/Blood/Serum", "307", | 
| 321 | 4x | "RCAMIFPB", "Amt Rec Infinity Pred Norm by BMI", "RCAMIFPB", "Plasma/Blood/Serum", "308", | 
| 322 | 4x | "RCAMIFPS", "Amt Rec Infinity Pred Norm by SA", "RCAMIFPS", "Plasma/Blood/Serum", "309", | 
| 323 | 4x | "RCAMIFPW", "Amt Rec Infinity Pred Norm by WT", "RCAMIFPW", "Plasma/Blood/Serum", "310", | 
| 324 | 4x | "RCAMINTB", "Amt Rec from T1 to T2 Norm by BMI", "RCAMINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "311", | 
| 325 | 4x | "RCAMINTS", "Amt Rec from T1 to T2 Norm by SA", "RCAMINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "312", | 
| 326 | 4x | "RCAMINTW", "Amt Rec from T1 to T2 Norm by WT", "RCAMINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "313", | 
| 327 | 4x | "RCAMTAU", "Amt Rec Over Dosing Interval", "RCAMTAU", "Plasma/Blood/Serum", "314", | 
| 328 | 4x | "RCAMTAUB", "Amt Rec Over Dosing Interval Norm by BMI", "RCAMTAUB", "Plasma/Blood/Serum", "315", | 
| 329 | 4x | "RCAMTAUS", "Amt Rec Over Dosing Interval Norm by SA", "RCAMTAUS", "Plasma/Blood/Serum", "316", | 
| 330 | 4x | "RCAMTAUW", "Amt Rec Over Dosing Interval Norm by WT", "RCAMTAUW", "Plasma/Blood/Serum", "317", | 
| 331 | 4x | "RCPCIFO", "Pct Rec Infinity Obs", "RCPCIFO", "Plasma/Blood/Serum", "318", | 
| 332 | 4x | "RCPCIFOB", "Pct Rec Infinity Obs Norm by BMI", "RCPCIFOB", "Plasma/Blood/Serum", "319", | 
| 333 | 4x | "RCPCIFOS", "Pct Rec Infinity Obs Norm by SA", "RCPCIFOS", "Plasma/Blood/Serum", "320", | 
| 334 | 4x | "RCPCIFOW", "Pct Rec Infinity Obs Norm by WT", "RCPCIFOW", "Plasma/Blood/Serum", "321", | 
| 335 | 4x | "RCPCIFP", "Pct Rec Infinity Pred", "RCPCIFP", "Plasma/Blood/Serum", "322", | 
| 336 | 4x | "RCPCIFPB", "Pct Rec Infinity Pred Norm by BMI", "RCPCIFPB", "Plasma/Blood/Serum", "323", | 
| 337 | 4x | "RCPCIFPS", "Pct Rec Infinity Pred Norm by SA", "RCPCIFPS", "Plasma/Blood/Serum", "324", | 
| 338 | 4x | "RCPCIFPW", "Pct Rec Infinity Pred Norm by WT", "RCPCIFPW", "Plasma/Blood/Serum", "325", | 
| 339 | 4x | "RCPCINTB", "Pct Rec from T1 to T2 Norm by BMI", "RCPCINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "326", | 
| 340 | 4x | "RCPCINTS", "Pct Rec from T1 to T2 Norm by SA", "RCPCINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "327", | 
| 341 | 4x | "RCPCINTW", "Pct Rec from T1 to T2 Norm by WT", "RCPCINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "328", | 
| 342 | 4x | "RCPCLST", "Pct Rec to Last Nonzero Conc", "RCPCLST", "Plasma/Blood/Serum", "329", | 
| 343 | 4x | "RCPCTAU", "Pct Rec Over Dosing Interval", "RCPCTAU", "Plasma/Blood/Serum", "330", | 
| 344 | 4x | "RCPCTAUB", "Pct Rec Over Dosing Interval Norm by BMI", "RCPCTAUB", "Plasma/Blood/Serum", "331", | 
| 345 | 4x | "RCPCTAUS", "Pct Rec Over Dosing Interval Norm by SA", "RCPCTAUS", "Plasma/Blood/Serum", "332", | 
| 346 | 4x | "RCPCTAUW", "Pct Rec Over Dosing Interval Norm by WT", "RCPCTAUW", "Plasma/Blood/Serum", "333", | 
| 347 | 4x | "RENALCLB", "Renal CL Norm by BMI", "RENALCLB", "Urine", "334", | 
| 348 | 4x | "RENALCLD", "Renal CL Norm by Dose", "RENALCLD", "Urine", "335", | 
| 349 | 4x | "RENALCLS", "Renal CL Norm by SA", "RENALCLS", "Urine", "336", | 
| 350 | 4x | "RENALCLW", "Renal CL Norm by WT", "RENALCLW", "Urine", "337", | 
| 351 | 4x | "RENCLTAU", "Renal CL for Dose Int", "RENCLTAU", "Urine", "338", | 
| 352 | 4x | "RNCLINT", "Renal CL from T1 to T2", "RNCLINT_T1_T2_UNIT", "Urine", "339", | 
| 353 | 4x | "RNCLINTB", "Renal CL from T1 to T2 Norm by BMI", "RNCLINTB_T1_T2_UNIT", "Urine", "340", | 
| 354 | 4x | "RNCLINTD", "Renal CL from T1 to T2 Norm by Dose", "RNCLINTD_T1_T2_UNIT", "Urine", "341", | 
| 355 | 4x | "RNCLINTS", "Renal CL from T1 to T2 Norm by SA", "RNCLINTS_T1_T2_UNIT", "Urine", "342", | 
| 356 | 4x | "RNCLINTW", "Renal CL from T1 to T2 Norm by WT", "RNCLINTW_T1_T2_UNIT", "Urine", "343", | 
| 357 | 4x | "RNCLTAUB", "Renal CL for Dose Int Norm by BMI", "RNCLTAUB", "Urine", "344", | 
| 358 | 4x | "RNCLTAUD", "Renal CL for Dose Int Norm by Dose", "RNCLTAUD", "Urine", "345", | 
| 359 | 4x | "RNCLTAUS", "Renal CL for Dose Int Norm by SA", "RNCLTAUS", "Urine", "346", | 
| 360 | 4x | "RNCLTAUW", "Renal CL for Dose Int Norm by WT", "RNCLTAUW", "Urine", "347", | 
| 361 | 4x | "RNCLUB", "Renal CL for Unbound Drug", "RNCLUB", "Urine", "348", | 
| 362 | 4x | "SRAUC", "Stationarity Ratio AUC", "SRAUC", "Plasma/Blood/Serum", "349", | 
| 363 | 4x | "SWING", "Swing", "SWING", "Plasma/Blood/Serum", "350", | 
| 364 | 4x | "TAUHL", "Half-Life TAU", "TAUHL", "Plasma/Blood/Serum", "351", | 
| 365 | 4x | "TBBL", "Time Below Baseline", "Time_Below_B", "Plasma/Blood/Serum", "352", | 
| 366 | 4x | "TROUGHPR", "Trough Peak Ratio", "TROUGHPR", "Plasma/Blood/Serum", "353", | 
| 367 | 4x | "V0", "Vol Dist Initial", "V0", "Plasma/Blood/Serum", "354", | 
| 368 | 4x | "V0B", "Vol Dist Initial Norm by BMI", "V0B", "Plasma/Blood/Serum", "355", | 
| 369 | 4x | "V0D", "Vol Dist Initial Norm by Dose", "V0D", "Plasma/Blood/Serum", "356", | 
| 370 | 4x | "V0S", "Vol Dist Initial Norm by SA", "V0S", "Plasma/Blood/Serum", "357", | 
| 371 | 4x | "V0W", "Vol Dist Initial Norm by WT", "V0W", "Plasma/Blood/Serum", "358", | 
| 372 | 4x | "VSSOB", "Vol Dist Steady State Obs Norm by BMI", "VSSOB", "Plasma/Blood/Serum", "359", | 
| 373 | 4x | "VSSOBD", "Vol Dist Steady State Obs by B", "VSSOBD", "Plasma/Blood/Serum", "360", | 
| 374 | 4x | "VSSOD", "Vol Dist Steady State Obs Norm by Dose", "VSSOD", "Plasma/Blood/Serum", "361", | 
| 375 | 4x | "VSSOF", "Vol Dist Steady State Obs by F", "VSSOF", "Plasma/Blood/Serum", "362", | 
| 376 | 4x | "VSSOS", "Vol Dist Steady State Obs Norm by SA", "VSSOS", "Plasma/Blood/Serum", "363", | 
| 377 | 4x | "VSSOUB", "Vol Dist Steady State Obs by UB", "VSSOUB", "Plasma/Blood/Serum", "364", | 
| 378 | 4x | "VSSOW", "Vol Dist Steady State Obs Norm by WT", "VSSOW", "Plasma/Blood/Serum", "365", | 
| 379 | 4x | "VSSPB", "Vol Dist Steady State Pred Norm by BMI", "VSSPB", "Plasma/Blood/Serum", "366", | 
| 380 | 4x | "VSSPBD", "Vol Dist Steady State Pred by B", "VSSPBD", "Plasma/Blood/Serum", "367", | 
| 381 | 4x | "VSSPD", "Vol Dist Steady State Pred Norm by Dose", "VSSPD", "Plasma/Blood/Serum", "368", | 
| 382 | 4x | "VSSPF", "Vol Dist Steady State Pred by F", "VSSPF", "Plasma/Blood/Serum", "369", | 
| 383 | 4x | "VSSPS", "Vol Dist Steady State Pred Norm by SA", "VSSPS", "Plasma/Blood/Serum", "370", | 
| 384 | 4x | "VSSPUB", "Vol Dist Steady State Pred by UB", "VSSPUB", "Plasma/Blood/Serum", "371", | 
| 385 | 4x | "VSSPW", "Vol Dist Steady State Pred Norm by WT", "VSSPW", "Plasma/Blood/Serum", "372", | 
| 386 | 4x | "VZ", "Vol Z", "Vz", "Plasma/Blood/Serum", "373", | 
| 387 | 4x | "VZF", "Vol Z by F", "Vz_F", "Plasma/Blood/Serum", "374", | 
| 388 | 4x | "VZFOB", "Vz Obs by F Norm by BMI", "VZFOB", "Plasma/Blood/Serum", "375", | 
| 389 | 4x | "VZFOD", "Vz Obs by F Norm by Dose", "VZFOD", "Plasma/Blood/Serum", "376", | 
| 390 | 4x | "VZFOS", "Vz Obs by F Norm by SA", "VZFOS", "Plasma/Blood/Serum", "377", | 
| 391 | 4x | "VZFOUB", "Vz Obs by F for UB", "VZFOUB", "Plasma/Blood/Serum", "378", | 
| 392 | 4x | "VZFOW", "Vz Obs by F Norm by WT", "VZFOW", "Plasma/Blood/Serum", "379", | 
| 393 | 4x | "VZFPB", "Vz Pred by F Norm by BMI", "VZFPB", "Plasma/Blood/Serum", "380", | 
| 394 | 4x | "VZFPD", "Vz Pred by F Norm by Dose", "VZFPD", "Plasma/Blood/Serum", "381", | 
| 395 | 4x | "VZFPS", "Vz Pred by F Norm by SA", "VZFPS", "Plasma/Blood/Serum", "382", | 
| 396 | 4x | "VZFPUB", "Vz Pred by F for UB", "VZFPUB", "Plasma/Blood/Serum", "383", | 
| 397 | 4x | "VZFPW", "Vz Pred by F Norm by WT", "VZFPW", "Plasma/Blood/Serum", "384", | 
| 398 | 4x | "VZFTAU", "Vz for Dose Int by F", "VZFTAU", "Plasma/Blood/Serum", "385", | 
| 399 | 4x | "VZFTAUB", "Vz for Dose Int by F Norm by BMI", "VZFTAUB", "Plasma/Blood/Serum", "386", | 
| 400 | 4x | "VZFTAUD", "Vz for Dose Int by F Norm by Dose", "VZFTAUD", "Plasma/Blood/Serum", "387", | 
| 401 | 4x | "VZFTAUS", "Vz for Dose Int by F Norm by SA", "VZFTAUS", "Plasma/Blood/Serum", "388", | 
| 402 | 4x | "VZFTAUW", "Vz for Dose Int by F Norm by WT", "VZFTAUW", "Plasma/Blood/Serum", "389", | 
| 403 | 4x | "VZOB", "Vz Obs Norm by BMI", "VZOB", "Plasma/Blood/Serum", "390", | 
| 404 | 4x | "VZOD", "Vz Obs Norm by Dose", "VZOD", "Plasma/Blood/Serum", "391", | 
| 405 | 4x | "VZOS", "Vz Obs Norm by SA", "VZOS", "Plasma/Blood/Serum", "392", | 
| 406 | 4x | "VZOUB", "Vz Obs for UB", "VZOUB", "Plasma/Blood/Serum", "393", | 
| 407 | 4x | "VZOW", "Vz Obs Norm by WT", "VZOW", "Plasma/Blood/Serum", "394", | 
| 408 | 4x | "VZPB", "Vz Pred Norm by BMI", "VZPB", "Plasma/Blood/Serum", "395", | 
| 409 | 4x | "VZPD", "Vz Pred Norm by Dose", "VZPD", "Plasma/Blood/Serum", "396", | 
| 410 | 4x | "VZPS", "Vz Pred Norm by SA", "VZPS", "Plasma/Blood/Serum", "397", | 
| 411 | 4x | "VZPUB", "Vz Pred for UB", "VZPUB", "Plasma/Blood/Serum", "398" | 
| 412 | ), | |
| 413 | 4x | ncol = 5, | 
| 414 | 4x | byrow = TRUE | 
| 415 | )) | |
| 416 | 4x |   colnames(pk_dataset) <- c("PARAMCD", "PARAM", "TLG_DISPLAY", "MATRIX", "TLG_ORDER") | 
| 417 | 4x | pk_dataset | 
| 418 | } |