| 1 |
#' Helper function to create a new SMQ variable in ADAE by stacking SMQ and/or CQ records. |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Helper function to create a new SMQ variable in ADAE that consists of all adverse events belonging to |
|
| 6 |
#' selected Standardized/Customized queries. The new dataset will only contain records of the adverse events |
|
| 7 |
#' belonging to any of the selected baskets. Remember that `na_str` must match the needed pre-processing |
|
| 8 |
#' done with [df_explicit_na()] to have the desired output. |
|
| 9 |
#' |
|
| 10 |
#' @inheritParams argument_convention |
|
| 11 |
#' @param baskets (`character`)\cr variable names of the selected Standardized/Customized queries. |
|
| 12 |
#' @param smq_varlabel (`string`)\cr a label for the new variable created. |
|
| 13 |
#' @param keys (`character`)\cr names of the key variables to be returned along with the new variable created. |
|
| 14 |
#' @param aag_summary (`data.frame`)\cr containing the SMQ baskets and the levels of interest for the final SMQ |
|
| 15 |
#' variable. This is useful when there are some levels of interest that are not observed in the `df` dataset. |
|
| 16 |
#' The two columns of this dataset should be named `basket` and `basket_name`. |
|
| 17 |
#' |
|
| 18 |
#' @return A `data.frame` with variables in `keys` taken from `df` and new variable SMQ containing |
|
| 19 |
#' records belonging to the baskets selected via the `baskets` argument. |
|
| 20 |
#' |
|
| 21 |
#' @examples |
|
| 22 |
#' adae <- tern_ex_adae[1:20, ] %>% df_explicit_na() |
|
| 23 |
#' h_stack_by_baskets(df = adae) |
|
| 24 |
#' |
|
| 25 |
#' aag <- data.frame( |
|
| 26 |
#' NAMVAR = c("CQ01NAM", "CQ02NAM", "SMQ01NAM", "SMQ02NAM"),
|
|
| 27 |
#' REFNAME = c( |
|
| 28 |
#' "D.2.1.5.3/A.1.1.1.1 aesi", "X.9.9.9.9/Y.8.8.8.8 aesi", |
|
| 29 |
#' "C.1.1.1.3/B.2.2.3.1 aesi", "C.1.1.1.3/B.3.3.3.3 aesi" |
|
| 30 |
#' ), |
|
| 31 |
#' SCOPE = c("", "", "BROAD", "BROAD"),
|
|
| 32 |
#' stringsAsFactors = FALSE |
|
| 33 |
#' ) |
|
| 34 |
#' |
|
| 35 |
#' basket_name <- character(nrow(aag)) |
|
| 36 |
#' cq_pos <- grep("^(CQ).+NAM$", aag$NAMVAR)
|
|
| 37 |
#' smq_pos <- grep("^(SMQ).+NAM$", aag$NAMVAR)
|
|
| 38 |
#' basket_name[cq_pos] <- aag$REFNAME[cq_pos] |
|
| 39 |
#' basket_name[smq_pos] <- paste0( |
|
| 40 |
#' aag$REFNAME[smq_pos], "(", aag$SCOPE[smq_pos], ")"
|
|
| 41 |
#' ) |
|
| 42 |
#' |
|
| 43 |
#' aag_summary <- data.frame( |
|
| 44 |
#' basket = aag$NAMVAR, |
|
| 45 |
#' basket_name = basket_name, |
|
| 46 |
#' stringsAsFactors = TRUE |
|
| 47 |
#' ) |
|
| 48 |
#' |
|
| 49 |
#' result <- h_stack_by_baskets(df = adae, aag_summary = aag_summary) |
|
| 50 |
#' all(levels(aag_summary$basket_name) %in% levels(result$SMQ)) |
|
| 51 |
#' |
|
| 52 |
#' h_stack_by_baskets( |
|
| 53 |
#' df = adae, |
|
| 54 |
#' aag_summary = NULL, |
|
| 55 |
#' keys = c("STUDYID", "USUBJID", "AEDECOD", "ARM"),
|
|
| 56 |
#' baskets = "SMQ01NAM" |
|
| 57 |
#' ) |
|
| 58 |
#' |
|
| 59 |
#' @export |
|
| 60 |
h_stack_by_baskets <- function(df, |
|
| 61 |
baskets = grep("^(SMQ|CQ).+NAM$", names(df), value = TRUE),
|
|
| 62 |
smq_varlabel = "Standardized MedDRA Query", |
|
| 63 |
keys = c("STUDYID", "USUBJID", "ASTDTM", "AEDECOD", "AESEQ"),
|
|
| 64 |
aag_summary = NULL, |
|
| 65 |
na_str = "<Missing>") {
|
|
| 66 | 5x |
smq_nam <- baskets[startsWith(baskets, "SMQ")] |
| 67 |
# SC corresponding to NAM |
|
| 68 | 5x |
smq_sc <- gsub(pattern = "NAM", replacement = "SC", x = smq_nam, fixed = TRUE) |
| 69 | 5x |
smq <- stats::setNames(smq_sc, smq_nam) |
| 70 | ||
| 71 | 5x |
checkmate::assert_character(baskets) |
| 72 | 5x |
checkmate::assert_string(smq_varlabel) |
| 73 | 5x |
checkmate::assert_data_frame(df) |
| 74 | 5x |
checkmate::assert_true(all(startsWith(baskets, "SMQ") | startsWith(baskets, "CQ"))) |
| 75 | 4x |
checkmate::assert_true(all(endsWith(baskets, "NAM"))) |
| 76 | 3x |
checkmate::assert_subset(baskets, names(df)) |
| 77 | 3x |
checkmate::assert_subset(keys, names(df)) |
| 78 | 3x |
checkmate::assert_subset(smq_sc, names(df)) |
| 79 | 3x |
checkmate::assert_string(na_str) |
| 80 | ||
| 81 | 3x |
if (!is.null(aag_summary)) {
|
| 82 | 1x |
assert_df_with_variables( |
| 83 | 1x |
df = aag_summary, |
| 84 | 1x |
variables = list(val = c("basket", "basket_name"))
|
| 85 |
) |
|
| 86 |
# Warning in case there is no match between `aag_summary$basket` and `baskets` argument. |
|
| 87 |
# Honestly, I think those should completely match. Target baskets should be the same. |
|
| 88 | 1x |
if (length(intersect(baskets, unique(aag_summary$basket))) == 0) {
|
| 89 | ! |
warning("There are 0 baskets in common between aag_summary$basket and `baskets` argument.")
|
| 90 |
} |
|
| 91 |
} |
|
| 92 | ||
| 93 | 3x |
var_labels <- c(formatters::var_labels(df[, keys]), "SMQ" = smq_varlabel) |
| 94 | ||
| 95 |
# convert `na_str` records from baskets to NA for the later loop and from wide to long steps |
|
| 96 | 3x |
df[, c(baskets, smq_sc)][df[, c(baskets, smq_sc)] == na_str] <- NA |
| 97 | ||
| 98 | 3x |
if (all(is.na(df[, baskets]))) { # in case there is no level for the target baskets
|
| 99 | 1x |
df_long <- df[-seq_len(nrow(df)), keys] # we just need an empty data frame keeping all factor levels |
| 100 |
} else {
|
|
| 101 |
# Concatenate SMQxxxNAM with corresponding SMQxxxSC |
|
| 102 | 2x |
df_cnct <- df[, c(keys, baskets[startsWith(baskets, "CQ")])] |
| 103 | ||
| 104 | 2x |
for (nam in names(smq)) {
|
| 105 | 4x |
sc <- smq[nam] # SMQxxxSC corresponding to SMQxxxNAM |
| 106 | 4x |
nam_notna <- !is.na(df[[nam]]) |
| 107 | 4x |
new_colname <- paste(nam, sc, sep = "_") |
| 108 | 4x |
df_cnct[nam_notna, new_colname] <- paste0(df[[nam]], "(", df[[sc]], ")")[nam_notna]
|
| 109 |
} |
|
| 110 | ||
| 111 | 2x |
df_cnct$unique_id <- seq(1, nrow(df_cnct)) |
| 112 | 2x |
var_cols <- names(df_cnct)[!(names(df_cnct) %in% c(keys, "unique_id"))] |
| 113 |
# have to convert df_cnct from tibble to data frame |
|
| 114 |
# as it throws a warning otherwise about rownames. |
|
| 115 |
# tibble do not support rownames and reshape creates rownames |
|
| 116 | ||
| 117 | 2x |
df_long <- stats::reshape( |
| 118 | 2x |
data = as.data.frame(df_cnct), |
| 119 | 2x |
varying = var_cols, |
| 120 | 2x |
v.names = "SMQ", |
| 121 | 2x |
idvar = names(df_cnct)[names(df_cnct) %in% c(keys, "unique_id")], |
| 122 | 2x |
direction = "long", |
| 123 | 2x |
new.row.names = seq(prod(length(var_cols), nrow(df_cnct))) |
| 124 |
) |
|
| 125 | ||
| 126 | 2x |
df_long <- df_long[!is.na(df_long[, "SMQ"]), !(names(df_long) %in% c("time", "unique_id"))]
|
| 127 | 2x |
df_long$SMQ <- as.factor(df_long$SMQ) |
| 128 |
} |
|
| 129 | ||
| 130 | 3x |
smq_levels <- setdiff(levels(df_long[["SMQ"]]), na_str) |
| 131 | ||
| 132 | 3x |
if (!is.null(aag_summary)) {
|
| 133 |
# A warning in case there is no match between df and aag_summary records |
|
| 134 | 1x |
if (length(intersect(smq_levels, unique(aag_summary$basket_name))) == 0) {
|
| 135 | 1x |
warning("There are 0 basket levels in common between aag_summary$basket_name and df.")
|
| 136 |
} |
|
| 137 | 1x |
df_long[["SMQ"]] <- factor( |
| 138 | 1x |
df_long[["SMQ"]], |
| 139 | 1x |
levels = sort( |
| 140 | 1x |
c( |
| 141 | 1x |
smq_levels, |
| 142 | 1x |
setdiff(unique(aag_summary$basket_name), smq_levels) |
| 143 |
) |
|
| 144 |
) |
|
| 145 |
) |
|
| 146 |
} else {
|
|
| 147 | 2x |
all_na_basket_flag <- vapply(df[, baskets], function(x) {
|
| 148 | 6x |
all(is.na(x)) |
| 149 | 2x |
}, FUN.VALUE = logical(1)) |
| 150 | 2x |
all_na_basket <- baskets[all_na_basket_flag] |
| 151 | ||
| 152 | 2x |
df_long[["SMQ"]] <- factor( |
| 153 | 2x |
df_long[["SMQ"]], |
| 154 | 2x |
levels = sort(c(smq_levels, all_na_basket)) |
| 155 |
) |
|
| 156 |
} |
|
| 157 | 3x |
formatters::var_labels(df_long) <- var_labels |
| 158 | 3x |
tibble::tibble(df_long) |
| 159 |
} |
| 1 |
#' Control function for Cox regression |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Sets a list of parameters for Cox regression fit. Used internally. |
|
| 6 |
#' |
|
| 7 |
#' @inheritParams argument_convention |
|
| 8 |
#' @param pval_method (`string`)\cr the method used for estimation of p.values; `wald` (default) or `likelihood`. |
|
| 9 |
#' @param interaction (`flag`)\cr if `TRUE`, the model includes the interaction between the studied |
|
| 10 |
#' treatment and candidate covariate. Note that for univariate models without treatment arm, and |
|
| 11 |
#' multivariate models, no interaction can be used so that this needs to be `FALSE`. |
|
| 12 |
#' @param ties (`string`)\cr among `exact` (equivalent to `DISCRETE` in SAS), `efron` and `breslow`, |
|
| 13 |
#' see [survival::coxph()]. Note: there is no equivalent of SAS `EXACT` method in R. |
|
| 14 |
#' |
|
| 15 |
#' @return A `list` of items with names corresponding to the arguments. |
|
| 16 |
#' |
|
| 17 |
#' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()]. |
|
| 18 |
#' |
|
| 19 |
#' @examples |
|
| 20 |
#' control_coxreg() |
|
| 21 |
#' |
|
| 22 |
#' @export |
|
| 23 |
control_coxreg <- function(pval_method = c("wald", "likelihood"),
|
|
| 24 |
ties = c("exact", "efron", "breslow"),
|
|
| 25 |
conf_level = 0.95, |
|
| 26 |
interaction = FALSE) {
|
|
| 27 | 55x |
pval_method <- match.arg(pval_method) |
| 28 | 55x |
ties <- match.arg(ties) |
| 29 | 55x |
checkmate::assert_flag(interaction) |
| 30 | 55x |
assert_proportion_value(conf_level) |
| 31 | 55x |
list( |
| 32 | 55x |
pval_method = pval_method, |
| 33 | 55x |
ties = ties, |
| 34 | 55x |
conf_level = conf_level, |
| 35 | 55x |
interaction = interaction |
| 36 |
) |
|
| 37 |
} |
|
| 38 | ||
| 39 |
#' Custom tidy methods for Cox regression |
|
| 40 |
#' |
|
| 41 |
#' @description `r lifecycle::badge("stable")`
|
|
| 42 |
#' |
|
| 43 |
#' @inheritParams argument_convention |
|
| 44 |
#' @param x (`list`)\cr result of the Cox regression model fitted by [fit_coxreg_univar()] (for univariate models) |
|
| 45 |
#' or [fit_coxreg_multivar()] (for multivariate models). |
|
| 46 |
#' |
|
| 47 |
#' @return [broom::tidy()] returns: |
|
| 48 |
#' * For `summary.coxph` objects, a `data.frame` with columns: `Pr(>|z|)`, `exp(coef)`, `exp(-coef)`, `lower .95`, |
|
| 49 |
#' `upper .95`, `level`, and `n`. |
|
| 50 |
#' * For `coxreg.univar` objects, a `data.frame` with columns: `effect`, `term`, `term_label`, `level`, `n`, `hr`, |
|
| 51 |
#' `lcl`, `ucl`, `pval`, and `ci`. |
|
| 52 |
#' * For `coxreg.multivar` objects, a `data.frame` with columns: `term`, `pval`, `term_label`, `hr`, `lcl`, `ucl`, |
|
| 53 |
#' `level`, and `ci`. |
|
| 54 |
#' |
|
| 55 |
#' @seealso [cox_regression] |
|
| 56 |
#' |
|
| 57 |
#' @name tidy_coxreg |
|
| 58 |
NULL |
|
| 59 | ||
| 60 |
#' @describeIn tidy_coxreg Custom tidy method for [survival::coxph()] summary results. |
|
| 61 |
#' |
|
| 62 |
#' Tidy the [survival::coxph()] results into a `data.frame` to extract model results. |
|
| 63 |
#' |
|
| 64 |
#' @method tidy summary.coxph |
|
| 65 |
#' |
|
| 66 |
#' @examples |
|
| 67 |
#' library(survival) |
|
| 68 |
#' library(broom) |
|
| 69 |
#' |
|
| 70 |
#' set.seed(1, kind = "Mersenne-Twister") |
|
| 71 |
#' |
|
| 72 |
#' dta_bladder <- with( |
|
| 73 |
#' data = bladder[bladder$enum < 5, ], |
|
| 74 |
#' data.frame( |
|
| 75 |
#' time = stop, |
|
| 76 |
#' status = event, |
|
| 77 |
#' armcd = as.factor(rx), |
|
| 78 |
#' covar1 = as.factor(enum), |
|
| 79 |
#' covar2 = factor( |
|
| 80 |
#' sample(as.factor(enum)), |
|
| 81 |
#' levels = 1:4, labels = c("F", "F", "M", "M")
|
|
| 82 |
#' ) |
|
| 83 |
#' ) |
|
| 84 |
#' ) |
|
| 85 |
#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
|
|
| 86 |
#' formatters::var_labels(dta_bladder)[names(labels)] <- labels |
|
| 87 |
#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE) |
|
| 88 |
#' |
|
| 89 |
#' formula <- "survival::Surv(time, status) ~ armcd + covar1" |
|
| 90 |
#' msum <- summary(coxph(stats::as.formula(formula), data = dta_bladder)) |
|
| 91 |
#' tidy(msum) |
|
| 92 |
#' |
|
| 93 |
#' @export |
|
| 94 |
tidy.summary.coxph <- function(x, # nolint |
|
| 95 |
...) {
|
|
| 96 | 199x |
checkmate::assert_class(x, "summary.coxph") |
| 97 | 199x |
pval <- x$coefficients |
| 98 | 199x |
confint <- x$conf.int |
| 99 | 199x |
levels <- rownames(pval) |
| 100 | ||
| 101 | 199x |
pval <- tibble::as_tibble(pval) |
| 102 | 199x |
confint <- tibble::as_tibble(confint) |
| 103 | ||
| 104 | 199x |
ret <- cbind(pval[, grepl("Pr", names(pval))], confint)
|
| 105 | 199x |
ret$level <- levels |
| 106 | 199x |
ret$n <- x[["n"]] |
| 107 | 199x |
ret |
| 108 |
} |
|
| 109 | ||
| 110 |
#' @describeIn tidy_coxreg Custom tidy method for a univariate Cox regression. |
|
| 111 |
#' |
|
| 112 |
#' Tidy up the result of a Cox regression model fitted by [fit_coxreg_univar()]. |
|
| 113 |
#' |
|
| 114 |
#' @method tidy coxreg.univar |
|
| 115 |
#' |
|
| 116 |
#' @examples |
|
| 117 |
#' ## Cox regression: arm + 1 covariate. |
|
| 118 |
#' mod1 <- fit_coxreg_univar( |
|
| 119 |
#' variables = list( |
|
| 120 |
#' time = "time", event = "status", arm = "armcd", |
|
| 121 |
#' covariates = "covar1" |
|
| 122 |
#' ), |
|
| 123 |
#' data = dta_bladder, |
|
| 124 |
#' control = control_coxreg(conf_level = 0.91) |
|
| 125 |
#' ) |
|
| 126 |
#' |
|
| 127 |
#' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates. |
|
| 128 |
#' mod2 <- fit_coxreg_univar( |
|
| 129 |
#' variables = list( |
|
| 130 |
#' time = "time", event = "status", arm = "armcd", |
|
| 131 |
#' covariates = c("covar1", "covar2")
|
|
| 132 |
#' ), |
|
| 133 |
#' data = dta_bladder, |
|
| 134 |
#' control = control_coxreg(conf_level = 0.91, interaction = TRUE) |
|
| 135 |
#' ) |
|
| 136 |
#' |
|
| 137 |
#' tidy(mod1) |
|
| 138 |
#' tidy(mod2) |
|
| 139 |
#' |
|
| 140 |
#' @export |
|
| 141 |
tidy.coxreg.univar <- function(x, # nolint |
|
| 142 |
...) {
|
|
| 143 | 38x |
checkmate::assert_class(x, "coxreg.univar") |
| 144 | 38x |
mod <- x$mod |
| 145 | 38x |
vars <- c(x$vars$arm, x$vars$covariates) |
| 146 | 38x |
has_arm <- "arm" %in% names(x$vars) |
| 147 | ||
| 148 | 38x |
result <- if (!has_arm) {
|
| 149 | 5x |
Map( |
| 150 | 5x |
mod = mod, vars = vars, |
| 151 | 5x |
f = function(mod, vars) {
|
| 152 | 6x |
h_coxreg_multivar_extract( |
| 153 | 6x |
var = vars, |
| 154 | 6x |
data = x$data, |
| 155 | 6x |
mod = mod, |
| 156 | 6x |
control = x$control |
| 157 |
) |
|
| 158 |
} |
|
| 159 |
) |
|
| 160 | 38x |
} else if (x$control$interaction) {
|
| 161 | 12x |
Map( |
| 162 | 12x |
mod = mod, covar = vars, |
| 163 | 12x |
f = function(mod, covar) {
|
| 164 | 26x |
h_coxreg_extract_interaction( |
| 165 | 26x |
effect = x$vars$arm, covar = covar, mod = mod, data = x$data, |
| 166 | 26x |
at = x$at, control = x$control |
| 167 |
) |
|
| 168 |
} |
|
| 169 |
) |
|
| 170 |
} else {
|
|
| 171 | 21x |
Map( |
| 172 | 21x |
mod = mod, vars = vars, |
| 173 | 21x |
f = function(mod, vars) {
|
| 174 | 53x |
h_coxreg_univar_extract( |
| 175 | 53x |
effect = x$vars$arm, covar = vars, data = x$data, mod = mod, |
| 176 | 53x |
control = x$control |
| 177 |
) |
|
| 178 |
} |
|
| 179 |
) |
|
| 180 |
} |
|
| 181 | 38x |
result <- do.call(rbind, result) |
| 182 | ||
| 183 | 38x |
result$ci <- Map(lcl = result$lcl, ucl = result$ucl, f = function(lcl, ucl) c(lcl, ucl)) |
| 184 | 38x |
result$n <- lapply(result$n, empty_vector_if_na) |
| 185 | 38x |
result$ci <- lapply(result$ci, empty_vector_if_na) |
| 186 | 38x |
result$hr <- lapply(result$hr, empty_vector_if_na) |
| 187 | 38x |
if (x$control$interaction) {
|
| 188 | 12x |
result$pval_inter <- lapply(result$pval_inter, empty_vector_if_na) |
| 189 |
# Remove interaction p-values due to change in specifications. |
|
| 190 | 12x |
result$pval[result$effect != "Treatment:"] <- NA |
| 191 |
} |
|
| 192 | 38x |
result$pval <- lapply(result$pval, empty_vector_if_na) |
| 193 | 38x |
attr(result, "conf_level") <- x$control$conf_level |
| 194 | 38x |
result |
| 195 |
} |
|
| 196 | ||
| 197 |
#' @describeIn tidy_coxreg Custom tidy method for a multivariate Cox regression. |
|
| 198 |
#' |
|
| 199 |
#' Tidy up the result of a Cox regression model fitted by [fit_coxreg_multivar()]. |
|
| 200 |
#' |
|
| 201 |
#' @method tidy coxreg.multivar |
|
| 202 |
#' |
|
| 203 |
#' @examples |
|
| 204 |
#' multivar_model <- fit_coxreg_multivar( |
|
| 205 |
#' variables = list( |
|
| 206 |
#' time = "time", event = "status", arm = "armcd", |
|
| 207 |
#' covariates = c("covar1", "covar2")
|
|
| 208 |
#' ), |
|
| 209 |
#' data = dta_bladder |
|
| 210 |
#' ) |
|
| 211 |
#' broom::tidy(multivar_model) |
|
| 212 |
#' |
|
| 213 |
#' @export |
|
| 214 |
tidy.coxreg.multivar <- function(x, # nolint |
|
| 215 |
...) {
|
|
| 216 | 16x |
checkmate::assert_class(x, "coxreg.multivar") |
| 217 | 16x |
vars <- c(x$vars$arm, x$vars$covariates) |
| 218 | ||
| 219 |
# Convert the model summaries to data. |
|
| 220 | 16x |
result <- Map( |
| 221 | 16x |
vars = vars, |
| 222 | 16x |
f = function(vars) {
|
| 223 | 60x |
h_coxreg_multivar_extract( |
| 224 | 60x |
var = vars, data = x$data, |
| 225 | 60x |
mod = x$mod, control = x$control |
| 226 |
) |
|
| 227 |
} |
|
| 228 |
) |
|
| 229 | 16x |
result <- do.call(rbind, result) |
| 230 | ||
| 231 | 16x |
result$ci <- Map(lcl = result$lcl, ucl = result$ucl, f = function(lcl, ucl) c(lcl, ucl)) |
| 232 | 16x |
result$ci <- lapply(result$ci, empty_vector_if_na) |
| 233 | 16x |
result$hr <- lapply(result$hr, empty_vector_if_na) |
| 234 | 16x |
result$pval <- lapply(result$pval, empty_vector_if_na) |
| 235 | 16x |
result <- result[, names(result) != "n"] |
| 236 | 16x |
attr(result, "conf_level") <- x$control$conf_level |
| 237 | ||
| 238 | 16x |
result |
| 239 |
} |
|
| 240 | ||
| 241 |
#' Fitting functions for Cox proportional hazards regression |
|
| 242 |
#' |
|
| 243 |
#' @description `r lifecycle::badge("stable")`
|
|
| 244 |
#' |
|
| 245 |
#' Fitting functions for univariate and multivariate Cox regression models. |
|
| 246 |
#' |
|
| 247 |
#' @param variables (named `list`)\cr the names of the variables found in `data`, passed as a named list and |
|
| 248 |
#' corresponding to the `time`, `event`, `arm`, `strata`, and `covariates` terms. If `arm` is missing from |
|
| 249 |
#' `variables`, then only Cox model(s) including the `covariates` will be fitted and the corresponding effect |
|
| 250 |
#' estimates will be tabulated later. |
|
| 251 |
#' @param data (`data.frame`)\cr the dataset containing the variables to fit the models. |
|
| 252 |
#' @param at (`list` of `numeric`)\cr when the candidate covariate is a `numeric`, use `at` to specify |
|
| 253 |
#' the value of the covariate at which the effect should be estimated. |
|
| 254 |
#' @param control (`list`)\cr a list of parameters as returned by the helper function [control_coxreg()]. |
|
| 255 |
#' |
|
| 256 |
#' @seealso [h_cox_regression] for relevant helper functions, [cox_regression]. |
|
| 257 |
#' |
|
| 258 |
#' @examples |
|
| 259 |
#' library(survival) |
|
| 260 |
#' |
|
| 261 |
#' set.seed(1, kind = "Mersenne-Twister") |
|
| 262 |
#' |
|
| 263 |
#' # Testing dataset [survival::bladder]. |
|
| 264 |
#' dta_bladder <- with( |
|
| 265 |
#' data = bladder[bladder$enum < 5, ], |
|
| 266 |
#' data.frame( |
|
| 267 |
#' time = stop, |
|
| 268 |
#' status = event, |
|
| 269 |
#' armcd = as.factor(rx), |
|
| 270 |
#' covar1 = as.factor(enum), |
|
| 271 |
#' covar2 = factor( |
|
| 272 |
#' sample(as.factor(enum)), |
|
| 273 |
#' levels = 1:4, labels = c("F", "F", "M", "M")
|
|
| 274 |
#' ) |
|
| 275 |
#' ) |
|
| 276 |
#' ) |
|
| 277 |
#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
|
|
| 278 |
#' formatters::var_labels(dta_bladder)[names(labels)] <- labels |
|
| 279 |
#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE) |
|
| 280 |
#' |
|
| 281 |
#' plot( |
|
| 282 |
#' survfit(Surv(time, status) ~ armcd + covar1, data = dta_bladder), |
|
| 283 |
#' lty = 2:4, |
|
| 284 |
#' xlab = "Months", |
|
| 285 |
#' col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
|
|
| 286 |
#' ) |
|
| 287 |
#' |
|
| 288 |
#' @name fit_coxreg |
|
| 289 |
NULL |
|
| 290 | ||
| 291 |
#' @describeIn fit_coxreg Fit a series of univariate Cox regression models given the inputs. |
|
| 292 |
#' |
|
| 293 |
#' @return |
|
| 294 |
#' * `fit_coxreg_univar()` returns a `coxreg.univar` class object which is a named `list` |
|
| 295 |
#' with 5 elements: |
|
| 296 |
#' * `mod`: Cox regression models fitted by [survival::coxph()]. |
|
| 297 |
#' * `data`: The original data frame input. |
|
| 298 |
#' * `control`: The original control input. |
|
| 299 |
#' * `vars`: The variables used in the model. |
|
| 300 |
#' * `at`: Value of the covariate at which the effect should be estimated. |
|
| 301 |
#' |
|
| 302 |
#' @note When using `fit_coxreg_univar` there should be two study arms. |
|
| 303 |
#' |
|
| 304 |
#' @examples |
|
| 305 |
#' # fit_coxreg_univar |
|
| 306 |
#' |
|
| 307 |
#' ## Cox regression: arm + 1 covariate. |
|
| 308 |
#' mod1 <- fit_coxreg_univar( |
|
| 309 |
#' variables = list( |
|
| 310 |
#' time = "time", event = "status", arm = "armcd", |
|
| 311 |
#' covariates = "covar1" |
|
| 312 |
#' ), |
|
| 313 |
#' data = dta_bladder, |
|
| 314 |
#' control = control_coxreg(conf_level = 0.91) |
|
| 315 |
#' ) |
|
| 316 |
#' |
|
| 317 |
#' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates. |
|
| 318 |
#' mod2 <- fit_coxreg_univar( |
|
| 319 |
#' variables = list( |
|
| 320 |
#' time = "time", event = "status", arm = "armcd", |
|
| 321 |
#' covariates = c("covar1", "covar2")
|
|
| 322 |
#' ), |
|
| 323 |
#' data = dta_bladder, |
|
| 324 |
#' control = control_coxreg(conf_level = 0.91, interaction = TRUE) |
|
| 325 |
#' ) |
|
| 326 |
#' |
|
| 327 |
#' ## Cox regression: arm + 1 covariate, stratified analysis. |
|
| 328 |
#' mod3 <- fit_coxreg_univar( |
|
| 329 |
#' variables = list( |
|
| 330 |
#' time = "time", event = "status", arm = "armcd", strata = "covar2", |
|
| 331 |
#' covariates = c("covar1")
|
|
| 332 |
#' ), |
|
| 333 |
#' data = dta_bladder, |
|
| 334 |
#' control = control_coxreg(conf_level = 0.91) |
|
| 335 |
#' ) |
|
| 336 |
#' |
|
| 337 |
#' ## Cox regression: no arm, only covariates. |
|
| 338 |
#' mod4 <- fit_coxreg_univar( |
|
| 339 |
#' variables = list( |
|
| 340 |
#' time = "time", event = "status", |
|
| 341 |
#' covariates = c("covar1", "covar2")
|
|
| 342 |
#' ), |
|
| 343 |
#' data = dta_bladder |
|
| 344 |
#' ) |
|
| 345 |
#' |
|
| 346 |
#' @export |
|
| 347 |
fit_coxreg_univar <- function(variables, |
|
| 348 |
data, |
|
| 349 |
at = list(), |
|
| 350 |
control = control_coxreg()) {
|
|
| 351 | 43x |
checkmate::assert_list(variables, names = "named") |
| 352 | 43x |
has_arm <- "arm" %in% names(variables) |
| 353 | 43x |
arm_name <- if (has_arm) "arm" else NULL |
| 354 | ||
| 355 | 43x |
checkmate::assert_character(variables$covariates, null.ok = TRUE) |
| 356 | ||
| 357 | 43x |
assert_df_with_variables(data, variables) |
| 358 | 43x |
assert_list_of_variables(variables[c(arm_name, "event", "time")]) |
| 359 | ||
| 360 | 43x |
if (!is.null(variables$strata)) {
|
| 361 | 4x |
checkmate::assert_disjunct(control$pval_method, "likelihood") |
| 362 |
} |
|
| 363 | 42x |
if (has_arm) {
|
| 364 | 36x |
assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2) |
| 365 |
} |
|
| 366 | 41x |
vars <- unlist(variables[c(arm_name, "covariates", "strata")], use.names = FALSE) |
| 367 | 41x |
for (i in vars) {
|
| 368 | 94x |
if (is.factor(data[[i]])) {
|
| 369 | 82x |
attr(data[[i]], "levels") <- levels(droplevels(data[[i]])) |
| 370 |
} |
|
| 371 |
} |
|
| 372 | 41x |
forms <- h_coxreg_univar_formulas(variables, interaction = control$interaction) |
| 373 | 41x |
mod <- lapply( |
| 374 | 41x |
forms, function(x) {
|
| 375 | 90x |
survival::coxph(formula = stats::as.formula(x), data = data, ties = control$ties) |
| 376 |
} |
|
| 377 |
) |
|
| 378 | 41x |
structure( |
| 379 | 41x |
list( |
| 380 | 41x |
mod = mod, |
| 381 | 41x |
data = data, |
| 382 | 41x |
control = control, |
| 383 | 41x |
vars = variables, |
| 384 | 41x |
at = at |
| 385 |
), |
|
| 386 | 41x |
class = "coxreg.univar" |
| 387 |
) |
|
| 388 |
} |
|
| 389 | ||
| 390 |
#' @describeIn fit_coxreg Fit a multivariate Cox regression model. |
|
| 391 |
#' |
|
| 392 |
#' @return |
|
| 393 |
#' * `fit_coxreg_multivar()` returns a `coxreg.multivar` class object which is a named list |
|
| 394 |
#' with 4 elements: |
|
| 395 |
#' * `mod`: Cox regression model fitted by [survival::coxph()]. |
|
| 396 |
#' * `data`: The original data frame input. |
|
| 397 |
#' * `control`: The original control input. |
|
| 398 |
#' * `vars`: The variables used in the model. |
|
| 399 |
#' |
|
| 400 |
#' @examples |
|
| 401 |
#' # fit_coxreg_multivar |
|
| 402 |
#' |
|
| 403 |
#' ## Cox regression: multivariate Cox regression. |
|
| 404 |
#' multivar_model <- fit_coxreg_multivar( |
|
| 405 |
#' variables = list( |
|
| 406 |
#' time = "time", event = "status", arm = "armcd", |
|
| 407 |
#' covariates = c("covar1", "covar2")
|
|
| 408 |
#' ), |
|
| 409 |
#' data = dta_bladder |
|
| 410 |
#' ) |
|
| 411 |
#' |
|
| 412 |
#' # Example without treatment arm. |
|
| 413 |
#' multivar_covs_model <- fit_coxreg_multivar( |
|
| 414 |
#' variables = list( |
|
| 415 |
#' time = "time", event = "status", |
|
| 416 |
#' covariates = c("covar1", "covar2")
|
|
| 417 |
#' ), |
|
| 418 |
#' data = dta_bladder |
|
| 419 |
#' ) |
|
| 420 |
#' |
|
| 421 |
#' @export |
|
| 422 |
fit_coxreg_multivar <- function(variables, |
|
| 423 |
data, |
|
| 424 |
control = control_coxreg()) {
|
|
| 425 | 83x |
checkmate::assert_list(variables, names = "named") |
| 426 | 83x |
has_arm <- "arm" %in% names(variables) |
| 427 | 83x |
arm_name <- if (has_arm) "arm" else NULL |
| 428 | ||
| 429 | 83x |
if (!is.null(variables$covariates)) {
|
| 430 | 21x |
checkmate::assert_character(variables$covariates) |
| 431 |
} |
|
| 432 | ||
| 433 | 83x |
checkmate::assert_false(control$interaction) |
| 434 | 83x |
assert_df_with_variables(data, variables) |
| 435 | 83x |
assert_list_of_variables(variables[c(arm_name, "event", "time")]) |
| 436 | ||
| 437 | 83x |
if (!is.null(variables$strata)) {
|
| 438 | 3x |
checkmate::assert_disjunct(control$pval_method, "likelihood") |
| 439 |
} |
|
| 440 | ||
| 441 | 82x |
form <- h_coxreg_multivar_formula(variables) |
| 442 | 82x |
mod <- survival::coxph( |
| 443 | 82x |
formula = stats::as.formula(form), |
| 444 | 82x |
data = data, |
| 445 | 82x |
ties = control$ties |
| 446 |
) |
|
| 447 | 82x |
structure( |
| 448 | 82x |
list( |
| 449 | 82x |
mod = mod, |
| 450 | 82x |
data = data, |
| 451 | 82x |
control = control, |
| 452 | 82x |
vars = variables |
| 453 |
), |
|
| 454 | 82x |
class = "coxreg.multivar" |
| 455 |
) |
|
| 456 |
} |
|
| 457 | ||
| 458 |
#' Muffled `car::Anova` |
|
| 459 |
#' |
|
| 460 |
#' Applied on survival models, [car::Anova()] signal that the `strata` terms is dropped from the model formula when |
|
| 461 |
#' present, this function deliberately muffles this message. |
|
| 462 |
#' |
|
| 463 |
#' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()]. |
|
| 464 |
#' @param test_statistic (`string`)\cr the method used for estimation of p.values; `wald` (default) or `likelihood`. |
|
| 465 |
#' |
|
| 466 |
#' @return The output of [car::Anova()], with convergence message muffled. |
|
| 467 |
#' |
|
| 468 |
#' @keywords internal |
|
| 469 |
muffled_car_anova <- function(mod, test_statistic) {
|
|
| 470 | 219x |
tryCatch( |
| 471 | 219x |
withCallingHandlers( |
| 472 | 219x |
expr = {
|
| 473 | 219x |
car::Anova( |
| 474 | 219x |
mod, |
| 475 | 219x |
test.statistic = test_statistic, |
| 476 | 219x |
type = "III" |
| 477 |
) |
|
| 478 |
}, |
|
| 479 | 219x |
message = function(m) invokeRestart("muffleMessage"),
|
| 480 | 219x |
error = function(e) {
|
| 481 | 1x |
stop(paste( |
| 482 | 1x |
"the model seems to have convergence problems, please try to change", |
| 483 | 1x |
"the configuration of covariates or strata variables, e.g.", |
| 484 | 1x |
"- original error:", e |
| 485 |
)) |
|
| 486 |
} |
|
| 487 |
) |
|
| 488 |
) |
|
| 489 |
} |
| 1 |
#' Multivariate logistic regression table |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Layout-creating function which summarizes a logistic variable regression for binary outcome with |
|
| 6 |
#' categorical/continuous covariates in model statement. For each covariate category (if categorical) |
|
| 7 |
#' or specified values (if continuous), present degrees of freedom, regression parameter estimate and |
|
| 8 |
#' standard error (SE) relative to reference group or category. Report odds ratios for each covariate |
|
| 9 |
#' category or specified values and corresponding Wald confidence intervals as default but allow user |
|
| 10 |
#' to specify other confidence levels. Report p-value for Wald chi-square test of the null hypothesis |
|
| 11 |
#' that covariate has no effect on response in model containing all specified covariates. |
|
| 12 |
#' Allow option to include one two-way interaction and present similar output for |
|
| 13 |
#' each interaction degree of freedom. |
|
| 14 |
#' |
|
| 15 |
#' @inheritParams argument_convention |
|
| 16 |
#' @param drop_and_remove_str (`string`)\cr string to be dropped and removed. |
|
| 17 |
#' |
|
| 18 |
#' @return A layout object suitable for passing to further layouting functions, or to [rtables::build_table()]. |
|
| 19 |
#' Adding this function to an `rtable` layout will add a logistic regression variable summary to the table layout. |
|
| 20 |
#' |
|
| 21 |
#' @note For the formula, the variable names need to be standard `data.frame` column names without |
|
| 22 |
#' special characters. |
|
| 23 |
#' |
|
| 24 |
#' @examples |
|
| 25 |
#' library(dplyr) |
|
| 26 |
#' library(broom) |
|
| 27 |
#' |
|
| 28 |
#' adrs_f <- tern_ex_adrs %>% |
|
| 29 |
#' filter(PARAMCD == "BESRSPI") %>% |
|
| 30 |
#' filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
|
|
| 31 |
#' mutate( |
|
| 32 |
#' Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
|
|
| 33 |
#' RACE = factor(RACE), |
|
| 34 |
#' SEX = factor(SEX) |
|
| 35 |
#' ) |
|
| 36 |
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response") |
|
| 37 |
#' mod1 <- fit_logistic( |
|
| 38 |
#' data = adrs_f, |
|
| 39 |
#' variables = list( |
|
| 40 |
#' response = "Response", |
|
| 41 |
#' arm = "ARMCD", |
|
| 42 |
#' covariates = c("AGE", "RACE")
|
|
| 43 |
#' ) |
|
| 44 |
#' ) |
|
| 45 |
#' mod2 <- fit_logistic( |
|
| 46 |
#' data = adrs_f, |
|
| 47 |
#' variables = list( |
|
| 48 |
#' response = "Response", |
|
| 49 |
#' arm = "ARMCD", |
|
| 50 |
#' covariates = c("AGE", "RACE"),
|
|
| 51 |
#' interaction = "AGE" |
|
| 52 |
#' ) |
|
| 53 |
#' ) |
|
| 54 |
#' |
|
| 55 |
#' df <- tidy(mod1, conf_level = 0.99) |
|
| 56 |
#' df2 <- tidy(mod2, conf_level = 0.99) |
|
| 57 |
#' |
|
| 58 |
#' # flagging empty strings with "_" |
|
| 59 |
#' df <- df_explicit_na(df, na_level = "_") |
|
| 60 |
#' df2 <- df_explicit_na(df2, na_level = "_") |
|
| 61 |
#' |
|
| 62 |
#' result1 <- basic_table() %>% |
|
| 63 |
#' summarize_logistic( |
|
| 64 |
#' conf_level = 0.95, |
|
| 65 |
#' drop_and_remove_str = "_" |
|
| 66 |
#' ) %>% |
|
| 67 |
#' build_table(df = df) |
|
| 68 |
#' result1 |
|
| 69 |
#' |
|
| 70 |
#' result2 <- basic_table() %>% |
|
| 71 |
#' summarize_logistic( |
|
| 72 |
#' conf_level = 0.95, |
|
| 73 |
#' drop_and_remove_str = "_" |
|
| 74 |
#' ) %>% |
|
| 75 |
#' build_table(df = df2) |
|
| 76 |
#' result2 |
|
| 77 |
#' |
|
| 78 |
#' @export |
|
| 79 |
#' @order 1 |
|
| 80 |
summarize_logistic <- function(lyt, |
|
| 81 |
conf_level, |
|
| 82 |
drop_and_remove_str = "", |
|
| 83 |
.indent_mods = NULL) {
|
|
| 84 |
# checks |
|
| 85 | 3x |
checkmate::assert_string(drop_and_remove_str) |
| 86 | ||
| 87 | 3x |
sum_logistic_variable_test <- logistic_summary_by_flag("is_variable_summary")
|
| 88 | 3x |
sum_logistic_term_estimates <- logistic_summary_by_flag("is_term_summary", .indent_mods = .indent_mods)
|
| 89 | 3x |
sum_logistic_odds_ratios <- logistic_summary_by_flag("is_reference_summary", .indent_mods = .indent_mods)
|
| 90 | 3x |
split_fun <- drop_and_remove_levels(drop_and_remove_str) |
| 91 | ||
| 92 | 3x |
lyt <- logistic_regression_cols(lyt, conf_level = conf_level) |
| 93 | 3x |
lyt <- split_rows_by(lyt, var = "variable", labels_var = "variable_label", split_fun = split_fun) |
| 94 | 3x |
lyt <- sum_logistic_variable_test(lyt) |
| 95 | 3x |
lyt <- split_rows_by(lyt, var = "term", labels_var = "term_label", split_fun = split_fun) |
| 96 | 3x |
lyt <- sum_logistic_term_estimates(lyt) |
| 97 | 3x |
lyt <- split_rows_by(lyt, var = "interaction", labels_var = "interaction_label", split_fun = split_fun) |
| 98 | 3x |
lyt <- split_rows_by(lyt, var = "reference", labels_var = "reference_label", split_fun = split_fun) |
| 99 | 3x |
lyt <- sum_logistic_odds_ratios(lyt) |
| 100 | 3x |
lyt |
| 101 |
} |
|
| 102 | ||
| 103 |
#' Fit for logistic regression |
|
| 104 |
#' |
|
| 105 |
#' @description `r lifecycle::badge("stable")`
|
|
| 106 |
#' |
|
| 107 |
#' Fit a (conditional) logistic regression model. |
|
| 108 |
#' |
|
| 109 |
#' @inheritParams argument_convention |
|
| 110 |
#' @param data (`data.frame`)\cr the data frame on which the model was fit. |
|
| 111 |
#' @param response_definition (`string`)\cr the definition of what an event is in terms of `response`. |
|
| 112 |
#' This will be used when fitting the (conditional) logistic regression model on the left hand |
|
| 113 |
#' side of the formula. |
|
| 114 |
#' |
|
| 115 |
#' @return A fitted logistic regression model. |
|
| 116 |
#' |
|
| 117 |
#' @section Model Specification: |
|
| 118 |
#' |
|
| 119 |
#' The `variables` list needs to include the following elements: |
|
| 120 |
#' * `arm`: Treatment arm variable name. |
|
| 121 |
#' * `response`: The response arm variable name. Usually this is a 0/1 variable. |
|
| 122 |
#' * `covariates`: This is either `NULL` (no covariates) or a character vector of covariate variable names. |
|
| 123 |
#' * `interaction`: This is either `NULL` (no interaction) or a string of a single covariate variable name already |
|
| 124 |
#' included in `covariates`. Then the interaction with the treatment arm is included in the model. |
|
| 125 |
#' |
|
| 126 |
#' @examples |
|
| 127 |
#' library(dplyr) |
|
| 128 |
#' |
|
| 129 |
#' adrs_f <- tern_ex_adrs %>% |
|
| 130 |
#' filter(PARAMCD == "BESRSPI") %>% |
|
| 131 |
#' filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
|
|
| 132 |
#' mutate( |
|
| 133 |
#' Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
|
|
| 134 |
#' RACE = factor(RACE), |
|
| 135 |
#' SEX = factor(SEX) |
|
| 136 |
#' ) |
|
| 137 |
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response") |
|
| 138 |
#' mod1 <- fit_logistic( |
|
| 139 |
#' data = adrs_f, |
|
| 140 |
#' variables = list( |
|
| 141 |
#' response = "Response", |
|
| 142 |
#' arm = "ARMCD", |
|
| 143 |
#' covariates = c("AGE", "RACE")
|
|
| 144 |
#' ) |
|
| 145 |
#' ) |
|
| 146 |
#' mod2 <- fit_logistic( |
|
| 147 |
#' data = adrs_f, |
|
| 148 |
#' variables = list( |
|
| 149 |
#' response = "Response", |
|
| 150 |
#' arm = "ARMCD", |
|
| 151 |
#' covariates = c("AGE", "RACE"),
|
|
| 152 |
#' interaction = "AGE" |
|
| 153 |
#' ) |
|
| 154 |
#' ) |
|
| 155 |
#' |
|
| 156 |
#' @export |
|
| 157 |
fit_logistic <- function(data, |
|
| 158 |
variables = list( |
|
| 159 |
response = "Response", |
|
| 160 |
arm = "ARMCD", |
|
| 161 |
covariates = NULL, |
|
| 162 |
interaction = NULL, |
|
| 163 |
strata = NULL |
|
| 164 |
), |
|
| 165 |
response_definition = "response") {
|
|
| 166 | 75x |
assert_df_with_variables(data, variables) |
| 167 | 75x |
checkmate::assert_subset(names(variables), c("response", "arm", "covariates", "interaction", "strata"))
|
| 168 | 75x |
checkmate::assert_string(response_definition) |
| 169 | 75x |
checkmate::assert_true(grepl("response", response_definition))
|
| 170 | ||
| 171 | 75x |
response_definition <- sub( |
| 172 | 75x |
pattern = "response", |
| 173 | 75x |
replacement = variables$response, |
| 174 | 75x |
x = response_definition, |
| 175 | 75x |
fixed = TRUE |
| 176 |
) |
|
| 177 | 75x |
form <- paste0(response_definition, " ~ ", variables$arm) |
| 178 | 75x |
if (!is.null(variables$covariates)) {
|
| 179 | 29x |
form <- paste0(form, " + ", paste(variables$covariates, collapse = " + ")) |
| 180 |
} |
|
| 181 | 75x |
if (!is.null(variables$interaction)) {
|
| 182 | 18x |
checkmate::assert_string(variables$interaction) |
| 183 | 18x |
checkmate::assert_subset(variables$interaction, variables$covariates) |
| 184 | 18x |
form <- paste0(form, " + ", variables$arm, ":", variables$interaction) |
| 185 |
} |
|
| 186 | 75x |
if (!is.null(variables$strata)) {
|
| 187 | 14x |
strata_arg <- if (length(variables$strata) > 1) {
|
| 188 | 7x |
paste0("I(interaction(", paste0(variables$strata, collapse = ", "), "))")
|
| 189 |
} else {
|
|
| 190 | 7x |
variables$strata |
| 191 |
} |
|
| 192 | 14x |
form <- paste0(form, "+ strata(", strata_arg, ")")
|
| 193 |
} |
|
| 194 | 75x |
formula <- stats::as.formula(form) |
| 195 | 75x |
if (is.null(variables$strata)) {
|
| 196 | 61x |
stats::glm( |
| 197 | 61x |
formula = formula, |
| 198 | 61x |
data = data, |
| 199 | 61x |
family = stats::binomial("logit")
|
| 200 |
) |
|
| 201 |
} else {
|
|
| 202 | 14x |
clogit_with_tryCatch( |
| 203 | 14x |
formula = formula, |
| 204 | 14x |
data = data, |
| 205 | 14x |
x = TRUE |
| 206 |
) |
|
| 207 |
} |
|
| 208 |
} |
|
| 209 | ||
| 210 |
#' Custom tidy method for binomial GLM results |
|
| 211 |
#' |
|
| 212 |
#' @description `r lifecycle::badge("stable")`
|
|
| 213 |
#' |
|
| 214 |
#' Helper method (for [broom::tidy()]) to prepare a data frame from a `glm` object |
|
| 215 |
#' with `binomial` family. |
|
| 216 |
#' |
|
| 217 |
#' @inheritParams argument_convention |
|
| 218 |
#' @param at (`numeric` or `NULL`)\cr optional values for the interaction variable. Otherwise the median is used. |
|
| 219 |
#' @param x (`glm`)\cr logistic regression model fitted by [stats::glm()] with "binomial" family. |
|
| 220 |
#' |
|
| 221 |
#' @return A `data.frame` containing the tidied model. |
|
| 222 |
#' |
|
| 223 |
#' @method tidy glm |
|
| 224 |
#' |
|
| 225 |
#' @seealso [h_logistic_regression] for relevant helper functions. |
|
| 226 |
#' |
|
| 227 |
#' @examples |
|
| 228 |
#' library(dplyr) |
|
| 229 |
#' library(broom) |
|
| 230 |
#' |
|
| 231 |
#' adrs_f <- tern_ex_adrs %>% |
|
| 232 |
#' filter(PARAMCD == "BESRSPI") %>% |
|
| 233 |
#' filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
|
|
| 234 |
#' mutate( |
|
| 235 |
#' Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
|
|
| 236 |
#' RACE = factor(RACE), |
|
| 237 |
#' SEX = factor(SEX) |
|
| 238 |
#' ) |
|
| 239 |
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response") |
|
| 240 |
#' mod1 <- fit_logistic( |
|
| 241 |
#' data = adrs_f, |
|
| 242 |
#' variables = list( |
|
| 243 |
#' response = "Response", |
|
| 244 |
#' arm = "ARMCD", |
|
| 245 |
#' covariates = c("AGE", "RACE")
|
|
| 246 |
#' ) |
|
| 247 |
#' ) |
|
| 248 |
#' mod2 <- fit_logistic( |
|
| 249 |
#' data = adrs_f, |
|
| 250 |
#' variables = list( |
|
| 251 |
#' response = "Response", |
|
| 252 |
#' arm = "ARMCD", |
|
| 253 |
#' covariates = c("AGE", "RACE"),
|
|
| 254 |
#' interaction = "AGE" |
|
| 255 |
#' ) |
|
| 256 |
#' ) |
|
| 257 |
#' |
|
| 258 |
#' df <- tidy(mod1, conf_level = 0.99) |
|
| 259 |
#' df2 <- tidy(mod2, conf_level = 0.99) |
|
| 260 |
#' |
|
| 261 |
#' @export |
|
| 262 |
tidy.glm <- function(x, # nolint |
|
| 263 |
conf_level = 0.95, |
|
| 264 |
at = NULL, |
|
| 265 |
...) {
|
|
| 266 | 5x |
checkmate::assert_class(x, "glm") |
| 267 | 5x |
checkmate::assert_set_equal(x$family$family, "binomial") |
| 268 | ||
| 269 | 5x |
terms_name <- attr(stats::terms(x), "term.labels") |
| 270 | 5x |
xs_class <- attr(x$terms, "dataClasses") |
| 271 | 5x |
interaction <- terms_name[which(!terms_name %in% names(xs_class))] |
| 272 | 5x |
df <- if (length(interaction) == 0) {
|
| 273 | 2x |
h_logistic_simple_terms( |
| 274 | 2x |
x = terms_name, |
| 275 | 2x |
fit_glm = x, |
| 276 | 2x |
conf_level = conf_level |
| 277 |
) |
|
| 278 |
} else {
|
|
| 279 | 3x |
h_logistic_inter_terms( |
| 280 | 3x |
x = terms_name, |
| 281 | 3x |
fit_glm = x, |
| 282 | 3x |
conf_level = conf_level, |
| 283 | 3x |
at = at |
| 284 |
) |
|
| 285 |
} |
|
| 286 | 5x |
for (var in c("variable", "term", "interaction", "reference")) {
|
| 287 | 20x |
df[[var]] <- factor(df[[var]], levels = unique(df[[var]])) |
| 288 |
} |
|
| 289 | 5x |
df |
| 290 |
} |
|
| 291 | ||
| 292 |
#' Logistic regression multivariate column layout function |
|
| 293 |
#' |
|
| 294 |
#' @description `r lifecycle::badge("stable")`
|
|
| 295 |
#' |
|
| 296 |
#' Layout-creating function which creates a multivariate column layout summarizing logistic |
|
| 297 |
#' regression results. This function is a wrapper for [rtables::split_cols_by_multivar()]. |
|
| 298 |
#' |
|
| 299 |
#' @inheritParams argument_convention |
|
| 300 |
#' |
|
| 301 |
#' @return A layout object suitable for passing to further layouting functions. Adding this |
|
| 302 |
#' function to an `rtable` layout will split the table into columns corresponding to |
|
| 303 |
#' statistics `df`, `estimate`, `std_error`, `odds_ratio`, `ci`, and `pvalue`. |
|
| 304 |
#' |
|
| 305 |
#' @export |
|
| 306 |
logistic_regression_cols <- function(lyt, |
|
| 307 |
conf_level = 0.95) {
|
|
| 308 | 4x |
vars <- c("df", "estimate", "std_error", "odds_ratio", "ci", "pvalue")
|
| 309 | 4x |
var_labels <- c( |
| 310 | 4x |
df = "Degrees of Freedom", |
| 311 | 4x |
estimate = "Parameter Estimate", |
| 312 | 4x |
std_error = "Standard Error", |
| 313 | 4x |
odds_ratio = "Odds Ratio", |
| 314 | 4x |
ci = paste("Wald", f_conf_level(conf_level)),
|
| 315 | 4x |
pvalue = "p-value" |
| 316 |
) |
|
| 317 | 4x |
split_cols_by_multivar( |
| 318 | 4x |
lyt = lyt, |
| 319 | 4x |
vars = vars, |
| 320 | 4x |
varlabels = var_labels |
| 321 |
) |
|
| 322 |
} |
|
| 323 | ||
| 324 |
#' Logistic regression summary table |
|
| 325 |
#' |
|
| 326 |
#' @description `r lifecycle::badge("stable")`
|
|
| 327 |
#' |
|
| 328 |
#' Constructor for content functions to be used in [`summarize_logistic()`] to summarize |
|
| 329 |
#' logistic regression results. This function is a wrapper for [rtables::summarize_row_groups()]. |
|
| 330 |
#' |
|
| 331 |
#' @inheritParams argument_convention |
|
| 332 |
#' @param flag_var (`string`)\cr variable name identifying which row should be used in this |
|
| 333 |
#' content function. |
|
| 334 |
#' |
|
| 335 |
#' @return A content function. |
|
| 336 |
#' |
|
| 337 |
#' @export |
|
| 338 |
logistic_summary_by_flag <- function(flag_var, na_str = default_na_str(), .indent_mods = NULL) {
|
|
| 339 | 10x |
checkmate::assert_string(flag_var) |
| 340 | 10x |
function(lyt) {
|
| 341 | 10x |
cfun_list <- list( |
| 342 | 10x |
df = cfun_by_flag("df", flag_var, format = "xx.", .indent_mods = .indent_mods),
|
| 343 | 10x |
estimate = cfun_by_flag("estimate", flag_var, format = "xx.xxx", .indent_mods = .indent_mods),
|
| 344 | 10x |
std_error = cfun_by_flag("std_error", flag_var, format = "xx.xxx", .indent_mods = .indent_mods),
|
| 345 | 10x |
odds_ratio = cfun_by_flag("odds_ratio", flag_var, format = ">999.99", .indent_mods = .indent_mods),
|
| 346 | 10x |
ci = cfun_by_flag("ci", flag_var, format = format_extreme_values_ci(2L), .indent_mods = .indent_mods),
|
| 347 | 10x |
pvalue = cfun_by_flag("pvalue", flag_var, format = "x.xxxx | (<0.0001)", .indent_mods = .indent_mods)
|
| 348 |
) |
|
| 349 | 10x |
summarize_row_groups( |
| 350 | 10x |
lyt = lyt, |
| 351 | 10x |
cfun = cfun_list, |
| 352 | 10x |
na_str = na_str |
| 353 |
) |
|
| 354 |
} |
|
| 355 |
} |
| 1 |
# Utility functions to cooperate with {rtables} package
|
|
| 2 | ||
| 3 |
#' Convert table into matrix of strings |
|
| 4 |
#' |
|
| 5 |
#' @description `r lifecycle::badge("stable")`
|
|
| 6 |
#' |
|
| 7 |
#' Helper function to use mostly within tests. `with_spaces`parameter allows |
|
| 8 |
#' to test not only for content but also indentation and table structure. |
|
| 9 |
#' `print_txt_to_copy` instead facilitate the testing development by returning a well |
|
| 10 |
#' formatted text that needs only to be copied and pasted in the expected output. |
|
| 11 |
#' |
|
| 12 |
#' @inheritParams formatters::toString |
|
| 13 |
#' @param x (`VTableTree`)\cr `rtables` table object. |
|
| 14 |
#' @param with_spaces (`flag`)\cr whether the tested table should keep the indentation and other relevant spaces. |
|
| 15 |
#' @param print_txt_to_copy (`flag`)\cr utility to have a way to copy the input table directly |
|
| 16 |
#' into the expected variable instead of copying it too manually. |
|
| 17 |
#' |
|
| 18 |
#' @return A `matrix` of `string`s. If `print_txt_to_copy = TRUE` the well formatted printout of the |
|
| 19 |
#' table will be printed to console, ready to be copied as a expected value. |
|
| 20 |
#' |
|
| 21 |
#' @examples |
|
| 22 |
#' tbl <- basic_table() %>% |
|
| 23 |
#' split_rows_by("SEX") %>%
|
|
| 24 |
#' split_cols_by("ARM") %>%
|
|
| 25 |
#' analyze("AGE") %>%
|
|
| 26 |
#' build_table(tern_ex_adsl) |
|
| 27 |
#' |
|
| 28 |
#' to_string_matrix(tbl, widths = ceiling(propose_column_widths(tbl) / 2)) |
|
| 29 |
#' |
|
| 30 |
#' @export |
|
| 31 |
to_string_matrix <- function(x, widths = NULL, max_width = NULL, |
|
| 32 |
hsep = formatters::default_hsep(), |
|
| 33 |
with_spaces = TRUE, print_txt_to_copy = FALSE) {
|
|
| 34 | 11x |
checkmate::assert_flag(with_spaces) |
| 35 | 11x |
checkmate::assert_flag(print_txt_to_copy) |
| 36 | 11x |
checkmate::assert_int(max_width, null.ok = TRUE) |
| 37 | ||
| 38 | 11x |
if (inherits(x, "MatrixPrintForm")) {
|
| 39 | ! |
tx <- x |
| 40 |
} else {
|
|
| 41 | 11x |
tx <- matrix_form(x, TRUE) |
| 42 |
} |
|
| 43 | ||
| 44 | 11x |
tf_wrap <- FALSE |
| 45 | 11x |
if (!is.null(max_width)) {
|
| 46 | ! |
tf_wrap <- TRUE |
| 47 |
} |
|
| 48 | ||
| 49 |
# Producing the matrix to test |
|
| 50 | 11x |
if (with_spaces) {
|
| 51 | 2x |
out <- strsplit(toString(tx, widths = widths, tf_wrap = tf_wrap, max_width = max_width, hsep = hsep), "\n")[[1]] |
| 52 |
} else {
|
|
| 53 | 9x |
out <- tx$strings |
| 54 |
} |
|
| 55 | ||
| 56 |
# Printing to console formatted output that needs to be copied in "expected" |
|
| 57 | 11x |
if (print_txt_to_copy) {
|
| 58 | 2x |
out_tmp <- out |
| 59 | 2x |
if (!with_spaces) {
|
| 60 | 1x |
out_tmp <- apply(out, 1, paste0, collapse = '", "') |
| 61 |
} |
|
| 62 | 2x |
cat(paste0('c(\n "', paste0(out_tmp, collapse = '",\n "'), '"\n)'))
|
| 63 |
} |
|
| 64 | ||
| 65 |
# Return values |
|
| 66 | 11x |
out |
| 67 |
} |
|
| 68 | ||
| 69 |
#' Blank for missing input |
|
| 70 |
#' |
|
| 71 |
#' Helper function to use in tabulating model results. |
|
| 72 |
#' |
|
| 73 |
#' @param x (`vector`)\cr input for a cell. |
|
| 74 |
#' |
|
| 75 |
#' @return An empty `character` vector if all entries in `x` are missing (`NA`), otherwise |
|
| 76 |
#' the unlisted version of `x`. |
|
| 77 |
#' |
|
| 78 |
#' @keywords internal |
|
| 79 |
unlist_and_blank_na <- function(x) {
|
|
| 80 | 267x |
unl <- unlist(x) |
| 81 | 267x |
if (all(is.na(unl))) {
|
| 82 | 161x |
character() |
| 83 |
} else {
|
|
| 84 | 106x |
unl |
| 85 |
} |
|
| 86 |
} |
|
| 87 | ||
| 88 |
#' Constructor for content functions given a data frame with flag input |
|
| 89 |
#' |
|
| 90 |
#' This can be useful for tabulating model results. |
|
| 91 |
#' |
|
| 92 |
#' @param analysis_var (`string`)\cr variable name for the column containing values to be returned by the |
|
| 93 |
#' content function. |
|
| 94 |
#' @param flag_var (`string`)\cr variable name for the logical column identifying which row should be returned. |
|
| 95 |
#' @param format (`string`)\cr `rtables` format to use. |
|
| 96 |
#' |
|
| 97 |
#' @return A content function which gives `df$analysis_var` at the row identified by |
|
| 98 |
#' `.df_row$flag` in the given format. |
|
| 99 |
#' |
|
| 100 |
#' @keywords internal |
|
| 101 |
cfun_by_flag <- function(analysis_var, |
|
| 102 |
flag_var, |
|
| 103 |
format = "xx", |
|
| 104 |
.indent_mods = NULL) {
|
|
| 105 | 61x |
checkmate::assert_string(analysis_var) |
| 106 | 61x |
checkmate::assert_string(flag_var) |
| 107 | 61x |
function(df, labelstr) {
|
| 108 | 265x |
row_index <- which(df[[flag_var]]) |
| 109 | 265x |
x <- unlist_and_blank_na(df[[analysis_var]][row_index]) |
| 110 | 265x |
formatters::with_label( |
| 111 | 265x |
rcell(x, format = format, indent_mod = .indent_mods), |
| 112 | 265x |
labelstr |
| 113 |
) |
|
| 114 |
} |
|
| 115 |
} |
|
| 116 | ||
| 117 |
#' Content row function to add row total to labels |
|
| 118 |
#' |
|
| 119 |
#' This takes the label of the latest row split level and adds the row total from `df` in parentheses. |
|
| 120 |
#' This function differs from [c_label_n_alt()] by taking row counts from `df` rather than |
|
| 121 |
#' `alt_counts_df`, and is used by [add_rowcounts()] when `alt_counts` is set to `FALSE`. |
|
| 122 |
#' |
|
| 123 |
#' @inheritParams argument_convention |
|
| 124 |
#' |
|
| 125 |
#' @return A list with formatted [rtables::CellValue()] with the row count value and the correct label. |
|
| 126 |
#' |
|
| 127 |
#' @note It is important here to not use `df` but rather `.N_row` in the implementation, because |
|
| 128 |
#' the former is already split by columns and will refer to the first column of the data only. |
|
| 129 |
#' |
|
| 130 |
#' @seealso [c_label_n_alt()] which performs the same function but retrieves row counts from |
|
| 131 |
#' `alt_counts_df` instead of `df`. |
|
| 132 |
#' |
|
| 133 |
#' @keywords internal |
|
| 134 |
c_label_n <- function(df, |
|
| 135 |
labelstr, |
|
| 136 |
.N_row) { # nolint
|
|
| 137 | 273x |
label <- paste0(labelstr, " (N=", .N_row, ")") |
| 138 | 273x |
in_rows( |
| 139 | 273x |
.list = list(row_count = formatters::with_label(c(.N_row, .N_row), label)), |
| 140 | 273x |
.formats = c(row_count = function(x, ...) "") |
| 141 |
) |
|
| 142 |
} |
|
| 143 | ||
| 144 |
#' Content row function to add `alt_counts_df` row total to labels |
|
| 145 |
#' |
|
| 146 |
#' This takes the label of the latest row split level and adds the row total from `alt_counts_df` |
|
| 147 |
#' in parentheses. This function differs from [c_label_n()] by taking row counts from `alt_counts_df` |
|
| 148 |
#' rather than `df`, and is used by [add_rowcounts()] when `alt_counts` is set to `TRUE`. |
|
| 149 |
#' |
|
| 150 |
#' @inheritParams argument_convention |
|
| 151 |
#' |
|
| 152 |
#' @return A list with formatted [rtables::CellValue()] with the row count value and the correct label. |
|
| 153 |
#' |
|
| 154 |
#' @seealso [c_label_n()] which performs the same function but retrieves row counts from `df` instead |
|
| 155 |
#' of `alt_counts_df`. |
|
| 156 |
#' |
|
| 157 |
#' @keywords internal |
|
| 158 |
c_label_n_alt <- function(df, |
|
| 159 |
labelstr, |
|
| 160 |
.alt_df_row) {
|
|
| 161 | 7x |
N_row_alt <- nrow(.alt_df_row) # nolint |
| 162 | 7x |
label <- paste0(labelstr, " (N=", N_row_alt, ")") |
| 163 | 7x |
in_rows( |
| 164 | 7x |
.list = list(row_count = formatters::with_label(c(N_row_alt, N_row_alt), label)), |
| 165 | 7x |
.formats = c(row_count = function(x, ...) "") |
| 166 |
) |
|
| 167 |
} |
|
| 168 | ||
| 169 |
#' Layout-creating function to add row total counts |
|
| 170 |
#' |
|
| 171 |
#' @description `r lifecycle::badge("stable")`
|
|
| 172 |
#' |
|
| 173 |
#' This works analogously to [rtables::add_colcounts()] but on the rows. This function |
|
| 174 |
#' is a wrapper for [rtables::summarize_row_groups()]. |
|
| 175 |
#' |
|
| 176 |
#' @inheritParams argument_convention |
|
| 177 |
#' @param alt_counts (`flag`)\cr whether row counts should be taken from `alt_counts_df` (`TRUE`) |
|
| 178 |
#' or from `df` (`FALSE`). Defaults to `FALSE`. |
|
| 179 |
#' |
|
| 180 |
#' @return A modified layout where the latest row split labels now have the row-wise |
|
| 181 |
#' total counts (i.e. without column-based subsetting) attached in parentheses. |
|
| 182 |
#' |
|
| 183 |
#' @note Row count values are contained in these row count rows but are not displayed |
|
| 184 |
#' so that they are not considered zero rows by default when pruning. |
|
| 185 |
#' |
|
| 186 |
#' @examples |
|
| 187 |
#' basic_table() %>% |
|
| 188 |
#' split_cols_by("ARM") %>%
|
|
| 189 |
#' add_colcounts() %>% |
|
| 190 |
#' split_rows_by("RACE", split_fun = drop_split_levels) %>%
|
|
| 191 |
#' add_rowcounts() %>% |
|
| 192 |
#' analyze("AGE", afun = list_wrap_x(summary), format = "xx.xx") %>%
|
|
| 193 |
#' build_table(DM) |
|
| 194 |
#' |
|
| 195 |
#' @export |
|
| 196 |
add_rowcounts <- function(lyt, alt_counts = FALSE) {
|
|
| 197 | 7x |
summarize_row_groups( |
| 198 | 7x |
lyt, |
| 199 | 7x |
cfun = if (alt_counts) c_label_n_alt else c_label_n |
| 200 |
) |
|
| 201 |
} |
|
| 202 | ||
| 203 |
#' Obtain column indices |
|
| 204 |
#' |
|
| 205 |
#' @description `r lifecycle::badge("stable")`
|
|
| 206 |
#' |
|
| 207 |
#' Helper function to extract column indices from a `VTableTree` for a given |
|
| 208 |
#' vector of column names. |
|
| 209 |
#' |
|
| 210 |
#' @param table_tree (`VTableTree`)\cr `rtables` table object to extract the indices from. |
|
| 211 |
#' @param col_names (`character`)\cr vector of column names. |
|
| 212 |
#' |
|
| 213 |
#' @return A vector of column indices. |
|
| 214 |
#' |
|
| 215 |
#' @export |
|
| 216 |
h_col_indices <- function(table_tree, col_names) {
|
|
| 217 | 1256x |
checkmate::assert_class(table_tree, "VTableNodeInfo") |
| 218 | 1256x |
checkmate::assert_subset(col_names, names(attr(col_info(table_tree), "cextra_args")), empty.ok = FALSE) |
| 219 | 1256x |
match(col_names, names(attr(col_info(table_tree), "cextra_args"))) |
| 220 |
} |
|
| 221 | ||
| 222 |
#' Labels or names of list elements |
|
| 223 |
#' |
|
| 224 |
#' Helper function for working with nested statistic function results which typically |
|
| 225 |
#' don't have labels but names that we can use. |
|
| 226 |
#' |
|
| 227 |
#' @param x (`list`)\cr a list. |
|
| 228 |
#' |
|
| 229 |
#' @return A `character` vector with the labels or names for the list elements. |
|
| 230 |
#' |
|
| 231 |
#' @examples |
|
| 232 |
#' x <- data.frame( |
|
| 233 |
#' a = 1:10, |
|
| 234 |
#' b = rnorm(10) |
|
| 235 |
#' ) |
|
| 236 |
#' labels_or_names(x) |
|
| 237 |
#' var_labels(x) <- c(b = "Label for b", a = NA) |
|
| 238 |
#' labels_or_names(x) |
|
| 239 |
#' |
|
| 240 |
#' @export |
|
| 241 |
labels_or_names <- function(x) {
|
|
| 242 | 190x |
checkmate::assert_multi_class(x, c("data.frame", "list"))
|
| 243 | 190x |
labs <- sapply(x, obj_label) |
| 244 | 190x |
nams <- rlang::names2(x) |
| 245 | 190x |
label_is_null <- sapply(labs, is.null) |
| 246 | 190x |
result <- unlist(ifelse(label_is_null, nams, labs)) |
| 247 | 190x |
result |
| 248 |
} |
|
| 249 | ||
| 250 |
#' Convert to `rtable` |
|
| 251 |
#' |
|
| 252 |
#' @description `r lifecycle::badge("stable")`
|
|
| 253 |
#' |
|
| 254 |
#' This is a new generic function to convert objects to `rtable` tables. |
|
| 255 |
#' |
|
| 256 |
#' @param x (`data.frame`)\cr the object which should be converted to an `rtable`. |
|
| 257 |
#' @param ... additional arguments for methods. |
|
| 258 |
#' |
|
| 259 |
#' @return An `rtables` table object. Note that the concrete class will depend on the method used. |
|
| 260 |
#' |
|
| 261 |
#' @export |
|
| 262 |
as.rtable <- function(x, ...) { # nolint
|
|
| 263 | 3x |
UseMethod("as.rtable", x)
|
| 264 |
} |
|
| 265 | ||
| 266 |
#' @describeIn as.rtable Method for converting a `data.frame` that contains numeric columns to `rtable`. |
|
| 267 |
#' |
|
| 268 |
#' @param format (`string` or `function`)\cr the format which should be used for the columns. |
|
| 269 |
#' |
|
| 270 |
#' @method as.rtable data.frame |
|
| 271 |
#' |
|
| 272 |
#' @examples |
|
| 273 |
#' x <- data.frame( |
|
| 274 |
#' a = 1:10, |
|
| 275 |
#' b = rnorm(10) |
|
| 276 |
#' ) |
|
| 277 |
#' as.rtable(x) |
|
| 278 |
#' |
|
| 279 |
#' @export |
|
| 280 |
as.rtable.data.frame <- function(x, format = "xx.xx", ...) {
|
|
| 281 | 3x |
checkmate::assert_numeric(unlist(x)) |
| 282 | 2x |
do.call( |
| 283 | 2x |
rtable, |
| 284 | 2x |
c( |
| 285 | 2x |
list( |
| 286 | 2x |
header = labels_or_names(x), |
| 287 | 2x |
format = format |
| 288 |
), |
|
| 289 | 2x |
Map( |
| 290 | 2x |
function(row, row_name) {
|
| 291 | 20x |
do.call( |
| 292 | 20x |
rrow, |
| 293 | 20x |
c(as.list(unname(row)), |
| 294 | 20x |
row.name = row_name |
| 295 |
) |
|
| 296 |
) |
|
| 297 |
}, |
|
| 298 | 2x |
row = as.data.frame(t(x)), |
| 299 | 2x |
row_name = rownames(x) |
| 300 |
) |
|
| 301 |
) |
|
| 302 |
) |
|
| 303 |
} |
|
| 304 | ||
| 305 |
#' Split parameters |
|
| 306 |
#' |
|
| 307 |
#' @description `r lifecycle::badge("deprecated")`
|
|
| 308 |
#' |
|
| 309 |
#' It divides the data in the vector `param` into the groups defined by `f` based on specified `values`. It is relevant |
|
| 310 |
#' in `rtables` layers so as to distribute parameters `.stats` or' `.formats` into lists with items corresponding to |
|
| 311 |
#' specific analysis function. |
|
| 312 |
#' |
|
| 313 |
#' @param param (`vector`)\cr the parameter to be split. |
|
| 314 |
#' @param value (`vector`)\cr the value used to split. |
|
| 315 |
#' @param f (`list`)\cr the reference to make the split. |
|
| 316 |
#' |
|
| 317 |
#' @return A named `list` with the same element names as `f`, each containing the elements specified in `.stats`. |
|
| 318 |
#' |
|
| 319 |
#' @examples |
|
| 320 |
#' f <- list( |
|
| 321 |
#' surv = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci"),
|
|
| 322 |
#' surv_diff = c("rate_diff", "rate_diff_ci", "ztest_pval")
|
|
| 323 |
#' ) |
|
| 324 |
#' |
|
| 325 |
#' .stats <- c("pt_at_risk", "rate_diff")
|
|
| 326 |
#' h_split_param(.stats, .stats, f = f) |
|
| 327 |
#' |
|
| 328 |
#' # $surv |
|
| 329 |
#' # [1] "pt_at_risk" |
|
| 330 |
#' # |
|
| 331 |
#' # $surv_diff |
|
| 332 |
#' # [1] "rate_diff" |
|
| 333 |
#' |
|
| 334 |
#' .formats <- c("pt_at_risk" = "xx", "event_free_rate" = "xxx")
|
|
| 335 |
#' h_split_param(.formats, names(.formats), f = f) |
|
| 336 |
#' |
|
| 337 |
#' # $surv |
|
| 338 |
#' # pt_at_risk event_free_rate |
|
| 339 |
#' # "xx" "xxx" |
|
| 340 |
#' # |
|
| 341 |
#' # $surv_diff |
|
| 342 |
#' # NULL |
|
| 343 |
#' |
|
| 344 |
#' @export |
|
| 345 |
h_split_param <- function(param, |
|
| 346 |
value, |
|
| 347 |
f) {
|
|
| 348 | 2x |
lifecycle::deprecate_warn("0.9.8", "h_split_param()")
|
| 349 | ||
| 350 | 2x |
y <- lapply(f, function(x) param[value %in% x]) |
| 351 | 2x |
lapply(y, function(x) if (length(x) == 0) NULL else x) |
| 352 |
} |
|
| 353 | ||
| 354 |
#' Get selected statistics names |
|
| 355 |
#' |
|
| 356 |
#' Helper function to be used for creating `afun`. |
|
| 357 |
#' |
|
| 358 |
#' @param .stats (`vector` or `NULL`)\cr input to the layout creating function. Note that `NULL` means |
|
| 359 |
#' in this context that all default statistics should be used. |
|
| 360 |
#' @param all_stats (`character`)\cr all statistics which can be selected here potentially. |
|
| 361 |
#' |
|
| 362 |
#' @return A `character` vector with the selected statistics. |
|
| 363 |
#' |
|
| 364 |
#' @keywords internal |
|
| 365 |
afun_selected_stats <- function(.stats, all_stats) {
|
|
| 366 | 2x |
checkmate::assert_character(.stats, null.ok = TRUE) |
| 367 | 2x |
checkmate::assert_character(all_stats) |
| 368 | 2x |
if (is.null(.stats)) {
|
| 369 | 1x |
all_stats |
| 370 |
} else {
|
|
| 371 | 1x |
intersect(.stats, all_stats) |
| 372 |
} |
|
| 373 |
} |
|
| 374 | ||
| 375 |
#' Add variable labels to top left corner in table |
|
| 376 |
#' |
|
| 377 |
#' @description `r lifecycle::badge("stable")`
|
|
| 378 |
#' |
|
| 379 |
#' Helper layout-creating function to append the variable labels of a given variables vector |
|
| 380 |
#' from a given dataset in the top left corner. If a variable label is not found then the |
|
| 381 |
#' variable name itself is used instead. Multiple variable labels are concatenated with slashes. |
|
| 382 |
#' |
|
| 383 |
#' @inheritParams argument_convention |
|
| 384 |
#' @param vars (`character`)\cr variable names of which the labels are to be looked up in `df`. |
|
| 385 |
#' @param indent (`integer(1)`)\cr non-negative number of nested indent space, default to 0L which means no indent. |
|
| 386 |
#' 1L means two spaces indent, 2L means four spaces indent and so on. |
|
| 387 |
#' |
|
| 388 |
#' @return A modified layout with the new variable label(s) added to the top-left material. |
|
| 389 |
#' |
|
| 390 |
#' @note This is not an optimal implementation of course, since we are using here the data set |
|
| 391 |
#' itself during the layout creation. When we have a more mature `rtables` implementation then |
|
| 392 |
#' this will also be improved or not necessary anymore. |
|
| 393 |
#' |
|
| 394 |
#' @examples |
|
| 395 |
#' lyt <- basic_table() %>% |
|
| 396 |
#' split_cols_by("ARM") %>%
|
|
| 397 |
#' add_colcounts() %>% |
|
| 398 |
#' split_rows_by("SEX") %>%
|
|
| 399 |
#' append_varlabels(DM, "SEX") %>% |
|
| 400 |
#' analyze("AGE", afun = mean) %>%
|
|
| 401 |
#' append_varlabels(DM, "AGE", indent = 1) |
|
| 402 |
#' build_table(lyt, DM) |
|
| 403 |
#' |
|
| 404 |
#' lyt <- basic_table() %>% |
|
| 405 |
#' split_cols_by("ARM") %>%
|
|
| 406 |
#' split_rows_by("SEX") %>%
|
|
| 407 |
#' analyze("AGE", afun = mean) %>%
|
|
| 408 |
#' append_varlabels(DM, c("SEX", "AGE"))
|
|
| 409 |
#' build_table(lyt, DM) |
|
| 410 |
#' |
|
| 411 |
#' @export |
|
| 412 |
append_varlabels <- function(lyt, df, vars, indent = 0L) {
|
|
| 413 | 3x |
if (checkmate::test_flag(indent)) {
|
| 414 | ! |
warning("indent argument is now accepting integers. Boolean indent will be converted to integers.")
|
| 415 | ! |
indent <- as.integer(indent) |
| 416 |
} |
|
| 417 | ||
| 418 | 3x |
checkmate::assert_data_frame(df) |
| 419 | 3x |
checkmate::assert_character(vars) |
| 420 | 3x |
checkmate::assert_count(indent) |
| 421 | ||
| 422 | 3x |
lab <- formatters::var_labels(df[vars], fill = TRUE) |
| 423 | 3x |
lab <- paste(lab, collapse = " / ") |
| 424 | 3x |
space <- paste(rep(" ", indent * 2), collapse = "")
|
| 425 | 3x |
lab <- paste0(space, lab) |
| 426 | ||
| 427 | 3x |
append_topleft(lyt, lab) |
| 428 |
} |
|
| 429 | ||
| 430 |
#' Default string replacement for `NA` values |
|
| 431 |
#' |
|
| 432 |
#' @description `r lifecycle::badge("stable")`
|
|
| 433 |
#' |
|
| 434 |
#' The default string used to represent `NA` values. This value is used as the default |
|
| 435 |
#' value for the `na_str` argument throughout the `tern` package, and printed in place |
|
| 436 |
#' of `NA` values in output tables. If not specified for each `tern` function by the user |
|
| 437 |
#' via the `na_str` argument, or in the R environment options via [set_default_na_str()], |
|
| 438 |
#' then `NA` is used. |
|
| 439 |
#' |
|
| 440 |
#' @param na_str (`string`)\cr single string value to set in the R environment options as |
|
| 441 |
#' the default value to replace `NA`s. Use `getOption("tern_default_na_str")` to check the
|
|
| 442 |
#' current value set in the R environment (defaults to `NULL` if not set). |
|
| 443 |
#' |
|
| 444 |
#' @name default_na_str |
|
| 445 |
NULL |
|
| 446 | ||
| 447 |
#' @describeIn default_na_str Accessor for default `NA` value replacement string. |
|
| 448 |
#' |
|
| 449 |
#' @return |
|
| 450 |
#' * `default_na_str` returns the current value if an R environment option has been set |
|
| 451 |
#' for `"tern_default_na_str"`, or `NA_character_` otherwise. |
|
| 452 |
#' |
|
| 453 |
#' @examples |
|
| 454 |
#' # Default settings |
|
| 455 |
#' default_na_str() |
|
| 456 |
#' getOption("tern_default_na_str")
|
|
| 457 |
#' |
|
| 458 |
#' # Set custom value |
|
| 459 |
#' set_default_na_str("<Missing>")
|
|
| 460 |
#' |
|
| 461 |
#' # Settings after value has been set |
|
| 462 |
#' default_na_str() |
|
| 463 |
#' getOption("tern_default_na_str")
|
|
| 464 |
#' |
|
| 465 |
#' @export |
|
| 466 |
default_na_str <- function() {
|
|
| 467 | 284x |
getOption("tern_default_na_str", default = NA_character_)
|
| 468 |
} |
|
| 469 | ||
| 470 |
#' @describeIn default_na_str Setter for default `NA` value replacement string. Sets the |
|
| 471 |
#' option `"tern_default_na_str"` within the R environment. |
|
| 472 |
#' |
|
| 473 |
#' @return |
|
| 474 |
#' * `set_default_na_str` has no return value. |
|
| 475 |
#' |
|
| 476 |
#' @export |
|
| 477 |
set_default_na_str <- function(na_str) {
|
|
| 478 | 4x |
checkmate::assert_character(na_str, len = 1, null.ok = TRUE) |
| 479 | 4x |
options("tern_default_na_str" = na_str)
|
| 480 |
} |
|
| 481 | ||
| 482 | ||
| 483 |
#' Utilities to handle extra arguments in analysis functions |
|
| 484 |
#' |
|
| 485 |
#' @description `r lifecycle::badge("stable")`
|
|
| 486 |
#' Important additional parameters, useful to modify behavior of analysis and summary |
|
| 487 |
#' functions are listed in [rtables::additional_fun_params]. With these utility functions |
|
| 488 |
#' we can retrieve a curated list of these parameters from the environment, and pass them |
|
| 489 |
#' to the analysis functions with dedicated `...`; notice that the final `s_*` function |
|
| 490 |
#' will get them through argument matching. |
|
| 491 |
#' |
|
| 492 |
#' @param extra_afun_params (`list`)\cr list of additional parameters (`character`) to be |
|
| 493 |
#' retrieved from the environment. Curated list is present in [rtables::additional_fun_params]. |
|
| 494 |
#' @param add_alt_df (`logical`)\cr if `TRUE`, the function will also add `.alt_df` and `.alt_df_row` |
|
| 495 |
#' parameters. |
|
| 496 |
#' |
|
| 497 |
#' @name util_handling_additional_fun_params |
|
| 498 |
NULL |
|
| 499 | ||
| 500 |
#' @describeIn util_handling_additional_fun_params Retrieve additional parameters from the environment. |
|
| 501 |
#' |
|
| 502 |
#' @return |
|
| 503 |
#' * `retrieve_extra_afun_params` returns a list of the values of the parameters in the environment. |
|
| 504 |
#' |
|
| 505 |
#' @keywords internal |
|
| 506 |
retrieve_extra_afun_params <- function(extra_afun_params) {
|
|
| 507 | 1607x |
out <- list() |
| 508 | 1607x |
for (extra_param in extra_afun_params) {
|
| 509 | 16115x |
out <- c(out, list(get(extra_param, envir = parent.frame()))) |
| 510 |
} |
|
| 511 | 1607x |
setNames(out, extra_afun_params) |
| 512 |
} |
|
| 513 | ||
| 514 |
#' @describeIn util_handling_additional_fun_params Curated list of additional parameters for |
|
| 515 |
#' analysis functions. Please check [rtables::additional_fun_params] for precise descriptions. |
|
| 516 |
#' |
|
| 517 |
#' @return |
|
| 518 |
#' * `get_additional_afun_params` returns a list of additional parameters. |
|
| 519 |
#' |
|
| 520 |
#' @keywords internal |
|
| 521 |
get_additional_afun_params <- function(add_alt_df = FALSE) {
|
|
| 522 | 247x |
out_list <- list( |
| 523 | 247x |
.N_col = integer(), |
| 524 | 247x |
.N_total = integer(), |
| 525 | 247x |
.N_row = integer(), |
| 526 | 247x |
.df_row = data.frame(), |
| 527 | 247x |
.var = character(), |
| 528 | 247x |
.ref_group = character(), |
| 529 | 247x |
.ref_full = vector(mode = "numeric"), |
| 530 | 247x |
.in_ref_col = logical(), |
| 531 | 247x |
.spl_context = data.frame(), |
| 532 | 247x |
.all_col_exprs = vector(mode = "expression"), |
| 533 | 247x |
.all_col_counts = vector(mode = "integer") |
| 534 |
) |
|
| 535 | ||
| 536 | 247x |
if (isTRUE(add_alt_df)) {
|
| 537 | ! |
out_list <- c( |
| 538 | ! |
out_list, |
| 539 | ! |
.alt_df_row = data.frame(), |
| 540 | ! |
.alt_df = data.frame() |
| 541 |
) |
|
| 542 |
} |
|
| 543 | ||
| 544 | 247x |
out_list |
| 545 |
} |
| 1 |
#' Apply 1/3 or 1/2 imputation rule to data |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' @inheritParams argument_convention |
|
| 6 |
#' @param x_stats (named `list`)\cr a named list of statistics, typically the results of [s_summary()]. |
|
| 7 |
#' @param stat (`string`)\cr statistic to return the value/NA level of according to the imputation |
|
| 8 |
#' rule applied. |
|
| 9 |
#' @param imp_rule (`string`)\cr imputation rule setting. Set to `"1/3"` to implement 1/3 imputation |
|
| 10 |
#' rule or `"1/2"` to implement 1/2 imputation rule. |
|
| 11 |
#' @param post (`flag`)\cr whether the data corresponds to a post-dose time-point (defaults to `FALSE`). |
|
| 12 |
#' This parameter is only used when `imp_rule` is set to `"1/3"`. |
|
| 13 |
#' @param avalcat_var (`string`)\cr name of variable that indicates whether a row in `df` corresponds |
|
| 14 |
#' to an analysis value in category `"BLQ"`, `"LTR"`, `"<PCLLOQ"`, or none of the above |
|
| 15 |
#' (defaults to `"AVALCAT1"`). Variable `avalcat_var` must be present in `df`. |
|
| 16 |
#' |
|
| 17 |
#' @return A `list` containing statistic value (`val`) and NA level (`na_str`) that should be displayed |
|
| 18 |
#' according to the specified imputation rule. |
|
| 19 |
#' |
|
| 20 |
#' @seealso [analyze_vars_in_cols()] where this function can be implemented by setting the `imp_rule` |
|
| 21 |
#' argument. |
|
| 22 |
#' |
|
| 23 |
#' @examples |
|
| 24 |
#' set.seed(1) |
|
| 25 |
#' df <- data.frame( |
|
| 26 |
#' AVAL = runif(50, 0, 1), |
|
| 27 |
#' AVALCAT1 = sample(c(1, "BLQ"), 50, replace = TRUE) |
|
| 28 |
#' ) |
|
| 29 |
#' x_stats <- s_summary(df$AVAL) |
|
| 30 |
#' imputation_rule(df, x_stats, "max", "1/3") |
|
| 31 |
#' imputation_rule(df, x_stats, "geom_mean", "1/3") |
|
| 32 |
#' imputation_rule(df, x_stats, "mean", "1/2") |
|
| 33 |
#' |
|
| 34 |
#' @export |
|
| 35 |
imputation_rule <- function(df, x_stats, stat, imp_rule, post = FALSE, avalcat_var = "AVALCAT1") {
|
|
| 36 | 128x |
checkmate::assert_choice(avalcat_var, names(df)) |
| 37 | 128x |
checkmate::assert_choice(imp_rule, c("1/3", "1/2"))
|
| 38 | 128x |
n_blq <- sum(grepl("BLQ|LTR|<[1-9]|<PCLLOQ", df[[avalcat_var]]))
|
| 39 | 128x |
ltr_blq_ratio <- n_blq / max(1, nrow(df)) |
| 40 | ||
| 41 |
# defaults |
|
| 42 | 128x |
val <- x_stats[[stat]] |
| 43 | 128x |
na_str <- "NE" |
| 44 | ||
| 45 | 128x |
if (imp_rule == "1/3") {
|
| 46 | 2x |
if (!post && stat == "geom_mean") val <- NA # 1/3_pre_LT, 1/3_pre_GT |
| 47 | 84x |
if (ltr_blq_ratio > 1 / 3) {
|
| 48 | 63x |
if (stat != "geom_mean") na_str <- "ND" # 1/3_pre_GT, 1/3_post_GT |
| 49 | 9x |
if (!post && !stat %in% c("median", "max")) val <- NA # 1/3_pre_GT
|
| 50 | 39x |
if (post && !stat %in% c("median", "max", "geom_mean")) val <- NA # 1/3_post_GT
|
| 51 |
} |
|
| 52 | 44x |
} else if (imp_rule == "1/2") {
|
| 53 | 44x |
if (ltr_blq_ratio > 1 / 2 && !stat == "max") {
|
| 54 | 12x |
val <- NA # 1/2_GT |
| 55 | 12x |
na_str <- "ND" # 1/2_GT |
| 56 |
} |
|
| 57 |
} |
|
| 58 | ||
| 59 | 128x |
list(val = val, na_str = na_str) |
| 60 |
} |
| 1 |
#' Control functions for Kaplan-Meier plot annotation tables |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Auxiliary functions for controlling arguments for formatting the annotation tables that can be added to plots |
|
| 6 |
#' generated via [g_km()]. |
|
| 7 |
#' |
|
| 8 |
#' @param x (`proportion`)\cr x-coordinate for center of annotation table. |
|
| 9 |
#' @param y (`proportion`)\cr y-coordinate for center of annotation table. |
|
| 10 |
#' @param w (`proportion`)\cr relative width of the annotation table. |
|
| 11 |
#' @param h (`proportion`)\cr relative height of the annotation table. |
|
| 12 |
#' @param fill (`flag` or `character`)\cr whether the annotation table should have a background fill color. |
|
| 13 |
#' Can also be a color code to use as the background fill color. If `TRUE`, color code defaults to `"#00000020"`. |
|
| 14 |
#' |
|
| 15 |
#' @return A list of components with the same names as the arguments. |
|
| 16 |
#' |
|
| 17 |
#' @seealso [g_km()] |
|
| 18 |
#' |
|
| 19 |
#' @name control_annot |
|
| 20 |
NULL |
|
| 21 | ||
| 22 |
#' @describeIn control_annot Control function for formatting the median survival time annotation table. This annotation |
|
| 23 |
#' table can be added in [g_km()] by setting `annot_surv_med=TRUE`, and can be configured using the |
|
| 24 |
#' `control_surv_med_annot()` function by setting it as the `control_annot_surv_med` argument. |
|
| 25 |
#' |
|
| 26 |
#' @examples |
|
| 27 |
#' control_surv_med_annot() |
|
| 28 |
#' |
|
| 29 |
#' @export |
|
| 30 |
control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) {
|
|
| 31 | 22x |
assert_proportion_value(x) |
| 32 | 22x |
assert_proportion_value(y) |
| 33 | 22x |
assert_proportion_value(w) |
| 34 | 22x |
assert_proportion_value(h) |
| 35 | ||
| 36 | 22x |
list(x = x, y = y, w = w, h = h, fill = fill) |
| 37 |
} |
|
| 38 | ||
| 39 |
#' @describeIn control_annot Control function for formatting the Cox-PH annotation table. This annotation table can be |
|
| 40 |
#' added in [g_km()] by setting `annot_coxph=TRUE`, and can be configured using the `control_coxph_annot()` function |
|
| 41 |
#' by setting it as the `control_annot_coxph` argument. |
|
| 42 |
#' |
|
| 43 |
#' @param ref_lbls (`flag`)\cr whether the reference group should be explicitly printed in labels for the |
|
| 44 |
#' annotation table. If `FALSE` (default), only comparison groups will be printed in the table labels. |
|
| 45 |
#' |
|
| 46 |
#' @examples |
|
| 47 |
#' control_coxph_annot() |
|
| 48 |
#' |
|
| 49 |
#' @export |
|
| 50 |
control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = TRUE, ref_lbls = FALSE) {
|
|
| 51 | 11x |
checkmate::assert_logical(ref_lbls, any.missing = FALSE) |
| 52 | ||
| 53 | 11x |
res <- c(control_surv_med_annot(x = x, y = y, w = w, h = h), list(ref_lbls = ref_lbls)) |
| 54 | 11x |
res |
| 55 |
} |
|
| 56 | ||
| 57 |
#' Helper function to calculate x-tick positions |
|
| 58 |
#' |
|
| 59 |
#' @description `r lifecycle::badge("stable")`
|
|
| 60 |
#' |
|
| 61 |
#' Calculate the positions of ticks on the x-axis. However, if `xticks` already |
|
| 62 |
#' exists it is kept as is. It is based on the same function `ggplot2` relies on, |
|
| 63 |
#' and is required in the graphic and the patient-at-risk annotation table. |
|
| 64 |
#' |
|
| 65 |
#' @inheritParams g_km |
|
| 66 |
#' @inheritParams h_ggkm |
|
| 67 |
#' |
|
| 68 |
#' @return A vector of positions to use for x-axis ticks on a `ggplot` object. |
|
| 69 |
#' |
|
| 70 |
#' @examples |
|
| 71 |
#' library(dplyr) |
|
| 72 |
#' library(survival) |
|
| 73 |
#' |
|
| 74 |
#' data <- tern_ex_adtte %>% |
|
| 75 |
#' filter(PARAMCD == "OS") %>% |
|
| 76 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>% |
|
| 77 |
#' h_data_plot() |
|
| 78 |
#' |
|
| 79 |
#' h_xticks(data) |
|
| 80 |
#' h_xticks(data, xticks = seq(0, 3000, 500)) |
|
| 81 |
#' h_xticks(data, xticks = 500) |
|
| 82 |
#' h_xticks(data, xticks = 500, max_time = 6000) |
|
| 83 |
#' h_xticks(data, xticks = c(0, 500), max_time = 300) |
|
| 84 |
#' h_xticks(data, xticks = 500, max_time = 300) |
|
| 85 |
#' |
|
| 86 |
#' @export |
|
| 87 |
h_xticks <- function(data, xticks = NULL, max_time = NULL) {
|
|
| 88 | 18x |
if (is.null(xticks)) {
|
| 89 | 13x |
if (is.null(max_time)) {
|
| 90 | 11x |
labeling::extended(range(data$time)[1], range(data$time)[2], m = 5) |
| 91 |
} else {
|
|
| 92 | 2x |
labeling::extended(range(data$time)[1], max(range(data$time)[2], max_time), m = 5) |
| 93 |
} |
|
| 94 | 5x |
} else if (checkmate::test_number(xticks)) {
|
| 95 | 2x |
if (is.null(max_time)) {
|
| 96 | 1x |
seq(0, max(data$time), xticks) |
| 97 |
} else {
|
|
| 98 | 1x |
seq(0, max(data$time, max_time), xticks) |
| 99 |
} |
|
| 100 | 3x |
} else if (is.numeric(xticks)) {
|
| 101 | 2x |
xticks |
| 102 |
} else {
|
|
| 103 | 1x |
stop( |
| 104 | 1x |
paste( |
| 105 | 1x |
"xticks should be either `NULL`", |
| 106 | 1x |
"or a single number (interval between x ticks)", |
| 107 | 1x |
"or a numeric vector (position of ticks on the x axis)" |
| 108 |
) |
|
| 109 |
) |
|
| 110 |
} |
|
| 111 |
} |
|
| 112 | ||
| 113 |
#' Helper function for survival estimations |
|
| 114 |
#' |
|
| 115 |
#' @description `r lifecycle::badge("stable")`
|
|
| 116 |
#' |
|
| 117 |
#' Transform a survival fit to a table with groups in rows characterized by N, median and confidence interval. |
|
| 118 |
#' |
|
| 119 |
#' @inheritParams h_data_plot |
|
| 120 |
#' |
|
| 121 |
#' @return A summary table with statistics `N`, `Median`, and `XX% CI` (`XX` taken from `fit_km`). |
|
| 122 |
#' |
|
| 123 |
#' @examples |
|
| 124 |
#' library(dplyr) |
|
| 125 |
#' library(survival) |
|
| 126 |
#' |
|
| 127 |
#' adtte <- tern_ex_adtte %>% filter(PARAMCD == "OS") |
|
| 128 |
#' fit <- survfit( |
|
| 129 |
#' formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, |
|
| 130 |
#' data = adtte |
|
| 131 |
#' ) |
|
| 132 |
#' h_tbl_median_surv(fit_km = fit) |
|
| 133 |
#' |
|
| 134 |
#' @export |
|
| 135 |
h_tbl_median_surv <- function(fit_km, armval = "All") {
|
|
| 136 | 10x |
y <- if (is.null(fit_km$strata)) {
|
| 137 | ! |
as.data.frame(t(summary(fit_km)$table), row.names = armval) |
| 138 |
} else {
|
|
| 139 | 10x |
tbl <- summary(fit_km)$table |
| 140 | 10x |
rownames_lst <- strsplit(sub("=", "equals", rownames(tbl)), "equals")
|
| 141 | 10x |
rownames(tbl) <- matrix(unlist(rownames_lst), ncol = 2, byrow = TRUE)[, 2] |
| 142 | 10x |
as.data.frame(tbl) |
| 143 |
} |
|
| 144 | 10x |
conf.int <- summary(fit_km)$conf.int # nolint |
| 145 | 10x |
y$records <- round(y$records) |
| 146 | 10x |
y$median <- signif(y$median, 4) |
| 147 | 10x |
y$`CI` <- paste0( |
| 148 | 10x |
"(", signif(y[[paste0(conf.int, "LCL")]], 4), ", ", signif(y[[paste0(conf.int, "UCL")]], 4), ")"
|
| 149 |
) |
|
| 150 | 10x |
stats::setNames( |
| 151 | 10x |
y[c("records", "median", "CI")],
|
| 152 | 10x |
c("N", "Median", f_conf_level(conf.int))
|
| 153 |
) |
|
| 154 |
} |
|
| 155 | ||
| 156 |
#' Helper function for generating a pairwise Cox-PH table |
|
| 157 |
#' |
|
| 158 |
#' @description `r lifecycle::badge("stable")`
|
|
| 159 |
#' |
|
| 160 |
#' Create a `data.frame` of pairwise stratified or unstratified Cox-PH analysis results. |
|
| 161 |
#' |
|
| 162 |
#' @inheritParams g_km |
|
| 163 |
#' @param annot_coxph_ref_lbls (`flag`)\cr whether the reference group should be explicitly printed in labels for the |
|
| 164 |
#' `annot_coxph` table. If `FALSE` (default), only comparison groups will be printed in `annot_coxph` table labels. |
|
| 165 |
#' |
|
| 166 |
#' @return A `data.frame` containing statistics `HR`, `XX% CI` (`XX` taken from `control_coxph_pw`), |
|
| 167 |
#' and `p-value (log-rank)`. |
|
| 168 |
#' |
|
| 169 |
#' @examples |
|
| 170 |
#' library(dplyr) |
|
| 171 |
#' |
|
| 172 |
#' adtte <- tern_ex_adtte %>% |
|
| 173 |
#' filter(PARAMCD == "OS") %>% |
|
| 174 |
#' mutate(is_event = CNSR == 0) |
|
| 175 |
#' |
|
| 176 |
#' h_tbl_coxph_pairwise( |
|
| 177 |
#' df = adtte, |
|
| 178 |
#' variables = list(tte = "AVAL", is_event = "is_event", arm = "ARM"), |
|
| 179 |
#' control_coxph_pw = control_coxph(conf_level = 0.9) |
|
| 180 |
#' ) |
|
| 181 |
#' |
|
| 182 |
#' @export |
|
| 183 |
h_tbl_coxph_pairwise <- function(df, |
|
| 184 |
variables, |
|
| 185 |
ref_group_coxph = NULL, |
|
| 186 |
control_coxph_pw = control_coxph(), |
|
| 187 |
annot_coxph_ref_lbls = FALSE) {
|
|
| 188 | 4x |
if ("strat" %in% names(variables)) {
|
| 189 | ! |
warning( |
| 190 | ! |
"Warning: the `strat` element name of the `variables` list argument to `h_tbl_coxph_pairwise() ", |
| 191 | ! |
"was deprecated in tern 0.9.4.\n ", |
| 192 | ! |
"Please use the name `strata` instead of `strat` in the `variables` argument." |
| 193 |
) |
|
| 194 | ! |
variables[["strata"]] <- variables[["strat"]] |
| 195 |
} |
|
| 196 | ||
| 197 | 4x |
assert_df_with_variables(df, variables) |
| 198 | 4x |
checkmate::assert_choice(ref_group_coxph, levels(df[[variables$arm]]), null.ok = TRUE) |
| 199 | 4x |
checkmate::assert_flag(annot_coxph_ref_lbls) |
| 200 | ||
| 201 | 4x |
arm <- variables$arm |
| 202 | 4x |
df[[arm]] <- factor(df[[arm]]) |
| 203 | ||
| 204 | 4x |
ref_group <- if (!is.null(ref_group_coxph)) ref_group_coxph else levels(df[[variables$arm]])[1] |
| 205 | 4x |
comp_group <- setdiff(levels(df[[arm]]), ref_group) |
| 206 | ||
| 207 | 4x |
results <- Map(function(comp) {
|
| 208 | 8x |
res <- s_coxph_pairwise( |
| 209 | 8x |
df = df[df[[arm]] == comp, , drop = FALSE], |
| 210 | 8x |
.ref_group = df[df[[arm]] == ref_group, , drop = FALSE], |
| 211 | 8x |
.in_ref_col = FALSE, |
| 212 | 8x |
.var = variables$tte, |
| 213 | 8x |
is_event = variables$is_event, |
| 214 | 8x |
strata = variables$strata, |
| 215 | 8x |
control = control_coxph_pw |
| 216 |
) |
|
| 217 | 8x |
res_df <- data.frame( |
| 218 | 8x |
hr = format(round(res$hr, 2), nsmall = 2), |
| 219 | 8x |
hr_ci = paste0( |
| 220 | 8x |
"(", format(round(res$hr_ci[1], 2), nsmall = 2), ", ",
|
| 221 | 8x |
format(round(res$hr_ci[2], 2), nsmall = 2), ")" |
| 222 |
), |
|
| 223 | 8x |
pvalue = if (res$pvalue < 0.0001) "<0.0001" else format(round(res$pvalue, 4), 4), |
| 224 | 8x |
stringsAsFactors = FALSE |
| 225 |
) |
|
| 226 | 8x |
colnames(res_df) <- c("HR", vapply(res[c("hr_ci", "pvalue")], obj_label, FUN.VALUE = "character"))
|
| 227 | 8x |
row.names(res_df) <- comp |
| 228 | 8x |
res_df |
| 229 | 4x |
}, comp_group) |
| 230 | 1x |
if (annot_coxph_ref_lbls) names(results) <- paste(comp_group, "vs.", ref_group) |
| 231 | ||
| 232 | 4x |
do.call(rbind, results) |
| 233 |
} |
|
| 234 | ||
| 235 |
#' Helper function to tidy survival fit data |
|
| 236 |
#' |
|
| 237 |
#' @description `r lifecycle::badge("stable")`
|
|
| 238 |
#' |
|
| 239 |
#' Convert the survival fit data into a data frame designed for plotting |
|
| 240 |
#' within `g_km`. |
|
| 241 |
#' |
|
| 242 |
#' This starts from the [broom::tidy()] result, and then: |
|
| 243 |
#' * Post-processes the `strata` column into a factor. |
|
| 244 |
#' * Extends each stratum by an additional first row with time 0 and probability 1 so that |
|
| 245 |
#' downstream plot lines start at those coordinates. |
|
| 246 |
#' * Adds a `censor` column. |
|
| 247 |
#' * Filters the rows before `max_time`. |
|
| 248 |
#' |
|
| 249 |
#' @inheritParams g_km |
|
| 250 |
#' @param fit_km (`survfit`)\cr result of [survival::survfit()]. |
|
| 251 |
#' @param armval (`string`)\cr used as strata name when treatment arm variable only has one level. Default is `"All"`. |
|
| 252 |
#' |
|
| 253 |
#' @return A `tibble` with columns `time`, `n.risk`, `n.event`, `n.censor`, `estimate`, `std.error`, `conf.high`, |
|
| 254 |
#' `conf.low`, `strata`, and `censor`. |
|
| 255 |
#' |
|
| 256 |
#' @examples |
|
| 257 |
#' library(dplyr) |
|
| 258 |
#' library(survival) |
|
| 259 |
#' |
|
| 260 |
#' # Test with multiple arms |
|
| 261 |
#' tern_ex_adtte %>% |
|
| 262 |
#' filter(PARAMCD == "OS") %>% |
|
| 263 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>% |
|
| 264 |
#' h_data_plot() |
|
| 265 |
#' |
|
| 266 |
#' # Test with single arm |
|
| 267 |
#' tern_ex_adtte %>% |
|
| 268 |
#' filter(PARAMCD == "OS", ARMCD == "ARM B") %>% |
|
| 269 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>% |
|
| 270 |
#' h_data_plot(armval = "ARM B") |
|
| 271 |
#' |
|
| 272 |
#' @export |
|
| 273 |
h_data_plot <- function(fit_km, |
|
| 274 |
armval = "All", |
|
| 275 |
max_time = NULL) {
|
|
| 276 | 18x |
y <- broom::tidy(fit_km) |
| 277 | ||
| 278 | 18x |
if (!is.null(fit_km$strata)) {
|
| 279 | 18x |
fit_km_var_level <- strsplit(sub("=", "equals", names(fit_km$strata)), "equals")
|
| 280 | 18x |
strata_levels <- vapply(fit_km_var_level, FUN = "[", FUN.VALUE = "a", i = 2) |
| 281 | 18x |
strata_var_level <- strsplit(sub("=", "equals", y$strata), "equals")
|
| 282 | 18x |
y$strata <- factor( |
| 283 | 18x |
vapply(strata_var_level, FUN = "[", FUN.VALUE = "a", i = 2), |
| 284 | 18x |
levels = strata_levels |
| 285 |
) |
|
| 286 |
} else {
|
|
| 287 | ! |
y$strata <- armval |
| 288 |
} |
|
| 289 | ||
| 290 | 18x |
y_by_strata <- split(y, y$strata) |
| 291 | 18x |
y_by_strata_extended <- lapply( |
| 292 | 18x |
y_by_strata, |
| 293 | 18x |
FUN = function(tbl) {
|
| 294 | 53x |
first_row <- tbl[1L, ] |
| 295 | 53x |
first_row$time <- 0 |
| 296 | 53x |
first_row$n.risk <- sum(first_row[, c("n.risk", "n.event", "n.censor")])
|
| 297 | 53x |
first_row$n.event <- first_row$n.censor <- 0 |
| 298 | 53x |
first_row$estimate <- first_row$conf.high <- first_row$conf.low <- 1 |
| 299 | 53x |
first_row$std.error <- 0 |
| 300 | 53x |
rbind( |
| 301 | 53x |
first_row, |
| 302 | 53x |
tbl |
| 303 |
) |
|
| 304 |
} |
|
| 305 |
) |
|
| 306 | 18x |
y <- do.call(rbind, y_by_strata_extended) |
| 307 | ||
| 308 | 18x |
y$censor <- ifelse(y$n.censor > 0, y$estimate, NA) |
| 309 | 18x |
if (!is.null(max_time)) {
|
| 310 | 1x |
y <- y[y$time <= max(max_time), ] |
| 311 |
} |
|
| 312 | 18x |
y |
| 313 |
} |
|
| 314 | ||
| 315 |
## Deprecated Functions ---- |
|
| 316 | ||
| 317 |
#' Helper function to create a KM plot |
|
| 318 |
#' |
|
| 319 |
#' @description `r lifecycle::badge("deprecated")`
|
|
| 320 |
#' |
|
| 321 |
#' Draw the Kaplan-Meier plot using `ggplot2`. |
|
| 322 |
#' |
|
| 323 |
#' @inheritParams g_km |
|
| 324 |
#' @param data (`data.frame`)\cr survival data as pre-processed by `h_data_plot`. |
|
| 325 |
#' |
|
| 326 |
#' @return A `ggplot` object. |
|
| 327 |
#' |
|
| 328 |
#' @examples |
|
| 329 |
#' \donttest{
|
|
| 330 |
#' library(dplyr) |
|
| 331 |
#' library(survival) |
|
| 332 |
#' |
|
| 333 |
#' fit_km <- tern_ex_adtte %>% |
|
| 334 |
#' filter(PARAMCD == "OS") %>% |
|
| 335 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) |
|
| 336 |
#' data_plot <- h_data_plot(fit_km = fit_km) |
|
| 337 |
#' xticks <- h_xticks(data = data_plot) |
|
| 338 |
#' gg <- h_ggkm( |
|
| 339 |
#' data = data_plot, |
|
| 340 |
#' censor_show = TRUE, |
|
| 341 |
#' xticks = xticks, |
|
| 342 |
#' xlab = "Days", |
|
| 343 |
#' yval = "Survival", |
|
| 344 |
#' ylab = "Survival Probability", |
|
| 345 |
#' title = "Survival" |
|
| 346 |
#' ) |
|
| 347 |
#' gg |
|
| 348 |
#' } |
|
| 349 |
#' |
|
| 350 |
#' @export |
|
| 351 |
h_ggkm <- function(data, |
|
| 352 |
xticks = NULL, |
|
| 353 |
yval = "Survival", |
|
| 354 |
censor_show, |
|
| 355 |
xlab, |
|
| 356 |
ylab, |
|
| 357 |
ylim = NULL, |
|
| 358 |
title, |
|
| 359 |
footnotes = NULL, |
|
| 360 |
max_time = NULL, |
|
| 361 |
lwd = 1, |
|
| 362 |
lty = NULL, |
|
| 363 |
pch = 3, |
|
| 364 |
size = 2, |
|
| 365 |
col = NULL, |
|
| 366 |
ci_ribbon = FALSE, |
|
| 367 |
ggtheme = nestcolor::theme_nest()) {
|
|
| 368 | 1x |
lifecycle::deprecate_warn( |
| 369 | 1x |
"0.9.4", |
| 370 | 1x |
"h_ggkm()", |
| 371 | 1x |
details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." |
| 372 |
) |
|
| 373 | 1x |
checkmate::assert_numeric(lty, null.ok = TRUE) |
| 374 | 1x |
checkmate::assert_character(col, null.ok = TRUE) |
| 375 | ||
| 376 | 1x |
if (is.null(ylim)) {
|
| 377 | 1x |
data_lims <- data |
| 378 | ! |
if (yval == "Failure") data_lims[["estimate"]] <- 1 - data_lims[["estimate"]] |
| 379 | 1x |
if (!is.null(max_time)) {
|
| 380 | ! |
y_lwr <- min(data_lims[data_lims$time < max_time, ][["estimate"]]) |
| 381 | ! |
y_upr <- max(data_lims[data_lims$time < max_time, ][["estimate"]]) |
| 382 |
} else {
|
|
| 383 | 1x |
y_lwr <- min(data_lims[["estimate"]]) |
| 384 | 1x |
y_upr <- max(data_lims[["estimate"]]) |
| 385 |
} |
|
| 386 | 1x |
ylim <- c(y_lwr, y_upr) |
| 387 |
} |
|
| 388 | 1x |
checkmate::assert_numeric(ylim, finite = TRUE, any.missing = FALSE, len = 2, sorted = TRUE) |
| 389 | ||
| 390 |
# change estimates of survival to estimates of failure (1 - survival) |
|
| 391 | 1x |
if (yval == "Failure") {
|
| 392 | ! |
data$estimate <- 1 - data$estimate |
| 393 | ! |
data[c("conf.high", "conf.low")] <- list(1 - data$conf.low, 1 - data$conf.high)
|
| 394 | ! |
data$censor <- 1 - data$censor |
| 395 |
} |
|
| 396 | ||
| 397 | 1x |
gg <- {
|
| 398 | 1x |
ggplot2::ggplot( |
| 399 | 1x |
data = data, |
| 400 | 1x |
mapping = ggplot2::aes( |
| 401 | 1x |
x = .data[["time"]], |
| 402 | 1x |
y = .data[["estimate"]], |
| 403 | 1x |
ymin = .data[["conf.low"]], |
| 404 | 1x |
ymax = .data[["conf.high"]], |
| 405 | 1x |
color = .data[["strata"]], |
| 406 | 1x |
fill = .data[["strata"]] |
| 407 |
) |
|
| 408 |
) + |
|
| 409 | 1x |
ggplot2::geom_hline(yintercept = 0) |
| 410 |
} |
|
| 411 | ||
| 412 | 1x |
if (ci_ribbon) {
|
| 413 | ! |
gg <- gg + ggplot2::geom_ribbon(alpha = .3, lty = 0) |
| 414 |
} |
|
| 415 | ||
| 416 | 1x |
gg <- if (is.null(lty)) {
|
| 417 | 1x |
gg + |
| 418 | 1x |
ggplot2::geom_step(linewidth = lwd) |
| 419 | 1x |
} else if (checkmate::test_number(lty)) {
|
| 420 | ! |
gg + |
| 421 | ! |
ggplot2::geom_step(linewidth = lwd, lty = lty) |
| 422 | 1x |
} else if (is.numeric(lty)) {
|
| 423 | ! |
gg + |
| 424 | ! |
ggplot2::geom_step(mapping = ggplot2::aes(linetype = .data[["strata"]]), linewidth = lwd) + |
| 425 | ! |
ggplot2::scale_linetype_manual(values = lty) |
| 426 |
} |
|
| 427 | ||
| 428 | 1x |
gg <- gg + |
| 429 | 1x |
ggplot2::coord_cartesian(ylim = ylim) + |
| 430 | 1x |
ggplot2::labs(x = xlab, y = ylab, title = title, caption = footnotes) |
| 431 | ||
| 432 | 1x |
if (!is.null(col)) {
|
| 433 | ! |
gg <- gg + |
| 434 | ! |
ggplot2::scale_color_manual(values = col) + |
| 435 | ! |
ggplot2::scale_fill_manual(values = col) |
| 436 |
} |
|
| 437 | 1x |
if (censor_show) {
|
| 438 | 1x |
dt <- data[data$n.censor != 0, ] |
| 439 | 1x |
dt$censor_lbl <- factor("Censored")
|
| 440 | ||
| 441 | 1x |
gg <- gg + ggplot2::geom_point( |
| 442 | 1x |
data = dt, |
| 443 | 1x |
ggplot2::aes( |
| 444 | 1x |
x = .data[["time"]], |
| 445 | 1x |
y = .data[["censor"]], |
| 446 | 1x |
shape = .data[["censor_lbl"]] |
| 447 |
), |
|
| 448 | 1x |
size = size, |
| 449 | 1x |
show.legend = TRUE, |
| 450 | 1x |
inherit.aes = TRUE |
| 451 |
) + |
|
| 452 | 1x |
ggplot2::scale_shape_manual(name = NULL, values = pch) + |
| 453 | 1x |
ggplot2::guides( |
| 454 | 1x |
shape = ggplot2::guide_legend(override.aes = list(linetype = NA)), |
| 455 | 1x |
fill = ggplot2::guide_legend(override.aes = list(shape = NA)) |
| 456 |
) |
|
| 457 |
} |
|
| 458 | ||
| 459 | 1x |
if (!is.null(max_time) && !is.null(xticks)) {
|
| 460 | ! |
gg <- gg + ggplot2::scale_x_continuous(breaks = xticks, limits = c(min(0, xticks), max(c(xticks, max_time)))) |
| 461 | 1x |
} else if (!is.null(xticks)) {
|
| 462 | 1x |
if (max(data$time) <= max(xticks)) {
|
| 463 | 1x |
gg <- gg + ggplot2::scale_x_continuous(breaks = xticks, limits = c(min(0, min(xticks)), max(xticks))) |
| 464 |
} else {
|
|
| 465 | ! |
gg <- gg + ggplot2::scale_x_continuous(breaks = xticks) |
| 466 |
} |
|
| 467 | ! |
} else if (!is.null(max_time)) {
|
| 468 | ! |
gg <- gg + ggplot2::scale_x_continuous(limits = c(0, max_time)) |
| 469 |
} |
|
| 470 | ||
| 471 | 1x |
if (!is.null(ggtheme)) {
|
| 472 | 1x |
gg <- gg + ggtheme |
| 473 |
} |
|
| 474 | ||
| 475 | 1x |
gg + ggplot2::theme( |
| 476 | 1x |
legend.position = "bottom", |
| 477 | 1x |
legend.title = ggplot2::element_blank(), |
| 478 | 1x |
legend.key.height = unit(0.02, "npc"), |
| 479 | 1x |
panel.grid.major.x = ggplot2::element_line(linewidth = 2) |
| 480 |
) |
|
| 481 |
} |
|
| 482 | ||
| 483 |
#' `ggplot` decomposition |
|
| 484 |
#' |
|
| 485 |
#' @description `r lifecycle::badge("deprecated")`
|
|
| 486 |
#' |
|
| 487 |
#' The elements composing the `ggplot` are extracted and organized in a `list`. |
|
| 488 |
#' |
|
| 489 |
#' @param gg (`ggplot`)\cr a graphic to decompose. |
|
| 490 |
#' |
|
| 491 |
#' @return A named `list` with elements: |
|
| 492 |
#' * `panel`: The panel. |
|
| 493 |
#' * `yaxis`: The y-axis. |
|
| 494 |
#' * `xaxis`: The x-axis. |
|
| 495 |
#' * `xlab`: The x-axis label. |
|
| 496 |
#' * `ylab`: The y-axis label. |
|
| 497 |
#' * `guide`: The legend. |
|
| 498 |
#' |
|
| 499 |
#' @examples |
|
| 500 |
#' \donttest{
|
|
| 501 |
#' library(dplyr) |
|
| 502 |
#' library(survival) |
|
| 503 |
#' library(grid) |
|
| 504 |
#' |
|
| 505 |
#' fit_km <- tern_ex_adtte %>% |
|
| 506 |
#' filter(PARAMCD == "OS") %>% |
|
| 507 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) |
|
| 508 |
#' data_plot <- h_data_plot(fit_km = fit_km) |
|
| 509 |
#' xticks <- h_xticks(data = data_plot) |
|
| 510 |
#' gg <- h_ggkm( |
|
| 511 |
#' data = data_plot, |
|
| 512 |
#' yval = "Survival", |
|
| 513 |
#' censor_show = TRUE, |
|
| 514 |
#' xticks = xticks, xlab = "Days", ylab = "Survival Probability", |
|
| 515 |
#' title = "tt", |
|
| 516 |
#' footnotes = "ff" |
|
| 517 |
#' ) |
|
| 518 |
#' |
|
| 519 |
#' g_el <- h_decompose_gg(gg) |
|
| 520 |
#' grid::grid.newpage() |
|
| 521 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "red", fill = "gray85", lwd = 5)) |
|
| 522 |
#' grid::grid.draw(g_el$panel) |
|
| 523 |
#' |
|
| 524 |
#' grid::grid.newpage() |
|
| 525 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "royalblue", fill = "gray85", lwd = 5)) |
|
| 526 |
#' grid::grid.draw(with(g_el, cbind(ylab, yaxis))) |
|
| 527 |
#' } |
|
| 528 |
#' |
|
| 529 |
#' @export |
|
| 530 |
h_decompose_gg <- function(gg) {
|
|
| 531 | 1x |
lifecycle::deprecate_warn( |
| 532 | 1x |
"0.9.4", |
| 533 | 1x |
"h_decompose_gg()", |
| 534 | 1x |
details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." |
| 535 |
) |
|
| 536 | 1x |
g_el <- ggplot2::ggplotGrob(gg) |
| 537 | 1x |
y <- c( |
| 538 | 1x |
panel = "panel", |
| 539 | 1x |
yaxis = "axis-l", |
| 540 | 1x |
xaxis = "axis-b", |
| 541 | 1x |
xlab = "xlab-b", |
| 542 | 1x |
ylab = "ylab-l", |
| 543 | 1x |
guide = "guide" |
| 544 |
) |
|
| 545 | 1x |
lapply(X = y, function(x) gtable::gtable_filter(g_el, x)) |
| 546 |
} |
|
| 547 | ||
| 548 |
#' Helper function to prepare a KM layout |
|
| 549 |
#' |
|
| 550 |
#' @description `r lifecycle::badge("deprecated")`
|
|
| 551 |
#' |
|
| 552 |
#' Prepares a (5 rows) x (2 cols) layout for the Kaplan-Meier curve. |
|
| 553 |
#' |
|
| 554 |
#' @inheritParams g_km |
|
| 555 |
#' @inheritParams h_ggkm |
|
| 556 |
#' @param g_el (`list` of `gtable`)\cr list as obtained by `h_decompose_gg()`. |
|
| 557 |
#' @param annot_at_risk (`flag`)\cr compute and add the annotation table reporting the number of |
|
| 558 |
#' patient at risk matching the main grid of the Kaplan-Meier curve. |
|
| 559 |
#' |
|
| 560 |
#' @return A grid layout. |
|
| 561 |
#' |
|
| 562 |
#' @details The layout corresponds to a grid of two columns and five rows of unequal dimensions. Most of the |
|
| 563 |
#' dimension are fixed, only the curve is flexible and will accommodate with the remaining free space. |
|
| 564 |
#' * The left column gets the annotation of the `ggplot` (y-axis) and the names of the strata for the patient |
|
| 565 |
#' at risk tabulation. The main constraint is about the width of the columns which must allow the writing of |
|
| 566 |
#' the strata name. |
|
| 567 |
#' * The right column receive the `ggplot`, the legend, the x-axis and the patient at risk table. |
|
| 568 |
#' |
|
| 569 |
#' @examples |
|
| 570 |
#' \donttest{
|
|
| 571 |
#' library(dplyr) |
|
| 572 |
#' library(survival) |
|
| 573 |
#' library(grid) |
|
| 574 |
#' |
|
| 575 |
#' fit_km <- tern_ex_adtte %>% |
|
| 576 |
#' filter(PARAMCD == "OS") %>% |
|
| 577 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) |
|
| 578 |
#' data_plot <- h_data_plot(fit_km = fit_km) |
|
| 579 |
#' xticks <- h_xticks(data = data_plot) |
|
| 580 |
#' gg <- h_ggkm( |
|
| 581 |
#' data = data_plot, |
|
| 582 |
#' censor_show = TRUE, |
|
| 583 |
#' xticks = xticks, xlab = "Days", ylab = "Survival Probability", |
|
| 584 |
#' title = "tt", footnotes = "ff", yval = "Survival" |
|
| 585 |
#' ) |
|
| 586 |
#' g_el <- h_decompose_gg(gg) |
|
| 587 |
#' lyt <- h_km_layout(data = data_plot, g_el = g_el, title = "t", footnotes = "f") |
|
| 588 |
#' grid.show.layout(lyt) |
|
| 589 |
#' } |
|
| 590 |
#' |
|
| 591 |
#' @export |
|
| 592 |
h_km_layout <- function(data, g_el, title, footnotes, annot_at_risk = TRUE, annot_at_risk_title = TRUE) {
|
|
| 593 | 1x |
lifecycle::deprecate_warn( |
| 594 | 1x |
"0.9.4", |
| 595 | 1x |
"h_km_layout()", |
| 596 | 1x |
details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." |
| 597 |
) |
|
| 598 | 1x |
txtlines <- levels(as.factor(data$strata)) |
| 599 | 1x |
nlines <- nlevels(as.factor(data$strata)) |
| 600 | 1x |
col_annot_width <- max( |
| 601 | 1x |
c( |
| 602 | 1x |
as.numeric(grid::convertX(g_el$yaxis$widths + g_el$ylab$widths, "pt")), |
| 603 | 1x |
as.numeric( |
| 604 | 1x |
grid::convertX( |
| 605 | 1x |
grid::stringWidth(txtlines) + grid::unit(7, "pt"), "pt" |
| 606 |
) |
|
| 607 |
) |
|
| 608 |
) |
|
| 609 |
) |
|
| 610 | ||
| 611 | 1x |
ttl_row <- as.numeric(!is.null(title)) |
| 612 | 1x |
foot_row <- as.numeric(!is.null(footnotes)) |
| 613 | 1x |
no_tbl_ind <- c() |
| 614 | 1x |
ht_x <- c() |
| 615 | 1x |
ht_units <- c() |
| 616 | ||
| 617 | 1x |
if (ttl_row == 1) {
|
| 618 | 1x |
no_tbl_ind <- c(no_tbl_ind, TRUE) |
| 619 | 1x |
ht_x <- c(ht_x, 2) |
| 620 | 1x |
ht_units <- c(ht_units, "lines") |
| 621 |
} |
|
| 622 | ||
| 623 | 1x |
no_tbl_ind <- c(no_tbl_ind, rep(TRUE, 3), rep(FALSE, 2)) |
| 624 | 1x |
ht_x <- c( |
| 625 | 1x |
ht_x, |
| 626 | 1x |
1, |
| 627 | 1x |
grid::convertX(with(g_el, xaxis$heights + ylab$widths), "pt") + grid::unit(5, "pt"), |
| 628 | 1x |
grid::convertX(g_el$guide$heights, "pt") + grid::unit(2, "pt"), |
| 629 | 1x |
1, |
| 630 | 1x |
nlines + 0.5, |
| 631 | 1x |
grid::convertX(with(g_el, xaxis$heights + ylab$widths), "pt") |
| 632 |
) |
|
| 633 | 1x |
ht_units <- c( |
| 634 | 1x |
ht_units, |
| 635 | 1x |
"null", |
| 636 | 1x |
"pt", |
| 637 | 1x |
"pt", |
| 638 | 1x |
"lines", |
| 639 | 1x |
"lines", |
| 640 | 1x |
"pt" |
| 641 |
) |
|
| 642 | ||
| 643 | 1x |
if (foot_row == 1) {
|
| 644 | 1x |
no_tbl_ind <- c(no_tbl_ind, TRUE) |
| 645 | 1x |
ht_x <- c(ht_x, 1) |
| 646 | 1x |
ht_units <- c(ht_units, "lines") |
| 647 |
} |
|
| 648 | 1x |
if (annot_at_risk) {
|
| 649 | 1x |
no_at_risk_tbl <- rep(TRUE, 6 + ttl_row + foot_row) |
| 650 | 1x |
if (!annot_at_risk_title) {
|
| 651 | ! |
no_at_risk_tbl[length(no_at_risk_tbl) - 2 - foot_row] <- FALSE |
| 652 |
} |
|
| 653 |
} else {
|
|
| 654 | ! |
no_at_risk_tbl <- no_tbl_ind |
| 655 |
} |
|
| 656 | ||
| 657 | 1x |
grid::grid.layout( |
| 658 | 1x |
nrow = sum(no_at_risk_tbl), ncol = 2, |
| 659 | 1x |
widths = grid::unit(c(col_annot_width, 1), c("pt", "null")),
|
| 660 | 1x |
heights = grid::unit( |
| 661 | 1x |
x = ht_x[no_at_risk_tbl], |
| 662 | 1x |
units = ht_units[no_at_risk_tbl] |
| 663 |
) |
|
| 664 |
) |
|
| 665 |
} |
|
| 666 | ||
| 667 |
#' Helper function to create patient-at-risk grobs |
|
| 668 |
#' |
|
| 669 |
#' @description `r lifecycle::badge("deprecated")`
|
|
| 670 |
#' |
|
| 671 |
#' Two graphical objects are obtained, one corresponding to row labeling and the second to the table of |
|
| 672 |
#' numbers of patients at risk. If `title = TRUE`, a third object corresponding to the table title is |
|
| 673 |
#' also obtained. |
|
| 674 |
#' |
|
| 675 |
#' @inheritParams g_km |
|
| 676 |
#' @inheritParams h_ggkm |
|
| 677 |
#' @param annot_tbl (`data.frame`)\cr annotation as prepared by [survival::summary.survfit()] which |
|
| 678 |
#' includes the number of patients at risk at given time points. |
|
| 679 |
#' @param xlim (`numeric(1)`)\cr the maximum value on the x-axis (used to ensure the at risk table aligns with the KM |
|
| 680 |
#' graph). |
|
| 681 |
#' @param title (`flag`)\cr whether the "Patients at Risk" title should be added above the `annot_at_risk` |
|
| 682 |
#' table. Has no effect if `annot_at_risk` is `FALSE`. Defaults to `TRUE`. |
|
| 683 |
#' |
|
| 684 |
#' @return A named `list` of two `gTree` objects if `title = FALSE`: `at_risk` and `label`, or three |
|
| 685 |
#' `gTree` objects if `title = TRUE`: `at_risk`, `label`, and `title`. |
|
| 686 |
#' |
|
| 687 |
#' @examples |
|
| 688 |
#' \donttest{
|
|
| 689 |
#' library(dplyr) |
|
| 690 |
#' library(survival) |
|
| 691 |
#' library(grid) |
|
| 692 |
#' |
|
| 693 |
#' fit_km <- tern_ex_adtte %>% |
|
| 694 |
#' filter(PARAMCD == "OS") %>% |
|
| 695 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) |
|
| 696 |
#' |
|
| 697 |
#' data_plot <- h_data_plot(fit_km = fit_km) |
|
| 698 |
#' |
|
| 699 |
#' xticks <- h_xticks(data = data_plot) |
|
| 700 |
#' |
|
| 701 |
#' gg <- h_ggkm( |
|
| 702 |
#' data = data_plot, |
|
| 703 |
#' censor_show = TRUE, |
|
| 704 |
#' xticks = xticks, xlab = "Days", ylab = "Survival Probability", |
|
| 705 |
#' title = "tt", footnotes = "ff", yval = "Survival" |
|
| 706 |
#' ) |
|
| 707 |
#' |
|
| 708 |
#' # The annotation table reports the patient at risk for a given strata and |
|
| 709 |
#' # times (`xticks`). |
|
| 710 |
#' annot_tbl <- summary(fit_km, times = xticks) |
|
| 711 |
#' if (is.null(fit_km$strata)) {
|
|
| 712 |
#' annot_tbl <- with(annot_tbl, data.frame(n.risk = n.risk, time = time, strata = "All")) |
|
| 713 |
#' } else {
|
|
| 714 |
#' strata_lst <- strsplit(sub("=", "equals", levels(annot_tbl$strata)), "equals")
|
|
| 715 |
#' levels(annot_tbl$strata) <- matrix(unlist(strata_lst), ncol = 2, byrow = TRUE)[, 2] |
|
| 716 |
#' annot_tbl <- data.frame( |
|
| 717 |
#' n.risk = annot_tbl$n.risk, |
|
| 718 |
#' time = annot_tbl$time, |
|
| 719 |
#' strata = annot_tbl$strata |
|
| 720 |
#' ) |
|
| 721 |
#' } |
|
| 722 |
#' |
|
| 723 |
#' # The annotation table is transformed into a grob. |
|
| 724 |
#' tbl <- h_grob_tbl_at_risk(data = data_plot, annot_tbl = annot_tbl, xlim = max(xticks)) |
|
| 725 |
#' |
|
| 726 |
#' # For the representation, the layout is estimated for which the decomposition |
|
| 727 |
#' # of the graphic element is necessary. |
|
| 728 |
#' g_el <- h_decompose_gg(gg) |
|
| 729 |
#' lyt <- h_km_layout(data = data_plot, g_el = g_el, title = "t", footnotes = "f") |
|
| 730 |
#' |
|
| 731 |
#' grid::grid.newpage() |
|
| 732 |
#' pushViewport(viewport(layout = lyt, height = .95, width = .95)) |
|
| 733 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "purple", fill = "gray85", lwd = 1)) |
|
| 734 |
#' pushViewport(viewport(layout.pos.row = 3:4, layout.pos.col = 2)) |
|
| 735 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "orange", fill = "gray85", lwd = 1)) |
|
| 736 |
#' grid::grid.draw(tbl$at_risk) |
|
| 737 |
#' popViewport() |
|
| 738 |
#' pushViewport(viewport(layout.pos.row = 3:4, layout.pos.col = 1)) |
|
| 739 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "green3", fill = "gray85", lwd = 1)) |
|
| 740 |
#' grid::grid.draw(tbl$label) |
|
| 741 |
#' } |
|
| 742 |
#' |
|
| 743 |
#' @export |
|
| 744 |
h_grob_tbl_at_risk <- function(data, annot_tbl, xlim, title = TRUE) {
|
|
| 745 | 1x |
lifecycle::deprecate_warn( |
| 746 | 1x |
"0.9.4", |
| 747 | 1x |
"h_grob_tbl_at_risk()", |
| 748 | 1x |
details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." |
| 749 |
) |
|
| 750 | 1x |
txtlines <- levels(as.factor(data$strata)) |
| 751 | 1x |
nlines <- nlevels(as.factor(data$strata)) |
| 752 | 1x |
y_int <- annot_tbl$time[2] - annot_tbl$time[1] |
| 753 | 1x |
annot_tbl <- expand.grid( |
| 754 | 1x |
time = seq(0, xlim, y_int), |
| 755 | 1x |
strata = unique(annot_tbl$strata) |
| 756 | 1x |
) %>% dplyr::left_join(annot_tbl, by = c("time", "strata"))
|
| 757 | 1x |
annot_tbl[is.na(annot_tbl)] <- 0 |
| 758 | 1x |
y_str_unit <- as.numeric(annot_tbl$strata) |
| 759 | 1x |
vp_table <- grid::plotViewport(margins = grid::unit(c(0, 0, 0, 0), "lines")) |
| 760 | 1x |
if (title) {
|
| 761 | 1x |
gb_table_title <- grid::gList( |
| 762 | 1x |
grid::textGrob( |
| 763 | 1x |
label = "Patients at Risk:", |
| 764 | 1x |
x = 1, |
| 765 | 1x |
y = grid::unit(0.2, "native"), |
| 766 | 1x |
gp = grid::gpar(fontface = "bold", fontsize = 10) |
| 767 |
) |
|
| 768 |
) |
|
| 769 |
} |
|
| 770 | 1x |
gb_table_left_annot <- grid::gList( |
| 771 | 1x |
grid::rectGrob( |
| 772 | 1x |
x = 0, y = grid::unit(c(1:nlines) - 1, "lines"), |
| 773 | 1x |
gp = grid::gpar(fill = c("gray95", "gray90"), alpha = 1, col = "white"),
|
| 774 | 1x |
height = grid::unit(1, "lines"), just = "bottom", hjust = 0 |
| 775 |
), |
|
| 776 | 1x |
grid::textGrob( |
| 777 | 1x |
label = unique(annot_tbl$strata), |
| 778 | 1x |
x = 0.5, |
| 779 | 1x |
y = grid::unit( |
| 780 | 1x |
(max(unique(y_str_unit)) - unique(y_str_unit)) + 0.75, |
| 781 | 1x |
"native" |
| 782 |
), |
|
| 783 | 1x |
gp = grid::gpar(fontface = "italic", fontsize = 10) |
| 784 |
) |
|
| 785 |
) |
|
| 786 | 1x |
gb_patient_at_risk <- grid::gList( |
| 787 | 1x |
grid::rectGrob( |
| 788 | 1x |
x = 0, y = grid::unit(c(1:nlines) - 1, "lines"), |
| 789 | 1x |
gp = grid::gpar(fill = c("gray95", "gray90"), alpha = 1, col = "white"),
|
| 790 | 1x |
height = grid::unit(1, "lines"), just = "bottom", hjust = 0 |
| 791 |
), |
|
| 792 | 1x |
grid::textGrob( |
| 793 | 1x |
label = annot_tbl$n.risk, |
| 794 | 1x |
x = grid::unit(annot_tbl$time, "native"), |
| 795 | 1x |
y = grid::unit( |
| 796 | 1x |
(max(y_str_unit) - y_str_unit) + .5, |
| 797 | 1x |
"line" |
| 798 | 1x |
) # maybe native |
| 799 |
) |
|
| 800 |
) |
|
| 801 | ||
| 802 | 1x |
ret <- list( |
| 803 | 1x |
at_risk = grid::gList( |
| 804 | 1x |
grid::gTree( |
| 805 | 1x |
vp = vp_table, |
| 806 | 1x |
children = grid::gList( |
| 807 | 1x |
grid::gTree( |
| 808 | 1x |
vp = grid::dataViewport( |
| 809 | 1x |
xscale = c(0, xlim) + c(-0.05, 0.05) * xlim, |
| 810 | 1x |
yscale = c(0, nlines + 1), |
| 811 | 1x |
extension = c(0.05, 0) |
| 812 |
), |
|
| 813 | 1x |
children = grid::gList(gb_patient_at_risk) |
| 814 |
) |
|
| 815 |
) |
|
| 816 |
) |
|
| 817 |
), |
|
| 818 | 1x |
label = grid::gList( |
| 819 | 1x |
grid::gTree( |
| 820 | 1x |
vp = grid::viewport(width = max(grid::stringWidth(txtlines))), |
| 821 | 1x |
children = grid::gList( |
| 822 | 1x |
grid::gTree( |
| 823 | 1x |
vp = grid::dataViewport( |
| 824 | 1x |
xscale = 0:1, |
| 825 | 1x |
yscale = c(0, nlines + 1), |
| 826 | 1x |
extension = c(0.0, 0) |
| 827 |
), |
|
| 828 | 1x |
children = grid::gList(gb_table_left_annot) |
| 829 |
) |
|
| 830 |
) |
|
| 831 |
) |
|
| 832 |
) |
|
| 833 |
) |
|
| 834 | ||
| 835 | 1x |
if (title) {
|
| 836 | 1x |
ret[["title"]] <- grid::gList( |
| 837 | 1x |
grid::gTree( |
| 838 | 1x |
vp = grid::viewport(width = max(grid::stringWidth(txtlines))), |
| 839 | 1x |
children = grid::gList( |
| 840 | 1x |
grid::gTree( |
| 841 | 1x |
vp = grid::dataViewport( |
| 842 | 1x |
xscale = 0:1, |
| 843 | 1x |
yscale = c(0, 1), |
| 844 | 1x |
extension = c(0, 0) |
| 845 |
), |
|
| 846 | 1x |
children = grid::gList(gb_table_title) |
| 847 |
) |
|
| 848 |
) |
|
| 849 |
) |
|
| 850 |
) |
|
| 851 |
} |
|
| 852 | ||
| 853 | 1x |
ret |
| 854 |
} |
|
| 855 | ||
| 856 |
#' Helper function to create survival estimation grobs |
|
| 857 |
#' |
|
| 858 |
#' @description `r lifecycle::badge("deprecated")`
|
|
| 859 |
#' |
|
| 860 |
#' The survival fit is transformed in a grob containing a table with groups in |
|
| 861 |
#' rows characterized by N, median and 95% confidence interval. |
|
| 862 |
#' |
|
| 863 |
#' @inheritParams g_km |
|
| 864 |
#' @inheritParams h_data_plot |
|
| 865 |
#' @param ttheme (`list`)\cr see [gridExtra::ttheme_default()]. |
|
| 866 |
#' @param x (`proportion`)\cr a value between 0 and 1 specifying x-location. |
|
| 867 |
#' @param y (`proportion`)\cr a value between 0 and 1 specifying y-location. |
|
| 868 |
#' @param width (`grid::unit`)\cr width (as a unit) to use when printing the grob. |
|
| 869 |
#' |
|
| 870 |
#' @return A `grob` of a table containing statistics `N`, `Median`, and `XX% CI` (`XX` taken from `fit_km`). |
|
| 871 |
#' |
|
| 872 |
#' @examples |
|
| 873 |
#' \donttest{
|
|
| 874 |
#' library(dplyr) |
|
| 875 |
#' library(survival) |
|
| 876 |
#' library(grid) |
|
| 877 |
#' |
|
| 878 |
#' grid::grid.newpage() |
|
| 879 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "pink", fill = "gray85", lwd = 1)) |
|
| 880 |
#' tern_ex_adtte %>% |
|
| 881 |
#' filter(PARAMCD == "OS") %>% |
|
| 882 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>% |
|
| 883 |
#' h_grob_median_surv() %>% |
|
| 884 |
#' grid::grid.draw() |
|
| 885 |
#' } |
|
| 886 |
#' |
|
| 887 |
#' @export |
|
| 888 |
h_grob_median_surv <- function(fit_km, |
|
| 889 |
armval = "All", |
|
| 890 |
x = 0.9, |
|
| 891 |
y = 0.9, |
|
| 892 |
width = grid::unit(0.3, "npc"), |
|
| 893 |
ttheme = gridExtra::ttheme_default()) {
|
|
| 894 | 1x |
lifecycle::deprecate_warn( |
| 895 | 1x |
"0.9.4", |
| 896 | 1x |
"h_grob_median_surv()", |
| 897 | 1x |
details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." |
| 898 |
) |
|
| 899 | 1x |
data <- h_tbl_median_surv(fit_km, armval = armval) |
| 900 | ||
| 901 | 1x |
width <- grid::convertUnit(grid::unit(as.numeric(width), grid::unitType(width)), "in") |
| 902 | 1x |
height <- width * (nrow(data) + 1) / 12 |
| 903 | ||
| 904 | 1x |
w <- paste(" ", c(
|
| 905 | 1x |
rownames(data)[which.max(nchar(rownames(data)))], |
| 906 | 1x |
sapply(names(data), function(x) c(x, data[[x]])[which.max(nchar(c(x, data[[x]])))]) |
| 907 |
)) |
|
| 908 | 1x |
w_unit <- grid::convertWidth(grid::stringWidth(w), "in", valueOnly = TRUE) |
| 909 | ||
| 910 | 1x |
w_txt <- sapply(1:64, function(x) {
|
| 911 | 64x |
graphics::par(ps = x) |
| 912 | 64x |
graphics::strwidth(w[4], units = "in") |
| 913 |
}) |
|
| 914 | 1x |
f_size_w <- which.max(w_txt[w_txt < as.numeric((w_unit / sum(w_unit)) * width)[4]]) |
| 915 | ||
| 916 | 1x |
h_txt <- sapply(1:64, function(x) {
|
| 917 | 64x |
graphics::par(ps = x) |
| 918 | 64x |
graphics::strheight(grid::stringHeight("X"), units = "in")
|
| 919 |
}) |
|
| 920 | 1x |
f_size_h <- which.max(h_txt[h_txt < as.numeric(grid::unit(as.numeric(height) / 4, grid::unitType(height)))]) |
| 921 | ||
| 922 | 1x |
if (ttheme$core$fg_params$fontsize == 12) {
|
| 923 | 1x |
ttheme$core$fg_params$fontsize <- min(f_size_w, f_size_h) |
| 924 | 1x |
ttheme$colhead$fg_params$fontsize <- min(f_size_w, f_size_h) |
| 925 | 1x |
ttheme$rowhead$fg_params$fontsize <- min(f_size_w, f_size_h) |
| 926 |
} |
|
| 927 | ||
| 928 | 1x |
gt <- gridExtra::tableGrob( |
| 929 | 1x |
d = data, |
| 930 | 1x |
theme = ttheme |
| 931 |
) |
|
| 932 | 1x |
gt$widths <- ((w_unit / sum(w_unit)) * width) |
| 933 | 1x |
gt$heights <- rep(grid::unit(as.numeric(height) / 4, grid::unitType(height)), nrow(gt)) |
| 934 | ||
| 935 | 1x |
vp <- grid::viewport( |
| 936 | 1x |
x = grid::unit(x, "npc") + grid::unit(1, "lines"), |
| 937 | 1x |
y = grid::unit(y, "npc") + grid::unit(1.5, "lines"), |
| 938 | 1x |
height = height, |
| 939 | 1x |
width = width, |
| 940 | 1x |
just = c("right", "top")
|
| 941 |
) |
|
| 942 | ||
| 943 | 1x |
grid::gList( |
| 944 | 1x |
grid::gTree( |
| 945 | 1x |
vp = vp, |
| 946 | 1x |
children = grid::gList(gt) |
| 947 |
) |
|
| 948 |
) |
|
| 949 |
} |
|
| 950 | ||
| 951 |
#' Helper function to create grid object with y-axis annotation |
|
| 952 |
#' |
|
| 953 |
#' @description `r lifecycle::badge("deprecated")`
|
|
| 954 |
#' |
|
| 955 |
#' Build the y-axis annotation from a decomposed `ggplot`. |
|
| 956 |
#' |
|
| 957 |
#' @param ylab (`gtable`)\cr the y-lab as a graphical object derived from a `ggplot`. |
|
| 958 |
#' @param yaxis (`gtable`)\cr the y-axis as a graphical object derived from a `ggplot`. |
|
| 959 |
#' |
|
| 960 |
#' @return A `gTree` object containing the y-axis annotation from a `ggplot`. |
|
| 961 |
#' |
|
| 962 |
#' @examples |
|
| 963 |
#' \donttest{
|
|
| 964 |
#' library(dplyr) |
|
| 965 |
#' library(survival) |
|
| 966 |
#' library(grid) |
|
| 967 |
#' |
|
| 968 |
#' fit_km <- tern_ex_adtte %>% |
|
| 969 |
#' filter(PARAMCD == "OS") %>% |
|
| 970 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) |
|
| 971 |
#' data_plot <- h_data_plot(fit_km = fit_km) |
|
| 972 |
#' xticks <- h_xticks(data = data_plot) |
|
| 973 |
#' gg <- h_ggkm( |
|
| 974 |
#' data = data_plot, |
|
| 975 |
#' censor_show = TRUE, |
|
| 976 |
#' xticks = xticks, xlab = "Days", ylab = "Survival Probability", |
|
| 977 |
#' title = "title", footnotes = "footnotes", yval = "Survival" |
|
| 978 |
#' ) |
|
| 979 |
#' |
|
| 980 |
#' g_el <- h_decompose_gg(gg) |
|
| 981 |
#' |
|
| 982 |
#' grid::grid.newpage() |
|
| 983 |
#' pvp <- grid::plotViewport(margins = c(5, 4, 2, 20)) |
|
| 984 |
#' pushViewport(pvp) |
|
| 985 |
#' grid::grid.draw(h_grob_y_annot(ylab = g_el$ylab, yaxis = g_el$yaxis)) |
|
| 986 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "gray35", fill = NA)) |
|
| 987 |
#' } |
|
| 988 |
#' |
|
| 989 |
#' @export |
|
| 990 |
h_grob_y_annot <- function(ylab, yaxis) {
|
|
| 991 | 1x |
lifecycle::deprecate_warn( |
| 992 | 1x |
"0.9.4", |
| 993 | 1x |
"h_grob_y_annot()", |
| 994 | 1x |
details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." |
| 995 |
) |
|
| 996 | 1x |
grid::gList( |
| 997 | 1x |
grid::gTree( |
| 998 | 1x |
vp = grid::viewport( |
| 999 | 1x |
width = grid::convertX(yaxis$widths + ylab$widths, "pt"), |
| 1000 | 1x |
x = grid::unit(1, "npc"), |
| 1001 | 1x |
just = "right" |
| 1002 |
), |
|
| 1003 | 1x |
children = grid::gList(cbind(ylab, yaxis)) |
| 1004 |
) |
|
| 1005 |
) |
|
| 1006 |
} |
|
| 1007 | ||
| 1008 |
#' Helper function to create Cox-PH grobs |
|
| 1009 |
#' |
|
| 1010 |
#' @description `r lifecycle::badge("deprecated")`
|
|
| 1011 |
#' |
|
| 1012 |
#' Grob of `rtable` output from [h_tbl_coxph_pairwise()] |
|
| 1013 |
#' |
|
| 1014 |
#' @inheritParams h_grob_median_surv |
|
| 1015 |
#' @param ... arguments to pass to [h_tbl_coxph_pairwise()]. |
|
| 1016 |
#' @param x (`proportion`)\cr a value between 0 and 1 specifying x-location. |
|
| 1017 |
#' @param y (`proportion`)\cr a value between 0 and 1 specifying y-location. |
|
| 1018 |
#' @param width (`grid::unit`)\cr width (as a unit) to use when printing the grob. |
|
| 1019 |
#' |
|
| 1020 |
#' @return A `grob` of a table containing statistics `HR`, `XX% CI` (`XX` taken from `control_coxph_pw`), |
|
| 1021 |
#' and `p-value (log-rank)`. |
|
| 1022 |
#' |
|
| 1023 |
#' @examples |
|
| 1024 |
#' \donttest{
|
|
| 1025 |
#' library(dplyr) |
|
| 1026 |
#' library(survival) |
|
| 1027 |
#' library(grid) |
|
| 1028 |
#' |
|
| 1029 |
#' grid::grid.newpage() |
|
| 1030 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "pink", fill = "gray85", lwd = 1)) |
|
| 1031 |
#' data <- tern_ex_adtte %>% |
|
| 1032 |
#' filter(PARAMCD == "OS") %>% |
|
| 1033 |
#' mutate(is_event = CNSR == 0) |
|
| 1034 |
#' tbl_grob <- h_grob_coxph( |
|
| 1035 |
#' df = data, |
|
| 1036 |
#' variables = list(tte = "AVAL", is_event = "is_event", arm = "ARMCD"), |
|
| 1037 |
#' control_coxph_pw = control_coxph(conf_level = 0.9), x = 0.5, y = 0.5 |
|
| 1038 |
#' ) |
|
| 1039 |
#' grid::grid.draw(tbl_grob) |
|
| 1040 |
#' } |
|
| 1041 |
#' |
|
| 1042 |
#' @export |
|
| 1043 |
h_grob_coxph <- function(..., |
|
| 1044 |
x = 0, |
|
| 1045 |
y = 0, |
|
| 1046 |
width = grid::unit(0.4, "npc"), |
|
| 1047 |
ttheme = gridExtra::ttheme_default( |
|
| 1048 |
padding = grid::unit(c(1, .5), "lines"), |
|
| 1049 |
core = list(bg_params = list(fill = c("grey95", "grey90"), alpha = .5))
|
|
| 1050 |
)) {
|
|
| 1051 | 1x |
lifecycle::deprecate_warn( |
| 1052 | 1x |
"0.9.4", |
| 1053 | 1x |
"h_grob_coxph()", |
| 1054 | 1x |
details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." |
| 1055 |
) |
|
| 1056 | 1x |
data <- h_tbl_coxph_pairwise(...) |
| 1057 | ||
| 1058 | 1x |
width <- grid::convertUnit(grid::unit(as.numeric(width), grid::unitType(width)), "in") |
| 1059 | 1x |
height <- width * (nrow(data) + 1) / 12 |
| 1060 | ||
| 1061 | 1x |
w <- paste(" ", c(
|
| 1062 | 1x |
rownames(data)[which.max(nchar(rownames(data)))], |
| 1063 | 1x |
sapply(names(data), function(x) c(x, data[[x]])[which.max(nchar(c(x, data[[x]])))]) |
| 1064 |
)) |
|
| 1065 | 1x |
w_unit <- grid::convertWidth(grid::stringWidth(w), "in", valueOnly = TRUE) |
| 1066 | ||
| 1067 | 1x |
w_txt <- sapply(1:64, function(x) {
|
| 1068 | 64x |
graphics::par(ps = x) |
| 1069 | 64x |
graphics::strwidth(w[4], units = "in") |
| 1070 |
}) |
|
| 1071 | 1x |
f_size_w <- which.max(w_txt[w_txt < as.numeric((w_unit / sum(w_unit)) * width)[4]]) |
| 1072 | ||
| 1073 | 1x |
h_txt <- sapply(1:64, function(x) {
|
| 1074 | 64x |
graphics::par(ps = x) |
| 1075 | 64x |
graphics::strheight(grid::stringHeight("X"), units = "in")
|
| 1076 |
}) |
|
| 1077 | 1x |
f_size_h <- which.max(h_txt[h_txt < as.numeric(grid::unit(as.numeric(height) / 4, grid::unitType(height)))]) |
| 1078 | ||
| 1079 | 1x |
if (ttheme$core$fg_params$fontsize == 12) {
|
| 1080 | 1x |
ttheme$core$fg_params$fontsize <- min(f_size_w, f_size_h) |
| 1081 | 1x |
ttheme$colhead$fg_params$fontsize <- min(f_size_w, f_size_h) |
| 1082 | 1x |
ttheme$rowhead$fg_params$fontsize <- min(f_size_w, f_size_h) |
| 1083 |
} |
|
| 1084 | ||
| 1085 | 1x |
tryCatch( |
| 1086 | 1x |
expr = {
|
| 1087 | 1x |
gt <- gridExtra::tableGrob( |
| 1088 | 1x |
d = data, |
| 1089 | 1x |
theme = ttheme |
| 1090 | 1x |
) # ERROR 'data' must be of a vector type, was 'NULL' |
| 1091 | 1x |
gt$widths <- ((w_unit / sum(w_unit)) * width) |
| 1092 | 1x |
gt$heights <- rep(grid::unit(as.numeric(height) / 4, grid::unitType(height)), nrow(gt)) |
| 1093 | 1x |
vp <- grid::viewport( |
| 1094 | 1x |
x = grid::unit(x, "npc") + grid::unit(1, "lines"), |
| 1095 | 1x |
y = grid::unit(y, "npc") + grid::unit(1.5, "lines"), |
| 1096 | 1x |
height = height, |
| 1097 | 1x |
width = width, |
| 1098 | 1x |
just = c("left", "bottom")
|
| 1099 |
) |
|
| 1100 | 1x |
grid::gList( |
| 1101 | 1x |
grid::gTree( |
| 1102 | 1x |
vp = vp, |
| 1103 | 1x |
children = grid::gList(gt) |
| 1104 |
) |
|
| 1105 |
) |
|
| 1106 |
}, |
|
| 1107 | 1x |
error = function(w) {
|
| 1108 | ! |
message(paste( |
| 1109 | ! |
"Warning: Cox table will not be displayed as there is", |
| 1110 | ! |
"not any level to be compared in the arm variable." |
| 1111 |
)) |
|
| 1112 | ! |
return( |
| 1113 | ! |
grid::gList( |
| 1114 | ! |
grid::gTree( |
| 1115 | ! |
vp = NULL, |
| 1116 | ! |
children = NULL |
| 1117 |
) |
|
| 1118 |
) |
|
| 1119 |
) |
|
| 1120 |
} |
|
| 1121 |
) |
|
| 1122 |
} |
| 1 |
#' Confidence interval for mean |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Convenient function for calculating the mean confidence interval. It calculates the arithmetic as well as the |
|
| 6 |
#' geometric mean. It can be used as a `ggplot` helper function for plotting. |
|
| 7 |
#' |
|
| 8 |
#' @inheritParams argument_convention |
|
| 9 |
#' @param n_min (`numeric(1)`)\cr a minimum number of non-missing `x` to estimate the confidence interval for mean. |
|
| 10 |
#' @param gg_helper (`flag`)\cr whether output should be aligned for use with `ggplot`s. |
|
| 11 |
#' @param geom_mean (`flag`)\cr whether the geometric mean should be calculated. |
|
| 12 |
#' |
|
| 13 |
#' @return A named `vector` of values `mean_ci_lwr` and `mean_ci_upr`. |
|
| 14 |
#' |
|
| 15 |
#' @examples |
|
| 16 |
#' stat_mean_ci(sample(10), gg_helper = FALSE) |
|
| 17 |
#' |
|
| 18 |
#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) + |
|
| 19 |
#' ggplot2::geom_point() |
|
| 20 |
#' |
|
| 21 |
#' p + ggplot2::stat_summary( |
|
| 22 |
#' fun.data = stat_mean_ci, |
|
| 23 |
#' geom = "errorbar" |
|
| 24 |
#' ) |
|
| 25 |
#' |
|
| 26 |
#' p + ggplot2::stat_summary( |
|
| 27 |
#' fun.data = stat_mean_ci, |
|
| 28 |
#' fun.args = list(conf_level = 0.5), |
|
| 29 |
#' geom = "errorbar" |
|
| 30 |
#' ) |
|
| 31 |
#' |
|
| 32 |
#' p + ggplot2::stat_summary( |
|
| 33 |
#' fun.data = stat_mean_ci, |
|
| 34 |
#' fun.args = list(conf_level = 0.5, geom_mean = TRUE), |
|
| 35 |
#' geom = "errorbar" |
|
| 36 |
#' ) |
|
| 37 |
#' |
|
| 38 |
#' @export |
|
| 39 |
stat_mean_ci <- function(x, |
|
| 40 |
conf_level = 0.95, |
|
| 41 |
na.rm = TRUE, # nolint |
|
| 42 |
n_min = 2, |
|
| 43 |
gg_helper = TRUE, |
|
| 44 |
geom_mean = FALSE) {
|
|
| 45 | 2409x |
if (na.rm) {
|
| 46 | 10x |
x <- stats::na.omit(x) |
| 47 |
} |
|
| 48 | 2409x |
n <- length(x) |
| 49 | ||
| 50 | 2409x |
if (!geom_mean) {
|
| 51 | 1212x |
m <- mean(x) |
| 52 |
} else {
|
|
| 53 | 1197x |
negative_values_exist <- any(is.na(x[!is.na(x)]) <- x[!is.na(x)] <= 0) |
| 54 | 1197x |
if (negative_values_exist) {
|
| 55 | 26x |
m <- NA_real_ |
| 56 |
} else {
|
|
| 57 | 1171x |
x <- log(x) |
| 58 | 1171x |
m <- mean(x) |
| 59 |
} |
|
| 60 |
} |
|
| 61 | ||
| 62 | 2409x |
if (n < n_min || is.na(m)) {
|
| 63 | 330x |
ci <- c(mean_ci_lwr = NA_real_, mean_ci_upr = NA_real_) |
| 64 |
} else {
|
|
| 65 | 2079x |
hci <- stats::qt((1 + conf_level) / 2, df = n - 1) * stats::sd(x) / sqrt(n) |
| 66 | 2079x |
ci <- c(mean_ci_lwr = m - hci, mean_ci_upr = m + hci) |
| 67 | 2079x |
if (geom_mean) {
|
| 68 | 1028x |
ci <- exp(ci) |
| 69 |
} |
|
| 70 |
} |
|
| 71 | ||
| 72 | 2409x |
if (gg_helper) {
|
| 73 | 4x |
m <- ifelse(is.na(m), NA_real_, m) |
| 74 | 4x |
ci <- data.frame(y = ifelse(geom_mean, exp(m), m), ymin = ci[[1]], ymax = ci[[2]]) |
| 75 |
} |
|
| 76 | ||
| 77 | 2409x |
return(ci) |
| 78 |
} |
|
| 79 | ||
| 80 |
#' Confidence interval for median |
|
| 81 |
#' |
|
| 82 |
#' @description `r lifecycle::badge("stable")`
|
|
| 83 |
#' |
|
| 84 |
#' Convenient function for calculating the median confidence interval. It can be used as a `ggplot` helper |
|
| 85 |
#' function for plotting. |
|
| 86 |
#' |
|
| 87 |
#' @inheritParams argument_convention |
|
| 88 |
#' @param gg_helper (`flag`)\cr whether output should be aligned for use with `ggplot`s. |
|
| 89 |
#' |
|
| 90 |
#' @details This function was adapted from `DescTools/versions/0.99.35/source` |
|
| 91 |
#' |
|
| 92 |
#' @return A named `vector` of values `median_ci_lwr` and `median_ci_upr`. |
|
| 93 |
#' |
|
| 94 |
#' @examples |
|
| 95 |
#' stat_median_ci(sample(10), gg_helper = FALSE) |
|
| 96 |
#' |
|
| 97 |
#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) + |
|
| 98 |
#' ggplot2::geom_point() |
|
| 99 |
#' p + ggplot2::stat_summary( |
|
| 100 |
#' fun.data = stat_median_ci, |
|
| 101 |
#' geom = "errorbar" |
|
| 102 |
#' ) |
|
| 103 |
#' |
|
| 104 |
#' @export |
|
| 105 |
stat_median_ci <- function(x, |
|
| 106 |
conf_level = 0.95, |
|
| 107 |
na.rm = TRUE, # nolint |
|
| 108 |
gg_helper = TRUE) {
|
|
| 109 | 1210x |
x <- unname(x) |
| 110 | 1210x |
if (na.rm) {
|
| 111 | 9x |
x <- x[!is.na(x)] |
| 112 |
} |
|
| 113 | 1210x |
n <- length(x) |
| 114 | 1210x |
med <- stats::median(x) |
| 115 | ||
| 116 | 1210x |
k <- stats::qbinom(p = (1 - conf_level) / 2, size = n, prob = 0.5, lower.tail = TRUE) |
| 117 | ||
| 118 |
# k == 0 - for small samples (e.g. n <= 5) ci can be outside the observed range |
|
| 119 | 1210x |
if (k == 0 || is.na(med)) {
|
| 120 | 266x |
ci <- c(median_ci_lwr = NA_real_, median_ci_upr = NA_real_) |
| 121 | 266x |
empir_conf_level <- NA_real_ |
| 122 |
} else {
|
|
| 123 | 944x |
x_sort <- sort(x) |
| 124 | 944x |
ci <- c(median_ci_lwr = x_sort[k], median_ci_upr = x_sort[n - k + 1]) |
| 125 | 944x |
empir_conf_level <- 1 - 2 * stats::pbinom(k - 1, size = n, prob = 0.5) |
| 126 |
} |
|
| 127 | ||
| 128 | 1210x |
if (gg_helper) {
|
| 129 | 4x |
ci <- data.frame(y = med, ymin = ci[[1]], ymax = ci[[2]]) |
| 130 |
} |
|
| 131 | ||
| 132 | 1210x |
attr(ci, "conf_level") <- empir_conf_level |
| 133 | ||
| 134 | 1210x |
return(ci) |
| 135 |
} |
|
| 136 | ||
| 137 |
#' p-Value of the mean |
|
| 138 |
#' |
|
| 139 |
#' @description `r lifecycle::badge("stable")`
|
|
| 140 |
#' |
|
| 141 |
#' Convenient function for calculating the two-sided p-value of the mean. |
|
| 142 |
#' |
|
| 143 |
#' @inheritParams argument_convention |
|
| 144 |
#' @param n_min (`numeric(1)`)\cr a minimum number of non-missing `x` to estimate the p-value of the mean. |
|
| 145 |
#' @param test_mean (`numeric(1)`)\cr mean value to test under the null hypothesis. |
|
| 146 |
#' |
|
| 147 |
#' @return A p-value. |
|
| 148 |
#' |
|
| 149 |
#' @examples |
|
| 150 |
#' stat_mean_pval(sample(10)) |
|
| 151 |
#' |
|
| 152 |
#' stat_mean_pval(rnorm(10), test_mean = 0.5) |
|
| 153 |
#' |
|
| 154 |
#' @export |
|
| 155 |
stat_mean_pval <- function(x, |
|
| 156 |
na.rm = TRUE, # nolint |
|
| 157 |
n_min = 2, |
|
| 158 |
test_mean = 0) {
|
|
| 159 | 1210x |
if (na.rm) {
|
| 160 | 9x |
x <- stats::na.omit(x) |
| 161 |
} |
|
| 162 | 1210x |
n <- length(x) |
| 163 | ||
| 164 | 1210x |
x_mean <- mean(x) |
| 165 | 1210x |
x_sd <- stats::sd(x) |
| 166 | ||
| 167 | 1210x |
if (n < n_min) {
|
| 168 | 152x |
pv <- c(p_value = NA_real_) |
| 169 |
} else {
|
|
| 170 | 1058x |
x_se <- stats::sd(x) / sqrt(n) |
| 171 | 1058x |
ttest <- (x_mean - test_mean) / x_se |
| 172 | 1058x |
pv <- c(p_value = 2 * stats::pt(-abs(ttest), df = n - 1)) |
| 173 |
} |
|
| 174 | ||
| 175 | 1210x |
return(pv) |
| 176 |
} |
|
| 177 | ||
| 178 |
#' Proportion difference and confidence interval |
|
| 179 |
#' |
|
| 180 |
#' @description `r lifecycle::badge("stable")`
|
|
| 181 |
#' |
|
| 182 |
#' Function for calculating the proportion (or risk) difference and confidence interval between arm |
|
| 183 |
#' X (reference group) and arm Y. Risk difference is calculated by subtracting cumulative incidence |
|
| 184 |
#' in arm Y from cumulative incidence in arm X. |
|
| 185 |
#' |
|
| 186 |
#' @inheritParams argument_convention |
|
| 187 |
#' @param x (`list` of `integer`)\cr list of number of occurrences in arm X (reference group). |
|
| 188 |
#' @param y (`list` of `integer`)\cr list of number of occurrences in arm Y. Must be of equal length to `x`. |
|
| 189 |
#' @param N_x (`numeric(1)`)\cr total number of records in arm X. |
|
| 190 |
#' @param N_y (`numeric(1)`)\cr total number of records in arm Y. |
|
| 191 |
#' @param list_names (`character`)\cr names of each variable/level corresponding to pair of proportions in |
|
| 192 |
#' `x` and `y`. Must be of equal length to `x` and `y`. |
|
| 193 |
#' @param pct (`flag`)\cr whether output should be returned as percentages. Defaults to `TRUE`. |
|
| 194 |
#' |
|
| 195 |
#' @return List of proportion differences and CIs corresponding to each pair of number of occurrences in `x` and |
|
| 196 |
#' `y`. Each list element consists of 3 statistics: proportion difference, CI lower bound, and CI upper bound. |
|
| 197 |
#' |
|
| 198 |
#' @seealso Split function [add_riskdiff()] which, when used as `split_fun` within [rtables::split_cols_by()] |
|
| 199 |
#' with `riskdiff` argument is set to `TRUE` in subsequent analyze functions, adds a column containing |
|
| 200 |
#' proportion (risk) difference to an `rtables` layout. |
|
| 201 |
#' |
|
| 202 |
#' @examples |
|
| 203 |
#' stat_propdiff_ci( |
|
| 204 |
#' x = list(0.375), y = list(0.01), N_x = 5, N_y = 5, list_names = "x", conf_level = 0.9 |
|
| 205 |
#' ) |
|
| 206 |
#' |
|
| 207 |
#' stat_propdiff_ci( |
|
| 208 |
#' x = list(0.5, 0.75, 1), y = list(0.25, 0.05, 0.5), N_x = 10, N_y = 20, pct = FALSE |
|
| 209 |
#' ) |
|
| 210 |
#' |
|
| 211 |
#' @export |
|
| 212 |
stat_propdiff_ci <- function(x, |
|
| 213 |
y, |
|
| 214 |
N_x, # nolint |
|
| 215 |
N_y, # nolint |
|
| 216 |
list_names = NULL, |
|
| 217 |
conf_level = 0.95, |
|
| 218 |
pct = TRUE) {
|
|
| 219 | 62x |
checkmate::assert_list(x, types = "numeric") |
| 220 | 62x |
checkmate::assert_list(y, types = "numeric", len = length(x)) |
| 221 | 62x |
checkmate::assert_character(list_names, len = length(x), null.ok = TRUE) |
| 222 | 62x |
rd_list <- lapply(seq_along(x), function(i) {
|
| 223 | 145x |
p_x <- x[[i]] / N_x |
| 224 | 145x |
p_y <- y[[i]] / N_y |
| 225 | 145x |
rd_ci <- p_x - p_y + c(-1, 1) * stats::qnorm((1 + conf_level) / 2) * |
| 226 | 145x |
sqrt(p_x * (1 - p_x) / N_x + p_y * (1 - p_y) / N_y) |
| 227 | 145x |
c(p_x - p_y, rd_ci) * ifelse(pct, 100, 1) |
| 228 |
}) |
|
| 229 | 62x |
names(rd_list) <- list_names |
| 230 | 62x |
rd_list |
| 231 |
} |
| 1 |
#' Analyze numeric variables in columns |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("experimental")`
|
|
| 4 |
#' |
|
| 5 |
#' The layout-creating function [analyze_vars_in_cols()] creates a layout element to generate a column-wise |
|
| 6 |
#' analysis table. |
|
| 7 |
#' |
|
| 8 |
#' This function sets the analysis methods as column labels and is a wrapper for [rtables::analyze_colvars()]. |
|
| 9 |
#' It was designed principally for PK tables. |
|
| 10 |
#' |
|
| 11 |
#' @inheritParams argument_convention |
|
| 12 |
#' @inheritParams rtables::analyze_colvars |
|
| 13 |
#' @param imp_rule (`string` or `NULL`)\cr imputation rule setting. Defaults to `NULL` for no imputation rule. Can |
|
| 14 |
#' also be `"1/3"` to implement 1/3 imputation rule or `"1/2"` to implement 1/2 imputation rule. In order |
|
| 15 |
#' to use an imputation rule, the `avalcat_var` argument must be specified. See [imputation_rule()] |
|
| 16 |
#' for more details on imputation. |
|
| 17 |
#' @param avalcat_var (`string`)\cr if `imp_rule` is not `NULL`, name of variable that indicates whether a |
|
| 18 |
#' row in the data corresponds to an analysis value in category `"BLQ"`, `"LTR"`, `"<PCLLOQ"`, or none of |
|
| 19 |
#' the above (defaults to `"AVALCAT1"`). Variable must be present in the data and should match the variable |
|
| 20 |
#' used to calculate the `n_blq` statistic (if included in `.stats`). |
|
| 21 |
#' @param cache (`flag`)\cr whether to store computed values in a temporary caching environment. This will |
|
| 22 |
#' speed up calculations in large tables, but should be set to `FALSE` if the same `rtable` layout is |
|
| 23 |
#' used for multiple tables with different data. Defaults to `FALSE`. |
|
| 24 |
#' @param row_labels (`character`)\cr as this function works in columns space, usually `.labels` |
|
| 25 |
#' character vector applies on the column space. You can change the row labels by defining this |
|
| 26 |
#' parameter to a named character vector with names corresponding to the split values. It defaults |
|
| 27 |
#' to `NULL` and if it contains only one `string`, it will duplicate that as a row label. |
|
| 28 |
#' @param do_summarize_row_groups (`flag`)\cr defaults to `FALSE` and applies the analysis to the current |
|
| 29 |
#' label rows. This is a wrapper of [rtables::summarize_row_groups()] and it can accept `labelstr` |
|
| 30 |
#' to define row labels. This behavior is not supported as we never need to overload row labels. |
|
| 31 |
#' @param split_col_vars (`flag`)\cr defaults to `TRUE` and puts the analysis results onto the columns. |
|
| 32 |
#' This option allows you to add multiple instances of this functions, also in a nested fashion, |
|
| 33 |
#' without adding more splits. This split must happen only one time on a single layout. |
|
| 34 |
#' |
|
| 35 |
#' @return |
|
| 36 |
#' A layout object suitable for passing to further layouting functions, or to [rtables::build_table()]. |
|
| 37 |
#' Adding this function to an `rtable` layout will summarize the given variables, arrange the output |
|
| 38 |
#' in columns, and add it to the table layout. |
|
| 39 |
#' |
|
| 40 |
#' @note |
|
| 41 |
#' * This is an experimental implementation of [rtables::summarize_row_groups()] and [rtables::analyze_colvars()] |
|
| 42 |
#' that may be subjected to changes as `rtables` extends its support to more complex analysis pipelines in the |
|
| 43 |
#' column space. We encourage users to read the examples carefully and file issues for different use cases. |
|
| 44 |
#' * In this function, `labelstr` behaves atypically. If `labelstr = NULL` (the default), row labels are assigned |
|
| 45 |
#' automatically as the split values if `do_summarize_row_groups = FALSE` (the default), and as the group label |
|
| 46 |
#' if `do_summarize_row_groups = TRUE`. |
|
| 47 |
#' |
|
| 48 |
#' @seealso [analyze_vars()], [rtables::analyze_colvars()]. |
|
| 49 |
#' |
|
| 50 |
#' @examples |
|
| 51 |
#' library(dplyr) |
|
| 52 |
#' |
|
| 53 |
#' # Data preparation |
|
| 54 |
#' adpp <- tern_ex_adpp %>% h_pkparam_sort() |
|
| 55 |
#' |
|
| 56 |
#' lyt <- basic_table() %>% |
|
| 57 |
#' split_rows_by(var = "STRATA1", label_pos = "topleft") %>% |
|
| 58 |
#' split_rows_by( |
|
| 59 |
#' var = "SEX", |
|
| 60 |
#' label_pos = "topleft", |
|
| 61 |
#' child_labels = "hidden" |
|
| 62 |
#' ) %>% # Removes duplicated labels |
|
| 63 |
#' analyze_vars_in_cols(vars = "AGE") |
|
| 64 |
#' result <- build_table(lyt = lyt, df = adpp) |
|
| 65 |
#' result |
|
| 66 |
#' |
|
| 67 |
#' # By selecting just some statistics and ad-hoc labels |
|
| 68 |
#' lyt <- basic_table() %>% |
|
| 69 |
#' split_rows_by(var = "ARM", label_pos = "topleft") %>% |
|
| 70 |
#' split_rows_by( |
|
| 71 |
#' var = "SEX", |
|
| 72 |
#' label_pos = "topleft", |
|
| 73 |
#' child_labels = "hidden", |
|
| 74 |
#' split_fun = drop_split_levels |
|
| 75 |
#' ) %>% |
|
| 76 |
#' analyze_vars_in_cols( |
|
| 77 |
#' vars = "AGE", |
|
| 78 |
#' .stats = c("n", "cv", "geom_mean"),
|
|
| 79 |
#' .labels = c( |
|
| 80 |
#' n = "aN", |
|
| 81 |
#' cv = "aCV", |
|
| 82 |
#' geom_mean = "aGeomMean" |
|
| 83 |
#' ) |
|
| 84 |
#' ) |
|
| 85 |
#' result <- build_table(lyt = lyt, df = adpp) |
|
| 86 |
#' result |
|
| 87 |
#' |
|
| 88 |
#' # Changing row labels |
|
| 89 |
#' lyt <- basic_table() %>% |
|
| 90 |
#' analyze_vars_in_cols( |
|
| 91 |
#' vars = "AGE", |
|
| 92 |
#' row_labels = "some custom label" |
|
| 93 |
#' ) |
|
| 94 |
#' result <- build_table(lyt, df = adpp) |
|
| 95 |
#' result |
|
| 96 |
#' |
|
| 97 |
#' # Pharmacokinetic parameters |
|
| 98 |
#' lyt <- basic_table() %>% |
|
| 99 |
#' split_rows_by( |
|
| 100 |
#' var = "TLG_DISPLAY", |
|
| 101 |
#' split_label = "PK Parameter", |
|
| 102 |
#' label_pos = "topleft", |
|
| 103 |
#' child_labels = "hidden" |
|
| 104 |
#' ) %>% |
|
| 105 |
#' analyze_vars_in_cols( |
|
| 106 |
#' vars = "AVAL" |
|
| 107 |
#' ) |
|
| 108 |
#' result <- build_table(lyt, df = adpp) |
|
| 109 |
#' result |
|
| 110 |
#' |
|
| 111 |
#' # Multiple calls (summarize label and analyze underneath) |
|
| 112 |
#' lyt <- basic_table() %>% |
|
| 113 |
#' split_rows_by( |
|
| 114 |
#' var = "TLG_DISPLAY", |
|
| 115 |
#' split_label = "PK Parameter", |
|
| 116 |
#' label_pos = "topleft" |
|
| 117 |
#' ) %>% |
|
| 118 |
#' analyze_vars_in_cols( |
|
| 119 |
#' vars = "AVAL", |
|
| 120 |
#' do_summarize_row_groups = TRUE # does a summarize level |
|
| 121 |
#' ) %>% |
|
| 122 |
#' split_rows_by("SEX",
|
|
| 123 |
#' child_labels = "hidden", |
|
| 124 |
#' label_pos = "topleft" |
|
| 125 |
#' ) %>% |
|
| 126 |
#' analyze_vars_in_cols( |
|
| 127 |
#' vars = "AVAL", |
|
| 128 |
#' split_col_vars = FALSE # avoids re-splitting the columns |
|
| 129 |
#' ) |
|
| 130 |
#' result <- build_table(lyt, df = adpp) |
|
| 131 |
#' result |
|
| 132 |
#' |
|
| 133 |
#' @export |
|
| 134 |
analyze_vars_in_cols <- function(lyt, |
|
| 135 |
vars, |
|
| 136 |
..., |
|
| 137 |
.stats = c( |
|
| 138 |
"n", |
|
| 139 |
"mean", |
|
| 140 |
"sd", |
|
| 141 |
"se", |
|
| 142 |
"cv", |
|
| 143 |
"geom_cv" |
|
| 144 |
), |
|
| 145 |
.labels = c( |
|
| 146 |
n = "n", |
|
| 147 |
mean = "Mean", |
|
| 148 |
sd = "SD", |
|
| 149 |
se = "SE", |
|
| 150 |
cv = "CV (%)", |
|
| 151 |
geom_cv = "CV % Geometric Mean" |
|
| 152 |
), |
|
| 153 |
row_labels = NULL, |
|
| 154 |
do_summarize_row_groups = FALSE, |
|
| 155 |
split_col_vars = TRUE, |
|
| 156 |
imp_rule = NULL, |
|
| 157 |
avalcat_var = "AVALCAT1", |
|
| 158 |
cache = FALSE, |
|
| 159 |
.indent_mods = NULL, |
|
| 160 |
na_str = default_na_str(), |
|
| 161 |
nested = TRUE, |
|
| 162 |
.formats = NULL, |
|
| 163 |
.aligns = NULL) {
|
|
| 164 | 26x |
extra_args <- list(...) |
| 165 | ||
| 166 | 26x |
checkmate::assert_string(na_str, na.ok = TRUE, null.ok = TRUE) |
| 167 | 26x |
checkmate::assert_character(row_labels, null.ok = TRUE) |
| 168 | 26x |
checkmate::assert_int(.indent_mods, null.ok = TRUE) |
| 169 | 26x |
checkmate::assert_flag(nested) |
| 170 | 26x |
checkmate::assert_flag(split_col_vars) |
| 171 | 26x |
checkmate::assert_flag(do_summarize_row_groups) |
| 172 | ||
| 173 |
# Filtering |
|
| 174 | 26x |
met_grps <- paste0("analyze_vars", c("_numeric", "_counts"))
|
| 175 | 26x |
.stats <- get_stats(met_grps, stats_in = .stats) |
| 176 | 26x |
formats_v <- get_formats_from_stats(stats = .stats, formats_in = .formats) |
| 177 | 26x |
labels_v <- get_labels_from_stats(stats = .stats, labels_in = .labels) %>% .unlist_keep_nulls() |
| 178 | ! |
if ("control" %in% names(extra_args)) labels_v <- labels_v %>% labels_use_control(extra_args[["control"]], .labels)
|
| 179 | ||
| 180 |
# Check for vars in the case that one or more are used |
|
| 181 | 26x |
if (length(vars) == 1) {
|
| 182 | 21x |
vars <- rep(vars, length(.stats)) |
| 183 | 5x |
} else if (length(vars) != length(.stats)) {
|
| 184 | 1x |
stop( |
| 185 | 1x |
"Analyzed variables (vars) does not have the same ", |
| 186 | 1x |
"number of elements of specified statistics (.stats)." |
| 187 |
) |
|
| 188 |
} |
|
| 189 | ||
| 190 | 25x |
if (split_col_vars) {
|
| 191 |
# Checking there is not a previous identical column split |
|
| 192 | 21x |
clyt <- tail(clayout(lyt), 1)[[1]] |
| 193 | ||
| 194 | 21x |
dummy_lyt <- split_cols_by_multivar( |
| 195 | 21x |
lyt = basic_table(), |
| 196 | 21x |
vars = vars, |
| 197 | 21x |
varlabels = labels_v |
| 198 |
) |
|
| 199 | ||
| 200 | 21x |
if (any(sapply(clyt, identical, y = get_last_col_split(dummy_lyt)))) {
|
| 201 | 2x |
stop( |
| 202 | 2x |
"Column split called again with the same values. ", |
| 203 | 2x |
"This can create many unwanted columns. Please consider adding ", |
| 204 | 2x |
"split_col_vars = FALSE to the last call of ", |
| 205 | 2x |
deparse(sys.calls()[[sys.nframe() - 1]]), "." |
| 206 |
) |
|
| 207 |
} |
|
| 208 | ||
| 209 |
# Main col split |
|
| 210 | 19x |
lyt <- split_cols_by_multivar( |
| 211 | 19x |
lyt = lyt, |
| 212 | 19x |
vars = vars, |
| 213 | 19x |
varlabels = labels_v |
| 214 |
) |
|
| 215 |
} |
|
| 216 | ||
| 217 | 23x |
env <- new.env() # create caching environment |
| 218 | ||
| 219 | 23x |
if (do_summarize_row_groups) {
|
| 220 | 8x |
if (length(unique(vars)) > 1) {
|
| 221 | ! |
stop("When using do_summarize_row_groups only one label level var should be inserted.")
|
| 222 |
} |
|
| 223 | ||
| 224 |
# Function list for do_summarize_row_groups. Slightly different handling of labels |
|
| 225 | 8x |
cfun_list <- Map( |
| 226 | 8x |
function(stat, use_cache, cache_env) {
|
| 227 | 48x |
function(u, .spl_context, labelstr, .df_row, ...) {
|
| 228 |
# Statistic |
|
| 229 | 152x |
var_row_val <- paste( |
| 230 | 152x |
gsub("\\._\\[\\[[0-9]+\\]\\]_\\.", "", paste(tail(.spl_context$cur_col_split_val, 1)[[1]], collapse = "_")),
|
| 231 | 152x |
paste(.spl_context$value, collapse = "_"), |
| 232 | 152x |
sep = "_" |
| 233 |
) |
|
| 234 | 152x |
if (use_cache) {
|
| 235 | ! |
if (is.null(cache_env[[var_row_val]])) {
|
| 236 | ! |
cache_env[[var_row_val]] <- s_summary(u, ...) |
| 237 |
} |
|
| 238 | ! |
x_stats <- cache_env[[var_row_val]] |
| 239 |
} else {
|
|
| 240 | 152x |
x_stats <- s_summary(u, ...) |
| 241 |
} |
|
| 242 | ||
| 243 | 152x |
if (is.null(imp_rule) || !stat %in% c("mean", "sd", "cv", "geom_mean", "geom_cv", "median", "min", "max")) {
|
| 244 | 152x |
res <- x_stats[[stat]] |
| 245 |
} else {
|
|
| 246 | ! |
timept <- as.numeric(gsub(".*?([0-9\\.]+).*", "\\1", tail(.spl_context$value, 1)))
|
| 247 | ! |
res_imp <- imputation_rule( |
| 248 | ! |
.df_row, x_stats, stat, |
| 249 | ! |
imp_rule = imp_rule, |
| 250 | ! |
post = grepl("Predose", tail(.spl_context$value, 1)) || timept > 0,
|
| 251 | ! |
avalcat_var = avalcat_var |
| 252 |
) |
|
| 253 | ! |
res <- res_imp[["val"]] |
| 254 | ! |
na_str <- res_imp[["na_str"]] |
| 255 |
} |
|
| 256 | ||
| 257 |
# Label check and replacement |
|
| 258 | 152x |
if (length(row_labels) > 1) {
|
| 259 | 32x |
if (!(labelstr %in% names(row_labels))) {
|
| 260 | 2x |
stop( |
| 261 | 2x |
"Replacing the labels in do_summarize_row_groups needs a named vector", |
| 262 | 2x |
"that contains the split values. In the current split variable ", |
| 263 | 2x |
.spl_context$split[nrow(.spl_context)], |
| 264 | 2x |
" the labelstr value (split value by default) ", labelstr, " is not in", |
| 265 | 2x |
" row_labels names: ", names(row_labels) |
| 266 |
) |
|
| 267 |
} |
|
| 268 | 30x |
lbl <- unlist(row_labels[labelstr]) |
| 269 |
} else {
|
|
| 270 | 120x |
lbl <- labelstr |
| 271 |
} |
|
| 272 | ||
| 273 |
# Cell creation |
|
| 274 | 150x |
rcell(res, |
| 275 | 150x |
label = lbl, |
| 276 | 150x |
format = formats_v[names(formats_v) == stat][[1]], |
| 277 | 150x |
format_na_str = na_str, |
| 278 | 150x |
indent_mod = ifelse(is.null(.indent_mods), 0L, .indent_mods), |
| 279 | 150x |
align = .aligns |
| 280 |
) |
|
| 281 |
} |
|
| 282 |
}, |
|
| 283 | 8x |
stat = .stats, |
| 284 | 8x |
use_cache = cache, |
| 285 | 8x |
cache_env = replicate(length(.stats), env) |
| 286 |
) |
|
| 287 | ||
| 288 |
# Main call to rtables |
|
| 289 | 8x |
summarize_row_groups( |
| 290 | 8x |
lyt = lyt, |
| 291 | 8x |
var = unique(vars), |
| 292 | 8x |
cfun = cfun_list, |
| 293 | 8x |
na_str = na_str, |
| 294 | 8x |
extra_args = extra_args |
| 295 |
) |
|
| 296 |
} else {
|
|
| 297 |
# Function list for analyze_colvars |
|
| 298 | 15x |
afun_list <- Map( |
| 299 | 15x |
function(stat, use_cache, cache_env) {
|
| 300 | 76x |
function(u, .spl_context, .df_row, ...) {
|
| 301 |
# Main statistics |
|
| 302 | 468x |
var_row_val <- paste( |
| 303 | 468x |
gsub("\\._\\[\\[[0-9]+\\]\\]_\\.", "", paste(tail(.spl_context$cur_col_split_val, 1)[[1]], collapse = "_")),
|
| 304 | 468x |
paste(.spl_context$value, collapse = "_"), |
| 305 | 468x |
sep = "_" |
| 306 |
) |
|
| 307 | 468x |
if (use_cache) {
|
| 308 | 16x |
if (is.null(cache_env[[var_row_val]])) cache_env[[var_row_val]] <- s_summary(u, ...) |
| 309 | 56x |
x_stats <- cache_env[[var_row_val]] |
| 310 |
} else {
|
|
| 311 | 412x |
x_stats <- s_summary(u, ...) |
| 312 |
} |
|
| 313 | ||
| 314 | 468x |
if (is.null(imp_rule) || !stat %in% c("mean", "sd", "cv", "geom_mean", "geom_cv", "median", "min", "max")) {
|
| 315 | 348x |
res <- x_stats[[stat]] |
| 316 |
} else {
|
|
| 317 | 120x |
timept <- as.numeric(gsub(".*?([0-9\\.]+).*", "\\1", tail(.spl_context$value, 1)))
|
| 318 | 120x |
res_imp <- imputation_rule( |
| 319 | 120x |
.df_row, x_stats, stat, |
| 320 | 120x |
imp_rule = imp_rule, |
| 321 | 120x |
post = grepl("Predose", tail(.spl_context$value, 1)) || timept > 0,
|
| 322 | 120x |
avalcat_var = avalcat_var |
| 323 |
) |
|
| 324 | 120x |
res <- res_imp[["val"]] |
| 325 | 120x |
na_str <- res_imp[["na_str"]] |
| 326 |
} |
|
| 327 | ||
| 328 | 468x |
if (is.list(res)) {
|
| 329 | 52x |
if (length(res) > 1) {
|
| 330 | 1x |
stop("The analyzed column produced more than one category of results.")
|
| 331 |
} else {
|
|
| 332 | 51x |
res <- unlist(res) |
| 333 |
} |
|
| 334 |
} |
|
| 335 | ||
| 336 |
# Label from context |
|
| 337 | 467x |
label_from_context <- .spl_context$value[nrow(.spl_context)] |
| 338 | ||
| 339 |
# Label switcher |
|
| 340 | 467x |
if (is.null(row_labels)) {
|
| 341 | 387x |
lbl <- label_from_context |
| 342 |
} else {
|
|
| 343 | 80x |
if (length(row_labels) > 1) {
|
| 344 | 68x |
if (!(label_from_context %in% names(row_labels))) {
|
| 345 | 2x |
stop( |
| 346 | 2x |
"Replacing the labels in do_summarize_row_groups needs a named vector", |
| 347 | 2x |
"that contains the split values. In the current split variable ", |
| 348 | 2x |
.spl_context$split[nrow(.spl_context)], |
| 349 | 2x |
" the split value ", label_from_context, " is not in", |
| 350 | 2x |
" row_labels names: ", names(row_labels) |
| 351 |
) |
|
| 352 |
} |
|
| 353 | 66x |
lbl <- unlist(row_labels[label_from_context]) |
| 354 |
} else {
|
|
| 355 | 12x |
lbl <- row_labels |
| 356 |
} |
|
| 357 |
} |
|
| 358 | ||
| 359 |
# Cell creation |
|
| 360 | 465x |
rcell(res, |
| 361 | 465x |
label = lbl, |
| 362 | 465x |
format = formats_v[names(formats_v) == stat][[1]], |
| 363 | 465x |
format_na_str = na_str, |
| 364 | 465x |
indent_mod = ifelse(is.null(.indent_mods), 0L, .indent_mods), |
| 365 | 465x |
align = .aligns |
| 366 |
) |
|
| 367 |
} |
|
| 368 |
}, |
|
| 369 | 15x |
stat = .stats, |
| 370 | 15x |
use_cache = cache, |
| 371 | 15x |
cache_env = replicate(length(.stats), env) |
| 372 |
) |
|
| 373 | ||
| 374 |
# Main call to rtables |
|
| 375 | 15x |
analyze_colvars(lyt, |
| 376 | 15x |
afun = afun_list, |
| 377 | 15x |
na_str = na_str, |
| 378 | 15x |
nested = nested, |
| 379 | 15x |
extra_args = extra_args |
| 380 |
) |
|
| 381 |
} |
|
| 382 |
} |
|
| 383 | ||
| 384 |
# Helper function |
|
| 385 |
get_last_col_split <- function(lyt) {
|
|
| 386 | 3x |
tail(tail(clayout(lyt), 1)[[1]], 1)[[1]] |
| 387 |
} |
| 1 |
#' Kaplan-Meier plot |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' From a survival model, a graphic is rendered along with tabulated annotation |
|
| 6 |
#' including the number of patient at risk at given time and the median survival |
|
| 7 |
#' per group. |
|
| 8 |
#' |
|
| 9 |
#' @inheritParams argument_convention |
|
| 10 |
#' @param variables (named `list`)\cr variable names. Details are: |
|
| 11 |
#' * `tte` (`numeric`)\cr variable indicating time-to-event duration values. |
|
| 12 |
#' * `is_event` (`logical`)\cr event variable. `TRUE` if event, `FALSE` if time to event is censored. |
|
| 13 |
#' * `arm` (`factor`)\cr the treatment group variable. |
|
| 14 |
#' * `strata` (`character` or `NULL`)\cr variable names indicating stratification factors. |
|
| 15 |
#' @param control_surv (`list`)\cr parameters for comparison details, specified by using |
|
| 16 |
#' the helper function [control_surv_timepoint()]. Some possible parameter options are: |
|
| 17 |
#' * `conf_level` (`proportion`)\cr confidence level of the interval for survival rate. |
|
| 18 |
#' * `conf_type` (`string`)\cr `"plain"` (default), `"log"`, `"log-log"` for confidence interval type, |
|
| 19 |
#' see more in [survival::survfit()]. Note that the option "none" is no longer supported. |
|
| 20 |
#' @param col (`character`)\cr lines colors. Length of a vector should be equal |
|
| 21 |
#' to number of strata from [survival::survfit()]. |
|
| 22 |
#' @param lty (`numeric`)\cr line type. If a vector is given, its length should be equal to the number of strata from |
|
| 23 |
#' [survival::survfit()]. |
|
| 24 |
#' @param lwd (`numeric`)\cr line width. If a vector is given, its length should be equal to the number of strata from |
|
| 25 |
#' [survival::survfit()]. |
|
| 26 |
#' @param censor_show (`flag`)\cr whether to show censored observations. |
|
| 27 |
#' @param pch (`string`)\cr name of symbol or character to use as point symbol to indicate censored cases. |
|
| 28 |
#' @param size (`numeric(1)`)\cr size of censored point symbols. |
|
| 29 |
#' @param max_time (`numeric(1)`)\cr maximum value to show on x-axis. Only data values less than or up to |
|
| 30 |
#' this threshold value will be plotted (defaults to `NULL`). |
|
| 31 |
#' @param xticks (`numeric` or `NULL`)\cr numeric vector of tick positions or a single number with spacing |
|
| 32 |
#' between ticks on the x-axis. If `NULL` (default), [labeling::extended()] is used to determine |
|
| 33 |
#' optimal tick positions on the x-axis. |
|
| 34 |
#' @param xlab (`string`)\cr x-axis label. |
|
| 35 |
#' @param yval (`string`)\cr type of plot, to be plotted on the y-axis. Options are `Survival` (default) and `Failure` |
|
| 36 |
#' probability. |
|
| 37 |
#' @param ylab (`string`)\cr y-axis label. |
|
| 38 |
#' @param title (`string`)\cr plot title. |
|
| 39 |
#' @param footnotes (`string`)\cr plot footnotes. |
|
| 40 |
#' @param font_size (`numeric(1)`)\cr font size to use for all text. |
|
| 41 |
#' @param ci_ribbon (`flag`)\cr whether the confidence interval should be drawn around the Kaplan-Meier curve. |
|
| 42 |
#' @param annot_at_risk (`flag`)\cr compute and add the annotation table reporting the number of patient at risk |
|
| 43 |
#' matching the main grid of the Kaplan-Meier curve. |
|
| 44 |
#' @param annot_at_risk_title (`flag`)\cr whether the "Patients at Risk" title should be added above the `annot_at_risk` |
|
| 45 |
#' table. Has no effect if `annot_at_risk` is `FALSE`. Defaults to `TRUE`. |
|
| 46 |
#' @param annot_surv_med (`flag`)\cr compute and add the annotation table on the Kaplan-Meier curve estimating the |
|
| 47 |
#' median survival time per group. |
|
| 48 |
#' @param annot_coxph (`flag`)\cr whether to add the annotation table from a [survival::coxph()] model. |
|
| 49 |
#' @param annot_stats (`string` or `NULL`)\cr statistics annotations to add to the plot. Options are |
|
| 50 |
#' `median` (median survival follow-up time) and `min` (minimum survival follow-up time). |
|
| 51 |
#' @param annot_stats_vlines (`flag`)\cr add vertical lines corresponding to each of the statistics |
|
| 52 |
#' specified by `annot_stats`. If `annot_stats` is `NULL` no lines will be added. |
|
| 53 |
#' @param control_coxph_pw (`list`)\cr parameters for comparison details, specified using the helper function |
|
| 54 |
#' [control_coxph()]. Some possible parameter options are: |
|
| 55 |
#' * `pval_method` (`string`)\cr p-value method for testing hazard ratio = 1. |
|
| 56 |
#' Default method is `"log-rank"`, can also be set to `"wald"` or `"likelihood"`. |
|
| 57 |
#' * `ties` (`string`)\cr method for tie handling. Default is `"efron"`, |
|
| 58 |
#' can also be set to `"breslow"` or `"exact"`. See more in [survival::coxph()] |
|
| 59 |
#' * `conf_level` (`proportion`)\cr confidence level of the interval for HR. |
|
| 60 |
#' @param ref_group_coxph (`string` or `NULL`)\cr level of arm variable to use as reference group in calculations for |
|
| 61 |
#' `annot_coxph` table. If `NULL` (default), uses the first level of the arm variable. |
|
| 62 |
#' @param control_annot_surv_med (`list`)\cr parameters to control the position and size of the annotation table added |
|
| 63 |
#' to the plot when `annot_surv_med = TRUE`, specified using the [control_surv_med_annot()] function. Parameter |
|
| 64 |
#' options are: `x`, `y`, `w`, `h`, and `fill`. See [control_surv_med_annot()] for details. |
|
| 65 |
#' @param control_annot_coxph (`list`)\cr parameters to control the position and size of the annotation table added |
|
| 66 |
#' to the plot when `annot_coxph = TRUE`, specified using the [control_coxph_annot()] function. Parameter |
|
| 67 |
#' options are: `x`, `y`, `w`, `h`, `fill`, and `ref_lbls`. See [control_coxph_annot()] for details. |
|
| 68 |
#' @param legend_pos (`numeric(2)` or `NULL`)\cr vector containing x- and y-coordinates, respectively, for the legend |
|
| 69 |
#' position relative to the KM plot area. If `NULL` (default), the legend is positioned in the bottom right corner of |
|
| 70 |
#' the plot, or the middle right of the plot if needed to prevent overlapping. |
|
| 71 |
#' @param rel_height_plot (`proportion`)\cr proportion of total figure height to allocate to the Kaplan-Meier plot. |
|
| 72 |
#' Relative height of patients at risk table is then `1 - rel_height_plot`. If `annot_at_risk = FALSE` or |
|
| 73 |
#' `as_list = TRUE`, this parameter is ignored. |
|
| 74 |
#' @param ggtheme (`theme`)\cr a graphical theme as provided by `ggplot2` to format the Kaplan-Meier plot. |
|
| 75 |
#' @param as_list (`flag`)\cr whether the two `ggplot` objects should be returned as a list when `annot_at_risk = TRUE`. |
|
| 76 |
#' If `TRUE`, a named list with two elements, `plot` and `table`, will be returned. If `FALSE` (default) the patients |
|
| 77 |
#' at risk table is printed below the plot via [cowplot::plot_grid()]. |
|
| 78 |
#' @param draw `r lifecycle::badge("deprecated")` This function no longer generates `grob` objects.
|
|
| 79 |
#' @param newpage `r lifecycle::badge("deprecated")` This function no longer generates `grob` objects.
|
|
| 80 |
#' @param gp `r lifecycle::badge("deprecated")` This function no longer generates `grob` objects.
|
|
| 81 |
#' @param vp `r lifecycle::badge("deprecated")` This function no longer generates `grob` objects.
|
|
| 82 |
#' @param name `r lifecycle::badge("deprecated")` This function no longer generates `grob` objects.
|
|
| 83 |
#' @param annot_coxph_ref_lbls `r lifecycle::badge("deprecated")` Please use the `ref_lbls` element of
|
|
| 84 |
#' `control_annot_coxph` instead. |
|
| 85 |
#' @param position_coxph `r lifecycle::badge("deprecated")` Please use the `x` and `y` elements of
|
|
| 86 |
#' `control_annot_coxph` instead. |
|
| 87 |
#' @param position_surv_med `r lifecycle::badge("deprecated")` Please use the `x` and `y` elements of
|
|
| 88 |
#' `control_annot_surv_med` instead. |
|
| 89 |
#' @param width_annots `r lifecycle::badge("deprecated")` Please use the `w` element of `control_annot_surv_med`
|
|
| 90 |
#' (for `surv_med`) and `control_annot_coxph` (for `coxph`)." |
|
| 91 |
#' |
|
| 92 |
#' @return A `ggplot` Kaplan-Meier plot and (optionally) summary table. |
|
| 93 |
#' |
|
| 94 |
#' @examples |
|
| 95 |
#' library(dplyr) |
|
| 96 |
#' |
|
| 97 |
#' df <- tern_ex_adtte %>% |
|
| 98 |
#' filter(PARAMCD == "OS") %>% |
|
| 99 |
#' mutate(is_event = CNSR == 0) |
|
| 100 |
#' variables <- list(tte = "AVAL", is_event = "is_event", arm = "ARMCD") |
|
| 101 |
#' |
|
| 102 |
#' # Basic examples |
|
| 103 |
#' g_km(df = df, variables = variables) |
|
| 104 |
#' g_km(df = df, variables = variables, yval = "Failure") |
|
| 105 |
#' |
|
| 106 |
#' # Examples with customization parameters applied |
|
| 107 |
#' g_km( |
|
| 108 |
#' df = df, |
|
| 109 |
#' variables = variables, |
|
| 110 |
#' control_surv = control_surv_timepoint(conf_level = 0.9), |
|
| 111 |
#' col = c("grey25", "grey50", "grey75"),
|
|
| 112 |
#' annot_at_risk_title = FALSE, |
|
| 113 |
#' lty = 1:3, |
|
| 114 |
#' font_size = 8 |
|
| 115 |
#' ) |
|
| 116 |
#' g_km( |
|
| 117 |
#' df = df, |
|
| 118 |
#' variables = variables, |
|
| 119 |
#' annot_stats = c("min", "median"),
|
|
| 120 |
#' annot_stats_vlines = TRUE, |
|
| 121 |
#' max_time = 3000, |
|
| 122 |
#' ggtheme = ggplot2::theme_minimal() |
|
| 123 |
#' ) |
|
| 124 |
#' |
|
| 125 |
#' # Example with pairwise Cox-PH analysis annotation table, adjusted annotation tables |
|
| 126 |
#' g_km( |
|
| 127 |
#' df = df, variables = variables, |
|
| 128 |
#' annot_coxph = TRUE, |
|
| 129 |
#' control_coxph = control_coxph(pval_method = "wald", ties = "exact", conf_level = 0.99), |
|
| 130 |
#' control_annot_coxph = control_coxph_annot(x = 0.26, w = 0.35), |
|
| 131 |
#' control_annot_surv_med = control_surv_med_annot(x = 0.8, y = 0.9, w = 0.35) |
|
| 132 |
#' ) |
|
| 133 |
#' |
|
| 134 |
#' @aliases kaplan_meier |
|
| 135 |
#' @export |
|
| 136 |
g_km <- function(df, |
|
| 137 |
variables, |
|
| 138 |
control_surv = control_surv_timepoint(), |
|
| 139 |
col = NULL, |
|
| 140 |
lty = NULL, |
|
| 141 |
lwd = 0.5, |
|
| 142 |
censor_show = TRUE, |
|
| 143 |
pch = 3, |
|
| 144 |
size = 2, |
|
| 145 |
max_time = NULL, |
|
| 146 |
xticks = NULL, |
|
| 147 |
xlab = "Days", |
|
| 148 |
yval = c("Survival", "Failure"),
|
|
| 149 |
ylab = paste(yval, "Probability"), |
|
| 150 |
ylim = NULL, |
|
| 151 |
title = NULL, |
|
| 152 |
footnotes = NULL, |
|
| 153 |
font_size = 10, |
|
| 154 |
ci_ribbon = FALSE, |
|
| 155 |
annot_at_risk = TRUE, |
|
| 156 |
annot_at_risk_title = TRUE, |
|
| 157 |
annot_surv_med = TRUE, |
|
| 158 |
annot_coxph = FALSE, |
|
| 159 |
annot_stats = NULL, |
|
| 160 |
annot_stats_vlines = FALSE, |
|
| 161 |
control_coxph_pw = control_coxph(), |
|
| 162 |
ref_group_coxph = NULL, |
|
| 163 |
control_annot_surv_med = control_surv_med_annot(), |
|
| 164 |
control_annot_coxph = control_coxph_annot(), |
|
| 165 |
legend_pos = NULL, |
|
| 166 |
rel_height_plot = 0.75, |
|
| 167 |
ggtheme = NULL, |
|
| 168 |
as_list = FALSE, |
|
| 169 |
draw = lifecycle::deprecated(), |
|
| 170 |
newpage = lifecycle::deprecated(), |
|
| 171 |
gp = lifecycle::deprecated(), |
|
| 172 |
vp = lifecycle::deprecated(), |
|
| 173 |
name = lifecycle::deprecated(), |
|
| 174 |
annot_coxph_ref_lbls = lifecycle::deprecated(), |
|
| 175 |
position_coxph = lifecycle::deprecated(), |
|
| 176 |
position_surv_med = lifecycle::deprecated(), |
|
| 177 |
width_annots = lifecycle::deprecated()) {
|
|
| 178 |
# Deprecated argument warnings |
|
| 179 | 10x |
if (lifecycle::is_present(draw)) {
|
| 180 | 1x |
lifecycle::deprecate_warn( |
| 181 | 1x |
"0.9.4", "g_km(draw)", |
| 182 | 1x |
details = "This argument is no longer used since the plot is now generated as a `ggplot2` object." |
| 183 |
) |
|
| 184 |
} |
|
| 185 | 10x |
if (lifecycle::is_present(newpage)) {
|
| 186 | 1x |
lifecycle::deprecate_warn( |
| 187 | 1x |
"0.9.4", "g_km(newpage)", |
| 188 | 1x |
details = "This argument is no longer used since the plot is now generated as a `ggplot2` object." |
| 189 |
) |
|
| 190 |
} |
|
| 191 | 10x |
if (lifecycle::is_present(gp)) {
|
| 192 | 1x |
lifecycle::deprecate_warn( |
| 193 | 1x |
"0.9.4", "g_km(gp)", |
| 194 | 1x |
details = "This argument is no longer used since the plot is now generated as a `ggplot2` object." |
| 195 |
) |
|
| 196 |
} |
|
| 197 | 10x |
if (lifecycle::is_present(vp)) {
|
| 198 | 1x |
lifecycle::deprecate_warn( |
| 199 | 1x |
"0.9.4", "g_km(vp)", |
| 200 | 1x |
details = "This argument is no longer used since the plot is now generated as a `ggplot2` object." |
| 201 |
) |
|
| 202 |
} |
|
| 203 | 10x |
if (lifecycle::is_present(name)) {
|
| 204 | 1x |
lifecycle::deprecate_warn( |
| 205 | 1x |
"0.9.4", "g_km(name)", |
| 206 | 1x |
details = "This argument is no longer used since the plot is now generated as a `ggplot2` object." |
| 207 |
) |
|
| 208 |
} |
|
| 209 | 10x |
if (lifecycle::is_present(annot_coxph_ref_lbls)) {
|
| 210 | 1x |
lifecycle::deprecate_warn( |
| 211 | 1x |
"0.9.4", "g_km(annot_coxph_ref_lbls)", |
| 212 | 1x |
details = "Please specify this setting using the 'ref_lbls' element of control_annot_coxph." |
| 213 |
) |
|
| 214 | 1x |
control_annot_coxph[["ref_lbls"]] <- annot_coxph_ref_lbls |
| 215 |
} |
|
| 216 | 10x |
if (lifecycle::is_present(position_coxph)) {
|
| 217 | 1x |
lifecycle::deprecate_warn( |
| 218 | 1x |
"0.9.4", "g_km(position_coxph)", |
| 219 | 1x |
details = "Please specify this setting using the 'x' and 'y' elements of control_annot_coxph." |
| 220 |
) |
|
| 221 | 1x |
control_annot_coxph[["x"]] <- position_coxph[1] |
| 222 | 1x |
control_annot_coxph[["y"]] <- position_coxph[2] |
| 223 |
} |
|
| 224 | 10x |
if (lifecycle::is_present(position_surv_med)) {
|
| 225 | 1x |
lifecycle::deprecate_warn( |
| 226 | 1x |
"0.9.4", "g_km(position_surv_med)", |
| 227 | 1x |
details = "Please specify this setting using the 'x' and 'y' elements of control_annot_surv_med." |
| 228 |
) |
|
| 229 | 1x |
control_annot_surv_med[["x"]] <- position_surv_med[1] |
| 230 | 1x |
control_annot_surv_med[["y"]] <- position_surv_med[2] |
| 231 |
} |
|
| 232 | 10x |
if (lifecycle::is_present(width_annots)) {
|
| 233 | 1x |
lifecycle::deprecate_warn( |
| 234 | 1x |
"0.9.4", "g_km(width_annots)", |
| 235 | 1x |
details = paste( |
| 236 | 1x |
"Please specify widths of annotation tables relative to the plot area using the 'w' element of", |
| 237 | 1x |
"control_annot_surv_med (for surv_med) and control_annot_coxph (for coxph)." |
| 238 |
) |
|
| 239 |
) |
|
| 240 | 1x |
control_annot_surv_med[["w"]] <- as.numeric(width_annots[["surv_med"]]) |
| 241 | 1x |
control_annot_coxph[["w"]] <- as.numeric(width_annots[["coxph"]]) |
| 242 |
} |
|
| 243 | ||
| 244 | 10x |
checkmate::assert_list(variables) |
| 245 | 10x |
checkmate::assert_subset(c("tte", "arm", "is_event"), names(variables))
|
| 246 | 10x |
checkmate::assert_logical(censor_show, len = 1) |
| 247 | 10x |
checkmate::assert_numeric(size, len = 1) |
| 248 | 10x |
checkmate::assert_numeric(max_time, len = 1, null.ok = TRUE) |
| 249 | 10x |
checkmate::assert_numeric(xticks, null.ok = TRUE) |
| 250 | 10x |
checkmate::assert_character(xlab, len = 1, null.ok = TRUE) |
| 251 | 10x |
checkmate::assert_character(yval) |
| 252 | 10x |
checkmate::assert_character(ylab, null.ok = TRUE) |
| 253 | 10x |
checkmate::assert_numeric(ylim, finite = TRUE, any.missing = FALSE, len = 2, sorted = TRUE, null.ok = TRUE) |
| 254 | 10x |
checkmate::assert_character(title, len = 1, null.ok = TRUE) |
| 255 | 10x |
checkmate::assert_character(footnotes, len = 1, null.ok = TRUE) |
| 256 | 10x |
checkmate::assert_numeric(font_size, len = 1) |
| 257 | 10x |
checkmate::assert_logical(ci_ribbon, len = 1) |
| 258 | 10x |
checkmate::assert_logical(annot_at_risk, len = 1) |
| 259 | 10x |
checkmate::assert_logical(annot_at_risk_title, len = 1) |
| 260 | 10x |
checkmate::assert_logical(annot_surv_med, len = 1) |
| 261 | 10x |
checkmate::assert_logical(annot_coxph, len = 1) |
| 262 | 10x |
checkmate::assert_subset(annot_stats, c("median", "min"))
|
| 263 | 10x |
checkmate::assert_logical(annot_stats_vlines) |
| 264 | 10x |
checkmate::assert_list(control_coxph_pw) |
| 265 | 10x |
checkmate::assert_character(ref_group_coxph, len = 1, null.ok = TRUE) |
| 266 | 10x |
checkmate::assert_list(control_annot_surv_med) |
| 267 | 10x |
checkmate::assert_list(control_annot_coxph) |
| 268 | 10x |
checkmate::assert_numeric(legend_pos, finite = TRUE, any.missing = FALSE, len = 2, null.ok = TRUE) |
| 269 | 10x |
assert_proportion_value(rel_height_plot) |
| 270 | 10x |
checkmate::assert_logical(as_list) |
| 271 | ||
| 272 | 10x |
tte <- variables$tte |
| 273 | 10x |
is_event <- variables$is_event |
| 274 | 10x |
arm <- variables$arm |
| 275 | 10x |
assert_valid_factor(df[[arm]]) |
| 276 | 10x |
armval <- as.character(unique(df[[arm]])) |
| 277 | 10x |
assert_df_with_variables(df, list(tte = tte, is_event = is_event, arm = arm)) |
| 278 | 10x |
checkmate::assert_logical(df[[is_event]], min.len = 1) |
| 279 | 10x |
checkmate::assert_numeric(df[[tte]], min.len = 1) |
| 280 | 10x |
checkmate::assert_vector(col, len = length(armval), null.ok = TRUE) |
| 281 | 10x |
checkmate::assert_vector(lty, null.ok = TRUE) |
| 282 | 10x |
checkmate::assert_numeric(lwd, len = 1, null.ok = TRUE) |
| 283 | ||
| 284 | 10x |
if (annot_coxph && length(armval) < 2) {
|
| 285 | ! |
stop(paste( |
| 286 | ! |
"When `annot_coxph` = TRUE, `df` must contain at least 2 levels of `variables$arm`", |
| 287 | ! |
"in order to calculate the hazard ratio." |
| 288 |
)) |
|
| 289 |
} |
|
| 290 | ||
| 291 |
# process model |
|
| 292 | 10x |
yval <- match.arg(yval) |
| 293 | 10x |
formula <- stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", arm))
|
| 294 | 10x |
fit_km <- survival::survfit( |
| 295 | 10x |
formula = formula, |
| 296 | 10x |
data = df, |
| 297 | 10x |
conf.int = control_surv$conf_level, |
| 298 | 10x |
conf.type = control_surv$conf_type |
| 299 |
) |
|
| 300 | 10x |
data <- h_data_plot(fit_km, armval = armval, max_time = max_time) |
| 301 | ||
| 302 |
# calculate x-ticks |
|
| 303 | 10x |
xticks <- h_xticks(data = data, xticks = xticks, max_time = max_time) |
| 304 | ||
| 305 |
# change estimates of survival to estimates of failure (1 - survival) |
|
| 306 | 10x |
if (yval == "Failure") {
|
| 307 | ! |
data[c("estimate", "conf.low", "conf.high", "censor")] <- list(
|
| 308 | ! |
1 - data$estimate, 1 - data$conf.low, 1 - data$conf.high, 1 - data$censor |
| 309 |
) |
|
| 310 |
} |
|
| 311 | ||
| 312 |
# derive y-axis limits |
|
| 313 | 10x |
if (is.null(ylim)) {
|
| 314 | 10x |
if (!is.null(max_time)) {
|
| 315 | 1x |
y_lwr <- min(data[data$time < max_time, ][["estimate"]]) |
| 316 | 1x |
y_upr <- max(data[data$time < max_time, ][["estimate"]]) |
| 317 |
} else {
|
|
| 318 | 9x |
y_lwr <- min(data[["estimate"]]) |
| 319 | 9x |
y_upr <- max(data[["estimate"]]) |
| 320 |
} |
|
| 321 | 10x |
ylim <- c(y_lwr, y_upr) |
| 322 |
} |
|
| 323 | ||
| 324 |
# initialize ggplot |
|
| 325 | 10x |
gg_plt <- ggplot( |
| 326 | 10x |
data = data, |
| 327 | 10x |
mapping = aes( |
| 328 | 10x |
x = .data[["time"]], |
| 329 | 10x |
y = .data[["estimate"]], |
| 330 | 10x |
ymin = .data[["conf.low"]], |
| 331 | 10x |
ymax = .data[["conf.high"]], |
| 332 | 10x |
color = .data[["strata"]], |
| 333 | 10x |
fill = .data[["strata"]] |
| 334 |
) |
|
| 335 |
) + |
|
| 336 | 10x |
theme_bw(base_size = font_size) + |
| 337 | 10x |
scale_y_continuous(limits = ylim, expand = c(0.025, 0)) + |
| 338 | 10x |
labs(title = title, x = xlab, y = ylab, caption = footnotes) + |
| 339 | 10x |
theme( |
| 340 | 10x |
axis.text = element_text(size = font_size), |
| 341 | 10x |
axis.title = element_text(size = font_size), |
| 342 | 10x |
legend.title = element_blank(), |
| 343 | 10x |
legend.text = element_text(size = font_size), |
| 344 | 10x |
legend.box.background = element_rect(fill = "white", linewidth = 0.5), |
| 345 | 10x |
legend.background = element_blank(), |
| 346 | 10x |
legend.position = "inside", |
| 347 | 10x |
legend.spacing.y = unit(-0.02, "npc"), |
| 348 | 10x |
panel.grid.major = element_blank(), |
| 349 | 10x |
panel.grid.minor = element_blank() |
| 350 |
) |
|
| 351 | ||
| 352 |
# derive x-axis limits |
|
| 353 | 10x |
if (!is.null(max_time) && !is.null(xticks)) {
|
| 354 | 1x |
gg_plt <- gg_plt + scale_x_continuous( |
| 355 | 1x |
breaks = xticks, limits = c(min(0, xticks), max(c(xticks, max_time))), expand = c(0.025, 0) |
| 356 |
) |
|
| 357 | 9x |
} else if (!is.null(xticks)) {
|
| 358 | 9x |
if (max(data$time) <= max(xticks)) {
|
| 359 | 9x |
gg_plt <- gg_plt + scale_x_continuous( |
| 360 | 9x |
breaks = xticks, limits = c(min(0, min(xticks)), max(xticks)), expand = c(0.025, 0) |
| 361 |
) |
|
| 362 |
} else {
|
|
| 363 | ! |
gg_plt <- gg_plt + scale_x_continuous(breaks = xticks, expand = c(0.025, 0)) |
| 364 |
} |
|
| 365 | ! |
} else if (!is.null(max_time)) {
|
| 366 | ! |
gg_plt <- gg_plt + scale_x_continuous(limits = c(0, max_time), expand = c(0.025, 0)) |
| 367 |
} |
|
| 368 | ||
| 369 |
# set legend position |
|
| 370 | 10x |
if (!is.null(legend_pos)) {
|
| 371 | 2x |
gg_plt <- gg_plt + theme(legend.position.inside = legend_pos) |
| 372 |
} else {
|
|
| 373 | 8x |
max_time2 <- sort( |
| 374 | 8x |
data$time, |
| 375 | 8x |
partial = nrow(data) - length(armval) - 1 |
| 376 | 8x |
)[nrow(data) - length(armval) - 1] |
| 377 | ||
| 378 | 8x |
y_rng <- ylim[2] - ylim[1] |
| 379 | ||
| 380 | 8x |
if (yval == "Survival" && all(data$estimate[data$time == max_time2] > ylim[1] + 0.09 * y_rng) && |
| 381 | 8x |
all(data$estimate[data$time == max_time2] < ylim[1] + 0.5 * y_rng)) { # nolint
|
| 382 | 1x |
gg_plt <- gg_plt + |
| 383 | 1x |
theme( |
| 384 | 1x |
legend.position.inside = c(1, 0.5), |
| 385 | 1x |
legend.justification = c(1.1, 0.6) |
| 386 |
) |
|
| 387 |
} else {
|
|
| 388 | 7x |
gg_plt <- gg_plt + |
| 389 | 7x |
theme( |
| 390 | 7x |
legend.position.inside = c(1, 0), |
| 391 | 7x |
legend.justification = c(1.1, -0.4) |
| 392 |
) |
|
| 393 |
} |
|
| 394 |
} |
|
| 395 | ||
| 396 |
# add lines |
|
| 397 | 10x |
gg_plt <- if (is.null(lty)) {
|
| 398 | 9x |
gg_plt + geom_step(linewidth = lwd, na.rm = TRUE) |
| 399 | 10x |
} else if (length(lty) == 1) {
|
| 400 | ! |
gg_plt + geom_step(linewidth = lwd, lty = lty, na.rm = TRUE) |
| 401 |
} else {
|
|
| 402 | 1x |
gg_plt + |
| 403 | 1x |
geom_step(aes(lty = .data[["strata"]]), linewidth = lwd, na.rm = TRUE) + |
| 404 | 1x |
scale_linetype_manual(values = lty) |
| 405 |
} |
|
| 406 | ||
| 407 |
# add censor marks |
|
| 408 | 10x |
if (censor_show) {
|
| 409 | 10x |
gg_plt <- gg_plt + geom_point( |
| 410 | 10x |
data = data[data$n.censor != 0, ], |
| 411 | 10x |
aes(x = .data[["time"]], y = .data[["censor"]], shape = "Censored"), |
| 412 | 10x |
size = size, |
| 413 | 10x |
na.rm = TRUE |
| 414 |
) + |
|
| 415 | 10x |
scale_shape_manual(name = NULL, values = pch) + |
| 416 | 10x |
guides(fill = guide_legend(override.aes = list(shape = NA))) |
| 417 |
} |
|
| 418 | ||
| 419 |
# add ci ribbon |
|
| 420 | 1x |
if (ci_ribbon) gg_plt <- gg_plt + geom_ribbon(alpha = 0.3, lty = 0, na.rm = TRUE) |
| 421 | ||
| 422 |
# control aesthetics |
|
| 423 | 10x |
if (!is.null(col)) {
|
| 424 | 1x |
gg_plt <- gg_plt + |
| 425 | 1x |
scale_color_manual(values = col) + |
| 426 | 1x |
scale_fill_manual(values = col) |
| 427 |
} |
|
| 428 | ! |
if (!is.null(ggtheme)) gg_plt <- gg_plt + ggtheme |
| 429 | ||
| 430 |
# annotate with stats (text/vlines) |
|
| 431 | 10x |
if (!is.null(annot_stats)) {
|
| 432 | ! |
if ("median" %in% annot_stats) {
|
| 433 | ! |
fit_km_all <- survival::survfit( |
| 434 | ! |
formula = stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", 1)),
|
| 435 | ! |
data = df, |
| 436 | ! |
conf.int = control_surv$conf_level, |
| 437 | ! |
conf.type = control_surv$conf_type |
| 438 |
) |
|
| 439 | ! |
gg_plt <- gg_plt + |
| 440 | ! |
annotate( |
| 441 | ! |
"text", |
| 442 | ! |
size = font_size / .pt, col = 1, lineheight = 0.95, |
| 443 | ! |
x = stats::median(fit_km_all) + 0.07 * max(data$time), |
| 444 | ! |
y = ifelse(yval == "Survival", 0.65, 0.35), |
| 445 | ! |
label = paste("Median F/U:\n", round(stats::median(fit_km_all), 1), tolower(df$AVALU[1]))
|
| 446 |
) |
|
| 447 | ! |
if (annot_stats_vlines) {
|
| 448 | ! |
gg_plt <- gg_plt + |
| 449 | ! |
annotate( |
| 450 | ! |
"segment", |
| 451 | ! |
x = stats::median(fit_km_all), xend = stats::median(fit_km_all), y = -Inf, yend = Inf, |
| 452 | ! |
linetype = 2, col = "darkgray" |
| 453 |
) |
|
| 454 |
} |
|
| 455 |
} |
|
| 456 | ! |
if ("min" %in% annot_stats) {
|
| 457 | ! |
min_fu <- min(df[[tte]]) |
| 458 | ! |
gg_plt <- gg_plt + |
| 459 | ! |
annotate( |
| 460 | ! |
"text", |
| 461 | ! |
size = font_size / .pt, col = 1, lineheight = 0.95, |
| 462 | ! |
x = min_fu + max(data$time) * 0.07, |
| 463 | ! |
y = ifelse(yval == "Survival", 0.96, 0.05), |
| 464 | ! |
label = paste("Min. F/U:\n", round(min_fu, 1), tolower(df$AVALU[1]))
|
| 465 |
) |
|
| 466 | ! |
if (annot_stats_vlines) {
|
| 467 | ! |
gg_plt <- gg_plt + |
| 468 | ! |
annotate( |
| 469 | ! |
"segment", |
| 470 | ! |
linetype = 2, col = "darkgray", |
| 471 | ! |
x = min_fu, xend = min_fu, y = Inf, yend = -Inf |
| 472 |
) |
|
| 473 |
} |
|
| 474 |
} |
|
| 475 | ! |
gg_plt <- gg_plt + guides(fill = guide_legend(override.aes = list(shape = NA, label = ""))) |
| 476 |
} |
|
| 477 | ||
| 478 |
# add at risk annotation table |
|
| 479 | 10x |
if (annot_at_risk) {
|
| 480 | 9x |
annot_tbl <- summary(fit_km, times = xticks, extend = TRUE) |
| 481 | 9x |
annot_tbl <- if (is.null(fit_km$strata)) {
|
| 482 | ! |
data.frame( |
| 483 | ! |
n.risk = annot_tbl$n.risk, |
| 484 | ! |
time = annot_tbl$time, |
| 485 | ! |
strata = armval |
| 486 |
) |
|
| 487 |
} else {
|
|
| 488 | 9x |
strata_lst <- strsplit(sub("=", "equals", levels(annot_tbl$strata)), "equals")
|
| 489 | 9x |
levels(annot_tbl$strata) <- matrix(unlist(strata_lst), ncol = 2, byrow = TRUE)[, 2] |
| 490 | 9x |
data.frame( |
| 491 | 9x |
n.risk = annot_tbl$n.risk, |
| 492 | 9x |
time = annot_tbl$time, |
| 493 | 9x |
strata = annot_tbl$strata |
| 494 |
) |
|
| 495 |
} |
|
| 496 | ||
| 497 | 9x |
at_risk_tbl <- as.data.frame(tidyr::pivot_wider(annot_tbl, names_from = "time", values_from = "n.risk")[, -1]) |
| 498 | 9x |
at_risk_tbl[is.na(at_risk_tbl)] <- 0 |
| 499 | 9x |
rownames(at_risk_tbl) <- levels(annot_tbl$strata) |
| 500 | ||
| 501 | 9x |
gg_at_risk <- df2gg( |
| 502 | 9x |
at_risk_tbl, |
| 503 | 9x |
font_size = font_size, col_labels = FALSE, hline = FALSE, |
| 504 | 9x |
colwidths = rep(1, ncol(at_risk_tbl)) |
| 505 |
) + |
|
| 506 | 9x |
labs(title = if (annot_at_risk_title) "Patients at Risk:" else NULL, x = xlab) + |
| 507 | 9x |
theme_bw(base_size = font_size) + |
| 508 | 9x |
theme( |
| 509 | 9x |
plot.title = element_text(size = font_size, vjust = 3, face = "bold"), |
| 510 | 9x |
panel.border = element_blank(), |
| 511 | 9x |
panel.grid = element_blank(), |
| 512 | 9x |
axis.title.y = element_blank(), |
| 513 | 9x |
axis.ticks.y = element_blank(), |
| 514 | 9x |
axis.text.y = element_text(size = font_size, face = "italic", hjust = 1), |
| 515 | 9x |
axis.text.x = element_text(size = font_size), |
| 516 | 9x |
axis.line.x = element_line() |
| 517 |
) + |
|
| 518 | 9x |
coord_cartesian(clip = "off", ylim = c(0.5, nrow(at_risk_tbl))) |
| 519 | 9x |
gg_at_risk <- suppressMessages( |
| 520 | 9x |
gg_at_risk + |
| 521 | 9x |
scale_x_continuous(expand = c(0.025, 0), breaks = seq_along(at_risk_tbl) - 0.5, labels = xticks) + |
| 522 | 9x |
scale_y_continuous(labels = rev(levels(annot_tbl$strata)), breaks = seq_len(nrow(at_risk_tbl))) |
| 523 |
) |
|
| 524 | ||
| 525 | 9x |
if (!as_list) {
|
| 526 | 8x |
gg_plt <- cowplot::plot_grid( |
| 527 | 8x |
gg_plt, |
| 528 | 8x |
gg_at_risk, |
| 529 | 8x |
align = "v", |
| 530 | 8x |
axis = "tblr", |
| 531 | 8x |
ncol = 1, |
| 532 | 8x |
rel_heights = c(rel_height_plot, 1 - rel_height_plot) |
| 533 |
) |
|
| 534 |
} |
|
| 535 |
} |
|
| 536 | ||
| 537 |
# add median survival time annotation table |
|
| 538 | 10x |
if (annot_surv_med) {
|
| 539 | 8x |
surv_med_tbl <- h_tbl_median_surv(fit_km = fit_km, armval = armval) |
| 540 | 8x |
bg_fill <- if (isTRUE(control_annot_surv_med[["fill"]])) "#00000020" else control_annot_surv_med[["fill"]] |
| 541 | ||
| 542 | 8x |
gg_surv_med <- df2gg(surv_med_tbl, font_size = font_size, colwidths = c(1, 1, 2), bg_fill = bg_fill) + |
| 543 | 8x |
theme( |
| 544 | 8x |
axis.text.y = element_text(size = font_size, face = "italic", hjust = 1), |
| 545 | 8x |
plot.margin = margin(0, 2, 0, 5) |
| 546 |
) + |
|
| 547 | 8x |
coord_cartesian(clip = "off", ylim = c(0.5, nrow(surv_med_tbl) + 1.5)) |
| 548 | 8x |
gg_surv_med <- suppressMessages( |
| 549 | 8x |
gg_surv_med + |
| 550 | 8x |
scale_x_continuous(expand = c(0.025, 0)) + |
| 551 | 8x |
scale_y_continuous(labels = rev(rownames(surv_med_tbl)), breaks = seq_len(nrow(surv_med_tbl))) |
| 552 |
) |
|
| 553 | ||
| 554 | 8x |
gg_plt <- cowplot::ggdraw(gg_plt) + |
| 555 | 8x |
cowplot::draw_plot( |
| 556 | 8x |
gg_surv_med, |
| 557 | 8x |
control_annot_surv_med[["x"]], |
| 558 | 8x |
control_annot_surv_med[["y"]], |
| 559 | 8x |
width = control_annot_surv_med[["w"]], |
| 560 | 8x |
height = control_annot_surv_med[["h"]], |
| 561 | 8x |
vjust = 0.5, |
| 562 | 8x |
hjust = 0.5 |
| 563 |
) |
|
| 564 |
} |
|
| 565 | ||
| 566 |
# add coxph annotation table |
|
| 567 | 10x |
if (annot_coxph) {
|
| 568 | 1x |
coxph_tbl <- h_tbl_coxph_pairwise( |
| 569 | 1x |
df = df, |
| 570 | 1x |
variables = variables, |
| 571 | 1x |
ref_group_coxph = ref_group_coxph, |
| 572 | 1x |
control_coxph_pw = control_coxph_pw, |
| 573 | 1x |
annot_coxph_ref_lbls = control_annot_coxph[["ref_lbls"]] |
| 574 |
) |
|
| 575 | 1x |
bg_fill <- if (isTRUE(control_annot_coxph[["fill"]])) "#00000020" else control_annot_coxph[["fill"]] |
| 576 | ||
| 577 | 1x |
gg_coxph <- df2gg(coxph_tbl, font_size = font_size, colwidths = c(1.1, 1, 3), bg_fill = bg_fill) + |
| 578 | 1x |
theme( |
| 579 | 1x |
axis.text.y = element_text(size = font_size, face = "italic", hjust = 1), |
| 580 | 1x |
plot.margin = margin(0, 2, 0, 5) |
| 581 |
) + |
|
| 582 | 1x |
coord_cartesian(clip = "off", ylim = c(0.5, nrow(coxph_tbl) + 1.5)) |
| 583 | 1x |
gg_coxph <- suppressMessages( |
| 584 | 1x |
gg_coxph + |
| 585 | 1x |
scale_x_continuous(expand = c(0.025, 0)) + |
| 586 | 1x |
scale_y_continuous(labels = rev(rownames(coxph_tbl)), breaks = seq_len(nrow(coxph_tbl))) |
| 587 |
) |
|
| 588 | ||
| 589 | 1x |
gg_plt <- cowplot::ggdraw(gg_plt) + |
| 590 | 1x |
cowplot::draw_plot( |
| 591 | 1x |
gg_coxph, |
| 592 | 1x |
control_annot_coxph[["x"]], |
| 593 | 1x |
control_annot_coxph[["y"]], |
| 594 | 1x |
width = control_annot_coxph[["w"]], |
| 595 | 1x |
height = control_annot_coxph[["h"]], |
| 596 | 1x |
vjust = 0.5, |
| 597 | 1x |
hjust = 0.5 |
| 598 |
) |
|
| 599 |
} |
|
| 600 | ||
| 601 | 10x |
if (as_list) {
|
| 602 | 1x |
list(plot = gg_plt, table = gg_at_risk) |
| 603 |
} else {
|
|
| 604 | 9x |
gg_plt |
| 605 |
} |
|
| 606 |
} |
| 1 |
#' Odds ratio estimation |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' The analyze function [estimate_odds_ratio()] creates a layout element to compare bivariate responses between |
|
| 6 |
#' two groups by estimating an odds ratio and its confidence interval. |
|
| 7 |
#' |
|
| 8 |
#' The primary analysis variable specified by `vars` is the group variable. Additional variables can be included in the |
|
| 9 |
#' analysis via the `variables` argument, which accepts `arm`, an arm variable, and `strata`, a stratification variable. |
|
| 10 |
#' If more than two arm levels are present, they can be combined into two groups using the `groups_list` argument. |
|
| 11 |
#' |
|
| 12 |
#' @inheritParams split_cols_by_groups |
|
| 13 |
#' @inheritParams argument_convention |
|
| 14 |
#' @param .stats (`character`)\cr statistics to select for the table. |
|
| 15 |
#' |
|
| 16 |
#' Options are: ``r shQuote(get_stats("estimate_odds_ratio"), type = "sh")``
|
|
| 17 |
#' @param method (`string`)\cr whether to use the correct (`"exact"`) calculation in the conditional likelihood or one |
|
| 18 |
#' of the approximations. See [survival::clogit()] for details. |
|
| 19 |
#' |
|
| 20 |
#' @note |
|
| 21 |
#' * This function uses logistic regression for unstratified analyses, and conditional logistic regression for |
|
| 22 |
#' stratified analyses. The Wald confidence interval is calculated with the specified confidence level. |
|
| 23 |
#' * For stratified analyses, there is currently no implementation for conditional likelihood confidence intervals, |
|
| 24 |
#' therefore the likelihood confidence interval is not available as an option. |
|
| 25 |
#' * When `vars` contains only responders or non-responders no odds ratio estimation is possible so the returned |
|
| 26 |
#' values will be `NA`. |
|
| 27 |
#' |
|
| 28 |
#' @seealso Relevant helper function [h_odds_ratio()]. |
|
| 29 |
#' |
|
| 30 |
#' @name odds_ratio |
|
| 31 |
#' @order 1 |
|
| 32 |
NULL |
|
| 33 | ||
| 34 |
#' @describeIn odds_ratio Statistics function which estimates the odds ratio |
|
| 35 |
#' between a treatment and a control. A `variables` list with `arm` and `strata` |
|
| 36 |
#' variable names must be passed if a stratified analysis is required. |
|
| 37 |
#' |
|
| 38 |
#' @return |
|
| 39 |
#' * `s_odds_ratio()` returns a named list with the statistics `or_ci` |
|
| 40 |
#' (containing `est`, `lcl`, and `ucl`) and `n_tot`. |
|
| 41 |
#' |
|
| 42 |
#' @examples |
|
| 43 |
#' # Unstratified analysis. |
|
| 44 |
#' s_odds_ratio( |
|
| 45 |
#' df = subset(dta, grp == "A"), |
|
| 46 |
#' .var = "rsp", |
|
| 47 |
#' .ref_group = subset(dta, grp == "B"), |
|
| 48 |
#' .in_ref_col = FALSE, |
|
| 49 |
#' .df_row = dta |
|
| 50 |
#' ) |
|
| 51 |
#' |
|
| 52 |
#' # Stratified analysis. |
|
| 53 |
#' s_odds_ratio( |
|
| 54 |
#' df = subset(dta, grp == "A"), |
|
| 55 |
#' .var = "rsp", |
|
| 56 |
#' .ref_group = subset(dta, grp == "B"), |
|
| 57 |
#' .in_ref_col = FALSE, |
|
| 58 |
#' .df_row = dta, |
|
| 59 |
#' variables = list(arm = "grp", strata = "strata") |
|
| 60 |
#' ) |
|
| 61 |
#' |
|
| 62 |
#' @export |
|
| 63 |
s_odds_ratio <- function(df, |
|
| 64 |
.var, |
|
| 65 |
.ref_group, |
|
| 66 |
.in_ref_col, |
|
| 67 |
.df_row, |
|
| 68 |
variables = list(arm = NULL, strata = NULL), |
|
| 69 |
conf_level = 0.95, |
|
| 70 |
groups_list = NULL, |
|
| 71 |
method = "exact", |
|
| 72 |
...) {
|
|
| 73 | 99x |
y <- list(or_ci = numeric(), n_tot = numeric()) |
| 74 | ||
| 75 | 99x |
if (!.in_ref_col) {
|
| 76 | 94x |
assert_proportion_value(conf_level) |
| 77 | 94x |
assert_df_with_variables(df, list(rsp = .var)) |
| 78 | 94x |
assert_df_with_variables(.ref_group, list(rsp = .var)) |
| 79 | ||
| 80 | 94x |
if (is.null(variables$strata)) {
|
| 81 | 76x |
data <- data.frame( |
| 82 | 76x |
rsp = c(.ref_group[[.var]], df[[.var]]), |
| 83 | 76x |
grp = factor( |
| 84 | 76x |
rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))),
|
| 85 | 76x |
levels = c("ref", "Not-ref")
|
| 86 |
) |
|
| 87 |
) |
|
| 88 | 76x |
y <- or_glm(data, conf_level = conf_level) |
| 89 |
} else {
|
|
| 90 | 18x |
assert_df_with_variables(.df_row, c(list(rsp = .var), variables)) |
| 91 | 18x |
checkmate::assert_subset(method, c("exact", "approximate", "efron", "breslow"), empty.ok = FALSE)
|
| 92 | ||
| 93 |
# The group variable prepared for clogit must be synchronised with combination groups definition. |
|
| 94 | 18x |
if (is.null(groups_list)) {
|
| 95 | 16x |
ref_grp <- as.character(unique(.ref_group[[variables$arm]])) |
| 96 | 16x |
trt_grp <- as.character(unique(df[[variables$arm]])) |
| 97 | 16x |
grp <- stats::relevel(factor(.df_row[[variables$arm]]), ref = ref_grp) |
| 98 |
} else {
|
|
| 99 |
# If more than one level in reference col. |
|
| 100 | 2x |
reference <- as.character(unique(.ref_group[[variables$arm]])) |
| 101 | 2x |
grp_ref_flag <- vapply( |
| 102 | 2x |
X = groups_list, |
| 103 | 2x |
FUN.VALUE = TRUE, |
| 104 | 2x |
FUN = function(x) all(reference %in% x) |
| 105 |
) |
|
| 106 | 2x |
ref_grp <- names(groups_list)[grp_ref_flag] |
| 107 | ||
| 108 |
# If more than one level in treatment col. |
|
| 109 | 2x |
treatment <- as.character(unique(df[[variables$arm]])) |
| 110 | 2x |
grp_trt_flag <- vapply( |
| 111 | 2x |
X = groups_list, |
| 112 | 2x |
FUN.VALUE = TRUE, |
| 113 | 2x |
FUN = function(x) all(treatment %in% x) |
| 114 |
) |
|
| 115 | 2x |
trt_grp <- names(groups_list)[grp_trt_flag] |
| 116 | ||
| 117 | 2x |
grp <- combine_levels(.df_row[[variables$arm]], levels = reference, new_level = ref_grp) |
| 118 | 2x |
grp <- combine_levels(grp, levels = treatment, new_level = trt_grp) |
| 119 |
} |
|
| 120 | ||
| 121 |
# The reference level in `grp` must be the same as in the `rtables` column split. |
|
| 122 | 18x |
data <- data.frame( |
| 123 | 18x |
rsp = .df_row[[.var]], |
| 124 | 18x |
grp = grp, |
| 125 | 18x |
strata = interaction(.df_row[variables$strata]) |
| 126 |
) |
|
| 127 | 18x |
y_all <- or_clogit(data, conf_level = conf_level, method = method) |
| 128 | 18x |
checkmate::assert_string(trt_grp) |
| 129 | 18x |
checkmate::assert_subset(trt_grp, names(y_all$or_ci)) |
| 130 | 17x |
y$or_ci <- y_all$or_ci[[trt_grp]] |
| 131 | 17x |
y$n_tot <- y_all$n_tot |
| 132 |
} |
|
| 133 |
} |
|
| 134 | ||
| 135 | 98x |
if ("est" %in% names(y$or_ci) && is.na(y$or_ci[["est"]]) && method != "approximate") {
|
| 136 | 1x |
warning( |
| 137 | 1x |
"Unable to compute the odds ratio estimate. Please try re-running the function with ", |
| 138 | 1x |
'parameter `method` set to "approximate".' |
| 139 |
) |
|
| 140 |
} |
|
| 141 | ||
| 142 | 98x |
y$or_ci <- formatters::with_label( |
| 143 | 98x |
x = y$or_ci, |
| 144 | 98x |
label = paste0("Odds Ratio (", 100 * conf_level, "% CI)")
|
| 145 |
) |
|
| 146 | ||
| 147 | 98x |
y$n_tot <- formatters::with_label( |
| 148 | 98x |
x = y$n_tot, |
| 149 | 98x |
label = "Total n" |
| 150 |
) |
|
| 151 | ||
| 152 | 98x |
y |
| 153 |
} |
|
| 154 | ||
| 155 |
#' @describeIn odds_ratio Formatted analysis function which is used as `afun` in `estimate_odds_ratio()`. |
|
| 156 |
#' |
|
| 157 |
#' @return |
|
| 158 |
#' * `a_odds_ratio()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
| 159 |
#' |
|
| 160 |
#' @examples |
|
| 161 |
#' a_odds_ratio( |
|
| 162 |
#' df = subset(dta, grp == "A"), |
|
| 163 |
#' .var = "rsp", |
|
| 164 |
#' .ref_group = subset(dta, grp == "B"), |
|
| 165 |
#' .in_ref_col = FALSE, |
|
| 166 |
#' .df_row = dta |
|
| 167 |
#' ) |
|
| 168 |
#' |
|
| 169 |
#' @export |
|
| 170 |
a_odds_ratio <- function(df, |
|
| 171 |
..., |
|
| 172 |
.stats = NULL, |
|
| 173 |
.stat_names = NULL, |
|
| 174 |
.formats = NULL, |
|
| 175 |
.labels = NULL, |
|
| 176 |
.indent_mods = NULL) {
|
|
| 177 |
# Check for additional parameters to the statistics function |
|
| 178 | 12x |
dots_extra_args <- list(...) |
| 179 | 12x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
| 180 | 12x |
dots_extra_args$.additional_fun_parameters <- NULL |
| 181 | ||
| 182 |
# Check for user-defined functions |
|
| 183 | 12x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
| 184 | 12x |
.stats <- default_and_custom_stats_list$all_stats |
| 185 | 12x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
| 186 | ||
| 187 |
# Apply statistics function |
|
| 188 | 12x |
x_stats <- .apply_stat_functions( |
| 189 | 12x |
default_stat_fnc = s_odds_ratio, |
| 190 | 12x |
custom_stat_fnc_list = custom_stat_functions, |
| 191 | 12x |
args_list = c( |
| 192 | 12x |
df = list(df), |
| 193 | 12x |
extra_afun_params, |
| 194 | 12x |
dots_extra_args |
| 195 |
) |
|
| 196 |
) |
|
| 197 | ||
| 198 |
# Fill in formatting defaults |
|
| 199 | 12x |
.stats <- get_stats("estimate_odds_ratio",
|
| 200 | 12x |
stats_in = .stats, |
| 201 | 12x |
custom_stats_in = names(custom_stat_functions) |
| 202 |
) |
|
| 203 | 12x |
x_stats <- x_stats[.stats] |
| 204 | 12x |
.formats <- get_formats_from_stats(.stats, .formats) |
| 205 | 12x |
.labels <- get_labels_from_stats( |
| 206 | 12x |
.stats, .labels, |
| 207 | 12x |
tern_defaults = c(lapply(x_stats, attr, "label"), tern_default_labels) |
| 208 |
) |
|
| 209 | 12x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods) |
| 210 | ||
| 211 |
# Auto format handling |
|
| 212 | 12x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
| 213 | ||
| 214 |
# Get and check statistical names |
|
| 215 | 12x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
| 216 | ||
| 217 | 12x |
in_rows( |
| 218 | 12x |
.list = x_stats, |
| 219 | 12x |
.formats = .formats, |
| 220 | 12x |
.names = .labels %>% .unlist_keep_nulls(), |
| 221 | 12x |
.stat_names = .stat_names, |
| 222 | 12x |
.labels = .labels %>% .unlist_keep_nulls(), |
| 223 | 12x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
| 224 |
) |
|
| 225 |
} |
|
| 226 | ||
| 227 |
#' @describeIn odds_ratio Layout-creating function which can take statistics function arguments |
|
| 228 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()]. |
|
| 229 |
#' |
|
| 230 |
#' @return |
|
| 231 |
#' * `estimate_odds_ratio()` returns a layout object suitable for passing to further layouting functions, |
|
| 232 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing |
|
| 233 |
#' the statistics from `s_odds_ratio()` to the table layout. |
|
| 234 |
#' |
|
| 235 |
#' @examples |
|
| 236 |
#' set.seed(12) |
|
| 237 |
#' dta <- data.frame( |
|
| 238 |
#' rsp = sample(c(TRUE, FALSE), 100, TRUE), |
|
| 239 |
#' grp = factor(rep(c("A", "B"), each = 50), levels = c("A", "B")),
|
|
| 240 |
#' strata = factor(sample(c("C", "D"), 100, TRUE))
|
|
| 241 |
#' ) |
|
| 242 |
#' |
|
| 243 |
#' l <- basic_table() %>% |
|
| 244 |
#' split_cols_by(var = "grp", ref_group = "B") %>% |
|
| 245 |
#' estimate_odds_ratio(vars = "rsp") |
|
| 246 |
#' |
|
| 247 |
#' build_table(l, df = dta) |
|
| 248 |
#' |
|
| 249 |
#' @export |
|
| 250 |
#' @order 2 |
|
| 251 |
estimate_odds_ratio <- function(lyt, |
|
| 252 |
vars, |
|
| 253 |
variables = list(arm = NULL, strata = NULL), |
|
| 254 |
conf_level = 0.95, |
|
| 255 |
groups_list = NULL, |
|
| 256 |
method = "exact", |
|
| 257 |
na_str = default_na_str(), |
|
| 258 |
nested = TRUE, |
|
| 259 |
..., |
|
| 260 |
table_names = vars, |
|
| 261 |
show_labels = "hidden", |
|
| 262 |
var_labels = vars, |
|
| 263 |
.stats = "or_ci", |
|
| 264 |
.stat_names = NULL, |
|
| 265 |
.formats = NULL, |
|
| 266 |
.labels = NULL, |
|
| 267 |
.indent_mods = NULL) {
|
|
| 268 |
# Process standard extra arguments |
|
| 269 | 5x |
extra_args <- list(".stats" = .stats)
|
| 270 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
| 271 | ! |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
| 272 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
| 273 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
| 274 | ||
| 275 |
# Process additional arguments to the statistic function |
|
| 276 | 5x |
extra_args <- c( |
| 277 | 5x |
extra_args, |
| 278 | 5x |
variables = list(variables), conf_level = list(conf_level), groups_list = list(groups_list), method = list(method), |
| 279 |
... |
|
| 280 |
) |
|
| 281 | ||
| 282 |
# Append additional info from layout to the analysis function |
|
| 283 | 5x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
| 284 | 5x |
formals(a_odds_ratio) <- c(formals(a_odds_ratio), extra_args[[".additional_fun_parameters"]]) |
| 285 | ||
| 286 | 5x |
analyze( |
| 287 | 5x |
lyt = lyt, |
| 288 | 5x |
vars = vars, |
| 289 | 5x |
afun = a_odds_ratio, |
| 290 | 5x |
na_str = na_str, |
| 291 | 5x |
nested = nested, |
| 292 | 5x |
extra_args = extra_args, |
| 293 | 5x |
var_labels = var_labels, |
| 294 | 5x |
show_labels = show_labels, |
| 295 | 5x |
table_names = table_names |
| 296 |
) |
|
| 297 |
} |
|
| 298 | ||
| 299 |
#' Helper functions for odds ratio estimation |
|
| 300 |
#' |
|
| 301 |
#' @description `r lifecycle::badge("stable")`
|
|
| 302 |
#' |
|
| 303 |
#' Functions to calculate odds ratios in [estimate_odds_ratio()]. |
|
| 304 |
#' |
|
| 305 |
#' @inheritParams odds_ratio |
|
| 306 |
#' @inheritParams argument_convention |
|
| 307 |
#' @param data (`data.frame`)\cr data frame containing at least the variables `rsp` and `grp`, and optionally |
|
| 308 |
#' `strata` for [or_clogit()]. |
|
| 309 |
#' |
|
| 310 |
#' @return A named `list` of elements `or_ci` and `n_tot`. |
|
| 311 |
#' |
|
| 312 |
#' @seealso [odds_ratio] |
|
| 313 |
#' |
|
| 314 |
#' @name h_odds_ratio |
|
| 315 |
NULL |
|
| 316 | ||
| 317 |
#' @describeIn h_odds_ratio Estimates the odds ratio based on [stats::glm()]. Note that there must be |
|
| 318 |
#' exactly 2 groups in `data` as specified by the `grp` variable. |
|
| 319 |
#' |
|
| 320 |
#' @examples |
|
| 321 |
#' # Data with 2 groups. |
|
| 322 |
#' data <- data.frame( |
|
| 323 |
#' rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1)), |
|
| 324 |
#' grp = letters[c(1, 1, 1, 2, 2, 2, 1, 2)], |
|
| 325 |
#' strata = letters[c(1, 2, 1, 2, 2, 2, 1, 2)], |
|
| 326 |
#' stringsAsFactors = TRUE |
|
| 327 |
#' ) |
|
| 328 |
#' |
|
| 329 |
#' # Odds ratio based on glm. |
|
| 330 |
#' or_glm(data, conf_level = 0.95) |
|
| 331 |
#' |
|
| 332 |
#' @export |
|
| 333 |
or_glm <- function(data, conf_level) {
|
|
| 334 | 77x |
checkmate::assert_logical(data$rsp) |
| 335 | 77x |
assert_proportion_value(conf_level) |
| 336 | 77x |
assert_df_with_variables(data, list(rsp = "rsp", grp = "grp")) |
| 337 | 77x |
checkmate::assert_multi_class(data$grp, classes = c("factor", "character"))
|
| 338 | ||
| 339 | 77x |
data$grp <- as_factor_keep_attributes(data$grp) |
| 340 | 77x |
assert_df_with_factors(data, list(val = "grp"), min.levels = 2, max.levels = 2) |
| 341 | 77x |
formula <- stats::as.formula("rsp ~ grp")
|
| 342 | 77x |
model_fit <- stats::glm( |
| 343 | 77x |
formula = formula, data = data, |
| 344 | 77x |
family = stats::binomial(link = "logit") |
| 345 |
) |
|
| 346 | ||
| 347 |
# Note that here we need to discard the intercept. |
|
| 348 | 77x |
or <- exp(stats::coef(model_fit)[-1]) |
| 349 | 77x |
or_ci <- exp( |
| 350 | 77x |
stats::confint.default(model_fit, level = conf_level)[-1, , drop = FALSE] |
| 351 |
) |
|
| 352 | ||
| 353 | 77x |
values <- stats::setNames(c(or, or_ci), c("est", "lcl", "ucl"))
|
| 354 | 77x |
n_tot <- stats::setNames(nrow(model_fit$model), "n_tot") |
| 355 | ||
| 356 | 77x |
list(or_ci = values, n_tot = n_tot) |
| 357 |
} |
|
| 358 | ||
| 359 |
#' @describeIn h_odds_ratio Estimates the odds ratio based on [survival::clogit()]. This is done for |
|
| 360 |
#' the whole data set including all groups, since the results are not the same as when doing |
|
| 361 |
#' pairwise comparisons between the groups. |
|
| 362 |
#' |
|
| 363 |
#' @examples |
|
| 364 |
#' # Data with 3 groups. |
|
| 365 |
#' data <- data.frame( |
|
| 366 |
#' rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0)), |
|
| 367 |
#' grp = letters[c(1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3)], |
|
| 368 |
#' strata = LETTERS[c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)], |
|
| 369 |
#' stringsAsFactors = TRUE |
|
| 370 |
#' ) |
|
| 371 |
#' |
|
| 372 |
#' # Odds ratio based on stratified estimation by conditional logistic regression. |
|
| 373 |
#' or_clogit(data, conf_level = 0.95) |
|
| 374 |
#' |
|
| 375 |
#' @export |
|
| 376 |
or_clogit <- function(data, conf_level, method = "exact") {
|
|
| 377 | 19x |
checkmate::assert_logical(data$rsp) |
| 378 | 19x |
assert_proportion_value(conf_level) |
| 379 | 19x |
assert_df_with_variables(data, list(rsp = "rsp", grp = "grp", strata = "strata")) |
| 380 | 19x |
checkmate::assert_multi_class(data$grp, classes = c("factor", "character"))
|
| 381 | 19x |
checkmate::assert_multi_class(data$strata, classes = c("factor", "character"))
|
| 382 | 19x |
checkmate::assert_subset(method, c("exact", "approximate", "efron", "breslow"), empty.ok = FALSE)
|
| 383 | ||
| 384 | 19x |
data$grp <- as_factor_keep_attributes(data$grp) |
| 385 | 19x |
data$strata <- as_factor_keep_attributes(data$strata) |
| 386 | ||
| 387 |
# Deviation from convention: `survival::strata` must be simply `strata`. |
|
| 388 | 19x |
formula <- stats::as.formula("rsp ~ grp + strata(strata)")
|
| 389 | 19x |
model_fit <- clogit_with_tryCatch(formula = formula, data = data, method = method) |
| 390 | ||
| 391 |
# Create a list with one set of OR estimates and CI per coefficient, i.e. |
|
| 392 |
# comparison of one group vs. the reference group. |
|
| 393 | 19x |
coef_est <- stats::coef(model_fit) |
| 394 | 19x |
ci_est <- stats::confint(model_fit, level = conf_level) |
| 395 | 19x |
or_ci <- list() |
| 396 | 19x |
for (coef_name in names(coef_est)) {
|
| 397 | 21x |
grp_name <- gsub("^grp", "", x = coef_name)
|
| 398 | 21x |
or_ci[[grp_name]] <- stats::setNames( |
| 399 | 21x |
object = exp(c(coef_est[coef_name], ci_est[coef_name, , drop = TRUE])), |
| 400 | 21x |
nm = c("est", "lcl", "ucl")
|
| 401 |
) |
|
| 402 |
} |
|
| 403 | 19x |
list(or_ci = or_ci, n_tot = c(n_tot = model_fit$n)) |
| 404 |
} |
| 1 |
#' Summarize change from baseline values or absolute baseline values |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' The analyze function [summarize_change()] creates a layout element to summarize the change from baseline or absolute |
|
| 6 |
#' baseline values. The primary analysis variable `vars` indicates the numerical change from baseline results. |
|
| 7 |
#' |
|
| 8 |
#' Required secondary analysis variables `value` and `baseline_flag` can be supplied to the function via |
|
| 9 |
#' the `variables` argument. The `value` element should be the name of the analysis value variable, and the |
|
| 10 |
#' `baseline_flag` element should be the name of the flag variable that indicates whether or not records contain |
|
| 11 |
#' baseline values. Depending on the baseline flag given, either the absolute baseline values (at baseline) |
|
| 12 |
#' or the change from baseline values (post-baseline) are then summarized. |
|
| 13 |
#' |
|
| 14 |
#' @inheritParams argument_convention |
|
| 15 |
#' @param .stats (`character`)\cr statistics to select for the table. |
|
| 16 |
#' |
|
| 17 |
#' Options are: ``r shQuote(get_stats("analyze_vars_numeric"), type = "sh")``
|
|
| 18 |
#' |
|
| 19 |
#' @name summarize_change |
|
| 20 |
#' @order 1 |
|
| 21 |
NULL |
|
| 22 | ||
| 23 |
#' @describeIn summarize_change Statistics function that summarizes baseline or post-baseline visits. |
|
| 24 |
#' |
|
| 25 |
#' @return |
|
| 26 |
#' * `s_change_from_baseline()` returns the same values returned by [s_summary.numeric()]. |
|
| 27 |
#' |
|
| 28 |
#' @note The data in `df` must be either all be from baseline or post-baseline visits. Otherwise |
|
| 29 |
#' an error will be thrown. |
|
| 30 |
#' |
|
| 31 |
#' @keywords internal |
|
| 32 |
s_change_from_baseline <- function(df, ...) {
|
|
| 33 | 10x |
args_list <- list(...) |
| 34 | 10x |
.var <- args_list[[".var"]] |
| 35 | 10x |
variables <- args_list[["variables"]] |
| 36 | ||
| 37 | 10x |
checkmate::assert_numeric(df[[variables$value]]) |
| 38 | 10x |
checkmate::assert_numeric(df[[.var]]) |
| 39 | 10x |
checkmate::assert_logical(df[[variables$baseline_flag]]) |
| 40 | 10x |
checkmate::assert_vector(unique(df[[variables$baseline_flag]]), max.len = 1) |
| 41 | 10x |
assert_df_with_variables(df, c(variables, list(chg = .var))) |
| 42 | ||
| 43 | 10x |
combined <- ifelse( |
| 44 | 10x |
df[[variables$baseline_flag]], |
| 45 | 10x |
df[[variables$value]], |
| 46 | 10x |
df[[.var]] |
| 47 |
) |
|
| 48 | 10x |
if (is.logical(combined) && identical(length(combined), 0L)) {
|
| 49 | 1x |
combined <- numeric(0) |
| 50 |
} |
|
| 51 | 10x |
s_summary(combined, ...) |
| 52 |
} |
|
| 53 | ||
| 54 |
#' @describeIn summarize_change Formatted analysis function which is used as `afun` in `summarize_change()`. |
|
| 55 |
#' |
|
| 56 |
#' @return |
|
| 57 |
#' * `a_change_from_baseline()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
| 58 |
#' |
|
| 59 |
#' @keywords internal |
|
| 60 |
a_change_from_baseline <- function(df, |
|
| 61 |
..., |
|
| 62 |
.stats = NULL, |
|
| 63 |
.stat_names = NULL, |
|
| 64 |
.formats = NULL, |
|
| 65 |
.labels = NULL, |
|
| 66 |
.indent_mods = NULL) {
|
|
| 67 |
# Check for additional parameters to the statistics function |
|
| 68 | 8x |
dots_extra_args <- list(...) |
| 69 | 8x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
| 70 | 8x |
dots_extra_args$.additional_fun_parameters <- NULL |
| 71 | ||
| 72 |
# Check for user-defined functions |
|
| 73 | 8x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
| 74 | 8x |
.stats <- default_and_custom_stats_list$all_stats |
| 75 | 8x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
| 76 | ||
| 77 |
# Apply statistics function |
|
| 78 | 8x |
x_stats <- .apply_stat_functions( |
| 79 | 8x |
default_stat_fnc = s_change_from_baseline, |
| 80 | 8x |
custom_stat_fnc_list = custom_stat_functions, |
| 81 | 8x |
args_list = c( |
| 82 | 8x |
df = list(df), |
| 83 | 8x |
extra_afun_params, |
| 84 | 8x |
dots_extra_args |
| 85 |
) |
|
| 86 |
) |
|
| 87 | ||
| 88 |
# Fill in with formatting defaults |
|
| 89 | 6x |
.stats <- get_stats("analyze_vars_numeric", stats_in = .stats, custom_stats_in = names(custom_stat_functions))
|
| 90 | 6x |
.formats <- get_formats_from_stats(.stats, .formats) |
| 91 | 6x |
.labels <- get_labels_from_stats(.stats, .labels) |
| 92 | 6x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods) |
| 93 | ||
| 94 | 6x |
x_stats <- x_stats[.stats] |
| 95 | ||
| 96 |
# Auto format handling |
|
| 97 | 6x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
| 98 | ||
| 99 |
# Get and check statistical names |
|
| 100 | 6x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
| 101 | ||
| 102 | 6x |
in_rows( |
| 103 | 6x |
.list = x_stats, |
| 104 | 6x |
.formats = .formats, |
| 105 | 6x |
.names = names(.labels), |
| 106 | 6x |
.stat_names = .stat_names, |
| 107 | 6x |
.labels = .labels %>% .unlist_keep_nulls(), |
| 108 | 6x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
| 109 |
) |
|
| 110 |
} |
|
| 111 | ||
| 112 |
#' @describeIn summarize_change Layout-creating function which can take statistics function arguments |
|
| 113 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()]. |
|
| 114 |
#' |
|
| 115 |
#' @return |
|
| 116 |
#' * `summarize_change()` returns a layout object suitable for passing to further layouting functions, |
|
| 117 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing |
|
| 118 |
#' the statistics from `s_change_from_baseline()` to the table layout. |
|
| 119 |
#' |
|
| 120 |
#' @note To be used after a split on visits in the layout, such that each data subset only contains |
|
| 121 |
#' either baseline or post-baseline data. |
|
| 122 |
#' |
|
| 123 |
#' @examples |
|
| 124 |
#' library(dplyr) |
|
| 125 |
#' |
|
| 126 |
#' # Fabricate dataset |
|
| 127 |
#' dta_test <- data.frame( |
|
| 128 |
#' USUBJID = rep(1:6, each = 3), |
|
| 129 |
#' AVISIT = rep(paste0("V", 1:3), 6),
|
|
| 130 |
#' ARM = rep(LETTERS[1:3], rep(6, 3)), |
|
| 131 |
#' AVAL = c(9:1, rep(NA, 9)) |
|
| 132 |
#' ) %>% |
|
| 133 |
#' mutate(ABLFLL = AVISIT == "V1") %>% |
|
| 134 |
#' group_by(USUBJID) %>% |
|
| 135 |
#' mutate( |
|
| 136 |
#' BLVAL = AVAL[ABLFLL], |
|
| 137 |
#' CHG = AVAL - BLVAL |
|
| 138 |
#' ) %>% |
|
| 139 |
#' ungroup() |
|
| 140 |
#' |
|
| 141 |
#' results <- basic_table() %>% |
|
| 142 |
#' split_cols_by("ARM") %>%
|
|
| 143 |
#' split_rows_by("AVISIT") %>%
|
|
| 144 |
#' summarize_change("CHG", variables = list(value = "AVAL", baseline_flag = "ABLFLL")) %>%
|
|
| 145 |
#' build_table(dta_test) |
|
| 146 |
#' |
|
| 147 |
#' results |
|
| 148 |
#' |
|
| 149 |
#' @export |
|
| 150 |
#' @order 2 |
|
| 151 |
summarize_change <- function(lyt, |
|
| 152 |
vars, |
|
| 153 |
variables, |
|
| 154 |
var_labels = vars, |
|
| 155 |
na_str = default_na_str(), |
|
| 156 |
na_rm = TRUE, |
|
| 157 |
nested = TRUE, |
|
| 158 |
show_labels = "default", |
|
| 159 |
table_names = vars, |
|
| 160 |
section_div = NA_character_, |
|
| 161 |
..., |
|
| 162 |
.stats = c("n", "mean_sd", "median", "range"),
|
|
| 163 |
.stat_names = NULL, |
|
| 164 |
.formats = c( |
|
| 165 |
mean_sd = "xx.xx (xx.xx)", |
|
| 166 |
mean_se = "xx.xx (xx.xx)", |
|
| 167 |
median = "xx.xx", |
|
| 168 |
range = "xx.xx - xx.xx", |
|
| 169 |
mean_pval = "xx.xx" |
|
| 170 |
), |
|
| 171 |
.labels = NULL, |
|
| 172 |
.indent_mods = NULL) {
|
|
| 173 |
# Process standard extra arguments |
|
| 174 | 4x |
extra_args <- list(".stats" = .stats)
|
| 175 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
| 176 | 4x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
| 177 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
| 178 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
| 179 | ||
| 180 |
# Process additional arguments to the statistic function |
|
| 181 | 4x |
extra_args <- c( |
| 182 | 4x |
extra_args, |
| 183 | 4x |
variables = list(variables), |
| 184 | 4x |
na_rm = na_rm, |
| 185 |
... |
|
| 186 |
) |
|
| 187 | ||
| 188 |
# Append additional info from layout to the analysis function |
|
| 189 | 4x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
| 190 | 4x |
formals(a_change_from_baseline) <- c(formals(a_change_from_baseline), extra_args[[".additional_fun_parameters"]]) |
| 191 | ||
| 192 | 4x |
analyze( |
| 193 | 4x |
lyt = lyt, |
| 194 | 4x |
vars = vars, |
| 195 | 4x |
afun = a_change_from_baseline, |
| 196 | 4x |
na_str = na_str, |
| 197 | 4x |
nested = nested, |
| 198 | 4x |
extra_args = extra_args, |
| 199 | 4x |
var_labels = var_labels, |
| 200 | 4x |
show_labels = show_labels, |
| 201 | 4x |
table_names = table_names, |
| 202 | 4x |
inclNAs = !na_rm, |
| 203 | 4x |
section_div = section_div |
| 204 |
) |
|
| 205 |
} |
| 1 |
#' Proportion difference estimation |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' The analysis function [estimate_proportion_diff()] creates a layout element to estimate the difference in proportion |
|
| 6 |
#' of responders within a studied population. The primary analysis variable, `vars`, is a logical variable indicating |
|
| 7 |
#' whether a response has occurred for each record. See the `method` parameter for options of methods to use when |
|
| 8 |
#' constructing the confidence interval of the proportion difference. A stratification variable can be supplied via the |
|
| 9 |
#' `strata` element of the `variables` argument. |
|
| 10 |
#' |
|
| 11 |
#' @details The possible methods are: |
|
| 12 |
#' |
|
| 13 |
#' - `"waldcc"`: Wald confidence interval with continuity correction \insertCite{Agresti1998}{tern}.
|
|
| 14 |
#' - `"wald"`: Wald confidence interval without continuity correction \insertCite{Agresti1998}{tern}.
|
|
| 15 |
#' - `"cmh"`: Cochran-Mantel-Haenszel (CMH) confidence interval \insertCite{MantelHaenszel1959}{tern}.
|
|
| 16 |
#' - `"cmh_sato"`: CMH confidence interval with Sato variance estimator \insertCite{Sato1989}{tern}.
|
|
| 17 |
#' - `"cmh_mn"`: CMH confidence interval with Miettinen and Nurminen confidence interval |
|
| 18 |
#' \insertCite{MiettinenNurminen1985}{tern}.
|
|
| 19 |
#' - `"ha"`: Anderson-Hauck confidence interval \insertCite{HauckAnderson1986}{tern}.
|
|
| 20 |
#' - `"newcombe"`: Newcombe confidence interval without continuity correction \insertCite{Newcombe1998}{tern}.
|
|
| 21 |
#' - `"newcombecc"`: Newcombe confidence interval with continuity correction \insertCite{Newcombe1998}{tern}.
|
|
| 22 |
#' - `"strat_newcombe"`: Stratified Newcombe confidence interval without continuity |
|
| 23 |
#' correction \insertCite{Yan2010-jt}{tern}.
|
|
| 24 |
#' - `"strat_newcombecc"`: Stratified Newcombe confidence interval with continuity |
|
| 25 |
#' correction \insertCite{Yan2010-jt}{tern}.
|
|
| 26 |
#' |
|
| 27 |
#' @inheritParams prop_diff_strat_nc |
|
| 28 |
#' @inheritParams argument_convention |
|
| 29 |
#' @param method (`string`)\cr the method used for the confidence interval estimation. |
|
| 30 |
#' @param .stats (`character`)\cr statistics to select for the table. |
|
| 31 |
#' |
|
| 32 |
#' Options are: ``r shQuote(get_stats("estimate_proportion_diff"), type = "sh")``
|
|
| 33 |
#' |
|
| 34 |
#' @seealso [d_proportion_diff()] |
|
| 35 |
#' |
|
| 36 |
#' @references |
|
| 37 |
#' \insertAllCited{}
|
|
| 38 |
#' |
|
| 39 |
#' @name prop_diff |
|
| 40 |
#' @order 1 |
|
| 41 |
NULL |
|
| 42 | ||
| 43 |
#' @describeIn prop_diff Statistics function estimating the difference |
|
| 44 |
#' in terms of responder proportion. |
|
| 45 |
#' |
|
| 46 |
#' @return |
|
| 47 |
#' * `s_proportion_diff()` returns a named list of elements `diff` and `diff_ci`. |
|
| 48 |
#' |
|
| 49 |
#' @note When performing an unstratified analysis, methods `"cmh"`, `"cmh_sato"`, `"strat_newcombe"`, |
|
| 50 |
#' and `"strat_newcombecc"` are not permitted. |
|
| 51 |
#' |
|
| 52 |
#' @examples |
|
| 53 |
#' s_proportion_diff( |
|
| 54 |
#' df = subset(dta, grp == "A"), |
|
| 55 |
#' .var = "rsp", |
|
| 56 |
#' .ref_group = subset(dta, grp == "B"), |
|
| 57 |
#' .in_ref_col = FALSE, |
|
| 58 |
#' conf_level = 0.90, |
|
| 59 |
#' method = "ha" |
|
| 60 |
#' ) |
|
| 61 |
#' |
|
| 62 |
#' # CMH example with strata |
|
| 63 |
#' s_proportion_diff( |
|
| 64 |
#' df = subset(dta, grp == "A"), |
|
| 65 |
#' .var = "rsp", |
|
| 66 |
#' .ref_group = subset(dta, grp == "B"), |
|
| 67 |
#' .in_ref_col = FALSE, |
|
| 68 |
#' variables = list(strata = c("f1", "f2")),
|
|
| 69 |
#' conf_level = 0.90, |
|
| 70 |
#' method = "cmh" |
|
| 71 |
#' ) |
|
| 72 |
#' |
|
| 73 |
#' @export |
|
| 74 |
s_proportion_diff <- function(df, |
|
| 75 |
.var, |
|
| 76 |
.ref_group, |
|
| 77 |
.in_ref_col, |
|
| 78 |
variables = list(strata = NULL), |
|
| 79 |
conf_level = 0.95, |
|
| 80 |
method = c( |
|
| 81 |
"waldcc", "wald", "cmh", "cmh_sato", "cmh_mn", |
|
| 82 |
"ha", "newcombe", "newcombecc", |
|
| 83 |
"strat_newcombe", "strat_newcombecc" |
|
| 84 |
), |
|
| 85 |
weights_method = "cmh", |
|
| 86 |
...) {
|
|
| 87 | 13x |
method <- match.arg(method) |
| 88 |
if ( |
|
| 89 | 13x |
is.null(variables$strata) && |
| 90 | 13x |
checkmate::test_subset(method, c("cmh", "cmh_sato", "cmh_mn", "strat_newcombe", "strat_newcombecc"))
|
| 91 |
) {
|
|
| 92 | ! |
stop(paste( |
| 93 | ! |
"When performing an unstratified analysis, methods", |
| 94 | ! |
"'cmh', 'cmh_sato', 'cmh_mn', 'strat_newcombe', and 'strat_newcombecc' are not", |
| 95 | ! |
"permitted. Please choose a different method." |
| 96 |
)) |
|
| 97 |
} |
|
| 98 | 13x |
y <- list(diff = numeric(), diff_ci = numeric()) |
| 99 | ||
| 100 | 13x |
if (!.in_ref_col) {
|
| 101 | 9x |
rsp <- c(.ref_group[[.var]], df[[.var]]) |
| 102 | 9x |
grp <- factor( |
| 103 | 9x |
rep( |
| 104 | 9x |
c("ref", "Not-ref"),
|
| 105 | 9x |
c(nrow(.ref_group), nrow(df)) |
| 106 |
), |
|
| 107 | 9x |
levels = c("ref", "Not-ref")
|
| 108 |
) |
|
| 109 | ||
| 110 | 9x |
if (!is.null(variables$strata)) {
|
| 111 | 5x |
strata_colnames <- variables$strata |
| 112 | 5x |
checkmate::assert_character(strata_colnames, null.ok = FALSE) |
| 113 | 5x |
strata_vars <- stats::setNames(as.list(strata_colnames), strata_colnames) |
| 114 | ||
| 115 | 5x |
assert_df_with_variables(df, strata_vars) |
| 116 | 5x |
assert_df_with_variables(.ref_group, strata_vars) |
| 117 | ||
| 118 |
# Merging interaction strata for reference group rows data and remaining |
|
| 119 | 5x |
strata <- c( |
| 120 | 5x |
interaction(.ref_group[strata_colnames]), |
| 121 | 5x |
interaction(df[strata_colnames]) |
| 122 |
) |
|
| 123 | 5x |
strata <- as.factor(strata) |
| 124 |
} |
|
| 125 | ||
| 126 |
# Defining the std way to calculate weights for strat_newcombe |
|
| 127 | 9x |
if (!is.null(variables$weights_method)) {
|
| 128 | ! |
weights_method <- variables$weights_method |
| 129 |
} else {
|
|
| 130 | 9x |
weights_method <- "cmh" |
| 131 |
} |
|
| 132 | ||
| 133 | 9x |
y <- switch(method, |
| 134 | 9x |
"wald" = prop_diff_wald(rsp, grp, conf_level, correct = FALSE), |
| 135 | 9x |
"waldcc" = prop_diff_wald(rsp, grp, conf_level, correct = TRUE), |
| 136 | 9x |
"ha" = prop_diff_ha(rsp, grp, conf_level), |
| 137 | 9x |
"newcombe" = prop_diff_nc(rsp, grp, conf_level, correct = FALSE), |
| 138 | 9x |
"newcombecc" = prop_diff_nc(rsp, grp, conf_level, correct = TRUE), |
| 139 | 9x |
"strat_newcombe" = prop_diff_strat_nc(rsp, |
| 140 | 9x |
grp, |
| 141 | 9x |
strata, |
| 142 | 9x |
weights_method, |
| 143 | 9x |
conf_level, |
| 144 | 9x |
correct = FALSE |
| 145 |
), |
|
| 146 | 9x |
"strat_newcombecc" = prop_diff_strat_nc(rsp, |
| 147 | 9x |
grp, |
| 148 | 9x |
strata, |
| 149 | 9x |
weights_method, |
| 150 | 9x |
conf_level, |
| 151 | 9x |
correct = TRUE |
| 152 |
), |
|
| 153 | 9x |
"cmh" = prop_diff_cmh(rsp, grp, strata, conf_level, diff_se = "standard")[c("diff", "diff_ci")],
|
| 154 | 9x |
"cmh_sato" = prop_diff_cmh(rsp, grp, strata, conf_level, diff_se = "sato")[c("diff", "diff_ci")],
|
| 155 | 9x |
"cmh_mn" = prop_diff_cmh(rsp, grp, strata, conf_level, diff_se = "miettinen_nurminen")[c("diff", "diff_ci")]
|
| 156 |
) |
|
| 157 | ||
| 158 | 9x |
y$diff <- setNames(y$diff * 100, paste0("diff_", method))
|
| 159 | 9x |
y$diff_ci <- setNames(y$diff_ci * 100, paste0("diff_ci_", method, c("_l", "_u")))
|
| 160 |
} |
|
| 161 | ||
| 162 | 13x |
attr(y$diff, "label") <- "Difference in Response rate (%)" |
| 163 | 13x |
attr(y$diff_ci, "label") <- d_proportion_diff( |
| 164 | 13x |
conf_level, method, |
| 165 | 13x |
long = FALSE |
| 166 |
) |
|
| 167 | ||
| 168 | 13x |
y |
| 169 |
} |
|
| 170 | ||
| 171 |
#' @describeIn prop_diff Formatted analysis function which is used as `afun` in `estimate_proportion_diff()`. |
|
| 172 |
#' |
|
| 173 |
#' @return |
|
| 174 |
#' * `a_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
| 175 |
#' |
|
| 176 |
#' @examples |
|
| 177 |
#' a_proportion_diff( |
|
| 178 |
#' df = subset(dta, grp == "A"), |
|
| 179 |
#' .stats = c("diff"),
|
|
| 180 |
#' .var = "rsp", |
|
| 181 |
#' .ref_group = subset(dta, grp == "B"), |
|
| 182 |
#' .in_ref_col = FALSE, |
|
| 183 |
#' conf_level = 0.90, |
|
| 184 |
#' method = "ha" |
|
| 185 |
#' ) |
|
| 186 |
#' |
|
| 187 |
#' @export |
|
| 188 |
a_proportion_diff <- function(df, |
|
| 189 |
..., |
|
| 190 |
.stats = NULL, |
|
| 191 |
.stat_names = NULL, |
|
| 192 |
.formats = NULL, |
|
| 193 |
.labels = NULL, |
|
| 194 |
.indent_mods = NULL) {
|
|
| 195 | 9x |
dots_extra_args <- list(...) |
| 196 | ||
| 197 |
# Check if there are user-defined functions |
|
| 198 | 9x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
| 199 | 9x |
.stats <- default_and_custom_stats_list$all_stats |
| 200 | 9x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
| 201 | ||
| 202 |
# Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params) |
|
| 203 | 9x |
extra_afun_params <- retrieve_extra_afun_params( |
| 204 | 9x |
names(dots_extra_args$.additional_fun_parameters) |
| 205 |
) |
|
| 206 | 9x |
dots_extra_args$.additional_fun_parameters <- NULL # After extraction we do not need them anymore |
| 207 | ||
| 208 |
# Main statistical functions application |
|
| 209 | 9x |
x_stats <- .apply_stat_functions( |
| 210 | 9x |
default_stat_fnc = s_proportion_diff, |
| 211 | 9x |
custom_stat_fnc_list = custom_stat_functions, |
| 212 | 9x |
args_list = c( |
| 213 | 9x |
df = list(df), |
| 214 | 9x |
extra_afun_params, |
| 215 | 9x |
dots_extra_args |
| 216 |
) |
|
| 217 |
) |
|
| 218 | ||
| 219 |
# Fill in with stats defaults if needed |
|
| 220 | 9x |
.stats <- get_stats("estimate_proportion_diff",
|
| 221 | 9x |
stats_in = .stats, |
| 222 | 9x |
custom_stats_in = names(custom_stat_functions) |
| 223 |
) |
|
| 224 | ||
| 225 | 9x |
x_stats <- x_stats[.stats] |
| 226 | ||
| 227 |
# Fill in formats/indents/labels with custom input and defaults |
|
| 228 | 9x |
.formats <- get_formats_from_stats(.stats, .formats) |
| 229 | 9x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods) |
| 230 | 9x |
if (is.null(.labels)) {
|
| 231 | 9x |
.labels <- sapply(x_stats, attr, "label") |
| 232 | 9x |
.labels <- .labels[nzchar(.labels) & !sapply(.labels, is.null) & !is.na(.labels)] |
| 233 |
} |
|
| 234 | 9x |
.labels <- get_labels_from_stats(.stats, .labels) |
| 235 | ||
| 236 |
# Auto format handling |
|
| 237 | 9x |
.formats <- apply_auto_formatting( |
| 238 | 9x |
.formats, |
| 239 | 9x |
x_stats, |
| 240 | 9x |
extra_afun_params$.df_row, |
| 241 | 9x |
extra_afun_params$.var |
| 242 |
) |
|
| 243 | ||
| 244 |
# Get and check statistical names from defaults |
|
| 245 | 9x |
.stat_names <- get_stat_names(x_stats, .stat_names) # note is x_stats |
| 246 | ||
| 247 | 9x |
in_rows( |
| 248 | 9x |
.list = x_stats, |
| 249 | 9x |
.formats = .formats, |
| 250 | 9x |
.names = names(.labels), |
| 251 | 9x |
.stat_names = .stat_names, |
| 252 | 9x |
.labels = .labels %>% .unlist_keep_nulls(), |
| 253 | 9x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
| 254 |
) |
|
| 255 |
} |
|
| 256 | ||
| 257 |
#' @describeIn prop_diff Layout-creating function which can take statistics function arguments |
|
| 258 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()]. |
|
| 259 |
#' |
|
| 260 |
#' @return |
|
| 261 |
#' * `estimate_proportion_diff()` returns a layout object suitable for passing to further layouting functions, |
|
| 262 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing |
|
| 263 |
#' the statistics from `s_proportion_diff()` to the table layout. |
|
| 264 |
#' |
|
| 265 |
#' @examples |
|
| 266 |
#' ## "Mid" case: 4/4 respond in group A, 1/2 respond in group B. |
|
| 267 |
#' nex <- 100 # Number of example rows |
|
| 268 |
#' dta <- data.frame( |
|
| 269 |
#' "rsp" = sample(c(TRUE, FALSE), nex, TRUE), |
|
| 270 |
#' "grp" = sample(c("A", "B"), nex, TRUE),
|
|
| 271 |
#' "f1" = sample(c("a1", "a2"), nex, TRUE),
|
|
| 272 |
#' "f2" = sample(c("x", "y", "z"), nex, TRUE),
|
|
| 273 |
#' stringsAsFactors = TRUE |
|
| 274 |
#' ) |
|
| 275 |
#' |
|
| 276 |
#' l <- basic_table() %>% |
|
| 277 |
#' split_cols_by(var = "grp", ref_group = "B") %>% |
|
| 278 |
#' estimate_proportion_diff( |
|
| 279 |
#' vars = "rsp", |
|
| 280 |
#' conf_level = 0.90, |
|
| 281 |
#' method = "ha" |
|
| 282 |
#' ) |
|
| 283 |
#' |
|
| 284 |
#' build_table(l, df = dta) |
|
| 285 |
#' |
|
| 286 |
#' @export |
|
| 287 |
#' @order 2 |
|
| 288 |
estimate_proportion_diff <- function(lyt, |
|
| 289 |
vars, |
|
| 290 |
variables = list(strata = NULL), |
|
| 291 |
conf_level = 0.95, |
|
| 292 |
method = c( |
|
| 293 |
"waldcc", "wald", "cmh", "cmh_sato", "cmh_mn", |
|
| 294 |
"ha", "newcombe", "newcombecc", |
|
| 295 |
"strat_newcombe", "strat_newcombecc" |
|
| 296 |
), |
|
| 297 |
weights_method = "cmh", |
|
| 298 |
var_labels = vars, |
|
| 299 |
na_str = default_na_str(), |
|
| 300 |
nested = TRUE, |
|
| 301 |
show_labels = "hidden", |
|
| 302 |
table_names = vars, |
|
| 303 |
section_div = NA_character_, |
|
| 304 |
..., |
|
| 305 |
na_rm = TRUE, |
|
| 306 |
.stats = c("diff", "diff_ci"),
|
|
| 307 |
.stat_names = NULL, |
|
| 308 |
.formats = c(diff = "xx.x", diff_ci = "(xx.x, xx.x)"), |
|
| 309 |
.labels = NULL, |
|
| 310 |
.indent_mods = c(diff = 0L, diff_ci = 1L)) {
|
|
| 311 |
# Depending on main functions |
|
| 312 | 4x |
extra_args <- list( |
| 313 | 4x |
"na_rm" = na_rm, |
| 314 | 4x |
"variables" = variables, |
| 315 | 4x |
"conf_level" = conf_level, |
| 316 | 4x |
"method" = method, |
| 317 | 4x |
"weights_method" = weights_method, |
| 318 |
... |
|
| 319 |
) |
|
| 320 | ||
| 321 |
# Needed defaults |
|
| 322 | 4x |
if (!is.null(.stats)) extra_args[[".stats"]] <- .stats |
| 323 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
| 324 | 4x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
| 325 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
| 326 | 4x |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
| 327 | ||
| 328 |
# Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params) |
|
| 329 | 4x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
| 330 | 4x |
formals(a_proportion_diff) <- c( |
| 331 | 4x |
formals(a_proportion_diff), |
| 332 | 4x |
extra_args[[".additional_fun_parameters"]] |
| 333 |
) |
|
| 334 | ||
| 335 |
# Main {rtables} structural call
|
|
| 336 | 4x |
analyze( |
| 337 | 4x |
lyt = lyt, |
| 338 | 4x |
vars = vars, |
| 339 | 4x |
var_labels = var_labels, |
| 340 | 4x |
afun = a_proportion_diff, |
| 341 | 4x |
na_str = na_str, |
| 342 | 4x |
inclNAs = !na_rm, |
| 343 | 4x |
nested = nested, |
| 344 | 4x |
extra_args = extra_args, |
| 345 | 4x |
show_labels = show_labels, |
| 346 | 4x |
table_names = table_names, |
| 347 | 4x |
section_div = section_div |
| 348 |
) |
|
| 349 |
} |
|
| 350 | ||
| 351 |
#' Check proportion difference arguments |
|
| 352 |
#' |
|
| 353 |
#' @description `r lifecycle::badge("stable")`
|
|
| 354 |
#' |
|
| 355 |
#' Verifies that and/or convert arguments into valid values to be used in the |
|
| 356 |
#' estimation of difference in responder proportions. |
|
| 357 |
#' |
|
| 358 |
#' @inheritParams prop_diff |
|
| 359 |
#' @inheritParams prop_diff_wald |
|
| 360 |
#' |
|
| 361 |
#' @examples |
|
| 362 |
#' # example code |
|
| 363 |
#' ## "Mid" case: 4/4 respond in group A, 1/2 respond in group B. |
|
| 364 |
#' nex <- 100 # Number of example rows |
|
| 365 |
#' dta <- data.frame( |
|
| 366 |
#' "rsp" = sample(c(TRUE, FALSE), nex, TRUE), |
|
| 367 |
#' "grp" = sample(c("A", "B"), nex, TRUE),
|
|
| 368 |
#' "f1" = sample(c("a1", "a2"), nex, TRUE),
|
|
| 369 |
#' "f2" = sample(c("x", "y", "z"), nex, TRUE),
|
|
| 370 |
#' stringsAsFactors = TRUE |
|
| 371 |
#' ) |
|
| 372 |
#' check_diff_prop_ci(rsp = dta[["rsp"]], grp = dta[["grp"]], conf_level = 0.95) |
|
| 373 |
#' @export |
|
| 374 |
check_diff_prop_ci <- function(rsp, |
|
| 375 |
grp, |
|
| 376 |
strata = NULL, |
|
| 377 |
conf_level, |
|
| 378 |
correct = NULL) {
|
|
| 379 | 32x |
checkmate::assert_logical(rsp, any.missing = FALSE) |
| 380 | 32x |
checkmate::assert_factor(grp, len = length(rsp), any.missing = FALSE, n.levels = 2) |
| 381 | 31x |
checkmate::assert_number(conf_level, lower = 0, upper = 1) |
| 382 | 31x |
checkmate::assert_flag(correct, null.ok = TRUE) |
| 383 | ||
| 384 | 31x |
if (!is.null(strata)) {
|
| 385 | 16x |
checkmate::assert_factor(strata, len = length(rsp)) |
| 386 |
} |
|
| 387 | ||
| 388 | 31x |
invisible() |
| 389 |
} |
|
| 390 | ||
| 391 |
#' Description of method used for proportion comparison |
|
| 392 |
#' |
|
| 393 |
#' @description `r lifecycle::badge("stable")`
|
|
| 394 |
#' |
|
| 395 |
#' This is an auxiliary function that describes the analysis in |
|
| 396 |
#' [s_proportion_diff()]. |
|
| 397 |
#' |
|
| 398 |
#' @inheritParams s_proportion_diff |
|
| 399 |
#' @param long (`flag`)\cr whether a long (`TRUE`) or a short (`FALSE`, default) description is required. |
|
| 400 |
#' |
|
| 401 |
#' @return A `string` describing the analysis. |
|
| 402 |
#' |
|
| 403 |
#' @seealso [prop_diff] |
|
| 404 |
#' |
|
| 405 |
#' @export |
|
| 406 |
d_proportion_diff <- function(conf_level, |
|
| 407 |
method, |
|
| 408 |
long = FALSE) {
|
|
| 409 | 13x |
label <- paste0(conf_level * 100, "% CI") |
| 410 | 13x |
if (long) {
|
| 411 | ! |
label <- paste( |
| 412 | ! |
label, |
| 413 | ! |
ifelse( |
| 414 | ! |
method %in% c("cmh", "cmh_sato", "cmh_mn"),
|
| 415 | ! |
"for adjusted difference", |
| 416 | ! |
"for difference" |
| 417 |
) |
|
| 418 |
) |
|
| 419 |
} |
|
| 420 | ||
| 421 | 13x |
method_part <- switch(method, |
| 422 | 13x |
"cmh" = "CMH, without correction", |
| 423 | 13x |
"cmh_sato" = "CMH, Sato variance estimator", |
| 424 | 13x |
"cmh_mn" = "CMH, Miettinen and Nurminen", |
| 425 | 13x |
"waldcc" = "Wald, with correction", |
| 426 | 13x |
"wald" = "Wald, without correction", |
| 427 | 13x |
"ha" = "Anderson-Hauck", |
| 428 | 13x |
"newcombe" = "Newcombe, without correction", |
| 429 | 13x |
"newcombecc" = "Newcombe, with correction", |
| 430 | 13x |
"strat_newcombe" = "Stratified Newcombe, without correction", |
| 431 | 13x |
"strat_newcombecc" = "Stratified Newcombe, with correction", |
| 432 | 13x |
stop(paste(method, "does not have a description")) |
| 433 |
) |
|
| 434 | 13x |
paste0(label, " (", method_part, ")")
|
| 435 |
} |
|
| 436 | ||
| 437 |
#' Helper functions to calculate proportion difference |
|
| 438 |
#' |
|
| 439 |
#' @description `r lifecycle::badge("stable")`
|
|
| 440 |
#' |
|
| 441 |
#' @inheritParams argument_convention |
|
| 442 |
#' @inheritParams prop_diff |
|
| 443 |
#' @param grp (`factor`)\cr vector assigning observations to one out of two groups |
|
| 444 |
#' (e.g. reference and treatment group). |
|
| 445 |
#' |
|
| 446 |
#' @return A named `list` of elements `diff` (proportion difference) and `diff_ci` |
|
| 447 |
#' (proportion difference confidence interval). |
|
| 448 |
#' |
|
| 449 |
#' @seealso [prop_diff()] for implementation of these helper functions. |
|
| 450 |
#' |
|
| 451 |
#' @references |
|
| 452 |
#' \insertAllCited{}
|
|
| 453 |
#' |
|
| 454 |
#' @name h_prop_diff |
|
| 455 |
NULL |
|
| 456 | ||
| 457 |
#' @describeIn h_prop_diff The Wald interval follows the usual textbook |
|
| 458 |
#' definition for a single proportion confidence interval using the normal |
|
| 459 |
#' approximation. It is possible to include a continuity correction for Wald's |
|
| 460 |
#' interval. |
|
| 461 |
#' |
|
| 462 |
#' @param correct (`flag`)\cr whether to include the continuity correction. For further |
|
| 463 |
#' information, see [stats::prop.test()]. |
|
| 464 |
#' |
|
| 465 |
#' @examples |
|
| 466 |
#' # Wald confidence interval |
|
| 467 |
#' set.seed(2) |
|
| 468 |
#' rsp <- sample(c(TRUE, FALSE), replace = TRUE, size = 20) |
|
| 469 |
#' grp <- factor(c(rep("A", 10), rep("B", 10)))
|
|
| 470 |
#' |
|
| 471 |
#' prop_diff_wald(rsp = rsp, grp = grp, conf_level = 0.95, correct = FALSE) |
|
| 472 |
#' |
|
| 473 |
#' @export |
|
| 474 |
prop_diff_wald <- function(rsp, |
|
| 475 |
grp, |
|
| 476 |
conf_level = 0.95, |
|
| 477 |
correct = FALSE) {
|
|
| 478 | 8x |
if (isTRUE(correct)) {
|
| 479 | 5x |
mthd <- "waldcc" |
| 480 |
} else {
|
|
| 481 | 3x |
mthd <- "wald" |
| 482 |
} |
|
| 483 | 8x |
grp <- as_factor_keep_attributes(grp) |
| 484 | 8x |
check_diff_prop_ci( |
| 485 | 8x |
rsp = rsp, grp = grp, conf_level = conf_level, correct = correct |
| 486 |
) |
|
| 487 | ||
| 488 |
# check if binary response is coded as logical |
|
| 489 | 8x |
checkmate::assert_logical(rsp, any.missing = FALSE) |
| 490 | 8x |
checkmate::assert_factor(grp, len = length(rsp), any.missing = FALSE, n.levels = 2) |
| 491 | ||
| 492 | 8x |
tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE))) |
| 493 |
# x1 and n1 are non-reference groups. |
|
| 494 | 8x |
diff_ci <- desctools_binom( |
| 495 | 8x |
x1 = tbl[2], n1 = sum(tbl[2], tbl[4]), |
| 496 | 8x |
x2 = tbl[1], n2 = sum(tbl[1], tbl[3]), |
| 497 | 8x |
conf.level = conf_level, |
| 498 | 8x |
method = mthd |
| 499 |
) |
|
| 500 | ||
| 501 | 8x |
list( |
| 502 | 8x |
"diff" = unname(diff_ci[, "est"]), |
| 503 | 8x |
"diff_ci" = unname(diff_ci[, c("lwr.ci", "upr.ci")])
|
| 504 |
) |
|
| 505 |
} |
|
| 506 | ||
| 507 |
#' @describeIn h_prop_diff Anderson-Hauck confidence interval \insertCite{HauckAnderson1986}{tern}.
|
|
| 508 |
#' |
|
| 509 |
#' @examples |
|
| 510 |
#' # Anderson-Hauck confidence interval |
|
| 511 |
#' ## "Mid" case: 3/4 respond in group A, 1/2 respond in group B. |
|
| 512 |
#' rsp <- c(TRUE, FALSE, FALSE, TRUE, TRUE, TRUE) |
|
| 513 |
#' grp <- factor(c("A", "B", "A", "B", "A", "A"), levels = c("B", "A"))
|
|
| 514 |
#' |
|
| 515 |
#' prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.90) |
|
| 516 |
#' |
|
| 517 |
#' ## Edge case: Same proportion of response in A and B. |
|
| 518 |
#' rsp <- c(TRUE, FALSE, TRUE, FALSE) |
|
| 519 |
#' grp <- factor(c("A", "A", "B", "B"), levels = c("A", "B"))
|
|
| 520 |
#' |
|
| 521 |
#' prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.6) |
|
| 522 |
#' |
|
| 523 |
#' @export |
|
| 524 |
prop_diff_ha <- function(rsp, |
|
| 525 |
grp, |
|
| 526 |
conf_level) {
|
|
| 527 | 4x |
grp <- as_factor_keep_attributes(grp) |
| 528 | 4x |
check_diff_prop_ci(rsp = rsp, grp = grp, conf_level = conf_level) |
| 529 | ||
| 530 | 4x |
tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE))) |
| 531 |
# x1 and n1 are non-reference groups. |
|
| 532 | 4x |
ci <- desctools_binom( |
| 533 | 4x |
x1 = tbl[2], n1 = sum(tbl[2], tbl[4]), |
| 534 | 4x |
x2 = tbl[1], n2 = sum(tbl[1], tbl[3]), |
| 535 | 4x |
conf.level = conf_level, |
| 536 | 4x |
method = "ha" |
| 537 |
) |
|
| 538 | 4x |
list( |
| 539 | 4x |
"diff" = unname(ci[, "est"]), |
| 540 | 4x |
"diff_ci" = unname(ci[, c("lwr.ci", "upr.ci")])
|
| 541 |
) |
|
| 542 |
} |
|
| 543 | ||
| 544 |
#' @describeIn h_prop_diff Newcombe confidence interval. It is based on |
|
| 545 |
#' the Wilson score confidence interval for a single binomial proportion \insertCite{Newcombe1998}{tern}.
|
|
| 546 |
#' |
|
| 547 |
#' @examples |
|
| 548 |
#' # Newcombe confidence interval |
|
| 549 |
#' |
|
| 550 |
#' set.seed(1) |
|
| 551 |
#' rsp <- c( |
|
| 552 |
#' sample(c(TRUE, FALSE), size = 40, prob = c(3 / 4, 1 / 4), replace = TRUE), |
|
| 553 |
#' sample(c(TRUE, FALSE), size = 40, prob = c(1 / 2, 1 / 2), replace = TRUE) |
|
| 554 |
#' ) |
|
| 555 |
#' grp <- factor(rep(c("A", "B"), each = 40), levels = c("B", "A"))
|
|
| 556 |
#' table(rsp, grp) |
|
| 557 |
#' |
|
| 558 |
#' prop_diff_nc(rsp = rsp, grp = grp, conf_level = 0.9) |
|
| 559 |
#' |
|
| 560 |
#' @export |
|
| 561 |
prop_diff_nc <- function(rsp, |
|
| 562 |
grp, |
|
| 563 |
conf_level, |
|
| 564 |
correct = FALSE) {
|
|
| 565 | 2x |
if (isTRUE(correct)) {
|
| 566 | ! |
mthd <- "scorecc" |
| 567 |
} else {
|
|
| 568 | 2x |
mthd <- "score" |
| 569 |
} |
|
| 570 | 2x |
grp <- as_factor_keep_attributes(grp) |
| 571 | 2x |
check_diff_prop_ci(rsp = rsp, grp = grp, conf_level = conf_level) |
| 572 | ||
| 573 | 2x |
p_grp <- tapply(rsp, grp, mean) |
| 574 | 2x |
diff_p <- unname(diff(p_grp)) |
| 575 | 2x |
tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE))) |
| 576 | 2x |
ci <- desctools_binom( |
| 577 |
# x1 and n1 are non-reference groups. |
|
| 578 | 2x |
x1 = tbl[2], n1 = sum(tbl[2], tbl[4]), |
| 579 | 2x |
x2 = tbl[1], n2 = sum(tbl[1], tbl[3]), |
| 580 | 2x |
conf.level = conf_level, |
| 581 | 2x |
method = mthd |
| 582 |
) |
|
| 583 | 2x |
list( |
| 584 | 2x |
"diff" = unname(ci[, "est"]), |
| 585 | 2x |
"diff_ci" = unname(ci[, c("lwr.ci", "upr.ci")])
|
| 586 |
) |
|
| 587 |
} |
|
| 588 | ||
| 589 |
#' @describeIn h_prop_diff Calculates the weighted difference. This is defined as the difference in |
|
| 590 |
#' response rates between the experimental treatment group and the control treatment group, adjusted |
|
| 591 |
#' for stratification factors by applying Cochran-Mantel-Haenszel (CMH) weights. For the CMH chi-squared |
|
| 592 |
#' test, use [stats::mantelhaen.test()]. |
|
| 593 |
#' |
|
| 594 |
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`. |
|
| 595 |
#' @param diff_se (`string`)\cr method to estimate the standard error for the difference, either |
|
| 596 |
#' `standard`, `sato` \insertCite{Sato1989}{tern} or
|
|
| 597 |
#' `miettinen_nurminen` \insertCite{MiettinenNurminen1985}{tern}.
|
|
| 598 |
#' |
|
| 599 |
#' @examples |
|
| 600 |
#' # Cochran-Mantel-Haenszel confidence interval |
|
| 601 |
#' |
|
| 602 |
#' set.seed(2) |
|
| 603 |
#' rsp <- sample(c(TRUE, FALSE), 100, TRUE) |
|
| 604 |
#' grp <- sample(c("Placebo", "Treatment"), 100, TRUE)
|
|
| 605 |
#' grp <- factor(grp, levels = c("Placebo", "Treatment"))
|
|
| 606 |
#' strata_data <- data.frame( |
|
| 607 |
#' "f1" = sample(c("a", "b"), 100, TRUE),
|
|
| 608 |
#' "f2" = sample(c("x", "y", "z"), 100, TRUE),
|
|
| 609 |
#' stringsAsFactors = TRUE |
|
| 610 |
#' ) |
|
| 611 |
#' |
|
| 612 |
#' prop_diff_cmh( |
|
| 613 |
#' rsp = rsp, grp = grp, strata = interaction(strata_data), |
|
| 614 |
#' conf_level = 0.90 |
|
| 615 |
#' ) |
|
| 616 |
#' prop_diff_cmh( |
|
| 617 |
#' rsp = rsp, grp = grp, strata = interaction(strata_data), |
|
| 618 |
#' conf_level = 0.90, diff_se = "sato" |
|
| 619 |
#' ) |
|
| 620 |
#' |
|
| 621 |
#' @export |
|
| 622 |
prop_diff_cmh <- function(rsp, |
|
| 623 |
grp, |
|
| 624 |
strata, |
|
| 625 |
conf_level = 0.95, |
|
| 626 |
diff_se = c("standard", "sato", "miettinen_nurminen")) {
|
|
| 627 | 12x |
grp <- as_factor_keep_attributes(grp) |
| 628 | 12x |
strata <- as_factor_keep_attributes(strata) |
| 629 | 12x |
diff_se <- match.arg(diff_se) |
| 630 | 12x |
check_diff_prop_ci( |
| 631 | 12x |
rsp = rsp, grp = grp, conf_level = conf_level, strata = strata |
| 632 |
) |
|
| 633 | ||
| 634 | 12x |
if (any(tapply(rsp, strata, length) < 5)) {
|
| 635 | 1x |
warning("Less than 5 observations in some strata.")
|
| 636 |
} |
|
| 637 | ||
| 638 |
# first dimension: FALSE, TRUE |
|
| 639 |
# 2nd dimension: CONTROL, TX |
|
| 640 |
# 3rd dimension: levels of strata |
|
| 641 |
# rsp as factor rsp to handle edge case of no FALSE (or TRUE) rsp records |
|
| 642 | 12x |
t_tbl <- table( |
| 643 | 12x |
factor(rsp, levels = c("FALSE", "TRUE")),
|
| 644 | 12x |
grp, |
| 645 | 12x |
strata |
| 646 |
) |
|
| 647 | 12x |
n1 <- colSums(t_tbl[1:2, 1, ]) |
| 648 | 12x |
n2 <- colSums(t_tbl[1:2, 2, ]) |
| 649 | 12x |
x1 <- t_tbl[2, 1, ] |
| 650 | 12x |
p1 <- x1 / n1 |
| 651 | 12x |
x2 <- t_tbl[2, 2, ] |
| 652 | 12x |
p2 <- x2 / n2 |
| 653 |
# CMH weights |
|
| 654 | 12x |
use_stratum <- (n1 > 0) & (n2 > 0) |
| 655 | 12x |
n1 <- n1[use_stratum] |
| 656 | 12x |
n2 <- n2[use_stratum] |
| 657 | 12x |
p1 <- p1[use_stratum] |
| 658 | 12x |
p2 <- p2[use_stratum] |
| 659 | 12x |
wt <- (n1 * n2 / (n1 + n2)) |
| 660 | 12x |
wt_normalized <- wt / sum(wt) |
| 661 | 12x |
est1 <- sum(wt_normalized * p1) |
| 662 | 12x |
est2 <- sum(wt_normalized * p2) |
| 663 | 12x |
estimate <- c(est1, est2) |
| 664 | 12x |
names(estimate) <- levels(grp) |
| 665 | 12x |
se1 <- sqrt(sum(wt_normalized^2 * p1 * (1 - p1) / n1)) |
| 666 | 12x |
se2 <- sqrt(sum(wt_normalized^2 * p2 * (1 - p2) / n2)) |
| 667 | 12x |
z <- stats::qnorm((1 + conf_level) / 2) |
| 668 | 12x |
err1 <- z * se1 |
| 669 | 12x |
err2 <- z * se2 |
| 670 | 12x |
ci1 <- c((est1 - err1), (est1 + err1)) |
| 671 | 12x |
ci2 <- c((est2 - err2), (est2 + err2)) |
| 672 | 12x |
estimate_ci <- list(ci1, ci2) |
| 673 | 12x |
names(estimate_ci) <- levels(grp) |
| 674 | 12x |
diff_est <- est2 - est1 |
| 675 | ||
| 676 | 12x |
if (diff_se %in% c("standard", "sato")) {
|
| 677 | 10x |
se_diff <- if (diff_se == "standard") {
|
| 678 | 8x |
sqrt(sum(((p1 * (1 - p1) / n1) + (p2 * (1 - p2) / n2)) * wt_normalized^2)) |
| 679 |
} else {
|
|
| 680 |
# Sato variance estimator. |
|
| 681 | 2x |
p_terms <- (n2^2 * x1 - n1^2 * x2 + n1 * n2 * (n1 - n2) / 2) / (n1 + n2)^2 |
| 682 | 2x |
q_terms <- (x1 * (n2 - x2) + x2 * (n1 - x1)) / (2 * (n1 + n2)) |
| 683 | 2x |
num <- diff_est * sum(p_terms) + sum(q_terms) |
| 684 | 2x |
denom <- sum(wt)^2 |
| 685 | 2x |
sqrt(num / denom) |
| 686 |
} |
|
| 687 | 10x |
diff_ci <- c(diff_est - z * se_diff, diff_est + z * se_diff) |
| 688 |
} else {
|
|
| 689 |
# Miettinen and Nurminen method is used. |
|
| 690 | 2x |
z_stat_fun <- function(delta) {
|
| 691 | 36x |
var_est <- h_miettinen_nurminen_var_est( |
| 692 | 36x |
n1 = n1, n2 = n2, |
| 693 | 36x |
x1 = x1, x2 = x2, |
| 694 | 36x |
diff_par = delta |
| 695 | 36x |
)$var_est |
| 696 | 36x |
num <- sum(wt * (p2 - p1 - delta)) |
| 697 | 36x |
denom <- sqrt(sum(wt^2 * var_est)) |
| 698 | 36x |
num / denom |
| 699 |
} |
|
| 700 |
# Find upper and lower confidence limits by root finding such that |
|
| 701 |
# z_stat_fun(limit) = +/- z quantile: |
|
| 702 | 2x |
root_lower <- function(delta) z_stat_fun(delta) - z |
| 703 | 2x |
root_upper <- function(delta) z_stat_fun(delta) + z |
| 704 | 2x |
diff_ci <- c( |
| 705 | 2x |
stats::uniroot(root_lower, interval = c(-0.99, diff_est))$root, |
| 706 | 2x |
stats::uniroot(root_upper, interval = c(diff_est, 0.99))$root |
| 707 |
) |
|
| 708 |
# Calculate the standard error separately. |
|
| 709 | 2x |
var_est <- h_miettinen_nurminen_var_est( |
| 710 | 2x |
n1 = n1, n2 = n2, |
| 711 | 2x |
x1 = x1, x2 = x2, |
| 712 | 2x |
diff_par = diff_est |
| 713 | 2x |
)$var_est |
| 714 | 2x |
se_diff <- sqrt(sum(wt_normalized^2 * var_est)) |
| 715 |
} |
|
| 716 | ||
| 717 | 12x |
list( |
| 718 | 12x |
prop = estimate, |
| 719 | 12x |
prop_ci = estimate_ci, |
| 720 | 12x |
diff = diff_est, |
| 721 | 12x |
diff_ci = diff_ci, |
| 722 | 12x |
se_diff = se_diff, |
| 723 | 12x |
weights = wt_normalized, |
| 724 | 12x |
n1 = n1, |
| 725 | 12x |
n2 = n2 |
| 726 |
) |
|
| 727 |
} |
|
| 728 | ||
| 729 |
#' Variance Estimates in Strata following Miettinen and Nurminen |
|
| 730 |
#' |
|
| 731 |
#' The variable names in this function follow the notation in the original |
|
| 732 |
#' paper by \insertCite{MiettinenNurminen1985;textual}{tern}, cf. Appendix 1.
|
|
| 733 |
#' |
|
| 734 |
#' @param n1 (`numeric`)\cr sample sizes in group 1. |
|
| 735 |
#' @param n2 (`numeric`)\cr sample sizes in group 2. |
|
| 736 |
#' @param x1 (`numeric`)\cr number of responders in group 1. |
|
| 737 |
#' @param x2 (`numeric`)\cr number of responders in group 2. |
|
| 738 |
#' @param diff_par (`numeric`)\cr assumed difference in true proportions |
|
| 739 |
#' (group 2 minus group 1). |
|
| 740 |
#' @return A named `list` with elements: |
|
| 741 |
#' |
|
| 742 |
#' - `p1_hat`: estimated proportion in group 1 |
|
| 743 |
#' - `p2_hat`: estimated proportion in group 2 |
|
| 744 |
#' - `var_est`: variance estimate of the difference in proportions |
|
| 745 |
#' |
|
| 746 |
#' @keywords internal |
|
| 747 |
#' @references |
|
| 748 |
#' \insertAllCited{}
|
|
| 749 |
h_miettinen_nurminen_var_est <- function(n1, n2, x1, x2, diff_par) {
|
|
| 750 |
# nolint start |
|
| 751 |
# Translate to the notation in the paper. |
|
| 752 | 40x |
S0 <- n1 |
| 753 | 40x |
S1 <- n2 |
| 754 | 40x |
c0 <- x1 |
| 755 | 40x |
c1 <- x2 |
| 756 | 40x |
RD <- diff_par |
| 757 | ||
| 758 |
# Further definitions. |
|
| 759 | 40x |
S <- S0 + S1 |
| 760 | 40x |
c <- c0 + c1 |
| 761 | ||
| 762 |
# Coefficients of the third-degree polynomial. |
|
| 763 | 40x |
L3 <- S |
| 764 | 40x |
L2 <- (S1 + 2 * S0) * RD - S - c |
| 765 | 40x |
L1 <- (S0 * RD - S - 2 * c0) * RD + c |
| 766 | 40x |
L0 <- c0 * RD * (1 - RD) |
| 767 |
# nolint end |
|
| 768 | ||
| 769 |
# Solution for group 1 proportion. |
|
| 770 | 40x |
q <- L2^3 / (3 * L3)^3 - L1 * L2 / (6 * L3^2) + L0 / (2 * L3) |
| 771 | 40x |
p <- sign(q) * sqrt(L2^2 / (3 * L3)^2 - L1 / (3 * L3)) |
| 772 | 40x |
a <- (1 / 3) * (base::pi + acos(q / p^3)) |
| 773 | 40x |
p1_hat <- 2 * p * cos(a) - L2 / (3 * L3) |
| 774 | ||
| 775 |
# Estimated group 2 proportion. |
|
| 776 | 40x |
p2_hat <- p1_hat + RD |
| 777 | ||
| 778 |
# Variance estimate. |
|
| 779 | 40x |
var_est <- (p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2) * |
| 780 | 40x |
S / (S - 1) |
| 781 | ||
| 782 | 40x |
list( |
| 783 | 40x |
p1_hat = p1_hat, |
| 784 | 40x |
p2_hat = p2_hat, |
| 785 | 40x |
var_est = var_est |
| 786 |
) |
|
| 787 |
} |
|
| 788 | ||
| 789 |
#' @describeIn h_prop_diff Calculates the stratified Newcombe confidence interval and difference in response |
|
| 790 |
#' rates between the experimental treatment group and the control treatment group, adjusted for stratification |
|
| 791 |
#' factors. This implementation follows closely the one proposed by \insertCite{Yan2010-jt;textual}{tern}.
|
|
| 792 |
#' Weights can be estimated from the heuristic proposed in [prop_strat_wilson()] or from CMH-derived weights |
|
| 793 |
#' (see [prop_diff_cmh()]). |
|
| 794 |
#' |
|
| 795 |
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`. |
|
| 796 |
#' @param weights_method (`string`)\cr weights method. Can be either `"cmh"` or `"heuristic"` |
|
| 797 |
#' and directs the way weights are estimated. |
|
| 798 |
#' |
|
| 799 |
#' @examples |
|
| 800 |
#' # Stratified Newcombe confidence interval |
|
| 801 |
#' |
|
| 802 |
#' set.seed(2) |
|
| 803 |
#' data_set <- data.frame( |
|
| 804 |
#' "rsp" = sample(c(TRUE, FALSE), 100, TRUE), |
|
| 805 |
#' "f1" = sample(c("a", "b"), 100, TRUE),
|
|
| 806 |
#' "f2" = sample(c("x", "y", "z"), 100, TRUE),
|
|
| 807 |
#' "grp" = sample(c("Placebo", "Treatment"), 100, TRUE),
|
|
| 808 |
#' stringsAsFactors = TRUE |
|
| 809 |
#' ) |
|
| 810 |
#' |
|
| 811 |
#' prop_diff_strat_nc( |
|
| 812 |
#' rsp = data_set$rsp, grp = data_set$grp, strata = interaction(data_set[2:3]), |
|
| 813 |
#' weights_method = "cmh", |
|
| 814 |
#' conf_level = 0.90 |
|
| 815 |
#' ) |
|
| 816 |
#' |
|
| 817 |
#' prop_diff_strat_nc( |
|
| 818 |
#' rsp = data_set$rsp, grp = data_set$grp, strata = interaction(data_set[2:3]), |
|
| 819 |
#' weights_method = "wilson_h", |
|
| 820 |
#' conf_level = 0.90 |
|
| 821 |
#' ) |
|
| 822 |
#' |
|
| 823 |
#' @export |
|
| 824 |
prop_diff_strat_nc <- function(rsp, |
|
| 825 |
grp, |
|
| 826 |
strata, |
|
| 827 |
weights_method = c("cmh", "wilson_h"),
|
|
| 828 |
conf_level = 0.95, |
|
| 829 |
correct = FALSE) {
|
|
| 830 | 4x |
weights_method <- match.arg(weights_method) |
| 831 | 4x |
grp <- as_factor_keep_attributes(grp) |
| 832 | 4x |
strata <- as_factor_keep_attributes(strata) |
| 833 | 4x |
check_diff_prop_ci( |
| 834 | 4x |
rsp = rsp, grp = grp, conf_level = conf_level, strata = strata |
| 835 |
) |
|
| 836 | 4x |
checkmate::assert_number(conf_level, lower = 0, upper = 1) |
| 837 | 4x |
checkmate::assert_flag(correct) |
| 838 | 4x |
if (any(tapply(rsp, strata, length) < 5)) {
|
| 839 | ! |
warning("Less than 5 observations in some strata.")
|
| 840 |
} |
|
| 841 | ||
| 842 | 4x |
rsp_by_grp <- split(rsp, f = grp) |
| 843 | 4x |
strata_by_grp <- split(strata, f = grp) |
| 844 | ||
| 845 |
# Finding the weights |
|
| 846 | 4x |
weights <- if (identical(weights_method, "cmh")) {
|
| 847 | 3x |
prop_diff_cmh(rsp = rsp, grp = grp, strata = strata)$weights |
| 848 | 4x |
} else if (identical(weights_method, "wilson_h")) {
|
| 849 | 1x |
prop_strat_wilson(rsp, strata, conf_level = conf_level, correct = correct)$weights |
| 850 |
} |
|
| 851 | 4x |
weights[levels(strata)[!levels(strata) %in% names(weights)]] <- 0 |
| 852 | ||
| 853 |
# Calculating lower (`l`) and upper (`u`) confidence bounds per group. |
|
| 854 | 4x |
strat_wilson_by_grp <- Map( |
| 855 | 4x |
prop_strat_wilson, |
| 856 | 4x |
rsp = rsp_by_grp, |
| 857 | 4x |
strata = strata_by_grp, |
| 858 | 4x |
weights = list(weights, weights), |
| 859 | 4x |
conf_level = conf_level, |
| 860 | 4x |
correct = correct |
| 861 |
) |
|
| 862 | ||
| 863 | 4x |
ci_ref <- strat_wilson_by_grp[[1]] |
| 864 | 4x |
ci_trt <- strat_wilson_by_grp[[2]] |
| 865 | 4x |
l_ref <- as.numeric(ci_ref$conf_int[1]) |
| 866 | 4x |
u_ref <- as.numeric(ci_ref$conf_int[2]) |
| 867 | 4x |
l_trt <- as.numeric(ci_trt$conf_int[1]) |
| 868 | 4x |
u_trt <- as.numeric(ci_trt$conf_int[2]) |
| 869 | ||
| 870 |
# Estimating the diff and n_ref, n_trt (it allows different weights to be used) |
|
| 871 | 4x |
t_tbl <- table( |
| 872 | 4x |
factor(rsp, levels = c("FALSE", "TRUE")),
|
| 873 | 4x |
grp, |
| 874 | 4x |
strata |
| 875 |
) |
|
| 876 | 4x |
n_ref <- colSums(t_tbl[1:2, 1, ]) |
| 877 | 4x |
n_trt <- colSums(t_tbl[1:2, 2, ]) |
| 878 | 4x |
use_stratum <- (n_ref > 0) & (n_trt > 0) |
| 879 | 4x |
n_ref <- n_ref[use_stratum] |
| 880 | 4x |
n_trt <- n_trt[use_stratum] |
| 881 | 4x |
p_ref <- t_tbl[2, 1, use_stratum] / n_ref |
| 882 | 4x |
p_trt <- t_tbl[2, 2, use_stratum] / n_trt |
| 883 | 4x |
est1 <- sum(weights * p_ref) |
| 884 | 4x |
est2 <- sum(weights * p_trt) |
| 885 | 4x |
diff_est <- est2 - est1 |
| 886 | ||
| 887 | 4x |
lambda1 <- sum(weights^2 / n_ref) |
| 888 | 4x |
lambda2 <- sum(weights^2 / n_trt) |
| 889 | 4x |
z <- stats::qnorm((1 + conf_level) / 2) |
| 890 | ||
| 891 | 4x |
lower <- diff_est - z * sqrt(lambda2 * l_trt * (1 - l_trt) + lambda1 * u_ref * (1 - u_ref)) |
| 892 | 4x |
upper <- diff_est + z * sqrt(lambda1 * l_ref * (1 - l_ref) + lambda2 * u_trt * (1 - u_trt)) |
| 893 | ||
| 894 | 4x |
list( |
| 895 | 4x |
"diff" = diff_est, |
| 896 | 4x |
"diff_ci" = c("lower" = lower, "upper" = upper)
|
| 897 |
) |
|
| 898 |
} |
| 1 |
#' Count patients by most extreme post-baseline toxicity grade per direction of abnormality |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' The analyze function [count_abnormal_by_worst_grade()] creates a layout element to count patients by highest (worst) |
|
| 6 |
#' analysis toxicity grade post-baseline for each direction, categorized by parameter value. |
|
| 7 |
#' |
|
| 8 |
#' This function analyzes primary analysis variable `var` which indicates toxicity grades. Additional |
|
| 9 |
#' analysis variables that can be supplied as a list via the `variables` parameter are `id` (defaults to |
|
| 10 |
#' `USUBJID`), a variable to indicate unique subject identifiers, `param` (defaults to `PARAM`), a variable |
|
| 11 |
#' to indicate parameter values, and `grade_dir` (defaults to `GRADE_DIR`), a variable to indicate directions |
|
| 12 |
#' (e.g. High or Low) for each toxicity grade supplied in `var`. |
|
| 13 |
#' |
|
| 14 |
#' For each combination of `param` and `grade_dir` levels, patient counts by worst |
|
| 15 |
#' grade are calculated as follows: |
|
| 16 |
#' * `1` to `4`: The number of patients with worst grades 1-4, respectively. |
|
| 17 |
#' * `Any`: The number of patients with at least one abnormality (i.e. grade is not 0). |
|
| 18 |
#' |
|
| 19 |
#' Fractions are calculated by dividing the above counts by the number of patients with at least one |
|
| 20 |
#' valid measurement recorded during treatment. |
|
| 21 |
#' |
|
| 22 |
#' Pre-processing is crucial when using this function and can be done automatically using the |
|
| 23 |
#' [h_adlb_abnormal_by_worst_grade()] helper function. See the description of this function for details on the |
|
| 24 |
#' necessary pre-processing steps. |
|
| 25 |
#' |
|
| 26 |
#' Prior to using this function in your table layout you must use [rtables::split_rows_by()] to create two row |
|
| 27 |
#' splits, one on variable `param` and one on variable `grade_dir`. |
|
| 28 |
#' |
|
| 29 |
#' @inheritParams argument_convention |
|
| 30 |
#' @param .stats (`character`)\cr statistics to select for the table. |
|
| 31 |
#' |
|
| 32 |
#' Options are: ``r shQuote(get_stats("abnormal_by_worst_grade"), type = "sh")``
|
|
| 33 |
#' |
|
| 34 |
#' @seealso [h_adlb_abnormal_by_worst_grade()] which pre-processes ADLB data frames to be used in |
|
| 35 |
#' [count_abnormal_by_worst_grade()]. |
|
| 36 |
#' |
|
| 37 |
#' @name abnormal_by_worst_grade |
|
| 38 |
#' @order 1 |
|
| 39 |
NULL |
|
| 40 | ||
| 41 |
#' @describeIn abnormal_by_worst_grade Statistics function which counts patients by worst grade. |
|
| 42 |
#' |
|
| 43 |
#' @return |
|
| 44 |
#' * `s_count_abnormal_by_worst_grade()` returns the single statistic `count_fraction` with grades 1 to 4 and |
|
| 45 |
#' "Any" results. |
|
| 46 |
#' |
|
| 47 |
#' @keywords internal |
|
| 48 |
s_count_abnormal_by_worst_grade <- function(df, |
|
| 49 |
.var = "GRADE_ANL", |
|
| 50 |
.spl_context, |
|
| 51 |
variables = list( |
|
| 52 |
id = "USUBJID", |
|
| 53 |
param = "PARAM", |
|
| 54 |
grade_dir = "GRADE_DIR" |
|
| 55 |
), |
|
| 56 |
...) {
|
|
| 57 | 5x |
checkmate::assert_string(.var) |
| 58 | 5x |
assert_valid_factor(df[[.var]]) |
| 59 | 5x |
assert_valid_factor(df[[variables$param]]) |
| 60 | 4x |
assert_valid_factor(df[[variables$grade_dir]]) |
| 61 | 4x |
assert_df_with_variables(df, c(a = .var, variables)) |
| 62 | 4x |
checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))
|
| 63 | ||
| 64 |
# To verify that the `split_rows_by` are performed with correct variables. |
|
| 65 | 4x |
checkmate::assert_subset(c(variables[["param"]], variables[["grade_dir"]]), .spl_context$split) |
| 66 | 4x |
first_row <- .spl_context[.spl_context$split == variables[["param"]], ] |
| 67 | 4x |
x_lvls <- c(setdiff(levels(df[[.var]]), "0"), "Any") |
| 68 | 4x |
result <- split(numeric(0), factor(x_lvls)) |
| 69 | ||
| 70 | 4x |
subj <- first_row$full_parent_df[[1]][[variables[["id"]]]] |
| 71 | 4x |
subj_cur_col <- subj[first_row$cur_col_subset[[1]]] |
| 72 |
# Some subjects may have a record for high and low directions but |
|
| 73 |
# should be counted only once. |
|
| 74 | 4x |
denom <- length(unique(subj_cur_col)) |
| 75 | ||
| 76 | 4x |
for (lvl in x_lvls) {
|
| 77 | 20x |
if (lvl != "Any") {
|
| 78 | 16x |
df_lvl <- df[df[[.var]] == lvl, ] |
| 79 |
} else {
|
|
| 80 | 4x |
df_lvl <- df[df[[.var]] != 0, ] |
| 81 |
} |
|
| 82 | 20x |
num <- length(unique(df_lvl[[variables[["id"]]]])) |
| 83 | 20x |
fraction <- ifelse(denom == 0, 0, num / denom) |
| 84 | 20x |
result[[lvl]] <- formatters::with_label(c(count = num, fraction = fraction), lvl) |
| 85 |
} |
|
| 86 | ||
| 87 | 4x |
result <- list(count_fraction = result) |
| 88 | 4x |
result |
| 89 |
} |
|
| 90 | ||
| 91 |
#' @describeIn abnormal_by_worst_grade Formatted analysis function which is used as `afun` |
|
| 92 |
#' in `count_abnormal_by_worst_grade()`. |
|
| 93 |
#' |
|
| 94 |
#' @return |
|
| 95 |
#' * `a_count_abnormal_by_worst_grade()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
| 96 |
#' |
|
| 97 |
#' @keywords internal |
|
| 98 |
a_count_abnormal_by_worst_grade <- function(df, |
|
| 99 |
..., |
|
| 100 |
.stats = NULL, |
|
| 101 |
.stat_names = NULL, |
|
| 102 |
.formats = NULL, |
|
| 103 |
.labels = NULL, |
|
| 104 |
.indent_mods = NULL) {
|
|
| 105 |
# Check for additional parameters to the statistics function |
|
| 106 | 4x |
dots_extra_args <- list(...) |
| 107 | 4x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
| 108 | 4x |
dots_extra_args$.additional_fun_parameters <- NULL |
| 109 | ||
| 110 |
# Check for user-defined functions |
|
| 111 | 4x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
| 112 | 4x |
.stats <- default_and_custom_stats_list$all_stats |
| 113 | 4x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
| 114 | ||
| 115 |
# Apply statistics function |
|
| 116 | 4x |
x_stats <- .apply_stat_functions( |
| 117 | 4x |
default_stat_fnc = s_count_abnormal_by_worst_grade, |
| 118 | 4x |
custom_stat_fnc_list = custom_stat_functions, |
| 119 | 4x |
args_list = c( |
| 120 | 4x |
df = list(df), |
| 121 | 4x |
extra_afun_params, |
| 122 | 4x |
dots_extra_args |
| 123 |
) |
|
| 124 |
) |
|
| 125 | ||
| 126 |
# Fill in formatting defaults |
|
| 127 | 3x |
.stats <- get_stats("abnormal_by_worst_grade", stats_in = .stats, custom_stats_in = names(custom_stat_functions))
|
| 128 | 3x |
levels_per_stats <- lapply(x_stats, names) |
| 129 | 3x |
.formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) |
| 130 | 3x |
.labels <- get_labels_from_stats(.stats, .labels, levels_per_stats) |
| 131 | 3x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) |
| 132 | ||
| 133 | 3x |
x_stats <- x_stats[.stats] %>% |
| 134 | 3x |
.unlist_keep_nulls() %>% |
| 135 | 3x |
setNames(names(.formats)) |
| 136 | ||
| 137 |
# Auto format handling |
|
| 138 | 3x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
| 139 | ||
| 140 |
# Get and check statistical names |
|
| 141 | 3x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
| 142 | ||
| 143 | 3x |
in_rows( |
| 144 | 3x |
.list = x_stats, |
| 145 | 3x |
.formats = .formats, |
| 146 | 3x |
.names = .labels %>% .unlist_keep_nulls(), |
| 147 | 3x |
.stat_names = .stat_names, |
| 148 | 3x |
.labels = .labels %>% .unlist_keep_nulls(), |
| 149 | 3x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
| 150 |
) |
|
| 151 |
} |
|
| 152 | ||
| 153 |
#' @describeIn abnormal_by_worst_grade Layout-creating function which can take statistics function arguments |
|
| 154 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()]. |
|
| 155 |
#' |
|
| 156 |
#' @return |
|
| 157 |
#' * `count_abnormal_by_worst_grade()` returns a layout object suitable for passing to further layouting functions, |
|
| 158 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing |
|
| 159 |
#' the statistics from `s_count_abnormal_by_worst_grade()` to the table layout. |
|
| 160 |
#' |
|
| 161 |
#' @examples |
|
| 162 |
#' library(dplyr) |
|
| 163 |
#' library(forcats) |
|
| 164 |
#' adlb <- tern_ex_adlb |
|
| 165 |
#' |
|
| 166 |
#' # Data is modified in order to have some parameters with grades only in one direction |
|
| 167 |
#' # and simulate the real data. |
|
| 168 |
#' adlb$ATOXGR[adlb$PARAMCD == "ALT" & adlb$ATOXGR %in% c("1", "2", "3", "4")] <- "-1"
|
|
| 169 |
#' adlb$ANRIND[adlb$PARAMCD == "ALT" & adlb$ANRIND == "HIGH"] <- "LOW" |
|
| 170 |
#' adlb$WGRHIFL[adlb$PARAMCD == "ALT"] <- "" |
|
| 171 |
#' |
|
| 172 |
#' adlb$ATOXGR[adlb$PARAMCD == "IGA" & adlb$ATOXGR %in% c("-1", "-2", "-3", "-4")] <- "1"
|
|
| 173 |
#' adlb$ANRIND[adlb$PARAMCD == "IGA" & adlb$ANRIND == "LOW"] <- "HIGH" |
|
| 174 |
#' adlb$WGRLOFL[adlb$PARAMCD == "IGA"] <- "" |
|
| 175 |
#' |
|
| 176 |
#' # Pre-processing |
|
| 177 |
#' adlb_f <- adlb %>% h_adlb_abnormal_by_worst_grade() |
|
| 178 |
#' |
|
| 179 |
#' # Map excludes records without abnormal grade since they should not be displayed |
|
| 180 |
#' # in the table. |
|
| 181 |
#' map <- unique(adlb_f[adlb_f$GRADE_DIR != "ZERO", c("PARAM", "GRADE_DIR", "GRADE_ANL")]) %>%
|
|
| 182 |
#' lapply(as.character) %>% |
|
| 183 |
#' as.data.frame() %>% |
|
| 184 |
#' arrange(PARAM, desc(GRADE_DIR), GRADE_ANL) |
|
| 185 |
#' |
|
| 186 |
#' basic_table() %>% |
|
| 187 |
#' split_cols_by("ARMCD") %>%
|
|
| 188 |
#' split_rows_by("PARAM") %>%
|
|
| 189 |
#' split_rows_by("GRADE_DIR", split_fun = trim_levels_to_map(map)) %>%
|
|
| 190 |
#' count_abnormal_by_worst_grade( |
|
| 191 |
#' var = "GRADE_ANL", |
|
| 192 |
#' variables = list(id = "USUBJID", param = "PARAM", grade_dir = "GRADE_DIR") |
|
| 193 |
#' ) %>% |
|
| 194 |
#' build_table(df = adlb_f) |
|
| 195 |
#' |
|
| 196 |
#' @export |
|
| 197 |
#' @order 2 |
|
| 198 |
count_abnormal_by_worst_grade <- function(lyt, |
|
| 199 |
var, |
|
| 200 |
variables = list( |
|
| 201 |
id = "USUBJID", |
|
| 202 |
param = "PARAM", |
|
| 203 |
grade_dir = "GRADE_DIR" |
|
| 204 |
), |
|
| 205 |
na_str = default_na_str(), |
|
| 206 |
nested = TRUE, |
|
| 207 |
..., |
|
| 208 |
.stats = "count_fraction", |
|
| 209 |
.stat_names = NULL, |
|
| 210 |
.formats = list(count_fraction = format_count_fraction), |
|
| 211 |
.labels = NULL, |
|
| 212 |
.indent_mods = NULL) {
|
|
| 213 |
# Process standard extra arguments |
|
| 214 | 2x |
extra_args <- list(".stats" = .stats)
|
| 215 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
| 216 | 2x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
| 217 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
| 218 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
| 219 | ||
| 220 |
# Process additional arguments to the statistic function |
|
| 221 | 2x |
extra_args <- c(extra_args, "variables" = list(variables), ...) |
| 222 | ||
| 223 |
# Append additional info from layout to the analysis function |
|
| 224 | 2x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
| 225 | 2x |
formals(a_count_abnormal_by_worst_grade) <- c( |
| 226 | 2x |
formals(a_count_abnormal_by_worst_grade), extra_args[[".additional_fun_parameters"]] |
| 227 |
) |
|
| 228 | ||
| 229 | 2x |
analyze( |
| 230 | 2x |
lyt = lyt, |
| 231 | 2x |
vars = var, |
| 232 | 2x |
afun = a_count_abnormal_by_worst_grade, |
| 233 | 2x |
na_str = na_str, |
| 234 | 2x |
nested = nested, |
| 235 | 2x |
extra_args = extra_args, |
| 236 | 2x |
show_labels = "hidden" |
| 237 |
) |
|
| 238 |
} |
|
| 239 | ||
| 240 |
#' Helper function to prepare ADLB for `count_abnormal_by_worst_grade()` |
|
| 241 |
#' |
|
| 242 |
#' @description `r lifecycle::badge("stable")`
|
|
| 243 |
#' |
|
| 244 |
#' Helper function to prepare an ADLB data frame to be used as input in |
|
| 245 |
#' [count_abnormal_by_worst_grade()]. The following pre-processing steps are applied: |
|
| 246 |
#' |
|
| 247 |
#' 1. `adlb` is filtered on variable `avisit` to only include post-baseline visits. |
|
| 248 |
#' 2. `adlb` is filtered on variables `worst_flag_low` and `worst_flag_high` so that only |
|
| 249 |
#' worst grades (in either direction) are included. |
|
| 250 |
#' 3. From the standard lab grade variable `atoxgr`, the following two variables are derived |
|
| 251 |
#' and added to `adlb`: |
|
| 252 |
#' * A grade direction variable (e.g. `GRADE_DIR`). The variable takes value `"HIGH"` when |
|
| 253 |
#' `atoxgr > 0`, `"LOW"` when `atoxgr < 0`, and `"ZERO"` otherwise. |
|
| 254 |
#' * A toxicity grade variable (e.g. `GRADE_ANL`) where all negative values from `atoxgr` are |
|
| 255 |
#' replaced by their absolute values. |
|
| 256 |
#' 4. Unused factor levels are dropped from `adlb` via [droplevels()]. |
|
| 257 |
#' |
|
| 258 |
#' @param adlb (`data.frame`)\cr ADLB data frame. |
|
| 259 |
#' @param atoxgr (`string`)\cr name of the analysis toxicity grade variable. This must be a `factor` |
|
| 260 |
#' variable. |
|
| 261 |
#' @param avisit (`string`)\cr name of the analysis visit variable. |
|
| 262 |
#' @param worst_flag_low (`string`)\cr name of the worst low lab grade flag variable. This variable is |
|
| 263 |
#' set to `"Y"` when indicating records of worst low lab grades. |
|
| 264 |
#' @param worst_flag_high (`string`)\cr name of the worst high lab grade flag variable. This variable is |
|
| 265 |
#' set to `"Y"` when indicating records of worst high lab grades. |
|
| 266 |
#' |
|
| 267 |
#' @return `h_adlb_abnormal_by_worst_grade()` returns the `adlb` data frame with two new |
|
| 268 |
#' variables: `GRADE_DIR` and `GRADE_ANL`. |
|
| 269 |
#' |
|
| 270 |
#' @seealso [abnormal_by_worst_grade] |
|
| 271 |
#' |
|
| 272 |
#' @examples |
|
| 273 |
#' h_adlb_abnormal_by_worst_grade(tern_ex_adlb) %>% |
|
| 274 |
#' dplyr::select(ATOXGR, GRADE_DIR, GRADE_ANL) %>% |
|
| 275 |
#' head(10) |
|
| 276 |
#' |
|
| 277 |
#' @export |
|
| 278 |
h_adlb_abnormal_by_worst_grade <- function(adlb, |
|
| 279 |
atoxgr = "ATOXGR", |
|
| 280 |
avisit = "AVISIT", |
|
| 281 |
worst_flag_low = "WGRLOFL", |
|
| 282 |
worst_flag_high = "WGRHIFL") {
|
|
| 283 | 1x |
adlb %>% |
| 284 | 1x |
dplyr::filter( |
| 285 | 1x |
!.data[[avisit]] %in% c("SCREENING", "BASELINE"),
|
| 286 | 1x |
.data[[worst_flag_low]] == "Y" | .data[[worst_flag_high]] == "Y" |
| 287 |
) %>% |
|
| 288 | 1x |
dplyr::mutate( |
| 289 | 1x |
GRADE_DIR = factor( |
| 290 | 1x |
dplyr::case_when( |
| 291 | 1x |
.data[[atoxgr]] %in% c("-1", "-2", "-3", "-4") ~ "LOW",
|
| 292 | 1x |
.data[[atoxgr]] == "0" ~ "ZERO", |
| 293 | 1x |
.data[[atoxgr]] %in% c("1", "2", "3", "4") ~ "HIGH"
|
| 294 |
), |
|
| 295 | 1x |
levels = c("LOW", "ZERO", "HIGH")
|
| 296 |
), |
|
| 297 | 1x |
GRADE_ANL = forcats::fct_relevel( |
| 298 | 1x |
forcats::fct_recode(.data[[atoxgr]], `1` = "-1", `2` = "-2", `3` = "-3", `4` = "-4"), |
| 299 | 1x |
c("0", "1", "2", "3", "4")
|
| 300 |
) |
|
| 301 |
) %>% |
|
| 302 | 1x |
droplevels() |
| 303 |
} |
| 1 |
#' Incidence rate estimation |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' The analyze function [estimate_incidence_rate()] creates a layout element to estimate an event rate adjusted for |
|
| 6 |
#' person-years at risk, otherwise known as incidence rate. The primary analysis variable specified via `vars` is |
|
| 7 |
#' the person-years at risk. In addition to this variable, the `n_events` variable for number of events observed (where |
|
| 8 |
#' a value of 1 means an event was observed and 0 means that no event was observed) must also be specified. |
|
| 9 |
#' |
|
| 10 |
#' @inheritParams argument_convention |
|
| 11 |
#' @param control (`list`)\cr parameters for estimation details, specified by using |
|
| 12 |
#' the helper function [control_incidence_rate()]. Possible parameter options are: |
|
| 13 |
#' * `conf_level` (`proportion`)\cr confidence level for the estimated incidence rate. |
|
| 14 |
#' * `conf_type` (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar` |
|
| 15 |
#' for confidence interval type. |
|
| 16 |
#' * `input_time_unit` (`string`)\cr `day`, `week`, `month`, or `year` (default) |
|
| 17 |
#' indicating time unit for data input. |
|
| 18 |
#' * `num_pt_year` (`numeric`)\cr time unit for desired output (in person-years). |
|
| 19 |
#' @param n_events (`string`)\cr name of integer variable indicating whether an event has been observed (1) or not (0). |
|
| 20 |
#' @param id_var (`string`)\cr name of variable used as patient identifier if `"n_unique"` is included in `.stats`. |
|
| 21 |
#' Defaults to `"USUBJID"`. |
|
| 22 |
#' @param .stats (`character`)\cr statistics to select for the table. |
|
| 23 |
#' |
|
| 24 |
#' Options are: ``r shQuote(get_stats("estimate_incidence_rate"), type = "sh")``
|
|
| 25 |
#' @param summarize (`flag`)\cr whether the function should act as an analyze function (`summarize = FALSE`), or a |
|
| 26 |
#' summarize function (`summarize = TRUE`). Defaults to `FALSE`. |
|
| 27 |
#' @param label_fmt (`string`)\cr how labels should be formatted after a row split occurs if `summarize = TRUE`. The |
|
| 28 |
#' string should use `"%s"` to represent row split levels, and `"%.labels"` to represent labels supplied to the |
|
| 29 |
#' `.labels` argument. Defaults to `"%s - %.labels"`. |
|
| 30 |
#' |
|
| 31 |
#' @seealso [control_incidence_rate()] and helper functions [h_incidence_rate]. |
|
| 32 |
#' |
|
| 33 |
#' @examples |
|
| 34 |
#' df <- data.frame( |
|
| 35 |
#' USUBJID = as.character(seq(6)), |
|
| 36 |
#' CNSR = c(0, 1, 1, 0, 0, 0), |
|
| 37 |
#' AVAL = c(10.1, 20.4, 15.3, 20.8, 18.7, 23.4), |
|
| 38 |
#' ARM = factor(c("A", "A", "A", "B", "B", "B")),
|
|
| 39 |
#' STRATA1 = factor(c("X", "Y", "Y", "X", "X", "Y"))
|
|
| 40 |
#' ) |
|
| 41 |
#' df$n_events <- 1 - df$CNSR |
|
| 42 |
#' |
|
| 43 |
#' @name incidence_rate |
|
| 44 |
#' @order 1 |
|
| 45 |
NULL |
|
| 46 | ||
| 47 |
#' @describeIn incidence_rate Statistics function which estimates the incidence rate and the |
|
| 48 |
#' associated confidence interval. |
|
| 49 |
#' |
|
| 50 |
#' @return |
|
| 51 |
#' * `s_incidence_rate()` returns the following statistics: |
|
| 52 |
#' - `person_years`: Total person-years at risk. |
|
| 53 |
#' - `n_events`: Total number of events observed. |
|
| 54 |
#' - `rate`: Estimated incidence rate. |
|
| 55 |
#' - `rate_ci`: Confidence interval for the incidence rate. |
|
| 56 |
#' - `n_unique`: Total number of patients with at least one event observed. |
|
| 57 |
#' - `n_rate`: Total number of events observed & estimated incidence rate. |
|
| 58 |
#' |
|
| 59 |
#' @keywords internal |
|
| 60 |
s_incidence_rate <- function(df, |
|
| 61 |
.var, |
|
| 62 |
..., |
|
| 63 |
n_events, |
|
| 64 |
is_event = lifecycle::deprecated(), |
|
| 65 |
id_var = "USUBJID", |
|
| 66 |
control = control_incidence_rate()) {
|
|
| 67 | 17x |
if (lifecycle::is_present(is_event)) {
|
| 68 | ! |
checkmate::assert_string(is_event) |
| 69 | ! |
lifecycle::deprecate_warn( |
| 70 | ! |
"0.9.6", "s_incidence_rate(is_event)", "s_incidence_rate(n_events)" |
| 71 |
) |
|
| 72 | ! |
n_events <- is_event |
| 73 | ! |
df[[n_events]] <- as.numeric(df[[is_event]]) |
| 74 |
} |
|
| 75 | ||
| 76 | 17x |
assert_df_with_variables(df, list(tte = .var, n_events = n_events)) |
| 77 | 17x |
checkmate::assert_string(.var) |
| 78 | 17x |
checkmate::assert_string(n_events) |
| 79 | 17x |
checkmate::assert_string(id_var) |
| 80 | 17x |
checkmate::assert_numeric(df[[.var]], any.missing = FALSE) |
| 81 | 17x |
checkmate::assert_integerish(df[[n_events]], any.missing = FALSE) |
| 82 | ||
| 83 | 17x |
n_unique <- n_available(unique(df[[id_var]][df[[n_events]] == 1])) |
| 84 | 17x |
input_time_unit <- control$input_time_unit |
| 85 | 17x |
num_pt_year <- control$num_pt_year |
| 86 | 17x |
conf_level <- control$conf_level |
| 87 | 17x |
person_years <- sum(df[[.var]], na.rm = TRUE) * ( |
| 88 | 17x |
1 * (input_time_unit == "year") + |
| 89 | 17x |
1 / 12 * (input_time_unit == "month") + |
| 90 | 17x |
1 / 52.14 * (input_time_unit == "week") + |
| 91 | 17x |
1 / 365.24 * (input_time_unit == "day") |
| 92 |
) |
|
| 93 | 17x |
n_events <- sum(df[[n_events]], na.rm = TRUE) |
| 94 | ||
| 95 | 17x |
result <- h_incidence_rate( |
| 96 | 17x |
person_years, |
| 97 | 17x |
n_events, |
| 98 | 17x |
control |
| 99 |
) |
|
| 100 | 17x |
list( |
| 101 | 17x |
person_years = formatters::with_label(person_years, "Total patient-years at risk"), |
| 102 | 17x |
n_events = formatters::with_label(n_events, "Number of adverse events observed"), |
| 103 | 17x |
rate = formatters::with_label(result$rate, paste("AE rate per", num_pt_year, "patient-years")),
|
| 104 | 17x |
rate_ci = formatters::with_label(result$rate_ci, f_conf_level(conf_level)), |
| 105 | 17x |
n_unique = formatters::with_label(n_unique, "Total number of patients with at least one adverse event"), |
| 106 | 17x |
n_rate = formatters::with_label( |
| 107 | 17x |
c(n_events, result$rate), |
| 108 | 17x |
paste("Number of adverse events observed (AE rate per", num_pt_year, "patient-years)")
|
| 109 |
) |
|
| 110 |
) |
|
| 111 |
} |
|
| 112 | ||
| 113 |
#' @describeIn incidence_rate Formatted analysis function which is used as `afun` in `estimate_incidence_rate()`. |
|
| 114 |
#' |
|
| 115 |
#' @return |
|
| 116 |
#' * `a_incidence_rate()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
| 117 |
#' |
|
| 118 |
#' @examples |
|
| 119 |
#' a_incidence_rate( |
|
| 120 |
#' df, |
|
| 121 |
#' .var = "AVAL", |
|
| 122 |
#' .df_row = df, |
|
| 123 |
#' n_events = "n_events" |
|
| 124 |
#' ) |
|
| 125 |
#' |
|
| 126 |
#' @export |
|
| 127 |
a_incidence_rate <- function(df, |
|
| 128 |
labelstr = "", |
|
| 129 |
label_fmt = "%s - %.labels", |
|
| 130 |
..., |
|
| 131 |
.stats = NULL, |
|
| 132 |
.stat_names = NULL, |
|
| 133 |
.formats = NULL, |
|
| 134 |
.labels = NULL, |
|
| 135 |
.indent_mods = NULL) {
|
|
| 136 | 16x |
checkmate::assert_string(label_fmt) |
| 137 | ||
| 138 |
# Check for additional parameters to the statistics function |
|
| 139 | 16x |
dots_extra_args <- list(...) |
| 140 | 16x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
| 141 | 16x |
dots_extra_args$.additional_fun_parameters <- NULL |
| 142 | ||
| 143 |
# Check for user-defined functions |
|
| 144 | 16x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
| 145 | 16x |
.stats <- default_and_custom_stats_list$all_stats |
| 146 | 16x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
| 147 | ||
| 148 |
# Main statistic calculations |
|
| 149 | 16x |
x_stats <- .apply_stat_functions( |
| 150 | 16x |
default_stat_fnc = s_incidence_rate, |
| 151 | 16x |
custom_stat_fnc_list = custom_stat_functions, |
| 152 | 16x |
args_list = c( |
| 153 | 16x |
df = list(df), |
| 154 | 16x |
extra_afun_params, |
| 155 | 16x |
dots_extra_args |
| 156 |
) |
|
| 157 |
) |
|
| 158 | ||
| 159 |
# Fill in formatting defaults |
|
| 160 | 16x |
.stats <- get_stats("estimate_incidence_rate", stats_in = .stats, custom_stats_in = names(custom_stat_functions))
|
| 161 | 16x |
x_stats <- x_stats[.stats] |
| 162 | 16x |
.formats <- get_formats_from_stats(.stats, .formats) |
| 163 | 16x |
.labels <- get_labels_from_stats(.stats, .labels, tern_defaults = lapply(x_stats, attr, "label")) |
| 164 | 16x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods) |
| 165 | ||
| 166 |
# Apply label format |
|
| 167 | 16x |
if (nzchar(labelstr) > 0) {
|
| 168 | 8x |
.labels <- sapply(.labels, function(x) gsub("%.labels", x, gsub("%s", labelstr, label_fmt)))
|
| 169 |
} |
|
| 170 | ||
| 171 |
# Auto format handling |
|
| 172 | 16x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
| 173 | ||
| 174 |
# Get and check statistical names |
|
| 175 | 16x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
| 176 | ||
| 177 | 16x |
in_rows( |
| 178 | 16x |
.list = x_stats, |
| 179 | 16x |
.formats = .formats, |
| 180 | 16x |
.names = names(.labels), |
| 181 | 16x |
.stat_names = .stat_names, |
| 182 | 16x |
.labels = .labels %>% .unlist_keep_nulls(), |
| 183 | 16x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
| 184 |
) |
|
| 185 |
} |
|
| 186 | ||
| 187 |
#' @describeIn incidence_rate Layout-creating function which can take statistics function arguments |
|
| 188 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()]. |
|
| 189 |
#' |
|
| 190 |
#' @return |
|
| 191 |
#' * `estimate_incidence_rate()` returns a layout object suitable for passing to further layouting functions, |
|
| 192 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing |
|
| 193 |
#' the statistics from `s_incidence_rate()` to the table layout. |
|
| 194 |
#' |
|
| 195 |
#' @examples |
|
| 196 |
#' basic_table(show_colcounts = TRUE) %>% |
|
| 197 |
#' split_cols_by("ARM") %>%
|
|
| 198 |
#' estimate_incidence_rate( |
|
| 199 |
#' vars = "AVAL", |
|
| 200 |
#' n_events = "n_events", |
|
| 201 |
#' control = control_incidence_rate( |
|
| 202 |
#' input_time_unit = "month", |
|
| 203 |
#' num_pt_year = 100 |
|
| 204 |
#' ) |
|
| 205 |
#' ) %>% |
|
| 206 |
#' build_table(df) |
|
| 207 |
#' |
|
| 208 |
#' # summarize = TRUE |
|
| 209 |
#' basic_table(show_colcounts = TRUE) %>% |
|
| 210 |
#' split_cols_by("ARM") %>%
|
|
| 211 |
#' split_rows_by("STRATA1", child_labels = "visible") %>%
|
|
| 212 |
#' estimate_incidence_rate( |
|
| 213 |
#' vars = "AVAL", |
|
| 214 |
#' n_events = "n_events", |
|
| 215 |
#' .stats = c("n_unique", "n_rate"),
|
|
| 216 |
#' summarize = TRUE, |
|
| 217 |
#' label_fmt = "%.labels" |
|
| 218 |
#' ) %>% |
|
| 219 |
#' build_table(df) |
|
| 220 |
#' |
|
| 221 |
#' @export |
|
| 222 |
#' @order 2 |
|
| 223 |
estimate_incidence_rate <- function(lyt, |
|
| 224 |
vars, |
|
| 225 |
n_events, |
|
| 226 |
id_var = "USUBJID", |
|
| 227 |
control = control_incidence_rate(), |
|
| 228 |
na_str = default_na_str(), |
|
| 229 |
nested = TRUE, |
|
| 230 |
summarize = FALSE, |
|
| 231 |
label_fmt = "%s - %.labels", |
|
| 232 |
..., |
|
| 233 |
show_labels = "hidden", |
|
| 234 |
table_names = vars, |
|
| 235 |
.stats = c("person_years", "n_events", "rate", "rate_ci"),
|
|
| 236 |
.stat_names = NULL, |
|
| 237 |
.formats = list(rate = "xx.xx", rate_ci = "(xx.xx, xx.xx)"), |
|
| 238 |
.labels = NULL, |
|
| 239 |
.indent_mods = NULL) {
|
|
| 240 |
# Process standard extra arguments |
|
| 241 | 5x |
extra_args <- list(".stats" = .stats)
|
| 242 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
| 243 | 5x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
| 244 | 1x |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
| 245 | 1x |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
| 246 | ||
| 247 |
# Process additional arguments to the statistic function |
|
| 248 | 5x |
extra_args <- c( |
| 249 | 5x |
extra_args, |
| 250 | 5x |
n_events = n_events, id_var = id_var, control = list(control), label_fmt = label_fmt, |
| 251 |
... |
|
| 252 |
) |
|
| 253 | ||
| 254 |
# Adding additional info from layout to analysis function |
|
| 255 | 5x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
| 256 | 5x |
formals(a_incidence_rate) <- c(formals(a_incidence_rate), extra_args[[".additional_fun_parameters"]]) |
| 257 | ||
| 258 | 5x |
if (!summarize) {
|
| 259 | 3x |
analyze( |
| 260 | 3x |
lyt = lyt, |
| 261 | 3x |
vars = vars, |
| 262 | 3x |
afun = a_incidence_rate, |
| 263 | 3x |
na_str = na_str, |
| 264 | 3x |
nested = nested, |
| 265 | 3x |
extra_args = extra_args, |
| 266 | 3x |
show_labels = show_labels, |
| 267 | 3x |
table_names = table_names |
| 268 |
) |
|
| 269 |
} else {
|
|
| 270 | 2x |
summarize_row_groups( |
| 271 | 2x |
lyt = lyt, |
| 272 | 2x |
var = vars, |
| 273 | 2x |
cfun = a_incidence_rate, |
| 274 | 2x |
na_str = na_str, |
| 275 | 2x |
extra_args = extra_args |
| 276 |
) |
|
| 277 |
} |
|
| 278 |
} |
| 1 |
#' Line plot with optional table |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Line plot with optional table. |
|
| 6 |
#' |
|
| 7 |
#' @inheritParams argument_convention |
|
| 8 |
#' @param alt_counts_df (`data.frame` or `NULL`)\cr data set that will be used (only) |
|
| 9 |
#' to counts objects in groups for stratification. |
|
| 10 |
#' @param variables (named `character`) vector of variable names in `df` which should include: |
|
| 11 |
#' * `x` (`string`)\cr name of x-axis variable. |
|
| 12 |
#' * `y` (`string`)\cr name of y-axis variable. |
|
| 13 |
#' * `group_var` (`string` or `NULL`)\cr name of grouping variable (or strata), i.e. treatment arm. |
|
| 14 |
#' Can be `NA` to indicate lack of groups. |
|
| 15 |
#' * `subject_var` (`string` or `NULL`)\cr name of subject variable. Only applies if `group_var` is |
|
| 16 |
#' not NULL. |
|
| 17 |
#' * `paramcd` (`string` or `NA`)\cr name of the variable for parameter's code. Used for y-axis label and plot's |
|
| 18 |
#' subtitle. Can be `NA` if `paramcd` is not to be added to the y-axis label or subtitle. |
|
| 19 |
#' * `y_unit` (`string` or `NA`)\cr name of variable with units of `y`. Used for y-axis label and plot's subtitle. |
|
| 20 |
#' Can be `NA` if y unit is not to be added to the y-axis label or subtitle. |
|
| 21 |
#' * `facet_var` (`string` or `NA`)\cr name of the secondary grouping variable used for plot faceting, i.e. treatment |
|
| 22 |
#' arm. Can be `NA` to indicate lack of groups. |
|
| 23 |
#' @param mid (`character` or `NULL`)\cr names of the statistics that will be plotted as midpoints. |
|
| 24 |
#' All the statistics indicated in `mid` variable must be present in the object returned by `sfun`, |
|
| 25 |
#' and be of a `double` or `numeric` type vector of length one. |
|
| 26 |
#' @param interval (`character` or `NULL`)\cr names of the statistics that will be plotted as intervals. |
|
| 27 |
#' All the statistics indicated in `interval` variable must be present in the object returned by `sfun`, |
|
| 28 |
#' and be of a `double` or `numeric` type vector of length two. Set `interval = NULL` if intervals should not be |
|
| 29 |
#' added to the plot. |
|
| 30 |
#' @param whiskers (`character`)\cr names of the interval whiskers that will be plotted. Names must match names |
|
| 31 |
#' of the list element `interval` that will be returned by `sfun` (e.g. `mean_ci_lwr` element of |
|
| 32 |
#' `sfun(x)[["mean_ci"]]`). It is possible to specify one whisker only, or to suppress all whiskers by setting |
|
| 33 |
#' `interval = NULL`. |
|
| 34 |
#' @param table (`character` or `NULL`)\cr names of the statistics that will be displayed in the table below the plot. |
|
| 35 |
#' All the statistics indicated in `table` variable must be present in the object returned by `sfun`. |
|
| 36 |
#' @param sfun (`function`)\cr the function to compute the values of required statistics. It must return a named `list` |
|
| 37 |
#' with atomic vectors. The names of the `list` elements refer to the names of the statistics and are used by `mid`, |
|
| 38 |
#' `interval`, `table`. It must be able to accept as input a vector with data for which statistics are computed. |
|
| 39 |
#' @param ... optional arguments to `sfun`. |
|
| 40 |
#' @param mid_type (`string`)\cr controls the type of the `mid` plot, it can be point (`"p"`), line (`"l"`), |
|
| 41 |
#' or point and line (`"pl"`). |
|
| 42 |
#' @param mid_point_size (`numeric(1)`)\cr font size of the `mid` plot points. |
|
| 43 |
#' @param position (`character` or `call`)\cr geom element position adjustment, either as a string, or the result of |
|
| 44 |
#' a call to a position adjustment function. |
|
| 45 |
#' @param legend_title (`string`)\cr legend title. |
|
| 46 |
#' @param legend_position (`string`)\cr the position of the plot legend (`"none"`, `"left"`, `"right"`, `"bottom"`, |
|
| 47 |
#' `"top"`, or a two-element numeric vector). |
|
| 48 |
#' @param ggtheme (`theme`)\cr a graphical theme as provided by `ggplot2` to control styling of the plot. |
|
| 49 |
#' @param xticks (`numeric` or `NULL`)\cr numeric vector of tick positions or a single number with spacing |
|
| 50 |
#' between ticks on the x-axis, for use when `variables$x` is numeric. If `NULL` (default), [labeling::extended()] is |
|
| 51 |
#' used to determine optimal tick positions on the x-axis. If `variables$x` is not numeric, this argument is ignored. |
|
| 52 |
#' @param x_lab (`string` or `NULL`)\cr x-axis label. If `NULL` then no label will be added. |
|
| 53 |
#' @param y_lab (`string` or `NULL`)\cr y-axis label. If `NULL` then no label will be added. |
|
| 54 |
#' @param y_lab_add_paramcd (`flag`)\cr whether `paramcd`, i.e. `unique(df[[variables["paramcd"]]])` should be added |
|
| 55 |
#' to the y-axis label (`y_lab`). |
|
| 56 |
#' @param y_lab_add_unit (`flag`)\cr whether y-axis unit, i.e. `unique(df[[variables["y_unit"]]])` should be added |
|
| 57 |
#' to the y-axis label (`y_lab`). |
|
| 58 |
#' @param title (`string`)\cr plot title. |
|
| 59 |
#' @param subtitle (`string`)\cr plot subtitle. |
|
| 60 |
#' @param subtitle_add_paramcd (`flag`)\cr whether `paramcd`, i.e. `unique(df[[variables["paramcd"]]])` should be |
|
| 61 |
#' added to the plot's subtitle (`subtitle`). |
|
| 62 |
#' @param subtitle_add_unit (`flag`)\cr whether the y-axis unit, i.e. `unique(df[[variables["y_unit"]]])` should be |
|
| 63 |
#' added to the plot's subtitle (`subtitle`). |
|
| 64 |
#' @param caption (`string`)\cr optional caption below the plot. |
|
| 65 |
#' @param table_format (named `vector` or `NULL`)\cr custom formats for descriptive statistics used instead of defaults |
|
| 66 |
#' in the (optional) table appended to the plot. It is passed directly to the `h_format_row` function through |
|
| 67 |
#' the `format` parameter. Names of `table_format` must match the names of statistics returned by `sfun` function. |
|
| 68 |
#' Can be a character vector with values from [formatters::list_valid_format_labels()] or custom format functions. |
|
| 69 |
#' @param table_labels (named `character` or `NULL`)\cr labels for descriptive statistics used in the (optional) table |
|
| 70 |
#' appended to the plot. Names of `table_labels` must match the names of statistics returned by `sfun` function. |
|
| 71 |
#' @param table_font_size (`numeric(1)`)\cr font size of the text in the table. |
|
| 72 |
#' @param newpage `r lifecycle::badge("deprecated")` not used.
|
|
| 73 |
#' @param col (`character`)\cr color(s). See `?ggplot2::aes_colour_fill_alpha` for example values. |
|
| 74 |
#' @param linetype (`character`)\cr line type(s). See `?ggplot2::aes_linetype_size_shape` for example values. |
|
| 75 |
#' @param errorbar_width (`numeric(1)`)\cr width of the error bars. |
|
| 76 |
#' @param rel_height_plot (`proportion`)\cr proportion of total figure height to allocate to the line plot. |
|
| 77 |
#' Relative height of annotation table is then `1 - rel_height_plot`. If `table = NULL`, this parameter is ignored. |
|
| 78 |
#' @param as_list (`flag`)\cr whether the two `ggplot` objects should be returned as a list when `table` is not `NULL`. |
|
| 79 |
#' If `TRUE`, a named list with two elements, `plot` and `table`, will be returned. If `FALSE` (default) the |
|
| 80 |
#' annotation table is printed below the plot via [cowplot::plot_grid()]. |
|
| 81 |
#' |
|
| 82 |
#' @return A `ggplot` line plot (and statistics table if applicable). |
|
| 83 |
#' |
|
| 84 |
#' @examples |
|
| 85 |
#' |
|
| 86 |
#' adsl <- tern_ex_adsl |
|
| 87 |
#' adlb <- tern_ex_adlb %>% dplyr::filter(ANL01FL == "Y", PARAMCD == "ALT", AVISIT != "SCREENING") |
|
| 88 |
#' adlb$AVISIT <- droplevels(adlb$AVISIT) |
|
| 89 |
#' adlb <- dplyr::mutate(adlb, AVISIT = forcats::fct_reorder(AVISIT, AVISITN, min)) |
|
| 90 |
#' |
|
| 91 |
#' # Mean with CI |
|
| 92 |
#' g_lineplot(adlb, adsl, subtitle = "Laboratory Test:") |
|
| 93 |
#' |
|
| 94 |
#' # Mean with CI, no stratification with group_var |
|
| 95 |
#' g_lineplot(adlb, variables = control_lineplot_vars(group_var = NA)) |
|
| 96 |
#' |
|
| 97 |
#' # Mean, upper whisker of CI, no group_var(strata) counts N |
|
| 98 |
#' g_lineplot( |
|
| 99 |
#' adlb, |
|
| 100 |
#' whiskers = "mean_ci_upr", |
|
| 101 |
#' title = "Plot of Mean and Upper 95% Confidence Limit by Visit" |
|
| 102 |
#' ) |
|
| 103 |
#' |
|
| 104 |
#' # Median with CI |
|
| 105 |
#' g_lineplot( |
|
| 106 |
#' adlb, |
|
| 107 |
#' adsl, |
|
| 108 |
#' mid = "median", |
|
| 109 |
#' interval = "median_ci", |
|
| 110 |
#' whiskers = c("median_ci_lwr", "median_ci_upr"),
|
|
| 111 |
#' title = "Plot of Median and 95% Confidence Limits by Visit" |
|
| 112 |
#' ) |
|
| 113 |
#' |
|
| 114 |
#' # Mean, +/- SD |
|
| 115 |
#' g_lineplot(adlb, adsl, |
|
| 116 |
#' interval = "mean_sdi", |
|
| 117 |
#' whiskers = c("mean_sdi_lwr", "mean_sdi_upr"),
|
|
| 118 |
#' title = "Plot of Median +/- SD by Visit" |
|
| 119 |
#' ) |
|
| 120 |
#' |
|
| 121 |
#' # Mean with CI plot with stats table |
|
| 122 |
#' g_lineplot(adlb, adsl, table = c("n", "mean", "mean_ci"))
|
|
| 123 |
#' |
|
| 124 |
#' # Mean with CI, table and customized confidence level |
|
| 125 |
#' g_lineplot( |
|
| 126 |
#' adlb, |
|
| 127 |
#' adsl, |
|
| 128 |
#' table = c("n", "mean", "mean_ci"),
|
|
| 129 |
#' control = control_analyze_vars(conf_level = 0.80), |
|
| 130 |
#' title = "Plot of Mean and 80% Confidence Limits by Visit" |
|
| 131 |
#' ) |
|
| 132 |
#' |
|
| 133 |
#' # Mean with CI, table with customized formats/labels |
|
| 134 |
#' g_lineplot( |
|
| 135 |
#' adlb, |
|
| 136 |
#' adsl, |
|
| 137 |
#' table = c("n", "mean", "mean_ci"),
|
|
| 138 |
#' table_format = list( |
|
| 139 |
#' mean = function(x, ...) {
|
|
| 140 |
#' ifelse(x < 20, round_fmt(x, digits = 3), round_fmt(x, digits = 2)) |
|
| 141 |
#' }, |
|
| 142 |
#' mean_ci = "(xx.xxx, xx.xxx)" |
|
| 143 |
#' ), |
|
| 144 |
#' table_labels = list( |
|
| 145 |
#' mean = "mean", |
|
| 146 |
#' mean_ci = "95% CI" |
|
| 147 |
#' ) |
|
| 148 |
#' ) |
|
| 149 |
#' |
|
| 150 |
#' # Mean with CI, table, filtered data |
|
| 151 |
#' adlb_f <- dplyr::filter(adlb, ARMCD != "ARM A" | AVISIT == "BASELINE") |
|
| 152 |
#' g_lineplot(adlb_f, table = c("n", "mean"))
|
|
| 153 |
#' |
|
| 154 |
#' @export |
|
| 155 |
g_lineplot <- function(df, |
|
| 156 |
alt_counts_df = NULL, |
|
| 157 |
variables = control_lineplot_vars(), |
|
| 158 |
mid = "mean", |
|
| 159 |
interval = "mean_ci", |
|
| 160 |
whiskers = c("mean_ci_lwr", "mean_ci_upr"),
|
|
| 161 |
table = NULL, |
|
| 162 |
sfun = s_summary, |
|
| 163 |
..., |
|
| 164 |
mid_type = "pl", |
|
| 165 |
mid_point_size = 2, |
|
| 166 |
position = ggplot2::position_dodge(width = 0.4), |
|
| 167 |
legend_title = NULL, |
|
| 168 |
legend_position = "bottom", |
|
| 169 |
ggtheme = nestcolor::theme_nest(), |
|
| 170 |
xticks = NULL, |
|
| 171 |
xlim = NULL, |
|
| 172 |
ylim = NULL, |
|
| 173 |
x_lab = obj_label(df[[variables[["x"]]]]), |
|
| 174 |
y_lab = NULL, |
|
| 175 |
y_lab_add_paramcd = TRUE, |
|
| 176 |
y_lab_add_unit = TRUE, |
|
| 177 |
title = "Plot of Mean and 95% Confidence Limits by Visit", |
|
| 178 |
subtitle = "", |
|
| 179 |
subtitle_add_paramcd = TRUE, |
|
| 180 |
subtitle_add_unit = TRUE, |
|
| 181 |
caption = NULL, |
|
| 182 |
table_format = NULL, |
|
| 183 |
table_labels = NULL, |
|
| 184 |
table_font_size = 3, |
|
| 185 |
errorbar_width = 0.45, |
|
| 186 |
newpage = lifecycle::deprecated(), |
|
| 187 |
col = NULL, |
|
| 188 |
linetype = NULL, |
|
| 189 |
rel_height_plot = 0.5, |
|
| 190 |
as_list = FALSE) {
|
|
| 191 | 14x |
checkmate::assert_character(variables, any.missing = TRUE) |
| 192 | 14x |
checkmate::assert_character(mid, null.ok = TRUE) |
| 193 | 14x |
checkmate::assert_character(interval, null.ok = TRUE) |
| 194 | 14x |
checkmate::assert_character(col, null.ok = TRUE) |
| 195 | 14x |
checkmate::assert_character(linetype, null.ok = TRUE) |
| 196 | 14x |
checkmate::assert_numeric(xticks, null.ok = TRUE) |
| 197 | 14x |
checkmate::assert_numeric(xlim, finite = TRUE, any.missing = FALSE, len = 2, sorted = TRUE, null.ok = TRUE) |
| 198 | 14x |
checkmate::assert_numeric(ylim, finite = TRUE, any.missing = FALSE, len = 2, sorted = TRUE, null.ok = TRUE) |
| 199 | 14x |
checkmate::assert_number(errorbar_width, lower = 0) |
| 200 | 14x |
checkmate::assert_string(title, null.ok = TRUE) |
| 201 | 14x |
checkmate::assert_string(subtitle, null.ok = TRUE) |
| 202 | 14x |
assert_proportion_value(rel_height_plot) |
| 203 | 14x |
checkmate::assert_logical(as_list) |
| 204 | ||
| 205 | 14x |
if (!is.null(table)) {
|
| 206 | 6x |
table_format <- get_formats_from_stats(table, formats_in = table_format) |
| 207 | 6x |
table_labels <- get_labels_from_stats(table, labels_in = table_labels) %>% .unlist_keep_nulls() |
| 208 |
} |
|
| 209 | ||
| 210 | 14x |
extra_args <- list(...) |
| 211 | 14x |
if ("control" %in% names(extra_args)) {
|
| 212 | 4x |
if (!is.null(table) && all(table_labels == .unlist_keep_nulls(get_labels_from_stats(table)))) {
|
| 213 | 3x |
table_labels <- table_labels %>% labels_use_control(extra_args[["control"]]) |
| 214 |
} |
|
| 215 |
} |
|
| 216 | ||
| 217 | 14x |
if (is.character(interval)) {
|
| 218 | 14x |
checkmate::assert_vector(whiskers, min.len = 0, max.len = 2) |
| 219 |
} |
|
| 220 | ||
| 221 | 14x |
if (length(whiskers) == 1) {
|
| 222 | ! |
checkmate::assert_character(mid) |
| 223 |
} |
|
| 224 | ||
| 225 | 14x |
if (is.character(mid)) {
|
| 226 | 14x |
checkmate::assert_scalar(mid_type) |
| 227 | 14x |
checkmate::assert_subset(mid_type, c("pl", "p", "l"))
|
| 228 |
} |
|
| 229 | ||
| 230 | 14x |
x <- variables[["x"]] |
| 231 | 14x |
y <- variables[["y"]] |
| 232 | 14x |
paramcd <- variables["paramcd"] # NA if paramcd == NA or it is not in variables |
| 233 | 14x |
y_unit <- variables["y_unit"] # NA if y_unit == NA or it is not in variables |
| 234 | 14x |
if (is.na(variables["group_var"])) {
|
| 235 | 1x |
group_var <- NULL # NULL if group_var == NA or it is not in variables |
| 236 |
} else {
|
|
| 237 | 13x |
group_var <- variables[["group_var"]] |
| 238 | 13x |
subject_var <- variables[["subject_var"]] |
| 239 |
} |
|
| 240 | 14x |
if (is.na(variables["facet_var"])) {
|
| 241 | 13x |
facet_var <- NULL # NULL if facet_var == NA or it is not in variables |
| 242 |
} else {
|
|
| 243 | 1x |
facet_var <- variables[["facet_var"]] |
| 244 |
} |
|
| 245 | 14x |
checkmate::assert_flag(y_lab_add_paramcd, null.ok = TRUE) |
| 246 | 14x |
checkmate::assert_flag(subtitle_add_paramcd, null.ok = TRUE) |
| 247 | 14x |
if ((!is.null(y_lab) && y_lab_add_paramcd) || (!is.null(subtitle) && subtitle_add_paramcd)) {
|
| 248 | 14x |
checkmate::assert_false(is.na(paramcd)) |
| 249 | 14x |
checkmate::assert_scalar(unique(df[[paramcd]])) |
| 250 |
} |
|
| 251 | ||
| 252 | 14x |
checkmate::assert_flag(y_lab_add_unit, null.ok = TRUE) |
| 253 | 14x |
checkmate::assert_flag(subtitle_add_unit, null.ok = TRUE) |
| 254 | 14x |
if ((!is.null(y_lab) && y_lab_add_unit) || (!is.null(subtitle) && subtitle_add_unit)) {
|
| 255 | 14x |
checkmate::assert_false(is.na(y_unit)) |
| 256 | 14x |
checkmate::assert_scalar(unique(df[[y_unit]])) |
| 257 |
} |
|
| 258 | ||
| 259 | 14x |
if (!is.null(group_var) && !is.null(alt_counts_df)) {
|
| 260 | 9x |
checkmate::assert_set_equal(unique(alt_counts_df[[group_var]]), unique(df[[group_var]])) |
| 261 |
} |
|
| 262 | ||
| 263 |
####################################### | |
|
| 264 |
# ---- Compute required statistics ---- |
|
| 265 |
####################################### | |
|
| 266 |
# Remove unused levels for x-axis |
|
| 267 | 14x |
if (is.factor(df[[x]])) {
|
| 268 | 13x |
df[[x]] <- droplevels(df[[x]]) |
| 269 |
} |
|
| 270 | ||
| 271 | 14x |
if (!is.null(facet_var) && !is.null(group_var)) {
|
| 272 | 1x |
df_grp <- tidyr::expand(df, .data[[facet_var]], .data[[group_var]], .data[[x]]) # expand based on levels of factors |
| 273 | 13x |
} else if (!is.null(group_var)) {
|
| 274 | 12x |
df_grp <- tidyr::expand(df, .data[[group_var]], .data[[x]]) # expand based on levels of factors |
| 275 |
} else {
|
|
| 276 | 1x |
df_grp <- tidyr::expand(df, NULL, .data[[x]]) |
| 277 |
} |
|
| 278 | ||
| 279 | 14x |
df_grp <- df_grp %>% |
| 280 | 14x |
dplyr::full_join(y = df[, c(facet_var, group_var, x, y)], by = c(facet_var, group_var, x), multiple = "all") %>% |
| 281 | 14x |
dplyr::group_by_at(c(facet_var, group_var, x)) |
| 282 | ||
| 283 | 14x |
df_stats <- df_grp %>% |
| 284 | 14x |
dplyr::summarise( |
| 285 | 14x |
data.frame(t(do.call(c, unname(sfun(.data[[y]])[c(mid, interval)])))), |
| 286 | 14x |
.groups = "drop" |
| 287 |
) |
|
| 288 | ||
| 289 | 14x |
df_stats <- df_stats[!is.na(df_stats[[mid]]), ] |
| 290 | ||
| 291 |
# add number of objects N in group_var (strata) |
|
| 292 | 14x |
if (!is.null(group_var) && !is.null(alt_counts_df)) {
|
| 293 | 9x |
strata_N <- paste0(group_var, "_N") # nolint |
| 294 | ||
| 295 | 9x |
df_N <- stats::aggregate(eval(parse(text = subject_var)) ~ eval(parse(text = group_var)), data = alt_counts_df, FUN = function(x) length(unique(x))) # nolint |
| 296 | 9x |
colnames(df_N) <- c(group_var, "N") # nolint |
| 297 | 9x |
df_N[[strata_N]] <- paste0(df_N[[group_var]], " (N = ", df_N$N, ")") # nolint |
| 298 | ||
| 299 |
# retain strata factor levels |
|
| 300 | 9x |
search_strings <- unique(df_N[[strata_N]]) |
| 301 | 9x |
matches <- sapply(unique(df_N[[group_var]]), function(x) {
|
| 302 | 25x |
regex_pattern <- gsub("([][(){}^$.|*+?\\\\])", "\\\\\\1", x)
|
| 303 | 25x |
search_strings[grepl( |
| 304 | 25x |
paste0("^", regex_pattern, "\\b"),
|
| 305 | 25x |
search_strings |
| 306 |
)] |
|
| 307 |
}) |
|
| 308 | 9x |
df_N[[paste0(group_var, "_N")]] <- factor(df_N[[group_var]]) # nolint |
| 309 | 9x |
levels(df_N[[paste0(group_var, "_N")]]) <- unlist(matches) # nolint |
| 310 | ||
| 311 |
# strata_N should not be in colnames(df_stats) |
|
| 312 | 9x |
checkmate::assert_disjunct(strata_N, colnames(df_stats)) |
| 313 | ||
| 314 | 9x |
df_stats <- merge(x = df_stats, y = df_N[, c(group_var, strata_N)], by = group_var) |
| 315 | 5x |
} else if (!is.null(group_var)) {
|
| 316 | 4x |
strata_N <- group_var # nolint |
| 317 |
} else {
|
|
| 318 | 1x |
strata_N <- NULL # nolint |
| 319 |
} |
|
| 320 | ||
| 321 |
############################################### | |
|
| 322 |
# ---- Prepare certain plot's properties. ---- |
|
| 323 |
############################################### | |
|
| 324 |
# legend title |
|
| 325 | 14x |
if (is.null(legend_title) && !is.null(group_var) && legend_position != "none") {
|
| 326 | 13x |
legend_title <- attr(df[[group_var]], "label") |
| 327 |
} |
|
| 328 | ||
| 329 |
# y label |
|
| 330 | 14x |
if (!is.null(y_lab)) {
|
| 331 | 4x |
if (y_lab_add_paramcd) {
|
| 332 | 4x |
y_lab <- paste(y_lab, unique(df[[paramcd]])) |
| 333 |
} |
|
| 334 | ||
| 335 | 4x |
if (y_lab_add_unit) {
|
| 336 | 4x |
y_lab <- paste0(y_lab, " (", unique(df[[y_unit]]), ")")
|
| 337 |
} |
|
| 338 | ||
| 339 | 4x |
y_lab <- trimws(y_lab) |
| 340 |
} |
|
| 341 | ||
| 342 |
# subtitle |
|
| 343 | 14x |
if (!is.null(subtitle)) {
|
| 344 | 14x |
if (subtitle_add_paramcd) {
|
| 345 | 14x |
subtitle <- paste(subtitle, unique(df[[paramcd]])) |
| 346 |
} |
|
| 347 | ||
| 348 | 14x |
if (subtitle_add_unit) {
|
| 349 | 14x |
subtitle <- paste0(subtitle, " (", unique(df[[y_unit]]), ")")
|
| 350 |
} |
|
| 351 | ||
| 352 | 14x |
subtitle <- trimws(subtitle) |
| 353 |
} |
|
| 354 | ||
| 355 |
############################### | |
|
| 356 |
# ---- Build plot object. ---- |
|
| 357 |
############################### | |
|
| 358 | 14x |
p <- ggplot2::ggplot( |
| 359 | 14x |
data = df_stats, |
| 360 | 14x |
mapping = ggplot2::aes( |
| 361 | 14x |
x = .data[[x]], y = .data[[mid]], |
| 362 | 14x |
color = if (is.null(strata_N)) NULL else .data[[strata_N]], |
| 363 | 14x |
shape = if (is.null(strata_N)) NULL else .data[[strata_N]], |
| 364 | 14x |
lty = if (is.null(strata_N)) NULL else .data[[strata_N]], |
| 365 | 14x |
group = if (is.null(strata_N)) NULL else .data[[strata_N]] |
| 366 |
) |
|
| 367 |
) |
|
| 368 | ||
| 369 | 14x |
if (!is.null(group_var) && nlevels(df_stats[[strata_N]]) > 6) {
|
| 370 | 1x |
p <- p + |
| 371 | 1x |
scale_shape_manual(values = seq(15, 15 + nlevels(df_stats[[strata_N]]))) |
| 372 |
} |
|
| 373 | ||
| 374 | 14x |
if (!is.null(mid)) {
|
| 375 |
# points |
|
| 376 | 14x |
if (grepl("p", mid_type, fixed = TRUE)) {
|
| 377 | 14x |
p <- p + ggplot2::geom_point(position = position, size = mid_point_size, na.rm = TRUE) |
| 378 |
} |
|
| 379 | ||
| 380 |
# lines - plotted only if there is a strata grouping (group_var) |
|
| 381 | 14x |
if (grepl("l", mid_type, fixed = TRUE) && !is.null(strata_N)) {
|
| 382 | 13x |
p <- p + ggplot2::geom_line(position = position, na.rm = TRUE) |
| 383 |
} |
|
| 384 |
} |
|
| 385 | ||
| 386 |
# interval |
|
| 387 | 14x |
if (!is.null(interval)) {
|
| 388 | 14x |
p <- p + |
| 389 | 14x |
ggplot2::geom_errorbar( |
| 390 | 14x |
ggplot2::aes(ymin = .data[[whiskers[1]]], ymax = .data[[whiskers[max(1, length(whiskers))]]]), |
| 391 | 14x |
width = errorbar_width, |
| 392 | 14x |
position = position |
| 393 |
) |
|
| 394 | ||
| 395 | 14x |
if (length(whiskers) == 1) { # lwr or upr only; mid is then required
|
| 396 |
# workaround as geom_errorbar does not provide single-direction whiskers |
|
| 397 | ! |
p <- p + |
| 398 | ! |
ggplot2::geom_linerange( |
| 399 | ! |
data = df_stats[!is.na(df_stats[[whiskers]]), ], # as na.rm =TRUE does not suppress warnings |
| 400 | ! |
ggplot2::aes(ymin = .data[[mid]], ymax = .data[[whiskers]]), |
| 401 | ! |
position = position, |
| 402 | ! |
na.rm = TRUE, |
| 403 | ! |
show.legend = FALSE |
| 404 |
) |
|
| 405 |
} |
|
| 406 |
} |
|
| 407 | ||
| 408 | 14x |
if (is.numeric(df_stats[[x]])) {
|
| 409 | 1x |
if (length(xticks) == 1) xticks <- seq(from = min(df_stats[[x]]), to = max(df_stats[[x]]), by = xticks) |
| 410 | 1x |
p <- p + ggplot2::scale_x_continuous(breaks = if (!is.null(xticks)) xticks else waiver(), limits = xlim) |
| 411 |
} |
|
| 412 | ||
| 413 | 14x |
p <- p + |
| 414 | 14x |
ggplot2::scale_y_continuous(labels = scales::comma, limits = ylim) + |
| 415 | 14x |
ggplot2::labs( |
| 416 | 14x |
title = title, |
| 417 | 14x |
subtitle = subtitle, |
| 418 | 14x |
caption = caption, |
| 419 | 14x |
color = legend_title, |
| 420 | 14x |
lty = legend_title, |
| 421 | 14x |
shape = legend_title, |
| 422 | 14x |
x = x_lab, |
| 423 | 14x |
y = y_lab |
| 424 |
) |
|
| 425 | ||
| 426 | 14x |
if (!is.null(col)) {
|
| 427 | 1x |
p <- p + |
| 428 | 1x |
ggplot2::scale_color_manual(values = col) |
| 429 |
} |
|
| 430 | 14x |
if (!is.null(linetype)) {
|
| 431 | 1x |
p <- p + |
| 432 | 1x |
ggplot2::scale_linetype_manual(values = linetype) |
| 433 |
} |
|
| 434 | ||
| 435 | 14x |
if (!is.null(facet_var)) {
|
| 436 | 1x |
p <- p + |
| 437 | 1x |
facet_grid(cols = vars(df_stats[[facet_var]])) |
| 438 |
} |
|
| 439 | ||
| 440 | 14x |
if (!is.null(ggtheme)) {
|
| 441 | 14x |
p <- p + ggtheme |
| 442 |
} else {
|
|
| 443 | ! |
p <- p + |
| 444 | ! |
ggplot2::theme_bw() + |
| 445 | ! |
ggplot2::theme( |
| 446 | ! |
legend.key.width = grid::unit(1, "cm"), |
| 447 | ! |
legend.position = legend_position, |
| 448 | ! |
legend.direction = ifelse( |
| 449 | ! |
legend_position %in% c("top", "bottom"),
|
| 450 | ! |
"horizontal", |
| 451 | ! |
"vertical" |
| 452 |
) |
|
| 453 |
) |
|
| 454 |
} |
|
| 455 | ||
| 456 |
############################################################# | |
|
| 457 |
# ---- Optionally, add table to the bottom of the plot. ---- |
|
| 458 |
############################################################# | |
|
| 459 | 14x |
if (!is.null(table)) {
|
| 460 | 6x |
df_stats_table <- df_grp %>% |
| 461 | 6x |
dplyr::summarise( |
| 462 | 6x |
h_format_row( |
| 463 | 6x |
x = sfun(.data[[y]], ...)[table], |
| 464 | 6x |
format = table_format, |
| 465 | 6x |
labels = table_labels |
| 466 |
), |
|
| 467 | 6x |
.groups = "drop" |
| 468 |
) |
|
| 469 | ||
| 470 | 6x |
stats_lev <- rev(setdiff(colnames(df_stats_table), c(group_var, x))) |
| 471 | ||
| 472 | 6x |
df_stats_table <- df_stats_table %>% |
| 473 | 6x |
tidyr::pivot_longer( |
| 474 | 6x |
cols = -dplyr::all_of(c(group_var, x)), |
| 475 | 6x |
names_to = "stat", |
| 476 | 6x |
values_to = "value", |
| 477 | 6x |
names_ptypes = list(stat = factor(levels = stats_lev)) |
| 478 |
) |
|
| 479 | ||
| 480 | 6x |
tbl <- ggplot2::ggplot( |
| 481 | 6x |
df_stats_table, |
| 482 | 6x |
ggplot2::aes(x = .data[[x]], y = .data[["stat"]], label = .data[["value"]]) |
| 483 |
) + |
|
| 484 | 6x |
ggplot2::geom_text(size = table_font_size) + |
| 485 | 6x |
ggplot2::theme_bw() + |
| 486 | 6x |
ggplot2::theme( |
| 487 | 6x |
panel.border = ggplot2::element_blank(), |
| 488 | 6x |
panel.grid.major = ggplot2::element_blank(), |
| 489 | 6x |
panel.grid.minor = ggplot2::element_blank(), |
| 490 | 6x |
axis.ticks = ggplot2::element_blank(), |
| 491 | 6x |
axis.title = ggplot2::element_blank(), |
| 492 | 6x |
axis.text.x = ggplot2::element_blank(), |
| 493 | 6x |
axis.text.y = ggplot2::element_text( |
| 494 | 6x |
size = table_font_size * ggplot2::.pt, |
| 495 | 6x |
margin = ggplot2::margin(t = 0, r = 0, b = 0, l = 5) |
| 496 |
), |
|
| 497 | 6x |
strip.text = ggplot2::element_text(hjust = 0), |
| 498 | 6x |
strip.text.x = ggplot2::element_text( |
| 499 | 6x |
size = table_font_size * ggplot2::.pt, |
| 500 | 6x |
margin = ggplot2::margin(1.5, 0, 1.5, 0, "pt") |
| 501 |
), |
|
| 502 | 6x |
strip.background = ggplot2::element_rect(fill = "grey95", color = NA), |
| 503 | 6x |
legend.position = "none" |
| 504 |
) |
|
| 505 | ||
| 506 | 6x |
if (!is.null(group_var)) {
|
| 507 | 6x |
tbl <- tbl + ggplot2::facet_wrap(facets = group_var, ncol = 1) |
| 508 |
} |
|
| 509 | ||
| 510 | 6x |
if (!as_list) {
|
| 511 |
# align plot and table |
|
| 512 | 5x |
cowplot::plot_grid( |
| 513 | 5x |
p, |
| 514 | 5x |
tbl, |
| 515 | 5x |
ncol = 1, |
| 516 | 5x |
align = "v", |
| 517 | 5x |
axis = "tblr", |
| 518 | 5x |
rel_heights = c(rel_height_plot, 1 - rel_height_plot) |
| 519 |
) |
|
| 520 |
} else {
|
|
| 521 | 1x |
list(plot = p, table = tbl) |
| 522 |
} |
|
| 523 |
} else {
|
|
| 524 | 8x |
p |
| 525 |
} |
|
| 526 |
} |
|
| 527 | ||
| 528 |
#' Helper function to format the optional `g_lineplot` table |
|
| 529 |
#' |
|
| 530 |
#' @description `r lifecycle::badge("stable")`
|
|
| 531 |
#' |
|
| 532 |
#' @param x (named `list`)\cr list of numerical values to be formatted and optionally labeled. |
|
| 533 |
#' Elements of `x` must be `numeric` vectors. |
|
| 534 |
#' @param format (named `character` or `NULL`)\cr format patterns for `x`. Names of the `format` must |
|
| 535 |
#' match the names of `x`. This parameter is passed directly to the `rtables::format_rcell` |
|
| 536 |
#' function through the `format` parameter. |
|
| 537 |
#' @param labels (named `character` or `NULL`)\cr optional labels for `x`. Names of the `labels` must |
|
| 538 |
#' match the names of `x`. When a label is not specified for an element of `x`, |
|
| 539 |
#' then this function tries to use `label` or `names` (in this order) attribute of that element |
|
| 540 |
#' (depending on which one exists and it is not `NULL` or `NA` or `NaN`). If none of these attributes |
|
| 541 |
#' are attached to a given element of `x`, then the label is automatically generated. |
|
| 542 |
#' |
|
| 543 |
#' @return A single row `data.frame` object. |
|
| 544 |
#' |
|
| 545 |
#' @examples |
|
| 546 |
#' mean_ci <- c(48, 51) |
|
| 547 |
#' x <- list(mean = 50, mean_ci = mean_ci) |
|
| 548 |
#' format <- c(mean = "xx.x", mean_ci = "(xx.xx, xx.xx)") |
|
| 549 |
#' labels <- c(mean = "My Mean") |
|
| 550 |
#' h_format_row(x, format, labels) |
|
| 551 |
#' |
|
| 552 |
#' attr(mean_ci, "label") <- "Mean 95% CI" |
|
| 553 |
#' x <- list(mean = 50, mean_ci = mean_ci) |
|
| 554 |
#' h_format_row(x, format, labels) |
|
| 555 |
#' |
|
| 556 |
#' @export |
|
| 557 |
h_format_row <- function(x, format, labels = NULL) {
|
|
| 558 |
# cell: one row, one column data.frame |
|
| 559 | 110x |
format_cell <- function(x, format, label = NULL) {
|
| 560 | 292x |
fc <- format_rcell(x = x, format = format) |
| 561 | 292x |
if (is.na(fc)) {
|
| 562 | ! |
fc <- "NA" |
| 563 |
} |
|
| 564 | 292x |
x_label <- attr(x, "label") |
| 565 | 292x |
if (!is.null(label) && !is.na(label)) {
|
| 566 | 290x |
names(fc) <- label |
| 567 | 2x |
} else if (!is.null(x_label) && !is.na(x_label)) {
|
| 568 | 1x |
names(fc) <- x_label |
| 569 | 1x |
} else if (length(x) == length(fc)) {
|
| 570 | ! |
names(fc) <- names(x) |
| 571 |
} |
|
| 572 | 292x |
as.data.frame(t(fc)) |
| 573 |
} |
|
| 574 | ||
| 575 | 110x |
row <- do.call( |
| 576 | 110x |
cbind, |
| 577 | 110x |
lapply( |
| 578 | 110x |
names(x), function(xn) format_cell(x[[xn]], format = format[[xn]], label = labels[xn]) |
| 579 |
) |
|
| 580 |
) |
|
| 581 | ||
| 582 | 110x |
row |
| 583 |
} |
|
| 584 | ||
| 585 |
#' Control function for `g_lineplot()` |
|
| 586 |
#' |
|
| 587 |
#' @description `r lifecycle::badge("stable")`
|
|
| 588 |
#' |
|
| 589 |
#' Default values for `variables` parameter in `g_lineplot` function. |
|
| 590 |
#' A variable's default value can be overwritten for any variable. |
|
| 591 |
#' |
|
| 592 |
#' @param x (`string`)\cr x-variable name. |
|
| 593 |
#' @param y (`string`)\cr y-variable name. |
|
| 594 |
#' @param group_var (`string` or `NA`)\cr group variable name. |
|
| 595 |
#' @param subject_var (`string` or `NA`)\cr subject variable name. |
|
| 596 |
#' @param facet_var (`string` or `NA`)\cr faceting variable name. |
|
| 597 |
#' @param paramcd (`string` or `NA`)\cr parameter code variable name. |
|
| 598 |
#' @param y_unit (`string` or `NA`)\cr y-axis unit variable name. |
|
| 599 |
#' |
|
| 600 |
#' @return A named character vector of variable names. |
|
| 601 |
#' |
|
| 602 |
#' @examples |
|
| 603 |
#' control_lineplot_vars() |
|
| 604 |
#' control_lineplot_vars(group_var = NA) |
|
| 605 |
#' |
|
| 606 |
#' @export |
|
| 607 |
control_lineplot_vars <- function(x = "AVISIT", |
|
| 608 |
y = "AVAL", |
|
| 609 |
group_var = "ARM", |
|
| 610 |
facet_var = NA, |
|
| 611 |
paramcd = "PARAMCD", |
|
| 612 |
y_unit = "AVALU", |
|
| 613 |
subject_var = "USUBJID") {
|
|
| 614 | 17x |
checkmate::assert_string(x) |
| 615 | 17x |
checkmate::assert_string(y) |
| 616 | 17x |
checkmate::assert_string(group_var, na.ok = TRUE, null.ok = TRUE) |
| 617 | 17x |
checkmate::assert_string(facet_var, na.ok = TRUE, null.ok = TRUE) |
| 618 | 17x |
checkmate::assert_string(subject_var, na.ok = TRUE, null.ok = TRUE) |
| 619 | 17x |
checkmate::assert_string(paramcd, na.ok = TRUE, null.ok = TRUE) |
| 620 | 17x |
checkmate::assert_string(y_unit, na.ok = TRUE, null.ok = TRUE) |
| 621 | ||
| 622 | 17x |
variables <- c( |
| 623 | 17x |
x = x, y = y, group_var = group_var, paramcd = paramcd, |
| 624 | 17x |
y_unit = y_unit, subject_var = subject_var, facet_var = facet_var |
| 625 |
) |
|
| 626 | 17x |
return(variables) |
| 627 |
} |
| 1 |
#' Convert `rtable` objects to `ggplot` objects |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("experimental")`
|
|
| 4 |
#' |
|
| 5 |
#' Given a [rtables::rtable()] object, performs basic conversion to a [ggplot2::ggplot()] object built using |
|
| 6 |
#' functions from the `ggplot2` package. Any table titles and/or footnotes are ignored. |
|
| 7 |
#' |
|
| 8 |
#' @param tbl (`VTableTree`)\cr `rtables` table object. |
|
| 9 |
#' @param fontsize (`numeric(1)`)\cr font size. |
|
| 10 |
#' @param colwidths (`numeric` or `NULL`)\cr a vector of column widths. Each element's position in |
|
| 11 |
#' `colwidths` corresponds to the column of `tbl` in the same position. If `NULL`, column widths |
|
| 12 |
#' are calculated according to maximum number of characters per column. |
|
| 13 |
#' @param lbl_col_padding (`numeric`)\cr additional padding to use when calculating spacing between |
|
| 14 |
#' the first (label) column and the second column of `tbl`. If `colwidths` is specified, |
|
| 15 |
#' the width of the first column becomes `colwidths[1] + lbl_col_padding`. Defaults to 0. |
|
| 16 |
#' |
|
| 17 |
#' @return A `ggplot` object. |
|
| 18 |
#' |
|
| 19 |
#' @examples |
|
| 20 |
#' dta <- data.frame( |
|
| 21 |
#' ARM = rep(LETTERS[1:3], rep(6, 3)), |
|
| 22 |
#' AVISIT = rep(paste0("V", 1:3), 6),
|
|
| 23 |
#' AVAL = c(9:1, rep(NA, 9)) |
|
| 24 |
#' ) |
|
| 25 |
#' |
|
| 26 |
#' lyt <- basic_table() %>% |
|
| 27 |
#' split_cols_by(var = "ARM") %>% |
|
| 28 |
#' split_rows_by(var = "AVISIT") %>% |
|
| 29 |
#' analyze_vars(vars = "AVAL") |
|
| 30 |
#' |
|
| 31 |
#' tbl <- build_table(lyt, df = dta) |
|
| 32 |
#' |
|
| 33 |
#' rtable2gg(tbl) |
|
| 34 |
#' |
|
| 35 |
#' rtable2gg(tbl, fontsize = 15, colwidths = c(2, 1, 1, 1)) |
|
| 36 |
#' |
|
| 37 |
#' @export |
|
| 38 |
rtable2gg <- function(tbl, fontsize = 12, colwidths = NULL, lbl_col_padding = 0) {
|
|
| 39 | 6x |
mat <- rtables::matrix_form(tbl, indent_rownames = TRUE) |
| 40 | 6x |
mat_strings <- formatters::mf_strings(mat) |
| 41 | 6x |
mat_aligns <- formatters::mf_aligns(mat) |
| 42 | 6x |
mat_indent <- formatters::mf_rinfo(mat)$indent |
| 43 | 6x |
mat_display <- formatters::mf_display(mat) |
| 44 | 6x |
nlines_hdr <- formatters::mf_nlheader(mat) |
| 45 | 6x |
shared_hdr_rows <- which(apply(mat_display, 1, function(x) (any(!x)))) |
| 46 | ||
| 47 | 6x |
tbl_df <- data.frame(mat_strings) |
| 48 | 6x |
body_rows <- seq(nlines_hdr + 1, nrow(tbl_df)) |
| 49 | 6x |
mat_aligns <- apply(mat_aligns, 1:2, function(x) if (x == "left") 0 else if (x == "right") 1 else 0.5) |
| 50 | ||
| 51 |
# Apply indentation in first column |
|
| 52 | 6x |
tbl_df[body_rows, 1] <- sapply(body_rows, function(i) {
|
| 53 | 42x |
ind_i <- mat_indent[i - nlines_hdr] * 4 |
| 54 | 18x |
if (ind_i > 0) paste0(paste(rep(" ", ind_i), collapse = ""), tbl_df[i, 1]) else tbl_df[i, 1]
|
| 55 |
}) |
|
| 56 | ||
| 57 |
# Get column widths |
|
| 58 | 6x |
if (is.null(colwidths)) {
|
| 59 | 6x |
colwidths <- apply(tbl_df, 2, function(x) max(nchar(x))) + 1 |
| 60 |
} |
|
| 61 | 6x |
tot_width <- sum(colwidths) + lbl_col_padding |
| 62 | ||
| 63 | 6x |
if (length(shared_hdr_rows) > 0) {
|
| 64 | 5x |
tbl_df <- tbl_df[-shared_hdr_rows, ] |
| 65 | 5x |
mat_aligns <- mat_aligns[-shared_hdr_rows, ] |
| 66 |
} |
|
| 67 | ||
| 68 | 6x |
res <- ggplot(data = tbl_df) + |
| 69 | 6x |
theme_void() + |
| 70 | 6x |
scale_x_continuous(limits = c(0, tot_width)) + |
| 71 | 6x |
scale_y_continuous(limits = c(0, nrow(mat_strings))) + |
| 72 | 6x |
annotate( |
| 73 | 6x |
"segment", |
| 74 | 6x |
x = 0, xend = tot_width, |
| 75 | 6x |
y = nrow(mat_strings) - nlines_hdr + 0.5, yend = nrow(mat_strings) - nlines_hdr + 0.5 |
| 76 |
) |
|
| 77 | ||
| 78 |
# If header content spans multiple columns, center over these columns |
|
| 79 | 6x |
if (length(shared_hdr_rows) > 0) {
|
| 80 | 5x |
mat_strings[shared_hdr_rows, ] <- trimws(mat_strings[shared_hdr_rows, ]) |
| 81 | 5x |
for (hr in shared_hdr_rows) {
|
| 82 | 6x |
hdr_lbls <- mat_strings[1:hr, mat_display[hr, -1]] |
| 83 | 6x |
hdr_lbls <- matrix(hdr_lbls[nzchar(hdr_lbls)], nrow = hr) |
| 84 | 6x |
for (idx_hl in seq_len(ncol(hdr_lbls))) {
|
| 85 | 13x |
cur_lbl <- tail(hdr_lbls[, idx_hl], 1) |
| 86 | 13x |
which_cols <- if (hr == 1) {
|
| 87 | 9x |
which(mat_strings[hr, ] == hdr_lbls[idx_hl]) |
| 88 | 13x |
} else { # for >2 col splits, only print labels for each unique combo of nested columns
|
| 89 | 4x |
which( |
| 90 | 4x |
apply(mat_strings[1:hr, ], 2, function(x) all(x == hdr_lbls[1:hr, idx_hl])) |
| 91 |
) |
|
| 92 |
} |
|
| 93 | 13x |
line_pos <- c( |
| 94 | 13x |
sum(colwidths[1:(which_cols[1] - 1)]) + 1 + lbl_col_padding, |
| 95 | 13x |
sum(colwidths[1:max(which_cols)]) - 1 + lbl_col_padding |
| 96 |
) |
|
| 97 | ||
| 98 | 13x |
res <- res + |
| 99 | 13x |
annotate( |
| 100 | 13x |
"text", |
| 101 | 13x |
x = mean(line_pos), |
| 102 | 13x |
y = nrow(mat_strings) + 1 - hr, |
| 103 | 13x |
label = cur_lbl, |
| 104 | 13x |
size = fontsize / .pt |
| 105 |
) + |
|
| 106 | 13x |
annotate( |
| 107 | 13x |
"segment", |
| 108 | 13x |
x = line_pos[1], |
| 109 | 13x |
xend = line_pos[2], |
| 110 | 13x |
y = nrow(mat_strings) - hr + 0.5, |
| 111 | 13x |
yend = nrow(mat_strings) - hr + 0.5 |
| 112 |
) |
|
| 113 |
} |
|
| 114 |
} |
|
| 115 |
} |
|
| 116 | ||
| 117 |
# Add table columns |
|
| 118 | 6x |
for (i in seq_len(ncol(tbl_df))) {
|
| 119 | 40x |
res <- res + annotate( |
| 120 | 40x |
"text", |
| 121 | 40x |
x = if (i == 1) 0 else sum(colwidths[1:i]) - 0.5 * colwidths[i] + lbl_col_padding, |
| 122 | 40x |
y = rev(seq_len(nrow(tbl_df))), |
| 123 | 40x |
label = tbl_df[, i], |
| 124 | 40x |
hjust = mat_aligns[, i], |
| 125 | 40x |
size = fontsize / .pt |
| 126 |
) |
|
| 127 |
} |
|
| 128 | ||
| 129 | 6x |
res |
| 130 |
} |
|
| 131 | ||
| 132 |
#' Convert `data.frame` object to `ggplot` object |
|
| 133 |
#' |
|
| 134 |
#' @description `r lifecycle::badge("experimental")`
|
|
| 135 |
#' |
|
| 136 |
#' Given a `data.frame` object, performs basic conversion to a [ggplot2::ggplot()] object built using |
|
| 137 |
#' functions from the `ggplot2` package. |
|
| 138 |
#' |
|
| 139 |
#' @param df (`data.frame`)\cr a data frame. |
|
| 140 |
#' @param colwidths (`numeric` or `NULL`)\cr a vector of column widths. Each element's position in |
|
| 141 |
#' `colwidths` corresponds to the column of `df` in the same position. If `NULL`, column widths |
|
| 142 |
#' are calculated according to maximum number of characters per column. |
|
| 143 |
#' @param font_size (`numeric(1)`)\cr font size. |
|
| 144 |
#' @param col_labels (`flag`)\cr whether the column names (labels) of `df` should be used as the first row |
|
| 145 |
#' of the output table. |
|
| 146 |
#' @param col_lab_fontface (`string`)\cr font face to apply to the first row (of column labels |
|
| 147 |
#' if `col_labels = TRUE`). Defaults to `"bold"`. |
|
| 148 |
#' @param hline (`flag`)\cr whether a horizontal line should be printed below the first row of the table. |
|
| 149 |
#' @param bg_fill (`string`)\cr table background fill color. |
|
| 150 |
#' |
|
| 151 |
#' @return A `ggplot` object. |
|
| 152 |
#' |
|
| 153 |
#' @examples |
|
| 154 |
#' \dontrun{
|
|
| 155 |
#' df2gg(head(iris, 5)) |
|
| 156 |
#' |
|
| 157 |
#' df2gg(head(iris, 5), font_size = 15, colwidths = c(1, 1, 1, 1, 1)) |
|
| 158 |
#' } |
|
| 159 |
#' @keywords internal |
|
| 160 |
df2gg <- function(df, |
|
| 161 |
colwidths = NULL, |
|
| 162 |
font_size = 10, |
|
| 163 |
col_labels = TRUE, |
|
| 164 |
col_lab_fontface = "bold", |
|
| 165 |
hline = TRUE, |
|
| 166 |
bg_fill = NULL) {
|
|
| 167 |
# convert to text |
|
| 168 | 19x |
df <- as.data.frame(apply(df, 1:2, function(x) if (is.na(x)) "NA" else as.character(x))) |
| 169 | ||
| 170 | 19x |
if (col_labels) {
|
| 171 | 10x |
df <- as.matrix(df) |
| 172 | 10x |
df <- rbind(colnames(df), df) |
| 173 |
} |
|
| 174 | ||
| 175 |
# Get column widths |
|
| 176 | 19x |
if (is.null(colwidths)) {
|
| 177 | 1x |
colwidths <- apply(df, 2, function(x) max(nchar(x), na.rm = TRUE)) |
| 178 |
} |
|
| 179 | 19x |
tot_width <- sum(colwidths) |
| 180 | ||
| 181 | 19x |
res <- ggplot(data = df) + |
| 182 | 19x |
theme_void() + |
| 183 | 19x |
scale_x_continuous(limits = c(0, tot_width)) + |
| 184 | 19x |
scale_y_continuous(limits = c(1, nrow(df))) |
| 185 | ||
| 186 | 9x |
if (!is.null(bg_fill)) res <- res + theme(plot.background = element_rect(fill = bg_fill)) |
| 187 | ||
| 188 | 19x |
if (hline) {
|
| 189 | 10x |
res <- res + |
| 190 | 10x |
annotate( |
| 191 | 10x |
"segment", |
| 192 | 10x |
x = 0 + 0.2 * colwidths[2], xend = tot_width - 0.1 * tail(colwidths, 1), |
| 193 | 10x |
y = nrow(df) - 0.5, yend = nrow(df) - 0.5 |
| 194 |
) |
|
| 195 |
} |
|
| 196 | ||
| 197 | 19x |
for (i in seq_len(ncol(df))) {
|
| 198 | 86x |
line_pos <- c( |
| 199 | 86x |
if (i == 1) 0 else sum(colwidths[1:(i - 1)]), |
| 200 | 86x |
sum(colwidths[1:i]) |
| 201 |
) |
|
| 202 | 86x |
res <- res + |
| 203 | 86x |
annotate( |
| 204 | 86x |
"text", |
| 205 | 86x |
x = mean(line_pos), |
| 206 | 86x |
y = rev(seq_len(nrow(df))), |
| 207 | 86x |
label = df[, i], |
| 208 | 86x |
size = font_size / .pt, |
| 209 | 86x |
fontface = if (col_labels) {
|
| 210 | 32x |
c(col_lab_fontface, rep("plain", nrow(df) - 1))
|
| 211 |
} else {
|
|
| 212 | 54x |
rep("plain", nrow(df))
|
| 213 |
} |
|
| 214 |
) |
|
| 215 |
} |
|
| 216 | ||
| 217 | 19x |
res |
| 218 |
} |
| 1 |
#' Subgroup treatment effect pattern (STEP) fit for binary (response) outcome |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' This fits the Subgroup Treatment Effect Pattern logistic regression models for a binary |
|
| 6 |
#' (response) outcome. The treatment arm variable must have exactly 2 levels, |
|
| 7 |
#' where the first one is taken as reference and the estimated odds ratios are |
|
| 8 |
#' for the comparison of the second level vs. the first one. |
|
| 9 |
#' |
|
| 10 |
#' The (conditional) logistic regression model which is fit is: |
|
| 11 |
#' |
|
| 12 |
#' `response ~ arm * poly(biomarker, degree) + covariates + strata(strata)` |
|
| 13 |
#' |
|
| 14 |
#' where `degree` is specified by `control_step()`. |
|
| 15 |
#' |
|
| 16 |
#' @inheritParams argument_convention |
|
| 17 |
#' @param variables (named `list` of `character`)\cr list of analysis variables: |
|
| 18 |
#' needs `response`, `arm`, `biomarker`, and optional `covariates` and `strata`. |
|
| 19 |
#' @param control (named `list`)\cr combined control list from [control_step()] |
|
| 20 |
#' and [control_logistic()]. |
|
| 21 |
#' |
|
| 22 |
#' @return A matrix of class `step`. The first part of the columns describe the |
|
| 23 |
#' subgroup intervals used for the biomarker variable, including where the |
|
| 24 |
#' center of the intervals are and their bounds. The second part of the |
|
| 25 |
#' columns contain the estimates for the treatment arm comparison. |
|
| 26 |
#' |
|
| 27 |
#' @note For the default degree 0 the `biomarker` variable is not included in the model. |
|
| 28 |
#' |
|
| 29 |
#' @seealso [control_step()] and [control_logistic()] for the available |
|
| 30 |
#' customization options. |
|
| 31 |
#' |
|
| 32 |
#' @examples |
|
| 33 |
#' # Testing dataset with just two treatment arms. |
|
| 34 |
#' library(survival) |
|
| 35 |
#' library(dplyr) |
|
| 36 |
#' |
|
| 37 |
#' adrs_f <- tern_ex_adrs %>% |
|
| 38 |
#' filter( |
|
| 39 |
#' PARAMCD == "BESRSPI", |
|
| 40 |
#' ARM %in% c("B: Placebo", "A: Drug X")
|
|
| 41 |
#' ) %>% |
|
| 42 |
#' mutate( |
|
| 43 |
#' # Reorder levels of ARM to have Placebo as reference arm for Odds Ratio calculations. |
|
| 44 |
#' ARM = droplevels(forcats::fct_relevel(ARM, "B: Placebo")), |
|
| 45 |
#' RSP = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
|
|
| 46 |
#' SEX = factor(SEX) |
|
| 47 |
#' ) |
|
| 48 |
#' |
|
| 49 |
#' variables <- list( |
|
| 50 |
#' arm = "ARM", |
|
| 51 |
#' biomarker = "BMRKR1", |
|
| 52 |
#' covariates = "AGE", |
|
| 53 |
#' response = "RSP" |
|
| 54 |
#' ) |
|
| 55 |
#' |
|
| 56 |
#' # Fit default STEP models: Here a constant treatment effect is estimated in each subgroup. |
|
| 57 |
#' # We use a large enough bandwidth to avoid too small subgroups and linear separation in those. |
|
| 58 |
#' step_matrix <- fit_rsp_step( |
|
| 59 |
#' variables = variables, |
|
| 60 |
#' data = adrs_f, |
|
| 61 |
#' control = c(control_logistic(), control_step(bandwidth = 0.9)) |
|
| 62 |
#' ) |
|
| 63 |
#' dim(step_matrix) |
|
| 64 |
#' head(step_matrix) |
|
| 65 |
#' |
|
| 66 |
#' # Specify different polynomial degree for the biomarker interaction to use more flexible local |
|
| 67 |
#' # models. Or specify different logistic regression options, including confidence level. |
|
| 68 |
#' step_matrix2 <- fit_rsp_step( |
|
| 69 |
#' variables = variables, |
|
| 70 |
#' data = adrs_f, |
|
| 71 |
#' control = c(control_logistic(conf_level = 0.9), control_step(bandwidth = NULL, degree = 1)) |
|
| 72 |
#' ) |
|
| 73 |
#' |
|
| 74 |
#' # Use a global constant model. This is helpful as a reference for the subgroup models. |
|
| 75 |
#' step_matrix3 <- fit_rsp_step( |
|
| 76 |
#' variables = variables, |
|
| 77 |
#' data = adrs_f, |
|
| 78 |
#' control = c(control_logistic(), control_step(bandwidth = NULL, num_points = 2L)) |
|
| 79 |
#' ) |
|
| 80 |
#' |
|
| 81 |
#' # It is also possible to use strata, i.e. use conditional logistic regression models. |
|
| 82 |
#' variables2 <- list( |
|
| 83 |
#' arm = "ARM", |
|
| 84 |
#' biomarker = "BMRKR1", |
|
| 85 |
#' covariates = "AGE", |
|
| 86 |
#' response = "RSP", |
|
| 87 |
#' strata = c("STRATA1", "STRATA2")
|
|
| 88 |
#' ) |
|
| 89 |
#' |
|
| 90 |
#' step_matrix4 <- fit_rsp_step( |
|
| 91 |
#' variables = variables2, |
|
| 92 |
#' data = adrs_f, |
|
| 93 |
#' control = c(control_logistic(), control_step(bandwidth = NULL)) |
|
| 94 |
#' ) |
|
| 95 |
#' |
|
| 96 |
#' @export |
|
| 97 |
fit_rsp_step <- function(variables, |
|
| 98 |
data, |
|
| 99 |
control = c(control_step(), control_logistic())) {
|
|
| 100 | 5x |
assert_df_with_variables(data, variables) |
| 101 | 5x |
checkmate::assert_list(control, names = "named") |
| 102 | 5x |
data <- data[!is.na(data[[variables$biomarker]]), ] |
| 103 | 5x |
window_sel <- h_step_window(x = data[[variables$biomarker]], control = control) |
| 104 | 5x |
interval_center <- window_sel$interval[, "Interval Center"] |
| 105 | 5x |
form <- h_step_rsp_formula(variables = variables, control = control) |
| 106 | 5x |
estimates <- if (is.null(control$bandwidth)) {
|
| 107 | 1x |
h_step_rsp_est( |
| 108 | 1x |
formula = form, |
| 109 | 1x |
data = data, |
| 110 | 1x |
variables = variables, |
| 111 | 1x |
x = interval_center, |
| 112 | 1x |
control = control |
| 113 |
) |
|
| 114 |
} else {
|
|
| 115 | 4x |
tmp <- mapply( |
| 116 | 4x |
FUN = h_step_rsp_est, |
| 117 | 4x |
x = interval_center, |
| 118 | 4x |
subset = as.list(as.data.frame(window_sel$sel)), |
| 119 | 4x |
MoreArgs = list( |
| 120 | 4x |
formula = form, |
| 121 | 4x |
data = data, |
| 122 | 4x |
variables = variables, |
| 123 | 4x |
control = control |
| 124 |
) |
|
| 125 |
) |
|
| 126 |
# Maybe we find a more elegant solution than this. |
|
| 127 | 4x |
rownames(tmp) <- c("n", "logor", "se", "ci_lower", "ci_upper")
|
| 128 | 4x |
t(tmp) |
| 129 |
} |
|
| 130 | 5x |
result <- cbind(window_sel$interval, estimates) |
| 131 | 5x |
structure( |
| 132 | 5x |
result, |
| 133 | 5x |
class = c("step", "matrix"),
|
| 134 | 5x |
variables = variables, |
| 135 | 5x |
control = control |
| 136 |
) |
|
| 137 |
} |
| 1 |
#' Estimate proportions of each level of a variable |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' The analyze & summarize function [estimate_multinomial_response()] creates a layout element to estimate the |
|
| 6 |
#' proportion and proportion confidence interval for each level of a factor variable. The primary analysis variable, |
|
| 7 |
#' `var`, should be a factor variable, the values of which will be used as labels within the output table. |
|
| 8 |
#' |
|
| 9 |
#' @inheritParams argument_convention |
|
| 10 |
#' @param .stats (`character`)\cr statistics to select for the table. |
|
| 11 |
#' |
|
| 12 |
#' Options are: ``r shQuote(get_stats("estimate_multinomial_response"), type = "sh")``
|
|
| 13 |
#' |
|
| 14 |
#' @seealso Relevant description function [d_onco_rsp_label()]. |
|
| 15 |
#' |
|
| 16 |
#' @name estimate_multinomial_rsp |
|
| 17 |
#' @order 1 |
|
| 18 |
NULL |
|
| 19 | ||
| 20 |
#' Description of standard oncology response |
|
| 21 |
#' |
|
| 22 |
#' @description `r lifecycle::badge("stable")`
|
|
| 23 |
#' |
|
| 24 |
#' Describe the oncology response in a standard way. |
|
| 25 |
#' |
|
| 26 |
#' @param x (`character`)\cr the standard oncology codes to be described. |
|
| 27 |
#' |
|
| 28 |
#' @return Response labels. |
|
| 29 |
#' |
|
| 30 |
#' @seealso [estimate_multinomial_rsp()] |
|
| 31 |
#' |
|
| 32 |
#' @examples |
|
| 33 |
#' d_onco_rsp_label( |
|
| 34 |
#' c("CR", "PR", "SD", "NON CR/PD", "PD", "NE", "Missing", "<Missing>", "NE/Missing")
|
|
| 35 |
#' ) |
|
| 36 |
#' |
|
| 37 |
#' # Adding some values not considered in d_onco_rsp_label |
|
| 38 |
#' |
|
| 39 |
#' d_onco_rsp_label( |
|
| 40 |
#' c("CR", "PR", "hello", "hi")
|
|
| 41 |
#' ) |
|
| 42 |
#' |
|
| 43 |
#' @export |
|
| 44 |
d_onco_rsp_label <- function(x) {
|
|
| 45 | 2x |
x <- as.character(x) |
| 46 | 2x |
desc <- c( |
| 47 | 2x |
CR = "Complete Response (CR)", |
| 48 | 2x |
PR = "Partial Response (PR)", |
| 49 | 2x |
MR = "Minimal/Minor Response (MR)", |
| 50 | 2x |
MRD = "Minimal Residual Disease (MRD)", |
| 51 | 2x |
SD = "Stable Disease (SD)", |
| 52 | 2x |
PD = "Progressive Disease (PD)", |
| 53 | 2x |
`NON CR/PD` = "Non-CR or Non-PD (NON CR/PD)", |
| 54 | 2x |
NE = "Not Evaluable (NE)", |
| 55 | 2x |
`NE/Missing` = "Missing or unevaluable", |
| 56 | 2x |
Missing = "Missing", |
| 57 | 2x |
`NA` = "Not Applicable (NA)", |
| 58 | 2x |
ND = "Not Done (ND)" |
| 59 |
) |
|
| 60 | ||
| 61 | 2x |
values_label <- vapply( |
| 62 | 2x |
X = x, |
| 63 | 2x |
FUN.VALUE = character(1), |
| 64 | 2x |
function(val) {
|
| 65 | ! |
if (val %in% names(desc)) desc[val] else val |
| 66 |
} |
|
| 67 |
) |
|
| 68 | ||
| 69 | 2x |
factor(values_label, levels = c(intersect(desc, values_label), setdiff(values_label, desc))) |
| 70 |
} |
|
| 71 | ||
| 72 |
#' @describeIn estimate_multinomial_rsp Statistics function which feeds the length of `x` as number |
|
| 73 |
#' of successes, and `.N_col` as total number of successes and failures into [s_proportion()]. |
|
| 74 |
#' |
|
| 75 |
#' @return |
|
| 76 |
#' * `s_length_proportion()` returns statistics from [s_proportion()]. |
|
| 77 |
#' |
|
| 78 |
#' @examples |
|
| 79 |
#' s_length_proportion(rep("CR", 10), .N_col = 100)
|
|
| 80 |
#' s_length_proportion(factor(character(0)), .N_col = 100) |
|
| 81 |
#' |
|
| 82 |
#' @export |
|
| 83 |
s_length_proportion <- function(x, |
|
| 84 |
..., |
|
| 85 |
.N_col) { # nolint
|
|
| 86 | 10x |
checkmate::assert_multi_class(x, classes = c("factor", "character"))
|
| 87 | 9x |
checkmate::assert_vector(x, min.len = 0, max.len = .N_col) |
| 88 | 7x |
checkmate::assert_vector(unique(x), min.len = 0, max.len = 1) |
| 89 | ||
| 90 | 7x |
n_true <- length(x) |
| 91 | 7x |
n_false <- .N_col - n_true |
| 92 | 7x |
x_logical <- rep(c(TRUE, FALSE), c(n_true, n_false)) |
| 93 | 7x |
s_proportion(df = x_logical, ...) |
| 94 |
} |
|
| 95 | ||
| 96 |
#' @describeIn estimate_multinomial_rsp Formatted analysis function which is used as `afun` |
|
| 97 |
#' in `estimate_multinomial_response()`. |
|
| 98 |
#' |
|
| 99 |
#' @return |
|
| 100 |
#' * `a_length_proportion()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
| 101 |
#' |
|
| 102 |
#' @examples |
|
| 103 |
#' a_length_proportion(rep("CR", 10), .N_col = 100)
|
|
| 104 |
#' a_length_proportion(factor(character(0)), .N_col = 100) |
|
| 105 |
#' |
|
| 106 |
#' @export |
|
| 107 |
a_length_proportion <- function(x, |
|
| 108 |
..., |
|
| 109 |
.stats = NULL, |
|
| 110 |
.stat_names = NULL, |
|
| 111 |
.formats = NULL, |
|
| 112 |
.labels = NULL, |
|
| 113 |
.indent_mods = NULL) {
|
|
| 114 |
# Check for additional parameters to the statistics function |
|
| 115 | 6x |
dots_extra_args <- list(...) |
| 116 | 6x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
| 117 | 6x |
dots_extra_args$.additional_fun_parameters <- NULL |
| 118 | ||
| 119 |
# Check for user-defined functions |
|
| 120 | 6x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
| 121 | 6x |
.stats <- default_and_custom_stats_list$all_stats |
| 122 | 6x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
| 123 | ||
| 124 |
# Apply statistics function |
|
| 125 | 6x |
x_stats <- .apply_stat_functions( |
| 126 | 6x |
default_stat_fnc = s_length_proportion, |
| 127 | 6x |
custom_stat_fnc_list = custom_stat_functions, |
| 128 | 6x |
args_list = c( |
| 129 | 6x |
x = list(x), |
| 130 | 6x |
extra_afun_params, |
| 131 | 6x |
dots_extra_args |
| 132 |
) |
|
| 133 |
) |
|
| 134 | ||
| 135 |
# Fill in formatting defaults |
|
| 136 | 6x |
.stats <- get_stats("estimate_multinomial_response",
|
| 137 | 6x |
stats_in = .stats, |
| 138 | 6x |
custom_stats_in = names(custom_stat_functions) |
| 139 |
) |
|
| 140 | 6x |
x_stats <- x_stats[.stats] |
| 141 | 6x |
.formats <- get_formats_from_stats(.stats, .formats) |
| 142 | 6x |
.labels <- get_labels_from_stats( |
| 143 | 6x |
.stats, .labels, |
| 144 | 6x |
tern_defaults = c(lapply(x_stats, attr, "label"), tern_default_labels) |
| 145 |
) |
|
| 146 | 6x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods) |
| 147 | ||
| 148 |
# Auto format handling |
|
| 149 | 6x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
| 150 | ||
| 151 |
# Get and check statistical names |
|
| 152 | 6x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
| 153 | ||
| 154 | 6x |
in_rows( |
| 155 | 6x |
.list = x_stats, |
| 156 | 6x |
.formats = .formats, |
| 157 | 6x |
.names = .labels %>% .unlist_keep_nulls(), |
| 158 | 6x |
.stat_names = .stat_names, |
| 159 | 6x |
.labels = .labels %>% .unlist_keep_nulls(), |
| 160 | 6x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
| 161 |
) |
|
| 162 |
} |
|
| 163 | ||
| 164 |
#' @describeIn estimate_multinomial_rsp Layout-creating function which can take statistics function arguments |
|
| 165 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()] and |
|
| 166 |
#' [rtables::summarize_row_groups()]. |
|
| 167 |
#' |
|
| 168 |
#' @return |
|
| 169 |
#' * `estimate_multinomial_response()` returns a layout object suitable for passing to further layouting functions, |
|
| 170 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing |
|
| 171 |
#' the statistics from `s_length_proportion()` to the table layout. |
|
| 172 |
#' |
|
| 173 |
#' @examples |
|
| 174 |
#' library(dplyr) |
|
| 175 |
#' |
|
| 176 |
#' # Use of the layout creating function. |
|
| 177 |
#' dta_test <- data.frame( |
|
| 178 |
#' USUBJID = paste0("S", 1:12),
|
|
| 179 |
#' ARM = factor(rep(LETTERS[1:3], each = 4)), |
|
| 180 |
#' AVAL = c(A = c(1, 1, 1, 1), B = c(0, 0, 1, 1), C = c(0, 0, 0, 0)) |
|
| 181 |
#' ) %>% mutate( |
|
| 182 |
#' AVALC = factor(AVAL, |
|
| 183 |
#' levels = c(0, 1), |
|
| 184 |
#' labels = c("Complete Response (CR)", "Partial Response (PR)")
|
|
| 185 |
#' ) |
|
| 186 |
#' ) |
|
| 187 |
#' |
|
| 188 |
#' lyt <- basic_table() %>% |
|
| 189 |
#' split_cols_by("ARM") %>%
|
|
| 190 |
#' estimate_multinomial_response(var = "AVALC") |
|
| 191 |
#' |
|
| 192 |
#' tbl <- build_table(lyt, dta_test) |
|
| 193 |
#' |
|
| 194 |
#' tbl |
|
| 195 |
#' |
|
| 196 |
#' @export |
|
| 197 |
#' @order 2 |
|
| 198 |
estimate_multinomial_response <- function(lyt, |
|
| 199 |
var, |
|
| 200 |
na_str = default_na_str(), |
|
| 201 |
nested = TRUE, |
|
| 202 |
..., |
|
| 203 |
show_labels = "hidden", |
|
| 204 |
table_names = var, |
|
| 205 |
.stats = "prop_ci", |
|
| 206 |
.stat_names = NULL, |
|
| 207 |
.formats = list(prop_ci = "(xx.xx, xx.xx)"), |
|
| 208 |
.labels = NULL, |
|
| 209 |
.indent_mods = NULL) {
|
|
| 210 |
# Process standard extra arguments |
|
| 211 | 1x |
extra_args <- list(".stats" = .stats)
|
| 212 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
| 213 | 1x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
| 214 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
| 215 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
| 216 | ||
| 217 |
# Process additional arguments to the statistic function |
|
| 218 | 1x |
extra_args <- c(extra_args, ...) |
| 219 | ||
| 220 |
# Append additional info from layout to the analysis function |
|
| 221 | 1x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
| 222 | 1x |
formals(a_length_proportion) <- c(formals(a_length_proportion), extra_args[[".additional_fun_parameters"]]) |
| 223 | ||
| 224 | 1x |
lyt <- split_rows_by(lyt, var = var) |
| 225 | 1x |
lyt <- summarize_row_groups(lyt, na_str = na_str) |
| 226 | ||
| 227 | 1x |
analyze( |
| 228 | 1x |
lyt = lyt, |
| 229 | 1x |
vars = var, |
| 230 | 1x |
afun = a_length_proportion, |
| 231 | 1x |
na_str = na_str, |
| 232 | 1x |
nested = nested, |
| 233 | 1x |
extra_args = extra_args, |
| 234 | 1x |
show_labels = show_labels, |
| 235 | 1x |
table_names = table_names |
| 236 |
) |
|
| 237 |
} |
| 1 |
#' Difference test for two proportions |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' The analyze function [test_proportion_diff()] creates a layout element to test the difference between two |
|
| 6 |
#' proportions. The primary analysis variable, `vars`, indicates whether a response has occurred for each record. See |
|
| 7 |
#' the `method` parameter for options of methods to use to calculate the p-value. The argument `alternative` |
|
| 8 |
#' specifies the direction of the alternative hypothesis. Additionally, a stratification variable can be |
|
| 9 |
#' supplied via the `strata` element of the `variables` argument. |
|
| 10 |
#' |
|
| 11 |
#' @inheritParams argument_convention |
|
| 12 |
#' @param method (`string`)\cr one of `chisq`, `cmh`, `cmh_wh`, `fisher`, or `schouten`; |
|
| 13 |
#' specifies the test used to calculate the p-value. |
|
| 14 |
#' @param .stats (`character`)\cr statistics to select for the table. |
|
| 15 |
#' |
|
| 16 |
#' Options are: ``r shQuote(get_stats("test_proportion_diff"), type = "sh")``
|
|
| 17 |
#' |
|
| 18 |
#' @seealso [h_prop_diff_test] |
|
| 19 |
#' |
|
| 20 |
#' @name prop_diff_test |
|
| 21 |
#' @order 1 |
|
| 22 |
NULL |
|
| 23 | ||
| 24 |
#' @describeIn prop_diff_test Statistics function which tests the difference between two proportions. |
|
| 25 |
#' |
|
| 26 |
#' @return |
|
| 27 |
#' * `s_test_proportion_diff()` returns a named `list` with a single item `pval` with an attribute `label` |
|
| 28 |
#' describing the method used. The p-value tests the null hypothesis that proportions in two groups are the same. |
|
| 29 |
#' |
|
| 30 |
#' @examples |
|
| 31 |
#' |
|
| 32 |
#' ## "Mid" case: 4/4 respond in group A, 1/2 respond in group B. |
|
| 33 |
#' nex <- 100 # Number of example rows |
|
| 34 |
#' dta <- data.frame( |
|
| 35 |
#' "rsp" = sample(c(TRUE, FALSE), nex, TRUE), |
|
| 36 |
#' "grp" = sample(c("A", "B"), nex, TRUE),
|
|
| 37 |
#' "f1" = sample(c("a1", "a2"), nex, TRUE),
|
|
| 38 |
#' "f2" = sample(c("x", "y", "z"), nex, TRUE),
|
|
| 39 |
#' stringsAsFactors = TRUE |
|
| 40 |
#' ) |
|
| 41 |
#' s_test_proportion_diff( |
|
| 42 |
#' df = subset(dta, grp == "A"), |
|
| 43 |
#' .var = "rsp", |
|
| 44 |
#' .ref_group = subset(dta, grp == "B"), |
|
| 45 |
#' .in_ref_col = FALSE, |
|
| 46 |
#' variables = NULL, |
|
| 47 |
#' method = "chisq" |
|
| 48 |
#' ) |
|
| 49 |
#' |
|
| 50 |
#' @export |
|
| 51 |
s_test_proportion_diff <- function(df, |
|
| 52 |
.var, |
|
| 53 |
.ref_group, |
|
| 54 |
.in_ref_col, |
|
| 55 |
variables = list(strata = NULL), |
|
| 56 |
method = c("chisq", "schouten", "fisher", "cmh", "cmh_wh"),
|
|
| 57 |
alternative = c("two.sided", "less", "greater"),
|
|
| 58 |
...) {
|
|
| 59 | 64x |
method <- match.arg(method) |
| 60 | 64x |
y <- list(pval = numeric()) |
| 61 | ||
| 62 | 64x |
if (!.in_ref_col) {
|
| 63 | 56x |
assert_df_with_variables(df, list(rsp = .var)) |
| 64 | 56x |
assert_df_with_variables(.ref_group, list(rsp = .var)) |
| 65 | 56x |
rsp <- factor( |
| 66 | 56x |
c(.ref_group[[.var]], df[[.var]]), |
| 67 | 56x |
levels = c("TRUE", "FALSE")
|
| 68 |
) |
|
| 69 | 56x |
grp <- factor( |
| 70 | 56x |
rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))),
|
| 71 | 56x |
levels = c("ref", "Not-ref")
|
| 72 |
) |
|
| 73 | ||
| 74 | 56x |
if (!is.null(variables$strata) || method %in% c("cmh", "cmh_wh")) {
|
| 75 | 18x |
strata <- variables$strata |
| 76 | 18x |
checkmate::assert_false(is.null(strata)) |
| 77 | 18x |
strata_vars <- stats::setNames(as.list(strata), strata) |
| 78 | 18x |
assert_df_with_variables(df, strata_vars) |
| 79 | 18x |
assert_df_with_variables(.ref_group, strata_vars) |
| 80 | 18x |
strata <- c(interaction(.ref_group[strata]), interaction(df[strata])) |
| 81 |
} |
|
| 82 | ||
| 83 | 56x |
tbl <- switch(method, |
| 84 | 56x |
cmh = table(grp, rsp, strata), |
| 85 | 56x |
cmh_wh = table(grp, rsp, strata), |
| 86 | 56x |
table(grp, rsp) |
| 87 |
) |
|
| 88 | ||
| 89 | 56x |
y$pval <- switch(method, |
| 90 | 56x |
chisq = prop_chisq(tbl, alternative = alternative), |
| 91 | 56x |
cmh = prop_cmh(tbl, alternative = alternative), |
| 92 | 56x |
fisher = prop_fisher(tbl, alternative = alternative), |
| 93 | 56x |
schouten = prop_schouten(tbl, alternative = alternative), |
| 94 | 56x |
cmh_wh = prop_cmh(tbl, alternative = alternative, transform = "wilson_hilferty") |
| 95 |
) |
|
| 96 |
} |
|
| 97 | ||
| 98 | 64x |
y$pval <- formatters::with_label(y$pval, d_test_proportion_diff(method, alternative = alternative)) |
| 99 | 64x |
y |
| 100 |
} |
|
| 101 | ||
| 102 |
#' Description of the difference test between two proportions |
|
| 103 |
#' |
|
| 104 |
#' @description `r lifecycle::badge("stable")`
|
|
| 105 |
#' |
|
| 106 |
#' This is an auxiliary function that describes the analysis in `s_test_proportion_diff`. |
|
| 107 |
#' |
|
| 108 |
#' @inheritParams s_test_proportion_diff |
|
| 109 |
#' |
|
| 110 |
#' @return A `string` describing the test from which the p-value is derived. |
|
| 111 |
#' |
|
| 112 |
#' @export |
|
| 113 |
d_test_proportion_diff <- function(method, alternative = c("two.sided", "less", "greater")) {
|
|
| 114 | 67x |
checkmate::assert_string(method) |
| 115 | 67x |
alternative <- match.arg(alternative) |
| 116 | ||
| 117 | 67x |
meth_part <- switch(method, |
| 118 | 67x |
"schouten" = "Chi-Squared Test with Schouten Correction", |
| 119 | 67x |
"chisq" = "Chi-Squared Test", |
| 120 | 67x |
"cmh" = "Cochran-Mantel-Haenszel Test", |
| 121 | 67x |
"cmh_wh" = "Cochran-Mantel-Haenszel Test with Wilson-Hilferty Transformation", |
| 122 | 67x |
"fisher" = "Fisher's Exact Test", |
| 123 | 67x |
stop(paste(method, "does not have a description")) |
| 124 |
) |
|
| 125 | 67x |
alt_part <- switch(alternative, |
| 126 | 67x |
two.sided = "", |
| 127 | 67x |
less = ", 1-sided, direction less", |
| 128 | 67x |
greater = ", 1-sided, direction greater" |
| 129 |
) |
|
| 130 | 67x |
paste0("p-value (", meth_part, alt_part, ")")
|
| 131 |
} |
|
| 132 | ||
| 133 |
#' @describeIn prop_diff_test Formatted analysis function which is used as `afun` in `test_proportion_diff()`. |
|
| 134 |
#' |
|
| 135 |
#' @return |
|
| 136 |
#' * `a_test_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
| 137 |
#' |
|
| 138 |
#' @keywords internal |
|
| 139 |
a_test_proportion_diff <- function(df, |
|
| 140 |
..., |
|
| 141 |
.stats = NULL, |
|
| 142 |
.stat_names = NULL, |
|
| 143 |
.formats = NULL, |
|
| 144 |
.labels = NULL, |
|
| 145 |
.indent_mods = NULL) {
|
|
| 146 | 17x |
dots_extra_args <- list(...) |
| 147 | ||
| 148 |
# Check if there are user-defined functions |
|
| 149 | 17x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
| 150 | 17x |
.stats <- default_and_custom_stats_list$all_stats |
| 151 | 17x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
| 152 | ||
| 153 |
# Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params) |
|
| 154 | 17x |
extra_afun_params <- retrieve_extra_afun_params( |
| 155 | 17x |
names(dots_extra_args$.additional_fun_parameters) |
| 156 |
) |
|
| 157 | 17x |
dots_extra_args$.additional_fun_parameters <- NULL # After extraction we do not need them anymore |
| 158 | ||
| 159 |
# Main statistical functions application |
|
| 160 | 17x |
x_stats <- .apply_stat_functions( |
| 161 | 17x |
default_stat_fnc = s_test_proportion_diff, |
| 162 | 17x |
custom_stat_fnc_list = custom_stat_functions, |
| 163 | 17x |
args_list = c( |
| 164 | 17x |
df = list(df), |
| 165 | 17x |
extra_afun_params, |
| 166 | 17x |
dots_extra_args |
| 167 |
) |
|
| 168 |
) |
|
| 169 | ||
| 170 |
# Fill in with stats defaults if needed |
|
| 171 | 17x |
.stats <- get_stats("test_proportion_diff",
|
| 172 | 17x |
stats_in = .stats, |
| 173 | 17x |
custom_stats_in = names(custom_stat_functions) |
| 174 |
) |
|
| 175 | ||
| 176 | 17x |
x_stats <- x_stats[.stats] |
| 177 | ||
| 178 |
# Fill in formats/indents/labels with custom input and defaults |
|
| 179 | 17x |
.formats <- get_formats_from_stats(.stats, .formats) |
| 180 | 17x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods) |
| 181 | 17x |
if (is.null(.labels)) {
|
| 182 | 17x |
.labels <- sapply(x_stats, attr, "label") |
| 183 | 17x |
.labels <- .labels[nzchar(.labels) & !sapply(.labels, is.null) & !is.na(.labels)] |
| 184 |
} |
|
| 185 | 17x |
.labels <- get_labels_from_stats(.stats, .labels) |
| 186 | ||
| 187 |
# Auto format handling |
|
| 188 | 17x |
.formats <- apply_auto_formatting( |
| 189 | 17x |
.formats, |
| 190 | 17x |
x_stats, |
| 191 | 17x |
extra_afun_params$.df_row, |
| 192 | 17x |
extra_afun_params$.var |
| 193 |
) |
|
| 194 | ||
| 195 |
# Get and check statistical names from defaults |
|
| 196 | 17x |
.stat_names <- get_stat_names(x_stats, .stat_names) # note is x_stats |
| 197 | ||
| 198 | 17x |
in_rows( |
| 199 | 17x |
.list = x_stats, |
| 200 | 17x |
.formats = .formats, |
| 201 | 17x |
.names = names(.labels), |
| 202 | 17x |
.stat_names = .stat_names, |
| 203 | 17x |
.labels = .labels %>% .unlist_keep_nulls(), |
| 204 | 17x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
| 205 |
) |
|
| 206 |
} |
|
| 207 | ||
| 208 |
#' @describeIn prop_diff_test Layout-creating function which can take statistics function arguments |
|
| 209 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()]. |
|
| 210 |
#' |
|
| 211 |
#' @return |
|
| 212 |
#' * `test_proportion_diff()` returns a layout object suitable for passing to further layouting functions, |
|
| 213 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing |
|
| 214 |
#' the statistics from `s_test_proportion_diff()` to the table layout. |
|
| 215 |
#' |
|
| 216 |
#' @examples |
|
| 217 |
#' dta <- data.frame( |
|
| 218 |
#' rsp = sample(c(TRUE, FALSE), 100, TRUE), |
|
| 219 |
#' grp = factor(rep(c("A", "B"), each = 50)),
|
|
| 220 |
#' strata = factor(rep(c("V", "W", "X", "Y", "Z"), each = 20))
|
|
| 221 |
#' ) |
|
| 222 |
#' |
|
| 223 |
#' # With `rtables` pipelines. |
|
| 224 |
#' l <- basic_table() %>% |
|
| 225 |
#' split_cols_by(var = "grp", ref_group = "B") %>% |
|
| 226 |
#' test_proportion_diff( |
|
| 227 |
#' vars = "rsp", |
|
| 228 |
#' method = "cmh", variables = list(strata = "strata") |
|
| 229 |
#' ) |
|
| 230 |
#' |
|
| 231 |
#' build_table(l, df = dta) |
|
| 232 |
#' |
|
| 233 |
#' @export |
|
| 234 |
#' @order 2 |
|
| 235 |
test_proportion_diff <- function(lyt, |
|
| 236 |
vars, |
|
| 237 |
variables = list(strata = NULL), |
|
| 238 |
method = c("chisq", "schouten", "fisher", "cmh", "cmh_wh"),
|
|
| 239 |
alternative = c("two.sided", "less", "greater"),
|
|
| 240 |
var_labels = vars, |
|
| 241 |
na_str = default_na_str(), |
|
| 242 |
nested = TRUE, |
|
| 243 |
show_labels = "hidden", |
|
| 244 |
table_names = vars, |
|
| 245 |
section_div = NA_character_, |
|
| 246 |
..., |
|
| 247 |
na_rm = TRUE, |
|
| 248 |
.stats = c("pval"),
|
|
| 249 |
.stat_names = NULL, |
|
| 250 |
.formats = c(pval = "x.xxxx | (<0.0001)"), |
|
| 251 |
.labels = NULL, |
|
| 252 |
.indent_mods = c(pval = 1L)) {
|
|
| 253 |
# Depending on main functions |
|
| 254 | 8x |
extra_args <- list( |
| 255 | 8x |
"na_rm" = na_rm, |
| 256 | 8x |
"variables" = variables, |
| 257 | 8x |
"method" = method, |
| 258 | 8x |
"alternative" = alternative, |
| 259 |
... |
|
| 260 |
) |
|
| 261 | ||
| 262 |
# Needed defaults |
|
| 263 | 8x |
if (!is.null(.stats)) extra_args[[".stats"]] <- .stats |
| 264 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
| 265 | 8x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
| 266 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
| 267 | 8x |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
| 268 | ||
| 269 |
# Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params) |
|
| 270 | 8x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
| 271 | 8x |
formals(a_test_proportion_diff) <- c( |
| 272 | 8x |
formals(a_test_proportion_diff), |
| 273 | 8x |
extra_args[[".additional_fun_parameters"]] |
| 274 |
) |
|
| 275 | ||
| 276 |
# Main {rtables} structural call
|
|
| 277 | 8x |
analyze( |
| 278 | 8x |
lyt = lyt, |
| 279 | 8x |
vars = vars, |
| 280 | 8x |
var_labels = var_labels, |
| 281 | 8x |
afun = a_test_proportion_diff, |
| 282 | 8x |
na_str = na_str, |
| 283 | 8x |
inclNAs = !na_rm, |
| 284 | 8x |
nested = nested, |
| 285 | 8x |
extra_args = extra_args, |
| 286 | 8x |
show_labels = show_labels, |
| 287 | 8x |
table_names = table_names, |
| 288 | 8x |
section_div = section_div |
| 289 |
) |
|
| 290 |
} |
|
| 291 | ||
| 292 |
#' Helper functions to test proportion differences |
|
| 293 |
#' |
|
| 294 |
#' Helper functions to implement various tests on the difference between two proportions. |
|
| 295 |
#' |
|
| 296 |
#' @param tbl (`matrix`)\cr matrix with two groups in rows and the binary response (`TRUE`/`FALSE`) in columns. |
|
| 297 |
#' @inheritParams argument_convention |
|
| 298 |
#' |
|
| 299 |
#' @return A p-value. |
|
| 300 |
#' |
|
| 301 |
#' @seealso [prop_diff_test()] for implementation of these helper functions. |
|
| 302 |
#' |
|
| 303 |
#' @name h_prop_diff_test |
|
| 304 |
NULL |
|
| 305 | ||
| 306 |
#' @describeIn h_prop_diff_test Performs Chi-Squared test. Internally calls [stats::prop.test()]. |
|
| 307 |
#' |
|
| 308 |
#' @keywords internal |
|
| 309 |
prop_chisq <- function(tbl, alternative = c("two.sided", "less", "greater")) {
|
|
| 310 | 45x |
checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2) |
| 311 | 45x |
tbl <- tbl[, c("TRUE", "FALSE")]
|
| 312 | 45x |
if (any(colSums(tbl) == 0)) {
|
| 313 | 2x |
return(1) |
| 314 |
} |
|
| 315 | 43x |
stats::prop.test(tbl, correct = FALSE, alternative = alternative)$p.value |
| 316 |
} |
|
| 317 | ||
| 318 |
#' @describeIn h_prop_diff_test Performs stratified Cochran-Mantel-Haenszel test, |
|
| 319 |
#' using [stats::mantelhaen.test()] internally. |
|
| 320 |
#' Note that strata with less than two observations are automatically discarded. |
|
| 321 |
#' |
|
| 322 |
#' @param ary (`array`, 3 dimensions)\cr array with two groups in rows, the binary response |
|
| 323 |
#' (`TRUE`/`FALSE`) in columns, and the strata in the third dimension. |
|
| 324 |
#' @param transform (`string`)\cr either `none` or `wilson_hilferty`; specifies whether to apply |
|
| 325 |
#' the Wilson-Hilferty transformation of the chi-squared statistic. |
|
| 326 |
#' |
|
| 327 |
#' @keywords internal |
|
| 328 |
prop_cmh <- function(ary, |
|
| 329 |
alternative = c("two.sided", "less", "greater"),
|
|
| 330 |
transform = c("none", "wilson_hilferty")) {
|
|
| 331 | 26x |
checkmate::assert_array(ary) |
| 332 | 26x |
checkmate::assert_integer(c(ncol(ary), nrow(ary)), lower = 2, upper = 2) |
| 333 | 26x |
checkmate::assert_integer(length(dim(ary)), lower = 3, upper = 3) |
| 334 | 26x |
alternative <- match.arg(alternative) |
| 335 | 26x |
transform <- match.arg(transform) |
| 336 | ||
| 337 | 26x |
strata_sizes <- apply(ary, MARGIN = 3, sum) |
| 338 | 26x |
if (any(strata_sizes < 5)) {
|
| 339 | 1x |
warning("<5 data points in some strata. CMH test may be incorrect.")
|
| 340 | 1x |
ary <- ary[, , strata_sizes > 1] |
| 341 |
} |
|
| 342 | ||
| 343 | 26x |
cmh_res <- stats::mantelhaen.test(ary, correct = FALSE, alternative = alternative) |
| 344 | ||
| 345 | 26x |
if (transform == "none") {
|
| 346 | 23x |
cmh_res$p.value |
| 347 |
} else {
|
|
| 348 | 3x |
chisq_stat <- unname(cmh_res$statistic) |
| 349 | 3x |
df <- unname(cmh_res$parameter) |
| 350 | 3x |
num <- (chisq_stat / df)^(1 / 3) - (1 - 2 / (9 * df)) |
| 351 | 3x |
denom <- sqrt(2 / (9 * df)) |
| 352 | 3x |
wh_stat <- num / denom |
| 353 | ||
| 354 | 3x |
if (alternative == "two.sided") {
|
| 355 | 1x |
2 * stats::pnorm(-abs(wh_stat)) |
| 356 |
} else {
|
|
| 357 | 2x |
stats::pnorm(wh_stat, lower.tail = (alternative == "greater")) |
| 358 |
} |
|
| 359 |
} |
|
| 360 |
} |
|
| 361 | ||
| 362 |
#' @describeIn h_prop_diff_test Performs the Chi-Squared test with Schouten correction. |
|
| 363 |
#' |
|
| 364 |
#' @seealso Schouten correction is based upon \insertCite{Schouten1980-kd;textual}{tern}.
|
|
| 365 |
#' |
|
| 366 |
#' @keywords internal |
|
| 367 |
prop_schouten <- function(tbl, alternative = c("two.sided", "less", "greater")) {
|
|
| 368 | 102x |
checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2) |
| 369 | 102x |
alternative <- match.arg(alternative) |
| 370 | 102x |
tbl <- tbl[, c("TRUE", "FALSE")]
|
| 371 | 102x |
if (any(colSums(tbl) == 0)) {
|
| 372 | 1x |
return(1) |
| 373 |
} |
|
| 374 | ||
| 375 | 101x |
n <- sum(tbl) |
| 376 | 101x |
n1 <- sum(tbl[1, ]) |
| 377 | 101x |
n2 <- sum(tbl[2, ]) |
| 378 | ||
| 379 | 101x |
ad <- diag(tbl) |
| 380 | 101x |
bc <- diag(apply(tbl, 2, rev)) |
| 381 | 101x |
ac <- tbl[, 1] |
| 382 | 101x |
bd <- tbl[, 2] |
| 383 | ||
| 384 | 101x |
t_schouten <- (n - 1) * |
| 385 | 101x |
(abs(prod(ad) - prod(bc)) - 0.5 * min(n1, n2))^2 / |
| 386 | 101x |
(n1 * n2 * sum(ac) * sum(bd)) |
| 387 | ||
| 388 | 101x |
if (alternative == "two.sided") {
|
| 389 | 99x |
stats::pchisq(t_schouten, df = 1, lower.tail = FALSE) |
| 390 |
} else {
|
|
| 391 |
# This follows the logic in stats::prop.test for one-sided p-values. |
|
| 392 | 2x |
x1 <- tbl[1, 1] |
| 393 | 2x |
x2 <- tbl[2, 1] |
| 394 | 2x |
delta <- (x1 / n1) - (x2 / n2) |
| 395 | 2x |
z <- sign(delta) * sqrt(t_schouten) |
| 396 | 2x |
stats::pnorm(z, lower.tail = (alternative == "less")) |
| 397 |
} |
|
| 398 |
} |
|
| 399 | ||
| 400 |
#' @describeIn h_prop_diff_test Performs the Fisher's exact test. Internally calls [stats::fisher.test()]. |
|
| 401 |
#' |
|
| 402 |
#' @keywords internal |
|
| 403 |
prop_fisher <- function(tbl, alternative = c("two.sided", "less", "greater")) {
|
|
| 404 | 4x |
checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2) |
| 405 | 4x |
alternative <- match.arg(alternative) # Is needed here, because stats::fisher.test does not handle defaults. |
| 406 | 4x |
tbl <- tbl[, c("TRUE", "FALSE")]
|
| 407 | 4x |
stats::fisher.test(tbl, alternative = alternative)$p.value |
| 408 |
} |
| 1 |
#' Count patients with toxicity grades that have worsened from baseline by highest grade post-baseline |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' The analyze function [count_abnormal_lab_worsen_by_baseline()] creates a layout element to count patients with |
|
| 6 |
#' analysis toxicity grades which have worsened from baseline, categorized by highest (worst) grade post-baseline. |
|
| 7 |
#' |
|
| 8 |
#' This function analyzes primary analysis variable `var` which indicates analysis toxicity grades. Additional |
|
| 9 |
#' analysis variables that can be supplied as a list via the `variables` parameter are `id` (defaults to `USUBJID`), |
|
| 10 |
#' a variable to indicate unique subject identifiers, `baseline_var` (defaults to `BTOXGR`), a variable to indicate |
|
| 11 |
#' baseline toxicity grades, and `direction_var` (defaults to `GRADDIR`), a variable to indicate toxicity grade |
|
| 12 |
#' directions of interest to include (e.g. `"H"` (high), `"L"` (low), or `"B"` (both)). |
|
| 13 |
#' |
|
| 14 |
#' For the direction(s) specified in `direction_var`, patient counts by worst grade for patients who have |
|
| 15 |
#' worsened from baseline are calculated as follows: |
|
| 16 |
#' * `1` to `4`: The number of patients who have worsened from their baseline grades with worst |
|
| 17 |
#' grades 1-4, respectively. |
|
| 18 |
#' * `Any`: The total number of patients who have worsened from their baseline grades. |
|
| 19 |
#' |
|
| 20 |
#' Fractions are calculated by dividing the above counts by the number of patients who's analysis toxicity grades |
|
| 21 |
#' have worsened from baseline toxicity grades during treatment. |
|
| 22 |
#' |
|
| 23 |
#' Prior to using this function in your table layout you must use [rtables::split_rows_by()] to create a row |
|
| 24 |
#' split on variable `direction_var`. |
|
| 25 |
#' |
|
| 26 |
#' @inheritParams argument_convention |
|
| 27 |
#' @param variables (named `list` of `string`)\cr list of additional analysis variables including: |
|
| 28 |
#' * `id` (`string`)\cr subject variable name. |
|
| 29 |
#' * `baseline_var` (`string`)\cr name of the data column containing baseline toxicity variable. |
|
| 30 |
#' * `direction_var` (`string`)\cr see `direction_var` for more details. |
|
| 31 |
#' @param .stats (`character`)\cr statistics to select for the table. |
|
| 32 |
#' @param table_names `r lifecycle::badge("deprecated")` this parameter has no effect.
|
|
| 33 |
#' |
|
| 34 |
#' Options are: ``r shQuote(get_stats("abnormal_lab_worsen_by_baseline"), type = "sh")``
|
|
| 35 |
#' |
|
| 36 |
#' @seealso Relevant helper functions [h_adlb_worsen()] and [h_worsen_counter()] which are used within |
|
| 37 |
#' [s_count_abnormal_lab_worsen_by_baseline()] to process input data. |
|
| 38 |
#' |
|
| 39 |
#' @name abnormal_lab_worsen_by_baseline |
|
| 40 |
#' @order 1 |
|
| 41 |
NULL |
|
| 42 | ||
| 43 |
#' @describeIn abnormal_lab_worsen_by_baseline Statistics function for patients whose worst post-baseline |
|
| 44 |
#' lab grades are worse than their baseline grades. |
|
| 45 |
#' |
|
| 46 |
#' @return |
|
| 47 |
#' * `s_count_abnormal_lab_worsen_by_baseline()` returns the counts and fraction of patients whose worst |
|
| 48 |
#' post-baseline lab grades are worse than their baseline grades, for post-baseline worst grades |
|
| 49 |
#' "1", "2", "3", "4" and "Any". |
|
| 50 |
#' |
|
| 51 |
#' @keywords internal |
|
| 52 |
s_count_abnormal_lab_worsen_by_baseline <- function(df, |
|
| 53 |
.var = "ATOXGR", |
|
| 54 |
variables = list( |
|
| 55 |
id = "USUBJID", |
|
| 56 |
baseline_var = "BTOXGR", |
|
| 57 |
direction_var = "GRADDR" |
|
| 58 |
), |
|
| 59 |
...) {
|
|
| 60 | 13x |
checkmate::assert_string(.var) |
| 61 | 13x |
checkmate::assert_set_equal(names(variables), c("id", "baseline_var", "direction_var"))
|
| 62 | 13x |
checkmate::assert_string(variables$id) |
| 63 | 13x |
checkmate::assert_string(variables$baseline_var) |
| 64 | 13x |
checkmate::assert_string(variables$direction_var) |
| 65 | 13x |
assert_df_with_variables(df, c(aval = .var, variables[1:3])) |
| 66 | 13x |
assert_list_of_variables(variables) |
| 67 | ||
| 68 | 13x |
h_worsen_counter(df, variables$id, .var, variables$baseline_var, variables$direction_var) |
| 69 |
} |
|
| 70 | ||
| 71 |
#' @describeIn abnormal_lab_worsen_by_baseline Formatted analysis function which is used as `afun` |
|
| 72 |
#' in `count_abnormal_lab_worsen_by_baseline()`. |
|
| 73 |
#' |
|
| 74 |
#' @return |
|
| 75 |
#' * `a_count_abnormal_lab_worsen_by_baseline()` returns the corresponding list with |
|
| 76 |
#' formatted [rtables::CellValue()]. |
|
| 77 |
#' |
|
| 78 |
#' @keywords internal |
|
| 79 |
a_count_abnormal_lab_worsen_by_baseline <- function(df, |
|
| 80 |
..., |
|
| 81 |
.stats = NULL, |
|
| 82 |
.stat_names = NULL, |
|
| 83 |
.formats = NULL, |
|
| 84 |
.labels = NULL, |
|
| 85 |
.indent_mods = NULL) {
|
|
| 86 |
# Check for additional parameters to the statistics function |
|
| 87 | 12x |
dots_extra_args <- list(...) |
| 88 | 12x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
| 89 | 12x |
dots_extra_args$.additional_fun_parameters <- NULL |
| 90 | ||
| 91 |
# Check for user-defined functions |
|
| 92 | 12x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
| 93 | 12x |
.stats <- default_and_custom_stats_list$all_stats |
| 94 | 12x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
| 95 | ||
| 96 |
# Apply statistics function |
|
| 97 | 12x |
x_stats <- .apply_stat_functions( |
| 98 | 12x |
default_stat_fnc = s_count_abnormal_lab_worsen_by_baseline, |
| 99 | 12x |
custom_stat_fnc_list = custom_stat_functions, |
| 100 | 12x |
args_list = c( |
| 101 | 12x |
df = list(df), |
| 102 | 12x |
extra_afun_params, |
| 103 | 12x |
dots_extra_args |
| 104 |
) |
|
| 105 |
) |
|
| 106 | ||
| 107 |
# Fill in formatting defaults |
|
| 108 | 12x |
.stats <- get_stats( |
| 109 | 12x |
"abnormal_lab_worsen_by_baseline", |
| 110 | 12x |
stats_in = .stats, |
| 111 | 12x |
custom_stats_in = names(custom_stat_functions) |
| 112 |
) |
|
| 113 | 12x |
levels_per_stats <- lapply(x_stats, names) |
| 114 | 12x |
.formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) |
| 115 | 12x |
.labels <- get_labels_from_stats(.stats, .labels, levels_per_stats) |
| 116 | 12x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) |
| 117 | ||
| 118 | 12x |
x_stats <- x_stats[.stats] %>% |
| 119 | 12x |
.unlist_keep_nulls() %>% |
| 120 | 12x |
setNames(names(.formats)) |
| 121 | ||
| 122 |
# Auto format handling |
|
| 123 | 12x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
| 124 | ||
| 125 |
# Get and check statistical names |
|
| 126 | 12x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
| 127 | ||
| 128 | 12x |
in_rows( |
| 129 | 12x |
.list = x_stats, |
| 130 | 12x |
.formats = .formats, |
| 131 | 12x |
.names = .labels %>% .unlist_keep_nulls(), |
| 132 | 12x |
.stat_names = .stat_names, |
| 133 | 12x |
.labels = .labels %>% .unlist_keep_nulls(), |
| 134 | 12x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
| 135 |
) |
|
| 136 |
} |
|
| 137 | ||
| 138 |
#' @describeIn abnormal_lab_worsen_by_baseline Layout-creating function which can take statistics function |
|
| 139 |
#' arguments and additional format arguments. This function is a wrapper for [rtables::analyze()]. |
|
| 140 |
#' |
|
| 141 |
#' @return |
|
| 142 |
#' * `count_abnormal_lab_worsen_by_baseline()` returns a layout object suitable for passing to further layouting |
|
| 143 |
#' functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted |
|
| 144 |
#' rows containing the statistics from `s_count_abnormal_lab_worsen_by_baseline()` to the table layout. |
|
| 145 |
#' |
|
| 146 |
#' @examples |
|
| 147 |
#' library(dplyr) |
|
| 148 |
#' |
|
| 149 |
#' # The direction variable, GRADDR, is based on metadata |
|
| 150 |
#' adlb <- tern_ex_adlb %>% |
|
| 151 |
#' mutate( |
|
| 152 |
#' GRADDR = case_when( |
|
| 153 |
#' PARAMCD == "ALT" ~ "B", |
|
| 154 |
#' PARAMCD == "CRP" ~ "L", |
|
| 155 |
#' PARAMCD == "IGA" ~ "H" |
|
| 156 |
#' ) |
|
| 157 |
#' ) %>% |
|
| 158 |
#' filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "") |
|
| 159 |
#' |
|
| 160 |
#' df <- h_adlb_worsen( |
|
| 161 |
#' adlb, |
|
| 162 |
#' worst_flag_low = c("WGRLOFL" = "Y"),
|
|
| 163 |
#' worst_flag_high = c("WGRHIFL" = "Y"),
|
|
| 164 |
#' direction_var = "GRADDR" |
|
| 165 |
#' ) |
|
| 166 |
#' |
|
| 167 |
#' basic_table() %>% |
|
| 168 |
#' split_cols_by("ARMCD") %>%
|
|
| 169 |
#' add_colcounts() %>% |
|
| 170 |
#' split_rows_by("PARAMCD") %>%
|
|
| 171 |
#' split_rows_by("GRADDR") %>%
|
|
| 172 |
#' count_abnormal_lab_worsen_by_baseline( |
|
| 173 |
#' var = "ATOXGR", |
|
| 174 |
#' variables = list( |
|
| 175 |
#' id = "USUBJID", |
|
| 176 |
#' baseline_var = "BTOXGR", |
|
| 177 |
#' direction_var = "GRADDR" |
|
| 178 |
#' ) |
|
| 179 |
#' ) %>% |
|
| 180 |
#' append_topleft("Direction of Abnormality") %>%
|
|
| 181 |
#' build_table(df = df, alt_counts_df = tern_ex_adsl) |
|
| 182 |
#' |
|
| 183 |
#' @export |
|
| 184 |
#' @order 2 |
|
| 185 |
count_abnormal_lab_worsen_by_baseline <- function(lyt, |
|
| 186 |
var, |
|
| 187 |
variables = list( |
|
| 188 |
id = "USUBJID", |
|
| 189 |
baseline_var = "BTOXGR", |
|
| 190 |
direction_var = "GRADDR" |
|
| 191 |
), |
|
| 192 |
na_str = default_na_str(), |
|
| 193 |
nested = TRUE, |
|
| 194 |
..., |
|
| 195 |
table_names = lifecycle::deprecated(), |
|
| 196 |
.stats = "fraction", |
|
| 197 |
.stat_names = NULL, |
|
| 198 |
.formats = list(fraction = format_fraction), |
|
| 199 |
.labels = NULL, |
|
| 200 |
.indent_mods = NULL) {
|
|
| 201 | 1x |
checkmate::assert_string(var) |
| 202 | ||
| 203 |
# Deprecated argument warning |
|
| 204 | 1x |
if (lifecycle::is_present(table_names)) {
|
| 205 | ! |
lifecycle::deprecate_warn( |
| 206 | ! |
"0.9.8", "count_abnormal_lab_worsen_by_baseline(table_names)", |
| 207 | ! |
details = "The argument has no effect on the output." |
| 208 |
) |
|
| 209 |
} |
|
| 210 | ||
| 211 |
# Process standard extra arguments |
|
| 212 | 1x |
extra_args <- list(".stats" = .stats)
|
| 213 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
| 214 | 1x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
| 215 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
| 216 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
| 217 | ||
| 218 |
# Process additional arguments to the statistic function |
|
| 219 | 1x |
extra_args <- c(extra_args, "variables" = list(variables), ...) |
| 220 | ||
| 221 |
# Append additional info from layout to the analysis function |
|
| 222 | 1x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
| 223 | 1x |
formals(a_count_abnormal_lab_worsen_by_baseline) <- c( |
| 224 | 1x |
formals(a_count_abnormal_lab_worsen_by_baseline), extra_args[[".additional_fun_parameters"]] |
| 225 |
) |
|
| 226 | ||
| 227 | 1x |
analyze( |
| 228 | 1x |
lyt = lyt, |
| 229 | 1x |
vars = var, |
| 230 | 1x |
afun = a_count_abnormal_lab_worsen_by_baseline, |
| 231 | 1x |
na_str = na_str, |
| 232 | 1x |
nested = nested, |
| 233 | 1x |
extra_args = extra_args, |
| 234 | 1x |
show_labels = "hidden" |
| 235 |
) |
|
| 236 |
} |
|
| 237 | ||
| 238 |
#' Helper function to prepare ADLB with worst labs |
|
| 239 |
#' |
|
| 240 |
#' @description `r lifecycle::badge("stable")`
|
|
| 241 |
#' |
|
| 242 |
#' Helper function to prepare a `df` for generate the patient count shift table. |
|
| 243 |
#' |
|
| 244 |
#' @param adlb (`data.frame`)\cr ADLB data frame. |
|
| 245 |
#' @param worst_flag_low (named `vector`)\cr worst low post-baseline lab grade flag variable. See how this is |
|
| 246 |
#' implemented in the following examples. |
|
| 247 |
#' @param worst_flag_high (named `vector`)\cr worst high post-baseline lab grade flag variable. See how this is |
|
| 248 |
#' implemented in the following examples. |
|
| 249 |
#' @param direction_var (`string`)\cr name of the direction variable specifying the direction of the shift table of |
|
| 250 |
#' interest. Only lab records flagged by `L`, `H` or `B` are included in the shift table. |
|
| 251 |
#' * `L`: low direction only |
|
| 252 |
#' * `H`: high direction only |
|
| 253 |
#' * `B`: both low and high directions |
|
| 254 |
#' |
|
| 255 |
#' @return `h_adlb_worsen()` returns the `adlb` `data.frame` containing only the |
|
| 256 |
#' worst labs specified according to `worst_flag_low` or `worst_flag_high` for the |
|
| 257 |
#' direction specified according to `direction_var`. For instance, for a lab that is |
|
| 258 |
#' needed for the low direction only, only records flagged by `worst_flag_low` are |
|
| 259 |
#' selected. For a lab that is needed for both low and high directions, the worst |
|
| 260 |
#' low records are selected for the low direction, and the worst high record are selected |
|
| 261 |
#' for the high direction. |
|
| 262 |
#' |
|
| 263 |
#' @seealso [abnormal_lab_worsen_by_baseline] |
|
| 264 |
#' |
|
| 265 |
#' @examples |
|
| 266 |
#' library(dplyr) |
|
| 267 |
#' |
|
| 268 |
#' # The direction variable, GRADDR, is based on metadata |
|
| 269 |
#' adlb <- tern_ex_adlb %>% |
|
| 270 |
#' mutate( |
|
| 271 |
#' GRADDR = case_when( |
|
| 272 |
#' PARAMCD == "ALT" ~ "B", |
|
| 273 |
#' PARAMCD == "CRP" ~ "L", |
|
| 274 |
#' PARAMCD == "IGA" ~ "H" |
|
| 275 |
#' ) |
|
| 276 |
#' ) %>% |
|
| 277 |
#' filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "") |
|
| 278 |
#' |
|
| 279 |
#' df <- h_adlb_worsen( |
|
| 280 |
#' adlb, |
|
| 281 |
#' worst_flag_low = c("WGRLOFL" = "Y"),
|
|
| 282 |
#' worst_flag_high = c("WGRHIFL" = "Y"),
|
|
| 283 |
#' direction_var = "GRADDR" |
|
| 284 |
#' ) |
|
| 285 |
#' |
|
| 286 |
#' @export |
|
| 287 |
h_adlb_worsen <- function(adlb, |
|
| 288 |
worst_flag_low = NULL, |
|
| 289 |
worst_flag_high = NULL, |
|
| 290 |
direction_var) {
|
|
| 291 | 5x |
checkmate::assert_string(direction_var) |
| 292 | 5x |
checkmate::assert_subset(as.character(unique(adlb[[direction_var]])), c("B", "L", "H"))
|
| 293 | 5x |
assert_df_with_variables(adlb, list("Col" = direction_var))
|
| 294 | ||
| 295 | 5x |
if (any(unique(adlb[[direction_var]]) == "H")) {
|
| 296 | 4x |
assert_df_with_variables(adlb, list("High" = names(worst_flag_high)))
|
| 297 |
} |
|
| 298 | ||
| 299 | 5x |
if (any(unique(adlb[[direction_var]]) == "L")) {
|
| 300 | 4x |
assert_df_with_variables(adlb, list("Low" = names(worst_flag_low)))
|
| 301 |
} |
|
| 302 | ||
| 303 | 5x |
if (any(unique(adlb[[direction_var]]) == "B")) {
|
| 304 | 3x |
assert_df_with_variables( |
| 305 | 3x |
adlb, |
| 306 | 3x |
list( |
| 307 | 3x |
"Low" = names(worst_flag_low), |
| 308 | 3x |
"High" = names(worst_flag_high) |
| 309 |
) |
|
| 310 |
) |
|
| 311 |
} |
|
| 312 | ||
| 313 |
# extract patients with worst post-baseline lab, either low or high or both |
|
| 314 | 5x |
worst_flag <- c(worst_flag_low, worst_flag_high) |
| 315 | 5x |
col_names <- names(worst_flag) |
| 316 | 5x |
filter_values <- worst_flag |
| 317 | 5x |
temp <- Map( |
| 318 | 5x |
function(x, y) which(adlb[[x]] == y), |
| 319 | 5x |
col_names, |
| 320 | 5x |
filter_values |
| 321 |
) |
|
| 322 | 5x |
position_satisfy_filters <- Reduce(union, temp) |
| 323 | ||
| 324 |
# select variables of interest |
|
| 325 | 5x |
adlb_f <- adlb[position_satisfy_filters, ] |
| 326 | ||
| 327 |
# generate subsets for different directionality |
|
| 328 | 5x |
adlb_f_h <- adlb_f[which(adlb_f[[direction_var]] == "H"), ] |
| 329 | 5x |
adlb_f_l <- adlb_f[which(adlb_f[[direction_var]] == "L"), ] |
| 330 | 5x |
adlb_f_b <- adlb_f[which(adlb_f[[direction_var]] == "B"), ] |
| 331 | ||
| 332 |
# for labs requiring both high and low, data is duplicated and will be stacked on top of each other |
|
| 333 | 5x |
adlb_f_b_h <- adlb_f_b |
| 334 | 5x |
adlb_f_b_l <- adlb_f_b |
| 335 | ||
| 336 |
# extract data with worst lab |
|
| 337 | 5x |
if (!is.null(worst_flag_high) && !is.null(worst_flag_low)) {
|
| 338 |
# change H to High, L to Low |
|
| 339 | 3x |
adlb_f_h[[direction_var]] <- rep("High", nrow(adlb_f_h))
|
| 340 | 3x |
adlb_f_l[[direction_var]] <- rep("Low", nrow(adlb_f_l))
|
| 341 | ||
| 342 |
# change, B to High and Low |
|
| 343 | 3x |
adlb_f_b_h[[direction_var]] <- rep("High", nrow(adlb_f_b_h))
|
| 344 | 3x |
adlb_f_b_l[[direction_var]] <- rep("Low", nrow(adlb_f_b_l))
|
| 345 | ||
| 346 | 3x |
adlb_out_h <- adlb_f_h[which(adlb_f_h[[names(worst_flag_high)]] == worst_flag_high), ] |
| 347 | 3x |
adlb_out_b_h <- adlb_f_b_h[which(adlb_f_b_h[[names(worst_flag_high)]] == worst_flag_high), ] |
| 348 | 3x |
adlb_out_l <- adlb_f_l[which(adlb_f_l[[names(worst_flag_low)]] == worst_flag_low), ] |
| 349 | 3x |
adlb_out_b_l <- adlb_f_b_l[which(adlb_f_b_l[[names(worst_flag_low)]] == worst_flag_low), ] |
| 350 | ||
| 351 | 3x |
out <- rbind(adlb_out_h, adlb_out_b_h, adlb_out_l, adlb_out_b_l) |
| 352 | 2x |
} else if (!is.null(worst_flag_high)) {
|
| 353 | 1x |
adlb_f_h[[direction_var]] <- rep("High", nrow(adlb_f_h))
|
| 354 | 1x |
adlb_f_b_h[[direction_var]] <- rep("High", nrow(adlb_f_b_h))
|
| 355 | ||
| 356 | 1x |
adlb_out_h <- adlb_f_h[which(adlb_f_h[[names(worst_flag_high)]] == worst_flag_high), ] |
| 357 | 1x |
adlb_out_b_h <- adlb_f_b_h[which(adlb_f_b_h[[names(worst_flag_high)]] == worst_flag_high), ] |
| 358 | ||
| 359 | 1x |
out <- rbind(adlb_out_h, adlb_out_b_h) |
| 360 | 1x |
} else if (!is.null(worst_flag_low)) {
|
| 361 | 1x |
adlb_f_l[[direction_var]] <- rep("Low", nrow(adlb_f_l))
|
| 362 | 1x |
adlb_f_b_l[[direction_var]] <- rep("Low", nrow(adlb_f_b_l))
|
| 363 | ||
| 364 | 1x |
adlb_out_l <- adlb_f_l[which(adlb_f_l[[names(worst_flag_low)]] == worst_flag_low), ] |
| 365 | 1x |
adlb_out_b_l <- adlb_f_b_l[which(adlb_f_b_l[[names(worst_flag_low)]] == worst_flag_low), ] |
| 366 | ||
| 367 | 1x |
out <- rbind(adlb_out_l, adlb_out_b_l) |
| 368 |
} |
|
| 369 | ||
| 370 |
# label |
|
| 371 | 5x |
formatters::var_labels(out) <- formatters::var_labels(adlb_f, fill = FALSE) |
| 372 | ||
| 373 | 5x |
out |
| 374 |
} |
|
| 375 | ||
| 376 |
#' Helper function to analyze patients for `s_count_abnormal_lab_worsen_by_baseline()` |
|
| 377 |
#' |
|
| 378 |
#' @description `r lifecycle::badge("stable")`
|
|
| 379 |
#' |
|
| 380 |
#' Helper function to count the number of patients and the fraction of patients according to |
|
| 381 |
#' highest post-baseline lab grade variable `.var`, baseline lab grade variable `baseline_var`, |
|
| 382 |
#' and the direction of interest specified in `direction_var`. |
|
| 383 |
#' |
|
| 384 |
#' @inheritParams argument_convention |
|
| 385 |
#' @inheritParams h_adlb_worsen |
|
| 386 |
#' @param baseline_var (`string`)\cr name of the baseline lab grade variable. |
|
| 387 |
#' |
|
| 388 |
#' @return The counts and fraction of patients |
|
| 389 |
#' whose worst post-baseline lab grades are worse than their baseline grades, for |
|
| 390 |
#' post-baseline worst grades "1", "2", "3", "4" and "Any". |
|
| 391 |
#' |
|
| 392 |
#' @seealso [abnormal_lab_worsen_by_baseline] |
|
| 393 |
#' |
|
| 394 |
#' @examples |
|
| 395 |
#' library(dplyr) |
|
| 396 |
#' |
|
| 397 |
#' # The direction variable, GRADDR, is based on metadata |
|
| 398 |
#' adlb <- tern_ex_adlb %>% |
|
| 399 |
#' mutate( |
|
| 400 |
#' GRADDR = case_when( |
|
| 401 |
#' PARAMCD == "ALT" ~ "B", |
|
| 402 |
#' PARAMCD == "CRP" ~ "L", |
|
| 403 |
#' PARAMCD == "IGA" ~ "H" |
|
| 404 |
#' ) |
|
| 405 |
#' ) %>% |
|
| 406 |
#' filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "") |
|
| 407 |
#' |
|
| 408 |
#' df <- h_adlb_worsen( |
|
| 409 |
#' adlb, |
|
| 410 |
#' worst_flag_low = c("WGRLOFL" = "Y"),
|
|
| 411 |
#' worst_flag_high = c("WGRHIFL" = "Y"),
|
|
| 412 |
#' direction_var = "GRADDR" |
|
| 413 |
#' ) |
|
| 414 |
#' |
|
| 415 |
#' # `h_worsen_counter` |
|
| 416 |
#' h_worsen_counter( |
|
| 417 |
#' df %>% filter(PARAMCD == "CRP" & GRADDR == "Low"), |
|
| 418 |
#' id = "USUBJID", |
|
| 419 |
#' .var = "ATOXGR", |
|
| 420 |
#' baseline_var = "BTOXGR", |
|
| 421 |
#' direction_var = "GRADDR" |
|
| 422 |
#' ) |
|
| 423 |
#' |
|
| 424 |
#' @export |
|
| 425 |
h_worsen_counter <- function(df, id, .var, baseline_var, direction_var) {
|
|
| 426 | 17x |
checkmate::assert_string(id) |
| 427 | 17x |
checkmate::assert_string(.var) |
| 428 | 17x |
checkmate::assert_string(baseline_var) |
| 429 | 17x |
checkmate::assert_scalar(unique(df[[direction_var]])) |
| 430 | 17x |
checkmate::assert_subset(unique(df[[direction_var]]), c("High", "Low"))
|
| 431 | 17x |
assert_df_with_variables(df, list(val = c(id, .var, baseline_var, direction_var))) |
| 432 | ||
| 433 |
# remove post-baseline missing |
|
| 434 | 17x |
df <- df[df[[.var]] != "<Missing>", ] |
| 435 | ||
| 436 |
# obtain directionality |
|
| 437 | 17x |
direction <- unique(df[[direction_var]]) |
| 438 | ||
| 439 | 17x |
if (direction == "Low") {
|
| 440 | 10x |
grade <- -1:-4 |
| 441 | 10x |
worst_grade <- -4 |
| 442 | 7x |
} else if (direction == "High") {
|
| 443 | 7x |
grade <- 1:4 |
| 444 | 7x |
worst_grade <- 4 |
| 445 |
} |
|
| 446 | ||
| 447 | 17x |
if (nrow(df) > 0) {
|
| 448 | 17x |
by_grade <- lapply(grade, function(i) {
|
| 449 |
# filter baseline values that is less than i or <Missing> |
|
| 450 | 68x |
df_temp <- df[df[[baseline_var]] %in% c((i + sign(i) * -1):(-1 * worst_grade), "<Missing>"), ] |
| 451 |
# num: number of patients with post-baseline worst lab equal to i |
|
| 452 | 68x |
num <- length(unique(df_temp[df_temp[[.var]] %in% i, id, drop = TRUE])) |
| 453 |
# denom: number of patients with baseline values less than i or <missing> and post-baseline in the same direction |
|
| 454 | 68x |
denom <- length(unique(df_temp[[id]])) |
| 455 | 68x |
rm(df_temp) |
| 456 | 68x |
c(num = num, denom = denom) |
| 457 |
}) |
|
| 458 |
} else {
|
|
| 459 | ! |
by_grade <- lapply(1, function(i) {
|
| 460 | ! |
c(num = 0, denom = 0) |
| 461 |
}) |
|
| 462 |
} |
|
| 463 | ||
| 464 | 17x |
names(by_grade) <- as.character(seq_along(by_grade)) |
| 465 | ||
| 466 |
# baseline grade less 4 or missing |
|
| 467 | 17x |
df_temp <- df[!df[[baseline_var]] %in% worst_grade, ] |
| 468 | ||
| 469 |
# denom: number of patients with baseline values less than 4 or <missing> and post-baseline in the same direction |
|
| 470 | 17x |
denom <- length(unique(df_temp[, id, drop = TRUE])) |
| 471 | ||
| 472 |
# condition 1: missing baseline and in the direction of abnormality |
|
| 473 | 17x |
con1 <- which(df_temp[[baseline_var]] == "<Missing>" & df_temp[[.var]] %in% grade) |
| 474 | 17x |
df_temp_nm <- df_temp[which(df_temp[[baseline_var]] != "<Missing>" & df_temp[[.var]] %in% grade), ] |
| 475 | ||
| 476 |
# condition 2: if post-baseline values are present then post-baseline values must be worse than baseline |
|
| 477 | 17x |
if (direction == "Low") {
|
| 478 | 10x |
con2 <- which(as.numeric(as.character(df_temp_nm[[.var]])) < as.numeric(as.character(df_temp_nm[[baseline_var]]))) |
| 479 |
} else {
|
|
| 480 | 7x |
con2 <- which(as.numeric(as.character(df_temp_nm[[.var]])) > as.numeric(as.character(df_temp_nm[[baseline_var]]))) |
| 481 |
} |
|
| 482 | ||
| 483 |
# number of patients satisfy either conditions 1 or 2 |
|
| 484 | 17x |
num <- length(unique(df_temp[union(con1, con2), id, drop = TRUE])) |
| 485 | ||
| 486 | 17x |
list(fraction = c(by_grade, list("Any" = c(num = num, denom = denom))))
|
| 487 |
} |
| 1 |
#' Count the number of patients with particular flags |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' The analyze function [count_patients_with_flags()] creates a layout element to calculate counts of patients for |
|
| 6 |
#' which user-specified flags are present. |
|
| 7 |
#' |
|
| 8 |
#' This function analyzes primary analysis variable `var` which indicates unique subject identifiers. Flags |
|
| 9 |
#' variables to analyze are specified by the user via the `flag_variables` argument, and must either take value |
|
| 10 |
#' `TRUE` (flag present) or `FALSE` (flag absent) for each record. |
|
| 11 |
#' |
|
| 12 |
#' If there are multiple records with the same flag present for a patient, only one occurrence is counted. |
|
| 13 |
#' |
|
| 14 |
#' @inheritParams argument_convention |
|
| 15 |
#' @param flag_variables (`character`)\cr a vector specifying the names of `logical` variables from analysis dataset |
|
| 16 |
#' used for counting the number of unique identifiers. |
|
| 17 |
#' @param flag_labels (`character`)\cr vector of labels to use for flag variables. If any labels are also specified via |
|
| 18 |
#' the `.labels` parameter, the `.labels` values will take precedence and replace these labels. |
|
| 19 |
#' @param .stats (`character`)\cr statistics to select for the table. |
|
| 20 |
#' |
|
| 21 |
#' Options are: ``r shQuote(get_stats("count_patients_with_flags"), type = "sh")``
|
|
| 22 |
#' |
|
| 23 |
#' @seealso [count_patients_with_event] |
|
| 24 |
#' |
|
| 25 |
#' @name count_patients_with_flags |
|
| 26 |
#' @order 1 |
|
| 27 |
NULL |
|
| 28 | ||
| 29 |
#' @describeIn count_patients_with_flags Statistics function which counts the number of patients for which |
|
| 30 |
#' a particular flag variable is `TRUE`. |
|
| 31 |
#' |
|
| 32 |
#' @inheritParams analyze_variables |
|
| 33 |
#' @param .var (`string`)\cr name of the column that contains the unique identifier. |
|
| 34 |
#' |
|
| 35 |
#' @note If `flag_labels` is not specified, variables labels will be extracted from `df`. If variables are not |
|
| 36 |
#' labeled, variable names will be used instead. Alternatively, a named `vector` can be supplied to |
|
| 37 |
#' `flag_variables` such that within each name-value pair the name corresponds to the variable name and the value is |
|
| 38 |
#' the label to use for this variable. |
|
| 39 |
#' |
|
| 40 |
#' @return |
|
| 41 |
#' * `s_count_patients_with_flags()` returns the count and the fraction of unique identifiers with each particular |
|
| 42 |
#' flag as a list of statistics `n`, `count`, `count_fraction`, and `n_blq`, with one element per flag. |
|
| 43 |
#' |
|
| 44 |
#' @examples |
|
| 45 |
#' # `s_count_patients_with_flags()` |
|
| 46 |
#' |
|
| 47 |
#' s_count_patients_with_flags( |
|
| 48 |
#' adae, |
|
| 49 |
#' "SUBJID", |
|
| 50 |
#' flag_variables = c("fl1", "fl2", "fl3", "fl4"),
|
|
| 51 |
#' denom = "N_col", |
|
| 52 |
#' .N_col = 1000 |
|
| 53 |
#' ) |
|
| 54 |
#' |
|
| 55 |
#' @export |
|
| 56 |
s_count_patients_with_flags <- function(df, |
|
| 57 |
.var, |
|
| 58 |
.N_col = ncol(df), # nolint |
|
| 59 |
.N_row = nrow(df), # nolint |
|
| 60 |
..., |
|
| 61 |
flag_variables, |
|
| 62 |
flag_labels = NULL, |
|
| 63 |
denom = c("n", "N_col", "N_row")) {
|
|
| 64 | 41x |
checkmate::assert_character(flag_variables) |
| 65 | 41x |
if (!is.null(flag_labels)) {
|
| 66 | 6x |
checkmate::assert_character(flag_labels, len = length(flag_variables), any.missing = FALSE) |
| 67 | 6x |
flag_names <- flag_labels |
| 68 |
} else {
|
|
| 69 | 35x |
if (is.null(names(flag_variables))) {
|
| 70 | 20x |
flag_names <- formatters::var_labels(df[flag_variables], fill = TRUE) |
| 71 |
} else {
|
|
| 72 | 15x |
flag_names <- unname(flag_variables) |
| 73 | 15x |
flag_variables <- names(flag_variables) |
| 74 |
} |
|
| 75 |
} |
|
| 76 | 41x |
checkmate::assert_subset(flag_variables, colnames(df)) |
| 77 | ||
| 78 | 41x |
temp <- sapply(flag_variables, function(x) {
|
| 79 | 123x |
tmp <- Map(function(y) which(df[[y]]), x) |
| 80 | 123x |
position_satisfy_flags <- Reduce(intersect, tmp) |
| 81 | 123x |
id_satisfy_flags <- as.character(unique(df[position_satisfy_flags, ][[.var]])) |
| 82 | 123x |
s_count_values( |
| 83 | 123x |
x = as.character(unique(df[[.var]])), |
| 84 | 123x |
values = id_satisfy_flags, |
| 85 | 123x |
denom = denom, |
| 86 | 123x |
.N_col = .N_col, |
| 87 | 123x |
.N_row = .N_row |
| 88 |
) |
|
| 89 |
}) |
|
| 90 | 41x |
colnames(temp) <- flag_names |
| 91 | 41x |
temp <- data.frame(t(temp)) |
| 92 | 41x |
result <- as.list(temp) |
| 93 | 41x |
if (length(flag_variables) == 1) {
|
| 94 | 1x |
for (i in seq(3)) names(result[[i]]) <- flag_names[1] |
| 95 |
} |
|
| 96 | 41x |
result |
| 97 |
} |
|
| 98 | ||
| 99 |
#' @describeIn count_patients_with_flags Formatted analysis function which is used as `afun` |
|
| 100 |
#' in `count_patients_with_flags()`. |
|
| 101 |
#' |
|
| 102 |
#' @return |
|
| 103 |
#' * `a_count_patients_with_flags()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
| 104 |
#' |
|
| 105 |
#' @examples |
|
| 106 |
#' a_count_patients_with_flags( |
|
| 107 |
#' adae, |
|
| 108 |
#' .N_col = 10L, |
|
| 109 |
#' .N_row = 10L, |
|
| 110 |
#' .var = "USUBJID", |
|
| 111 |
#' flag_variables = c("fl1", "fl2", "fl3", "fl4")
|
|
| 112 |
#' ) |
|
| 113 |
#' |
|
| 114 |
#' @export |
|
| 115 |
a_count_patients_with_flags <- function(df, |
|
| 116 |
labelstr = "", |
|
| 117 |
..., |
|
| 118 |
.stats = NULL, |
|
| 119 |
.stat_names = NULL, |
|
| 120 |
.formats = NULL, |
|
| 121 |
.labels = NULL, |
|
| 122 |
.indent_mods = NULL) {
|
|
| 123 |
# Check for additional parameters to the statistics function |
|
| 124 | 31x |
dots_extra_args <- list(...) |
| 125 | 31x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
| 126 | 31x |
dots_extra_args$.additional_fun_parameters <- NULL |
| 127 | 31x |
flag_variables <- dots_extra_args[["flag_variables"]] |
| 128 | 31x |
flag_labels <- dots_extra_args[["flag_labels"]] |
| 129 | ||
| 130 | 17x |
if (is.null(names(flag_variables))) flag_variables <- formatters::var_labels(df, fill = TRUE)[flag_variables] |
| 131 | 26x |
if (is.null(flag_labels)) flag_labels <- flag_variables |
| 132 | ||
| 133 |
# Check for user-defined functions |
|
| 134 | 31x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
| 135 | 31x |
.stats <- default_and_custom_stats_list$all_stats |
| 136 | 31x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
| 137 | ||
| 138 |
# Apply statistics function |
|
| 139 | 31x |
x_stats <- .apply_stat_functions( |
| 140 | 31x |
default_stat_fnc = s_count_patients_with_flags, |
| 141 | 31x |
custom_stat_fnc_list = custom_stat_functions, |
| 142 | 31x |
args_list = c( |
| 143 | 31x |
df = list(df), |
| 144 | 31x |
extra_afun_params, |
| 145 | 31x |
dots_extra_args |
| 146 |
) |
|
| 147 |
) |
|
| 148 | ||
| 149 |
# Fill in formatting defaults |
|
| 150 | 31x |
.stats <- get_stats("count_patients_with_flags", stats_in = .stats, custom_stats_in = names(custom_stat_functions))
|
| 151 | 31x |
levels_per_stats <- rep(list(names(flag_variables)), length(.stats)) %>% stats::setNames(.stats) |
| 152 | 31x |
.formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) |
| 153 | 31x |
.labels <- get_labels_from_stats( |
| 154 | 31x |
.stats, .labels, levels_per_stats, |
| 155 | 31x |
tern_defaults = flag_labels %>% stats::setNames(names(flag_variables)) |
| 156 |
) |
|
| 157 | 31x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) |
| 158 | ||
| 159 | 31x |
x_stats <- x_stats[.stats] %>% |
| 160 | 31x |
.unlist_keep_nulls() %>% |
| 161 | 31x |
setNames(names(.formats)) |
| 162 | ||
| 163 |
# Auto format handling |
|
| 164 | 31x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
| 165 | ||
| 166 |
# Get and check statistical names |
|
| 167 | 31x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
| 168 | ||
| 169 | 31x |
in_rows( |
| 170 | 31x |
.list = x_stats, |
| 171 | 31x |
.formats = .formats, |
| 172 | 31x |
.names = names(.labels), |
| 173 | 31x |
.stat_names = .stat_names, |
| 174 | 31x |
.labels = .labels %>% .unlist_keep_nulls(), |
| 175 | 31x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
| 176 |
) |
|
| 177 |
} |
|
| 178 | ||
| 179 |
#' @describeIn count_patients_with_flags Layout-creating function which can take statistics function |
|
| 180 |
#' arguments and additional format arguments. This function is a wrapper for [rtables::analyze()]. |
|
| 181 |
#' |
|
| 182 |
#' @return |
|
| 183 |
#' * `count_patients_with_flags()` returns a layout object suitable for passing to further layouting functions, |
|
| 184 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing |
|
| 185 |
#' the statistics from `s_count_patients_with_flags()` to the table layout. |
|
| 186 |
#' |
|
| 187 |
#' @examples |
|
| 188 |
#' # Add labelled flag variables to analysis dataset. |
|
| 189 |
#' adae <- tern_ex_adae %>% |
|
| 190 |
#' dplyr::mutate( |
|
| 191 |
#' fl1 = TRUE %>% with_label("Total AEs"),
|
|
| 192 |
#' fl2 = (TRTEMFL == "Y") %>% |
|
| 193 |
#' with_label("Total number of patients with at least one adverse event"),
|
|
| 194 |
#' fl3 = (TRTEMFL == "Y" & AEOUT == "FATAL") %>% |
|
| 195 |
#' with_label("Total number of patients with fatal AEs"),
|
|
| 196 |
#' fl4 = (TRTEMFL == "Y" & AEOUT == "FATAL" & AEREL == "Y") %>% |
|
| 197 |
#' with_label("Total number of patients with related fatal AEs")
|
|
| 198 |
#' ) |
|
| 199 |
#' |
|
| 200 |
#' lyt <- basic_table() %>% |
|
| 201 |
#' split_cols_by("ARM") %>%
|
|
| 202 |
#' add_colcounts() %>% |
|
| 203 |
#' count_patients_with_flags( |
|
| 204 |
#' "SUBJID", |
|
| 205 |
#' flag_variables = c("fl1", "fl2", "fl3", "fl4"),
|
|
| 206 |
#' denom = "N_col" |
|
| 207 |
#' ) |
|
| 208 |
#' |
|
| 209 |
#' build_table(lyt, adae, alt_counts_df = tern_ex_adsl) |
|
| 210 |
#' |
|
| 211 |
#' @export |
|
| 212 |
#' @order 2 |
|
| 213 |
count_patients_with_flags <- function(lyt, |
|
| 214 |
var, |
|
| 215 |
flag_variables, |
|
| 216 |
flag_labels = NULL, |
|
| 217 |
var_labels = var, |
|
| 218 |
show_labels = "hidden", |
|
| 219 |
riskdiff = FALSE, |
|
| 220 |
na_str = default_na_str(), |
|
| 221 |
nested = TRUE, |
|
| 222 |
..., |
|
| 223 |
table_names = paste0("tbl_flags_", var),
|
|
| 224 |
.stats = "count_fraction", |
|
| 225 |
.stat_names = NULL, |
|
| 226 |
.formats = list(count_fraction = format_count_fraction_fixed_dp), |
|
| 227 |
.indent_mods = NULL, |
|
| 228 |
.labels = NULL) {
|
|
| 229 | 11x |
checkmate::assert_flag(riskdiff) |
| 230 | 11x |
afun <- if (isFALSE(riskdiff)) a_count_patients_with_flags else afun_riskdiff |
| 231 | ||
| 232 |
# Process standard extra arguments |
|
| 233 | 11x |
extra_args <- list(".stats" = .stats)
|
| 234 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
| 235 | 11x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
| 236 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
| 237 | 1x |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
| 238 | ||
| 239 |
# Process additional arguments to the statistic function |
|
| 240 | 11x |
extra_args <- c( |
| 241 | 11x |
extra_args, |
| 242 | 11x |
flag_variables = list(flag_variables), flag_labels = list(flag_labels), |
| 243 | 11x |
if (!isFALSE(riskdiff)) list(afun = list("s_count_patients_with_flags" = a_count_patients_with_flags)),
|
| 244 |
... |
|
| 245 |
) |
|
| 246 | ||
| 247 |
# Append additional info from layout to the analysis function |
|
| 248 | 11x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
| 249 | 11x |
formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]]) |
| 250 | ||
| 251 | 11x |
analyze( |
| 252 | 11x |
lyt = lyt, |
| 253 | 11x |
vars = var, |
| 254 | 11x |
afun = afun, |
| 255 | 11x |
na_str = na_str, |
| 256 | 11x |
nested = nested, |
| 257 | 11x |
extra_args = extra_args, |
| 258 | 11x |
var_labels = var_labels, |
| 259 | 11x |
show_labels = show_labels, |
| 260 | 11x |
table_names = table_names |
| 261 |
) |
|
| 262 |
} |
| 1 |
#' Count specific values |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' The analyze function [count_values()] creates a layout element to calculate counts of specific values within a |
|
| 6 |
#' variable of interest. |
|
| 7 |
#' |
|
| 8 |
#' This function analyzes one or more variables of interest supplied as a vector to `vars`. Values to |
|
| 9 |
#' count for variable(s) in `vars` can be given as a vector via the `values` argument. One row of |
|
| 10 |
#' counts will be generated for each variable. |
|
| 11 |
#' |
|
| 12 |
#' @inheritParams argument_convention |
|
| 13 |
#' @param values (`character`)\cr specific values that should be counted. |
|
| 14 |
#' @param .stats (`character`)\cr statistics to select for the table. |
|
| 15 |
#' |
|
| 16 |
#' Options are: ``r shQuote(get_stats("count_values"), type = "sh")``
|
|
| 17 |
#' |
|
| 18 |
#' @note |
|
| 19 |
#' * For `factor` variables, `s_count_values` checks whether `values` are all included in the levels of `x` |
|
| 20 |
#' and fails otherwise. |
|
| 21 |
#' * For `count_values()`, variable labels are shown when there is more than one element in `vars`, |
|
| 22 |
#' otherwise they are hidden. |
|
| 23 |
#' |
|
| 24 |
#' @name count_values |
|
| 25 |
#' @order 1 |
|
| 26 |
NULL |
|
| 27 | ||
| 28 |
#' @describeIn count_values S3 generic function to count values. |
|
| 29 |
#' |
|
| 30 |
#' @inheritParams s_summary.logical |
|
| 31 |
#' |
|
| 32 |
#' @return |
|
| 33 |
#' * `s_count_values()` returns output of [s_summary()] for specified values of a non-numeric variable. |
|
| 34 |
#' |
|
| 35 |
#' @export |
|
| 36 |
s_count_values <- function(x, |
|
| 37 |
values, |
|
| 38 |
na.rm = TRUE, # nolint |
|
| 39 |
denom = c("n", "N_col", "N_row"),
|
|
| 40 |
...) {
|
|
| 41 | 207x |
UseMethod("s_count_values", x)
|
| 42 |
} |
|
| 43 | ||
| 44 |
#' @describeIn count_values Method for `character` class. |
|
| 45 |
#' |
|
| 46 |
#' @method s_count_values character |
|
| 47 |
#' |
|
| 48 |
#' @examples |
|
| 49 |
#' # `s_count_values.character` |
|
| 50 |
#' s_count_values(x = c("a", "b", "a"), values = "a")
|
|
| 51 |
#' s_count_values(x = c("a", "b", "a", NA, NA), values = "b", na.rm = FALSE)
|
|
| 52 |
#' |
|
| 53 |
#' @export |
|
| 54 |
s_count_values.character <- function(x, |
|
| 55 |
values = "Y", |
|
| 56 |
na.rm = TRUE, # nolint |
|
| 57 |
...) {
|
|
| 58 | 200x |
checkmate::assert_character(values) |
| 59 | ||
| 60 | 200x |
if (na.rm) {
|
| 61 | 199x |
x <- x[!is.na(x)] |
| 62 |
} |
|
| 63 | ||
| 64 | 200x |
is_in_values <- x %in% values |
| 65 | ||
| 66 | 200x |
s_summary(is_in_values, na_rm = na.rm, ...) |
| 67 |
} |
|
| 68 | ||
| 69 |
#' @describeIn count_values Method for `factor` class. This makes an automatic |
|
| 70 |
#' conversion to `character` and then forwards to the method for characters. |
|
| 71 |
#' |
|
| 72 |
#' @method s_count_values factor |
|
| 73 |
#' |
|
| 74 |
#' @examples |
|
| 75 |
#' # `s_count_values.factor` |
|
| 76 |
#' s_count_values(x = factor(c("a", "b", "a")), values = "a")
|
|
| 77 |
#' |
|
| 78 |
#' @export |
|
| 79 |
s_count_values.factor <- function(x, |
|
| 80 |
values = "Y", |
|
| 81 |
...) {
|
|
| 82 | 4x |
s_count_values(as.character(x), values = as.character(values), ...) |
| 83 |
} |
|
| 84 | ||
| 85 |
#' @describeIn count_values Method for `logical` class. |
|
| 86 |
#' |
|
| 87 |
#' @method s_count_values logical |
|
| 88 |
#' |
|
| 89 |
#' @examples |
|
| 90 |
#' # `s_count_values.logical` |
|
| 91 |
#' s_count_values(x = c(TRUE, FALSE, TRUE)) |
|
| 92 |
#' |
|
| 93 |
#' @export |
|
| 94 |
s_count_values.logical <- function(x, values = TRUE, ...) {
|
|
| 95 | 3x |
checkmate::assert_logical(values) |
| 96 | 3x |
s_count_values(as.character(x), values = as.character(values), ...) |
| 97 |
} |
|
| 98 | ||
| 99 |
#' @describeIn count_values Formatted analysis function which is used as `afun` |
|
| 100 |
#' in `count_values()`. |
|
| 101 |
#' |
|
| 102 |
#' @return |
|
| 103 |
#' * `a_count_values()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
| 104 |
#' |
|
| 105 |
#' @examples |
|
| 106 |
#' # `a_count_values` |
|
| 107 |
#' a_count_values(x = factor(c("a", "b", "a")), values = "a", .N_col = 10, .N_row = 10)
|
|
| 108 |
#' |
|
| 109 |
#' @export |
|
| 110 |
a_count_values <- function(x, |
|
| 111 |
..., |
|
| 112 |
.stats = NULL, |
|
| 113 |
.stat_names = NULL, |
|
| 114 |
.formats = NULL, |
|
| 115 |
.labels = NULL, |
|
| 116 |
.indent_mods = NULL) {
|
|
| 117 |
# Check for additional parameters to the statistics function |
|
| 118 | 17x |
dots_extra_args <- list(...) |
| 119 | 17x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
| 120 | 17x |
dots_extra_args$.additional_fun_parameters <- NULL |
| 121 | ||
| 122 |
# Check for user-defined functions |
|
| 123 | 17x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
| 124 | 17x |
.stats <- default_and_custom_stats_list$all_stats |
| 125 | 17x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
| 126 | ||
| 127 |
# Main statistic calculations |
|
| 128 | 17x |
x_stats <- .apply_stat_functions( |
| 129 | 17x |
default_stat_fnc = s_count_values, |
| 130 | 17x |
custom_stat_fnc_list = custom_stat_functions, |
| 131 | 17x |
args_list = c( |
| 132 | 17x |
x = list(x), |
| 133 | 17x |
extra_afun_params, |
| 134 | 17x |
dots_extra_args |
| 135 |
) |
|
| 136 |
) |
|
| 137 | ||
| 138 |
# Fill in formatting defaults |
|
| 139 | 17x |
.stats <- get_stats("analyze_vars_counts", stats_in = .stats, custom_stats_in = names(custom_stat_functions))
|
| 140 | 17x |
.formats <- get_formats_from_stats(.stats, .formats) |
| 141 | 17x |
.labels <- get_labels_from_stats(.stats, .labels) |
| 142 | 17x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods) |
| 143 | ||
| 144 | 17x |
x_stats <- x_stats[.stats] |
| 145 | ||
| 146 |
# Auto format handling |
|
| 147 | 17x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
| 148 | ||
| 149 |
# Get and check statistical names |
|
| 150 | 17x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
| 151 | ||
| 152 | 17x |
in_rows( |
| 153 | 17x |
.list = x_stats, |
| 154 | 17x |
.formats = .formats, |
| 155 | 17x |
.names = names(.labels), |
| 156 | 17x |
.stat_names = .stat_names, |
| 157 | 17x |
.labels = .labels %>% .unlist_keep_nulls(), |
| 158 | 17x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
| 159 |
) |
|
| 160 |
} |
|
| 161 | ||
| 162 |
#' @describeIn count_values Layout-creating function which can take statistics function arguments |
|
| 163 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()]. |
|
| 164 |
#' |
|
| 165 |
#' @return |
|
| 166 |
#' * `count_values()` returns a layout object suitable for passing to further layouting functions, |
|
| 167 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing |
|
| 168 |
#' the statistics from `s_count_values()` to the table layout. |
|
| 169 |
#' |
|
| 170 |
#' @examples |
|
| 171 |
#' # `count_values` |
|
| 172 |
#' basic_table() %>% |
|
| 173 |
#' count_values("Species", values = "setosa") %>%
|
|
| 174 |
#' build_table(iris) |
|
| 175 |
#' |
|
| 176 |
#' @export |
|
| 177 |
#' @order 2 |
|
| 178 |
count_values <- function(lyt, |
|
| 179 |
vars, |
|
| 180 |
values, |
|
| 181 |
na_str = default_na_str(), |
|
| 182 |
na_rm = TRUE, |
|
| 183 |
nested = TRUE, |
|
| 184 |
..., |
|
| 185 |
table_names = vars, |
|
| 186 |
.stats = "count_fraction", |
|
| 187 |
.stat_names = NULL, |
|
| 188 |
.formats = c(count_fraction = "xx (xx.xx%)", count = "xx"), |
|
| 189 |
.labels = c(count_fraction = paste(values, collapse = ", ")), |
|
| 190 |
.indent_mods = NULL) {
|
|
| 191 |
# Process standard extra arguments |
|
| 192 | 8x |
extra_args <- list(".stats" = .stats)
|
| 193 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
| 194 | 8x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
| 195 | 8x |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
| 196 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
| 197 | ||
| 198 |
# Process additional arguments to the statistic function |
|
| 199 | 8x |
extra_args <- c( |
| 200 | 8x |
extra_args, |
| 201 | 8x |
na_rm = na_rm, values = list(values), |
| 202 |
... |
|
| 203 |
) |
|
| 204 | ||
| 205 |
# Adding additional info from layout to analysis function |
|
| 206 | 8x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
| 207 | 8x |
formals(a_count_values) <- c(formals(a_count_values), extra_args[[".additional_fun_parameters"]]) |
| 208 | ||
| 209 | 8x |
analyze( |
| 210 | 8x |
lyt, |
| 211 | 8x |
vars, |
| 212 | 8x |
afun = a_count_values, |
| 213 | 8x |
na_str = na_str, |
| 214 | 8x |
nested = nested, |
| 215 | 8x |
extra_args = extra_args, |
| 216 | 8x |
show_labels = ifelse(length(vars) > 1, "visible", "hidden"), |
| 217 | 8x |
table_names = table_names |
| 218 |
) |
|
| 219 |
} |
| 1 |
#' Missing data |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Substitute missing data with a string or factor level. |
|
| 6 |
#' |
|
| 7 |
#' @param x (`factor` or `character`)\cr values for which any missing values should be substituted. |
|
| 8 |
#' @param label (`string`)\cr string that missing data should be replaced with. |
|
| 9 |
#' @param drop_na (`flag`)\cr if `TRUE` and `x` is a factor, any levels |
|
| 10 |
#' that are only `label` will be dropped. |
|
| 11 |
#' |
|
| 12 |
#' @return `x` with any `NA` values substituted by `label`. |
|
| 13 |
#' |
|
| 14 |
#' @examples |
|
| 15 |
#' explicit_na(c(NA, "a", "b")) |
|
| 16 |
#' is.na(explicit_na(c(NA, "a", "b"))) |
|
| 17 |
#' |
|
| 18 |
#' explicit_na(factor(c(NA, "a", "b"))) |
|
| 19 |
#' is.na(explicit_na(factor(c(NA, "a", "b")))) |
|
| 20 |
#' |
|
| 21 |
#' explicit_na(sas_na(c("a", "")))
|
|
| 22 |
#' |
|
| 23 |
#' explicit_na(factor(levels = c(NA, "a"))) |
|
| 24 |
#' explicit_na(factor(levels = c(NA, "a")), drop_na = TRUE) # previous default |
|
| 25 |
#' |
|
| 26 |
#' @export |
|
| 27 |
explicit_na <- function(x, label = default_na_str(), drop_na = default_drop_na()) {
|
|
| 28 | 257x |
checkmate::assert_string(label, na.ok = TRUE) |
| 29 | 257x |
checkmate::assert_flag(drop_na) |
| 30 | ||
| 31 | 257x |
if (is.factor(x)) {
|
| 32 | 151x |
x <- forcats::fct_na_value_to_level(x, label) |
| 33 | 151x |
if (drop_na) {
|
| 34 | 151x |
x <- forcats::fct_drop(x, only = label) |
| 35 |
} |
|
| 36 | 106x |
} else if (is.character(x)) {
|
| 37 | 106x |
x[is.na(x)] <- label |
| 38 |
} else {
|
|
| 39 | ! |
stop("only factors and character vectors allowed")
|
| 40 |
} |
|
| 41 | ||
| 42 | 257x |
x |
| 43 |
} |
|
| 44 |
#' @describeIn explicit_na should `NA` values without a dedicated level be dropped? |
|
| 45 |
#' |
|
| 46 |
#' @return |
|
| 47 |
#' * `tern_default_drop_na`: (`flag`)\cr default value for `drop_na` argument in `explicit_na()`. |
|
| 48 |
#' |
|
| 49 |
#' @export |
|
| 50 |
default_drop_na <- function() {
|
|
| 51 | 257x |
getOption("tern_default_drop_na", default = TRUE)
|
| 52 |
} |
|
| 53 | ||
| 54 |
#' @describeIn explicit_na Setter for default `NA` value replacement string. Sets the |
|
| 55 |
#' option `"tern_default_drop_na"` within the R environment. |
|
| 56 |
#' |
|
| 57 |
#' @return |
|
| 58 |
#' * `tern_default_drop_na` has no return value. |
|
| 59 |
#' |
|
| 60 |
#' @export |
|
| 61 |
set_default_drop_na <- function(drop_na) {
|
|
| 62 | ! |
checkmate::assert_flag(drop_na, null.ok = TRUE) |
| 63 | ! |
options("tern_default_drop_na" = drop_na)
|
| 64 |
} |
|
| 65 | ||
| 66 |
#' Convert strings to `NA` |
|
| 67 |
#' |
|
| 68 |
#' @description `r lifecycle::badge("stable")`
|
|
| 69 |
#' |
|
| 70 |
#' SAS imports missing data as empty strings or strings with whitespaces only. This helper function can be used to |
|
| 71 |
#' convert these values to `NA`s. |
|
| 72 |
#' |
|
| 73 |
#' @inheritParams explicit_na |
|
| 74 |
#' @param empty (`flag`)\cr if `TRUE`, empty strings get replaced by `NA`. |
|
| 75 |
#' @param whitespaces (`flag`)\cr if `TRUE`, strings made from only whitespaces get replaced with `NA`. |
|
| 76 |
#' |
|
| 77 |
#' @return `x` with `""` and/or whitespace-only values substituted by `NA`, depending on the values of |
|
| 78 |
#' `empty` and `whitespaces`. |
|
| 79 |
#' |
|
| 80 |
#' @examples |
|
| 81 |
#' sas_na(c("1", "", " ", " ", "b"))
|
|
| 82 |
#' sas_na(factor(c("", " ", "b")))
|
|
| 83 |
#' |
|
| 84 |
#' is.na(sas_na(c("1", "", " ", " ", "b")))
|
|
| 85 |
#' |
|
| 86 |
#' @export |
|
| 87 |
sas_na <- function(x, empty = TRUE, whitespaces = TRUE) {
|
|
| 88 | 246x |
checkmate::assert_flag(empty) |
| 89 | 246x |
checkmate::assert_flag(whitespaces) |
| 90 | ||
| 91 | 246x |
if (is.factor(x)) {
|
| 92 | 135x |
empty_levels <- levels(x) == "" |
| 93 | 11x |
if (empty && any(empty_levels)) levels(x)[empty_levels] <- NA |
| 94 | ||
| 95 | 135x |
ws_levels <- grepl("^\\s+$", levels(x))
|
| 96 | ! |
if (whitespaces && any(ws_levels)) levels(x)[ws_levels] <- NA |
| 97 | ||
| 98 | 135x |
x |
| 99 | 111x |
} else if (is.character(x)) {
|
| 100 | 111x |
if (empty) x[x == ""] <- NA_character_ |
| 101 | ||
| 102 | 111x |
if (whitespaces) x[grepl("^\\s+$", x)] <- NA_character_
|
| 103 | ||
| 104 | 111x |
x |
| 105 |
} else {
|
|
| 106 | ! |
stop("only factors and character vectors allowed")
|
| 107 |
} |
|
| 108 |
} |
| 1 |
#' Helper functions for multivariate logistic regression |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Helper functions used in calculations for logistic regression. |
|
| 6 |
#' |
|
| 7 |
#' @inheritParams argument_convention |
|
| 8 |
#' @param fit_glm (`glm`)\cr logistic regression model fitted by [stats::glm()] with "binomial" family. |
|
| 9 |
#' Limited functionality is also available for conditional logistic regression models fitted by |
|
| 10 |
#' [survival::clogit()], currently this is used only by [extract_rsp_biomarkers()]. |
|
| 11 |
#' @param x (`character`)\cr a variable or interaction term in `fit_glm` (depending on the helper function used). |
|
| 12 |
#' |
|
| 13 |
#' @examples |
|
| 14 |
#' library(dplyr) |
|
| 15 |
#' library(broom) |
|
| 16 |
#' |
|
| 17 |
#' adrs_f <- tern_ex_adrs %>% |
|
| 18 |
#' filter(PARAMCD == "BESRSPI") %>% |
|
| 19 |
#' filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
|
|
| 20 |
#' mutate( |
|
| 21 |
#' Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
|
|
| 22 |
#' RACE = factor(RACE), |
|
| 23 |
#' SEX = factor(SEX) |
|
| 24 |
#' ) |
|
| 25 |
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response") |
|
| 26 |
#' mod1 <- fit_logistic( |
|
| 27 |
#' data = adrs_f, |
|
| 28 |
#' variables = list( |
|
| 29 |
#' response = "Response", |
|
| 30 |
#' arm = "ARMCD", |
|
| 31 |
#' covariates = c("AGE", "RACE")
|
|
| 32 |
#' ) |
|
| 33 |
#' ) |
|
| 34 |
#' mod2 <- fit_logistic( |
|
| 35 |
#' data = adrs_f, |
|
| 36 |
#' variables = list( |
|
| 37 |
#' response = "Response", |
|
| 38 |
#' arm = "ARMCD", |
|
| 39 |
#' covariates = c("AGE", "RACE"),
|
|
| 40 |
#' interaction = "AGE" |
|
| 41 |
#' ) |
|
| 42 |
#' ) |
|
| 43 |
#' |
|
| 44 |
#' @name h_logistic_regression |
|
| 45 |
NULL |
|
| 46 | ||
| 47 |
#' @describeIn h_logistic_regression Helper function to extract interaction variable names from a fitted |
|
| 48 |
#' model assuming only one interaction term. |
|
| 49 |
#' |
|
| 50 |
#' @return Vector of names of interaction variables. |
|
| 51 |
#' |
|
| 52 |
#' @export |
|
| 53 |
h_get_interaction_vars <- function(fit_glm) {
|
|
| 54 | 34x |
checkmate::assert_class(fit_glm, "glm") |
| 55 | 34x |
terms_name <- attr(stats::terms(fit_glm), "term.labels") |
| 56 | 34x |
terms_order <- attr(stats::terms(fit_glm), "order") |
| 57 | 34x |
interaction_term <- terms_name[terms_order == 2] |
| 58 | 34x |
checkmate::assert_string(interaction_term) |
| 59 | 34x |
strsplit(interaction_term, split = ":")[[1]] |
| 60 |
} |
|
| 61 | ||
| 62 |
#' @describeIn h_logistic_regression Helper function to get the right coefficient name from the |
|
| 63 |
#' interaction variable names and the given levels. The main value here is that the order |
|
| 64 |
#' of first and second variable is checked in the `interaction_vars` input. |
|
| 65 |
#' |
|
| 66 |
#' @param interaction_vars (`character(2)`)\cr interaction variable names. |
|
| 67 |
#' @param first_var_with_level (`character(2)`)\cr the first variable name with the interaction level. |
|
| 68 |
#' @param second_var_with_level (`character(2)`)\cr the second variable name with the interaction level. |
|
| 69 |
#' |
|
| 70 |
#' @return Name of coefficient. |
|
| 71 |
#' |
|
| 72 |
#' @export |
|
| 73 |
h_interaction_coef_name <- function(interaction_vars, |
|
| 74 |
first_var_with_level, |
|
| 75 |
second_var_with_level) {
|
|
| 76 | 55x |
checkmate::assert_character(interaction_vars, len = 2, any.missing = FALSE) |
| 77 | 55x |
checkmate::assert_character(first_var_with_level, len = 2, any.missing = FALSE) |
| 78 | 55x |
checkmate::assert_character(second_var_with_level, len = 2, any.missing = FALSE) |
| 79 | 55x |
checkmate::assert_subset(c(first_var_with_level[1], second_var_with_level[1]), interaction_vars) |
| 80 | ||
| 81 | 55x |
first_name <- paste(first_var_with_level, collapse = "") |
| 82 | 55x |
second_name <- paste(second_var_with_level, collapse = "") |
| 83 | 55x |
if (first_var_with_level[1] == interaction_vars[1]) {
|
| 84 | 36x |
paste(first_name, second_name, sep = ":") |
| 85 | 19x |
} else if (second_var_with_level[1] == interaction_vars[1]) {
|
| 86 | 19x |
paste(second_name, first_name, sep = ":") |
| 87 |
} |
|
| 88 |
} |
|
| 89 | ||
| 90 |
#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates |
|
| 91 |
#' for the case when both the odds ratio and the interaction variable are categorical. |
|
| 92 |
#' |
|
| 93 |
#' @param odds_ratio_var (`string`)\cr the odds ratio variable. |
|
| 94 |
#' @param interaction_var (`string`)\cr the interaction variable. |
|
| 95 |
#' |
|
| 96 |
#' @return Odds ratio. |
|
| 97 |
#' |
|
| 98 |
#' @export |
|
| 99 |
h_or_cat_interaction <- function(odds_ratio_var, |
|
| 100 |
interaction_var, |
|
| 101 |
fit_glm, |
|
| 102 |
conf_level = 0.95) {
|
|
| 103 | 8x |
interaction_vars <- h_get_interaction_vars(fit_glm) |
| 104 | 8x |
checkmate::assert_string(odds_ratio_var) |
| 105 | 8x |
checkmate::assert_string(interaction_var) |
| 106 | 8x |
checkmate::assert_subset(c(odds_ratio_var, interaction_var), interaction_vars) |
| 107 | 8x |
checkmate::assert_vector(interaction_vars, len = 2) |
| 108 | ||
| 109 | 8x |
xs_level <- fit_glm$xlevels |
| 110 | 8x |
xs_coef <- stats::coef(fit_glm) |
| 111 | 8x |
xs_vcov <- stats::vcov(fit_glm) |
| 112 | 8x |
y <- list() |
| 113 | 8x |
for (var_level in xs_level[[odds_ratio_var]][-1]) {
|
| 114 | 14x |
x <- list() |
| 115 | 14x |
for (ref_level in xs_level[[interaction_var]]) {
|
| 116 | 38x |
coef_names <- paste0(odds_ratio_var, var_level) |
| 117 | 38x |
if (ref_level != xs_level[[interaction_var]][1]) {
|
| 118 | 24x |
interaction_coef_name <- h_interaction_coef_name( |
| 119 | 24x |
interaction_vars, |
| 120 | 24x |
c(odds_ratio_var, var_level), |
| 121 | 24x |
c(interaction_var, ref_level) |
| 122 |
) |
|
| 123 | 24x |
coef_names <- c( |
| 124 | 24x |
coef_names, |
| 125 | 24x |
interaction_coef_name |
| 126 |
) |
|
| 127 |
} |
|
| 128 | 38x |
if (length(coef_names) > 1) {
|
| 129 | 24x |
ones <- t(c(1, 1)) |
| 130 | 24x |
est <- as.numeric(ones %*% xs_coef[coef_names]) |
| 131 | 24x |
se <- sqrt(as.numeric(ones %*% xs_vcov[coef_names, coef_names] %*% t(ones))) |
| 132 |
} else {
|
|
| 133 | 14x |
est <- xs_coef[coef_names] |
| 134 | 14x |
se <- sqrt(as.numeric(xs_vcov[coef_names, coef_names])) |
| 135 |
} |
|
| 136 | 38x |
or <- exp(est) |
| 137 | 38x |
ci <- exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se) |
| 138 | 38x |
x[[ref_level]] <- list(or = or, ci = ci) |
| 139 |
} |
|
| 140 | 14x |
y[[var_level]] <- x |
| 141 |
} |
|
| 142 | 8x |
y |
| 143 |
} |
|
| 144 | ||
| 145 |
#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates |
|
| 146 |
#' for the case when either the odds ratio or the interaction variable is continuous. |
|
| 147 |
#' |
|
| 148 |
#' @param at (`numeric` or `NULL`)\cr optional values for the interaction variable. Otherwise |
|
| 149 |
#' the median is used. |
|
| 150 |
#' |
|
| 151 |
#' @return Odds ratio. |
|
| 152 |
#' |
|
| 153 |
#' @note We don't provide a function for the case when both variables are continuous because |
|
| 154 |
#' this does not arise in this table, as the treatment arm variable will always be involved |
|
| 155 |
#' and categorical. |
|
| 156 |
#' |
|
| 157 |
#' @export |
|
| 158 |
h_or_cont_interaction <- function(odds_ratio_var, |
|
| 159 |
interaction_var, |
|
| 160 |
fit_glm, |
|
| 161 |
at = NULL, |
|
| 162 |
conf_level = 0.95) {
|
|
| 163 | 13x |
interaction_vars <- h_get_interaction_vars(fit_glm) |
| 164 | 13x |
checkmate::assert_string(odds_ratio_var) |
| 165 | 13x |
checkmate::assert_string(interaction_var) |
| 166 | 13x |
checkmate::assert_subset(c(odds_ratio_var, interaction_var), interaction_vars) |
| 167 | 13x |
checkmate::assert_vector(interaction_vars, len = 2) |
| 168 | 13x |
checkmate::assert_numeric(at, min.len = 1, null.ok = TRUE, any.missing = FALSE) |
| 169 | 13x |
xs_level <- fit_glm$xlevels |
| 170 | 13x |
xs_coef <- stats::coef(fit_glm) |
| 171 | 13x |
xs_vcov <- stats::vcov(fit_glm) |
| 172 | 13x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
| 173 | 13x |
model_data <- fit_glm$model |
| 174 | 13x |
if (!is.null(at)) {
|
| 175 | 3x |
checkmate::assert_set_equal(xs_class[interaction_var], "numeric") |
| 176 |
} |
|
| 177 | 12x |
y <- list() |
| 178 | 12x |
if (xs_class[interaction_var] == "numeric") {
|
| 179 | 7x |
if (is.null(at)) {
|
| 180 | 5x |
at <- ceiling(stats::median(model_data[[interaction_var]])) |
| 181 |
} |
|
| 182 | ||
| 183 | 7x |
for (var_level in xs_level[[odds_ratio_var]][-1]) {
|
| 184 | 14x |
x <- list() |
| 185 | 14x |
for (increment in at) {
|
| 186 | 20x |
coef_names <- paste0(odds_ratio_var, var_level) |
| 187 | 20x |
if (increment != 0) {
|
| 188 | 20x |
interaction_coef_name <- h_interaction_coef_name( |
| 189 | 20x |
interaction_vars, |
| 190 | 20x |
c(odds_ratio_var, var_level), |
| 191 | 20x |
c(interaction_var, "") |
| 192 |
) |
|
| 193 | 20x |
coef_names <- c( |
| 194 | 20x |
coef_names, |
| 195 | 20x |
interaction_coef_name |
| 196 |
) |
|
| 197 |
} |
|
| 198 | 20x |
if (length(coef_names) > 1) {
|
| 199 | 20x |
xvec <- t(c(1, increment)) |
| 200 | 20x |
est <- as.numeric(xvec %*% xs_coef[coef_names]) |
| 201 | 20x |
se <- sqrt(as.numeric(xvec %*% xs_vcov[coef_names, coef_names] %*% t(xvec))) |
| 202 |
} else {
|
|
| 203 | ! |
est <- xs_coef[coef_names] |
| 204 | ! |
se <- sqrt(as.numeric(xs_vcov[coef_names, coef_names])) |
| 205 |
} |
|
| 206 | 20x |
or <- exp(est) |
| 207 | 20x |
ci <- exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se) |
| 208 | 20x |
x[[as.character(increment)]] <- list(or = or, ci = ci) |
| 209 |
} |
|
| 210 | 14x |
y[[var_level]] <- x |
| 211 |
} |
|
| 212 |
} else {
|
|
| 213 | 5x |
checkmate::assert_set_equal(xs_class[odds_ratio_var], "numeric") |
| 214 | 5x |
checkmate::assert_set_equal(xs_class[interaction_var], "factor") |
| 215 | 5x |
for (var_level in xs_level[[interaction_var]]) {
|
| 216 | 15x |
coef_names <- odds_ratio_var |
| 217 | 15x |
if (var_level != xs_level[[interaction_var]][1]) {
|
| 218 | 10x |
interaction_coef_name <- h_interaction_coef_name( |
| 219 | 10x |
interaction_vars, |
| 220 | 10x |
c(odds_ratio_var, ""), |
| 221 | 10x |
c(interaction_var, var_level) |
| 222 |
) |
|
| 223 | 10x |
coef_names <- c( |
| 224 | 10x |
coef_names, |
| 225 | 10x |
interaction_coef_name |
| 226 |
) |
|
| 227 |
} |
|
| 228 | 15x |
if (length(coef_names) > 1) {
|
| 229 | 10x |
xvec <- t(c(1, 1)) |
| 230 | 10x |
est <- as.numeric(xvec %*% xs_coef[coef_names]) |
| 231 | 10x |
se <- sqrt(as.numeric(xvec %*% xs_vcov[coef_names, coef_names] %*% t(xvec))) |
| 232 |
} else {
|
|
| 233 | 5x |
est <- xs_coef[coef_names] |
| 234 | 5x |
se <- sqrt(as.numeric(xs_vcov[coef_names, coef_names])) |
| 235 |
} |
|
| 236 | 15x |
or <- exp(est) |
| 237 | 15x |
ci <- exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se) |
| 238 | 15x |
y[[var_level]] <- list(or = or, ci = ci) |
| 239 |
} |
|
| 240 |
} |
|
| 241 | 12x |
y |
| 242 |
} |
|
| 243 | ||
| 244 |
#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates |
|
| 245 |
#' in case of an interaction. This is a wrapper for [h_or_cont_interaction()] and |
|
| 246 |
#' [h_or_cat_interaction()]. |
|
| 247 |
#' |
|
| 248 |
#' @return Odds ratio. |
|
| 249 |
#' |
|
| 250 |
#' @export |
|
| 251 |
h_or_interaction <- function(odds_ratio_var, |
|
| 252 |
interaction_var, |
|
| 253 |
fit_glm, |
|
| 254 |
at = NULL, |
|
| 255 |
conf_level = 0.95) {
|
|
| 256 | 15x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
| 257 | 15x |
if (any(xs_class[c(odds_ratio_var, interaction_var)] == "numeric")) {
|
| 258 | 9x |
h_or_cont_interaction( |
| 259 | 9x |
odds_ratio_var, |
| 260 | 9x |
interaction_var, |
| 261 | 9x |
fit_glm, |
| 262 | 9x |
at = at, |
| 263 | 9x |
conf_level = conf_level |
| 264 |
) |
|
| 265 | 6x |
} else if (all(xs_class[c(odds_ratio_var, interaction_var)] == "factor")) {
|
| 266 | 6x |
h_or_cat_interaction( |
| 267 | 6x |
odds_ratio_var, |
| 268 | 6x |
interaction_var, |
| 269 | 6x |
fit_glm, |
| 270 | 6x |
conf_level = conf_level |
| 271 |
) |
|
| 272 |
} else {
|
|
| 273 | ! |
stop("wrong interaction variable class, the interaction variable is not a numeric nor a factor")
|
| 274 |
} |
|
| 275 |
} |
|
| 276 | ||
| 277 |
#' @describeIn h_logistic_regression Helper function to construct term labels from simple terms and the table |
|
| 278 |
#' of numbers of patients. |
|
| 279 |
#' |
|
| 280 |
#' @param terms (`character`)\cr simple terms. |
|
| 281 |
#' @param table (`table`)\cr table containing numbers for terms. |
|
| 282 |
#' |
|
| 283 |
#' @return Term labels containing numbers of patients. |
|
| 284 |
#' |
|
| 285 |
#' @export |
|
| 286 |
h_simple_term_labels <- function(terms, |
|
| 287 |
table) {
|
|
| 288 | 54x |
checkmate::assert_true(is.table(table)) |
| 289 | 54x |
checkmate::assert_multi_class(terms, classes = c("factor", "character"))
|
| 290 | 54x |
terms <- as.character(terms) |
| 291 | 54x |
term_n <- table[terms] |
| 292 | 54x |
paste0(terms, ", n = ", term_n) |
| 293 |
} |
|
| 294 | ||
| 295 |
#' @describeIn h_logistic_regression Helper function to construct term labels from interaction terms and the table |
|
| 296 |
#' of numbers of patients. |
|
| 297 |
#' |
|
| 298 |
#' @param terms1 (`character`)\cr terms for first dimension (rows). |
|
| 299 |
#' @param terms2 (`character`)\cr terms for second dimension (rows). |
|
| 300 |
#' @param any (`flag`)\cr whether any of `term1` and `term2` can be fulfilled to count the |
|
| 301 |
#' number of patients. In that case they can only be scalar (strings). |
|
| 302 |
#' |
|
| 303 |
#' @return Term labels containing numbers of patients. |
|
| 304 |
#' |
|
| 305 |
#' @export |
|
| 306 |
h_interaction_term_labels <- function(terms1, |
|
| 307 |
terms2, |
|
| 308 |
table, |
|
| 309 |
any = FALSE) {
|
|
| 310 | 8x |
checkmate::assert_true(is.table(table)) |
| 311 | 8x |
checkmate::assert_flag(any) |
| 312 | 8x |
checkmate::assert_multi_class(terms1, classes = c("factor", "character"))
|
| 313 | 8x |
checkmate::assert_multi_class(terms2, classes = c("factor", "character"))
|
| 314 | 8x |
terms1 <- as.character(terms1) |
| 315 | 8x |
terms2 <- as.character(terms2) |
| 316 | 8x |
if (any) {
|
| 317 | 4x |
checkmate::assert_scalar(terms1) |
| 318 | 4x |
checkmate::assert_scalar(terms2) |
| 319 | 4x |
paste0( |
| 320 | 4x |
terms1, " or ", terms2, ", n = ", |
| 321 |
# Note that we double count in the initial sum the cell [terms1, terms2], therefore subtract. |
|
| 322 | 4x |
sum(c(table[terms1, ], table[, terms2])) - table[terms1, terms2] |
| 323 |
) |
|
| 324 |
} else {
|
|
| 325 | 4x |
term_n <- table[cbind(terms1, terms2)] |
| 326 | 4x |
paste0(terms1, " * ", terms2, ", n = ", term_n) |
| 327 |
} |
|
| 328 |
} |
|
| 329 | ||
| 330 |
#' @describeIn h_logistic_regression Helper function to tabulate the main effect |
|
| 331 |
#' results of a (conditional) logistic regression model. |
|
| 332 |
#' |
|
| 333 |
#' @return Tabulated main effect results from a logistic regression model. |
|
| 334 |
#' |
|
| 335 |
#' @examples |
|
| 336 |
#' h_glm_simple_term_extract("AGE", mod1)
|
|
| 337 |
#' h_glm_simple_term_extract("ARMCD", mod1)
|
|
| 338 |
#' |
|
| 339 |
#' @export |
|
| 340 |
h_glm_simple_term_extract <- function(x, fit_glm) {
|
|
| 341 | 78x |
checkmate::assert_multi_class(fit_glm, c("glm", "clogit"))
|
| 342 | 78x |
checkmate::assert_string(x) |
| 343 | ||
| 344 | 78x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
| 345 | 78x |
xs_level <- fit_glm$xlevels |
| 346 | 78x |
xs_coef <- summary(fit_glm)$coefficients |
| 347 | 78x |
stats <- if (inherits(fit_glm, "glm")) {
|
| 348 | 66x |
c("estimate" = "Estimate", "std_error" = "Std. Error", "pvalue" = "Pr(>|z|)")
|
| 349 |
} else {
|
|
| 350 | 12x |
c("estimate" = "coef", "std_error" = "se(coef)", "pvalue" = "Pr(>|z|)")
|
| 351 |
} |
|
| 352 |
# Make sure x is not an interaction term. |
|
| 353 | 78x |
checkmate::assert_subset(x, names(xs_class)) |
| 354 | 78x |
x_sel <- if (xs_class[x] == "numeric") x else paste0(x, xs_level[[x]][-1]) |
| 355 | 78x |
x_stats <- as.data.frame(xs_coef[x_sel, stats, drop = FALSE], stringsAsFactors = FALSE) |
| 356 | 78x |
colnames(x_stats) <- names(stats) |
| 357 | 78x |
x_stats$estimate <- as.list(x_stats$estimate) |
| 358 | 78x |
x_stats$std_error <- as.list(x_stats$std_error) |
| 359 | 78x |
x_stats$pvalue <- as.list(x_stats$pvalue) |
| 360 | 78x |
x_stats$df <- as.list(1) |
| 361 | 78x |
if (xs_class[x] == "numeric") {
|
| 362 | 60x |
x_stats$term <- x |
| 363 | 60x |
x_stats$term_label <- if (inherits(fit_glm, "glm")) {
|
| 364 | 48x |
formatters::var_labels(fit_glm$data[x], fill = TRUE) |
| 365 |
} else {
|
|
| 366 |
# We just fill in here with the `term` itself as we don't have the data available. |
|
| 367 | 12x |
x |
| 368 |
} |
|
| 369 | 60x |
x_stats$is_variable_summary <- FALSE |
| 370 | 60x |
x_stats$is_term_summary <- TRUE |
| 371 |
} else {
|
|
| 372 | 18x |
checkmate::assert_class(fit_glm, "glm") |
| 373 |
# The reason is that we don't have the original data set in the `clogit` object |
|
| 374 |
# and therefore cannot determine the `x_numbers` here. |
|
| 375 | 18x |
x_numbers <- table(fit_glm$data[[x]]) |
| 376 | 18x |
x_stats$term <- xs_level[[x]][-1] |
| 377 | 18x |
x_stats$term_label <- h_simple_term_labels(x_stats$term, x_numbers) |
| 378 | 18x |
x_stats$is_variable_summary <- FALSE |
| 379 | 18x |
x_stats$is_term_summary <- TRUE |
| 380 | 18x |
main_effects <- car::Anova(fit_glm, type = 3, test.statistic = "Wald") |
| 381 | 18x |
x_main <- data.frame( |
| 382 | 18x |
pvalue = main_effects[x, "Pr(>Chisq)", drop = TRUE], |
| 383 | 18x |
term = xs_level[[x]][1], |
| 384 | 18x |
term_label = paste("Reference", h_simple_term_labels(xs_level[[x]][1], x_numbers)),
|
| 385 | 18x |
df = main_effects[x, "Df", drop = TRUE], |
| 386 | 18x |
stringsAsFactors = FALSE |
| 387 |
) |
|
| 388 | 18x |
x_main$pvalue <- as.list(x_main$pvalue) |
| 389 | 18x |
x_main$df <- as.list(x_main$df) |
| 390 | 18x |
x_main$estimate <- list(numeric(0)) |
| 391 | 18x |
x_main$std_error <- list(numeric(0)) |
| 392 | 18x |
if (length(xs_level[[x]][-1]) == 1) {
|
| 393 | 8x |
x_main$pvalue <- list(numeric(0)) |
| 394 | 8x |
x_main$df <- list(numeric(0)) |
| 395 |
} |
|
| 396 | 18x |
x_main$is_variable_summary <- TRUE |
| 397 | 18x |
x_main$is_term_summary <- FALSE |
| 398 | 18x |
x_stats <- rbind(x_main, x_stats) |
| 399 |
} |
|
| 400 | 78x |
x_stats$variable <- x |
| 401 | 78x |
x_stats$variable_label <- if (inherits(fit_glm, "glm")) {
|
| 402 | 66x |
formatters::var_labels(fit_glm$data[x], fill = TRUE) |
| 403 |
} else {
|
|
| 404 | 12x |
x |
| 405 |
} |
|
| 406 | 78x |
x_stats$interaction <- "" |
| 407 | 78x |
x_stats$interaction_label <- "" |
| 408 | 78x |
x_stats$reference <- "" |
| 409 | 78x |
x_stats$reference_label <- "" |
| 410 | 78x |
rownames(x_stats) <- NULL |
| 411 | 78x |
x_stats[c( |
| 412 | 78x |
"variable", |
| 413 | 78x |
"variable_label", |
| 414 | 78x |
"term", |
| 415 | 78x |
"term_label", |
| 416 | 78x |
"interaction", |
| 417 | 78x |
"interaction_label", |
| 418 | 78x |
"reference", |
| 419 | 78x |
"reference_label", |
| 420 | 78x |
"estimate", |
| 421 | 78x |
"std_error", |
| 422 | 78x |
"df", |
| 423 | 78x |
"pvalue", |
| 424 | 78x |
"is_variable_summary", |
| 425 | 78x |
"is_term_summary" |
| 426 |
)] |
|
| 427 |
} |
|
| 428 | ||
| 429 |
#' @describeIn h_logistic_regression Helper function to tabulate the interaction term |
|
| 430 |
#' results of a logistic regression model. |
|
| 431 |
#' |
|
| 432 |
#' @return Tabulated interaction term results from a logistic regression model. |
|
| 433 |
#' |
|
| 434 |
#' @examples |
|
| 435 |
#' h_glm_interaction_extract("ARMCD:AGE", mod2)
|
|
| 436 |
#' |
|
| 437 |
#' @export |
|
| 438 |
h_glm_interaction_extract <- function(x, fit_glm) {
|
|
| 439 | 7x |
vars <- h_get_interaction_vars(fit_glm) |
| 440 | 7x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
| 441 | ||
| 442 | 7x |
checkmate::assert_string(x) |
| 443 | ||
| 444 |
# Only take two-way interaction |
|
| 445 | 7x |
checkmate::assert_vector(vars, len = 2) |
| 446 | ||
| 447 |
# Only consider simple case: first variable in interaction is arm, a categorical variable |
|
| 448 | 7x |
checkmate::assert_disjunct(xs_class[vars[1]], "numeric") |
| 449 | ||
| 450 | 7x |
xs_level <- fit_glm$xlevels |
| 451 | 7x |
xs_coef <- summary(fit_glm)$coefficients |
| 452 | 7x |
main_effects <- car::Anova(fit_glm, type = 3, test.statistic = "Wald") |
| 453 | 7x |
stats <- c("estimate" = "Estimate", "std_error" = "Std. Error", "pvalue" = "Pr(>|z|)")
|
| 454 | 7x |
v1_comp <- xs_level[[vars[1]]][-1] |
| 455 | 7x |
if (xs_class[vars[2]] == "numeric") {
|
| 456 | 4x |
x_stats <- as.data.frame( |
| 457 | 4x |
xs_coef[paste0(vars[1], v1_comp, ":", vars[2]), stats, drop = FALSE], |
| 458 | 4x |
stringsAsFactors = FALSE |
| 459 |
) |
|
| 460 | 4x |
colnames(x_stats) <- names(stats) |
| 461 | 4x |
x_stats$term <- v1_comp |
| 462 | 4x |
x_numbers <- table(fit_glm$data[[vars[1]]]) |
| 463 | 4x |
x_stats$term_label <- h_simple_term_labels(v1_comp, x_numbers) |
| 464 | 4x |
v1_ref <- xs_level[[vars[1]]][1] |
| 465 | 4x |
term_main <- v1_ref |
| 466 | 4x |
ref_label <- h_simple_term_labels(v1_ref, x_numbers) |
| 467 | 3x |
} else if (xs_class[vars[2]] != "numeric") {
|
| 468 | 3x |
v2_comp <- xs_level[[vars[2]]][-1] |
| 469 | 3x |
v1_v2_grid <- expand.grid(v1 = v1_comp, v2 = v2_comp) |
| 470 | 3x |
x_sel <- paste( |
| 471 | 3x |
paste0(vars[1], v1_v2_grid$v1), |
| 472 | 3x |
paste0(vars[2], v1_v2_grid$v2), |
| 473 | 3x |
sep = ":" |
| 474 |
) |
|
| 475 | 3x |
x_stats <- as.data.frame(xs_coef[x_sel, stats, drop = FALSE], stringsAsFactors = FALSE) |
| 476 | 3x |
colnames(x_stats) <- names(stats) |
| 477 | 3x |
x_stats$term <- paste(v1_v2_grid$v1, "*", v1_v2_grid$v2) |
| 478 | 3x |
x_numbers <- table(fit_glm$data[[vars[1]]], fit_glm$data[[vars[2]]]) |
| 479 | 3x |
x_stats$term_label <- h_interaction_term_labels(v1_v2_grid$v1, v1_v2_grid$v2, x_numbers) |
| 480 | 3x |
v1_ref <- xs_level[[vars[1]]][1] |
| 481 | 3x |
v2_ref <- xs_level[[vars[2]]][1] |
| 482 | 3x |
term_main <- paste(vars[1], vars[2], sep = " * ") |
| 483 | 3x |
ref_label <- h_interaction_term_labels(v1_ref, v2_ref, x_numbers, any = TRUE) |
| 484 |
} |
|
| 485 | 7x |
x_stats$df <- as.list(1) |
| 486 | 7x |
x_stats$pvalue <- as.list(x_stats$pvalue) |
| 487 | 7x |
x_stats$is_variable_summary <- FALSE |
| 488 | 7x |
x_stats$is_term_summary <- TRUE |
| 489 | 7x |
x_main <- data.frame( |
| 490 | 7x |
pvalue = main_effects[x, "Pr(>Chisq)", drop = TRUE], |
| 491 | 7x |
term = term_main, |
| 492 | 7x |
term_label = paste("Reference", ref_label),
|
| 493 | 7x |
df = main_effects[x, "Df", drop = TRUE], |
| 494 | 7x |
stringsAsFactors = FALSE |
| 495 |
) |
|
| 496 | 7x |
x_main$pvalue <- as.list(x_main$pvalue) |
| 497 | 7x |
x_main$df <- as.list(x_main$df) |
| 498 | 7x |
x_main$estimate <- list(numeric(0)) |
| 499 | 7x |
x_main$std_error <- list(numeric(0)) |
| 500 | 7x |
x_main$is_variable_summary <- TRUE |
| 501 | 7x |
x_main$is_term_summary <- FALSE |
| 502 | ||
| 503 | 7x |
x_stats <- rbind(x_main, x_stats) |
| 504 | 7x |
x_stats$variable <- x |
| 505 | 7x |
x_stats$variable_label <- paste( |
| 506 | 7x |
"Interaction of", |
| 507 | 7x |
formatters::var_labels(fit_glm$data[vars[1]], fill = TRUE), |
| 508 |
"*", |
|
| 509 | 7x |
formatters::var_labels(fit_glm$data[vars[2]], fill = TRUE) |
| 510 |
) |
|
| 511 | 7x |
x_stats$interaction <- "" |
| 512 | 7x |
x_stats$interaction_label <- "" |
| 513 | 7x |
x_stats$reference <- "" |
| 514 | 7x |
x_stats$reference_label <- "" |
| 515 | 7x |
rownames(x_stats) <- NULL |
| 516 | 7x |
x_stats[c( |
| 517 | 7x |
"variable", |
| 518 | 7x |
"variable_label", |
| 519 | 7x |
"term", |
| 520 | 7x |
"term_label", |
| 521 | 7x |
"interaction", |
| 522 | 7x |
"interaction_label", |
| 523 | 7x |
"reference", |
| 524 | 7x |
"reference_label", |
| 525 | 7x |
"estimate", |
| 526 | 7x |
"std_error", |
| 527 | 7x |
"df", |
| 528 | 7x |
"pvalue", |
| 529 | 7x |
"is_variable_summary", |
| 530 | 7x |
"is_term_summary" |
| 531 |
)] |
|
| 532 |
} |
|
| 533 | ||
| 534 |
#' @describeIn h_logistic_regression Helper function to tabulate the interaction |
|
| 535 |
#' results of a logistic regression model. This basically is a wrapper for |
|
| 536 |
#' [h_or_interaction()] and [h_glm_simple_term_extract()] which puts the results |
|
| 537 |
#' in the right data frame format. |
|
| 538 |
#' |
|
| 539 |
#' @return A `data.frame` of tabulated interaction term results from a logistic regression model. |
|
| 540 |
#' |
|
| 541 |
#' @examples |
|
| 542 |
#' h_glm_inter_term_extract("AGE", "ARMCD", mod2)
|
|
| 543 |
#' |
|
| 544 |
#' @export |
|
| 545 |
h_glm_inter_term_extract <- function(odds_ratio_var, |
|
| 546 |
interaction_var, |
|
| 547 |
fit_glm, |
|
| 548 |
...) {
|
|
| 549 |
# First obtain the main effects. |
|
| 550 | 13x |
main_stats <- h_glm_simple_term_extract(odds_ratio_var, fit_glm) |
| 551 | 13x |
main_stats$is_reference_summary <- FALSE |
| 552 | 13x |
main_stats$odds_ratio <- NA |
| 553 | 13x |
main_stats$lcl <- NA |
| 554 | 13x |
main_stats$ucl <- NA |
| 555 | ||
| 556 |
# Then we get the odds ratio estimates and put into df form. |
|
| 557 | 13x |
or_numbers <- h_or_interaction(odds_ratio_var, interaction_var, fit_glm, ...) |
| 558 | 13x |
is_num_or_var <- attr(fit_glm$terms, "dataClasses")[odds_ratio_var] == "numeric" |
| 559 | ||
| 560 | 13x |
if (is_num_or_var) {
|
| 561 |
# Numeric OR variable case. |
|
| 562 | 4x |
references <- names(or_numbers) |
| 563 | 4x |
n_ref <- length(references) |
| 564 | ||
| 565 | 4x |
extract_from_list <- function(l, name, pos = 1) {
|
| 566 | 12x |
unname(unlist( |
| 567 | 12x |
lapply(or_numbers, function(x) {
|
| 568 | 36x |
x[[name]][pos] |
| 569 |
}) |
|
| 570 |
)) |
|
| 571 |
} |
|
| 572 | 4x |
or_stats <- data.frame( |
| 573 | 4x |
variable = odds_ratio_var, |
| 574 | 4x |
variable_label = unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE)), |
| 575 | 4x |
term = odds_ratio_var, |
| 576 | 4x |
term_label = unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE)), |
| 577 | 4x |
interaction = interaction_var, |
| 578 | 4x |
interaction_label = unname(formatters::var_labels(fit_glm$data[interaction_var], fill = TRUE)), |
| 579 | 4x |
reference = references, |
| 580 | 4x |
reference_label = references, |
| 581 | 4x |
estimate = NA, |
| 582 | 4x |
std_error = NA, |
| 583 | 4x |
odds_ratio = extract_from_list(or_numbers, "or"), |
| 584 | 4x |
lcl = extract_from_list(or_numbers, "ci", pos = "lcl"), |
| 585 | 4x |
ucl = extract_from_list(or_numbers, "ci", pos = "ucl"), |
| 586 | 4x |
df = NA, |
| 587 | 4x |
pvalue = NA, |
| 588 | 4x |
is_variable_summary = FALSE, |
| 589 | 4x |
is_term_summary = FALSE, |
| 590 | 4x |
is_reference_summary = TRUE |
| 591 |
) |
|
| 592 |
} else {
|
|
| 593 |
# Categorical OR variable case. |
|
| 594 | 9x |
references <- names(or_numbers[[1]]) |
| 595 | 9x |
n_ref <- length(references) |
| 596 | ||
| 597 | 9x |
extract_from_list <- function(l, name, pos = 1) {
|
| 598 | 27x |
unname(unlist( |
| 599 | 27x |
lapply(or_numbers, function(x) {
|
| 600 | 48x |
lapply(x, function(y) y[[name]][pos]) |
| 601 |
}) |
|
| 602 |
)) |
|
| 603 |
} |
|
| 604 | 9x |
or_stats <- data.frame( |
| 605 | 9x |
variable = odds_ratio_var, |
| 606 | 9x |
variable_label = unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE)), |
| 607 | 9x |
term = rep(names(or_numbers), each = n_ref), |
| 608 | 9x |
term_label = h_simple_term_labels(rep(names(or_numbers), each = n_ref), table(fit_glm$data[[odds_ratio_var]])), |
| 609 | 9x |
interaction = interaction_var, |
| 610 | 9x |
interaction_label = unname(formatters::var_labels(fit_glm$data[interaction_var], fill = TRUE)), |
| 611 | 9x |
reference = unlist(lapply(or_numbers, names)), |
| 612 | 9x |
reference_label = unlist(lapply(or_numbers, names)), |
| 613 | 9x |
estimate = NA, |
| 614 | 9x |
std_error = NA, |
| 615 | 9x |
odds_ratio = extract_from_list(or_numbers, "or"), |
| 616 | 9x |
lcl = extract_from_list(or_numbers, "ci", pos = "lcl"), |
| 617 | 9x |
ucl = extract_from_list(or_numbers, "ci", pos = "ucl"), |
| 618 | 9x |
df = NA, |
| 619 | 9x |
pvalue = NA, |
| 620 | 9x |
is_variable_summary = FALSE, |
| 621 | 9x |
is_term_summary = FALSE, |
| 622 | 9x |
is_reference_summary = TRUE |
| 623 |
) |
|
| 624 |
} |
|
| 625 | ||
| 626 | 13x |
df <- rbind( |
| 627 | 13x |
main_stats[, names(or_stats)], |
| 628 | 13x |
or_stats |
| 629 |
) |
|
| 630 | 13x |
df[order(-df$is_variable_summary, df$term, -df$is_term_summary, df$reference), ] |
| 631 |
} |
|
| 632 | ||
| 633 |
#' @describeIn h_logistic_regression Helper function to tabulate the results including |
|
| 634 |
#' odds ratios and confidence intervals of simple terms. |
|
| 635 |
#' |
|
| 636 |
#' @return Tabulated statistics for the given variable(s) from the logistic regression model. |
|
| 637 |
#' |
|
| 638 |
#' @examples |
|
| 639 |
#' h_logistic_simple_terms("AGE", mod1)
|
|
| 640 |
#' |
|
| 641 |
#' @export |
|
| 642 |
h_logistic_simple_terms <- function(x, fit_glm, conf_level = 0.95) {
|
|
| 643 | 53x |
checkmate::assert_multi_class(fit_glm, c("glm", "clogit"))
|
| 644 | 53x |
if (inherits(fit_glm, "glm")) {
|
| 645 | 42x |
checkmate::assert_set_equal(fit_glm$family$family, "binomial") |
| 646 |
} |
|
| 647 | 53x |
terms_name <- attr(stats::terms(fit_glm), "term.labels") |
| 648 | 53x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
| 649 | 53x |
interaction <- terms_name[which(!terms_name %in% names(xs_class))] |
| 650 | 53x |
checkmate::assert_subset(x, terms_name) |
| 651 | 53x |
if (length(interaction) != 0) {
|
| 652 |
# Make sure any item in x is not part of interaction term |
|
| 653 | 2x |
checkmate::assert_disjunct(x, unlist(strsplit(interaction, ":"))) |
| 654 |
} |
|
| 655 | 53x |
x_stats <- lapply(x, h_glm_simple_term_extract, fit_glm) |
| 656 | 53x |
x_stats <- do.call(rbind, x_stats) |
| 657 | 53x |
q_norm <- stats::qnorm((1 + conf_level) / 2) |
| 658 | 53x |
x_stats$odds_ratio <- lapply(x_stats$estimate, exp) |
| 659 | 53x |
x_stats$lcl <- Map(function(or, se) exp(log(or) - q_norm * se), x_stats$odds_ratio, x_stats$std_error) |
| 660 | 53x |
x_stats$ucl <- Map(function(or, se) exp(log(or) + q_norm * se), x_stats$odds_ratio, x_stats$std_error) |
| 661 | 53x |
x_stats$ci <- Map(function(lcl, ucl) c(lcl, ucl), lcl = x_stats$lcl, ucl = x_stats$ucl) |
| 662 | 53x |
x_stats |
| 663 |
} |
|
| 664 | ||
| 665 |
#' @describeIn h_logistic_regression Helper function to tabulate the results including |
|
| 666 |
#' odds ratios and confidence intervals of interaction terms. |
|
| 667 |
#' |
|
| 668 |
#' @return Tabulated statistics for the given variable(s) from the logistic regression model. |
|
| 669 |
#' |
|
| 670 |
#' @examples |
|
| 671 |
#' h_logistic_inter_terms(c("RACE", "AGE", "ARMCD", "AGE:ARMCD"), mod2)
|
|
| 672 |
#' |
|
| 673 |
#' @export |
|
| 674 |
h_logistic_inter_terms <- function(x, |
|
| 675 |
fit_glm, |
|
| 676 |
conf_level = 0.95, |
|
| 677 |
at = NULL) {
|
|
| 678 |
# Find out the interaction variables and interaction term. |
|
| 679 | 5x |
inter_vars <- h_get_interaction_vars(fit_glm) |
| 680 | 5x |
checkmate::assert_vector(inter_vars, len = 2) |
| 681 | ||
| 682 | ||
| 683 | 5x |
inter_term_index <- intersect(grep(inter_vars[1], x), grep(inter_vars[2], x)) |
| 684 | 5x |
inter_term <- x[inter_term_index] |
| 685 | ||
| 686 |
# For the non-interaction vars we need the standard stuff. |
|
| 687 | 5x |
normal_terms <- setdiff(x, union(inter_vars, inter_term)) |
| 688 | ||
| 689 | 5x |
x_stats <- lapply(normal_terms, h_glm_simple_term_extract, fit_glm) |
| 690 | 5x |
x_stats <- do.call(rbind, x_stats) |
| 691 | 5x |
q_norm <- stats::qnorm((1 + conf_level) / 2) |
| 692 | 5x |
x_stats$odds_ratio <- lapply(x_stats$estimate, exp) |
| 693 | 5x |
x_stats$lcl <- Map(function(or, se) exp(log(or) - q_norm * se), x_stats$odds_ratio, x_stats$std_error) |
| 694 | 5x |
x_stats$ucl <- Map(function(or, se) exp(log(or) + q_norm * se), x_stats$odds_ratio, x_stats$std_error) |
| 695 | 5x |
normal_stats <- x_stats |
| 696 | 5x |
normal_stats$is_reference_summary <- FALSE |
| 697 | ||
| 698 |
# Now the interaction term itself. |
|
| 699 | 5x |
inter_term_stats <- h_glm_interaction_extract(inter_term, fit_glm) |
| 700 | 5x |
inter_term_stats$odds_ratio <- NA |
| 701 | 5x |
inter_term_stats$lcl <- NA |
| 702 | 5x |
inter_term_stats$ucl <- NA |
| 703 | 5x |
inter_term_stats$is_reference_summary <- FALSE |
| 704 | ||
| 705 | 5x |
is_intervar1_numeric <- attr(fit_glm$terms, "dataClasses")[inter_vars[1]] == "numeric" |
| 706 | ||
| 707 |
# Interaction stuff. |
|
| 708 | 5x |
inter_stats_one <- h_glm_inter_term_extract( |
| 709 | 5x |
inter_vars[1], |
| 710 | 5x |
inter_vars[2], |
| 711 | 5x |
fit_glm, |
| 712 | 5x |
conf_level = conf_level, |
| 713 | 5x |
at = `if`(is_intervar1_numeric, NULL, at) |
| 714 |
) |
|
| 715 | 5x |
inter_stats_two <- h_glm_inter_term_extract( |
| 716 | 5x |
inter_vars[2], |
| 717 | 5x |
inter_vars[1], |
| 718 | 5x |
fit_glm, |
| 719 | 5x |
conf_level = conf_level, |
| 720 | 5x |
at = `if`(is_intervar1_numeric, at, NULL) |
| 721 |
) |
|
| 722 | ||
| 723 |
# Now just combine everything in one data frame. |
|
| 724 | 5x |
col_names <- c( |
| 725 | 5x |
"variable", |
| 726 | 5x |
"variable_label", |
| 727 | 5x |
"term", |
| 728 | 5x |
"term_label", |
| 729 | 5x |
"interaction", |
| 730 | 5x |
"interaction_label", |
| 731 | 5x |
"reference", |
| 732 | 5x |
"reference_label", |
| 733 | 5x |
"estimate", |
| 734 | 5x |
"std_error", |
| 735 | 5x |
"df", |
| 736 | 5x |
"pvalue", |
| 737 | 5x |
"odds_ratio", |
| 738 | 5x |
"lcl", |
| 739 | 5x |
"ucl", |
| 740 | 5x |
"is_variable_summary", |
| 741 | 5x |
"is_term_summary", |
| 742 | 5x |
"is_reference_summary" |
| 743 |
) |
|
| 744 | 5x |
df <- rbind( |
| 745 | 5x |
inter_stats_one[, col_names], |
| 746 | 5x |
inter_stats_two[, col_names], |
| 747 | 5x |
inter_term_stats[, col_names] |
| 748 |
) |
|
| 749 | 5x |
if (length(normal_terms) > 0) {
|
| 750 | 5x |
df <- rbind( |
| 751 | 5x |
normal_stats[, col_names], |
| 752 | 5x |
df |
| 753 |
) |
|
| 754 |
} |
|
| 755 | 5x |
df$ci <- combine_vectors(df$lcl, df$ucl) |
| 756 | 5x |
df |
| 757 |
} |
| 1 |
#' Confidence intervals for a difference of binomials |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("experimental")`
|
|
| 4 |
#' |
|
| 5 |
#' Several confidence intervals for the difference between proportions. |
|
| 6 |
#' |
|
| 7 |
#' @name desctools_binom |
|
| 8 |
NULL |
|
| 9 | ||
| 10 |
#' Recycle list of parameters |
|
| 11 |
#' |
|
| 12 |
#' This function recycles all supplied elements to the maximal dimension. |
|
| 13 |
#' |
|
| 14 |
#' @param ... (`any`)\cr elements to recycle. |
|
| 15 |
#' |
|
| 16 |
#' @return A `list`. |
|
| 17 |
#' |
|
| 18 |
#' @keywords internal |
|
| 19 |
#' @noRd |
|
| 20 |
h_recycle <- function(...) {
|
|
| 21 | 78x |
lst <- list(...) |
| 22 | 78x |
maxdim <- max(lengths(lst)) |
| 23 | 78x |
res <- lapply(lst, rep, length.out = maxdim) |
| 24 | 78x |
attr(res, "maxdim") <- maxdim |
| 25 | 78x |
return(res) |
| 26 |
} |
|
| 27 | ||
| 28 |
#' @describeIn desctools_binom Several confidence intervals for the difference between proportions. |
|
| 29 |
#' |
|
| 30 |
#' @return A `matrix` of 3 values: |
|
| 31 |
#' * `est`: estimate of proportion difference. |
|
| 32 |
#' * `lwr.ci`: estimate of lower end of the confidence interval. |
|
| 33 |
#' * `upr.ci`: estimate of upper end of the confidence interval. |
|
| 34 |
#' |
|
| 35 |
#' @keywords internal |
|
| 36 |
desctools_binom <- function(x1, |
|
| 37 |
n1, |
|
| 38 |
x2, |
|
| 39 |
n2, |
|
| 40 |
conf.level = 0.95, # nolint |
|
| 41 |
sides = c("two.sided", "left", "right"),
|
|
| 42 |
method = c( |
|
| 43 |
"ac", "wald", "waldcc", "score", "scorecc", "mn", "mee", "blj", "ha", "hal", "jp" |
|
| 44 |
)) {
|
|
| 45 | 26x |
if (missing(sides)) {
|
| 46 | 26x |
sides <- match.arg(sides) |
| 47 |
} |
|
| 48 | 26x |
if (missing(method)) {
|
| 49 | 1x |
method <- match.arg(method) |
| 50 |
} |
|
| 51 | 26x |
iBinomDiffCI <- function(x1, n1, x2, n2, conf.level, sides, method) { # nolint
|
| 52 | 26x |
if (sides != "two.sided") {
|
| 53 | ! |
conf.level <- 1 - 2 * (1 - conf.level) # nolint |
| 54 |
} |
|
| 55 | 26x |
alpha <- 1 - conf.level |
| 56 | 26x |
kappa <- stats::qnorm(1 - alpha / 2) |
| 57 | 26x |
p1_hat <- x1 / n1 |
| 58 | 26x |
p2_hat <- x2 / n2 |
| 59 | 26x |
est <- p1_hat - p2_hat |
| 60 | 26x |
switch(method, |
| 61 | 26x |
wald = {
|
| 62 | 4x |
vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2 |
| 63 | 4x |
term2 <- kappa * sqrt(vd) |
| 64 | 4x |
ci_lwr <- max(-1, est - term2) |
| 65 | 4x |
ci_upr <- min(1, est + term2) |
| 66 |
}, |
|
| 67 | 26x |
waldcc = {
|
| 68 | 6x |
vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2 |
| 69 | 6x |
term2 <- kappa * sqrt(vd) |
| 70 | 6x |
term2 <- term2 + 0.5 * (1 / n1 + 1 / n2) |
| 71 | 6x |
ci_lwr <- max(-1, est - term2) |
| 72 | 6x |
ci_upr <- min(1, est + term2) |
| 73 |
}, |
|
| 74 | 26x |
ac = {
|
| 75 | 2x |
n1 <- n1 + 2 |
| 76 | 2x |
n2 <- n2 + 2 |
| 77 | 2x |
x1 <- x1 + 1 |
| 78 | 2x |
x2 <- x2 + 1 |
| 79 | 2x |
p1_hat <- x1 / n1 |
| 80 | 2x |
p2_hat <- x2 / n2 |
| 81 | 2x |
est1 <- p1_hat - p2_hat |
| 82 | 2x |
vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2 |
| 83 | 2x |
term2 <- kappa * sqrt(vd) |
| 84 | 2x |
ci_lwr <- max(-1, est1 - term2) |
| 85 | 2x |
ci_upr <- min(1, est1 + term2) |
| 86 |
}, |
|
| 87 | 26x |
exact = {
|
| 88 | ! |
ci_lwr <- NA |
| 89 | ! |
ci_upr <- NA |
| 90 |
}, |
|
| 91 | 26x |
score = {
|
| 92 | 3x |
w1 <- desctools_binomci( |
| 93 | 3x |
x = x1, n = n1, conf.level = conf.level, |
| 94 | 3x |
method = "wilson" |
| 95 |
) |
|
| 96 | 3x |
w2 <- desctools_binomci( |
| 97 | 3x |
x = x2, n = n2, conf.level = conf.level, |
| 98 | 3x |
method = "wilson" |
| 99 |
) |
|
| 100 | 3x |
l1 <- w1[2] |
| 101 | 3x |
u1 <- w1[3] |
| 102 | 3x |
l2 <- w2[2] |
| 103 | 3x |
u2 <- w2[3] |
| 104 | 3x |
ci_lwr <- est - kappa * sqrt(l1 * (1 - l1) / n1 + u2 * (1 - u2) / n2) |
| 105 | 3x |
ci_upr <- est + kappa * sqrt(u1 * (1 - u1) / n1 + l2 * (1 - l2) / n2) |
| 106 |
}, |
|
| 107 | 26x |
scorecc = {
|
| 108 | 1x |
w1 <- desctools_binomci( |
| 109 | 1x |
x = x1, n = n1, conf.level = conf.level, |
| 110 | 1x |
method = "wilsoncc" |
| 111 |
) |
|
| 112 | 1x |
w2 <- desctools_binomci( |
| 113 | 1x |
x = x2, n = n2, conf.level = conf.level, |
| 114 | 1x |
method = "wilsoncc" |
| 115 |
) |
|
| 116 | 1x |
l1 <- w1[2] |
| 117 | 1x |
u1 <- w1[3] |
| 118 | 1x |
l2 <- w2[2] |
| 119 | 1x |
u2 <- w2[3] |
| 120 | 1x |
ci_lwr <- max(-1, est - sqrt((p1_hat - l1)^2 + (u2 - p2_hat)^2)) |
| 121 | 1x |
ci_upr <- min(1, est + sqrt((u1 - p1_hat)^2 + (p2_hat - l2)^2)) |
| 122 |
}, |
|
| 123 | 26x |
mee = {
|
| 124 | 1x |
.score <- function(p1, n1, p2, n2, dif) {
|
| 125 | ! |
if (dif > 1) dif <- 1 |
| 126 | ! |
if (dif < -1) dif <- -1 |
| 127 | 24x |
diff <- p1 - p2 - dif |
| 128 | 24x |
if (abs(diff) == 0) {
|
| 129 | ! |
res <- 0 |
| 130 |
} else {
|
|
| 131 | 24x |
t <- n2 / n1 |
| 132 | 24x |
a <- 1 + t |
| 133 | 24x |
b <- -(1 + t + p1 + t * p2 + dif * (t + 2)) |
| 134 | 24x |
c <- dif * dif + dif * (2 * p1 + t + 1) + p1 + t * p2 |
| 135 | 24x |
d <- -p1 * dif * (1 + dif) |
| 136 | 24x |
v <- (b / a / 3)^3 - b * c / (6 * a * a) + d / a / 2 |
| 137 | 24x |
if (abs(v) < .Machine$double.eps) v <- 0 |
| 138 | 24x |
s <- sqrt((b / a / 3)^2 - c / a / 3) |
| 139 | 24x |
u <- ifelse(v > 0, 1, -1) * s |
| 140 | 24x |
w <- (3.141592654 + acos(v / u^3)) / 3 |
| 141 | 24x |
p1d <- 2 * u * cos(w) - b / a / 3 |
| 142 | 24x |
p2d <- p1d - dif |
| 143 | 24x |
n <- n1 + n2 |
| 144 | 24x |
res <- (p1d * (1 - p1d) / n1 + p2d * (1 - p2d) / n2) |
| 145 |
} |
|
| 146 | 24x |
return(sqrt(res)) |
| 147 |
} |
|
| 148 | 1x |
pval <- function(delta) {
|
| 149 | 24x |
z <- (est - delta) / .score(p1_hat, n1, p2_hat, n2, delta) |
| 150 | 24x |
2 * min(stats::pnorm(z), 1 - stats::pnorm(z)) |
| 151 |
} |
|
| 152 | 1x |
ci_lwr <- max(-1, stats::uniroot(function(delta) {
|
| 153 | 12x |
pval(delta) - alpha |
| 154 | 1x |
}, interval = c(-1 + 1e-06, est - 1e-06))$root) |
| 155 | 1x |
ci_upr <- min(1, stats::uniroot(function(delta) {
|
| 156 | 12x |
pval(delta) - alpha |
| 157 | 1x |
}, interval = c(est + 1e-06, 1 - 1e-06))$root) |
| 158 |
}, |
|
| 159 | 26x |
blj = {
|
| 160 | 1x |
p1_dash <- (x1 + 0.5) / (n1 + 1) |
| 161 | 1x |
p2_dash <- (x2 + 0.5) / (n2 + 1) |
| 162 | 1x |
vd <- p1_dash * (1 - p1_dash) / n1 + p2_dash * (1 - p2_dash) / n2 |
| 163 | 1x |
term2 <- kappa * sqrt(vd) |
| 164 | 1x |
est_dash <- p1_dash - p2_dash |
| 165 | 1x |
ci_lwr <- max(-1, est_dash - term2) |
| 166 | 1x |
ci_upr <- min(1, est_dash + term2) |
| 167 |
}, |
|
| 168 | 26x |
ha = {
|
| 169 | 5x |
term2 <- 1 / |
| 170 | 5x |
(2 * min(n1, n2)) + kappa * sqrt(p1_hat * (1 - p1_hat) / (n1 - 1) + p2_hat * (1 - p2_hat) / (n2 - 1)) |
| 171 | 5x |
ci_lwr <- max(-1, est - term2) |
| 172 | 5x |
ci_upr <- min(1, est + term2) |
| 173 |
}, |
|
| 174 | 26x |
mn = {
|
| 175 | 1x |
.conf <- function(x1, n1, x2, n2, z, lower = FALSE) {
|
| 176 | 2x |
p1 <- x1 / n1 |
| 177 | 2x |
p2 <- x2 / n2 |
| 178 | 2x |
p_hat <- p1 - p2 |
| 179 | 2x |
dp <- 1 + ifelse(lower, 1, -1) * p_hat |
| 180 | 2x |
i <- 1 |
| 181 | 2x |
while (i <= 50) {
|
| 182 | 46x |
dp <- 0.5 * dp |
| 183 | 46x |
y <- p_hat + ifelse(lower, -1, 1) * dp |
| 184 | 46x |
score <- .score(p1, n1, p2, n2, y) |
| 185 | 46x |
if (score < z) {
|
| 186 | 20x |
p_hat <- y |
| 187 |
} |
|
| 188 | 46x |
if ((dp < 1e-07) || (abs(z - score) < 1e-06)) {
|
| 189 | 2x |
(break)() |
| 190 |
} else {
|
|
| 191 | 44x |
i <- i + 1 |
| 192 |
} |
|
| 193 |
} |
|
| 194 | 2x |
return(y) |
| 195 |
} |
|
| 196 | 1x |
.score <- function(p1, n1, p2, n2, dif) {
|
| 197 | 46x |
diff <- p1 - p2 - dif |
| 198 | 46x |
if (abs(diff) == 0) {
|
| 199 | ! |
res <- 0 |
| 200 |
} else {
|
|
| 201 | 46x |
t <- n2 / n1 |
| 202 | 46x |
a <- 1 + t |
| 203 | 46x |
b <- -(1 + t + p1 + t * p2 + dif * (t + 2)) |
| 204 | 46x |
c <- dif * dif + dif * (2 * p1 + t + 1) + p1 + t * p2 |
| 205 | 46x |
d <- -p1 * dif * (1 + dif) |
| 206 | 46x |
v <- (b / a / 3)^3 - b * c / (6 * a * a) + d / a / 2 |
| 207 | 46x |
s <- sqrt((b / a / 3)^2 - c / a / 3) |
| 208 | 46x |
u <- ifelse(v > 0, 1, -1) * s |
| 209 | 46x |
w <- (3.141592654 + acos(v / u^3)) / 3 |
| 210 | 46x |
p1d <- 2 * u * cos(w) - b / a / 3 |
| 211 | 46x |
p2d <- p1d - dif |
| 212 | 46x |
n <- n1 + n2 |
| 213 | 46x |
var <- (p1d * (1 - p1d) / n1 + p2d * (1 - p2d) / n2) * n / (n - 1) |
| 214 | 46x |
res <- diff^2 / var |
| 215 |
} |
|
| 216 | 46x |
return(res) |
| 217 |
} |
|
| 218 | 1x |
z <- stats::qchisq(conf.level, 1) |
| 219 | 1x |
ci_lwr <- max(-1, .conf(x1, n1, x2, n2, z, TRUE)) |
| 220 | 1x |
ci_upr <- min(1, .conf(x1, n1, x2, n2, z, FALSE)) |
| 221 |
}, |
|
| 222 | 26x |
beal = {
|
| 223 | ! |
a <- p1_hat + p2_hat |
| 224 | ! |
b <- p1_hat - p2_hat |
| 225 | ! |
u <- ((1 / n1) + (1 / n2)) / 4 |
| 226 | ! |
v <- ((1 / n1) - (1 / n2)) / 4 |
| 227 | ! |
V <- u * ((2 - a) * a - b^2) + 2 * v * (1 - a) * b # nolint |
| 228 | ! |
z <- stats::qchisq(p = 1 - alpha / 2, df = 1) |
| 229 | ! |
A <- sqrt(z * (V + z * u^2 * (2 - a) * a + z * v^2 * (1 - a)^2)) # nolint |
| 230 | ! |
B <- (b + z * v * (1 - a)) / (1 + z * u) # nolint |
| 231 | ! |
ci_lwr <- max(-1, B - A / (1 + z * u)) |
| 232 | ! |
ci_upr <- min(1, B + A / (1 + z * u)) |
| 233 |
}, |
|
| 234 | 26x |
hal = {
|
| 235 | 1x |
psi <- (p1_hat + p2_hat) / 2 |
| 236 | 1x |
u <- (1 / n1 + 1 / n2) / 4 |
| 237 | 1x |
v <- (1 / n1 - 1 / n2) / 4 |
| 238 | 1x |
z <- kappa |
| 239 | 1x |
theta <- ((p1_hat - p2_hat) + z^2 * v * (1 - 2 * psi)) / (1 + z^2 * u) |
| 240 | 1x |
w <- z / (1 + z^2 * u) * sqrt(u * (4 * psi * (1 - psi) - (p1_hat - p2_hat)^2) + 2 * v * (1 - 2 * psi) * |
| 241 | 1x |
(p1_hat - p2_hat) + 4 * z^2 * u^2 * (1 - psi) * psi + z^2 * v^2 * (1 - 2 * psi)^2) # nolint |
| 242 | 1x |
c(theta + w, theta - w) |
| 243 | 1x |
ci_lwr <- max(-1, theta - w) |
| 244 | 1x |
ci_upr <- min(1, theta + w) |
| 245 |
}, |
|
| 246 | 26x |
jp = {
|
| 247 | 1x |
psi <- 0.5 * ((x1 + 0.5) / (n1 + 1) + (x2 + 0.5) / (n2 + 1)) |
| 248 | 1x |
u <- (1 / n1 + 1 / n2) / 4 |
| 249 | 1x |
v <- (1 / n1 - 1 / n2) / 4 |
| 250 | 1x |
z <- kappa |
| 251 | 1x |
theta <- ((p1_hat - p2_hat) + z^2 * v * (1 - 2 * psi)) / (1 + z^2 * u) |
| 252 | 1x |
w <- z / (1 + z^2 * u) * sqrt(u * (4 * psi * (1 - psi) - (p1_hat - p2_hat)^2) + 2 * v * (1 - 2 * psi) * |
| 253 | 1x |
(p1_hat - p2_hat) + 4 * z^2 * u^2 * (1 - psi) * psi + z^2 * v^2 * (1 - 2 * psi)^2) # nolint |
| 254 | 1x |
c(theta + w, theta - w) |
| 255 | 1x |
ci_lwr <- max(-1, theta - w) |
| 256 | 1x |
ci_upr <- min(1, theta + w) |
| 257 |
}, |
|
| 258 |
) |
|
| 259 | 26x |
ci <- c( |
| 260 | 26x |
est = est, lwr.ci = min(ci_lwr, ci_upr), |
| 261 | 26x |
upr.ci = max(ci_lwr, ci_upr) |
| 262 |
) |
|
| 263 | 26x |
if (sides == "left") {
|
| 264 | ! |
ci[3] <- 1 |
| 265 | 26x |
} else if (sides == "right") {
|
| 266 | ! |
ci[2] <- -1 |
| 267 |
} |
|
| 268 | 26x |
return(ci) |
| 269 |
} |
|
| 270 | 26x |
method <- match.arg(arg = method, several.ok = TRUE) |
| 271 | 26x |
sides <- match.arg(arg = sides, several.ok = TRUE) |
| 272 | 26x |
lst <- h_recycle( |
| 273 | 26x |
x1 = x1, n1 = n1, x2 = x2, n2 = n2, conf.level = conf.level, |
| 274 | 26x |
sides = sides, method = method |
| 275 |
) |
|
| 276 | 26x |
res <- t(sapply(1:attr(lst, "maxdim"), function(i) {
|
| 277 | 26x |
iBinomDiffCI( |
| 278 | 26x |
x1 = lst$x1[i], |
| 279 | 26x |
n1 = lst$n1[i], x2 = lst$x2[i], n2 = lst$n2[i], conf.level = lst$conf.level[i], |
| 280 | 26x |
sides = lst$sides[i], method = lst$method[i] |
| 281 |
) |
|
| 282 |
})) |
|
| 283 | 26x |
lgn <- h_recycle(x1 = if (is.null(names(x1))) {
|
| 284 | 26x |
paste("x1", seq_along(x1), sep = ".")
|
| 285 |
} else {
|
|
| 286 | ! |
names(x1) |
| 287 | 26x |
}, n1 = if (is.null(names(n1))) {
|
| 288 | 26x |
paste("n1", seq_along(n1), sep = ".")
|
| 289 |
} else {
|
|
| 290 | ! |
names(n1) |
| 291 | 26x |
}, x2 = if (is.null(names(x2))) {
|
| 292 | 26x |
paste("x2", seq_along(x2), sep = ".")
|
| 293 |
} else {
|
|
| 294 | ! |
names(x2) |
| 295 | 26x |
}, n2 = if (is.null(names(n2))) {
|
| 296 | 26x |
paste("n2", seq_along(n2), sep = ".")
|
| 297 |
} else {
|
|
| 298 | ! |
names(n2) |
| 299 | 26x |
}, conf.level = conf.level, sides = sides, method = method) |
| 300 | 26x |
xn <- apply(as.data.frame(lgn[sapply(lgn, function(x) {
|
| 301 | 182x |
length(unique(x)) != |
| 302 | 182x |
1 |
| 303 | 26x |
})]), 1, paste, collapse = ":") |
| 304 | 26x |
rownames(res) <- xn |
| 305 | 26x |
return(res) |
| 306 |
} |
|
| 307 | ||
| 308 |
#' @describeIn desctools_binom Compute confidence intervals for binomial proportions. |
|
| 309 |
#' |
|
| 310 |
#' @param x (`integer(1)`)\cr number of successes. |
|
| 311 |
#' @param n (`integer(1)`)\cr number of trials. |
|
| 312 |
#' @param conf.level (`proportion`)\cr confidence level, defaults to 0.95. |
|
| 313 |
#' @param sides (`string`)\cr side of the confidence interval to compute. Must be one of `"two-sided"` (default), |
|
| 314 |
#' `"left"`, or `"right"`. |
|
| 315 |
#' @param method (`string`)\cr method to use. Can be one out of: `"wald"`, `"wilson"`, `"wilsoncc"`, |
|
| 316 |
#' `"agresti-coull"`, `"jeffreys"`, `"modified wilson"`, `"modified jeffreys"`, `"clopper-pearson"`, `"arcsine"`, |
|
| 317 |
#' `"logit"`, `"witting"`, `"pratt"`, `"midp"`, `"lik"`, and `"blaker"`. |
|
| 318 |
#' |
|
| 319 |
#' @return A `matrix` with 3 columns containing: |
|
| 320 |
#' * `est`: estimate of proportion difference. |
|
| 321 |
#' * `lwr.ci`: lower end of the confidence interval. |
|
| 322 |
#' * `upr.ci`: upper end of the confidence interval. |
|
| 323 |
#' |
|
| 324 |
#' @keywords internal |
|
| 325 |
desctools_binomci <- function(x, |
|
| 326 |
n, |
|
| 327 |
conf.level = 0.95, # nolint |
|
| 328 |
sides = c("two.sided", "left", "right"),
|
|
| 329 |
method = c( |
|
| 330 |
"wilson", "wald", "waldcc", "agresti-coull", |
|
| 331 |
"jeffreys", "modified wilson", "wilsoncc", "modified jeffreys", |
|
| 332 |
"clopper-pearson", "arcsine", "logit", "witting", "pratt", |
|
| 333 |
"midp", "lik", "blaker" |
|
| 334 |
), |
|
| 335 |
rand = 123, |
|
| 336 |
tol = 1e-05) {
|
|
| 337 | 26x |
if (missing(method)) {
|
| 338 | 1x |
method <- "wilson" |
| 339 |
} |
|
| 340 | 26x |
if (missing(sides)) {
|
| 341 | 25x |
sides <- "two.sided" |
| 342 |
} |
|
| 343 | 26x |
iBinomCI <- function(x, n, conf.level = 0.95, sides = c("two.sided", "left", "right"), # nolint
|
| 344 | 26x |
method = c( |
| 345 | 26x |
"wilson", "wilsoncc", "wald", |
| 346 | 26x |
"waldcc", "agresti-coull", "jeffreys", "modified wilson", |
| 347 | 26x |
"modified jeffreys", "clopper-pearson", "arcsine", "logit", |
| 348 | 26x |
"witting", "pratt", "midp", "lik", "blaker" |
| 349 |
), |
|
| 350 | 26x |
rand = 123, |
| 351 | 26x |
tol = 1e-05) {
|
| 352 | 26x |
if (length(x) != 1) {
|
| 353 | ! |
stop("'x' has to be of length 1 (number of successes)")
|
| 354 |
} |
|
| 355 | 26x |
if (length(n) != 1) {
|
| 356 | ! |
stop("'n' has to be of length 1 (number of trials)")
|
| 357 |
} |
|
| 358 | 26x |
if (length(conf.level) != 1) {
|
| 359 | ! |
stop("'conf.level' has to be of length 1 (confidence level)")
|
| 360 |
} |
|
| 361 | 26x |
if (conf.level < 0.5 || conf.level > 1) {
|
| 362 | ! |
stop("'conf.level' has to be in [0.5, 1]")
|
| 363 |
} |
|
| 364 | 26x |
sides <- match.arg(sides, choices = c( |
| 365 | 26x |
"two.sided", "left", |
| 366 | 26x |
"right" |
| 367 | 26x |
), several.ok = FALSE) |
| 368 | 26x |
if (sides != "two.sided") {
|
| 369 | 1x |
conf.level <- 1 - 2 * (1 - conf.level) # nolint |
| 370 |
} |
|
| 371 | 26x |
alpha <- 1 - conf.level |
| 372 | 26x |
kappa <- stats::qnorm(1 - alpha / 2) |
| 373 | 26x |
p_hat <- x / n |
| 374 | 26x |
q_hat <- 1 - p_hat |
| 375 | 26x |
est <- p_hat |
| 376 | 26x |
switch(match.arg(arg = method, choices = c( |
| 377 | 26x |
"wilson", |
| 378 | 26x |
"wald", "waldcc", "wilsoncc", "agresti-coull", "jeffreys", |
| 379 | 26x |
"modified wilson", "modified jeffreys", "clopper-pearson", |
| 380 | 26x |
"arcsine", "logit", "witting", "pratt", "midp", "lik", |
| 381 | 26x |
"blaker" |
| 382 |
)), |
|
| 383 | 26x |
wald = {
|
| 384 | 1x |
term2 <- kappa * sqrt(p_hat * q_hat) / sqrt(n) |
| 385 | 1x |
ci_lwr <- max(0, p_hat - term2) |
| 386 | 1x |
ci_upr <- min(1, p_hat + term2) |
| 387 |
}, |
|
| 388 | 26x |
waldcc = {
|
| 389 | 1x |
term2 <- kappa * sqrt(p_hat * q_hat) / sqrt(n) |
| 390 | 1x |
term2 <- term2 + 1 / (2 * n) |
| 391 | 1x |
ci_lwr <- max(0, p_hat - term2) |
| 392 | 1x |
ci_upr <- min(1, p_hat + term2) |
| 393 |
}, |
|
| 394 | 26x |
wilson = {
|
| 395 | 8x |
term1 <- (x + kappa^2 / 2) / (n + kappa^2) |
| 396 | 8x |
term2 <- kappa * sqrt(n) / (n + kappa^2) * sqrt(p_hat * q_hat + kappa^2 / (4 * n)) |
| 397 | 8x |
ci_lwr <- max(0, term1 - term2) |
| 398 | 8x |
ci_upr <- min(1, term1 + term2) |
| 399 |
}, |
|
| 400 | 26x |
wilsoncc = {
|
| 401 | 3x |
lci <- ( |
| 402 | 3x |
2 * x + kappa^2 - 1 - kappa * sqrt(kappa^2 - 2 - 1 / n + 4 * p_hat * (n * q_hat + 1)) |
| 403 | 3x |
) / (2 * (n + kappa^2)) |
| 404 | 3x |
uci <- ( |
| 405 | 3x |
2 * x + kappa^2 + 1 + kappa * sqrt(kappa^2 + 2 - 1 / n + 4 * p_hat * (n * q_hat - 1)) |
| 406 | 3x |
) / (2 * (n + kappa^2)) |
| 407 | 3x |
ci_lwr <- max(0, ifelse(p_hat == 0, 0, lci)) |
| 408 | 3x |
ci_upr <- min(1, ifelse(p_hat == 1, 1, uci)) |
| 409 |
}, |
|
| 410 | 26x |
`agresti-coull` = {
|
| 411 | 1x |
x_tilde <- x + kappa^2 / 2 |
| 412 | 1x |
n_tilde <- n + kappa^2 |
| 413 | 1x |
p_tilde <- x_tilde / n_tilde |
| 414 | 1x |
q_tilde <- 1 - p_tilde |
| 415 | 1x |
est <- p_tilde |
| 416 | 1x |
term2 <- kappa * sqrt(p_tilde * q_tilde) / sqrt(n_tilde) |
| 417 | 1x |
ci_lwr <- max(0, p_tilde - term2) |
| 418 | 1x |
ci_upr <- min(1, p_tilde + term2) |
| 419 |
}, |
|
| 420 | 26x |
jeffreys = {
|
| 421 | 1x |
if (x == 0) {
|
| 422 | ! |
ci_lwr <- 0 |
| 423 |
} else {
|
|
| 424 | 1x |
ci_lwr <- stats::qbeta( |
| 425 | 1x |
alpha / 2, |
| 426 | 1x |
x + 0.5, n - x + 0.5 |
| 427 |
) |
|
| 428 |
} |
|
| 429 | 1x |
if (x == n) {
|
| 430 | ! |
ci_upr <- 1 |
| 431 |
} else {
|
|
| 432 | 1x |
ci_upr <- stats::qbeta(1 - alpha / 2, x + 0.5, n - x + 0.5) |
| 433 |
} |
|
| 434 |
}, |
|
| 435 | 26x |
`modified wilson` = {
|
| 436 | 1x |
term1 <- (x + kappa^2 / 2) / (n + kappa^2) |
| 437 | 1x |
term2 <- kappa * sqrt(n) / (n + kappa^2) * sqrt(p_hat * q_hat + kappa^2 / (4 * n)) |
| 438 | 1x |
if ((n <= 50 & x %in% c(1, 2)) | (n >= 51 & x %in% c(1:3))) {
|
| 439 | ! |
ci_lwr <- 0.5 * stats::qchisq(alpha, 2 * x) / n |
| 440 |
} else {
|
|
| 441 | 1x |
ci_lwr <- max(0, term1 - term2) |
| 442 |
} |
|
| 443 | 1x |
if ((n <= 50 & x %in% c(n - 1, n - 2)) | (n >= 51 & x %in% c(n - (1:3)))) {
|
| 444 | ! |
ci_upr <- 1 - 0.5 * stats::qchisq( |
| 445 | ! |
alpha, |
| 446 | ! |
2 * (n - x) |
| 447 | ! |
) / n |
| 448 |
} else {
|
|
| 449 | 1x |
ci_upr <- min(1, term1 + term2) |
| 450 |
} |
|
| 451 |
}, |
|
| 452 | 26x |
`modified jeffreys` = {
|
| 453 | 1x |
if (x == n) {
|
| 454 | ! |
ci_lwr <- (alpha / 2)^(1 / n) |
| 455 |
} else {
|
|
| 456 | 1x |
if (x <= 1) {
|
| 457 | ! |
ci_lwr <- 0 |
| 458 |
} else {
|
|
| 459 | 1x |
ci_lwr <- stats::qbeta( |
| 460 | 1x |
alpha / 2, |
| 461 | 1x |
x + 0.5, n - x + 0.5 |
| 462 |
) |
|
| 463 |
} |
|
| 464 |
} |
|
| 465 | 1x |
if (x == 0) {
|
| 466 | ! |
ci_upr <- 1 - (alpha / 2)^(1 / n) |
| 467 |
} else {
|
|
| 468 | 1x |
if (x >= n - 1) {
|
| 469 | ! |
ci_upr <- 1 |
| 470 |
} else {
|
|
| 471 | 1x |
ci_upr <- stats::qbeta(1 - alpha / 2, x + 0.5, n - x + 0.5) |
| 472 |
} |
|
| 473 |
} |
|
| 474 |
}, |
|
| 475 | 26x |
`clopper-pearson` = {
|
| 476 | 1x |
ci_lwr <- stats::qbeta(alpha / 2, x, n - x + 1) |
| 477 | 1x |
ci_upr <- stats::qbeta(1 - alpha / 2, x + 1, n - x) |
| 478 |
}, |
|
| 479 | 26x |
arcsine = {
|
| 480 | 1x |
p_tilde <- (x + 0.375) / (n + 0.75) |
| 481 | 1x |
est <- p_tilde |
| 482 | 1x |
ci_lwr <- sin(asin(sqrt(p_tilde)) - 0.5 * kappa / sqrt(n))^2 |
| 483 | 1x |
ci_upr <- sin(asin(sqrt(p_tilde)) + 0.5 * kappa / sqrt(n))^2 |
| 484 |
}, |
|
| 485 | 26x |
logit = {
|
| 486 | 1x |
lambda_hat <- log(x / (n - x)) |
| 487 | 1x |
V_hat <- n / (x * (n - x)) # nolint |
| 488 | 1x |
lambda_lower <- lambda_hat - kappa * sqrt(V_hat) |
| 489 | 1x |
lambda_upper <- lambda_hat + kappa * sqrt(V_hat) |
| 490 | 1x |
ci_lwr <- exp(lambda_lower) / (1 + exp(lambda_lower)) |
| 491 | 1x |
ci_upr <- exp(lambda_upper) / (1 + exp(lambda_upper)) |
| 492 |
}, |
|
| 493 | 26x |
witting = {
|
| 494 | 1x |
set.seed(rand) |
| 495 | 1x |
x_tilde <- x + stats::runif(1, min = 0, max = 1) |
| 496 | 1x |
pbinom_abscont <- function(q, size, prob) {
|
| 497 | 22x |
v <- trunc(q) |
| 498 | 22x |
term1 <- stats::pbinom(v - 1, size = size, prob = prob) |
| 499 | 22x |
term2 <- (q - v) * stats::dbinom(v, size = size, prob = prob) |
| 500 | 22x |
return(term1 + term2) |
| 501 |
} |
|
| 502 | 1x |
qbinom_abscont <- function(p, size, x) {
|
| 503 | 2x |
fun <- function(prob, size, x, p) {
|
| 504 | 22x |
pbinom_abscont(x, size, prob) - p |
| 505 |
} |
|
| 506 | 2x |
stats::uniroot(fun, |
| 507 | 2x |
interval = c(0, 1), size = size, |
| 508 | 2x |
x = x, p = p |
| 509 | 2x |
)$root |
| 510 |
} |
|
| 511 | 1x |
ci_lwr <- qbinom_abscont(1 - alpha, size = n, x = x_tilde) |
| 512 | 1x |
ci_upr <- qbinom_abscont(alpha, size = n, x = x_tilde) |
| 513 |
}, |
|
| 514 | 26x |
pratt = {
|
| 515 | 1x |
if (x == 0) {
|
| 516 | ! |
ci_lwr <- 0 |
| 517 | ! |
ci_upr <- 1 - alpha^(1 / n) |
| 518 | 1x |
} else if (x == 1) {
|
| 519 | ! |
ci_lwr <- 1 - (1 - alpha / 2)^(1 / n) |
| 520 | ! |
ci_upr <- 1 - (alpha / 2)^(1 / n) |
| 521 | 1x |
} else if (x == (n - 1)) {
|
| 522 | ! |
ci_lwr <- (alpha / 2)^(1 / n) |
| 523 | ! |
ci_upr <- (1 - alpha / 2)^(1 / n) |
| 524 | 1x |
} else if (x == n) {
|
| 525 | ! |
ci_lwr <- alpha^(1 / n) |
| 526 | ! |
ci_upr <- 1 |
| 527 |
} else {
|
|
| 528 | 1x |
z <- stats::qnorm(1 - alpha / 2) |
| 529 | 1x |
A <- ((x + 1) / (n - x))^2 # nolint |
| 530 | 1x |
B <- 81 * (x + 1) * (n - x) - 9 * n - 8 # nolint |
| 531 | 1x |
C <- (0 - 3) * z * sqrt(9 * (x + 1) * (n - x) * (9 * n + 5 - z^2) + n + 1) # nolint |
| 532 | 1x |
D <- 81 * (x + 1)^2 - 9 * (x + 1) * (2 + z^2) + 1 # nolint |
| 533 | 1x |
E <- 1 + A * ((B + C) / D)^3 # nolint |
| 534 | 1x |
ci_upr <- 1 / E |
| 535 | 1x |
A <- (x / (n - x - 1))^2 # nolint |
| 536 | 1x |
B <- 81 * x * (n - x - 1) - 9 * n - 8 # nolint |
| 537 | 1x |
C <- 3 * z * sqrt(9 * x * (n - x - 1) * (9 * n + 5 - z^2) + n + 1) # nolint |
| 538 | 1x |
D <- 81 * x^2 - 9 * x * (2 + z^2) + 1 # nolint |
| 539 | 1x |
E <- 1 + A * ((B + C) / D)^3 # nolint |
| 540 | 1x |
ci_lwr <- 1 / E |
| 541 |
} |
|
| 542 |
}, |
|
| 543 | 26x |
midp = {
|
| 544 | 1x |
f_low <- function(pi, x, n) {
|
| 545 | 12x |
1 / 2 * stats::dbinom(x, size = n, prob = pi) + stats::pbinom(x, |
| 546 | 12x |
size = n, prob = pi, lower.tail = FALSE |
| 547 |
) - |
|
| 548 | 12x |
(1 - conf.level) / 2 |
| 549 |
} |
|
| 550 | 1x |
f_up <- function(pi, x, n) {
|
| 551 | 12x |
1 / 2 * stats::dbinom(x, size = n, prob = pi) + stats::pbinom(x - 1, size = n, prob = pi) - (1 - conf.level) / 2 |
| 552 |
} |
|
| 553 | 1x |
ci_lwr <- 0 |
| 554 | 1x |
ci_upr <- 1 |
| 555 | 1x |
if (x != 0) {
|
| 556 | 1x |
ci_lwr <- stats::uniroot(f_low, |
| 557 | 1x |
interval = c(0, p_hat), |
| 558 | 1x |
x = x, n = n |
| 559 | 1x |
)$root |
| 560 |
} |
|
| 561 | 1x |
if (x != n) {
|
| 562 | 1x |
ci_upr <- stats::uniroot(f_up, interval = c( |
| 563 | 1x |
p_hat, |
| 564 | 1x |
1 |
| 565 | 1x |
), x = x, n = n)$root |
| 566 |
} |
|
| 567 |
}, |
|
| 568 | 26x |
lik = {
|
| 569 | 2x |
ci_lwr <- 0 |
| 570 | 2x |
ci_upr <- 1 |
| 571 | 2x |
z <- stats::qnorm(1 - alpha * 0.5) |
| 572 | 2x |
tol <- .Machine$double.eps^0.5 |
| 573 | 2x |
BinDev <- function(y, x, mu, wt, bound = 0, tol = .Machine$double.eps^0.5, # nolint |
| 574 |
...) {
|
|
| 575 | 40x |
ll_y <- ifelse(y %in% c(0, 1), 0, stats::dbinom(x, wt, |
| 576 | 40x |
y, |
| 577 | 40x |
log = TRUE |
| 578 |
)) |
|
| 579 | 40x |
ll_mu <- ifelse(mu %in% c(0, 1), 0, stats::dbinom(x, |
| 580 | 40x |
wt, mu, |
| 581 | 40x |
log = TRUE |
| 582 |
)) |
|
| 583 | 40x |
res <- ifelse(abs(y - mu) < tol, 0, sign(y - mu) * sqrt(-2 * (ll_y - ll_mu))) |
| 584 | 40x |
return(res - bound) |
| 585 |
} |
|
| 586 | 2x |
if (x != 0 && tol < p_hat) {
|
| 587 | 2x |
ci_lwr <- if (BinDev( |
| 588 | 2x |
tol, x, p_hat, n, -z, |
| 589 | 2x |
tol |
| 590 | 2x |
) <= 0) {
|
| 591 | 2x |
stats::uniroot( |
| 592 | 2x |
f = BinDev, interval = c(tol, if (p_hat < tol || p_hat == 1) {
|
| 593 | ! |
1 - tol |
| 594 |
} else {
|
|
| 595 | 2x |
p_hat |
| 596 | 2x |
}), bound = -z, |
| 597 | 2x |
x = x, mu = p_hat, wt = n |
| 598 | 2x |
)$root |
| 599 |
} |
|
| 600 |
} |
|
| 601 | 2x |
if (x != n && p_hat < (1 - tol)) {
|
| 602 | 2x |
ci_upr <- if ( |
| 603 | 2x |
BinDev(y = 1 - tol, x = x, mu = ifelse(p_hat > 1 - tol, tol, p_hat), wt = n, bound = z, tol = tol) < 0) { # nolint
|
| 604 | ! |
ci_lwr <- if (BinDev( |
| 605 | ! |
tol, x, if (p_hat < tol || p_hat == 1) {
|
| 606 | ! |
1 - tol |
| 607 |
} else {
|
|
| 608 | ! |
p_hat |
| 609 | ! |
}, n, |
| 610 | ! |
-z, tol |
| 611 | ! |
) <= 0) {
|
| 612 | ! |
stats::uniroot( |
| 613 | ! |
f = BinDev, interval = c(tol, p_hat), |
| 614 | ! |
bound = -z, x = x, mu = p_hat, wt = n |
| 615 | ! |
)$root |
| 616 |
} |
|
| 617 |
} else {
|
|
| 618 | 2x |
stats::uniroot( |
| 619 | 2x |
f = BinDev, interval = c(if (p_hat > 1 - tol) {
|
| 620 | ! |
tol |
| 621 |
} else {
|
|
| 622 | 2x |
p_hat |
| 623 | 2x |
}, 1 - tol), bound = z, |
| 624 | 2x |
x = x, mu = p_hat, wt = n |
| 625 | 2x |
)$root |
| 626 |
} |
|
| 627 |
} |
|
| 628 |
}, |
|
| 629 | 26x |
blaker = {
|
| 630 | 1x |
acceptbin <- function(x, n, p) {
|
| 631 | 3954x |
p1 <- 1 - stats::pbinom(x - 1, n, p) |
| 632 | 3954x |
p2 <- stats::pbinom(x, n, p) |
| 633 | 3954x |
a1 <- p1 + stats::pbinom(stats::qbinom(p1, n, p) - 1, n, p) |
| 634 | 3954x |
a2 <- p2 + 1 - stats::pbinom( |
| 635 | 3954x |
stats::qbinom(1 - p2, n, p), n, |
| 636 | 3954x |
p |
| 637 |
) |
|
| 638 | 3954x |
return(min(a1, a2)) |
| 639 |
} |
|
| 640 | 1x |
ci_lwr <- 0 |
| 641 | 1x |
ci_upr <- 1 |
| 642 | 1x |
if (x != 0) {
|
| 643 | 1x |
ci_lwr <- stats::qbeta((1 - conf.level) / 2, x, n - x + 1) |
| 644 | 1x |
while (acceptbin(x, n, ci_lwr + tol) < (1 - conf.level)) {
|
| 645 | 1976x |
ci_lwr <- ci_lwr + tol |
| 646 |
} |
|
| 647 |
} |
|
| 648 | 1x |
if (x != n) {
|
| 649 | 1x |
ci_upr <- stats::qbeta(1 - (1 - conf.level) / 2, x + 1, n - x) |
| 650 | 1x |
while (acceptbin(x, n, ci_upr - tol) < (1 - conf.level)) {
|
| 651 | 1976x |
ci_upr <- ci_upr - tol |
| 652 |
} |
|
| 653 |
} |
|
| 654 |
} |
|
| 655 |
) |
|
| 656 | 26x |
ci <- c(est = est, lwr.ci = max(0, ci_lwr), upr.ci = min( |
| 657 | 26x |
1, |
| 658 | 26x |
ci_upr |
| 659 |
)) |
|
| 660 | 26x |
if (sides == "left") {
|
| 661 | 1x |
ci[3] <- 1 |
| 662 | 25x |
} else if (sides == "right") {
|
| 663 | ! |
ci[2] <- 0 |
| 664 |
} |
|
| 665 | 26x |
return(ci) |
| 666 |
} |
|
| 667 | 26x |
lst <- list( |
| 668 | 26x |
x = x, n = n, conf.level = conf.level, sides = sides, |
| 669 | 26x |
method = method, rand = rand |
| 670 |
) |
|
| 671 | 26x |
maxdim <- max(unlist(lapply(lst, length))) |
| 672 | 26x |
lgp <- lapply(lst, rep, length.out = maxdim) |
| 673 | 26x |
lgn <- h_recycle(x = if (is.null(names(x))) {
|
| 674 | 26x |
paste("x", seq_along(x), sep = ".")
|
| 675 |
} else {
|
|
| 676 | ! |
names(x) |
| 677 | 26x |
}, n = if (is.null(names(n))) {
|
| 678 | 26x |
paste("n", seq_along(n), sep = ".")
|
| 679 |
} else {
|
|
| 680 | ! |
names(n) |
| 681 | 26x |
}, conf.level = conf.level, sides = sides, method = method) |
| 682 | 26x |
xn <- apply(as.data.frame(lgn[sapply(lgn, function(x) {
|
| 683 | 130x |
length(unique(x)) != |
| 684 | 130x |
1 |
| 685 | 26x |
})]), 1, paste, collapse = ":") |
| 686 | 26x |
res <- t(sapply(1:maxdim, function(i) {
|
| 687 | 26x |
iBinomCI( |
| 688 | 26x |
x = lgp$x[i], |
| 689 | 26x |
n = lgp$n[i], conf.level = lgp$conf.level[i], sides = lgp$sides[i], |
| 690 | 26x |
method = lgp$method[i], rand = lgp$rand[i] |
| 691 |
) |
|
| 692 |
})) |
|
| 693 | 26x |
colnames(res)[1] <- c("est")
|
| 694 | 26x |
rownames(res) <- xn |
| 695 | 26x |
return(res) |
| 696 |
} |
| 1 |
#' Additional assertions to use with `checkmate` |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Additional assertion functions which can be used together with the `checkmate` package. |
|
| 6 |
#' |
|
| 7 |
#' @inheritParams checkmate::assert_factor |
|
| 8 |
#' @param x (`any`)\cr object to test. |
|
| 9 |
#' @param df (`data.frame`)\cr data set to test. |
|
| 10 |
#' @param variables (named `list` of `character`)\cr list of variables to test. |
|
| 11 |
#' @param include_boundaries (`flag`)\cr whether to include boundaries when testing |
|
| 12 |
#' for proportions. |
|
| 13 |
#' @param na_level (`string`)\cr the string you have been using to represent NA or |
|
| 14 |
#' missing data. For `NA` values please consider using directly [is.na()] or |
|
| 15 |
#' similar approaches. |
|
| 16 |
#' |
|
| 17 |
#' @return Nothing if assertion passes, otherwise prints the error message. |
|
| 18 |
#' |
|
| 19 |
#' @name assertions |
|
| 20 |
NULL |
|
| 21 | ||
| 22 |
check_list_of_variables <- function(x) {
|
|
| 23 |
# drop NULL elements in list |
|
| 24 | 3019x |
x <- Filter(Negate(is.null), x) |
| 25 | ||
| 26 | 3019x |
res <- checkmate::check_list(x, |
| 27 | 3019x |
names = "named", |
| 28 | 3019x |
min.len = 1, |
| 29 | 3019x |
any.missing = FALSE, |
| 30 | 3019x |
types = "character" |
| 31 |
) |
|
| 32 |
# no empty strings allowed |
|
| 33 | 3019x |
if (isTRUE(res)) {
|
| 34 | 3014x |
res <- checkmate::check_character(unlist(x), min.chars = 1) |
| 35 |
} |
|
| 36 | 3019x |
res |
| 37 |
} |
|
| 38 |
#' @describeIn assertions Checks whether `x` is a valid list of variable names. |
|
| 39 |
#' `NULL` elements of the list `x` are dropped with `Filter(Negate(is.null), x)`. |
|
| 40 |
#' |
|
| 41 |
#' @keywords internal |
|
| 42 |
assert_list_of_variables <- checkmate::makeAssertionFunction(check_list_of_variables) |
|
| 43 | ||
| 44 |
check_df_with_variables <- function(df, variables, na_level = NULL) {
|
|
| 45 | 2702x |
checkmate::assert_data_frame(df) |
| 46 | 2700x |
assert_list_of_variables(variables) |
| 47 | ||
| 48 |
# flag for equal variables and column names |
|
| 49 | 2698x |
err_flag <- all(unlist(variables) %in% colnames(df)) |
| 50 | 2698x |
checkmate::assert_flag(err_flag) |
| 51 | ||
| 52 | 2698x |
if (isFALSE(err_flag)) {
|
| 53 | 5x |
vars <- setdiff(unlist(variables), colnames(df)) |
| 54 | 5x |
return(paste( |
| 55 | 5x |
deparse(substitute(df)), |
| 56 | 5x |
"does not contain all specified variables as column names. Missing from data frame:", |
| 57 | 5x |
paste(vars, collapse = ", ") |
| 58 |
)) |
|
| 59 |
} |
|
| 60 |
# checking if na_level is present and in which column |
|
| 61 | 2693x |
if (!is.null(na_level)) {
|
| 62 | 9x |
checkmate::assert_string(na_level) |
| 63 | 9x |
res <- unlist(lapply(as.list(df)[unlist(variables)], function(x) any(x == na_level))) |
| 64 | 9x |
if (any(res)) {
|
| 65 | 1x |
return(paste0( |
| 66 | 1x |
deparse(substitute(df)), " contains explicit na_level (", na_level,
|
| 67 | 1x |
") in the following columns: ", paste0(unlist(variables)[res], |
| 68 | 1x |
collapse = ", " |
| 69 |
) |
|
| 70 |
)) |
|
| 71 |
} |
|
| 72 |
} |
|
| 73 | 2692x |
return(TRUE) |
| 74 |
} |
|
| 75 |
#' @describeIn assertions Check whether `df` is a data frame with the analysis `variables`. |
|
| 76 |
#' Please notice how this produces an error when not all variables are present in the |
|
| 77 |
#' data.frame while the opposite is not required. |
|
| 78 |
#' |
|
| 79 |
#' @examples |
|
| 80 |
#' x <- data.frame( |
|
| 81 |
#' a = 1:10, |
|
| 82 |
#' b = rnorm(10) |
|
| 83 |
#' ) |
|
| 84 |
#' assert_df_with_variables(x, variables = list(a = "a", b = "b")) |
|
| 85 |
#' |
|
| 86 |
#' x <- ex_adsl |
|
| 87 |
#' assert_df_with_variables(x, list(a = "ARM", b = "USUBJID")) |
|
| 88 |
#' |
|
| 89 |
#' @export |
|
| 90 |
assert_df_with_variables <- checkmate::makeAssertionFunction(check_df_with_variables) |
|
| 91 | ||
| 92 |
check_valid_factor <- function(x, |
|
| 93 |
min.levels = 1, # nolint |
|
| 94 |
max.levels = NULL, # nolint |
|
| 95 |
null.ok = TRUE, # nolint |
|
| 96 |
any.missing = TRUE, # nolint |
|
| 97 |
n.levels = NULL, # nolint |
|
| 98 |
len = NULL) {
|
|
| 99 |
# checks on levels insertion |
|
| 100 | 1115x |
checkmate::assert_int(min.levels, lower = 1) |
| 101 | ||
| 102 |
# main factor check |
|
| 103 | 1115x |
res <- checkmate::check_factor(x, |
| 104 | 1115x |
min.levels = min.levels, |
| 105 | 1115x |
null.ok = null.ok, |
| 106 | 1115x |
max.levels = max.levels, |
| 107 | 1115x |
any.missing = any.missing, |
| 108 | 1115x |
n.levels = n.levels |
| 109 |
) |
|
| 110 | ||
| 111 |
# no empty strings allowed |
|
| 112 | 1115x |
if (isTRUE(res)) {
|
| 113 | 1101x |
res <- checkmate::check_character(levels(x), min.chars = 1) |
| 114 |
} |
|
| 115 | ||
| 116 | 1115x |
return(res) |
| 117 |
} |
|
| 118 |
#' @describeIn assertions Check whether `x` is a valid factor (i.e. has levels and no empty |
|
| 119 |
#' string levels). Note that `NULL` and `NA` elements are allowed. |
|
| 120 |
#' |
|
| 121 |
#' @keywords internal |
|
| 122 |
assert_valid_factor <- checkmate::makeAssertionFunction(check_valid_factor) |
|
| 123 | ||
| 124 |
check_df_with_factors <- function(df, |
|
| 125 |
variables, |
|
| 126 |
min.levels = 1, # nolint |
|
| 127 |
max.levels = NULL, # nolint |
|
| 128 |
any.missing = TRUE, # nolint |
|
| 129 |
na_level = NULL) {
|
|
| 130 | 254x |
res <- check_df_with_variables(df, variables, na_level) |
| 131 |
# checking if all the columns specified by variables are valid factors |
|
| 132 | 253x |
if (isTRUE(res)) {
|
| 133 |
# searching the data.frame with selected columns (variables) as a list |
|
| 134 | 251x |
res <- lapply( |
| 135 | 251x |
X = as.list(df)[unlist(variables)], |
| 136 | 251x |
FUN = check_valid_factor, |
| 137 | 251x |
min.levels = min.levels, |
| 138 | 251x |
max.levels = max.levels, |
| 139 | 251x |
any.missing = any.missing |
| 140 |
) |
|
| 141 | 251x |
res_lo <- unlist(vapply(res, Negate(isTRUE), logical(1))) |
| 142 | 251x |
if (any(res_lo)) {
|
| 143 | 6x |
return(paste0( |
| 144 | 6x |
deparse(substitute(df)), " does not contain only factor variables among:", |
| 145 | 6x |
"\n* Column `", paste0(unlist(variables)[res_lo], |
| 146 | 6x |
"` of the data.frame -> ", res[res_lo], |
| 147 | 6x |
collapse = "\n* " |
| 148 |
) |
|
| 149 |
)) |
|
| 150 |
} else {
|
|
| 151 | 245x |
res <- TRUE |
| 152 |
} |
|
| 153 |
} |
|
| 154 | 247x |
return(res) |
| 155 |
} |
|
| 156 | ||
| 157 |
#' @describeIn assertions Check whether `df` is a data frame where the analysis `variables` |
|
| 158 |
#' are all factors. Note that the creation of `NA` by direct call of `factor()` will |
|
| 159 |
#' trim `NA` levels out of the vector list itself. |
|
| 160 |
#' |
|
| 161 |
#' @examples |
|
| 162 |
#' x <- ex_adsl |
|
| 163 |
#' assert_df_with_factors(x, list(a = "ARM")) |
|
| 164 |
#' |
|
| 165 |
#' @export |
|
| 166 |
assert_df_with_factors <- checkmate::makeAssertionFunction(check_df_with_factors) |
|
| 167 | ||
| 168 |
#' @describeIn assertions Check whether `x` is a proportion: number between 0 and 1. |
|
| 169 |
#' |
|
| 170 |
#' @examples |
|
| 171 |
#' assert_proportion_value(0.95) |
|
| 172 |
#' assert_proportion_value(1.0, include_boundaries = TRUE) |
|
| 173 |
#' |
|
| 174 |
#' @export |
|
| 175 |
assert_proportion_value <- function(x, include_boundaries = FALSE) {
|
|
| 176 | 19396x |
checkmate::assert_number(x, lower = 0, upper = 1) |
| 177 | 19384x |
checkmate::assert_flag(include_boundaries) |
| 178 | 19384x |
if (isFALSE(include_boundaries)) {
|
| 179 | 13456x |
checkmate::assert_true(x > 0) |
| 180 | 13454x |
checkmate::assert_true(x < 1) |
| 181 |
} |
|
| 182 |
} |
| 1 |
#' Count occurrences by grade |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' The analyze function [count_occurrences_by_grade()] creates a layout element to calculate occurrence counts by grade. |
|
| 6 |
#' |
|
| 7 |
#' This function analyzes primary analysis variable `var` which indicates toxicity grades. The `id` variable |
|
| 8 |
#' is used to indicate unique subject identifiers (defaults to `USUBJID`). The user can also supply a list of |
|
| 9 |
#' custom groups of grades to analyze via the `grade_groups` parameter. The `remove_single` argument will |
|
| 10 |
#' remove single grades from the analysis so that *only* grade groups are analyzed. |
|
| 11 |
#' |
|
| 12 |
#' If there are multiple grades recorded for one patient only the highest grade level is counted. |
|
| 13 |
#' |
|
| 14 |
#' The summarize function [summarize_occurrences_by_grade()] performs the same function as |
|
| 15 |
#' [count_occurrences_by_grade()] except it creates content rows, not data rows, to summarize the current table |
|
| 16 |
#' row/column context and operates on the level of the latest row split or the root of the table if no row splits have |
|
| 17 |
#' occurred. |
|
| 18 |
#' |
|
| 19 |
#' @inheritParams count_occurrences |
|
| 20 |
#' @inheritParams argument_convention |
|
| 21 |
#' @param grade_groups (named `list` of `character`)\cr list containing groupings of grades. |
|
| 22 |
#' @param remove_single (`flag`)\cr `TRUE` to not include the elements of one-element grade groups |
|
| 23 |
#' in the the output list; in this case only the grade groups names will be included in the output. If |
|
| 24 |
#' `only_grade_groups` is set to `TRUE` this argument is ignored. |
|
| 25 |
#' @param only_grade_groups (`flag`)\cr whether only the specified grade groups should be |
|
| 26 |
#' included, with individual grade rows removed (`TRUE`), or all grades and grade groups |
|
| 27 |
#' should be displayed (`FALSE`). |
|
| 28 |
#' @param .stats (`character`)\cr statistics to select for the table. |
|
| 29 |
#' |
|
| 30 |
#' Options are: ``r shQuote(get_stats("count_occurrences_by_grade"), type = "sh")``
|
|
| 31 |
#' |
|
| 32 |
#' @seealso Relevant helper function [h_append_grade_groups()]. |
|
| 33 |
#' |
|
| 34 |
#' @name count_occurrences_by_grade |
|
| 35 |
#' @order 1 |
|
| 36 |
NULL |
|
| 37 | ||
| 38 |
#' Helper function for `s_count_occurrences_by_grade()` |
|
| 39 |
#' |
|
| 40 |
#' @description `r lifecycle::badge("stable")`
|
|
| 41 |
#' |
|
| 42 |
#' Helper function for [s_count_occurrences_by_grade()] to insert grade groupings into list with |
|
| 43 |
#' individual grade frequencies. The order of the final result follows the order of `grade_groups`. |
|
| 44 |
#' The elements under any-grade group (if any), i.e. the grade group equal to `refs` will be moved to |
|
| 45 |
#' the end. Grade groups names must be unique. |
|
| 46 |
#' |
|
| 47 |
#' @inheritParams count_occurrences_by_grade |
|
| 48 |
#' @param refs (named `list` of `numeric`)\cr named list where each name corresponds to a reference grade level |
|
| 49 |
#' and each entry represents a count. |
|
| 50 |
#' |
|
| 51 |
#' @return Formatted list of grade groupings. |
|
| 52 |
#' |
|
| 53 |
#' @examples |
|
| 54 |
#' h_append_grade_groups( |
|
| 55 |
#' list( |
|
| 56 |
#' "Any Grade" = as.character(1:5), |
|
| 57 |
#' "Grade 1-2" = c("1", "2"),
|
|
| 58 |
#' "Grade 3-4" = c("3", "4")
|
|
| 59 |
#' ), |
|
| 60 |
#' list("1" = 10, "2" = 20, "3" = 30, "4" = 40, "5" = 50)
|
|
| 61 |
#' ) |
|
| 62 |
#' |
|
| 63 |
#' h_append_grade_groups( |
|
| 64 |
#' list( |
|
| 65 |
#' "Any Grade" = as.character(5:1), |
|
| 66 |
#' "Grade A" = "5", |
|
| 67 |
#' "Grade B" = c("4", "3")
|
|
| 68 |
#' ), |
|
| 69 |
#' list("1" = 10, "2" = 20, "3" = 30, "4" = 40, "5" = 50)
|
|
| 70 |
#' ) |
|
| 71 |
#' |
|
| 72 |
#' h_append_grade_groups( |
|
| 73 |
#' list( |
|
| 74 |
#' "Any Grade" = as.character(1:5), |
|
| 75 |
#' "Grade 1-2" = c("1", "2"),
|
|
| 76 |
#' "Grade 3-4" = c("3", "4")
|
|
| 77 |
#' ), |
|
| 78 |
#' list("1" = 10, "2" = 5, "3" = 0)
|
|
| 79 |
#' ) |
|
| 80 |
#' |
|
| 81 |
#' @export |
|
| 82 |
h_append_grade_groups <- function(grade_groups, refs, remove_single = TRUE, only_grade_groups = FALSE) {
|
|
| 83 | 32x |
checkmate::assert_list(grade_groups) |
| 84 | 32x |
checkmate::assert_list(refs) |
| 85 | 32x |
refs_orig <- refs |
| 86 | 32x |
elements <- unique(unlist(grade_groups)) |
| 87 | ||
| 88 |
### compute sums in groups |
|
| 89 | 32x |
grp_sum <- lapply(grade_groups, function(i) do.call(sum, refs[i])) |
| 90 | 32x |
if (!checkmate::test_subset(elements, names(refs))) {
|
| 91 | 2x |
padding_el <- setdiff(elements, names(refs)) |
| 92 | 2x |
refs[padding_el] <- 0 |
| 93 |
} |
|
| 94 | 32x |
result <- c(grp_sum, refs) |
| 95 | ||
| 96 |
### order result while keeping grade_groups's ordering |
|
| 97 | 32x |
ordr <- grade_groups |
| 98 | ||
| 99 |
# elements of any-grade group (if any) will be moved to the end |
|
| 100 | 32x |
is_any <- sapply(grade_groups, setequal, y = names(refs)) |
| 101 | 32x |
ordr[is_any] <- list(character(0)) # hide elements under any-grade group |
| 102 | ||
| 103 |
# groups-elements combined sequence |
|
| 104 | 32x |
ordr <- c(lapply(names(ordr), function(g) c(g, ordr[[g]])), recursive = TRUE, use.names = FALSE) |
| 105 | 32x |
ordr <- ordr[!duplicated(ordr)] |
| 106 | ||
| 107 |
# append remaining elements (if any) |
|
| 108 | 32x |
ordr <- union(ordr, unlist(grade_groups[is_any])) # from any-grade group |
| 109 | 32x |
ordr <- union(ordr, names(refs)) # from refs |
| 110 | ||
| 111 |
# remove elements of single-element groups, if any |
|
| 112 | 32x |
if (only_grade_groups) {
|
| 113 | 3x |
ordr <- intersect(ordr, names(grade_groups)) |
| 114 | 29x |
} else if (remove_single) {
|
| 115 | 29x |
is_single <- sapply(grade_groups, length) == 1L |
| 116 | 29x |
ordr <- setdiff(ordr, unlist(grade_groups[is_single])) |
| 117 |
} |
|
| 118 | ||
| 119 |
# apply the order |
|
| 120 | 32x |
result <- result[ordr] |
| 121 | ||
| 122 |
# remove groups without any elements in the original refs |
|
| 123 |
# note: it's OK if groups have 0 value |
|
| 124 | 32x |
keep_grp <- vapply(grade_groups, function(x, rf) {
|
| 125 | 64x |
any(x %in% rf) |
| 126 | 32x |
}, rf = names(refs_orig), logical(1)) |
| 127 | ||
| 128 | 32x |
keep_el <- names(result) %in% names(refs_orig) | names(result) %in% names(keep_grp)[keep_grp] |
| 129 | 32x |
result <- result[keep_el] |
| 130 | ||
| 131 | 32x |
result |
| 132 |
} |
|
| 133 | ||
| 134 |
#' @describeIn count_occurrences_by_grade Statistics function which counts the |
|
| 135 |
#' number of patients by highest grade. |
|
| 136 |
#' |
|
| 137 |
#' @return |
|
| 138 |
#' * `s_count_occurrences_by_grade()` returns a list of counts and fractions with one element per grade level or |
|
| 139 |
#' grade level grouping. |
|
| 140 |
#' |
|
| 141 |
#' @examples |
|
| 142 |
#' s_count_occurrences_by_grade( |
|
| 143 |
#' df, |
|
| 144 |
#' .N_col = 10L, |
|
| 145 |
#' .var = "AETOXGR", |
|
| 146 |
#' id = "USUBJID", |
|
| 147 |
#' grade_groups = list("ANY" = levels(df$AETOXGR))
|
|
| 148 |
#' ) |
|
| 149 |
#' |
|
| 150 |
#' @export |
|
| 151 |
s_count_occurrences_by_grade <- function(df, |
|
| 152 |
labelstr = "", |
|
| 153 |
.var, |
|
| 154 |
.N_row, # nolint |
|
| 155 |
.N_col, # nolint |
|
| 156 |
..., |
|
| 157 |
id = "USUBJID", |
|
| 158 |
grade_groups = list(), |
|
| 159 |
remove_single = TRUE, |
|
| 160 |
only_grade_groups = FALSE, |
|
| 161 |
denom = c("N_col", "n", "N_row")) {
|
|
| 162 | 75x |
assert_valid_factor(df[[.var]]) |
| 163 | 75x |
assert_df_with_variables(df, list(grade = .var, id = id)) |
| 164 | ||
| 165 | 75x |
denom <- match.arg(denom) %>% |
| 166 | 75x |
switch( |
| 167 | 75x |
n = nlevels(factor(df[[id]])), |
| 168 | 75x |
N_row = .N_row, |
| 169 | 75x |
N_col = .N_col |
| 170 |
) |
|
| 171 | ||
| 172 | 75x |
if (nrow(df) < 1) {
|
| 173 | 5x |
grade_levels <- levels(df[[.var]]) |
| 174 | 5x |
l_count <- as.list(rep(0, length(grade_levels))) |
| 175 | 5x |
names(l_count) <- grade_levels |
| 176 |
} else {
|
|
| 177 | 70x |
if (isTRUE(is.factor(df[[id]]))) {
|
| 178 | ! |
assert_valid_factor(df[[id]], any.missing = FALSE) |
| 179 |
} else {
|
|
| 180 | 70x |
checkmate::assert_character(df[[id]], min.chars = 1, any.missing = FALSE) |
| 181 |
} |
|
| 182 | 70x |
checkmate::assert_count(.N_col) |
| 183 | ||
| 184 | 70x |
id <- df[[id]] |
| 185 | 70x |
grade <- df[[.var]] |
| 186 | ||
| 187 | 70x |
if (!is.ordered(grade)) {
|
| 188 | 70x |
grade_lbl <- obj_label(grade) |
| 189 | 70x |
lvls <- levels(grade) |
| 190 | 70x |
if (sum(grepl("^\\d+$", lvls)) %in% c(0, length(lvls))) {
|
| 191 | 69x |
lvl_ord <- lvls |
| 192 |
} else {
|
|
| 193 | 1x |
lvls[!grepl("^\\d+$", lvls)] <- min(as.numeric(lvls[grepl("^\\d+$", lvls)])) - 1
|
| 194 | 1x |
lvl_ord <- levels(grade)[order(as.numeric(lvls))] |
| 195 |
} |
|
| 196 | 70x |
grade <- formatters::with_label(factor(grade, levels = lvl_ord, ordered = TRUE), grade_lbl) |
| 197 |
} |
|
| 198 | ||
| 199 | 70x |
missing_lvl <- grepl("missing", tolower(levels(grade)))
|
| 200 | 70x |
if (any(missing_lvl)) {
|
| 201 | 1x |
grade <- factor( |
| 202 | 1x |
grade, |
| 203 | 1x |
levels = c(levels(grade)[!missing_lvl], levels(grade)[missing_lvl]), |
| 204 | 1x |
ordered = is.ordered(grade) |
| 205 |
) |
|
| 206 |
} |
|
| 207 | 70x |
df_max <- stats::aggregate(grade ~ id, FUN = max, drop = FALSE) |
| 208 | 70x |
l_count <- as.list(table(df_max$grade)) |
| 209 |
} |
|
| 210 | ||
| 211 | 75x |
if (length(grade_groups) > 0) {
|
| 212 | 30x |
l_count <- h_append_grade_groups(grade_groups, l_count, remove_single, only_grade_groups) |
| 213 |
} |
|
| 214 | ||
| 215 | 75x |
l_count_fraction <- lapply( |
| 216 | 75x |
l_count, |
| 217 | 75x |
function(i, denom) {
|
| 218 | 299x |
if (i == 0 && denom == 0) {
|
| 219 | 9x |
c(0, 0) |
| 220 |
} else {
|
|
| 221 | 290x |
c(i, i / denom) |
| 222 |
} |
|
| 223 |
}, |
|
| 224 | 75x |
denom = denom |
| 225 |
) |
|
| 226 | ||
| 227 | 75x |
list( |
| 228 | 75x |
count_fraction = l_count_fraction, |
| 229 | 75x |
count_fraction_fixed_dp = l_count_fraction |
| 230 |
) |
|
| 231 |
} |
|
| 232 | ||
| 233 |
#' @describeIn count_occurrences_by_grade Formatted analysis function which is used as `afun` |
|
| 234 |
#' in `count_occurrences_by_grade()`. |
|
| 235 |
#' |
|
| 236 |
#' @return |
|
| 237 |
#' * `a_count_occurrences_by_grade()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
| 238 |
#' |
|
| 239 |
#' @examples |
|
| 240 |
#' a_count_occurrences_by_grade( |
|
| 241 |
#' df, |
|
| 242 |
#' .N_col = 10L, |
|
| 243 |
#' .N_row = 10L, |
|
| 244 |
#' .var = "AETOXGR", |
|
| 245 |
#' id = "USUBJID", |
|
| 246 |
#' grade_groups = list("ANY" = levels(df$AETOXGR))
|
|
| 247 |
#' ) |
|
| 248 |
#' |
|
| 249 |
#' @export |
|
| 250 |
a_count_occurrences_by_grade <- function(df, |
|
| 251 |
labelstr = "", |
|
| 252 |
..., |
|
| 253 |
.stats = NULL, |
|
| 254 |
.stat_names = NULL, |
|
| 255 |
.formats = NULL, |
|
| 256 |
.labels = NULL, |
|
| 257 |
.indent_mods = NULL) {
|
|
| 258 |
# Check for additional parameters to the statistics function |
|
| 259 | 56x |
dots_extra_args <- list(...) |
| 260 | 56x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
| 261 | 56x |
dots_extra_args$.additional_fun_parameters <- NULL |
| 262 | ||
| 263 |
# Check for user-defined functions |
|
| 264 | 56x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
| 265 | 56x |
.stats <- default_and_custom_stats_list$all_stats |
| 266 | 56x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
| 267 | ||
| 268 |
# Apply statistics function |
|
| 269 | 56x |
x_stats <- .apply_stat_functions( |
| 270 | 56x |
default_stat_fnc = s_count_occurrences_by_grade, |
| 271 | 56x |
custom_stat_fnc_list = custom_stat_functions, |
| 272 | 56x |
args_list = c( |
| 273 | 56x |
df = list(df), |
| 274 | 56x |
labelstr = list(labelstr), |
| 275 | 56x |
extra_afun_params, |
| 276 | 56x |
dots_extra_args |
| 277 |
) |
|
| 278 |
) |
|
| 279 | ||
| 280 |
# Fill in formatting defaults |
|
| 281 | 56x |
.stats <- get_stats("count_occurrences_by_grade", stats_in = .stats, custom_stats_in = names(custom_stat_functions))
|
| 282 | 56x |
x_stats <- x_stats[.stats] |
| 283 | 56x |
levels_per_stats <- lapply(x_stats, names) |
| 284 | 56x |
.formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) |
| 285 | 56x |
.labels <- get_labels_from_stats(.stats, .labels, levels_per_stats) |
| 286 | 56x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) |
| 287 | ||
| 288 | 56x |
x_stats <- x_stats[.stats] %>% |
| 289 | 56x |
.unlist_keep_nulls() %>% |
| 290 | 56x |
setNames(names(.formats)) |
| 291 | ||
| 292 |
# Auto format handling |
|
| 293 | 56x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
| 294 | ||
| 295 |
# Get and check statistical names |
|
| 296 | 56x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
| 297 | ||
| 298 | 56x |
in_rows( |
| 299 | 56x |
.list = x_stats, |
| 300 | 56x |
.formats = .formats, |
| 301 | 56x |
.names = .labels %>% .unlist_keep_nulls(), |
| 302 | 56x |
.stat_names = .stat_names, |
| 303 | 56x |
.labels = .labels %>% .unlist_keep_nulls(), |
| 304 | 56x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
| 305 |
) |
|
| 306 |
} |
|
| 307 | ||
| 308 |
#' @describeIn count_occurrences_by_grade Layout-creating function which can take statistics function |
|
| 309 |
#' arguments and additional format arguments. This function is a wrapper for [rtables::analyze()]. |
|
| 310 |
#' |
|
| 311 |
#' @return |
|
| 312 |
#' * `count_occurrences_by_grade()` returns a layout object suitable for passing to further layouting functions, |
|
| 313 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing |
|
| 314 |
#' the statistics from `s_count_occurrences_by_grade()` to the table layout. |
|
| 315 |
#' |
|
| 316 |
#' @examples |
|
| 317 |
#' library(dplyr) |
|
| 318 |
#' |
|
| 319 |
#' df <- data.frame( |
|
| 320 |
#' USUBJID = as.character(c(1:6, 1)), |
|
| 321 |
#' ARM = factor(c("A", "A", "A", "B", "B", "B", "A"), levels = c("A", "B")),
|
|
| 322 |
#' AETOXGR = factor(c(1, 2, 3, 4, 1, 2, 3), levels = c(1:5)), |
|
| 323 |
#' AESEV = factor( |
|
| 324 |
#' x = c("MILD", "MODERATE", "SEVERE", "MILD", "MILD", "MODERATE", "SEVERE"),
|
|
| 325 |
#' levels = c("MILD", "MODERATE", "SEVERE")
|
|
| 326 |
#' ), |
|
| 327 |
#' stringsAsFactors = FALSE |
|
| 328 |
#' ) |
|
| 329 |
#' |
|
| 330 |
#' df_adsl <- df %>% |
|
| 331 |
#' select(USUBJID, ARM) %>% |
|
| 332 |
#' unique() |
|
| 333 |
#' |
|
| 334 |
#' # Layout creating function with custom format. |
|
| 335 |
#' basic_table() %>% |
|
| 336 |
#' split_cols_by("ARM") %>%
|
|
| 337 |
#' add_colcounts() %>% |
|
| 338 |
#' count_occurrences_by_grade( |
|
| 339 |
#' var = "AESEV", |
|
| 340 |
#' .formats = c("count_fraction" = "xx.xx (xx.xx%)")
|
|
| 341 |
#' ) %>% |
|
| 342 |
#' build_table(df, alt_counts_df = df_adsl) |
|
| 343 |
#' |
|
| 344 |
#' # Define additional grade groupings. |
|
| 345 |
#' grade_groups <- list( |
|
| 346 |
#' "-Any-" = c("1", "2", "3", "4", "5"),
|
|
| 347 |
#' "Grade 1-2" = c("1", "2"),
|
|
| 348 |
#' "Grade 3-5" = c("3", "4", "5")
|
|
| 349 |
#' ) |
|
| 350 |
#' |
|
| 351 |
#' basic_table() %>% |
|
| 352 |
#' split_cols_by("ARM") %>%
|
|
| 353 |
#' add_colcounts() %>% |
|
| 354 |
#' count_occurrences_by_grade( |
|
| 355 |
#' var = "AETOXGR", |
|
| 356 |
#' grade_groups = grade_groups, |
|
| 357 |
#' only_grade_groups = TRUE |
|
| 358 |
#' ) %>% |
|
| 359 |
#' build_table(df, alt_counts_df = df_adsl) |
|
| 360 |
#' |
|
| 361 |
#' @export |
|
| 362 |
#' @order 2 |
|
| 363 |
count_occurrences_by_grade <- function(lyt, |
|
| 364 |
var, |
|
| 365 |
id = "USUBJID", |
|
| 366 |
grade_groups = list(), |
|
| 367 |
remove_single = TRUE, |
|
| 368 |
only_grade_groups = FALSE, |
|
| 369 |
var_labels = var, |
|
| 370 |
show_labels = "default", |
|
| 371 |
riskdiff = FALSE, |
|
| 372 |
na_str = default_na_str(), |
|
| 373 |
nested = TRUE, |
|
| 374 |
..., |
|
| 375 |
table_names = var, |
|
| 376 |
.stats = "count_fraction", |
|
| 377 |
.stat_names = NULL, |
|
| 378 |
.formats = list(count_fraction = format_count_fraction_fixed_dp), |
|
| 379 |
.labels = NULL, |
|
| 380 |
.indent_mods = NULL) {
|
|
| 381 | 12x |
checkmate::assert_flag(riskdiff) |
| 382 | 12x |
afun <- if (isFALSE(riskdiff)) a_count_occurrences_by_grade else afun_riskdiff |
| 383 | ||
| 384 |
# Process standard extra arguments |
|
| 385 | 12x |
extra_args <- list(".stats" = .stats)
|
| 386 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
| 387 | 12x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
| 388 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
| 389 | 1x |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
| 390 | ||
| 391 |
# Process additional arguments to the statistic function |
|
| 392 | 12x |
extra_args <- c( |
| 393 | 12x |
extra_args, |
| 394 | 12x |
id = id, grade_groups = list(grade_groups), remove_single = remove_single, only_grade_groups = only_grade_groups, |
| 395 | 12x |
if (!isFALSE(riskdiff)) list(afun = list("s_count_occurrences_by_grade" = a_count_occurrences_by_grade)),
|
| 396 |
... |
|
| 397 |
) |
|
| 398 | ||
| 399 |
# Append additional info from layout to the analysis function |
|
| 400 | 12x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
| 401 | 12x |
formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]]) |
| 402 | ||
| 403 | 12x |
analyze( |
| 404 | 12x |
lyt = lyt, |
| 405 | 12x |
vars = var, |
| 406 | 12x |
afun = afun, |
| 407 | 12x |
na_str = na_str, |
| 408 | 12x |
nested = nested, |
| 409 | 12x |
extra_args = extra_args, |
| 410 | 12x |
var_labels = var_labels, |
| 411 | 12x |
show_labels = show_labels, |
| 412 | 12x |
table_names = table_names |
| 413 |
) |
|
| 414 |
} |
|
| 415 | ||
| 416 |
#' @describeIn count_occurrences_by_grade Layout-creating function which can take content function arguments |
|
| 417 |
#' and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()]. |
|
| 418 |
#' |
|
| 419 |
#' @return |
|
| 420 |
#' * `summarize_occurrences_by_grade()` returns a layout object suitable for passing to further layouting functions, |
|
| 421 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted content rows |
|
| 422 |
#' containing the statistics from `s_count_occurrences_by_grade()` to the table layout. |
|
| 423 |
#' |
|
| 424 |
#' @examples |
|
| 425 |
#' # Layout creating function with custom format. |
|
| 426 |
#' basic_table() %>% |
|
| 427 |
#' add_colcounts() %>% |
|
| 428 |
#' split_rows_by("ARM", child_labels = "visible", nested = TRUE) %>%
|
|
| 429 |
#' summarize_occurrences_by_grade( |
|
| 430 |
#' var = "AESEV", |
|
| 431 |
#' .formats = c("count_fraction" = "xx.xx (xx.xx%)")
|
|
| 432 |
#' ) %>% |
|
| 433 |
#' build_table(df, alt_counts_df = df_adsl) |
|
| 434 |
#' |
|
| 435 |
#' basic_table() %>% |
|
| 436 |
#' add_colcounts() %>% |
|
| 437 |
#' split_rows_by("ARM", child_labels = "visible", nested = TRUE) %>%
|
|
| 438 |
#' summarize_occurrences_by_grade( |
|
| 439 |
#' var = "AETOXGR", |
|
| 440 |
#' grade_groups = grade_groups |
|
| 441 |
#' ) %>% |
|
| 442 |
#' build_table(df, alt_counts_df = df_adsl) |
|
| 443 |
#' |
|
| 444 |
#' @export |
|
| 445 |
#' @order 3 |
|
| 446 |
summarize_occurrences_by_grade <- function(lyt, |
|
| 447 |
var, |
|
| 448 |
id = "USUBJID", |
|
| 449 |
grade_groups = list(), |
|
| 450 |
remove_single = TRUE, |
|
| 451 |
only_grade_groups = FALSE, |
|
| 452 |
riskdiff = FALSE, |
|
| 453 |
na_str = default_na_str(), |
|
| 454 |
..., |
|
| 455 |
.stats = "count_fraction", |
|
| 456 |
.stat_names = NULL, |
|
| 457 |
.formats = list(count_fraction = format_count_fraction_fixed_dp), |
|
| 458 |
.labels = NULL, |
|
| 459 |
.indent_mods = 0L) {
|
|
| 460 | 6x |
checkmate::assert_flag(riskdiff) |
| 461 | 6x |
afun <- if (isFALSE(riskdiff)) a_count_occurrences_by_grade else afun_riskdiff |
| 462 | ||
| 463 |
# Process standard extra arguments |
|
| 464 | 6x |
extra_args <- list(".stats" = .stats)
|
| 465 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
| 466 | 6x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
| 467 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
| 468 | 6x |
if (is.null(.indent_mods)) {
|
| 469 | ! |
indent_mod <- 0L |
| 470 | 6x |
} else if (length(.indent_mods) == 1) {
|
| 471 | 6x |
indent_mod <- .indent_mods |
| 472 |
} else {
|
|
| 473 | ! |
indent_mod <- 0L |
| 474 | ! |
extra_args[[".indent_mods"]] <- .indent_mods |
| 475 |
} |
|
| 476 | ||
| 477 |
# Process additional arguments to the statistic function |
|
| 478 | 6x |
extra_args <- c( |
| 479 | 6x |
extra_args, |
| 480 | 6x |
id = id, grade_groups = list(grade_groups), remove_single = remove_single, only_grade_groups = only_grade_groups, |
| 481 | 6x |
if (!isFALSE(riskdiff)) list(afun = list("s_count_occurrences_by_grade" = a_count_occurrences_by_grade)),
|
| 482 |
... |
|
| 483 |
) |
|
| 484 | ||
| 485 |
# Append additional info from layout to the analysis function |
|
| 486 | 6x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
| 487 | 6x |
formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]]) |
| 488 | ||
| 489 | 6x |
summarize_row_groups( |
| 490 | 6x |
lyt = lyt, |
| 491 | 6x |
var = var, |
| 492 | 6x |
cfun = afun, |
| 493 | 6x |
na_str = na_str, |
| 494 | 6x |
extra_args = extra_args, |
| 495 | 6x |
indent_mod = indent_mod |
| 496 |
) |
|
| 497 |
} |
| 1 |
#' Tabulate biomarker effects on binary response by subgroup |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' The [tabulate_rsp_biomarkers()] function creates a layout element to tabulate the estimated biomarker effects on a |
|
| 6 |
#' binary response endpoint across subgroups, returning statistics including response rate and odds ratio for each |
|
| 7 |
#' population subgroup. The table is created from `df`, a list of data frames returned by [extract_rsp_biomarkers()], |
|
| 8 |
#' with the statistics to include specified via the `vars` parameter. |
|
| 9 |
#' |
|
| 10 |
#' A forest plot can be created from the resulting table using the [g_forest()] function. |
|
| 11 |
#' |
|
| 12 |
#' @inheritParams argument_convention |
|
| 13 |
#' @param df (`data.frame`)\cr containing all analysis variables, as returned by |
|
| 14 |
#' [extract_rsp_biomarkers()]. |
|
| 15 |
#' @param vars (`character`)\cr the names of statistics to be reported among: |
|
| 16 |
#' * `n_tot`: Total number of patients per group. |
|
| 17 |
#' * `n_rsp`: Total number of responses per group. |
|
| 18 |
#' * `prop`: Total response proportion per group. |
|
| 19 |
#' * `or`: Odds ratio. |
|
| 20 |
#' * `ci`: Confidence interval of odds ratio. |
|
| 21 |
#' * `pval`: p-value of the effect. |
|
| 22 |
#' Note, the statistics `n_tot`, `or` and `ci` are required. |
|
| 23 |
#' |
|
| 24 |
#' @return An `rtables` table summarizing biomarker effects on binary response by subgroup. |
|
| 25 |
#' |
|
| 26 |
#' @details These functions create a layout starting from a data frame which contains |
|
| 27 |
#' the required statistics. The tables are then typically used as input for forest plots. |
|
| 28 |
#' |
|
| 29 |
#' @note In contrast to [tabulate_rsp_subgroups()] this tabulation function does |
|
| 30 |
#' not start from an input layout `lyt`. This is because internally the table is |
|
| 31 |
#' created by combining multiple subtables. |
|
| 32 |
#' |
|
| 33 |
#' @seealso [extract_rsp_biomarkers()] |
|
| 34 |
#' |
|
| 35 |
#' @examples |
|
| 36 |
#' library(dplyr) |
|
| 37 |
#' library(forcats) |
|
| 38 |
#' |
|
| 39 |
#' adrs <- tern_ex_adrs |
|
| 40 |
#' adrs_labels <- formatters::var_labels(adrs) |
|
| 41 |
#' |
|
| 42 |
#' adrs_f <- adrs %>% |
|
| 43 |
#' filter(PARAMCD == "BESRSPI") %>% |
|
| 44 |
#' mutate(rsp = AVALC == "CR") |
|
| 45 |
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response") |
|
| 46 |
#' |
|
| 47 |
#' df <- extract_rsp_biomarkers( |
|
| 48 |
#' variables = list( |
|
| 49 |
#' rsp = "rsp", |
|
| 50 |
#' biomarkers = c("BMRKR1", "AGE"),
|
|
| 51 |
#' covariates = "SEX", |
|
| 52 |
#' subgroups = "BMRKR2" |
|
| 53 |
#' ), |
|
| 54 |
#' data = adrs_f |
|
| 55 |
#' ) |
|
| 56 |
#' |
|
| 57 |
#' \donttest{
|
|
| 58 |
#' ## Table with default columns. |
|
| 59 |
#' tabulate_rsp_biomarkers(df) |
|
| 60 |
#' |
|
| 61 |
#' ## Table with a manually chosen set of columns: leave out "pval", reorder. |
|
| 62 |
#' tab <- tabulate_rsp_biomarkers( |
|
| 63 |
#' df = df, |
|
| 64 |
#' vars = c("n_rsp", "ci", "n_tot", "prop", "or")
|
|
| 65 |
#' ) |
|
| 66 |
#' |
|
| 67 |
#' ## Finally produce the forest plot. |
|
| 68 |
#' g_forest(tab, xlim = c(0.7, 1.4)) |
|
| 69 |
#' } |
|
| 70 |
#' |
|
| 71 |
#' @export |
|
| 72 |
#' @name response_biomarkers_subgroups |
|
| 73 |
tabulate_rsp_biomarkers <- function(df, |
|
| 74 |
vars = c("n_tot", "n_rsp", "prop", "or", "ci", "pval"),
|
|
| 75 |
na_str = default_na_str(), |
|
| 76 |
..., |
|
| 77 |
.stat_names = NULL, |
|
| 78 |
.formats = NULL, |
|
| 79 |
.labels = NULL, |
|
| 80 |
.indent_mods = NULL) {
|
|
| 81 | 4x |
checkmate::assert_data_frame(df) |
| 82 | 4x |
checkmate::assert_character(df$biomarker) |
| 83 | 4x |
checkmate::assert_character(df$biomarker_label) |
| 84 | 4x |
checkmate::assert_subset(vars, get_stats("tabulate_rsp_biomarkers"))
|
| 85 | ||
| 86 |
# Process standard extra arguments |
|
| 87 | 4x |
extra_args <- list(".stats" = vars)
|
| 88 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
| 89 | ! |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
| 90 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
| 91 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
| 92 | ||
| 93 | 4x |
colvars <- d_rsp_subgroups_colvars( |
| 94 | 4x |
vars, |
| 95 | 4x |
conf_level = df$conf_level[1], |
| 96 | 4x |
method = df$pval_label[1] |
| 97 |
) |
|
| 98 | ||
| 99 |
# Process additional arguments to the statistic function |
|
| 100 | 4x |
extra_args <- c(extra_args, biomarker = TRUE, ...) |
| 101 | ||
| 102 |
# Adding additional info from layout to analysis function |
|
| 103 | 4x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
| 104 | 4x |
formals(a_response_subgroups) <- c(formals(a_response_subgroups), extra_args[[".additional_fun_parameters"]]) |
| 105 | ||
| 106 |
# Create "ci" column from "lcl" and "ucl" |
|
| 107 | 4x |
df$ci <- combine_vectors(df$lcl, df$ucl) |
| 108 | ||
| 109 | 4x |
df_subs <- split(df, f = df$biomarker) |
| 110 | 4x |
tbls <- lapply( |
| 111 | 4x |
df_subs, |
| 112 | 4x |
function(df) {
|
| 113 | 7x |
lyt <- basic_table() |
| 114 | ||
| 115 |
# Split cols by the multiple variables to populate into columns. |
|
| 116 | 7x |
lyt <- split_cols_by_multivar( |
| 117 | 7x |
lyt = lyt, |
| 118 | 7x |
vars = colvars$vars, |
| 119 | 7x |
varlabels = colvars$labels |
| 120 |
) |
|
| 121 | ||
| 122 |
# Row split by biomarker |
|
| 123 | 7x |
lyt <- split_rows_by( |
| 124 | 7x |
lyt = lyt, |
| 125 | 7x |
var = "biomarker_label", |
| 126 | 7x |
nested = FALSE |
| 127 |
) |
|
| 128 | ||
| 129 |
# Add "All Patients" row |
|
| 130 | 7x |
lyt <- split_rows_by( |
| 131 | 7x |
lyt = lyt, |
| 132 | 7x |
var = "row_type", |
| 133 | 7x |
split_fun = keep_split_levels("content"),
|
| 134 | 7x |
nested = TRUE, |
| 135 | 7x |
child_labels = "hidden" |
| 136 |
) |
|
| 137 | 7x |
lyt <- analyze_colvars( |
| 138 | 7x |
lyt = lyt, |
| 139 | 7x |
afun = a_response_subgroups, |
| 140 | 7x |
na_str = na_str, |
| 141 | 7x |
extra_args = c(extra_args, overall = TRUE) |
| 142 |
) |
|
| 143 | ||
| 144 |
# Add analysis rows |
|
| 145 | 7x |
if ("analysis" %in% df$row_type) {
|
| 146 | 4x |
lyt <- split_rows_by( |
| 147 | 4x |
lyt = lyt, |
| 148 | 4x |
var = "row_type", |
| 149 | 4x |
split_fun = keep_split_levels("analysis"),
|
| 150 | 4x |
nested = TRUE, |
| 151 | 4x |
child_labels = "hidden" |
| 152 |
) |
|
| 153 | 4x |
lyt <- split_rows_by( |
| 154 | 4x |
lyt = lyt, |
| 155 | 4x |
var = "var_label", |
| 156 | 4x |
nested = TRUE, |
| 157 | 4x |
indent_mod = 1L |
| 158 |
) |
|
| 159 | 4x |
lyt <- analyze_colvars( |
| 160 | 4x |
lyt = lyt, |
| 161 | 4x |
afun = a_response_subgroups, |
| 162 | 4x |
na_str = na_str, |
| 163 | 4x |
inclNAs = TRUE, |
| 164 | 4x |
extra_args = extra_args |
| 165 |
) |
|
| 166 |
} |
|
| 167 | 7x |
build_table(lyt, df = df) |
| 168 |
} |
|
| 169 |
) |
|
| 170 | ||
| 171 | 4x |
result <- do.call(rbind, tbls) |
| 172 | ||
| 173 | 4x |
n_id <- grep("n_tot", vars)
|
| 174 | 4x |
or_id <- match("or", vars)
|
| 175 | 4x |
ci_id <- match("ci", vars)
|
| 176 | 4x |
structure( |
| 177 | 4x |
result, |
| 178 | 4x |
forest_header = paste0(c("Lower", "Higher"), "\nBetter"),
|
| 179 | 4x |
col_x = or_id, |
| 180 | 4x |
col_ci = ci_id, |
| 181 | 4x |
col_symbol_size = n_id |
| 182 |
) |
|
| 183 |
} |
|
| 184 | ||
| 185 |
#' Prepare response data estimates for multiple biomarkers in a single data frame |
|
| 186 |
#' |
|
| 187 |
#' @description `r lifecycle::badge("stable")`
|
|
| 188 |
#' |
|
| 189 |
#' Prepares estimates for number of responses, patients and overall response rate, |
|
| 190 |
#' as well as odds ratio estimates, confidence intervals and p-values, |
|
| 191 |
#' for multiple biomarkers across population subgroups in a single data frame. |
|
| 192 |
#' `variables` corresponds to the names of variables found in `data`, passed as a |
|
| 193 |
#' named list and requires elements `rsp` and `biomarkers` (vector of continuous |
|
| 194 |
#' biomarker variables) and optionally `covariates`, `subgroups` and `strata`. |
|
| 195 |
#' `groups_lists` optionally specifies groupings for `subgroups` variables. |
|
| 196 |
#' |
|
| 197 |
#' @inheritParams argument_convention |
|
| 198 |
#' @inheritParams response_subgroups |
|
| 199 |
#' @param control (named `list`)\cr controls for the response definition and the |
|
| 200 |
#' confidence level produced by [control_logistic()]. |
|
| 201 |
#' |
|
| 202 |
#' @return A `data.frame` with columns `biomarker`, `biomarker_label`, `n_tot`, `n_rsp`, |
|
| 203 |
#' `prop`, `or`, `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`, |
|
| 204 |
#' `var_label`, and `row_type`. |
|
| 205 |
#' |
|
| 206 |
#' @note You can also specify a continuous variable in `rsp` and then use the |
|
| 207 |
#' `response_definition` control to convert that internally to a logical |
|
| 208 |
#' variable reflecting binary response. |
|
| 209 |
#' |
|
| 210 |
#' @seealso [h_logistic_mult_cont_df()] which is used internally. |
|
| 211 |
#' |
|
| 212 |
#' @examples |
|
| 213 |
#' library(dplyr) |
|
| 214 |
#' library(forcats) |
|
| 215 |
#' |
|
| 216 |
#' adrs <- tern_ex_adrs |
|
| 217 |
#' adrs_labels <- formatters::var_labels(adrs) |
|
| 218 |
#' |
|
| 219 |
#' adrs_f <- adrs %>% |
|
| 220 |
#' filter(PARAMCD == "BESRSPI") %>% |
|
| 221 |
#' mutate(rsp = AVALC == "CR") |
|
| 222 |
#' |
|
| 223 |
#' # Typical analysis of two continuous biomarkers `BMRKR1` and `AGE`, |
|
| 224 |
#' # in logistic regression models with one covariate `RACE`. The subgroups |
|
| 225 |
#' # are defined by the levels of `BMRKR2`. |
|
| 226 |
#' df <- extract_rsp_biomarkers( |
|
| 227 |
#' variables = list( |
|
| 228 |
#' rsp = "rsp", |
|
| 229 |
#' biomarkers = c("BMRKR1", "AGE"),
|
|
| 230 |
#' covariates = "SEX", |
|
| 231 |
#' subgroups = "BMRKR2" |
|
| 232 |
#' ), |
|
| 233 |
#' data = adrs_f |
|
| 234 |
#' ) |
|
| 235 |
#' df |
|
| 236 |
#' |
|
| 237 |
#' # Here we group the levels of `BMRKR2` manually, and we add a stratification |
|
| 238 |
#' # variable `STRATA1`. We also here use a continuous variable `EOSDY` |
|
| 239 |
#' # which is then binarized internally (response is defined as this variable |
|
| 240 |
#' # being larger than 750). |
|
| 241 |
#' df_grouped <- extract_rsp_biomarkers( |
|
| 242 |
#' variables = list( |
|
| 243 |
#' rsp = "EOSDY", |
|
| 244 |
#' biomarkers = c("BMRKR1", "AGE"),
|
|
| 245 |
#' covariates = "SEX", |
|
| 246 |
#' subgroups = "BMRKR2", |
|
| 247 |
#' strata = "STRATA1" |
|
| 248 |
#' ), |
|
| 249 |
#' data = adrs_f, |
|
| 250 |
#' groups_lists = list( |
|
| 251 |
#' BMRKR2 = list( |
|
| 252 |
#' "low" = "LOW", |
|
| 253 |
#' "low/medium" = c("LOW", "MEDIUM"),
|
|
| 254 |
#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
|
|
| 255 |
#' ) |
|
| 256 |
#' ), |
|
| 257 |
#' control = control_logistic( |
|
| 258 |
#' response_definition = "I(response > 750)" |
|
| 259 |
#' ) |
|
| 260 |
#' ) |
|
| 261 |
#' df_grouped |
|
| 262 |
#' |
|
| 263 |
#' @export |
|
| 264 |
extract_rsp_biomarkers <- function(variables, |
|
| 265 |
data, |
|
| 266 |
groups_lists = list(), |
|
| 267 |
control = control_logistic(), |
|
| 268 |
label_all = "All Patients") {
|
|
| 269 | 5x |
if ("strat" %in% names(variables)) {
|
| 270 | ! |
warning( |
| 271 | ! |
"Warning: the `strat` element name of the `variables` list argument to `extract_rsp_biomarkers() ", |
| 272 | ! |
"was deprecated in tern 0.9.4.\n ", |
| 273 | ! |
"Please use the name `strata` instead of `strat` in the `variables` argument." |
| 274 |
) |
|
| 275 | ! |
variables[["strata"]] <- variables[["strat"]] |
| 276 |
} |
|
| 277 | ||
| 278 | 5x |
assert_list_of_variables(variables) |
| 279 | 5x |
checkmate::assert_string(variables$rsp) |
| 280 | 5x |
checkmate::assert_character(variables$subgroups, null.ok = TRUE) |
| 281 | 5x |
checkmate::assert_string(label_all) |
| 282 | ||
| 283 |
# Start with all patients. |
|
| 284 | 5x |
result_all <- h_logistic_mult_cont_df( |
| 285 | 5x |
variables = variables, |
| 286 | 5x |
data = data, |
| 287 | 5x |
control = control |
| 288 |
) |
|
| 289 | 5x |
result_all$subgroup <- label_all |
| 290 | 5x |
result_all$var <- "ALL" |
| 291 | 5x |
result_all$var_label <- label_all |
| 292 | 5x |
result_all$row_type <- "content" |
| 293 | 5x |
if (is.null(variables$subgroups)) {
|
| 294 |
# Only return result for all patients. |
|
| 295 | 1x |
result_all |
| 296 |
} else {
|
|
| 297 |
# Add subgroups results. |
|
| 298 | 4x |
l_data <- h_split_by_subgroups( |
| 299 | 4x |
data, |
| 300 | 4x |
variables$subgroups, |
| 301 | 4x |
groups_lists = groups_lists |
| 302 |
) |
|
| 303 | 4x |
l_result <- lapply(l_data, function(grp) {
|
| 304 | 20x |
result <- h_logistic_mult_cont_df( |
| 305 | 20x |
variables = variables, |
| 306 | 20x |
data = grp$df, |
| 307 | 20x |
control = control |
| 308 |
) |
|
| 309 | 20x |
result_labels <- grp$df_labels[rep(1, times = nrow(result)), ] |
| 310 | 20x |
cbind(result, result_labels) |
| 311 |
}) |
|
| 312 | 4x |
result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE)) |
| 313 | 4x |
result_subgroups$row_type <- "analysis" |
| 314 | 4x |
rbind( |
| 315 | 4x |
result_all, |
| 316 | 4x |
result_subgroups |
| 317 |
) |
|
| 318 |
} |
|
| 319 |
} |
| 1 |
#' Control function for descriptive statistics |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Sets a list of parameters for summaries of descriptive statistics. Typically used internally to specify |
|
| 6 |
#' details for [s_summary()]. This function family is mainly used by [analyze_vars()]. |
|
| 7 |
#' |
|
| 8 |
#' @inheritParams argument_convention |
|
| 9 |
#' @param quantiles (`numeric(2)`)\cr vector of length two to specify the quantiles to calculate. |
|
| 10 |
#' @param quantile_type (`numeric(1)`)\cr number between 1 and 9 selecting quantile algorithms to be used. |
|
| 11 |
#' Default is set to 2 as this matches the default quantile algorithm in SAS `proc univariate` set by `QNTLDEF=5`. |
|
| 12 |
#' This differs from R's default. See more about `type` in [stats::quantile()]. |
|
| 13 |
#' @param test_mean (`numeric(1)`)\cr number to test against the mean under the null hypothesis when calculating |
|
| 14 |
#' p-value. |
|
| 15 |
#' |
|
| 16 |
#' @return A list of components with the same names as the arguments. |
|
| 17 |
#' |
|
| 18 |
#' @export |
|
| 19 |
control_analyze_vars <- function(conf_level = 0.95, |
|
| 20 |
quantiles = c(0.25, 0.75), |
|
| 21 |
quantile_type = 2, |
|
| 22 |
test_mean = 0) {
|
|
| 23 | 1152x |
checkmate::assert_vector(quantiles, len = 2) |
| 24 | 1152x |
checkmate::assert_int(quantile_type, lower = 1, upper = 9) |
| 25 | 1152x |
checkmate::assert_numeric(test_mean) |
| 26 | 1152x |
lapply(quantiles, assert_proportion_value) |
| 27 | 1151x |
assert_proportion_value(conf_level) |
| 28 | 1150x |
list(conf_level = conf_level, quantiles = quantiles, quantile_type = quantile_type, test_mean = test_mean) |
| 29 |
} |
|
| 30 | ||
| 31 |
# Helper function to fix numeric or counts pval if necessary |
|
| 32 |
.correct_num_or_counts_pval <- function(type, .stats) {
|
|
| 33 | 352x |
if (type == "numeric") {
|
| 34 | 110x |
if (!is.null(.stats) && any(grepl("^pval", .stats))) {
|
| 35 | 10x |
.stats[grepl("^pval", .stats)] <- "pval" # tmp fix xxx
|
| 36 |
} |
|
| 37 |
} else {
|
|
| 38 | 242x |
if (!is.null(.stats) && any(grepl("^pval", .stats))) {
|
| 39 | 9x |
.stats[grepl("^pval", .stats)] <- "pval_counts" # tmp fix xxx
|
| 40 |
} |
|
| 41 |
} |
|
| 42 | 352x |
.stats |
| 43 |
} |
|
| 44 | ||
| 45 |
#' Analyze variables |
|
| 46 |
#' |
|
| 47 |
#' @description `r lifecycle::badge("stable")`
|
|
| 48 |
#' |
|
| 49 |
#' The analyze function [analyze_vars()] creates a layout element to summarize one or more variables, using the S3 |
|
| 50 |
#' generic function [s_summary()] to calculate a list of summary statistics. A list of all available statistics for |
|
| 51 |
#' numeric variables can be viewed by running `get_stats("analyze_vars_numeric")` and for non-numeric variables by
|
|
| 52 |
#' running `get_stats("analyze_vars_counts")`. Use the `.stats` parameter to specify the statistics to include in your
|
|
| 53 |
#' output summary table. Use `compare_with_ref_group = TRUE` to compare the variable with reference groups. |
|
| 54 |
#' |
|
| 55 |
#' @details |
|
| 56 |
#' **Automatic digit formatting:** The number of digits to display can be automatically determined from the analyzed |
|
| 57 |
#' variable(s) (`vars`) for certain statistics by setting the statistic format to `"auto"` in `.formats`. |
|
| 58 |
#' This utilizes the [format_auto()] formatting function. Note that only data for the current row & variable (for all |
|
| 59 |
#' columns) will be considered (`.df_row[[.var]]`, see [`rtables::additional_fun_params`]) and not the whole dataset. |
|
| 60 |
#' |
|
| 61 |
#' @inheritParams argument_convention |
|
| 62 |
#' @param .stats (`character`)\cr statistics to select for the table. |
|
| 63 |
#' |
|
| 64 |
#' Options for numeric variables are: ``r shQuote(get_stats("analyze_vars_numeric"), type = "sh")``
|
|
| 65 |
#' |
|
| 66 |
#' Options for non-numeric variables are: ``r shQuote(get_stats("analyze_vars_counts"), type = "sh")``
|
|
| 67 |
#' |
|
| 68 |
#' @name analyze_variables |
|
| 69 |
#' @order 1 |
|
| 70 |
NULL |
|
| 71 | ||
| 72 |
#' @describeIn analyze_variables S3 generic function to produces a variable summary. |
|
| 73 |
#' |
|
| 74 |
#' @return |
|
| 75 |
#' * `s_summary()` returns different statistics depending on the class of `x`. |
|
| 76 |
#' |
|
| 77 |
#' @export |
|
| 78 |
s_summary <- function(x, ...) {
|
|
| 79 | 1718x |
UseMethod("s_summary", x)
|
| 80 |
} |
|
| 81 | ||
| 82 |
#' @describeIn analyze_variables Method for `numeric` class. |
|
| 83 |
#' |
|
| 84 |
#' @param control (`list`)\cr parameters for descriptive statistics details, specified by using |
|
| 85 |
#' the helper function [control_analyze_vars()]. Some possible parameter options are: |
|
| 86 |
#' * `conf_level` (`proportion`)\cr confidence level of the interval for mean and median. |
|
| 87 |
#' * `quantiles` (`numeric(2)`)\cr vector of length two to specify the quantiles. |
|
| 88 |
#' * `quantile_type` (`numeric(1)`)\cr between 1 and 9 selecting quantile algorithms to be used. |
|
| 89 |
#' See more about `type` in [stats::quantile()]. |
|
| 90 |
#' * `test_mean` (`numeric(1)`)\cr value to test against the mean under the null hypothesis when calculating p-value. |
|
| 91 |
#' |
|
| 92 |
#' @return |
|
| 93 |
#' * If `x` is of class `numeric`, returns a `list` with the following named `numeric` items: |
|
| 94 |
#' * `n`: The [length()] of `x`. |
|
| 95 |
#' * `sum`: The [sum()] of `x`. |
|
| 96 |
#' * `mean`: The [mean()] of `x`. |
|
| 97 |
#' * `sd`: The [stats::sd()] of `x`. |
|
| 98 |
#' * `se`: The standard error of `x` mean, i.e.: (`sd(x) / sqrt(length(x))`). |
|
| 99 |
#' * `mean_sd`: The [mean()] and [stats::sd()] of `x`. |
|
| 100 |
#' * `mean_se`: The [mean()] of `x` and its standard error (see above). |
|
| 101 |
#' * `mean_ci`: The CI for the mean of `x` (from [stat_mean_ci()]). |
|
| 102 |
#' * `mean_sei`: The SE interval for the mean of `x`, i.e.: ([mean()] -/+ [stats::sd()] / [sqrt()]). |
|
| 103 |
#' * `mean_sdi`: The SD interval for the mean of `x`, i.e.: ([mean()] -/+ [stats::sd()]). |
|
| 104 |
#' * `mean_pval`: The two-sided p-value of the mean of `x` (from [stat_mean_pval()]). |
|
| 105 |
#' * `median`: The [stats::median()] of `x`. |
|
| 106 |
#' * `mad`: The median absolute deviation of `x`, i.e.: ([stats::median()] of `xc`, |
|
| 107 |
#' where `xc` = `x` - [stats::median()]). |
|
| 108 |
#' * `median_ci`: The CI for the median of `x` (from [stat_median_ci()]). |
|
| 109 |
#' * `quantiles`: Two sample quantiles of `x` (from [stats::quantile()]). |
|
| 110 |
#' * `iqr`: The [stats::IQR()] of `x`. |
|
| 111 |
#' * `range`: The [range_noinf()] of `x`. |
|
| 112 |
#' * `min`: The [max()] of `x`. |
|
| 113 |
#' * `max`: The [min()] of `x`. |
|
| 114 |
#' * `median_range`: The [median()] and [range_noinf()] of `x`. |
|
| 115 |
#' * `cv`: The coefficient of variation of `x`, i.e.: ([stats::sd()] / [mean()] * 100). |
|
| 116 |
#' * `geom_mean`: The geometric mean of `x`, i.e.: (`exp(mean(log(x)))`). |
|
| 117 |
#' * `geom_cv`: The geometric coefficient of variation of `x`, i.e.: (`sqrt(exp(sd(log(x)) ^ 2) - 1) * 100`). |
|
| 118 |
#' |
|
| 119 |
#' @note |
|
| 120 |
#' * If `x` is an empty vector, `NA` is returned. This is the expected feature so as to return `rcell` content in |
|
| 121 |
#' `rtables` when the intersection of a column and a row delimits an empty data selection. |
|
| 122 |
#' * When the `mean` function is applied to an empty vector, `NA` will be returned instead of `NaN`, the latter |
|
| 123 |
#' being standard behavior in R. |
|
| 124 |
#' |
|
| 125 |
#' @method s_summary numeric |
|
| 126 |
#' |
|
| 127 |
#' @examples |
|
| 128 |
#' # `s_summary.numeric` |
|
| 129 |
#' |
|
| 130 |
#' ## Basic usage: empty numeric returns NA-filled items. |
|
| 131 |
#' s_summary(numeric()) |
|
| 132 |
#' |
|
| 133 |
#' ## Management of NA values. |
|
| 134 |
#' x <- c(NA_real_, 1) |
|
| 135 |
#' s_summary(x, na_rm = TRUE) |
|
| 136 |
#' s_summary(x, na_rm = FALSE) |
|
| 137 |
#' |
|
| 138 |
#' x <- c(NA_real_, 1, 2) |
|
| 139 |
#' s_summary(x) |
|
| 140 |
#' |
|
| 141 |
#' ## Benefits in `rtables` contructions: |
|
| 142 |
#' dta_test <- data.frame( |
|
| 143 |
#' Group = rep(LETTERS[seq(3)], each = 2), |
|
| 144 |
#' sub_group = rep(letters[seq(2)], each = 3), |
|
| 145 |
#' x = seq(6) |
|
| 146 |
#' ) |
|
| 147 |
#' |
|
| 148 |
#' ## The summary obtained in with `rtables`: |
|
| 149 |
#' basic_table() %>% |
|
| 150 |
#' split_cols_by(var = "Group") %>% |
|
| 151 |
#' split_rows_by(var = "sub_group") %>% |
|
| 152 |
#' analyze(vars = "x", afun = s_summary) %>% |
|
| 153 |
#' build_table(df = dta_test) |
|
| 154 |
#' |
|
| 155 |
#' ## By comparison with `lapply`: |
|
| 156 |
#' X <- split(dta_test, f = with(dta_test, interaction(Group, sub_group))) |
|
| 157 |
#' lapply(X, function(x) s_summary(x$x)) |
|
| 158 |
#' |
|
| 159 |
#' @export |
|
| 160 |
s_summary.numeric <- function(x, control = control_analyze_vars(), ...) {
|
|
| 161 | 1197x |
checkmate::assert_numeric(x) |
| 162 | 1197x |
args_list <- list(...) |
| 163 | 1197x |
.N_row <- args_list[[".N_row"]] # nolint |
| 164 | 1197x |
.N_col <- args_list[[".N_col"]] # nolint |
| 165 | 1197x |
na_rm <- args_list[["na_rm"]] %||% TRUE |
| 166 | 1197x |
compare_with_ref_group <- args_list[["compare_with_ref_group"]] |
| 167 | ||
| 168 | 1197x |
if (na_rm) {
|
| 169 | 1195x |
x <- x[!is.na(x)] |
| 170 |
} # no explicit NA because it should be numeric |
|
| 171 | ||
| 172 | 1197x |
y <- list() |
| 173 | ||
| 174 | 1197x |
y$n <- c("n" = length(x))
|
| 175 | ||
| 176 | 1197x |
y$sum <- c("sum" = ifelse(length(x) == 0, NA_real_, sum(x, na.rm = FALSE)))
|
| 177 | ||
| 178 | 1197x |
y$mean <- c("mean" = ifelse(length(x) == 0, NA_real_, mean(x, na.rm = FALSE)))
|
| 179 | ||
| 180 | 1197x |
y$sd <- c("sd" = stats::sd(x, na.rm = FALSE))
|
| 181 | ||
| 182 | 1197x |
y$se <- c("se" = stats::sd(x, na.rm = FALSE) / sqrt(length(stats::na.omit(x))))
|
| 183 | ||
| 184 | 1197x |
y$mean_sd <- c(y$mean, "sd" = stats::sd(x, na.rm = FALSE)) |
| 185 | ||
| 186 | 1197x |
y$mean_se <- c(y$mean, y$se) |
| 187 | ||
| 188 | 1197x |
mean_ci <- stat_mean_ci(x, conf_level = control$conf_level, na.rm = FALSE, gg_helper = FALSE) |
| 189 | 1197x |
y$mean_ci <- formatters::with_label(mean_ci, paste("Mean", f_conf_level(control$conf_level)))
|
| 190 | ||
| 191 | 1197x |
mean_sei <- y$mean[[1]] + c(-1, 1) * stats::sd(x, na.rm = FALSE) / sqrt(y$n) |
| 192 | 1197x |
names(mean_sei) <- c("mean_sei_lwr", "mean_sei_upr")
|
| 193 | 1197x |
y$mean_sei <- formatters::with_label(mean_sei, "Mean -/+ 1xSE") |
| 194 | ||
| 195 | 1197x |
mean_sdi <- y$mean[[1]] + c(-1, 1) * stats::sd(x, na.rm = FALSE) |
| 196 | 1197x |
names(mean_sdi) <- c("mean_sdi_lwr", "mean_sdi_upr")
|
| 197 | 1197x |
y$mean_sdi <- formatters::with_label(mean_sdi, "Mean -/+ 1xSD") |
| 198 | 1197x |
mean_ci_3d <- c(y$mean, y$mean_ci) |
| 199 | 1197x |
y$mean_ci_3d <- formatters::with_label(mean_ci_3d, paste0("Mean (", f_conf_level(control$conf_level), ")"))
|
| 200 | ||
| 201 | 1197x |
mean_pval <- stat_mean_pval(x, test_mean = control$test_mean, na.rm = FALSE, n_min = 2) |
| 202 | 1197x |
y$mean_pval <- formatters::with_label(mean_pval, paste("Mean", f_pval(control$test_mean)))
|
| 203 | ||
| 204 | 1197x |
y$median <- c("median" = stats::median(x, na.rm = FALSE))
|
| 205 | ||
| 206 | 1197x |
y$mad <- c("mad" = stats::median(x - y$median, na.rm = FALSE))
|
| 207 | ||
| 208 | 1197x |
median_ci <- stat_median_ci(x, conf_level = control$conf_level, na.rm = FALSE, gg_helper = FALSE) |
| 209 | 1197x |
y$median_ci <- formatters::with_label(median_ci, paste("Median", f_conf_level(control$conf_level)))
|
| 210 | ||
| 211 | 1197x |
median_ci_3d <- c(y$median, median_ci) |
| 212 | 1197x |
y$median_ci_3d <- formatters::with_label(median_ci_3d, paste0("Median (", f_conf_level(control$conf_level), ")"))
|
| 213 | ||
| 214 | 1197x |
q <- control$quantiles |
| 215 | 1197x |
if (any(is.na(x))) {
|
| 216 | 2x |
qnts <- rep(NA_real_, length(q)) |
| 217 |
} else {
|
|
| 218 | 1195x |
qnts <- stats::quantile(x, probs = q, type = control$quantile_type, na.rm = FALSE) |
| 219 |
} |
|
| 220 | 1197x |
names(qnts) <- paste("quantile", q, sep = "_")
|
| 221 | 1197x |
y$quantiles <- formatters::with_label(qnts, paste0(paste(paste0(q * 100, "%"), collapse = " and "), "-ile")) |
| 222 | ||
| 223 | 1197x |
y$iqr <- c("iqr" = ifelse(
|
| 224 | 1197x |
any(is.na(x)), |
| 225 | 1197x |
NA_real_, |
| 226 | 1197x |
stats::IQR(x, na.rm = FALSE, type = control$quantile_type) |
| 227 |
)) |
|
| 228 | ||
| 229 | 1197x |
y$range <- stats::setNames(range_noinf(x, na.rm = FALSE), c("min", "max"))
|
| 230 | 1197x |
y$min <- y$range[1] |
| 231 | 1197x |
y$max <- y$range[2] |
| 232 | ||
| 233 | 1197x |
y$median_range <- formatters::with_label(c(y$median, y$range), "Median (Min - Max)") |
| 234 | ||
| 235 | 1197x |
y$cv <- c("cv" = unname(y$sd) / unname(y$mean) * 100)
|
| 236 | ||
| 237 |
# Geometric Mean - Convert negative values to NA for log calculation. |
|
| 238 | 1197x |
geom_verbose <- args_list[["geom_verbose"]] %||% FALSE # Additional info if requested |
| 239 | 1197x |
checkmate::assert_flag(geom_verbose) |
| 240 | 1197x |
x_no_negative_vals <- x |
| 241 | 1197x |
if (identical(x_no_negative_vals, numeric())) {
|
| 242 | 76x |
x_no_negative_vals <- NA |
| 243 |
} |
|
| 244 | 1197x |
x_no_negative_vals[x_no_negative_vals <= 0] <- NA |
| 245 | 1197x |
if (geom_verbose) {
|
| 246 | 2x |
if (any(x <= 0)) {
|
| 247 | 2x |
warning("Negative values were converted to NA for calculation of the geometric mean.")
|
| 248 |
} |
|
| 249 | 2x |
if (all(is.na(x_no_negative_vals))) {
|
| 250 | 1x |
warning("Since all values are negative or NA, the geometric mean is NA.")
|
| 251 |
} |
|
| 252 |
} |
|
| 253 | 1197x |
y$geom_mean <- c("geom_mean" = exp(mean(log(x_no_negative_vals), na.rm = FALSE)))
|
| 254 | 1197x |
y$geom_sd <- c("geom_sd" = geom_sd <- exp(sd(log(x_no_negative_vals), na.rm = FALSE)))
|
| 255 | 1197x |
y$geom_mean_sd <- c(y$geom_mean, y$geom_sd) |
| 256 | 1197x |
geom_mean_ci <- stat_mean_ci(x, conf_level = control$conf_level, na.rm = FALSE, gg_helper = FALSE, geom_mean = TRUE) |
| 257 | 1197x |
y$geom_mean_ci <- formatters::with_label(geom_mean_ci, paste("Geometric Mean", f_conf_level(control$conf_level)))
|
| 258 | ||
| 259 | 1197x |
y$geom_cv <- c("geom_cv" = sqrt(exp(stats::sd(log(x_no_negative_vals), na.rm = FALSE) ^ 2) - 1) * 100) # styler: off
|
| 260 | ||
| 261 | 1197x |
geom_mean_ci_3d <- c(y$geom_mean, y$geom_mean_ci) |
| 262 | 1197x |
y$geom_mean_ci_3d <- formatters::with_label( |
| 263 | 1197x |
geom_mean_ci_3d, |
| 264 | 1197x |
paste0("Geometric Mean (", f_conf_level(control$conf_level), ")")
|
| 265 |
) |
|
| 266 | ||
| 267 |
# Compare with reference group |
|
| 268 | 1197x |
if (isTRUE(compare_with_ref_group)) {
|
| 269 | 13x |
.ref_group <- args_list[[".ref_group"]] |
| 270 | 13x |
.in_ref_col <- args_list[[".in_ref_col"]] |
| 271 | 13x |
checkmate::assert_numeric(.ref_group) |
| 272 | 13x |
checkmate::assert_flag(.in_ref_col) |
| 273 | ||
| 274 | 13x |
y$pval <- numeric() |
| 275 | 13x |
if (!.in_ref_col && n_available(x) > 1 && n_available(.ref_group) > 1) {
|
| 276 | 9x |
y$pval <- stats::t.test(x, .ref_group)$p.value |
| 277 |
} |
|
| 278 |
} |
|
| 279 | ||
| 280 | 1197x |
y |
| 281 |
} |
|
| 282 | ||
| 283 |
#' @describeIn analyze_variables Method for `factor` class. |
|
| 284 |
#' |
|
| 285 |
#' @return |
|
| 286 |
#' * If `x` is of class `factor` or converted from `character`, returns a `list` with named `numeric` items: |
|
| 287 |
#' * `n`: The [length()] of `x`. |
|
| 288 |
#' * `count`: A list with the number of cases for each level of the factor `x`. |
|
| 289 |
#' * `count_fraction`: Similar to `count` but also includes the proportion of cases for each level of the |
|
| 290 |
#' factor `x` relative to the denominator, or `NA` if the denominator is zero. |
|
| 291 |
#' |
|
| 292 |
#' @note |
|
| 293 |
#' * If `x` is an empty `factor`, a list is still returned for `counts` with one element |
|
| 294 |
#' per factor level. If there are no levels in `x`, the function fails. |
|
| 295 |
#' * If factor variables contain `NA`, these `NA` values are excluded by default. To include `NA` values |
|
| 296 |
#' set `na_rm = FALSE` and missing values will be displayed as an `NA` level. Alternatively, an explicit |
|
| 297 |
#' factor level can be defined for `NA` values during pre-processing via [df_explicit_na()] - the |
|
| 298 |
#' default `na_level` (`"<Missing>"`) will also be excluded when `na_rm` is set to `TRUE`. |
|
| 299 |
#' |
|
| 300 |
#' @method s_summary factor |
|
| 301 |
#' |
|
| 302 |
#' @examples |
|
| 303 |
#' # `s_summary.factor` |
|
| 304 |
#' |
|
| 305 |
#' ## Basic usage: |
|
| 306 |
#' s_summary(factor(c("a", "a", "b", "c", "a")))
|
|
| 307 |
#' |
|
| 308 |
#' # Empty factor returns zero-filled items. |
|
| 309 |
#' s_summary(factor(levels = c("a", "b", "c")))
|
|
| 310 |
#' |
|
| 311 |
#' ## Management of NA values. |
|
| 312 |
#' x <- factor(c(NA, "Female")) |
|
| 313 |
#' x <- explicit_na(x) |
|
| 314 |
#' s_summary(x, na_rm = TRUE) |
|
| 315 |
#' s_summary(x, na_rm = FALSE) |
|
| 316 |
#' |
|
| 317 |
#' ## Different denominators. |
|
| 318 |
#' x <- factor(c("a", "a", "b", "c", "a"))
|
|
| 319 |
#' s_summary(x, denom = "N_row", .N_row = 10L) |
|
| 320 |
#' s_summary(x, denom = "N_col", .N_col = 20L) |
|
| 321 |
#' |
|
| 322 |
#' @export |
|
| 323 |
s_summary.factor <- function(x, denom = c("n", "N_col", "N_row"), ...) {
|
|
| 324 | 306x |
assert_valid_factor(x) |
| 325 | 303x |
args_list <- list(...) |
| 326 | 303x |
.N_row <- args_list[[".N_row"]] # nolint |
| 327 | 303x |
.N_col <- args_list[[".N_col"]] # nolint |
| 328 | 303x |
na_rm <- args_list[["na_rm"]] %||% TRUE |
| 329 | 303x |
na_str <- args_list[["na_str"]] %||% "NA" |
| 330 | 303x |
na_str_drop <- args_list[["na_str_drop"]] |
| 331 | 303x |
verbose <- args_list[["verbose"]] %||% TRUE |
| 332 | 303x |
compare_with_ref_group <- args_list[["compare_with_ref_group"]] |
| 333 | 303x |
checkmate::assert_string(na_str_drop, null.ok = TRUE) |
| 334 | ||
| 335 | 303x |
if (na_rm) {
|
| 336 | 294x |
x <- x[!is.na(x)] |
| 337 | 294x |
if (!is.null(na_str_drop)) {
|
| 338 | 227x |
x <- fct_discard(x, na_str_drop) |
| 339 |
} |
|
| 340 |
} else {
|
|
| 341 | 9x |
x <- x %>% explicit_na(label = na_str) |
| 342 |
} |
|
| 343 | ||
| 344 | 303x |
y <- list() |
| 345 | ||
| 346 | 303x |
y$n <- list("n" = c("n" = length(x))) # all list of a list
|
| 347 | ||
| 348 | 303x |
y$count <- lapply(as.list(table(x, useNA = "ifany")), setNames, nm = "count") |
| 349 | ||
| 350 | 303x |
denom <- match.arg(denom) %>% |
| 351 | 303x |
switch( |
| 352 | 303x |
n = length(x), |
| 353 | 303x |
N_row = .N_row, |
| 354 | 303x |
N_col = .N_col |
| 355 |
) |
|
| 356 | ||
| 357 | 303x |
y$count_fraction <- lapply( |
| 358 | 303x |
y$count, |
| 359 | 303x |
function(x) {
|
| 360 | 2183x |
c(x, "p" = ifelse(denom > 0, x / denom, 0)) |
| 361 |
} |
|
| 362 |
) |
|
| 363 | ||
| 364 | 303x |
y$count_fraction_fixed_dp <- y$count_fraction |
| 365 | ||
| 366 | 303x |
y$fraction <- lapply( |
| 367 | 303x |
y$count, |
| 368 | 303x |
function(count) c("num" = unname(count), "denom" = denom)
|
| 369 |
) |
|
| 370 | ||
| 371 | 303x |
y$n_blq <- list("n_blq" = c("n_blq" = sum(grepl("BLQ|LTR|<[1-9]|<PCLLOQ", x))))
|
| 372 | ||
| 373 | 303x |
if (isTRUE(compare_with_ref_group)) {
|
| 374 | 16x |
.ref_group <- as_factor_keep_attributes(args_list[[".ref_group"]], verbose = verbose) |
| 375 | 16x |
.in_ref_col <- args_list[[".in_ref_col"]] |
| 376 | 16x |
checkmate::assert_flag(.in_ref_col) |
| 377 | 16x |
assert_valid_factor(x) |
| 378 | 16x |
assert_valid_factor(.ref_group) |
| 379 | ||
| 380 | 16x |
if (na_rm) {
|
| 381 | 14x |
x <- x[!is.na(x)] %>% fct_discard("<Missing>")
|
| 382 | 14x |
.ref_group <- .ref_group[!is.na(.ref_group)] %>% fct_discard("<Missing>")
|
| 383 |
} else {
|
|
| 384 | 2x |
x <- x %>% explicit_na(label = na_str) |
| 385 | 2x |
.ref_group <- .ref_group %>% explicit_na(label = na_str) |
| 386 |
} |
|
| 387 | ||
| 388 | 2x |
if ("NA" %in% levels(x)) levels(.ref_group) <- c(levels(.ref_group), "NA")
|
| 389 | 16x |
checkmate::assert_factor(x, levels = levels(.ref_group), min.levels = 2) |
| 390 | ||
| 391 | 16x |
y$pval_counts <- numeric() |
| 392 | 16x |
if (!.in_ref_col && length(x) > 0 && length(.ref_group) > 0) {
|
| 393 | 13x |
tab <- rbind(table(x), table(.ref_group)) |
| 394 | 13x |
res <- suppressWarnings(stats::chisq.test(tab)) |
| 395 | 13x |
y$pval_counts <- res$p.value |
| 396 |
} |
|
| 397 |
} |
|
| 398 | ||
| 399 | 303x |
y |
| 400 |
} |
|
| 401 | ||
| 402 |
#' @describeIn analyze_variables Method for `character` class. This makes an automatic |
|
| 403 |
#' conversion to factor (with a warning) and then forwards to the method for factors. |
|
| 404 |
#' |
|
| 405 |
#' @note |
|
| 406 |
#' * Automatic conversion of character to factor does not guarantee that the table |
|
| 407 |
#' can be generated correctly. In particular for sparse tables this very likely can fail. |
|
| 408 |
#' It is therefore better to always pre-process the dataset such that factors are manually |
|
| 409 |
#' created from character variables before passing the dataset to [rtables::build_table()]. |
|
| 410 |
#' |
|
| 411 |
#' @method s_summary character |
|
| 412 |
#' |
|
| 413 |
#' @examples |
|
| 414 |
#' # `s_summary.character` |
|
| 415 |
#' |
|
| 416 |
#' ## Basic usage: |
|
| 417 |
#' s_summary(c("a", "a", "b", "c", "a"), verbose = FALSE)
|
|
| 418 |
#' s_summary(c("a", "a", "b", "c", "a", ""), .var = "x", na_rm = FALSE, verbose = FALSE)
|
|
| 419 |
#' |
|
| 420 |
#' @export |
|
| 421 |
s_summary.character <- function(x, denom = c("n", "N_col", "N_row"), ...) {
|
|
| 422 | 13x |
args_list <- list(...) |
| 423 | 13x |
na_rm <- args_list[["na_rm"]] %||% TRUE |
| 424 | 13x |
na_str <- args_list[["na_str"]] %||% "NA" |
| 425 | 13x |
verbose <- args_list[["verbose"]] %||% TRUE |
| 426 | ||
| 427 | 13x |
if (na_rm) {
|
| 428 | 12x |
y <- as_factor_keep_attributes(x, verbose = verbose) |
| 429 |
} else {
|
|
| 430 | 1x |
y <- as_factor_keep_attributes(x, verbose = verbose, na_level = na_str) |
| 431 |
} |
|
| 432 | ||
| 433 | 13x |
s_summary(x = y, denom = denom, ...) |
| 434 |
} |
|
| 435 | ||
| 436 |
#' @describeIn analyze_variables Method for `logical` class. |
|
| 437 |
#' |
|
| 438 |
#' @return |
|
| 439 |
#' * If `x` is of class `logical`, returns a `list` with named `numeric` items: |
|
| 440 |
#' * `n`: The [length()] of `x` (possibly after removing `NA`s). |
|
| 441 |
#' * `count`: Count of `TRUE` in `x`. |
|
| 442 |
#' * `count_fraction`: Count and proportion of `TRUE` in `x` relative to the denominator, or `NA` if the |
|
| 443 |
#' denominator is zero. Note that `NA`s in `x` are never counted or leading to `NA` here. |
|
| 444 |
#' |
|
| 445 |
#' @method s_summary logical |
|
| 446 |
#' |
|
| 447 |
#' @examples |
|
| 448 |
#' # `s_summary.logical` |
|
| 449 |
#' |
|
| 450 |
#' ## Basic usage: |
|
| 451 |
#' s_summary(c(TRUE, FALSE, TRUE, TRUE)) |
|
| 452 |
#' |
|
| 453 |
#' # Empty factor returns zero-filled items. |
|
| 454 |
#' s_summary(as.logical(c())) |
|
| 455 |
#' |
|
| 456 |
#' ## Management of NA values. |
|
| 457 |
#' x <- c(NA, TRUE, FALSE) |
|
| 458 |
#' s_summary(x, na_rm = TRUE) |
|
| 459 |
#' s_summary(x, na_rm = FALSE) |
|
| 460 |
#' |
|
| 461 |
#' ## Different denominators. |
|
| 462 |
#' x <- c(TRUE, FALSE, TRUE, TRUE) |
|
| 463 |
#' s_summary(x, denom = "N_row", .N_row = 10L) |
|
| 464 |
#' s_summary(x, denom = "N_col", .N_col = 20L) |
|
| 465 |
#' |
|
| 466 |
#' @export |
|
| 467 |
s_summary.logical <- function(x, denom = c("n", "N_col", "N_row"), ...) {
|
|
| 468 | 211x |
checkmate::assert_logical(x) |
| 469 | 211x |
args_list <- list(...) |
| 470 | 211x |
.N_row <- args_list[[".N_row"]] # nolint |
| 471 | 211x |
.N_col <- args_list[[".N_col"]] # nolint |
| 472 | 211x |
na_rm <- args_list[["na_rm"]] %||% TRUE |
| 473 | 211x |
compare_with_ref_group <- args_list[["compare_with_ref_group"]] |
| 474 | ||
| 475 | 211x |
if (na_rm) {
|
| 476 | 208x |
x <- x[!is.na(x)] |
| 477 |
} # na values are and should be logical here |
|
| 478 | ||
| 479 | 211x |
y <- list() |
| 480 | 211x |
y$n <- c("n" = length(x))
|
| 481 | 211x |
denom <- match.arg(denom) %>% |
| 482 | 211x |
switch( |
| 483 | 211x |
n = length(x), |
| 484 | 211x |
N_row = .N_row, |
| 485 | 211x |
N_col = .N_col |
| 486 |
) |
|
| 487 | 211x |
y$count <- c("count" = sum(x, na.rm = TRUE))
|
| 488 | 211x |
y$count_fraction <- c(y$count, "fraction" = ifelse(denom > 0, y$count / denom, 0)) |
| 489 | 211x |
y$count_fraction_fixed_dp <- y$count_fraction |
| 490 | 211x |
y$fraction <- c("num" = unname(y$count), "denom" = denom)
|
| 491 | 211x |
y$n_blq <- c("n_blq" = 0L)
|
| 492 | ||
| 493 | ||
| 494 | 211x |
if (isTRUE(compare_with_ref_group)) {
|
| 495 | 4x |
.ref_group <- args_list[[".ref_group"]] |
| 496 | 4x |
.in_ref_col <- args_list[[".in_ref_col"]] |
| 497 | 4x |
checkmate::assert_flag(.in_ref_col) |
| 498 | ||
| 499 | 4x |
if (na_rm) {
|
| 500 | 3x |
x <- stats::na.omit(x) |
| 501 | 3x |
.ref_group <- stats::na.omit(.ref_group) |
| 502 |
} else {
|
|
| 503 | 1x |
x[is.na(x)] <- FALSE |
| 504 | 1x |
.ref_group[is.na(.ref_group)] <- FALSE |
| 505 |
} |
|
| 506 | ||
| 507 | 4x |
y$pval_counts <- numeric() |
| 508 | 4x |
if (!.in_ref_col && length(x) > 0 && length(.ref_group) > 0) {
|
| 509 | 4x |
x <- factor(x, levels = c(TRUE, FALSE)) |
| 510 | 4x |
.ref_group <- factor(.ref_group, levels = c(TRUE, FALSE)) |
| 511 | 4x |
tbl <- rbind(table(x), table(.ref_group)) |
| 512 | 4x |
y$pval_counts <- suppressWarnings(prop_chisq(tbl)) |
| 513 |
} |
|
| 514 |
} |
|
| 515 | ||
| 516 | 211x |
y |
| 517 |
} |
|
| 518 | ||
| 519 |
#' @describeIn analyze_variables Formatted analysis function which is used as `afun` in `analyze_vars()` and |
|
| 520 |
#' `compare_vars()` and as `cfun` in `summarize_colvars()`. |
|
| 521 |
#' |
|
| 522 |
#' @param compare_with_ref_group (`flag`)\cr whether comparison statistics should be analyzed instead of summary |
|
| 523 |
#' statistics (`compare_with_ref_group = TRUE` adds `pval` statistic comparing |
|
| 524 |
#' against reference group). |
|
| 525 |
#' |
|
| 526 |
#' @return |
|
| 527 |
#' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
| 528 |
#' |
|
| 529 |
#' @note |
|
| 530 |
#' * To use for comparison (with additional p-value statistic), parameter |
|
| 531 |
#' `compare_with_ref_group` must be set to `TRUE`. |
|
| 532 |
#' * Ensure that either all `NA` values are converted to an explicit `NA` level or all `NA` values are left as is. |
|
| 533 |
#' |
|
| 534 |
#' @examples |
|
| 535 |
#' a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
|
|
| 536 |
#' a_summary( |
|
| 537 |
#' factor(c("a", "a", "b", "c", "a")),
|
|
| 538 |
#' .ref_group = factor(c("a", "a", "b", "c")), compare_with_ref_group = TRUE, .in_ref_col = TRUE
|
|
| 539 |
#' ) |
|
| 540 |
#' |
|
| 541 |
#' a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
|
|
| 542 |
#' a_summary( |
|
| 543 |
#' c("A", "B", "A", "C"),
|
|
| 544 |
#' .ref_group = c("B", "A", "C"), .var = "x", compare_with_ref_group = TRUE, verbose = FALSE,
|
|
| 545 |
#' .in_ref_col = FALSE |
|
| 546 |
#' ) |
|
| 547 |
#' |
|
| 548 |
#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) |
|
| 549 |
#' a_summary( |
|
| 550 |
#' c(TRUE, FALSE, FALSE, TRUE, TRUE), |
|
| 551 |
#' .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare_with_ref_group = TRUE, |
|
| 552 |
#' .in_ref_col = FALSE |
|
| 553 |
#' ) |
|
| 554 |
#' |
|
| 555 |
#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla") |
|
| 556 |
#' a_summary(rnorm(10, 5, 1), |
|
| 557 |
#' .ref_group = rnorm(20, -5, 1), .var = "bla", compare_with_ref_group = TRUE, |
|
| 558 |
#' .in_ref_col = FALSE |
|
| 559 |
#' ) |
|
| 560 |
#' |
|
| 561 |
#' @export |
|
| 562 |
a_summary <- function(x, |
|
| 563 |
..., |
|
| 564 |
.stats = NULL, |
|
| 565 |
.stat_names = NULL, |
|
| 566 |
.formats = NULL, |
|
| 567 |
.labels = NULL, |
|
| 568 |
.indent_mods = NULL) {
|
|
| 569 | 352x |
dots_extra_args <- list(...) |
| 570 | ||
| 571 |
# Check if there are user-defined functions |
|
| 572 | 352x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
| 573 | 352x |
.stats <- default_and_custom_stats_list$all_stats # just the labels of stats |
| 574 | 352x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
| 575 | ||
| 576 |
# Correction of the pval indication if it is numeric or counts |
|
| 577 | 352x |
type <- ifelse(is.numeric(x), "numeric", "counts") # counts is "categorical" |
| 578 | 352x |
.stats <- .correct_num_or_counts_pval(type, .stats) |
| 579 | ||
| 580 |
# Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params) |
|
| 581 | 352x |
extra_afun_params <- retrieve_extra_afun_params( |
| 582 | 352x |
names(dots_extra_args$.additional_fun_parameters) |
| 583 |
) |
|
| 584 | 352x |
dots_extra_args$.additional_fun_parameters <- NULL # After extraction we do not need them anymore |
| 585 | ||
| 586 |
# Check if compare_with_ref_group is TRUE but no ref col is set |
|
| 587 | 352x |
if (isTRUE(dots_extra_args$compare_with_ref_group) && |
| 588 | 352x |
all( |
| 589 | 352x |
length(dots_extra_args[[".ref_group"]]) == 0, # only used for testing |
| 590 | 352x |
length(extra_afun_params[[".ref_group"]]) == 0 |
| 591 |
) |
|
| 592 |
) {
|
|
| 593 | ! |
stop( |
| 594 | ! |
"For comparison (compare_with_ref_group = TRUE), the reference group must be specified.", |
| 595 | ! |
"\nSee ref_group in split_cols_by()." |
| 596 |
) |
|
| 597 |
} |
|
| 598 | ||
| 599 |
# Main statistical functions application |
|
| 600 | 352x |
x_stats <- .apply_stat_functions( |
| 601 | 352x |
default_stat_fnc = s_summary, |
| 602 | 352x |
custom_stat_fnc_list = custom_stat_functions, |
| 603 | 352x |
args_list = c( |
| 604 | 352x |
x = list(x), |
| 605 | 352x |
extra_afun_params, |
| 606 | 352x |
dots_extra_args |
| 607 |
) |
|
| 608 |
) |
|
| 609 | ||
| 610 |
# Fill in with stats defaults if needed |
|
| 611 | 352x |
met_grp <- paste0(c("analyze_vars", type), collapse = "_")
|
| 612 | 352x |
.stats <- get_stats( |
| 613 | 352x |
met_grp, |
| 614 | 352x |
stats_in = .stats, |
| 615 | 352x |
custom_stats_in = names(custom_stat_functions), |
| 616 | 352x |
add_pval = dots_extra_args$compare_with_ref_group %||% FALSE |
| 617 |
) |
|
| 618 | ||
| 619 | 352x |
x_stats <- x_stats[.stats] |
| 620 | ||
| 621 | 352x |
is_char <- is.character(x) || is.factor(x) |
| 622 | 352x |
if (is_char) {
|
| 623 | 238x |
x_stats <- x_stats[sapply(x_stats, \(x) length(x) > 0 || is.numeric(x))] # only return non-empty stats |
| 624 | 238x |
levels_per_stats <- lapply(x_stats, names) |
| 625 |
} else {
|
|
| 626 | 114x |
levels_per_stats <- names(x_stats) %>% |
| 627 | 114x |
as.list() %>% |
| 628 | 114x |
setNames(names(x_stats)) |
| 629 |
} |
|
| 630 | ||
| 631 |
# Fill in formats/indents/labels with custom input and defaults |
|
| 632 | 352x |
.formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) |
| 633 | 352x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) |
| 634 | 352x |
lbls <- get_labels_from_stats(.stats, .labels, levels_per_stats) |
| 635 | ||
| 636 | 352x |
if (is_char) {
|
| 637 |
# Keep pval_counts stat if present from comparisons and empty |
|
| 638 | 238x |
if ("pval_counts" %in% names(x_stats) && length(x_stats[["pval_counts"]]) == 0) {
|
| 639 | 3x |
x_stats[["pval_counts"]] <- list(NULL) %>% setNames("pval_counts")
|
| 640 |
} |
|
| 641 | ||
| 642 |
# Unlist stats |
|
| 643 | 238x |
x_stats <- x_stats %>% |
| 644 | 238x |
.unlist_keep_nulls() %>% |
| 645 | 238x |
setNames(names(.formats)) |
| 646 |
} |
|
| 647 | ||
| 648 |
# Check for custom labels from control_analyze_vars |
|
| 649 | 352x |
.labels <- if ("control" %in% names(dots_extra_args)) {
|
| 650 | 2x |
labels_use_control(lbls, dots_extra_args[["control"]], .labels) |
| 651 |
} else {
|
|
| 652 | 350x |
lbls |
| 653 |
} |
|
| 654 | ||
| 655 |
# Auto format handling |
|
| 656 | 352x |
.formats <- apply_auto_formatting( |
| 657 | 352x |
.formats, |
| 658 | 352x |
x_stats, |
| 659 | 352x |
extra_afun_params$.df_row, |
| 660 | 352x |
extra_afun_params$.var |
| 661 |
) |
|
| 662 | ||
| 663 |
# Get and check statistical names from defaults |
|
| 664 | 352x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
| 665 | ||
| 666 | 352x |
in_rows( |
| 667 | 352x |
.list = x_stats, |
| 668 | 352x |
.formats = .formats, |
| 669 | 352x |
.names = names(.labels), |
| 670 | 352x |
.stat_names = .stat_names, |
| 671 | 352x |
.labels = .labels %>% .unlist_keep_nulls(), |
| 672 | 352x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
| 673 |
) |
|
| 674 |
} |
|
| 675 | ||
| 676 |
#' @describeIn analyze_variables Layout-creating function which can take statistics function arguments |
|
| 677 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()]. |
|
| 678 |
#' |
|
| 679 |
#' @param na_str_drop (`string`)\cr Additional `NA` string to be dropped from factor calculations. If `NULL` |
|
| 680 |
#' nothing will be removed beyond standard `NA` handling. |
|
| 681 |
#' @param ... additional arguments passed to `s_summary()`, including: |
|
| 682 |
#' * `denom`: (`string`) See parameter description below. |
|
| 683 |
#' * `.N_row`: (`numeric(1)`) Row-wise N (row group count) for the group of observations being analyzed (i.e. with no |
|
| 684 |
#' column-based subsetting). |
|
| 685 |
#' * `.N_col`: (`numeric(1)`) Column-wise N (column count) for the full column being tabulated within. |
|
| 686 |
#' * `verbose`: (`flag`) Whether additional warnings and messages should be printed. Mainly used to print out |
|
| 687 |
#' information about factor casting. Defaults to `TRUE`. Used for `character`/`factor` variables only. |
|
| 688 |
#' @param compare_with_ref_group (logical)\cr whether to compare the variable with a reference group. |
|
| 689 |
#' @param .indent_mods (named `integer`)\cr indent modifiers for the labels. Each element of the vector |
|
| 690 |
#' should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation |
|
| 691 |
#' for that statistic's row label. |
|
| 692 |
#' @param formats_var (`NULL` or `string`)\cr Passed to [rtables::analyze()]. `.formats` must be `"default"` and |
|
| 693 |
#' `format` must be `NULL` when this is non-NULL. |
|
| 694 |
#' @param format (`NULL`, `list`, `string` or `function`)\cr Passed to [rtables::analyze()]. `.formats` must be |
|
| 695 |
#' `"default"` and `formats_var` must be `NULL` when this is non-NULL. |
|
| 696 |
#' @param na_strs_var (`string` or `NULL`)\cr Passed to `analyze`. `na_str` must be |
|
| 697 |
#' `NA` when this is non-NULL. |
|
| 698 |
#' |
|
| 699 |
#' @return |
|
| 700 |
#' * `analyze_vars()` returns a layout object suitable for passing to further layouting functions, |
|
| 701 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing |
|
| 702 |
#' the statistics from `s_summary()` to the table layout. |
|
| 703 |
#' |
|
| 704 |
#' @examples |
|
| 705 |
#' ## Fabricated dataset. |
|
| 706 |
#' dta_test <- data.frame( |
|
| 707 |
#' USUBJID = rep(1:6, each = 3), |
|
| 708 |
#' PARAMCD = rep("lab", 6 * 3),
|
|
| 709 |
#' AVISIT = rep(paste0("V", 1:3), 6),
|
|
| 710 |
#' ARM = rep(LETTERS[1:3], rep(6, 3)), |
|
| 711 |
#' AVAL = c(9:1, rep(NA, 9)) |
|
| 712 |
#' ) |
|
| 713 |
#' |
|
| 714 |
#' # `analyze_vars()` in `rtables` pipelines |
|
| 715 |
#' ## Default output within a `rtables` pipeline. |
|
| 716 |
#' l <- basic_table() %>% |
|
| 717 |
#' split_cols_by(var = "ARM") %>% |
|
| 718 |
#' split_rows_by(var = "AVISIT") %>% |
|
| 719 |
#' analyze_vars(vars = "AVAL") |
|
| 720 |
#' |
|
| 721 |
#' build_table(l, df = dta_test) |
|
| 722 |
#' |
|
| 723 |
#' ## Select and format statistics output. |
|
| 724 |
#' l <- basic_table() %>% |
|
| 725 |
#' split_cols_by(var = "ARM") %>% |
|
| 726 |
#' split_rows_by(var = "AVISIT") %>% |
|
| 727 |
#' analyze_vars( |
|
| 728 |
#' vars = "AVAL", |
|
| 729 |
#' .stats = c("n", "mean_sd", "quantiles"),
|
|
| 730 |
#' .formats = c("mean_sd" = "xx.x, xx.x"),
|
|
| 731 |
#' .labels = c(n = "n", mean_sd = "Mean, SD", quantiles = c("Q1 - Q3"))
|
|
| 732 |
#' ) |
|
| 733 |
#' |
|
| 734 |
#' build_table(l, df = dta_test) |
|
| 735 |
#' |
|
| 736 |
#' ## Use arguments interpreted by `s_summary`. |
|
| 737 |
#' l <- basic_table() %>% |
|
| 738 |
#' split_cols_by(var = "ARM") %>% |
|
| 739 |
#' split_rows_by(var = "AVISIT") %>% |
|
| 740 |
#' analyze_vars(vars = "AVAL", na_rm = FALSE) |
|
| 741 |
#' |
|
| 742 |
#' build_table(l, df = dta_test) |
|
| 743 |
#' |
|
| 744 |
#' ## Handle `NA` levels first when summarizing factors. |
|
| 745 |
#' dta_test$AVISIT <- NA_character_ |
|
| 746 |
#' dta_test <- df_explicit_na(dta_test) |
|
| 747 |
#' l <- basic_table() %>% |
|
| 748 |
#' split_cols_by(var = "ARM") %>% |
|
| 749 |
#' analyze_vars(vars = "AVISIT", na_rm = FALSE) |
|
| 750 |
#' |
|
| 751 |
#' build_table(l, df = dta_test) |
|
| 752 |
#' |
|
| 753 |
#' # auto format |
|
| 754 |
#' dt <- data.frame("VAR" = c(0.001, 0.2, 0.0011000, 3, 4))
|
|
| 755 |
#' basic_table() %>% |
|
| 756 |
#' analyze_vars( |
|
| 757 |
#' vars = "VAR", |
|
| 758 |
#' .stats = c("n", "mean", "mean_sd", "range"),
|
|
| 759 |
#' .formats = c("mean_sd" = "auto", "range" = "auto")
|
|
| 760 |
#' ) %>% |
|
| 761 |
#' build_table(dt) |
|
| 762 |
#' |
|
| 763 |
#' @export |
|
| 764 |
#' @order 2 |
|
| 765 |
analyze_vars <- function(lyt, |
|
| 766 |
vars, |
|
| 767 |
var_labels = vars, |
|
| 768 |
na_str = default_na_str(), |
|
| 769 |
na_str_drop = "<Missing>", |
|
| 770 |
nested = TRUE, |
|
| 771 |
show_labels = "default", |
|
| 772 |
table_names = vars, |
|
| 773 |
section_div = NA_character_, |
|
| 774 |
..., |
|
| 775 |
na_rm = TRUE, |
|
| 776 |
compare_with_ref_group = FALSE, |
|
| 777 |
.stats = c("n", "mean_sd", "median", "range", "count_fraction"),
|
|
| 778 |
.stat_names = NULL, |
|
| 779 |
.formats = NULL, |
|
| 780 |
.labels = NULL, |
|
| 781 |
.indent_mods = NULL, |
|
| 782 |
formats_var = NULL, |
|
| 783 |
na_strs_var = NULL, |
|
| 784 |
format = NULL) {
|
|
| 785 |
# Depending on main functions |
|
| 786 | 47x |
extra_args <- list( |
| 787 | 47x |
"na_rm" = na_rm, |
| 788 | 47x |
"na_str_drop" = na_str_drop, |
| 789 | 47x |
"compare_with_ref_group" = compare_with_ref_group, |
| 790 |
... |
|
| 791 |
) |
|
| 792 | ||
| 793 |
## handle na_str = NA (logical) for user convenience |
|
| 794 | 47x |
if (identical(na_str, NA)) {
|
| 795 | 1x |
na_str <- NA_character_ |
| 796 |
} |
|
| 797 | ||
| 798 | 47x |
if (!is.null(formats_var) && !identical(.formats, "default")) {
|
| 799 | 1x |
stop( |
| 800 | 1x |
".formats must be set to 'default' when specifying a formats variable ", |
| 801 | 1x |
"(got formats_var: ", |
| 802 | 1x |
formats_var, |
| 803 |
")." |
|
| 804 |
) |
|
| 805 |
} |
|
| 806 | ||
| 807 | 46x |
if (!is.null(format) && !identical(.formats, "default")) {
|
| 808 | 1x |
stop( |
| 809 | 1x |
".formats must be set to 'default' when passing the format argument down ", |
| 810 | 1x |
"to analyze() (got format class:", |
| 811 | 1x |
paste(class(format), collapse = " - "), |
| 812 |
")." |
|
| 813 |
) |
|
| 814 |
} |
|
| 815 | ||
| 816 | ||
| 817 | 45x |
if (!is.null(na_strs_var) && !identical(na_str, NA_character_)) {
|
| 818 | ! |
stop( |
| 819 | ! |
"na_str must be set to NA when specifying an na strings variable ", |
| 820 | ! |
"(got na_strs_var: ", |
| 821 | ! |
na_strs_var, |
| 822 |
")." |
|
| 823 |
) |
|
| 824 |
} |
|
| 825 | ||
| 826 |
# Needed defaults |
|
| 827 | 45x |
if (!is.null(.stats)) extra_args[[".stats"]] <- .stats |
| 828 | 3x |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
| 829 | 11x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
| 830 | 4x |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
| 831 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
| 832 | ||
| 833 |
# Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params) |
|
| 834 | 45x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
| 835 | 45x |
formals(a_summary) <- c( |
| 836 | 45x |
formals(a_summary), |
| 837 | 45x |
extra_args[[".additional_fun_parameters"]] |
| 838 |
) |
|
| 839 | ||
| 840 |
# Main {rtables} structural call
|
|
| 841 | 45x |
analyze( |
| 842 | 45x |
lyt = lyt, |
| 843 | 45x |
vars = vars, |
| 844 | 45x |
var_labels = var_labels, |
| 845 | 45x |
afun = a_summary, |
| 846 | 45x |
na_str = na_str, |
| 847 | 45x |
inclNAs = !na_rm, |
| 848 | 45x |
nested = nested, |
| 849 | 45x |
extra_args = extra_args, |
| 850 | 45x |
show_labels = show_labels, |
| 851 | 45x |
table_names = table_names, |
| 852 | 45x |
section_div = section_div, |
| 853 | 45x |
formats_var = formats_var, |
| 854 | 45x |
na_strs_var = na_strs_var |
| 855 |
) |
|
| 856 |
} |
| 1 |
#' Helper functions for tabulating biomarker effects on binary response by subgroup |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Helper functions which are documented here separately to not confuse the user |
|
| 6 |
#' when reading about the user-facing functions. |
|
| 7 |
#' |
|
| 8 |
#' @inheritParams response_biomarkers_subgroups |
|
| 9 |
#' @inheritParams extract_rsp_biomarkers |
|
| 10 |
#' @inheritParams argument_convention |
|
| 11 |
#' |
|
| 12 |
#' @examples |
|
| 13 |
#' library(dplyr) |
|
| 14 |
#' library(forcats) |
|
| 15 |
#' |
|
| 16 |
#' adrs <- tern_ex_adrs |
|
| 17 |
#' adrs_labels <- formatters::var_labels(adrs) |
|
| 18 |
#' |
|
| 19 |
#' adrs_f <- adrs %>% |
|
| 20 |
#' filter(PARAMCD == "BESRSPI") %>% |
|
| 21 |
#' mutate(rsp = AVALC == "CR") |
|
| 22 |
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response") |
|
| 23 |
#' |
|
| 24 |
#' @name h_response_biomarkers_subgroups |
|
| 25 |
NULL |
|
| 26 | ||
| 27 |
#' @describeIn h_response_biomarkers_subgroups helps with converting the "response" function variable list |
|
| 28 |
#' to the "logistic regression" variable list. The reason is that currently there is an |
|
| 29 |
#' inconsistency between the variable names accepted by `extract_rsp_subgroups()` and `fit_logistic()`. |
|
| 30 |
#' |
|
| 31 |
#' @param biomarker (`string`)\cr the name of the biomarker variable. |
|
| 32 |
#' |
|
| 33 |
#' @return |
|
| 34 |
#' * `h_rsp_to_logistic_variables()` returns a named `list` of elements `response`, `arm`, `covariates`, and `strata`. |
|
| 35 |
#' |
|
| 36 |
#' @examples |
|
| 37 |
#' # This is how the variable list is converted internally. |
|
| 38 |
#' h_rsp_to_logistic_variables( |
|
| 39 |
#' variables = list( |
|
| 40 |
#' rsp = "RSP", |
|
| 41 |
#' covariates = c("A", "B"),
|
|
| 42 |
#' strata = "D" |
|
| 43 |
#' ), |
|
| 44 |
#' biomarker = "AGE" |
|
| 45 |
#' ) |
|
| 46 |
#' |
|
| 47 |
#' @export |
|
| 48 |
h_rsp_to_logistic_variables <- function(variables, biomarker) {
|
|
| 49 | 49x |
if ("strat" %in% names(variables)) {
|
| 50 | ! |
warning( |
| 51 | ! |
"Warning: the `strat` element name of the `variables` list argument to `h_rsp_to_logistic_variables() ", |
| 52 | ! |
"was deprecated in tern 0.9.4.\n ", |
| 53 | ! |
"Please use the name `strata` instead of `strat` in the `variables` argument." |
| 54 |
) |
|
| 55 | ! |
variables[["strata"]] <- variables[["strat"]] |
| 56 |
} |
|
| 57 | 49x |
checkmate::assert_list(variables) |
| 58 | 49x |
checkmate::assert_string(variables$rsp) |
| 59 | 49x |
checkmate::assert_string(biomarker) |
| 60 | 49x |
list( |
| 61 | 49x |
response = variables$rsp, |
| 62 | 49x |
arm = biomarker, |
| 63 | 49x |
covariates = variables$covariates, |
| 64 | 49x |
strata = variables$strata |
| 65 |
) |
|
| 66 |
} |
|
| 67 | ||
| 68 |
#' @describeIn h_response_biomarkers_subgroups prepares estimates for number of responses, patients and |
|
| 69 |
#' overall response rate, as well as odds ratio estimates, confidence intervals and p-values, for multiple |
|
| 70 |
#' biomarkers in a given single data set. |
|
| 71 |
#' `variables` corresponds to names of variables found in `data`, passed as a named list and requires elements |
|
| 72 |
#' `rsp` and `biomarkers` (vector of continuous biomarker variables) and optionally `covariates` |
|
| 73 |
#' and `strata`. |
|
| 74 |
#' |
|
| 75 |
#' @return |
|
| 76 |
#' * `h_logistic_mult_cont_df()` returns a `data.frame` containing estimates and statistics for the selected biomarkers. |
|
| 77 |
#' |
|
| 78 |
#' @examples |
|
| 79 |
#' # For a single population, estimate separately the effects |
|
| 80 |
#' # of two biomarkers. |
|
| 81 |
#' df <- h_logistic_mult_cont_df( |
|
| 82 |
#' variables = list( |
|
| 83 |
#' rsp = "rsp", |
|
| 84 |
#' biomarkers = c("BMRKR1", "AGE"),
|
|
| 85 |
#' covariates = "SEX" |
|
| 86 |
#' ), |
|
| 87 |
#' data = adrs_f |
|
| 88 |
#' ) |
|
| 89 |
#' df |
|
| 90 |
#' |
|
| 91 |
#' # If the data set is empty, still the corresponding rows with missings are returned. |
|
| 92 |
#' h_coxreg_mult_cont_df( |
|
| 93 |
#' variables = list( |
|
| 94 |
#' rsp = "rsp", |
|
| 95 |
#' biomarkers = c("BMRKR1", "AGE"),
|
|
| 96 |
#' covariates = "SEX", |
|
| 97 |
#' strata = "STRATA1" |
|
| 98 |
#' ), |
|
| 99 |
#' data = adrs_f[NULL, ] |
|
| 100 |
#' ) |
|
| 101 |
#' |
|
| 102 |
#' @export |
|
| 103 |
h_logistic_mult_cont_df <- function(variables, |
|
| 104 |
data, |
|
| 105 |
control = control_logistic()) {
|
|
| 106 | 28x |
if ("strat" %in% names(variables)) {
|
| 107 | ! |
warning( |
| 108 | ! |
"Warning: the `strat` element name of the `variables` list argument to `h_logistic_mult_cont_df() ", |
| 109 | ! |
"was deprecated in tern 0.9.4.\n ", |
| 110 | ! |
"Please use the name `strata` instead of `strat` in the `variables` argument." |
| 111 |
) |
|
| 112 | ! |
variables[["strata"]] <- variables[["strat"]] |
| 113 |
} |
|
| 114 | 28x |
assert_df_with_variables(data, variables) |
| 115 | ||
| 116 | 28x |
checkmate::assert_character(variables$biomarkers, min.len = 1, any.missing = FALSE) |
| 117 | 28x |
checkmate::assert_list(control, names = "named") |
| 118 | ||
| 119 | 28x |
conf_level <- control[["conf_level"]] |
| 120 | 28x |
pval_label <- "p-value (Wald)" |
| 121 | ||
| 122 |
# If there is any data, run model, otherwise return empty results. |
|
| 123 | 28x |
if (nrow(data) > 0) {
|
| 124 | 27x |
bm_cols <- match(variables$biomarkers, names(data)) |
| 125 | 27x |
l_result <- lapply(variables$biomarkers, function(bm) {
|
| 126 | 48x |
model_fit <- fit_logistic( |
| 127 | 48x |
variables = h_rsp_to_logistic_variables(variables, bm), |
| 128 | 48x |
data = data, |
| 129 | 48x |
response_definition = control$response_definition |
| 130 |
) |
|
| 131 | 48x |
result <- h_logistic_simple_terms( |
| 132 | 48x |
x = bm, |
| 133 | 48x |
fit_glm = model_fit, |
| 134 | 48x |
conf_level = control$conf_level |
| 135 |
) |
|
| 136 | 48x |
resp_vector <- if (inherits(model_fit, "glm")) {
|
| 137 | 38x |
model_fit$model[[variables$rsp]] |
| 138 |
} else {
|
|
| 139 | 10x |
as.logical(as.matrix(model_fit$y)[, "status"]) |
| 140 |
} |
|
| 141 | 48x |
data.frame( |
| 142 |
# Dummy column needed downstream to create a nested header. |
|
| 143 | 48x |
biomarker = bm, |
| 144 | 48x |
biomarker_label = formatters::var_labels(data[bm], fill = TRUE), |
| 145 | 48x |
n_tot = length(resp_vector), |
| 146 | 48x |
n_rsp = sum(resp_vector), |
| 147 | 48x |
prop = mean(resp_vector), |
| 148 | 48x |
or = as.numeric(result[1L, "odds_ratio"]), |
| 149 | 48x |
lcl = as.numeric(result[1L, "lcl"]), |
| 150 | 48x |
ucl = as.numeric(result[1L, "ucl"]), |
| 151 | 48x |
conf_level = conf_level, |
| 152 | 48x |
pval = as.numeric(result[1L, "pvalue"]), |
| 153 | 48x |
pval_label = pval_label, |
| 154 | 48x |
stringsAsFactors = FALSE |
| 155 |
) |
|
| 156 |
}) |
|
| 157 | 27x |
do.call(rbind, args = c(l_result, make.row.names = FALSE)) |
| 158 |
} else {
|
|
| 159 | 1x |
data.frame( |
| 160 | 1x |
biomarker = variables$biomarkers, |
| 161 | 1x |
biomarker_label = formatters::var_labels(data[variables$biomarkers], fill = TRUE), |
| 162 | 1x |
n_tot = 0L, |
| 163 | 1x |
n_rsp = 0L, |
| 164 | 1x |
prop = NA, |
| 165 | 1x |
or = NA, |
| 166 | 1x |
lcl = NA, |
| 167 | 1x |
ucl = NA, |
| 168 | 1x |
conf_level = conf_level, |
| 169 | 1x |
pval = NA, |
| 170 | 1x |
pval_label = pval_label, |
| 171 | 1x |
row.names = seq_along(variables$biomarkers), |
| 172 | 1x |
stringsAsFactors = FALSE |
| 173 |
) |
|
| 174 |
} |
|
| 175 |
} |
| 1 |
#' Formatting functions |
|
| 2 |
#' |
|
| 3 |
#' See below for the list of formatting functions created in `tern` to work with `rtables`. |
|
| 4 |
#' |
|
| 5 |
#' Other available formats can be listed via [`formatters::list_valid_format_labels()`]. Additional |
|
| 6 |
#' custom formats can be created via the [`formatters::sprintf_format()`] function. |
|
| 7 |
#' |
|
| 8 |
#' @family formatting functions |
|
| 9 |
#' @name formatting_functions |
|
| 10 |
NULL |
|
| 11 | ||
| 12 |
#' Format fraction and percentage |
|
| 13 |
#' |
|
| 14 |
#' @description `r lifecycle::badge("stable")`
|
|
| 15 |
#' |
|
| 16 |
#' Formats a fraction together with ratio in percent. |
|
| 17 |
#' |
|
| 18 |
#' @param x (named `integer`)\cr vector with elements `num` and `denom`. |
|
| 19 |
#' @param ... not used. Required for `rtables` interface. |
|
| 20 |
#' |
|
| 21 |
#' @return A string in the format `num / denom (ratio %)`. If `num` is 0, the format is `num / denom`. |
|
| 22 |
#' |
|
| 23 |
#' @examples |
|
| 24 |
#' format_fraction(x = c(num = 2L, denom = 3L)) |
|
| 25 |
#' format_fraction(x = c(num = 0L, denom = 3L)) |
|
| 26 |
#' |
|
| 27 |
#' @family formatting functions |
|
| 28 |
#' @export |
|
| 29 |
format_fraction <- function(x, ...) {
|
|
| 30 | 220x |
attr(x, "label") <- NULL |
| 31 | ||
| 32 | 220x |
checkmate::assert_vector(x) |
| 33 | 220x |
checkmate::assert_count(x["num"]) |
| 34 | 218x |
checkmate::assert_count(x["denom"]) |
| 35 | ||
| 36 | 218x |
result <- if (x["num"] == 0) {
|
| 37 | 10x |
paste0(x["num"], "/", x["denom"]) |
| 38 |
} else {
|
|
| 39 | 208x |
paste0( |
| 40 | 208x |
x["num"], "/", x["denom"], |
| 41 | 208x |
" (", round(x["num"] / x["denom"] * 100, 1), "%)"
|
| 42 |
) |
|
| 43 |
} |
|
| 44 | ||
| 45 | 218x |
return(result) |
| 46 |
} |
|
| 47 | ||
| 48 |
#' Format fraction and percentage with fixed single decimal place |
|
| 49 |
#' |
|
| 50 |
#' @description `r lifecycle::badge("stable")`
|
|
| 51 |
#' |
|
| 52 |
#' Formats a fraction together with ratio in percent with fixed single decimal place. |
|
| 53 |
#' Includes trailing zero in case of whole number percentages to always keep one decimal place. |
|
| 54 |
#' |
|
| 55 |
#' @inheritParams format_fraction |
|
| 56 |
#' |
|
| 57 |
#' @return A string in the format `num / denom (ratio %)`. If `num` is 0, the format is `num / denom`. |
|
| 58 |
#' |
|
| 59 |
#' @examples |
|
| 60 |
#' format_fraction_fixed_dp(x = c(num = 1L, denom = 2L)) |
|
| 61 |
#' format_fraction_fixed_dp(x = c(num = 1L, denom = 4L)) |
|
| 62 |
#' format_fraction_fixed_dp(x = c(num = 0L, denom = 3L)) |
|
| 63 |
#' |
|
| 64 |
#' @family formatting functions |
|
| 65 |
#' @export |
|
| 66 |
format_fraction_fixed_dp <- function(x, ...) {
|
|
| 67 | 3x |
attr(x, "label") <- NULL |
| 68 | 3x |
checkmate::assert_vector(x) |
| 69 | 3x |
checkmate::assert_count(x["num"]) |
| 70 | 3x |
checkmate::assert_count(x["denom"]) |
| 71 | ||
| 72 | 3x |
result <- if (x["num"] == 0) {
|
| 73 | 1x |
paste0(x["num"], "/", x["denom"]) |
| 74 |
} else {
|
|
| 75 | 2x |
paste0( |
| 76 | 2x |
x["num"], "/", x["denom"], |
| 77 | 2x |
" (", sprintf("%.1f", round(x["num"] / x["denom"] * 100, 1)), "%)"
|
| 78 |
) |
|
| 79 |
} |
|
| 80 | 3x |
return(result) |
| 81 |
} |
|
| 82 | ||
| 83 |
#' Format count and fraction |
|
| 84 |
#' |
|
| 85 |
#' @description `r lifecycle::badge("stable")`
|
|
| 86 |
#' |
|
| 87 |
#' Formats a count together with fraction with special consideration when count is `0`. |
|
| 88 |
#' |
|
| 89 |
#' @param x (`numeric(2)`)\cr vector of length 2 with count and fraction, respectively. |
|
| 90 |
#' @param ... not used. Required for `rtables` interface. |
|
| 91 |
#' |
|
| 92 |
#' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`. |
|
| 93 |
#' |
|
| 94 |
#' @examples |
|
| 95 |
#' format_count_fraction(x = c(2, 0.6667)) |
|
| 96 |
#' format_count_fraction(x = c(0, 0)) |
|
| 97 |
#' |
|
| 98 |
#' @family formatting functions |
|
| 99 |
#' @export |
|
| 100 |
format_count_fraction <- function(x, ...) {
|
|
| 101 | 102x |
attr(x, "label") <- NULL |
| 102 | ||
| 103 | 102x |
if (any(is.na(x))) {
|
| 104 | 1x |
return("NA")
|
| 105 |
} |
|
| 106 | ||
| 107 | 101x |
checkmate::assert_vector(x) |
| 108 | 101x |
checkmate::assert_integerish(x[1]) |
| 109 | 101x |
assert_proportion_value(x[2], include_boundaries = TRUE) |
| 110 | ||
| 111 | 101x |
result <- if (x[1] == 0) {
|
| 112 | 13x |
"0" |
| 113 |
} else {
|
|
| 114 | 88x |
paste0(x[1], " (", round(x[2] * 100, 1), "%)")
|
| 115 |
} |
|
| 116 | ||
| 117 | 101x |
return(result) |
| 118 |
} |
|
| 119 | ||
| 120 |
#' Format count and percentage with fixed single decimal place |
|
| 121 |
#' |
|
| 122 |
#' @description `r lifecycle::badge("experimental")`
|
|
| 123 |
#' |
|
| 124 |
#' Formats a count together with fraction with special consideration when count is `0`. |
|
| 125 |
#' |
|
| 126 |
#' @inheritParams format_count_fraction |
|
| 127 |
#' |
|
| 128 |
#' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`. |
|
| 129 |
#' |
|
| 130 |
#' @examples |
|
| 131 |
#' format_count_fraction_fixed_dp(x = c(2, 0.6667)) |
|
| 132 |
#' format_count_fraction_fixed_dp(x = c(2, 0.5)) |
|
| 133 |
#' format_count_fraction_fixed_dp(x = c(0, 0)) |
|
| 134 |
#' |
|
| 135 |
#' @family formatting functions |
|
| 136 |
#' @export |
|
| 137 |
format_count_fraction_fixed_dp <- function(x, ...) {
|
|
| 138 | 1408x |
attr(x, "label") <- NULL |
| 139 | ||
| 140 | 1408x |
if (any(is.na(x))) {
|
| 141 | ! |
return("NA")
|
| 142 |
} |
|
| 143 | ||
| 144 | 1408x |
checkmate::assert_vector(x) |
| 145 | 1408x |
checkmate::assert_integerish(x[1]) |
| 146 | 1408x |
assert_proportion_value(x[2], include_boundaries = TRUE) |
| 147 | ||
| 148 | 1408x |
result <- if (x[1] == 0) {
|
| 149 | 195x |
"0" |
| 150 | 1408x |
} else if (.is_equal_float(x[2], 1)) {
|
| 151 | 549x |
sprintf("%d (100%%)", x[1])
|
| 152 |
} else {
|
|
| 153 | 664x |
sprintf("%d (%.1f%%)", x[1], x[2] * 100)
|
| 154 |
} |
|
| 155 | ||
| 156 | 1408x |
return(result) |
| 157 |
} |
|
| 158 | ||
| 159 |
#' Format count and fraction with special case for count < 10 |
|
| 160 |
#' |
|
| 161 |
#' @description `r lifecycle::badge("stable")`
|
|
| 162 |
#' |
|
| 163 |
#' Formats a count together with fraction with special consideration when count is less than 10. |
|
| 164 |
#' |
|
| 165 |
#' @inheritParams format_count_fraction |
|
| 166 |
#' |
|
| 167 |
#' @return A string in the format `count (fraction %)`. If `count` is less than 10, only `count` is printed. |
|
| 168 |
#' |
|
| 169 |
#' @examples |
|
| 170 |
#' format_count_fraction_lt10(x = c(275, 0.9673)) |
|
| 171 |
#' format_count_fraction_lt10(x = c(2, 0.6667)) |
|
| 172 |
#' format_count_fraction_lt10(x = c(9, 1)) |
|
| 173 |
#' |
|
| 174 |
#' @family formatting functions |
|
| 175 |
#' @export |
|
| 176 |
format_count_fraction_lt10 <- function(x, ...) {
|
|
| 177 | 7x |
attr(x, "label") <- NULL |
| 178 | ||
| 179 | 7x |
if (any(is.na(x))) {
|
| 180 | 1x |
return("NA")
|
| 181 |
} |
|
| 182 | ||
| 183 | 6x |
checkmate::assert_vector(x) |
| 184 | 6x |
checkmate::assert_integerish(x[1]) |
| 185 | 6x |
assert_proportion_value(x[2], include_boundaries = TRUE) |
| 186 | ||
| 187 | 6x |
result <- if (x[1] < 10) {
|
| 188 | 3x |
paste0(x[1]) |
| 189 |
} else {
|
|
| 190 | 3x |
paste0(x[1], " (", round(x[2] * 100, 1), "%)")
|
| 191 |
} |
|
| 192 | ||
| 193 | 6x |
return(result) |
| 194 |
} |
|
| 195 | ||
| 196 |
#' Format XX as a formatting function |
|
| 197 |
#' |
|
| 198 |
#' Translate a string where x and dots are interpreted as number place |
|
| 199 |
#' holders, and others as formatting elements. |
|
| 200 |
#' |
|
| 201 |
#' @param str (`string`)\cr template. |
|
| 202 |
#' |
|
| 203 |
#' @return An `rtables` formatting function. |
|
| 204 |
#' |
|
| 205 |
#' @examples |
|
| 206 |
#' test <- list(c(1.658, 0.5761), c(1e1, 785.6)) |
|
| 207 |
#' |
|
| 208 |
#' z <- format_xx("xx (xx.x)")
|
|
| 209 |
#' sapply(test, z) |
|
| 210 |
#' |
|
| 211 |
#' z <- format_xx("xx.x - xx.x")
|
|
| 212 |
#' sapply(test, z) |
|
| 213 |
#' |
|
| 214 |
#' z <- format_xx("xx.x, incl. xx.x% NE")
|
|
| 215 |
#' sapply(test, z) |
|
| 216 |
#' |
|
| 217 |
#' @family formatting functions |
|
| 218 |
#' @export |
|
| 219 |
format_xx <- function(str) {
|
|
| 220 |
# Find position in the string. |
|
| 221 | 1x |
positions <- gregexpr(pattern = "x+\\.x+|x+", text = str, perl = TRUE) |
| 222 | 1x |
x_positions <- regmatches(x = str, m = positions)[[1]] |
| 223 | ||
| 224 |
# Roundings depends on the number of x behind [.]. |
|
| 225 | 1x |
roundings <- lapply( |
| 226 | 1x |
X = x_positions, |
| 227 | 1x |
function(x) {
|
| 228 | 2x |
y <- strsplit(split = "\\.", x = x)[[1]] |
| 229 | 2x |
rounding <- function(x) {
|
| 230 | 4x |
round(x, digits = ifelse(length(y) > 1, nchar(y[2]), 0)) |
| 231 |
} |
|
| 232 | 2x |
return(rounding) |
| 233 |
} |
|
| 234 |
) |
|
| 235 | ||
| 236 | 1x |
rtable_format <- function(x, output) {
|
| 237 | 2x |
values <- Map(y = x, fun = roundings, function(y, fun) fun(y)) |
| 238 | 2x |
regmatches(x = str, m = positions)[[1]] <- values |
| 239 | 2x |
return(str) |
| 240 |
} |
|
| 241 | ||
| 242 | 1x |
return(rtable_format) |
| 243 |
} |
|
| 244 | ||
| 245 |
#' Format numeric values by significant figures |
|
| 246 |
#' |
|
| 247 |
#' Format numeric values to print with a specified number of significant figures. |
|
| 248 |
#' |
|
| 249 |
#' @param sigfig (`integer(1)`)\cr number of significant figures to display. |
|
| 250 |
#' @param format (`string`)\cr the format label (string) to apply when printing the value. Decimal |
|
| 251 |
#' places in string are ignored in favor of formatting by significant figures. Formats options are: |
|
| 252 |
#' `"xx"`, `"xx / xx"`, `"(xx, xx)"`, `"xx - xx"`, and `"xx (xx)"`. |
|
| 253 |
#' @param num_fmt (`string`)\cr numeric format modifiers to apply to the value. Defaults to `"fg"` for |
|
| 254 |
#' standard significant figures formatting - fixed (non-scientific notation) format (`"f"`) |
|
| 255 |
#' and `sigfig` equal to number of significant figures instead of decimal places (`"g"`). See the |
|
| 256 |
#' [formatC()] `format` argument for more options. |
|
| 257 |
#' |
|
| 258 |
#' @return An `rtables` formatting function. |
|
| 259 |
#' |
|
| 260 |
#' @examples |
|
| 261 |
#' fmt_3sf <- format_sigfig(3) |
|
| 262 |
#' fmt_3sf(1.658) |
|
| 263 |
#' fmt_3sf(1e1) |
|
| 264 |
#' |
|
| 265 |
#' fmt_5sf <- format_sigfig(5) |
|
| 266 |
#' fmt_5sf(0.57) |
|
| 267 |
#' fmt_5sf(0.000025645) |
|
| 268 |
#' |
|
| 269 |
#' @family formatting functions |
|
| 270 |
#' @export |
|
| 271 |
format_sigfig <- function(sigfig, format = "xx", num_fmt = "fg") {
|
|
| 272 | 3x |
checkmate::assert_integerish(sigfig) |
| 273 | 3x |
format <- gsub("xx\\.|xx\\.x+", "xx", format)
|
| 274 | 3x |
checkmate::assert_choice(format, c("xx", "xx / xx", "(xx, xx)", "xx - xx", "xx (xx)"))
|
| 275 | 3x |
function(x, ...) {
|
| 276 | ! |
if (!is.numeric(x)) stop("`format_sigfig` cannot be used for non-numeric values. Please choose another format.")
|
| 277 | 12x |
num <- formatC(signif(x, digits = sigfig), digits = sigfig, format = num_fmt, flag = "#") |
| 278 | 12x |
num <- gsub("\\.$", "", num) # remove trailing "."
|
| 279 | ||
| 280 | 12x |
format_value(num, format) |
| 281 |
} |
|
| 282 |
} |
|
| 283 | ||
| 284 |
#' Format fraction with lower threshold |
|
| 285 |
#' |
|
| 286 |
#' @description `r lifecycle::badge("stable")`
|
|
| 287 |
#' |
|
| 288 |
#' Formats a fraction when the second element of the input `x` is the fraction. It applies |
|
| 289 |
#' a lower threshold, below which it is just stated that the fraction is smaller than that. |
|
| 290 |
#' |
|
| 291 |
#' @param threshold (`proportion`)\cr lower threshold. |
|
| 292 |
#' |
|
| 293 |
#' @return An `rtables` formatting function that takes numeric input `x` where the second |
|
| 294 |
#' element is the fraction that is formatted. If the fraction is above or equal to the threshold, |
|
| 295 |
#' then it is displayed in percentage. If it is positive but below the threshold, it returns, |
|
| 296 |
#' e.g. "<1" if the threshold is `0.01`. If it is zero, then just "0" is returned. |
|
| 297 |
#' |
|
| 298 |
#' @examples |
|
| 299 |
#' format_fun <- format_fraction_threshold(0.05) |
|
| 300 |
#' format_fun(x = c(20, 0.1)) |
|
| 301 |
#' format_fun(x = c(2, 0.01)) |
|
| 302 |
#' format_fun(x = c(0, 0)) |
|
| 303 |
#' |
|
| 304 |
#' @family formatting functions |
|
| 305 |
#' @export |
|
| 306 |
format_fraction_threshold <- function(threshold) {
|
|
| 307 | 1x |
assert_proportion_value(threshold) |
| 308 | 1x |
string_below_threshold <- paste0("<", round(threshold * 100))
|
| 309 | 1x |
function(x, ...) {
|
| 310 | 3x |
assert_proportion_value(x[2], include_boundaries = TRUE) |
| 311 | 3x |
ifelse( |
| 312 | 3x |
x[2] > 0.01, |
| 313 | 3x |
round(x[2] * 100), |
| 314 | 3x |
ifelse( |
| 315 | 3x |
x[2] == 0, |
| 316 | 3x |
"0", |
| 317 | 3x |
string_below_threshold |
| 318 |
) |
|
| 319 |
) |
|
| 320 |
} |
|
| 321 |
} |
|
| 322 | ||
| 323 |
#' Format extreme values |
|
| 324 |
#' |
|
| 325 |
#' @description `r lifecycle::badge("stable")`
|
|
| 326 |
#' |
|
| 327 |
#' `rtables` formatting functions that handle extreme values. |
|
| 328 |
#' |
|
| 329 |
#' @param digits (`integer(1)`)\cr number of decimal places to display. |
|
| 330 |
#' |
|
| 331 |
#' @details For each input, apply a format to the specified number of `digits`. If the value is |
|
| 332 |
#' below a threshold, it returns "<0.01" e.g. if the number of `digits` is 2. If the value is |
|
| 333 |
#' above a threshold, it returns ">999.99" e.g. if the number of `digits` is 2. |
|
| 334 |
#' If it is zero, then returns "0.00". |
|
| 335 |
#' |
|
| 336 |
#' @family formatting functions |
|
| 337 |
#' @name extreme_format |
|
| 338 |
NULL |
|
| 339 | ||
| 340 |
#' @describeIn extreme_format Internal helper function to calculate the threshold and create formatted strings |
|
| 341 |
#' used in Formatting Functions. Returns a list with elements `threshold` and `format_string`. |
|
| 342 |
#' |
|
| 343 |
#' @return |
|
| 344 |
#' * `h_get_format_threshold()` returns a `list` of 2 elements: `threshold`, with `low` and `high` thresholds, |
|
| 345 |
#' and `format_string`, with thresholds formatted as strings. |
|
| 346 |
#' |
|
| 347 |
#' @examples |
|
| 348 |
#' h_get_format_threshold(2L) |
|
| 349 |
#' |
|
| 350 |
#' @export |
|
| 351 |
h_get_format_threshold <- function(digits = 2L) {
|
|
| 352 | 2013x |
checkmate::assert_integerish(digits) |
| 353 | ||
| 354 | 2013x |
low_threshold <- 1 / (10 ^ digits) # styler: off |
| 355 | 2013x |
high_threshold <- 1000 - (1 / (10 ^ digits)) # styler: off |
| 356 | ||
| 357 | 2013x |
string_below_threshold <- paste0("<", low_threshold)
|
| 358 | 2013x |
string_above_threshold <- paste0(">", high_threshold)
|
| 359 | ||
| 360 | 2013x |
list( |
| 361 | 2013x |
"threshold" = c(low = low_threshold, high = high_threshold), |
| 362 | 2013x |
"format_string" = c(low = string_below_threshold, high = string_above_threshold) |
| 363 |
) |
|
| 364 |
} |
|
| 365 | ||
| 366 |
#' @describeIn extreme_format Internal helper function to apply a threshold format to a value. |
|
| 367 |
#' Creates a formatted string to be used in Formatting Functions. |
|
| 368 |
#' |
|
| 369 |
#' @param x (`numeric(1)`)\cr value to format. |
|
| 370 |
#' |
|
| 371 |
#' @return |
|
| 372 |
#' * `h_format_threshold()` returns the given value, or if the value is not within the digit threshold the relation |
|
| 373 |
#' of the given value to the digit threshold, as a formatted string. |
|
| 374 |
#' |
|
| 375 |
#' @examples |
|
| 376 |
#' h_format_threshold(0.001) |
|
| 377 |
#' h_format_threshold(1000) |
|
| 378 |
#' |
|
| 379 |
#' @export |
|
| 380 |
h_format_threshold <- function(x, digits = 2L) {
|
|
| 381 | 2015x |
if (is.na(x)) {
|
| 382 | 4x |
return(x) |
| 383 |
} |
|
| 384 | ||
| 385 | 2011x |
checkmate::assert_numeric(x, lower = 0) |
| 386 | ||
| 387 | 2011x |
l_fmt <- h_get_format_threshold(digits) |
| 388 | ||
| 389 | 2011x |
result <- if (x < l_fmt$threshold["low"] && 0 < x) {
|
| 390 | 44x |
l_fmt$format_string["low"] |
| 391 | 2011x |
} else if (x > l_fmt$threshold["high"]) {
|
| 392 | 99x |
l_fmt$format_string["high"] |
| 393 |
} else {
|
|
| 394 | 1868x |
sprintf(fmt = paste0("%.", digits, "f"), x)
|
| 395 |
} |
|
| 396 | ||
| 397 | 2011x |
unname(result) |
| 398 |
} |
|
| 399 | ||
| 400 |
#' Format a single extreme value |
|
| 401 |
#' |
|
| 402 |
#' @description `r lifecycle::badge("stable")`
|
|
| 403 |
#' |
|
| 404 |
#' Create a formatting function for a single extreme value. |
|
| 405 |
#' |
|
| 406 |
#' @inheritParams extreme_format |
|
| 407 |
#' |
|
| 408 |
#' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme value. |
|
| 409 |
#' |
|
| 410 |
#' @examples |
|
| 411 |
#' format_fun <- format_extreme_values(2L) |
|
| 412 |
#' format_fun(x = 0.127) |
|
| 413 |
#' format_fun(x = Inf) |
|
| 414 |
#' format_fun(x = 0) |
|
| 415 |
#' format_fun(x = 0.009) |
|
| 416 |
#' |
|
| 417 |
#' @family formatting functions |
|
| 418 |
#' @export |
|
| 419 |
format_extreme_values <- function(digits = 2L) {
|
|
| 420 | 1x |
function(x, ...) {
|
| 421 | 5x |
checkmate::assert_scalar(x, na.ok = TRUE) |
| 422 | ||
| 423 | 5x |
h_format_threshold(x = x, digits = digits) |
| 424 |
} |
|
| 425 |
} |
|
| 426 | ||
| 427 |
#' Format extreme values part of a confidence interval |
|
| 428 |
#' |
|
| 429 |
#' @description `r lifecycle::badge("stable")`
|
|
| 430 |
#' |
|
| 431 |
#' Formatting Function for extreme values part of a confidence interval. Values |
|
| 432 |
#' are formatted as e.g. "(xx.xx, xx.xx)" if the number of `digits` is 2. |
|
| 433 |
#' |
|
| 434 |
#' @inheritParams extreme_format |
|
| 435 |
#' |
|
| 436 |
#' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme |
|
| 437 |
#' values confidence interval. |
|
| 438 |
#' |
|
| 439 |
#' @examples |
|
| 440 |
#' format_fun <- format_extreme_values_ci(2L) |
|
| 441 |
#' format_fun(x = c(0.127, Inf)) |
|
| 442 |
#' format_fun(x = c(0, 0.009)) |
|
| 443 |
#' |
|
| 444 |
#' @family formatting functions |
|
| 445 |
#' @export |
|
| 446 |
format_extreme_values_ci <- function(digits = 2L) {
|
|
| 447 | 9x |
function(x, ...) {
|
| 448 | 54x |
checkmate::assert_vector(x, len = 2) |
| 449 | 54x |
l_result <- h_format_threshold(x = x[1], digits = digits) |
| 450 | 54x |
h_result <- h_format_threshold(x = x[2], digits = digits) |
| 451 | ||
| 452 | 54x |
paste0("(", l_result, ", ", h_result, ")")
|
| 453 |
} |
|
| 454 |
} |
|
| 455 | ||
| 456 |
#' Format automatically using data significant digits |
|
| 457 |
#' |
|
| 458 |
#' @description `r lifecycle::badge("stable")`
|
|
| 459 |
#' |
|
| 460 |
#' Formatting function for the majority of default methods used in [analyze_vars()]. |
|
| 461 |
#' For non-derived values, the significant digits of data is used (e.g. range), while derived |
|
| 462 |
#' values have one more digits (measure of location and dispersion like mean, standard deviation). |
|
| 463 |
#' This function can be called internally with "auto" like, for example, |
|
| 464 |
#' `.formats = c("mean" = "auto")`. See details to see how this works with the inner function.
|
|
| 465 |
#' |
|
| 466 |
#' @param dt_var (`numeric`)\cr variable data the statistics were calculated from. Used only to |
|
| 467 |
#' find significant digits. In [analyze_vars] this comes from `.df_row` (see |
|
| 468 |
#' [rtables::additional_fun_params]), and it is the row data after the above row splits. No |
|
| 469 |
#' column split is considered. |
|
| 470 |
#' @param x_stat (`string`)\cr string indicating the current statistical method used. |
|
| 471 |
#' |
|
| 472 |
#' @return A string that `rtables` prints in a table cell. |
|
| 473 |
#' |
|
| 474 |
#' @details |
|
| 475 |
#' The internal function is needed to work with `rtables` default structure for |
|
| 476 |
#' format functions, i.e. `function(x, ...)`, where is x are results from statistical evaluation. |
|
| 477 |
#' It can be more than one element (e.g. for `.stats = "mean_sd"`). |
|
| 478 |
#' |
|
| 479 |
#' @examples |
|
| 480 |
#' x_todo <- c(0.001, 0.2, 0.0011000, 3, 4) |
|
| 481 |
#' res <- c(mean(x_todo[1:3]), sd(x_todo[1:3])) |
|
| 482 |
#' |
|
| 483 |
#' # x is the result coming into the formatting function -> res!! |
|
| 484 |
#' format_auto(dt_var = x_todo, x_stat = "mean_sd")(x = res) |
|
| 485 |
#' format_auto(x_todo, "range")(x = range(x_todo)) |
|
| 486 |
#' no_sc_x <- c(0.0000001, 1) |
|
| 487 |
#' format_auto(no_sc_x, "range")(x = no_sc_x) |
|
| 488 |
#' |
|
| 489 |
#' @family formatting functions |
|
| 490 |
#' @export |
|
| 491 |
format_auto <- function(dt_var, x_stat) {
|
|
| 492 | 16x |
function(x = "", ...) {
|
| 493 | 56x |
checkmate::assert_numeric(x, min.len = 1) |
| 494 | 56x |
checkmate::assert_numeric(dt_var, min.len = 1) |
| 495 |
# Defaults - they may be a param in the future |
|
| 496 | 56x |
der_stats <- c( |
| 497 | 56x |
"mean", "sd", "se", "median", "geom_mean", "quantiles", "iqr", |
| 498 | 56x |
"mean_sd", "mean_se", "mean_se", "mean_ci", "mean_sei", "mean_sdi", |
| 499 | 56x |
"median_ci" |
| 500 |
) |
|
| 501 | 56x |
nonder_stats <- c("n", "range", "min", "max")
|
| 502 | ||
| 503 |
# Safenet for miss-modifications |
|
| 504 | 56x |
stopifnot(length(intersect(der_stats, nonder_stats)) == 0) # nolint |
| 505 | 56x |
checkmate::assert_choice(x_stat, c(der_stats, nonder_stats)) |
| 506 | ||
| 507 |
# Finds the max number of digits in data |
|
| 508 | 56x |
detect_dig <- vapply(dt_var, count_decimalplaces, FUN.VALUE = numeric(1)) %>% |
| 509 | 56x |
max() |
| 510 | ||
| 511 | 56x |
if (x_stat %in% der_stats) {
|
| 512 | 40x |
detect_dig <- detect_dig + 1 |
| 513 |
} |
|
| 514 | ||
| 515 |
# Render input |
|
| 516 | 56x |
str_vals <- formatC(x, digits = detect_dig, format = "f") |
| 517 | 56x |
def_fmt <- get_formats_from_stats(x_stat)[[x_stat]] |
| 518 | 56x |
str_fmt <- str_extract(def_fmt, invert = FALSE)[[1]] |
| 519 | 56x |
if (length(str_fmt) != length(str_vals)) {
|
| 520 | 2x |
stop( |
| 521 | 2x |
"Number of inserted values as result (", length(str_vals),
|
| 522 | 2x |
") is not the same as there should be in the default tern formats for ", |
| 523 | 2x |
x_stat, " (-> ", def_fmt, " needs ", length(str_fmt), " values). ", |
| 524 | 2x |
"See tern_default_formats to check all of them." |
| 525 |
) |
|
| 526 |
} |
|
| 527 | ||
| 528 |
# Squashing them together |
|
| 529 | 54x |
inv_str_fmt <- str_extract(def_fmt, invert = TRUE)[[1]] |
| 530 | 54x |
stopifnot(length(inv_str_fmt) == length(str_vals) + 1) # nolint |
| 531 | ||
| 532 | 54x |
out <- vector("character", length = length(inv_str_fmt) + length(str_vals))
|
| 533 | 54x |
is_even <- seq_along(out) %% 2 == 0 |
| 534 | 54x |
out[is_even] <- str_vals |
| 535 | 54x |
out[!is_even] <- inv_str_fmt |
| 536 | ||
| 537 | 54x |
return(paste0(out, collapse = "")) |
| 538 |
} |
|
| 539 |
} |
|
| 540 | ||
| 541 |
# Utility function that could be useful in general |
|
| 542 |
str_extract <- function(string, pattern = "xx|xx\\.|xx\\.x+", invert = FALSE) {
|
|
| 543 | 110x |
regmatches(string, gregexpr(pattern, string), invert = invert) |
| 544 |
} |
|
| 545 | ||
| 546 |
# Helper function |
|
| 547 |
count_decimalplaces <- function(dec) {
|
|
| 548 | 2038x |
if (is.na(dec)) {
|
| 549 | 6x |
return(0) |
| 550 | 2032x |
} else if (abs(dec - round(dec)) > .Machine$double.eps^0.5) { # For precision
|
| 551 | 1939x |
nchar(strsplit(format(dec, scientific = FALSE, trim = FALSE), ".", fixed = TRUE)[[1]][[2]]) |
| 552 |
} else {
|
|
| 553 | 93x |
return(0) |
| 554 |
} |
|
| 555 |
} |
|
| 556 | ||
| 557 |
#' Apply automatic formatting |
|
| 558 |
#' |
|
| 559 |
#' Checks if any of the listed formats in `.formats` are `"auto"`, and replaces `"auto"` with |
|
| 560 |
#' the correct implementation of `format_auto` for the given statistics, data, and variable. |
|
| 561 |
#' |
|
| 562 |
#' @inheritParams argument_convention |
|
| 563 |
#' @param x_stats (named `list`)\cr a named list of statistics where each element corresponds |
|
| 564 |
#' to an element in `.formats`, with matching names. |
|
| 565 |
#' |
|
| 566 |
#' @keywords internal |
|
| 567 |
apply_auto_formatting <- function(.formats, x_stats, .df_row, .var) {
|
|
| 568 | 1598x |
is_auto_fmt <- vapply(.formats, function(ii) is.character(ii) && ii == "auto", logical(1)) |
| 569 | 1598x |
if (any(is_auto_fmt)) {
|
| 570 | 8x |
auto_stats <- x_stats[is_auto_fmt] |
| 571 | 8x |
var_df <- .df_row[[.var]] # xxx this can be extended for the WHOLE data or single facets |
| 572 | 8x |
.formats[is_auto_fmt] <- lapply(names(auto_stats), format_auto, dt_var = var_df) |
| 573 |
} |
|
| 574 | 1598x |
.formats |
| 575 |
} |
| 1 |
#' Get default statistical methods and their associated formats, labels, and indent modifiers |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("experimental")`
|
|
| 4 |
#' |
|
| 5 |
#' Utility functions to get valid statistic methods for different method groups |
|
| 6 |
#' (`.stats`) and their associated formats (`.formats`), labels (`.labels`), and indent modifiers |
|
| 7 |
#' (`.indent_mods`). This utility is used across `tern`, but some of its working principles can be |
|
| 8 |
#' seen in [analyze_vars()]. See notes to understand why this is experimental. |
|
| 9 |
#' |
|
| 10 |
#' @param stats (`character`)\cr statistical methods to return defaults for. |
|
| 11 |
#' @param levels_per_stats (named `list` of `character` or `NULL`)\cr named list where the name of each element is a |
|
| 12 |
#' statistic from `stats` and each element is the levels of a `factor` or `character` variable (or variable name), |
|
| 13 |
#' each corresponding to a single row, for which the named statistic should be calculated for. If a statistic is only |
|
| 14 |
#' calculated once (one row), the element can be either `NULL` or the name of the statistic. Each list element will be |
|
| 15 |
#' flattened such that the names of the list elements returned by the function have the format `statistic.level` (or |
|
| 16 |
#' just `statistic` for statistics calculated for a single row). Defaults to `NULL`. |
|
| 17 |
#' @param tern_defaults (`list` or `vector`)\cr defaults to use to fill in missing values if no user input is given. |
|
| 18 |
#' Must be of the same type as the values that are being filled in (e.g. indentation must be integers). |
|
| 19 |
#' |
|
| 20 |
#' @details |
|
| 21 |
#' Current choices for `type` are `counts` and `numeric` for [analyze_vars()] and affect `get_stats()`. |
|
| 22 |
#' |
|
| 23 |
#' @note |
|
| 24 |
#' These defaults are experimental because we use the names of functions to retrieve the default |
|
| 25 |
#' statistics. This should be generalized in groups of methods according to more reasonable groupings. |
|
| 26 |
#' |
|
| 27 |
#' @name default_stats_formats_labels |
|
| 28 |
NULL |
|
| 29 | ||
| 30 |
#' @describeIn default_stats_formats_labels Get statistics available for a given method |
|
| 31 |
#' group (analyze function). To check available defaults see `tern::tern_default_stats` list. |
|
| 32 |
#' |
|
| 33 |
#' @param method_groups (`character`)\cr indicates the statistical method group (`tern` analyze function) |
|
| 34 |
#' to retrieve default statistics for. A character vector can be used to specify more than one statistical |
|
| 35 |
#' method group. |
|
| 36 |
#' @param stats_in (`character`)\cr statistics to retrieve for the selected method group. If custom statistical |
|
| 37 |
#' functions are used, `stats_in` needs to have them in too. |
|
| 38 |
#' @param custom_stats_in (`character`)\cr custom statistics to add to the default statistics. |
|
| 39 |
#' @param add_pval (`flag`)\cr should `"pval"` (or `"pval_counts"` if `method_groups` contains |
|
| 40 |
#' `"analyze_vars_counts"`) be added to the statistical methods? |
|
| 41 |
#' |
|
| 42 |
#' @return |
|
| 43 |
#' * `get_stats()` returns a `character` vector of statistical methods. |
|
| 44 |
#' |
|
| 45 |
#' @examples |
|
| 46 |
#' # analyze_vars is numeric |
|
| 47 |
#' num_stats <- get_stats("analyze_vars_numeric") # also the default
|
|
| 48 |
#' |
|
| 49 |
#' # Other type |
|
| 50 |
#' cnt_stats <- get_stats("analyze_vars_counts")
|
|
| 51 |
#' |
|
| 52 |
#' # Weirdly taking the pval from count_occurrences |
|
| 53 |
#' only_pval <- get_stats("count_occurrences", add_pval = TRUE, stats_in = "pval")
|
|
| 54 |
#' |
|
| 55 |
#' # All count_occurrences |
|
| 56 |
#' all_cnt_occ <- get_stats("count_occurrences")
|
|
| 57 |
#' |
|
| 58 |
#' # Multiple |
|
| 59 |
#' get_stats(c("count_occurrences", "analyze_vars_counts"))
|
|
| 60 |
#' |
|
| 61 |
#' @export |
|
| 62 |
get_stats <- function(method_groups = "analyze_vars_numeric", |
|
| 63 |
stats_in = NULL, custom_stats_in = NULL, add_pval = FALSE) {
|
|
| 64 | 1670x |
checkmate::assert_character(method_groups) |
| 65 | 1670x |
checkmate::assert_character(stats_in, null.ok = TRUE) |
| 66 | 1670x |
checkmate::assert_character(custom_stats_in, null.ok = TRUE) |
| 67 | 1670x |
checkmate::assert_flag(add_pval) |
| 68 | ||
| 69 |
# Default is still numeric |
|
| 70 | 1670x |
if (any(method_groups == "analyze_vars")) {
|
| 71 | 3x |
method_groups[method_groups == "analyze_vars"] <- "analyze_vars_numeric" |
| 72 |
} |
|
| 73 | ||
| 74 | 1670x |
type_tmp <- ifelse(any(grepl("counts$", method_groups)), "counts", "numeric") # for pval checks
|
| 75 | ||
| 76 |
# Defaults for loop |
|
| 77 | 1670x |
out <- NULL |
| 78 | ||
| 79 |
# Loop for multiple method groups |
|
| 80 | 1670x |
for (mgi in method_groups) {
|
| 81 | 1697x |
if (mgi %in% names(tern_default_stats)) {
|
| 82 | 1696x |
out_tmp <- tern_default_stats[[mgi]] |
| 83 |
} else {
|
|
| 84 | 1x |
stop("The selected method group (", mgi, ") has no default statistical method.")
|
| 85 |
} |
|
| 86 | 1696x |
out <- unique(c(out, out_tmp)) |
| 87 |
} |
|
| 88 | ||
| 89 |
# Add custom stats |
|
| 90 | 1669x |
out <- c(out, custom_stats_in) |
| 91 | ||
| 92 |
# If you added pval to the stats_in you certainly want it |
|
| 93 | 1669x |
if (!is.null(stats_in) && any(grepl("^pval", stats_in))) {
|
| 94 | 140x |
stats_in_pval_value <- stats_in[grepl("^pval", stats_in)]
|
| 95 | ||
| 96 |
# Must be only one value between choices |
|
| 97 | 140x |
checkmate::assert_choice(stats_in_pval_value, c("pval", "pval_counts", "pvalue"))
|
| 98 | ||
| 99 |
# Mismatch with counts and numeric |
|
| 100 | 139x |
if (any(grepl("counts", method_groups)) && stats_in_pval_value != "pval_counts" ||
|
| 101 | 139x |
any(grepl("numeric", method_groups)) && stats_in_pval_value != "pval") { # nolint
|
| 102 | 2x |
stop( |
| 103 | 2x |
"Inserted p-value (", stats_in_pval_value, ") is not valid for type ",
|
| 104 | 2x |
type_tmp, ". Use ", paste(ifelse(stats_in_pval_value == "pval", "pval_counts", "pval")), |
| 105 | 2x |
" instead." |
| 106 |
) |
|
| 107 |
} |
|
| 108 | ||
| 109 |
# Lets add it even if present (thanks to unique) |
|
| 110 | 137x |
add_pval <- TRUE |
| 111 |
} |
|
| 112 | ||
| 113 |
# Mainly used in "analyze_vars" but it could be necessary elsewhere |
|
| 114 | 1666x |
if (isTRUE(add_pval)) {
|
| 115 | 147x |
if (any(grepl("counts", method_groups))) {
|
| 116 | 16x |
out <- unique(c(out, "pval_counts")) |
| 117 |
} else {
|
|
| 118 | 131x |
out <- unique(c(out, "pval")) |
| 119 |
} |
|
| 120 |
} |
|
| 121 | ||
| 122 |
# Filtering for stats_in (character vector) |
|
| 123 | 1666x |
if (!is.null(stats_in)) {
|
| 124 | 1614x |
out <- intersect(stats_in, out) # It orders them too |
| 125 |
} |
|
| 126 | ||
| 127 |
# If intersect did not find matches (and no pval?) -> error |
|
| 128 | 1666x |
if (length(out) == 0) {
|
| 129 | 2x |
stop( |
| 130 | 2x |
"The selected method group(s) (", paste0(method_groups, collapse = ", "), ")",
|
| 131 | 2x |
" do not have the required default statistical methods:\n", |
| 132 | 2x |
paste0(stats_in, collapse = " ") |
| 133 |
) |
|
| 134 |
} |
|
| 135 | ||
| 136 | 1664x |
out |
| 137 |
} |
|
| 138 | ||
| 139 |
#' @describeIn default_stats_formats_labels Get statistical *names* available for a given method |
|
| 140 |
#' group (analyze function). Please use the `s_*` functions to get the statistical names. |
|
| 141 |
#' @param stat_results (`list`)\cr list of statistical results. It should be used close to the end of |
|
| 142 |
#' a statistical function. See examples for a structure with two statistical results and two groups. |
|
| 143 |
#' @param stat_names_in (`character`)\cr custom modification of statistical values. |
|
| 144 |
#' |
|
| 145 |
#' @return |
|
| 146 |
#' * `get_stat_names()` returns a named list of `character` vectors, indicating the names of |
|
| 147 |
#' statistical outputs. |
|
| 148 |
#' |
|
| 149 |
#' @examples |
|
| 150 |
#' stat_results <- list( |
|
| 151 |
#' "n" = list("M" = 1, "F" = 2),
|
|
| 152 |
#' "count_fraction" = list("M" = c(1, 0.2), "F" = c(2, 0.1))
|
|
| 153 |
#' ) |
|
| 154 |
#' get_stat_names(stat_results) |
|
| 155 |
#' get_stat_names(stat_results, list("n" = "argh"))
|
|
| 156 |
#' |
|
| 157 |
#' @export |
|
| 158 |
get_stat_names <- function(stat_results, stat_names_in = NULL) {
|
|
| 159 | 1600x |
checkmate::assert_character(names(stat_results), min.len = 1) |
| 160 | 1600x |
checkmate::assert_list(stat_names_in, null.ok = TRUE) |
| 161 | ||
| 162 | 1600x |
stat_nms_from_stats <- lapply(stat_results, function(si) {
|
| 163 | 5777x |
nm <- names(si) |
| 164 | 5777x |
if (is.null(nm)) {
|
| 165 | 2722x |
nm <- rep(NA_character_, length(si)) # no statistical names |
| 166 |
} |
|
| 167 | 5777x |
nm |
| 168 |
}) |
|
| 169 | ||
| 170 |
# Modify some with custom stat names |
|
| 171 | 1600x |
if (!is.null(stat_names_in)) {
|
| 172 |
# Stats is the main |
|
| 173 | 6x |
common_names <- intersect(names(stat_nms_from_stats), names(stat_names_in)) |
| 174 | 6x |
stat_nms_from_stats[common_names] <- stat_names_in[common_names] |
| 175 |
} |
|
| 176 | ||
| 177 | 1600x |
stat_nms_from_stats |
| 178 |
} |
|
| 179 | ||
| 180 |
# Utility function used to separate custom stats (user-defined functions) from defaults |
|
| 181 |
.split_std_from_custom_stats <- function(stats_in) {
|
|
| 182 | 897x |
out <- list(default_stats = NULL, custom_stats = NULL, all_stats = NULL) |
| 183 | 897x |
if (is.list(stats_in)) {
|
| 184 | 12x |
is_custom_fnc <- sapply(stats_in, is.function) |
| 185 | 12x |
checkmate::assert_list(stats_in[is_custom_fnc], types = "function", names = "named") |
| 186 | 12x |
out[["custom_stats"]] <- stats_in[is_custom_fnc] |
| 187 | 12x |
out[["default_stats"]] <- unlist(stats_in[!is_custom_fnc]) |
| 188 | 12x |
all_stats <- names(stats_in) # to keep the order |
| 189 | 12x |
all_stats[!is_custom_fnc] <- out[["default_stats"]] |
| 190 | 12x |
out[["all_stats"]] <- all_stats |
| 191 |
} else {
|
|
| 192 | 885x |
out[["default_stats"]] <- out[["all_stats"]] <- stats_in |
| 193 |
} |
|
| 194 | 897x |
out |
| 195 |
} |
|
| 196 | ||
| 197 |
# Utility function to apply statistical functions |
|
| 198 |
.apply_stat_functions <- function(default_stat_fnc, custom_stat_fnc_list, args_list) {
|
|
| 199 |
# Default checks |
|
| 200 | 920x |
checkmate::assert_function(default_stat_fnc) |
| 201 | 920x |
checkmate::assert_list(custom_stat_fnc_list, types = "function", null.ok = TRUE, names = "named") |
| 202 | 920x |
checkmate::assert_list(args_list) |
| 203 | ||
| 204 |
# Checking custom stats have same formals |
|
| 205 | 920x |
if (!is.null(custom_stat_fnc_list)) {
|
| 206 | 12x |
fundamental_call_to_data <- names(formals(default_stat_fnc))[[1]] |
| 207 | 12x |
for (fnc in custom_stat_fnc_list) {
|
| 208 | 17x |
if (!identical(names(formals(fnc))[[1]], fundamental_call_to_data)) {
|
| 209 | 1x |
stop( |
| 210 | 1x |
"The first parameter of a custom statistical function needs to be the same (it can be `df` or `x`) ", |
| 211 | 1x |
"as the default statistical function. In this case your custom function has ", names(formals(fnc))[[1]], |
| 212 | 1x |
" as first parameter, while the default function has ", fundamental_call_to_data, "." |
| 213 |
) |
|
| 214 |
} |
|
| 215 | 16x |
if (!any(names(formals(fnc)) == "...")) {
|
| 216 | 1x |
stop( |
| 217 | 1x |
"The custom statistical function needs to have `...` as a parameter to accept additional arguments. ", |
| 218 | 1x |
"In this case your custom function does not have `...`." |
| 219 |
) |
|
| 220 |
} |
|
| 221 |
} |
|
| 222 |
} |
|
| 223 | ||
| 224 |
# Applying |
|
| 225 | 918x |
out_default <- do.call(default_stat_fnc, args = args_list) |
| 226 | 916x |
out_custom <- lapply(custom_stat_fnc_list, function(fnc) do.call(fnc, args = args_list)) |
| 227 | ||
| 228 |
# Merging |
|
| 229 | 916x |
c(out_default, out_custom) |
| 230 |
} |
|
| 231 | ||
| 232 |
#' @describeIn default_stats_formats_labels Get formats corresponding to a list of statistics. |
|
| 233 |
#' To check available defaults see list `tern::tern_default_formats`. |
|
| 234 |
#' |
|
| 235 |
#' @param formats_in (named `vector`)\cr custom formats to use instead of defaults. Can be a character vector with |
|
| 236 |
#' values from [formatters::list_valid_format_labels()] or custom format functions. Defaults to `NULL` for any rows |
|
| 237 |
#' with no value is provided. See Details. |
|
| 238 |
#' |
|
| 239 |
#' @details if `formats_in` is `"default"`, instead of populating the |
|
| 240 |
#' return value with tern defaults, the return value will specify |
|
| 241 |
#' the `"default"` format for each element. This is useful |
|
| 242 |
#' primarily when formatting behavior should be inherited from a |
|
| 243 |
#' format specified via the `format` or `formats_var` argument to |
|
| 244 |
#' `analyze`. |
|
| 245 |
#' |
|
| 246 |
#' @return |
|
| 247 |
#' * `get_formats_from_stats()` returns a named list of formats as strings or functions. |
|
| 248 |
#' |
|
| 249 |
#' @note Formats in `tern` and `rtables` can be functions that take in the table cell value and |
|
| 250 |
#' return a string. This is well documented in `vignette("custom_appearance", package = "rtables")`.
|
|
| 251 |
#' |
|
| 252 |
#' @examples |
|
| 253 |
#' # Defaults formats |
|
| 254 |
#' get_formats_from_stats(num_stats) |
|
| 255 |
#' get_formats_from_stats(cnt_stats) |
|
| 256 |
#' get_formats_from_stats(only_pval) |
|
| 257 |
#' get_formats_from_stats(all_cnt_occ) |
|
| 258 |
#' |
|
| 259 |
#' # Addition of customs |
|
| 260 |
#' get_formats_from_stats(all_cnt_occ, formats_in = c("fraction" = c("xx")))
|
|
| 261 |
#' get_formats_from_stats(all_cnt_occ, formats_in = list("fraction" = c("xx.xx", "xx")))
|
|
| 262 |
#' |
|
| 263 |
#' @seealso [formatting_functions] |
|
| 264 |
#' |
|
| 265 |
#' @export |
|
| 266 |
get_formats_from_stats <- function(stats, |
|
| 267 |
formats_in = NULL, |
|
| 268 |
levels_per_stats = NULL, |
|
| 269 |
tern_defaults = tern_default_formats) {
|
|
| 270 | 1695x |
checkmate::assert_character(stats, min.len = 1) |
| 271 |
# It may be a list if there is a function in the formats |
|
| 272 | 1695x |
if (checkmate::test_list(formats_in, null.ok = TRUE)) {
|
| 273 | 1552x |
checkmate::assert_list(formats_in, null.ok = TRUE) |
| 274 |
# Or it may be a vector of characters |
|
| 275 |
} else {
|
|
| 276 | 143x |
checkmate::assert_character(formats_in, null.ok = TRUE) |
| 277 |
} |
|
| 278 | 1695x |
checkmate::assert_list(levels_per_stats, null.ok = TRUE) |
| 279 | ||
| 280 |
# If unnamed formats given as formats_in and same number of stats, use one format per stat |
|
| 281 |
if ( |
|
| 282 | 1695x |
!is.null(formats_in) && length(formats_in) == length(stats) && |
| 283 | 1695x |
is.null(names(formats_in)) && is.null(levels_per_stats) |
| 284 |
) {
|
|
| 285 | 2x |
out <- as.list(formats_in) %>% setNames(stats) |
| 286 | 2x |
return(out) |
| 287 |
} |
|
| 288 | ||
| 289 | 1693x |
full_default <- identical(formats_in, "default") |
| 290 | ||
| 291 | 1693x |
if (full_default) {
|
| 292 |
## act as if we got NULL to get the right structure for return value |
|
| 293 |
## then replace each element with "default" below |
|
| 294 | 20x |
formats_in <- NULL |
| 295 |
} |
|
| 296 | ||
| 297 |
# If levels_per_stats not given, assume one row per statistic |
|
| 298 | 383x |
if (is.null(levels_per_stats)) levels_per_stats <- as.list(stats) %>% setNames(stats) |
| 299 | ||
| 300 |
# Apply custom formats |
|
| 301 | 1693x |
out <- .fill_in_vals_by_stats(levels_per_stats, formats_in, tern_defaults) |
| 302 | ||
| 303 | 1693x |
if (full_default) {
|
| 304 | 20x |
out <- setNames(rep("default", length(out)), names(out))
|
| 305 |
} else {
|
|
| 306 |
# Default to NULL if no format |
|
| 307 | 1673x |
which_null <- names(which(sapply(levels_per_stats, is.null))) |
| 308 | 1673x |
levels_per_stats[which_null] <- which_null |
| 309 | 1673x |
case_input_is_not_stat <- unlist(out, use.names = FALSE) == unlist(levels_per_stats, use.names = FALSE) |
| 310 | 1673x |
out[names(out) == out | case_input_is_not_stat] <- list(NULL) |
| 311 |
} |
|
| 312 | ||
| 313 | 1693x |
out |
| 314 |
} |
|
| 315 | ||
| 316 |
#' @describeIn default_stats_formats_labels Get labels corresponding to a list of statistics. |
|
| 317 |
#' To check for available defaults see list `tern::tern_default_labels`. |
|
| 318 |
#' |
|
| 319 |
#' @param labels_in (named `character`)\cr custom labels to use instead of defaults. If no value is provided, the |
|
| 320 |
#' variable level (if rows correspond to levels of a variable) or statistic name will be used as label. |
|
| 321 |
#' @param label_attr_from_stats (named `list`)\cr if `labels_in = NULL`, then this will be used instead. It is a list |
|
| 322 |
#' of values defined in statistical functions as default labels. Values are ignored if `labels_in` is provided or `""` |
|
| 323 |
#' values are provided. |
|
| 324 |
#' |
|
| 325 |
#' @return |
|
| 326 |
#' * `get_labels_from_stats()` returns a named list of labels as strings. |
|
| 327 |
#' |
|
| 328 |
#' @examples |
|
| 329 |
#' # Defaults labels |
|
| 330 |
#' get_labels_from_stats(num_stats) |
|
| 331 |
#' get_labels_from_stats(cnt_stats) |
|
| 332 |
#' get_labels_from_stats(only_pval) |
|
| 333 |
#' get_labels_from_stats(all_cnt_occ) |
|
| 334 |
#' |
|
| 335 |
#' # Addition of customs |
|
| 336 |
#' get_labels_from_stats(all_cnt_occ, labels_in = c("fraction" = "Fraction"))
|
|
| 337 |
#' get_labels_from_stats(all_cnt_occ, labels_in = list("fraction" = c("Some more fractions")))
|
|
| 338 |
#' |
|
| 339 |
#' @export |
|
| 340 |
get_labels_from_stats <- function(stats, |
|
| 341 |
labels_in = NULL, |
|
| 342 |
levels_per_stats = NULL, |
|
| 343 |
label_attr_from_stats = NULL, |
|
| 344 |
tern_defaults = tern_default_labels) {
|
|
| 345 | 1646x |
checkmate::assert_character(stats, min.len = 1) |
| 346 | ||
| 347 |
# If labels_in is NULL, use label_attr_from_stats |
|
| 348 | 1646x |
if (is.null(labels_in)) {
|
| 349 | 1369x |
labels_in <- label_attr_from_stats |
| 350 | 1369x |
labels_in <- label_attr_from_stats[ |
| 351 | 1369x |
nzchar(label_attr_from_stats) & |
| 352 | 1369x |
!sapply(label_attr_from_stats, is.null) & |
| 353 | 1369x |
!is.na(label_attr_from_stats) |
| 354 |
] |
|
| 355 |
} |
|
| 356 | ||
| 357 |
# It may be a list |
|
| 358 | 1646x |
if (checkmate::test_list(labels_in, null.ok = TRUE)) {
|
| 359 | 1440x |
checkmate::assert_list(labels_in, null.ok = TRUE) |
| 360 |
# Or it may be a vector of characters |
|
| 361 |
} else {
|
|
| 362 | 206x |
checkmate::assert_character(labels_in, null.ok = TRUE) |
| 363 |
} |
|
| 364 | 1646x |
checkmate::assert_list(levels_per_stats, null.ok = TRUE) |
| 365 | ||
| 366 |
# If unnamed labels given as labels_in and same number of stats, use one label per stat |
|
| 367 |
if ( |
|
| 368 | 1646x |
!is.null(labels_in) && length(labels_in) == length(stats) && |
| 369 | 1646x |
is.null(names(labels_in)) && is.null(levels_per_stats) |
| 370 |
) {
|
|
| 371 | 2x |
out <- as.list(labels_in) %>% setNames(stats) |
| 372 | 2x |
return(out) |
| 373 |
} |
|
| 374 | ||
| 375 |
# If levels_per_stats not given, assume one row per statistic |
|
| 376 | 332x |
if (is.null(levels_per_stats)) levels_per_stats <- as.list(stats) %>% setNames(stats) |
| 377 | ||
| 378 |
# Apply custom labels |
|
| 379 | 1644x |
out <- .fill_in_vals_by_stats(levels_per_stats, labels_in, tern_defaults) |
| 380 | 1644x |
out |
| 381 |
} |
|
| 382 | ||
| 383 |
#' @describeIn default_stats_formats_labels Get row indent modifiers corresponding to a list of statistics/rows. |
|
| 384 |
#' |
|
| 385 |
#' @param indents_in (named `integer`)\cr custom row indent modifiers to use instead of defaults. Defaults to `0L` for |
|
| 386 |
#' all values. |
|
| 387 |
#' @param row_nms `r lifecycle::badge("deprecated")` Deprecation cycle started. See the `levels_per_stats` parameter
|
|
| 388 |
#' for details. |
|
| 389 |
#' |
|
| 390 |
#' @return |
|
| 391 |
#' * `get_indents_from_stats()` returns a named list of indentation modifiers as integers. |
|
| 392 |
#' |
|
| 393 |
#' @examples |
|
| 394 |
#' get_indents_from_stats(all_cnt_occ, indents_in = 3L) |
|
| 395 |
#' get_indents_from_stats(all_cnt_occ, indents_in = list(count = 2L, count_fraction = 5L)) |
|
| 396 |
#' get_indents_from_stats( |
|
| 397 |
#' all_cnt_occ, |
|
| 398 |
#' indents_in = list(a = 2L, count.a = 1L, count.b = 5L) |
|
| 399 |
#' ) |
|
| 400 |
#' |
|
| 401 |
#' @export |
|
| 402 |
get_indents_from_stats <- function(stats, |
|
| 403 |
indents_in = NULL, |
|
| 404 |
levels_per_stats = NULL, |
|
| 405 |
tern_defaults = as.list(rep(0L, length(stats))) %>% setNames(stats), |
|
| 406 |
row_nms = lifecycle::deprecated()) {
|
|
| 407 | 1602x |
checkmate::assert_character(stats, min.len = 1) |
| 408 |
# It may be a list |
|
| 409 | 1602x |
if (checkmate::test_list(indents_in, null.ok = TRUE)) {
|
| 410 | 1510x |
checkmate::assert_list(indents_in, null.ok = TRUE) |
| 411 |
# Or it may be a vector of integers |
|
| 412 |
} else {
|
|
| 413 | 92x |
checkmate::assert_integerish(indents_in, null.ok = TRUE) |
| 414 |
} |
|
| 415 | 1602x |
checkmate::assert_list(levels_per_stats, null.ok = TRUE) |
| 416 | ||
| 417 |
# If levels_per_stats not given, assume one row per statistic |
|
| 418 | 292x |
if (is.null(levels_per_stats)) levels_per_stats <- as.list(stats) %>% setNames(stats) |
| 419 | ||
| 420 |
# Single indentation level for all rows |
|
| 421 | 1602x |
if (is.null(names(indents_in)) && length(indents_in) == 1) {
|
| 422 | 20x |
out <- rep(indents_in, length(levels_per_stats %>% unlist())) |
| 423 | 20x |
return(out) |
| 424 |
} |
|
| 425 | ||
| 426 |
# Apply custom indentation |
|
| 427 | 1582x |
out <- .fill_in_vals_by_stats(levels_per_stats, indents_in, tern_defaults) |
| 428 | 1582x |
out |
| 429 |
} |
|
| 430 | ||
| 431 |
# Function to loop over each stat and levels to set correct values |
|
| 432 |
.fill_in_vals_by_stats <- function(levels_per_stats, user_in, tern_defaults) {
|
|
| 433 | 4919x |
out <- list() |
| 434 | ||
| 435 | 4919x |
for (stat_i in names(levels_per_stats)) {
|
| 436 |
# Get all levels of the statistic |
|
| 437 | 7991x |
all_lvls <- levels_per_stats[[stat_i]] |
| 438 | ||
| 439 | 7991x |
if ((length(all_lvls) == 1 && all_lvls == stat_i) || is.null(all_lvls)) { # One row per statistic
|
| 440 | 4237x |
out[[stat_i]] <- if (stat_i %in% names(user_in)) { # 1. Check for stat_i in user input
|
| 441 | 792x |
user_in[[stat_i]] |
| 442 | 4237x |
} else if (stat_i %in% names(tern_defaults)) { # 2. Check for stat_i in tern defaults
|
| 443 | 3397x |
tern_defaults[[stat_i]] |
| 444 | 4237x |
} else { # 3. Otherwise stat_i
|
| 445 | 48x |
stat_i |
| 446 |
} |
|
| 447 |
} else { # One row per combination of variable level and statistic
|
|
| 448 |
# Loop over levels for each statistic |
|
| 449 | 3754x |
for (lev_i in all_lvls) {
|
| 450 |
# Construct row name (stat_i.lev_i) |
|
| 451 | 13522x |
row_nm <- paste(stat_i, lev_i, sep = ".") |
| 452 | ||
| 453 | 13522x |
out[[row_nm]] <- if (row_nm %in% names(user_in)) { # 1. Check for stat_i.lev_i in user input
|
| 454 | 43x |
user_in[[row_nm]] |
| 455 | 13522x |
} else if (lev_i %in% names(user_in)) { # 2. Check for lev_i in user input
|
| 456 | 52x |
user_in[[lev_i]] |
| 457 | 13522x |
} else if (stat_i %in% names(user_in)) { # 3. Check for stat_i in user input
|
| 458 | 525x |
user_in[[stat_i]] |
| 459 | 13522x |
} else if (lev_i %in% names(tern_defaults)) { # 4. Check for lev_i in tern defaults (only used for labels)
|
| 460 | 1549x |
tern_defaults[[lev_i]] |
| 461 | 13522x |
} else if (stat_i %in% names(tern_defaults)) { # 5. Check for stat_i in tern defaults
|
| 462 | 8443x |
tern_defaults[[stat_i]] |
| 463 | 13522x |
} else { # 6. Otherwise lev_i
|
| 464 | 2910x |
lev_i |
| 465 |
} |
|
| 466 |
} |
|
| 467 |
} |
|
| 468 |
} |
|
| 469 | ||
| 470 | 4919x |
out |
| 471 |
} |
|
| 472 | ||
| 473 |
# Custom unlist function to retain NULL as "NULL" or NA |
|
| 474 |
.unlist_keep_nulls <- function(lst, null_placeholder = "NULL", recursive = FALSE) {
|
|
| 475 | 4836x |
lapply(lst, function(x) if (is.null(x)) null_placeholder else x) %>% |
| 476 | 4836x |
unlist(recursive = recursive) |
| 477 |
} |
|
| 478 | ||
| 479 |
#' Update labels according to control specifications |
|
| 480 |
#' |
|
| 481 |
#' @description `r lifecycle::badge("stable")`
|
|
| 482 |
#' |
|
| 483 |
#' Given a list of statistic labels and and a list of control parameters, updates labels with a relevant |
|
| 484 |
#' control specification. For example, if control has element `conf_level` set to `0.9`, the default |
|
| 485 |
#' label for statistic `mean_ci` will be updated to `"Mean 90% CI"`. Any labels that are supplied |
|
| 486 |
#' via `labels_custom` will not be updated regardless of `control`. |
|
| 487 |
#' |
|
| 488 |
#' @param labels_default (named `character`)\cr a named vector of statistic labels to modify |
|
| 489 |
#' according to the control specifications. Labels that are explicitly defined in `labels_custom` will |
|
| 490 |
#' not be affected. |
|
| 491 |
#' @param labels_custom (named `character`)\cr named vector of labels that are customized by |
|
| 492 |
#' the user and should not be affected by `control`. |
|
| 493 |
#' @param control (named `list`)\cr list of control parameters to apply to adjust default labels. |
|
| 494 |
#' |
|
| 495 |
#' @return A named character vector of labels with control specifications applied to relevant labels. |
|
| 496 |
#' |
|
| 497 |
#' @examples |
|
| 498 |
#' control <- list(conf_level = 0.80, quantiles = c(0.1, 0.83), test_mean = 0.57) |
|
| 499 |
#' get_labels_from_stats(c("mean_ci", "quantiles", "mean_pval")) %>%
|
|
| 500 |
#' labels_use_control(control = control) |
|
| 501 |
#' |
|
| 502 |
#' @export |
|
| 503 |
labels_use_control <- function(labels_default, control, labels_custom = NULL) {
|
|
| 504 | 21x |
if ("conf_level" %in% names(control)) {
|
| 505 | 21x |
labels_default <- sapply( |
| 506 | 21x |
names(labels_default), |
| 507 | 21x |
function(x) {
|
| 508 | 111x |
if (!x %in% names(labels_custom)) {
|
| 509 | 108x |
gsub(labels_default[[x]], pattern = "[0-9]+% CI", replacement = f_conf_level(control[["conf_level"]])) |
| 510 |
} else {
|
|
| 511 | 3x |
labels_default[[x]] |
| 512 |
} |
|
| 513 |
} |
|
| 514 |
) |
|
| 515 |
} |
|
| 516 | 21x |
if ("quantiles" %in% names(control) && "quantiles" %in% names(labels_default) &&
|
| 517 | 21x |
!"quantiles" %in% names(labels_custom)) { # nolint
|
| 518 | 16x |
labels_default["quantiles"] <- gsub( |
| 519 | 16x |
"[0-9]+% and [0-9]+", paste0(control[["quantiles"]][1] * 100, "% and ", control[["quantiles"]][2] * 100, ""), |
| 520 | 16x |
labels_default["quantiles"] |
| 521 |
) |
|
| 522 |
} |
|
| 523 | 21x |
if ("quantiles" %in% names(control) && "quantiles_lower" %in% names(labels_default) &&
|
| 524 | 21x |
!"quantiles_lower" %in% names(labels_custom)) { # nolint
|
| 525 | 6x |
labels_default["quantiles_lower"] <- gsub( |
| 526 | 6x |
"[0-9]+%-ile", paste0(control[["quantiles"]][1] * 100, "%-ile", ""), |
| 527 | 6x |
labels_default["quantiles_lower"] |
| 528 |
) |
|
| 529 |
} |
|
| 530 | 21x |
if ("quantiles" %in% names(control) && "quantiles_upper" %in% names(labels_default) &&
|
| 531 | 21x |
!"quantiles_upper" %in% names(labels_custom)) { # nolint
|
| 532 | 6x |
labels_default["quantiles_upper"] <- gsub( |
| 533 | 6x |
"[0-9]+%-ile", paste0(control[["quantiles"]][2] * 100, "%-ile", ""), |
| 534 | 6x |
labels_default["quantiles_upper"] |
| 535 |
) |
|
| 536 |
} |
|
| 537 | 21x |
if ("test_mean" %in% names(control) && "mean_pval" %in% names(labels_default) &&
|
| 538 | 21x |
!"mean_pval" %in% names(labels_custom)) { # nolint
|
| 539 | 2x |
labels_default["mean_pval"] <- gsub( |
| 540 | 2x |
"p-value \\(H0: mean = [0-9\\.]+\\)", f_pval(control[["test_mean"]]), labels_default["mean_pval"] |
| 541 |
) |
|
| 542 |
} |
|
| 543 | ||
| 544 | 21x |
labels_default |
| 545 |
} |
|
| 546 | ||
| 547 |
# tern_default_stats ----------------------------------------------------------- |
|
| 548 |
#' @describeIn default_stats_formats_labels Named list of available statistics by method group for `tern`. |
|
| 549 |
#' |
|
| 550 |
#' @format |
|
| 551 |
#' * `tern_default_stats` is a named list of available statistics, with each element |
|
| 552 |
#' named for their corresponding statistical method group. |
|
| 553 |
#' |
|
| 554 |
#' @export |
|
| 555 |
tern_default_stats <- list( |
|
| 556 |
abnormal = c("fraction"),
|
|
| 557 |
abnormal_by_baseline = c("fraction"),
|
|
| 558 |
abnormal_by_marked = c("count_fraction", "count_fraction_fixed_dp"),
|
|
| 559 |
abnormal_by_worst_grade = c("count_fraction", "count_fraction_fixed_dp"),
|
|
| 560 |
abnormal_lab_worsen_by_baseline = c("fraction"),
|
|
| 561 |
analyze_patients_exposure_in_cols = c("n_patients", "sum_exposure"),
|
|
| 562 |
analyze_vars_counts = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "fraction", "n_blq"),
|
|
| 563 |
analyze_vars_numeric = c( |
|
| 564 |
"n", "sum", "mean", "sd", "se", "mean_sd", "mean_se", "mean_ci", "mean_sei", "mean_sdi", "mean_pval", |
|
| 565 |
"median", "mad", "median_ci", "quantiles", "iqr", "range", "min", "max", "median_range", "cv", |
|
| 566 |
"geom_mean", "geom_sd", "geom_mean_sd", "geom_mean_ci", "geom_cv", |
|
| 567 |
"median_ci_3d", |
|
| 568 |
"mean_ci_3d", "geom_mean_ci_3d" |
|
| 569 |
), |
|
| 570 |
count_cumulative = c("count_fraction"),
|
|
| 571 |
count_missed_doses = c("n", "count_fraction"),
|
|
| 572 |
count_occurrences = c("count", "count_fraction", "count_fraction_fixed_dp", "fraction"),
|
|
| 573 |
count_occurrences_by_grade = c("count_fraction", "count_fraction_fixed_dp"),
|
|
| 574 |
count_patients_with_event = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "n_blq"),
|
|
| 575 |
count_patients_with_flags = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "n_blq"),
|
|
| 576 |
count_values = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "n_blq"),
|
|
| 577 |
coxph_pairwise = c("pvalue", "hr", "hr_ci", "n_tot", "n_tot_events"),
|
|
| 578 |
estimate_incidence_rate = c("person_years", "n_events", "rate", "rate_ci", "n_unique", "n_rate"),
|
|
| 579 |
estimate_multinomial_response = c("n_prop", "prop_ci"),
|
|
| 580 |
estimate_odds_ratio = c("or_ci", "n_tot"),
|
|
| 581 |
estimate_proportion = c("n_prop", "prop_ci"),
|
|
| 582 |
estimate_proportion_diff = c("diff", "diff_ci"),
|
|
| 583 |
summarize_ancova = c("n", "lsmean", "lsmean_diff", "lsmean_diff_ci", "pval"),
|
|
| 584 |
summarize_coxreg = c("n", "hr", "ci", "pval", "pval_inter"),
|
|
| 585 |
summarize_glm_count = c("n", "rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"),
|
|
| 586 |
summarize_num_patients = c("unique", "nonunique", "unique_count"),
|
|
| 587 |
summarize_patients_events_in_cols = c("unique", "all"),
|
|
| 588 |
surv_time = c( |
|
| 589 |
"median", "median_ci", "median_ci_3d", "quantiles", |
|
| 590 |
"quantiles_lower", "quantiles_upper", "range_censor", "range_event", "range" |
|
| 591 |
), |
|
| 592 |
surv_timepoint = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci", "event_free_rate_3d"),
|
|
| 593 |
surv_timepoint_diff = c("rate_diff", "rate_diff_ci", "ztest_pval", "rate_diff_ci_3d"),
|
|
| 594 |
tabulate_rsp_biomarkers = c("n_tot", "n_rsp", "prop", "or", "ci", "pval"),
|
|
| 595 |
tabulate_rsp_subgroups = c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval", "riskdiff"),
|
|
| 596 |
tabulate_survival_biomarkers = c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"),
|
|
| 597 |
tabulate_survival_subgroups = c("n_tot_events", "n_events", "n_tot", "n", "median", "hr", "ci", "pval", "riskdiff"),
|
|
| 598 |
test_proportion_diff = c("pval")
|
|
| 599 |
) |
|
| 600 | ||
| 601 |
# tern_default_formats --------------------------------------------------------- |
|
| 602 |
#' @describeIn default_stats_formats_labels Named vector of default formats for `tern`. |
|
| 603 |
#' |
|
| 604 |
#' @format |
|
| 605 |
#' * `tern_default_formats` is a named vector of available default formats, with each element |
|
| 606 |
#' named for their corresponding statistic. |
|
| 607 |
#' |
|
| 608 |
#' @export |
|
| 609 |
tern_default_formats <- c( |
|
| 610 |
ci = list(format_extreme_values_ci(2L)), |
|
| 611 |
count = "xx.", |
|
| 612 |
count_fraction = format_count_fraction, |
|
| 613 |
count_fraction_fixed_dp = format_count_fraction_fixed_dp, |
|
| 614 |
cv = "xx.x", |
|
| 615 |
event_free_rate = "xx.xx", |
|
| 616 |
fraction = format_fraction_fixed_dp, |
|
| 617 |
geom_cv = "xx.x", |
|
| 618 |
geom_mean = "xx.x", |
|
| 619 |
geom_mean_ci = "(xx.xx, xx.xx)", |
|
| 620 |
geom_mean_ci_3d = "xx.xx (xx.xx - xx.xx)", |
|
| 621 |
geom_mean_sd = "xx.x (xx.x)", |
|
| 622 |
geom_sd = "xx.x", |
|
| 623 |
hr = list(format_extreme_values(2L)), |
|
| 624 |
hr_ci = "(xx.xx, xx.xx)", |
|
| 625 |
hr_ci_3d = "xx.xx (xx.xx - xx.xx)", |
|
| 626 |
iqr = "xx.x", |
|
| 627 |
lsmean = "xx.xx", |
|
| 628 |
lsmean_diff = "xx.xx", |
|
| 629 |
lsmean_diff_ci = "(xx.xx, xx.xx)", |
|
| 630 |
mad = "xx.x", |
|
| 631 |
max = "xx.x", |
|
| 632 |
mean = "xx.x", |
|
| 633 |
mean_ci = "(xx.xx, xx.xx)", |
|
| 634 |
mean_ci_3d = "xx.xx (xx.xx - xx.xx)", |
|
| 635 |
mean_pval = "x.xxxx | (<0.0001)", |
|
| 636 |
mean_sd = "xx.x (xx.x)", |
|
| 637 |
mean_sdi = "(xx.xx, xx.xx)", |
|
| 638 |
mean_se = "xx.x (xx.x)", |
|
| 639 |
mean_sei = "(xx.xx, xx.xx)", |
|
| 640 |
median = "xx.x", |
|
| 641 |
median_ci = "(xx.xx, xx.xx)", |
|
| 642 |
median_ci_3d = "xx.xx (xx.xx - xx.xx)", |
|
| 643 |
median_range = "xx.x (xx.x - xx.x)", |
|
| 644 |
min = "xx.x", |
|
| 645 |
n = "xx.", |
|
| 646 |
n_blq = "xx.", |
|
| 647 |
n_events = "xx", |
|
| 648 |
n_patients = "xx (xx.x%)", |
|
| 649 |
n_prop = "xx (xx.x%)", |
|
| 650 |
n_rate = "xx (xx.x)", |
|
| 651 |
n_rsp = "xx", |
|
| 652 |
n_tot = "xx", |
|
| 653 |
n_tot_events = "xx", |
|
| 654 |
n_unique = "xx", |
|
| 655 |
nonunique = "xx", |
|
| 656 |
or = list(format_extreme_values(2L)), |
|
| 657 |
or_ci = "xx.xx (xx.xx - xx.xx)", |
|
| 658 |
person_years = "xx.x", |
|
| 659 |
prop = "xx.x%", |
|
| 660 |
prop_ci = "(xx.x, xx.x)", |
|
| 661 |
pt_at_risk = "xx", |
|
| 662 |
pval = "x.xxxx | (<0.0001)", |
|
| 663 |
pvalue = "x.xxxx | (<0.0001)", |
|
| 664 |
pval_counts = "x.xxxx | (<0.0001)", |
|
| 665 |
quantiles = "xx.x - xx.x", |
|
| 666 |
quantiles_lower = "xx.xx (xx.xx - xx.xx)", |
|
| 667 |
quantiles_upper = "xx.xx (xx.xx - xx.xx)", |
|
| 668 |
range = "xx.x - xx.x", |
|
| 669 |
range_censor = "xx.x to xx.x", |
|
| 670 |
range_event = "xx.x to xx.x", |
|
| 671 |
rate = "xx.xxxx", |
|
| 672 |
rate_ci = "(xx.xxxx, xx.xxxx)", |
|
| 673 |
rate_diff = "xx.xx", |
|
| 674 |
rate_diff_ci = "(xx.xx, xx.xx)", |
|
| 675 |
rate_diff_ci_3d = format_xx("xx.xx (xx.xx, xx.xx)"),
|
|
| 676 |
rate_ratio = "xx.xxxx", |
|
| 677 |
rate_ratio_ci = "(xx.xxxx, xx.xxxx)", |
|
| 678 |
rate_se = "xx.xx", |
|
| 679 |
riskdiff = "xx.x (xx.x - xx.x)", |
|
| 680 |
sd = "xx.x", |
|
| 681 |
se = "xx.x", |
|
| 682 |
sum = "xx.x", |
|
| 683 |
sum_exposure = "xx", |
|
| 684 |
unique = format_count_fraction_fixed_dp, |
|
| 685 |
unique_count = "xx", |
|
| 686 |
ztest_pval = "x.xxxx | (<0.0001)" |
|
| 687 |
) |
|
| 688 | ||
| 689 |
# tern_default_labels ---------------------------------------------------------- |
|
| 690 |
#' @describeIn default_stats_formats_labels Named `character` vector of default labels for `tern`. |
|
| 691 |
#' |
|
| 692 |
#' @format |
|
| 693 |
#' * `tern_default_labels` is a named `character` vector of available default labels, with each element |
|
| 694 |
#' named for their corresponding statistic. |
|
| 695 |
#' |
|
| 696 |
#' @export |
|
| 697 |
tern_default_labels <- c( |
|
| 698 |
cv = "CV (%)", |
|
| 699 |
iqr = "IQR", |
|
| 700 |
geom_cv = "CV % Geometric Mean", |
|
| 701 |
geom_mean = "Geometric Mean", |
|
| 702 |
geom_mean_sd = "Geometric Mean (SD)", |
|
| 703 |
geom_mean_ci = "Geometric Mean 95% CI", |
|
| 704 |
geom_mean_ci_3d = "Geometric Mean (95% CI)", |
|
| 705 |
geom_sd = "Geometric SD", |
|
| 706 |
mad = "Median Absolute Deviation", |
|
| 707 |
max = "Maximum", |
|
| 708 |
mean = "Mean", |
|
| 709 |
mean_ci = "Mean 95% CI", |
|
| 710 |
mean_ci_3d = "Mean (95% CI)", |
|
| 711 |
mean_pval = "Mean p-value (H0: mean = 0)", |
|
| 712 |
mean_sd = "Mean (SD)", |
|
| 713 |
mean_sdi = "Mean -/+ 1xSD", |
|
| 714 |
mean_se = "Mean (SE)", |
|
| 715 |
mean_sei = "Mean -/+ 1xSE", |
|
| 716 |
median = "Median", |
|
| 717 |
median_ci = "Median 95% CI", |
|
| 718 |
median_ci_3d = "Median (95% CI)", |
|
| 719 |
median_range = "Median (Min - Max)", |
|
| 720 |
min = "Minimum", |
|
| 721 |
n = "n", |
|
| 722 |
n_blq = "n_blq", |
|
| 723 |
nonunique = "Number of events", |
|
| 724 |
pval = "p-value (t-test)", # Default for numeric |
|
| 725 |
pval_counts = "p-value (chi-squared test)", # Default for counts |
|
| 726 |
quantiles = "25% and 75%-ile", |
|
| 727 |
quantiles_lower = "25%-ile (95% CI)", |
|
| 728 |
quantiles_upper = "75%-ile (95% CI)", |
|
| 729 |
range = "Min - Max", |
|
| 730 |
range_censor = "Range (censored)", |
|
| 731 |
range_event = "Range (event)", |
|
| 732 |
rate = "Adjusted Rate", |
|
| 733 |
rate_ratio = "Adjusted Rate Ratio", |
|
| 734 |
sd = "SD", |
|
| 735 |
se = "SE", |
|
| 736 |
sum = "Sum", |
|
| 737 |
unique = "Number of patients with at least one event" |
|
| 738 |
) |
|
| 739 | ||
| 740 |
#' @describeIn default_stats_formats_labels Quick function to retrieve default formats for summary statistics: |
|
| 741 |
#' [analyze_vars()] and [analyze_vars_in_cols()] principally. |
|
| 742 |
#' |
|
| 743 |
#' @param type (`string`)\cr `"numeric"` or `"counts"`. |
|
| 744 |
#' |
|
| 745 |
#' @return |
|
| 746 |
#' * `summary_formats()` returns a named `vector` of default statistic formats for the given data type. |
|
| 747 |
#' |
|
| 748 |
#' @examples |
|
| 749 |
#' summary_formats() |
|
| 750 |
#' summary_formats(type = "counts", include_pval = TRUE) |
|
| 751 |
#' |
|
| 752 |
#' @export |
|
| 753 |
summary_formats <- function(type = "numeric", include_pval = FALSE) {
|
|
| 754 | 2x |
met_grp <- paste0(c("analyze_vars", type), collapse = "_")
|
| 755 | 2x |
get_formats_from_stats(get_stats(met_grp, add_pval = include_pval)) |
| 756 |
} |
|
| 757 | ||
| 758 |
#' @describeIn default_stats_formats_labels Quick function to retrieve default labels for summary statistics. |
|
| 759 |
#' Returns labels of descriptive statistics which are understood by `rtables`. Similar to `summary_formats`. |
|
| 760 |
#' |
|
| 761 |
#' @param include_pval (`flag`)\cr same as the `add_pval` argument in [get_stats()]. |
|
| 762 |
#' |
|
| 763 |
#' @details |
|
| 764 |
#' `summary_*` quick get functions for labels or formats uses `get_stats` and `get_labels_from_stats` or |
|
| 765 |
#' `get_formats_from_stats` respectively to retrieve relevant information. |
|
| 766 |
#' |
|
| 767 |
#' @return |
|
| 768 |
#' * `summary_labels` returns a named `vector` of default statistic labels for the given data type. |
|
| 769 |
#' |
|
| 770 |
#' @examples |
|
| 771 |
#' summary_labels() |
|
| 772 |
#' summary_labels(type = "counts", include_pval = TRUE) |
|
| 773 |
#' |
|
| 774 |
#' @export |
|
| 775 |
summary_labels <- function(type = "numeric", include_pval = FALSE) {
|
|
| 776 | 2x |
met_grp <- paste0(c("analyze_vars", type), collapse = "_")
|
| 777 | 2x |
get_labels_from_stats(get_stats(met_grp, add_pval = include_pval)) |
| 778 |
} |
| 1 |
#' Subgroup treatment effect pattern (STEP) fit for survival outcome |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' This fits the subgroup treatment effect pattern (STEP) models for a survival outcome. The treatment arm |
|
| 6 |
#' variable must have exactly 2 levels, where the first one is taken as reference and the estimated |
|
| 7 |
#' hazard ratios are for the comparison of the second level vs. the first one. |
|
| 8 |
#' |
|
| 9 |
#' The model which is fit is: |
|
| 10 |
#' |
|
| 11 |
#' `Surv(time, event) ~ arm * poly(biomarker, degree) + covariates + strata(strata)` |
|
| 12 |
#' |
|
| 13 |
#' where `degree` is specified by `control_step()`. |
|
| 14 |
#' |
|
| 15 |
#' @inheritParams argument_convention |
|
| 16 |
#' @param variables (named `list` of `character`)\cr list of analysis variables: needs `time`, `event`, |
|
| 17 |
#' `arm`, `biomarker`, and optional `covariates` and `strata`. |
|
| 18 |
#' @param control (named `list`)\cr combined control list from [control_step()] and [control_coxph()]. |
|
| 19 |
#' |
|
| 20 |
#' @return A matrix of class `step`. The first part of the columns describe the subgroup intervals used |
|
| 21 |
#' for the biomarker variable, including where the center of the intervals are and their bounds. The |
|
| 22 |
#' second part of the columns contain the estimates for the treatment arm comparison. |
|
| 23 |
#' |
|
| 24 |
#' @note For the default degree 0 the `biomarker` variable is not included in the model. |
|
| 25 |
#' |
|
| 26 |
#' @seealso [control_step()] and [control_coxph()] for the available customization options. |
|
| 27 |
#' |
|
| 28 |
#' @examples |
|
| 29 |
#' # Testing dataset with just two treatment arms. |
|
| 30 |
#' library(dplyr) |
|
| 31 |
#' |
|
| 32 |
#' adtte_f <- tern_ex_adtte %>% |
|
| 33 |
#' filter( |
|
| 34 |
#' PARAMCD == "OS", |
|
| 35 |
#' ARM %in% c("B: Placebo", "A: Drug X")
|
|
| 36 |
#' ) %>% |
|
| 37 |
#' mutate( |
|
| 38 |
#' # Reorder levels of ARM to display reference arm before treatment arm. |
|
| 39 |
#' ARM = droplevels(forcats::fct_relevel(ARM, "B: Placebo")), |
|
| 40 |
#' is_event = CNSR == 0 |
|
| 41 |
#' ) |
|
| 42 |
#' labels <- c("ARM" = "Treatment Arm", "is_event" = "Event Flag")
|
|
| 43 |
#' formatters::var_labels(adtte_f)[names(labels)] <- labels |
|
| 44 |
#' |
|
| 45 |
#' variables <- list( |
|
| 46 |
#' arm = "ARM", |
|
| 47 |
#' biomarker = "BMRKR1", |
|
| 48 |
#' covariates = c("AGE", "BMRKR2"),
|
|
| 49 |
#' event = "is_event", |
|
| 50 |
#' time = "AVAL" |
|
| 51 |
#' ) |
|
| 52 |
#' |
|
| 53 |
#' # Fit default STEP models: Here a constant treatment effect is estimated in each subgroup. |
|
| 54 |
#' step_matrix <- fit_survival_step( |
|
| 55 |
#' variables = variables, |
|
| 56 |
#' data = adtte_f |
|
| 57 |
#' ) |
|
| 58 |
#' dim(step_matrix) |
|
| 59 |
#' head(step_matrix) |
|
| 60 |
#' |
|
| 61 |
#' # Specify different polynomial degree for the biomarker interaction to use more flexible local |
|
| 62 |
#' # models. Or specify different Cox regression options. |
|
| 63 |
#' step_matrix2 <- fit_survival_step( |
|
| 64 |
#' variables = variables, |
|
| 65 |
#' data = adtte_f, |
|
| 66 |
#' control = c(control_coxph(conf_level = 0.9), control_step(degree = 2)) |
|
| 67 |
#' ) |
|
| 68 |
#' |
|
| 69 |
#' # Use a global model with cubic interaction and only 5 points. |
|
| 70 |
#' step_matrix3 <- fit_survival_step( |
|
| 71 |
#' variables = variables, |
|
| 72 |
#' data = adtte_f, |
|
| 73 |
#' control = c(control_coxph(), control_step(bandwidth = NULL, degree = 3, num_points = 5L)) |
|
| 74 |
#' ) |
|
| 75 |
#' |
|
| 76 |
#' @export |
|
| 77 |
fit_survival_step <- function(variables, |
|
| 78 |
data, |
|
| 79 |
control = c(control_step(), control_coxph())) {
|
|
| 80 | 4x |
checkmate::assert_list(control) |
| 81 | 4x |
assert_df_with_variables(data, variables) |
| 82 | 4x |
data <- data[!is.na(data[[variables$biomarker]]), ] |
| 83 | 4x |
window_sel <- h_step_window(x = data[[variables$biomarker]], control = control) |
| 84 | 4x |
interval_center <- window_sel$interval[, "Interval Center"] |
| 85 | 4x |
form <- h_step_survival_formula(variables = variables, control = control) |
| 86 | 4x |
estimates <- if (is.null(control$bandwidth)) {
|
| 87 | 1x |
h_step_survival_est( |
| 88 | 1x |
formula = form, |
| 89 | 1x |
data = data, |
| 90 | 1x |
variables = variables, |
| 91 | 1x |
x = interval_center, |
| 92 | 1x |
control = control |
| 93 |
) |
|
| 94 |
} else {
|
|
| 95 | 3x |
tmp <- mapply( |
| 96 | 3x |
FUN = h_step_survival_est, |
| 97 | 3x |
x = interval_center, |
| 98 | 3x |
subset = as.list(as.data.frame(window_sel$sel)), |
| 99 | 3x |
MoreArgs = list( |
| 100 | 3x |
formula = form, |
| 101 | 3x |
data = data, |
| 102 | 3x |
variables = variables, |
| 103 | 3x |
control = control |
| 104 |
) |
|
| 105 |
) |
|
| 106 |
# Maybe we find a more elegant solution than this. |
|
| 107 | 3x |
rownames(tmp) <- c("n", "events", "loghr", "se", "ci_lower", "ci_upper")
|
| 108 | 3x |
t(tmp) |
| 109 |
} |
|
| 110 | 4x |
result <- cbind(window_sel$interval, estimates) |
| 111 | 4x |
structure( |
| 112 | 4x |
result, |
| 113 | 4x |
class = c("step", "matrix"),
|
| 114 | 4x |
variables = variables, |
| 115 | 4x |
control = control |
| 116 |
) |
|
| 117 |
} |
| 1 |
# summarize_glm_count ---------------------------------------------------------- |
|
| 2 |
#' Summarize Poisson negative binomial regression |
|
| 3 |
#' |
|
| 4 |
#' @description `r lifecycle::badge("experimental")`
|
|
| 5 |
#' |
|
| 6 |
#' Summarize results of a Poisson negative binomial regression. |
|
| 7 |
#' This can be used to analyze count and/or frequency data using a linear model. |
|
| 8 |
#' It is specifically useful for analyzing count data (using the Poisson or Negative |
|
| 9 |
#' Binomial distribution) that is result of a generalized linear model of one (e.g. arm) or more |
|
| 10 |
#' covariates. |
|
| 11 |
#' |
|
| 12 |
#' @inheritParams h_glm_count |
|
| 13 |
#' @inheritParams argument_convention |
|
| 14 |
#' @param rate_mean_method (`character(1)`)\cr method used to estimate the mean odds ratio. Defaults to `emmeans`. |
|
| 15 |
#' see details for more information. |
|
| 16 |
#' @param scale (`numeric(1)`)\cr linear scaling factor for rate and confidence intervals. Defaults to `1`. |
|
| 17 |
#' @param .stats (`character`)\cr statistics to select for the table. |
|
| 18 |
#' |
|
| 19 |
#' Options are: ``r shQuote(get_stats("summarize_glm_count"), type = "sh")``
|
|
| 20 |
#' |
|
| 21 |
#' @details |
|
| 22 |
#' `summarize_glm_count()` uses `s_glm_count()` to calculate the statistics for the table. This |
|
| 23 |
#' analysis function uses [h_glm_count()] to estimate the GLM with [stats::glm()] for Poisson and Quasi-Poisson |
|
| 24 |
#' distributions or [MASS::glm.nb()] for Negative Binomial distribution. All methods assume a |
|
| 25 |
#' logarithmic link function. |
|
| 26 |
#' |
|
| 27 |
#' At this point, rates and confidence intervals are estimated from the model using |
|
| 28 |
#' either [emmeans::emmeans()] when `rate_mean_method = "emmeans"` or [h_ppmeans()] |
|
| 29 |
#' when `rate_mean_method = "ppmeans"`. |
|
| 30 |
#' |
|
| 31 |
#' If a reference group is specified while building the table with `split_cols_by(ref_group)`, |
|
| 32 |
#' no rate ratio or `p-value` are calculated. Otherwise, we use [emmeans::contrast()] to |
|
| 33 |
#' calculate the rate ratio and `p-value` for the reference group. Values are always estimated |
|
| 34 |
#' with `method = "trt.vs.ctrl"` and `ref` equal to the first `arm` value. |
|
| 35 |
#' |
|
| 36 |
#' @name summarize_glm_count |
|
| 37 |
NULL |
|
| 38 | ||
| 39 |
#' @describeIn summarize_glm_count Layout-creating function which can take statistics function arguments |
|
| 40 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()]. |
|
| 41 |
#' |
|
| 42 |
#' @return |
|
| 43 |
#' * `summarize_glm_count()` returns a layout object suitable for passing to further layouting functions, |
|
| 44 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing |
|
| 45 |
#' the statistics from `s_glm_count()` to the table layout. |
|
| 46 |
#' |
|
| 47 |
#' @examples |
|
| 48 |
#' library(dplyr) |
|
| 49 |
#' |
|
| 50 |
#' anl <- tern_ex_adtte %>% filter(PARAMCD == "TNE") |
|
| 51 |
#' anl$AVAL_f <- as.factor(anl$AVAL) |
|
| 52 |
#' |
|
| 53 |
#' lyt <- basic_table() %>% |
|
| 54 |
#' split_cols_by("ARM", ref_group = "B: Placebo") %>%
|
|
| 55 |
#' add_colcounts() %>% |
|
| 56 |
#' analyze_vars( |
|
| 57 |
#' "AVAL_f", |
|
| 58 |
#' var_labels = "Number of exacerbations per patient", |
|
| 59 |
#' .stats = c("count_fraction"),
|
|
| 60 |
#' .formats = c("count_fraction" = "xx (xx.xx%)"),
|
|
| 61 |
#' .labels = c("Number of exacerbations per patient")
|
|
| 62 |
#' ) %>% |
|
| 63 |
#' summarize_glm_count( |
|
| 64 |
#' vars = "AVAL", |
|
| 65 |
#' variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = NULL), |
|
| 66 |
#' conf_level = 0.95, |
|
| 67 |
#' distribution = "poisson", |
|
| 68 |
#' rate_mean_method = "emmeans", |
|
| 69 |
#' var_labels = "Adjusted (P) exacerbation rate (per year)", |
|
| 70 |
#' table_names = "adjP", |
|
| 71 |
#' .stats = c("rate"),
|
|
| 72 |
#' .labels = c(rate = "Rate") |
|
| 73 |
#' ) %>% |
|
| 74 |
#' summarize_glm_count( |
|
| 75 |
#' vars = "AVAL", |
|
| 76 |
#' variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = c("REGION1")),
|
|
| 77 |
#' conf_level = 0.95, |
|
| 78 |
#' distribution = "quasipoisson", |
|
| 79 |
#' rate_mean_method = "ppmeans", |
|
| 80 |
#' var_labels = "Adjusted (QP) exacerbation rate (per year)", |
|
| 81 |
#' table_names = "adjQP", |
|
| 82 |
#' .stats = c("rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"),
|
|
| 83 |
#' .labels = c( |
|
| 84 |
#' rate = "Rate", rate_ci = "Rate CI", rate_ratio = "Rate Ratio", |
|
| 85 |
#' rate_ratio_ci = "Rate Ratio CI", pval = "p value" |
|
| 86 |
#' ) |
|
| 87 |
#' ) %>% |
|
| 88 |
#' summarize_glm_count( |
|
| 89 |
#' vars = "AVAL", |
|
| 90 |
#' variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = c("REGION1")),
|
|
| 91 |
#' conf_level = 0.95, |
|
| 92 |
#' distribution = "negbin", |
|
| 93 |
#' rate_mean_method = "emmeans", |
|
| 94 |
#' var_labels = "Adjusted (NB) exacerbation rate (per year)", |
|
| 95 |
#' table_names = "adjNB", |
|
| 96 |
#' .stats = c("rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"),
|
|
| 97 |
#' .labels = c( |
|
| 98 |
#' rate = "Rate", rate_ci = "Rate CI", rate_ratio = "Rate Ratio", |
|
| 99 |
#' rate_ratio_ci = "Rate Ratio CI", pval = "p value" |
|
| 100 |
#' ) |
|
| 101 |
#' ) |
|
| 102 |
#' |
|
| 103 |
#' build_table(lyt = lyt, df = anl) |
|
| 104 |
#' |
|
| 105 |
#' @export |
|
| 106 |
summarize_glm_count <- function(lyt, |
|
| 107 |
vars, |
|
| 108 |
variables, |
|
| 109 |
distribution, |
|
| 110 |
conf_level, |
|
| 111 |
rate_mean_method = c("emmeans", "ppmeans")[1],
|
|
| 112 |
weights = stats::weights, |
|
| 113 |
scale = 1, |
|
| 114 |
var_labels, |
|
| 115 |
na_str = default_na_str(), |
|
| 116 |
nested = TRUE, |
|
| 117 |
..., |
|
| 118 |
show_labels = "visible", |
|
| 119 |
table_names = vars, |
|
| 120 |
.stats = c("n", "rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"),
|
|
| 121 |
.stat_names = NULL, |
|
| 122 |
.formats = NULL, |
|
| 123 |
.labels = NULL, |
|
| 124 |
.indent_mods = list("rate_ci" = 1L, "rate_ratio_ci" = 1L, "pval" = 1L)) {
|
|
| 125 | 3x |
checkmate::assert_choice(rate_mean_method, c("emmeans", "ppmeans"))
|
| 126 | ||
| 127 |
# Process standard extra arguments |
|
| 128 | 3x |
extra_args <- list(".stats" = .stats)
|
| 129 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
| 130 | ! |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
| 131 | 3x |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
| 132 | 3x |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
| 133 | ||
| 134 |
# Process additional arguments to the statistic function |
|
| 135 | 3x |
extra_args <- c( |
| 136 | 3x |
extra_args, |
| 137 | 3x |
variables = list(variables), distribution = list(distribution), conf_level = list(conf_level), |
| 138 | 3x |
rate_mean_method = list(rate_mean_method), weights = list(weights), scale = list(scale), |
| 139 |
... |
|
| 140 |
) |
|
| 141 | ||
| 142 |
# Append additional info from layout to the analysis function |
|
| 143 | 3x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
| 144 | 3x |
formals(a_glm_count) <- c(formals(a_glm_count), extra_args[[".additional_fun_parameters"]]) |
| 145 | ||
| 146 | 3x |
analyze( |
| 147 | 3x |
lyt = lyt, |
| 148 | 3x |
vars = vars, |
| 149 | 3x |
afun = a_glm_count, |
| 150 | 3x |
na_str = na_str, |
| 151 | 3x |
nested = nested, |
| 152 | 3x |
extra_args = extra_args, |
| 153 | 3x |
var_labels = var_labels, |
| 154 | 3x |
show_labels = show_labels, |
| 155 | 3x |
table_names = table_names |
| 156 |
) |
|
| 157 |
} |
|
| 158 | ||
| 159 |
#' @describeIn summarize_glm_count Statistics function that produces a named list of results |
|
| 160 |
#' of the investigated Poisson model. |
|
| 161 |
#' |
|
| 162 |
#' @return |
|
| 163 |
#' * `s_glm_count()` returns a named `list` of 5 statistics: |
|
| 164 |
#' * `n`: Count of complete sample size for the group. |
|
| 165 |
#' * `rate`: Estimated event rate per follow-up time. |
|
| 166 |
#' * `rate_ci`: Confidence level for estimated rate per follow-up time. |
|
| 167 |
#' * `rate_ratio`: Ratio of event rates in each treatment arm to the reference arm. |
|
| 168 |
#' * `rate_ratio_ci`: Confidence level for the rate ratio. |
|
| 169 |
#' * `pval`: p-value. |
|
| 170 |
#' |
|
| 171 |
#' @keywords internal |
|
| 172 |
s_glm_count <- function(df, |
|
| 173 |
.var, |
|
| 174 |
.df_row, |
|
| 175 |
.ref_group, |
|
| 176 |
.in_ref_col, |
|
| 177 |
variables, |
|
| 178 |
distribution, |
|
| 179 |
conf_level, |
|
| 180 |
rate_mean_method, |
|
| 181 |
weights, |
|
| 182 |
scale = 1, |
|
| 183 |
...) {
|
|
| 184 | 14x |
arm <- variables$arm |
| 185 | ||
| 186 | 14x |
y <- df[[.var]] |
| 187 | 13x |
smry_level <- as.character(unique(df[[arm]])) |
| 188 | ||
| 189 |
# ensure there is only 1 value |
|
| 190 | 13x |
checkmate::assert_scalar(smry_level) |
| 191 | ||
| 192 | 13x |
results <- h_glm_count( |
| 193 | 13x |
.var = .var, |
| 194 | 13x |
.df_row = .df_row, |
| 195 | 13x |
variables = variables, |
| 196 | 13x |
distribution = distribution, |
| 197 | 13x |
weights |
| 198 |
) |
|
| 199 | ||
| 200 | 13x |
if (rate_mean_method == "emmeans") {
|
| 201 | 13x |
emmeans_smry <- summary(results$emmeans_fit, level = conf_level) |
| 202 | ! |
} else if (rate_mean_method == "ppmeans") {
|
| 203 | ! |
emmeans_smry <- h_ppmeans(results$glm_fit, .df_row, arm, conf_level) |
| 204 |
} |
|
| 205 | ||
| 206 | 13x |
emmeans_smry_level <- emmeans_smry[emmeans_smry[[arm]] == smry_level, ] |
| 207 | ||
| 208 |
# This happens if there is a reference col. No Ratio is calculated? |
|
| 209 | 13x |
if (.in_ref_col) {
|
| 210 | 5x |
list( |
| 211 | 5x |
n = length(y[!is.na(y)]), |
| 212 | 5x |
rate = formatters::with_label( |
| 213 | 5x |
ifelse(distribution == "negbin", emmeans_smry_level$response * scale, emmeans_smry_level$rate * scale), |
| 214 | 5x |
"Adjusted Rate" |
| 215 |
), |
|
| 216 | 5x |
rate_ci = formatters::with_label( |
| 217 | 5x |
c(emmeans_smry_level$asymp.LCL * scale, emmeans_smry_level$asymp.UCL * scale), |
| 218 | 5x |
f_conf_level(conf_level) |
| 219 |
), |
|
| 220 | 5x |
rate_ratio = formatters::with_label(numeric(), "Adjusted Rate Ratio"), |
| 221 | 5x |
rate_ratio_ci = formatters::with_label(numeric(), f_conf_level(conf_level)), |
| 222 | 5x |
pval = formatters::with_label(numeric(), "p-value") |
| 223 |
) |
|
| 224 |
} else {
|
|
| 225 | 8x |
emmeans_contrasts <- emmeans::contrast( |
| 226 | 8x |
results$emmeans_fit, |
| 227 | 8x |
method = "trt.vs.ctrl", |
| 228 | 8x |
ref = grep( |
| 229 | 8x |
as.character(unique(.ref_group[[arm]])), |
| 230 | 8x |
as.data.frame(results$emmeans_fit)[[arm]] |
| 231 |
) |
|
| 232 |
) |
|
| 233 | ||
| 234 | 8x |
contrasts_smry <- summary( |
| 235 | 8x |
emmeans_contrasts, |
| 236 | 8x |
infer = TRUE, |
| 237 | 8x |
adjust = "none" |
| 238 |
) |
|
| 239 | ||
| 240 | 8x |
smry_contrasts_level <- contrasts_smry[grepl(smry_level, contrasts_smry$contrast), ] |
| 241 | ||
| 242 | 8x |
list( |
| 243 | 8x |
n = length(y[!is.na(y)]), |
| 244 | 8x |
rate = formatters::with_label( |
| 245 | 8x |
ifelse(distribution == "negbin", |
| 246 | 8x |
emmeans_smry_level$response * scale, |
| 247 | 8x |
emmeans_smry_level$rate * scale |
| 248 |
), |
|
| 249 | 8x |
"Adjusted Rate" |
| 250 |
), |
|
| 251 | 8x |
rate_ci = formatters::with_label( |
| 252 | 8x |
c(emmeans_smry_level$asymp.LCL * scale, emmeans_smry_level$asymp.UCL * scale), |
| 253 | 8x |
f_conf_level(conf_level) |
| 254 |
), |
|
| 255 | 8x |
rate_ratio = formatters::with_label( |
| 256 | 8x |
smry_contrasts_level$ratio, |
| 257 | 8x |
"Adjusted Rate Ratio" |
| 258 |
), |
|
| 259 | 8x |
rate_ratio_ci = formatters::with_label( |
| 260 | 8x |
c(smry_contrasts_level$asymp.LCL, smry_contrasts_level$asymp.UCL), |
| 261 | 8x |
f_conf_level(conf_level) |
| 262 |
), |
|
| 263 | 8x |
pval = formatters::with_label( |
| 264 | 8x |
smry_contrasts_level$p.value, |
| 265 | 8x |
"p-value" |
| 266 |
) |
|
| 267 |
) |
|
| 268 |
} |
|
| 269 |
} |
|
| 270 | ||
| 271 |
#' @describeIn summarize_glm_count Formatted analysis function which is used as `afun` in `summarize_glm_count()`. |
|
| 272 |
#' |
|
| 273 |
#' @return |
|
| 274 |
#' * `a_glm_count()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
| 275 |
#' |
|
| 276 |
#' @keywords internal |
|
| 277 |
a_glm_count <- function(df, |
|
| 278 |
..., |
|
| 279 |
.stats = NULL, |
|
| 280 |
.stat_names = NULL, |
|
| 281 |
.formats = NULL, |
|
| 282 |
.labels = NULL, |
|
| 283 |
.indent_mods = NULL) {
|
|
| 284 |
# Check for additional parameters to the statistics function |
|
| 285 | 9x |
dots_extra_args <- list(...) |
| 286 | 9x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
| 287 | 9x |
dots_extra_args$.additional_fun_parameters <- NULL |
| 288 | ||
| 289 |
# Check for user-defined functions |
|
| 290 | 9x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
| 291 | 9x |
.stats <- default_and_custom_stats_list$all_stats |
| 292 | 9x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
| 293 | ||
| 294 |
# Apply statistics function |
|
| 295 | 9x |
x_stats <- .apply_stat_functions( |
| 296 | 9x |
default_stat_fnc = s_glm_count, |
| 297 | 9x |
custom_stat_fnc_list = custom_stat_functions, |
| 298 | 9x |
args_list = c( |
| 299 | 9x |
df = list(df), |
| 300 | 9x |
extra_afun_params, |
| 301 | 9x |
dots_extra_args |
| 302 |
) |
|
| 303 |
) |
|
| 304 | ||
| 305 |
# Fill in formatting defaults |
|
| 306 | 9x |
.stats <- get_stats("summarize_glm_count",
|
| 307 | 9x |
stats_in = .stats, |
| 308 | 9x |
custom_stats_in = names(custom_stat_functions) |
| 309 |
) |
|
| 310 | 9x |
.formats <- get_formats_from_stats(.stats, .formats) |
| 311 | 9x |
.labels <- get_labels_from_stats(.stats, .labels) |
| 312 | 9x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods) |
| 313 | ||
| 314 | 9x |
x_stats <- x_stats[.stats] |
| 315 | ||
| 316 |
# Auto format handling |
|
| 317 | 9x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
| 318 | ||
| 319 |
# Get and check statistical names |
|
| 320 | 9x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
| 321 | ||
| 322 | 9x |
in_rows( |
| 323 | 9x |
.list = x_stats, |
| 324 | 9x |
.formats = .formats, |
| 325 | 9x |
.names = .labels %>% .unlist_keep_nulls(), |
| 326 | 9x |
.stat_names = .stat_names, |
| 327 | 9x |
.labels = .labels %>% .unlist_keep_nulls(), |
| 328 | 9x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
| 329 |
) |
|
| 330 |
} |
|
| 331 | ||
| 332 |
# h_glm_count ------------------------------------------------------------------ |
|
| 333 | ||
| 334 |
#' Helper functions for Poisson models |
|
| 335 |
#' |
|
| 336 |
#' @description `r lifecycle::badge("experimental")`
|
|
| 337 |
#' |
|
| 338 |
#' Helper functions that returns the results of [stats::glm()] when Poisson or Quasi-Poisson |
|
| 339 |
#' distributions are needed (see `family` parameter), or [MASS::glm.nb()] for Negative Binomial |
|
| 340 |
#' distributions. Link function for the GLM is `log`. |
|
| 341 |
#' |
|
| 342 |
#' @inheritParams argument_convention |
|
| 343 |
#' |
|
| 344 |
#' @seealso [summarize_glm_count] |
|
| 345 |
#' |
|
| 346 |
#' @name h_glm_count |
|
| 347 |
NULL |
|
| 348 | ||
| 349 |
#' @describeIn h_glm_count Helper function to return the results of the |
|
| 350 |
#' selected model (Poisson, Quasi-Poisson, negative binomial). |
|
| 351 |
#' |
|
| 352 |
#' @param .df_row (`data.frame`)\cr dataset that includes all the variables that are called |
|
| 353 |
#' in `.var` and `variables`. |
|
| 354 |
#' @param variables (named `list` of `string`)\cr list of additional analysis variables, with |
|
| 355 |
#' expected elements: |
|
| 356 |
#' * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple |
|
| 357 |
#' groups will be summarized. Specifically, the first level of `arm` variable is taken as the |
|
| 358 |
#' reference group. |
|
| 359 |
#' * `covariates` (`character`)\cr a vector that can contain single variable names (such as |
|
| 360 |
#' `"X1"`), and/or interaction terms indicated by `"X1 * X2"`. |
|
| 361 |
#' * `offset` (`numeric`)\cr a numeric vector or scalar adding an offset. |
|
| 362 |
#' @param distribution (`character`)\cr a character value specifying the distribution |
|
| 363 |
#' used in the regression (Poisson, Quasi-Poisson, negative binomial). |
|
| 364 |
#' @param weights (`character`)\cr a character vector specifying weights used |
|
| 365 |
#' in averaging predictions. Number of weights must equal the number of levels included in the covariates. |
|
| 366 |
#' Weights option passed to [emmeans::emmeans()]. |
|
| 367 |
#' |
|
| 368 |
#' @return |
|
| 369 |
#' * `h_glm_count()` returns the results of the selected model. |
|
| 370 |
#' |
|
| 371 |
#' @keywords internal |
|
| 372 |
h_glm_count <- function(.var, |
|
| 373 |
.df_row, |
|
| 374 |
variables, |
|
| 375 |
distribution, |
|
| 376 |
weights) {
|
|
| 377 | 21x |
checkmate::assert_subset(distribution, c("poisson", "quasipoisson", "negbin"), empty.ok = FALSE)
|
| 378 | 19x |
switch(distribution, |
| 379 | 13x |
poisson = h_glm_poisson(.var, .df_row, variables, weights), |
| 380 | 1x |
quasipoisson = h_glm_quasipoisson(.var, .df_row, variables, weights), |
| 381 | 5x |
negbin = h_glm_negbin(.var, .df_row, variables, weights) |
| 382 |
) |
|
| 383 |
} |
|
| 384 | ||
| 385 |
#' @describeIn h_glm_count Helper function to return results of a Poisson model. |
|
| 386 |
#' |
|
| 387 |
#' @return |
|
| 388 |
#' * `h_glm_poisson()` returns the results of a Poisson model. |
|
| 389 |
#' |
|
| 390 |
#' @keywords internal |
|
| 391 |
h_glm_poisson <- function(.var, |
|
| 392 |
.df_row, |
|
| 393 |
variables, |
|
| 394 |
weights) {
|
|
| 395 | 17x |
arm <- variables$arm |
| 396 | 17x |
covariates <- variables$covariates |
| 397 | ||
| 398 | 17x |
formula <- stats::as.formula(paste0( |
| 399 | 17x |
.var, " ~ ", |
| 400 |
" + ", |
|
| 401 | 17x |
paste(covariates, collapse = " + "), |
| 402 |
" + ", |
|
| 403 | 17x |
arm |
| 404 |
)) |
|
| 405 | ||
| 406 | 17x |
if (is.null(variables$offset)) {
|
| 407 | 1x |
glm_fit <- stats::glm( |
| 408 | 1x |
formula = formula, |
| 409 | 1x |
data = .df_row, |
| 410 | 1x |
family = stats::poisson(link = "log") |
| 411 |
) |
|
| 412 |
} else {
|
|
| 413 | 16x |
offset <- .df_row[[variables$offset]] |
| 414 | 14x |
glm_fit <- stats::glm( |
| 415 | 14x |
formula = formula, |
| 416 | 14x |
offset = offset, |
| 417 | 14x |
data = .df_row, |
| 418 | 14x |
family = stats::poisson(link = "log") |
| 419 |
) |
|
| 420 |
} |
|
| 421 | ||
| 422 | 15x |
emmeans_fit <- emmeans::emmeans( |
| 423 | 15x |
glm_fit, |
| 424 | 15x |
specs = arm, |
| 425 | 15x |
data = .df_row, |
| 426 | 15x |
type = "response", |
| 427 | 15x |
offset = 0, |
| 428 | 15x |
weights = weights |
| 429 |
) |
|
| 430 | ||
| 431 | 15x |
list( |
| 432 | 15x |
glm_fit = glm_fit, |
| 433 | 15x |
emmeans_fit = emmeans_fit |
| 434 |
) |
|
| 435 |
} |
|
| 436 | ||
| 437 |
#' @describeIn h_glm_count Helper function to return results of a Quasi-Poisson model. |
|
| 438 |
#' |
|
| 439 |
#' @return |
|
| 440 |
#' * `h_glm_quasipoisson()` returns the results of a Quasi-Poisson model. |
|
| 441 |
#' |
|
| 442 |
#' @keywords internal |
|
| 443 |
h_glm_quasipoisson <- function(.var, |
|
| 444 |
.df_row, |
|
| 445 |
variables, |
|
| 446 |
weights) {
|
|
| 447 | 5x |
arm <- variables$arm |
| 448 | 5x |
covariates <- variables$covariates |
| 449 | ||
| 450 | 5x |
formula <- stats::as.formula(paste0( |
| 451 | 5x |
.var, " ~ ", |
| 452 |
" + ", |
|
| 453 | 5x |
paste(covariates, collapse = " + "), |
| 454 |
" + ", |
|
| 455 | 5x |
arm |
| 456 |
)) |
|
| 457 | ||
| 458 | 5x |
if (is.null(variables$offset)) {
|
| 459 | ! |
glm_fit <- stats::glm( |
| 460 | ! |
formula = formula, |
| 461 | ! |
data = .df_row, |
| 462 | ! |
family = stats::quasipoisson(link = "log") |
| 463 |
) |
|
| 464 |
} else {
|
|
| 465 | 5x |
offset <- .df_row[[variables$offset]] |
| 466 | 3x |
glm_fit <- stats::glm( |
| 467 | 3x |
formula = formula, |
| 468 | 3x |
offset = offset, |
| 469 | 3x |
data = .df_row, |
| 470 | 3x |
family = stats::quasipoisson(link = "log") |
| 471 |
) |
|
| 472 |
} |
|
| 473 | 3x |
emmeans_fit <- emmeans::emmeans( |
| 474 | 3x |
glm_fit, |
| 475 | 3x |
specs = arm, |
| 476 | 3x |
data = .df_row, |
| 477 | 3x |
type = "response", |
| 478 | 3x |
offset = 0, |
| 479 | 3x |
weights = weights |
| 480 |
) |
|
| 481 | ||
| 482 | 3x |
list( |
| 483 | 3x |
glm_fit = glm_fit, |
| 484 | 3x |
emmeans_fit = emmeans_fit |
| 485 |
) |
|
| 486 |
} |
|
| 487 | ||
| 488 |
#' @describeIn h_glm_count Helper function to return results of a negative binomial model. |
|
| 489 |
#' |
|
| 490 |
#' @return |
|
| 491 |
#' * `h_glm_negbin()` returns the results of a negative binomial model. |
|
| 492 |
#' |
|
| 493 |
#' @keywords internal |
|
| 494 |
h_glm_negbin <- function(.var, |
|
| 495 |
.df_row, |
|
| 496 |
variables, |
|
| 497 |
weights) {
|
|
| 498 | 9x |
arm <- variables$arm |
| 499 | 9x |
covariates <- variables$covariates |
| 500 | 9x |
formula <- stats::as.formula(paste0( |
| 501 | 9x |
.var, " ~ ", |
| 502 |
" + ", |
|
| 503 | 9x |
paste(covariates, collapse = " + "), |
| 504 |
" + ", |
|
| 505 | 9x |
arm |
| 506 |
)) |
|
| 507 | ||
| 508 | 9x |
if (is.null(variables$offset)) {
|
| 509 | 1x |
formula <- stats::as.formula(paste0( |
| 510 | 1x |
.var, " ~ ", |
| 511 |
" + ", |
|
| 512 | 1x |
paste(covariates, collapse = " + "), |
| 513 |
" + ", |
|
| 514 | 1x |
arm |
| 515 |
)) |
|
| 516 |
} else {
|
|
| 517 | 8x |
offset <- variables$offset |
| 518 | 8x |
formula_txt <- sprintf( |
| 519 | 8x |
"%s ~ %s + %s + offset(%s)", |
| 520 | 8x |
.var, |
| 521 | 8x |
arm, paste0(covariates, collapse = " + "), offset |
| 522 |
) |
|
| 523 | 8x |
formula <- stats::as.formula( |
| 524 | 8x |
formula_txt |
| 525 |
) |
|
| 526 |
} |
|
| 527 | ||
| 528 | 9x |
glm_fit <- MASS::glm.nb( |
| 529 | 9x |
formula = formula, |
| 530 | 9x |
data = .df_row, |
| 531 | 9x |
link = "log" |
| 532 |
) |
|
| 533 | ||
| 534 | 7x |
emmeans_fit <- emmeans::emmeans( |
| 535 | 7x |
glm_fit, |
| 536 | 7x |
specs = arm, |
| 537 | 7x |
data = .df_row, |
| 538 | 7x |
type = "response", |
| 539 | 7x |
offset = 0, |
| 540 | 7x |
weights = weights |
| 541 |
) |
|
| 542 | ||
| 543 | 7x |
list( |
| 544 | 7x |
glm_fit = glm_fit, |
| 545 | 7x |
emmeans_fit = emmeans_fit |
| 546 |
) |
|
| 547 |
} |
|
| 548 | ||
| 549 |
# h_ppmeans -------------------------------------------------------------------- |
|
| 550 |
#' Function to return the estimated means using predicted probabilities |
|
| 551 |
#' |
|
| 552 |
#' @description |
|
| 553 |
#' For each arm level, the predicted mean rate is calculated using the fitted model object, with `newdata` |
|
| 554 |
#' set to the result of `stats::model.frame`, a reconstructed data or the original data, depending on the |
|
| 555 |
#' object formula (coming from the fit). The confidence interval is derived using the `conf_level` parameter. |
|
| 556 |
#' |
|
| 557 |
#' @param obj (`glm.fit`)\cr fitted model object used to derive the mean rate estimates in each treatment arm. |
|
| 558 |
#' @param .df_row (`data.frame`)\cr dataset that includes all the variables that are called in `.var` and `variables`. |
|
| 559 |
#' @param arm (`string`)\cr group variable, for which the covariate adjusted means of multiple groups will be |
|
| 560 |
#' summarized. Specifically, the first level of `arm` variable is taken as the reference group. |
|
| 561 |
#' @param conf_level (`proportion`)\cr value used to derive the confidence interval for the rate. |
|
| 562 |
#' |
|
| 563 |
#' @return |
|
| 564 |
#' * `h_ppmeans()` returns the estimated means. |
|
| 565 |
#' |
|
| 566 |
#' @seealso [summarize_glm_count()]. |
|
| 567 |
#' |
|
| 568 |
#' @export |
|
| 569 |
h_ppmeans <- function(obj, .df_row, arm, conf_level) {
|
|
| 570 | 1x |
alpha <- 1 - conf_level |
| 571 | 1x |
p <- 1 - alpha / 2 |
| 572 | ||
| 573 | 1x |
arm_levels <- levels(.df_row[[arm]]) |
| 574 | ||
| 575 | 1x |
out <- lapply(arm_levels, function(lev) {
|
| 576 | 3x |
temp <- .df_row |
| 577 | 3x |
temp[[arm]] <- factor(lev, levels = arm_levels) |
| 578 | ||
| 579 | 3x |
mf <- stats::model.frame(obj$formula, data = temp) |
| 580 | 3x |
X <- stats::model.matrix(obj$formula, data = mf) # nolint |
| 581 | ||
| 582 | 3x |
rate <- stats::predict(obj, newdata = mf, type = "response") |
| 583 | 3x |
rate_hat <- mean(rate) |
| 584 | ||
| 585 | 3x |
zz <- colMeans(rate * X) |
| 586 | 3x |
se <- sqrt(as.numeric(t(zz) %*% stats::vcov(obj) %*% zz)) |
| 587 | 3x |
rate_lwr <- rate_hat * exp(-stats::qnorm(p) * se / rate_hat) |
| 588 | 3x |
rate_upr <- rate_hat * exp(stats::qnorm(p) * se / rate_hat) |
| 589 | ||
| 590 | 3x |
c(rate_hat, rate_lwr, rate_upr) |
| 591 |
}) |
|
| 592 | ||
| 593 | 1x |
names(out) <- arm_levels |
| 594 | 1x |
out <- do.call(rbind, out) |
| 595 | 1x |
if ("negbin" %in% class(obj)) {
|
| 596 | ! |
colnames(out) <- c("response", "asymp.LCL", "asymp.UCL")
|
| 597 |
} else {
|
|
| 598 | 1x |
colnames(out) <- c("rate", "asymp.LCL", "asymp.UCL")
|
| 599 |
} |
|
| 600 | 1x |
out <- as.data.frame(out) |
| 601 | 1x |
out[[arm]] <- rownames(out) |
| 602 | 1x |
out |
| 603 |
} |
| 1 |
#' Count number of patients and sum exposure across all patients in columns |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' The analyze function [analyze_patients_exposure_in_cols()] creates a layout element to count total numbers of |
|
| 6 |
#' patients and sum an analysis value (i.e. exposure) across all patients in columns. |
|
| 7 |
#' |
|
| 8 |
#' The primary analysis variable `ex_var` is the exposure variable used to calculate the `sum_exposure` statistic. The |
|
| 9 |
#' `id` variable is used to uniquely identify patients in the data such that only unique patients are counted in the |
|
| 10 |
#' `n_patients` statistic, and the `var` variable is used to create a row split if needed. The percentage returned as |
|
| 11 |
#' part of the `n_patients` statistic is the proportion of all records that correspond to a unique patient. |
|
| 12 |
#' |
|
| 13 |
#' The summarize function [summarize_patients_exposure_in_cols()] performs the same function as |
|
| 14 |
#' [analyze_patients_exposure_in_cols()] except it creates content rows, not data rows, to summarize the current table |
|
| 15 |
#' row/column context and operates on the level of the latest row split or the root of the table if no row splits have |
|
| 16 |
#' occurred. |
|
| 17 |
#' |
|
| 18 |
#' If a column split has not yet been performed in the table, `col_split` must be set to `TRUE` for the first call of |
|
| 19 |
#' [analyze_patients_exposure_in_cols()] or [summarize_patients_exposure_in_cols()]. |
|
| 20 |
#' |
|
| 21 |
#' @inheritParams argument_convention |
|
| 22 |
#' @param ex_var (`string`)\cr name of the variable in `df` containing exposure values. |
|
| 23 |
#' @param custom_label (`string` or `NULL`)\cr if provided and `labelstr` is empty, this will be used as label. |
|
| 24 |
#' @param .stats (`character`)\cr statistics to select for the table. |
|
| 25 |
#' |
|
| 26 |
#' Options are: ``r shQuote(get_stats("analyze_patients_exposure_in_cols"), type = "sh")``
|
|
| 27 |
#' |
|
| 28 |
#' @name summarize_patients_exposure_in_cols |
|
| 29 |
#' @order 1 |
|
| 30 |
NULL |
|
| 31 | ||
| 32 |
#' @describeIn summarize_patients_exposure_in_cols Statistics function which counts numbers |
|
| 33 |
#' of patients and the sum of exposure across all patients. |
|
| 34 |
#' |
|
| 35 |
#' @return |
|
| 36 |
#' * `s_count_patients_sum_exposure()` returns a named `list` with the statistics: |
|
| 37 |
#' * `n_patients`: Number of unique patients in `df`. |
|
| 38 |
#' * `sum_exposure`: Sum of `ex_var` across all patients in `df`. |
|
| 39 |
#' |
|
| 40 |
#' @keywords internal |
|
| 41 |
s_count_patients_sum_exposure <- function(df, |
|
| 42 |
labelstr = "", |
|
| 43 |
.stats = c("n_patients", "sum_exposure"),
|
|
| 44 |
.N_col, # nolint |
|
| 45 |
..., |
|
| 46 |
ex_var = "AVAL", |
|
| 47 |
id = "USUBJID", |
|
| 48 |
custom_label = NULL, |
|
| 49 |
var_level = NULL) {
|
|
| 50 | 56x |
assert_df_with_variables(df, list(ex_var = ex_var, id = id)) |
| 51 | 56x |
checkmate::assert_string(id) |
| 52 | 56x |
checkmate::assert_string(labelstr) |
| 53 | 56x |
checkmate::assert_string(custom_label, null.ok = TRUE) |
| 54 | 56x |
checkmate::assert_numeric(df[[ex_var]]) |
| 55 | 56x |
checkmate::assert_true(all(.stats %in% c("n_patients", "sum_exposure")))
|
| 56 | ||
| 57 | 56x |
row_label <- if (labelstr != "") {
|
| 58 | ! |
labelstr |
| 59 | 56x |
} else if (!is.null(var_level)) {
|
| 60 | 42x |
var_level |
| 61 | 56x |
} else if (!is.null(custom_label)) {
|
| 62 | 6x |
custom_label |
| 63 |
} else {
|
|
| 64 | 8x |
"Total patients numbers/person time" |
| 65 |
} |
|
| 66 | ||
| 67 | 56x |
y <- list() |
| 68 | ||
| 69 | 56x |
if ("n_patients" %in% .stats) {
|
| 70 | 56x |
y$n_patients <- |
| 71 | 56x |
formatters::with_label( |
| 72 | 56x |
s_num_patients_content( |
| 73 | 56x |
df = df, |
| 74 | 56x |
.N_col = .N_col, # nolint |
| 75 | 56x |
.var = id, |
| 76 | 56x |
labelstr = "" |
| 77 | 56x |
)$unique, |
| 78 | 56x |
row_label |
| 79 |
) |
|
| 80 |
} |
|
| 81 | 56x |
if ("sum_exposure" %in% .stats) {
|
| 82 | 56x |
y$sum_exposure <- formatters::with_label(sum(df[[ex_var]]), row_label) |
| 83 |
} |
|
| 84 | 56x |
y |
| 85 |
} |
|
| 86 | ||
| 87 |
#' @describeIn summarize_patients_exposure_in_cols Analysis function which is used as `afun` in |
|
| 88 |
#' [rtables::analyze_colvars()] within `analyze_patients_exposure_in_cols()` and as `cfun` in |
|
| 89 |
#' [rtables::summarize_row_groups()] within `summarize_patients_exposure_in_cols()`. |
|
| 90 |
#' |
|
| 91 |
#' @return |
|
| 92 |
#' * `a_count_patients_sum_exposure()` returns formatted [rtables::CellValue()]. |
|
| 93 |
#' |
|
| 94 |
#' @export |
|
| 95 |
a_count_patients_sum_exposure <- function(df, |
|
| 96 |
labelstr = "", |
|
| 97 |
..., |
|
| 98 |
.stats = NULL, |
|
| 99 |
.stat_names = NULL, |
|
| 100 |
.formats = NULL, |
|
| 101 |
.labels = NULL, |
|
| 102 |
.indent_mods = NULL) {
|
|
| 103 | 32x |
checkmate::assert_character(.stats, len = 1) |
| 104 | ||
| 105 |
# Check for additional parameters to the statistics function |
|
| 106 | 32x |
dots_extra_args <- list(...) |
| 107 | 32x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
| 108 | 32x |
dots_extra_args$.additional_fun_parameters <- NULL |
| 109 | ||
| 110 | 32x |
add_total_level <- dots_extra_args$add_total_level |
| 111 | 32x |
checkmate::assert_flag(add_total_level) |
| 112 | ||
| 113 | 32x |
var <- dots_extra_args$var |
| 114 | 32x |
if (!is.null(var)) {
|
| 115 | 21x |
assert_df_with_variables(df, list(var = var)) |
| 116 | 21x |
df[[var]] <- as.factor(df[[var]]) |
| 117 |
} |
|
| 118 | ||
| 119 |
# Check for user-defined functions |
|
| 120 | 32x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
| 121 | 32x |
.stats <- default_and_custom_stats_list$all_stats |
| 122 | 32x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
| 123 | ||
| 124 | 32x |
x_stats <- list() |
| 125 | 32x |
if (!is.null(var)) {
|
| 126 | 21x |
for (lvl in levels(df[[var]])) {
|
| 127 | 42x |
x_stats_i <- .apply_stat_functions( |
| 128 | 42x |
default_stat_fnc = s_count_patients_sum_exposure, |
| 129 | 42x |
custom_stat_fnc_list = custom_stat_functions, |
| 130 | 42x |
args_list = c( |
| 131 | 42x |
df = list(subset(df, get(var) == lvl)), |
| 132 | 42x |
labelstr = list(labelstr), |
| 133 | 42x |
var_level = lvl, |
| 134 | 42x |
extra_afun_params, |
| 135 | 42x |
dots_extra_args |
| 136 |
) |
|
| 137 |
) |
|
| 138 | 42x |
x_stats[[.stats]][[lvl]] <- x_stats_i[[.stats]] |
| 139 |
} |
|
| 140 |
} |
|
| 141 | ||
| 142 | 32x |
if (add_total_level || is.null(var)) {
|
| 143 | 13x |
x_stats_total <- .apply_stat_functions( |
| 144 | 13x |
default_stat_fnc = s_count_patients_sum_exposure, |
| 145 | 13x |
custom_stat_fnc_list = custom_stat_functions, |
| 146 | 13x |
args_list = c( |
| 147 | 13x |
df = list(df), |
| 148 | 13x |
labelstr = list(labelstr), |
| 149 | 13x |
extra_afun_params, |
| 150 | 13x |
dots_extra_args |
| 151 |
) |
|
| 152 |
) |
|
| 153 | 13x |
x_stats[[.stats]][["Total"]] <- x_stats_total[[.stats]] |
| 154 |
} |
|
| 155 | ||
| 156 |
# Fill in formatting defaults |
|
| 157 | 32x |
.stats <- get_stats( |
| 158 | 32x |
"analyze_patients_exposure_in_cols", |
| 159 | 32x |
stats_in = .stats, |
| 160 | 32x |
custom_stats_in = names(custom_stat_functions) |
| 161 |
) |
|
| 162 | 32x |
x_stats <- x_stats[.stats] |
| 163 | 32x |
levels_per_stats <- lapply(x_stats, names) |
| 164 | 32x |
.formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) |
| 165 | 32x |
.labels <- get_labels_from_stats( |
| 166 | 32x |
.stats, .labels, levels_per_stats, |
| 167 | 32x |
tern_defaults = c(lapply(x_stats[[1]], attr, "label"), tern_default_labels) |
| 168 |
) |
|
| 169 | 32x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) |
| 170 | ||
| 171 | 32x |
x_stats <- x_stats[.stats] %>% |
| 172 | 32x |
.unlist_keep_nulls() %>% |
| 173 | 32x |
setNames(names(.formats)) |
| 174 | ||
| 175 |
# Auto format handling |
|
| 176 | 32x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
| 177 | ||
| 178 |
# Get and check statistical names |
|
| 179 | 32x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
| 180 | ||
| 181 | 32x |
in_rows( |
| 182 | 32x |
.list = x_stats, |
| 183 | 32x |
.formats = .formats, |
| 184 | 32x |
.names = .labels %>% .unlist_keep_nulls(), |
| 185 | 32x |
.stat_names = .stat_names, |
| 186 | 32x |
.labels = .labels %>% .unlist_keep_nulls(), |
| 187 | 32x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
| 188 |
) |
|
| 189 |
} |
|
| 190 | ||
| 191 |
#' @describeIn summarize_patients_exposure_in_cols Layout-creating function which can take statistics |
|
| 192 |
#' function arguments and additional format arguments. This function is a wrapper for |
|
| 193 |
#' [rtables::split_cols_by_multivar()] and [rtables::summarize_row_groups()]. |
|
| 194 |
#' |
|
| 195 |
#' @return |
|
| 196 |
#' * `summarize_patients_exposure_in_cols()` returns a layout object suitable for passing to further |
|
| 197 |
#' layouting functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will |
|
| 198 |
#' add formatted content rows, with the statistics from `s_count_patients_sum_exposure()` arranged in |
|
| 199 |
#' columns, to the table layout. |
|
| 200 |
#' |
|
| 201 |
#' @examples |
|
| 202 |
#' lyt5 <- basic_table() %>% |
|
| 203 |
#' summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE) |
|
| 204 |
#' |
|
| 205 |
#' result5 <- build_table(lyt5, df = df, alt_counts_df = adsl) |
|
| 206 |
#' result5 |
|
| 207 |
#' |
|
| 208 |
#' lyt6 <- basic_table() %>% |
|
| 209 |
#' summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE, .stats = "sum_exposure") |
|
| 210 |
#' |
|
| 211 |
#' result6 <- build_table(lyt6, df = df, alt_counts_df = adsl) |
|
| 212 |
#' result6 |
|
| 213 |
#' |
|
| 214 |
#' @export |
|
| 215 |
#' @order 3 |
|
| 216 |
summarize_patients_exposure_in_cols <- function(lyt, |
|
| 217 |
var, |
|
| 218 |
ex_var = "AVAL", |
|
| 219 |
id = "USUBJID", |
|
| 220 |
add_total_level = FALSE, |
|
| 221 |
custom_label = NULL, |
|
| 222 |
col_split = TRUE, |
|
| 223 |
na_str = default_na_str(), |
|
| 224 |
..., |
|
| 225 |
.stats = c("n_patients", "sum_exposure"),
|
|
| 226 |
.stat_names = NULL, |
|
| 227 |
.formats = NULL, |
|
| 228 |
.labels = c(n_patients = "Patients", sum_exposure = "Person time"), |
|
| 229 |
.indent_mods = NULL) {
|
|
| 230 |
# Process standard extra arguments |
|
| 231 | 3x |
extra_args <- list() |
| 232 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
| 233 | ! |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
| 234 | 3x |
col_labels <- unlist(.labels[.stats]) |
| 235 | 3x |
.labels <- .labels[!names(.labels) %in% c("n_patients", "sum_exposure")]
|
| 236 | 3x |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
| 237 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
| 238 | ||
| 239 |
# Process additional arguments to the statistic function |
|
| 240 | 3x |
extra_args <- c( |
| 241 | 3x |
extra_args, |
| 242 | 3x |
ex_var = ex_var, id = id, add_total_level = add_total_level, custom_label = custom_label, |
| 243 |
... |
|
| 244 |
) |
|
| 245 | ||
| 246 |
# Adding additional info from layout to analysis function |
|
| 247 | 3x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
| 248 | 3x |
formals(a_count_patients_sum_exposure) <- c( |
| 249 | 3x |
formals(a_count_patients_sum_exposure), extra_args[[".additional_fun_parameters"]] |
| 250 |
) |
|
| 251 | ||
| 252 | 3x |
if (col_split) {
|
| 253 | 3x |
lyt <- split_cols_by_multivar( |
| 254 | 3x |
lyt = lyt, |
| 255 | 3x |
vars = rep(var, length(.stats)), |
| 256 | 3x |
varlabels = col_labels, |
| 257 | 3x |
extra_args = list(.stats = .stats) |
| 258 |
) |
|
| 259 |
} |
|
| 260 | 3x |
summarize_row_groups( |
| 261 | 3x |
lyt = lyt, |
| 262 | 3x |
var = var, |
| 263 | 3x |
cfun = a_count_patients_sum_exposure, |
| 264 | 3x |
na_str = na_str, |
| 265 | 3x |
extra_args = extra_args |
| 266 |
) |
|
| 267 |
} |
|
| 268 | ||
| 269 |
#' @describeIn summarize_patients_exposure_in_cols Layout-creating function which can take statistics |
|
| 270 |
#' function arguments and additional format arguments. This function is a wrapper for |
|
| 271 |
#' [rtables::split_cols_by_multivar()] and [rtables::analyze_colvars()]. |
|
| 272 |
#' |
|
| 273 |
#' @param col_split (`flag`)\cr whether the columns should be split. Set to `FALSE` when the required |
|
| 274 |
#' column split has been done already earlier in the layout pipe. |
|
| 275 |
#' |
|
| 276 |
#' @return |
|
| 277 |
#' * `analyze_patients_exposure_in_cols()` returns a layout object suitable for passing to further |
|
| 278 |
#' layouting functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will |
|
| 279 |
#' add formatted data rows, with the statistics from `s_count_patients_sum_exposure()` arranged in |
|
| 280 |
#' columns, to the table layout. |
|
| 281 |
#' |
|
| 282 |
#' @note As opposed to [summarize_patients_exposure_in_cols()] which generates content rows, |
|
| 283 |
#' `analyze_patients_exposure_in_cols()` generates data rows which will _not_ be repeated on multiple |
|
| 284 |
#' pages when pagination is used. |
|
| 285 |
#' |
|
| 286 |
#' @examples |
|
| 287 |
#' set.seed(1) |
|
| 288 |
#' df <- data.frame( |
|
| 289 |
#' USUBJID = c(paste("id", seq(1, 12), sep = "")),
|
|
| 290 |
#' ARMCD = c(rep("ARM A", 6), rep("ARM B", 6)),
|
|
| 291 |
#' SEX = c(rep("Female", 6), rep("Male", 6)),
|
|
| 292 |
#' AVAL = as.numeric(sample(seq(1, 20), 12)), |
|
| 293 |
#' stringsAsFactors = TRUE |
|
| 294 |
#' ) |
|
| 295 |
#' adsl <- data.frame( |
|
| 296 |
#' USUBJID = c(paste("id", seq(1, 12), sep = "")),
|
|
| 297 |
#' ARMCD = c(rep("ARM A", 2), rep("ARM B", 2)),
|
|
| 298 |
#' SEX = c(rep("Female", 2), rep("Male", 2)),
|
|
| 299 |
#' stringsAsFactors = TRUE |
|
| 300 |
#' ) |
|
| 301 |
#' |
|
| 302 |
#' lyt <- basic_table() %>% |
|
| 303 |
#' split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>%
|
|
| 304 |
#' summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE) %>% |
|
| 305 |
#' analyze_patients_exposure_in_cols(var = "SEX", col_split = FALSE) |
|
| 306 |
#' result <- build_table(lyt, df = df, alt_counts_df = adsl) |
|
| 307 |
#' result |
|
| 308 |
#' |
|
| 309 |
#' lyt2 <- basic_table() %>% |
|
| 310 |
#' split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>%
|
|
| 311 |
#' summarize_patients_exposure_in_cols( |
|
| 312 |
#' var = "AVAL", col_split = TRUE, |
|
| 313 |
#' .stats = "n_patients", custom_label = "some custom label" |
|
| 314 |
#' ) %>% |
|
| 315 |
#' analyze_patients_exposure_in_cols(var = "SEX", col_split = FALSE, ex_var = "AVAL") |
|
| 316 |
#' result2 <- build_table(lyt2, df = df, alt_counts_df = adsl) |
|
| 317 |
#' result2 |
|
| 318 |
#' |
|
| 319 |
#' lyt3 <- basic_table() %>% |
|
| 320 |
#' analyze_patients_exposure_in_cols(var = "SEX", col_split = TRUE, ex_var = "AVAL") |
|
| 321 |
#' result3 <- build_table(lyt3, df = df, alt_counts_df = adsl) |
|
| 322 |
#' result3 |
|
| 323 |
#' |
|
| 324 |
#' # Adding total levels and custom label |
|
| 325 |
#' lyt4 <- basic_table( |
|
| 326 |
#' show_colcounts = TRUE |
|
| 327 |
#' ) %>% |
|
| 328 |
#' analyze_patients_exposure_in_cols( |
|
| 329 |
#' var = "ARMCD", |
|
| 330 |
#' col_split = TRUE, |
|
| 331 |
#' add_total_level = TRUE, |
|
| 332 |
#' custom_label = "TOTAL" |
|
| 333 |
#' ) %>% |
|
| 334 |
#' append_topleft(c("", "Sex"))
|
|
| 335 |
#' |
|
| 336 |
#' result4 <- build_table(lyt4, df = df, alt_counts_df = adsl) |
|
| 337 |
#' result4 |
|
| 338 |
#' |
|
| 339 |
#' @export |
|
| 340 |
#' @order 2 |
|
| 341 |
analyze_patients_exposure_in_cols <- function(lyt, |
|
| 342 |
var = NULL, |
|
| 343 |
ex_var = "AVAL", |
|
| 344 |
id = "USUBJID", |
|
| 345 |
add_total_level = FALSE, |
|
| 346 |
custom_label = NULL, |
|
| 347 |
col_split = TRUE, |
|
| 348 |
na_str = default_na_str(), |
|
| 349 |
.stats = c("n_patients", "sum_exposure"),
|
|
| 350 |
.stat_names = NULL, |
|
| 351 |
.formats = NULL, |
|
| 352 |
.labels = c(n_patients = "Patients", sum_exposure = "Person time"), |
|
| 353 |
.indent_mods = NULL, |
|
| 354 |
...) {
|
|
| 355 |
# Process standard extra arguments |
|
| 356 | 6x |
extra_args <- list() |
| 357 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
| 358 | ! |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
| 359 | 6x |
col_labels <- unlist(.labels[.stats]) |
| 360 | 6x |
.labels <- .labels[!names(.labels) %in% c("n_patients", "sum_exposure")]
|
| 361 | 6x |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
| 362 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
| 363 | ||
| 364 |
# Process additional arguments to the statistic function |
|
| 365 | 6x |
extra_args <- c( |
| 366 | 6x |
extra_args, |
| 367 | 6x |
var = var, ex_var = ex_var, id = id, add_total_level = add_total_level, custom_label = custom_label, |
| 368 |
... |
|
| 369 |
) |
|
| 370 | ||
| 371 |
# Adding additional info from layout to analysis function |
|
| 372 | 6x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
| 373 | 6x |
formals(a_count_patients_sum_exposure) <- c( |
| 374 | 6x |
formals(a_count_patients_sum_exposure), extra_args[[".additional_fun_parameters"]] |
| 375 |
) |
|
| 376 | ||
| 377 | 6x |
if (col_split) {
|
| 378 | 4x |
lyt <- split_cols_by_multivar( |
| 379 | 4x |
lyt = lyt, |
| 380 | 4x |
vars = rep(ex_var, length(.stats)), |
| 381 | 4x |
varlabels = col_labels, |
| 382 | 4x |
extra_args = list(.stats = .stats) |
| 383 |
) |
|
| 384 |
} |
|
| 385 | ||
| 386 | 6x |
analyze_colvars( |
| 387 | 6x |
lyt = lyt, |
| 388 | 6x |
afun = a_count_patients_sum_exposure, |
| 389 | 6x |
na_str = na_str, |
| 390 | 6x |
extra_args = extra_args |
| 391 |
) |
|
| 392 |
} |
| 1 |
#' Horizontal waterfall plot |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' This basic waterfall plot visualizes a quantity `height` ordered by value with some markup. |
|
| 6 |
#' |
|
| 7 |
#' @param height (`numeric`)\cr vector containing values to be plotted as the waterfall bars. |
|
| 8 |
#' @param id (`character`)\cr vector containing identifiers to use as the x-axis label for the waterfall bars. |
|
| 9 |
#' @param col (`character`)\cr color(s). |
|
| 10 |
#' @param col_var (`factor`, `character`, or `NULL`)\cr categorical variable for bar coloring. `NULL` by default. |
|
| 11 |
#' @param xlab (`string`)\cr x label. Default is `"ID"`. |
|
| 12 |
#' @param ylab (`string`)\cr y label. Default is `"Value"`. |
|
| 13 |
#' @param title (`string`)\cr text to be displayed as plot title. |
|
| 14 |
#' @param col_legend_title (`string`)\cr text to be displayed as legend title. |
|
| 15 |
#' |
|
| 16 |
#' @return A `ggplot` waterfall plot. |
|
| 17 |
#' |
|
| 18 |
#' @examples |
|
| 19 |
#' library(dplyr) |
|
| 20 |
#' |
|
| 21 |
#' g_waterfall(height = c(3, 5, -1), id = letters[1:3]) |
|
| 22 |
#' |
|
| 23 |
#' g_waterfall( |
|
| 24 |
#' height = c(3, 5, -1), |
|
| 25 |
#' id = letters[1:3], |
|
| 26 |
#' col_var = letters[1:3] |
|
| 27 |
#' ) |
|
| 28 |
#' |
|
| 29 |
#' adsl_f <- tern_ex_adsl %>% |
|
| 30 |
#' select(USUBJID, STUDYID, ARM, ARMCD, SEX) |
|
| 31 |
#' |
|
| 32 |
#' adrs_f <- tern_ex_adrs %>% |
|
| 33 |
#' filter(PARAMCD == "OVRINV") %>% |
|
| 34 |
#' mutate(pchg = rnorm(n(), 10, 50)) |
|
| 35 |
#' |
|
| 36 |
#' adrs_f <- head(adrs_f, 30) |
|
| 37 |
#' adrs_f <- adrs_f[!duplicated(adrs_f$USUBJID), ] |
|
| 38 |
#' head(adrs_f) |
|
| 39 |
#' |
|
| 40 |
#' g_waterfall( |
|
| 41 |
#' height = adrs_f$pchg, |
|
| 42 |
#' id = adrs_f$USUBJID, |
|
| 43 |
#' col_var = adrs_f$AVALC |
|
| 44 |
#' ) |
|
| 45 |
#' |
|
| 46 |
#' g_waterfall( |
|
| 47 |
#' height = adrs_f$pchg, |
|
| 48 |
#' id = paste("asdfdsfdsfsd", adrs_f$USUBJID),
|
|
| 49 |
#' col_var = adrs_f$SEX |
|
| 50 |
#' ) |
|
| 51 |
#' |
|
| 52 |
#' g_waterfall( |
|
| 53 |
#' height = adrs_f$pchg, |
|
| 54 |
#' id = paste("asdfdsfdsfsd", adrs_f$USUBJID),
|
|
| 55 |
#' xlab = "ID", |
|
| 56 |
#' ylab = "Percentage Change", |
|
| 57 |
#' title = "Waterfall plot" |
|
| 58 |
#' ) |
|
| 59 |
#' |
|
| 60 |
#' @export |
|
| 61 |
g_waterfall <- function(height, |
|
| 62 |
id, |
|
| 63 |
col_var = NULL, |
|
| 64 |
col = getOption("ggplot2.discrete.colour"),
|
|
| 65 |
xlab = NULL, |
|
| 66 |
ylab = NULL, |
|
| 67 |
col_legend_title = NULL, |
|
| 68 |
title = NULL) {
|
|
| 69 | 2x |
if (!is.null(col_var)) {
|
| 70 | 1x |
check_same_n(height = height, id = id, col_var = col_var) |
| 71 |
} else {
|
|
| 72 | 1x |
check_same_n(height = height, id = id) |
| 73 |
} |
|
| 74 | ||
| 75 | 2x |
checkmate::assert_multi_class(col_var, c("character", "factor"), null.ok = TRUE)
|
| 76 | 2x |
checkmate::assert_character(col, null.ok = TRUE) |
| 77 | ||
| 78 | 2x |
xlabel <- deparse(substitute(id)) |
| 79 | 2x |
ylabel <- deparse(substitute(height)) |
| 80 | ||
| 81 | 2x |
col_label <- if (!missing(col_var)) {
|
| 82 | 1x |
deparse(substitute(col_var)) |
| 83 |
} |
|
| 84 | ||
| 85 | 2x |
xlab <- if (is.null(xlab)) xlabel else xlab |
| 86 | 2x |
ylab <- if (is.null(ylab)) ylabel else ylab |
| 87 | 2x |
col_legend_title <- if (is.null(col_legend_title)) col_label else col_legend_title |
| 88 | ||
| 89 | 2x |
plot_data <- data.frame( |
| 90 | 2x |
height = height, |
| 91 | 2x |
id = as.character(id), |
| 92 | 2x |
col_var = if (is.null(col_var)) "x" else to_n(col_var, length(height)), |
| 93 | 2x |
stringsAsFactors = FALSE |
| 94 |
) |
|
| 95 | ||
| 96 | 2x |
plot_data_ord <- plot_data[order(plot_data$height, decreasing = TRUE), ] |
| 97 | ||
| 98 | 2x |
p <- ggplot2::ggplot(plot_data_ord, ggplot2::aes(x = factor(id, levels = id), y = height)) + |
| 99 | 2x |
ggplot2::geom_col() + |
| 100 | 2x |
ggplot2::geom_text( |
| 101 | 2x |
label = format(plot_data_ord$height, digits = 2), |
| 102 | 2x |
vjust = ifelse(plot_data_ord$height >= 0, -0.5, 1.5) |
| 103 |
) + |
|
| 104 | 2x |
ggplot2::xlab(xlab) + |
| 105 | 2x |
ggplot2::ylab(ylab) + |
| 106 | 2x |
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, hjust = 0, vjust = .5)) |
| 107 | ||
| 108 | 2x |
if (!is.null(col_var)) {
|
| 109 | 1x |
p <- p + |
| 110 | 1x |
ggplot2::aes(fill = col_var) + |
| 111 | 1x |
ggplot2::labs(fill = col_legend_title) + |
| 112 | 1x |
ggplot2::theme( |
| 113 | 1x |
legend.position = "bottom", |
| 114 | 1x |
legend.background = ggplot2::element_blank(), |
| 115 | 1x |
legend.title = ggplot2::element_text(face = "bold"), |
| 116 | 1x |
legend.box.background = ggplot2::element_rect(colour = "black") |
| 117 |
) |
|
| 118 |
} |
|
| 119 | ||
| 120 | 2x |
if (!is.null(col)) {
|
| 121 | 1x |
p <- p + |
| 122 | 1x |
ggplot2::scale_fill_manual(values = col) |
| 123 |
} |
|
| 124 | ||
| 125 | 2x |
if (!is.null(title)) {
|
| 126 | 1x |
p <- p + |
| 127 | 1x |
ggplot2::labs(title = title) + |
| 128 | 1x |
ggplot2::theme(plot.title = ggplot2::element_text(face = "bold")) |
| 129 |
} |
|
| 130 | ||
| 131 | 2x |
p |
| 132 |
} |
| 1 |
#' Individual patient plots |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Line plot(s) displaying trend in patients' parameter values over time is rendered. |
|
| 6 |
#' Patients' individual baseline values can be added to the plot(s) as reference. |
|
| 7 |
#' |
|
| 8 |
#' @inheritParams argument_convention |
|
| 9 |
#' @param xvar (`string`)\cr time point variable to be plotted on x-axis. |
|
| 10 |
#' @param yvar (`string`)\cr continuous analysis variable to be plotted on y-axis. |
|
| 11 |
#' @param xlab (`string`)\cr plot label for x-axis. |
|
| 12 |
#' @param ylab (`string`)\cr plot label for y-axis. |
|
| 13 |
#' @param id_var (`string`)\cr variable used as patient identifier. |
|
| 14 |
#' @param title (`string`)\cr title for plot. |
|
| 15 |
#' @param subtitle (`string`)\cr subtitle for plot. |
|
| 16 |
#' @param add_baseline_hline (`flag`)\cr adds horizontal line at baseline y-value on |
|
| 17 |
#' plot when `TRUE`. |
|
| 18 |
#' @param yvar_baseline (`string`)\cr variable with baseline values only. |
|
| 19 |
#' Ignored when `add_baseline_hline` is `FALSE`. |
|
| 20 |
#' @param ggtheme (`theme`)\cr optional graphical theme function as provided |
|
| 21 |
#' by `ggplot2` to control outlook of plot. Use `ggplot2::theme()` to tweak the display. |
|
| 22 |
#' @param plotting_choices (`string`)\cr specifies options for displaying |
|
| 23 |
#' plots. Must be one of `"all_in_one"`, `"split_by_max_obs"`, or `"separate_by_obs"`. |
|
| 24 |
#' @param max_obs_per_plot (`integer(1)`)\cr number of observations to be plotted on one |
|
| 25 |
#' plot. Ignored if `plotting_choices` is not `"separate_by_obs"`. |
|
| 26 |
#' @param caption (`string`)\cr optional caption below the plot. |
|
| 27 |
#' @param col (`character`)\cr line colors. |
|
| 28 |
#' |
|
| 29 |
#' @seealso Relevant helper function [h_g_ipp()]. |
|
| 30 |
#' |
|
| 31 |
#' @name g_ipp |
|
| 32 |
#' @aliases individual_patient_plot |
|
| 33 |
NULL |
|
| 34 | ||
| 35 |
#' Helper function to create simple line plot over time |
|
| 36 |
#' |
|
| 37 |
#' @description `r lifecycle::badge("stable")`
|
|
| 38 |
#' |
|
| 39 |
#' Function that generates a simple line plot displaying parameter trends over time. |
|
| 40 |
#' |
|
| 41 |
#' @inheritParams argument_convention |
|
| 42 |
#' @inheritParams g_ipp |
|
| 43 |
#' |
|
| 44 |
#' @return A `ggplot` line plot. |
|
| 45 |
#' |
|
| 46 |
#' @seealso [g_ipp()] which uses this function. |
|
| 47 |
#' |
|
| 48 |
#' @examples |
|
| 49 |
#' library(dplyr) |
|
| 50 |
#' |
|
| 51 |
#' # Select a small sample of data to plot. |
|
| 52 |
#' adlb <- tern_ex_adlb %>% |
|
| 53 |
#' filter(PARAMCD == "ALT", !(AVISIT %in% c("SCREENING", "BASELINE"))) %>%
|
|
| 54 |
#' slice(1:36) |
|
| 55 |
#' |
|
| 56 |
#' p <- h_g_ipp( |
|
| 57 |
#' df = adlb, |
|
| 58 |
#' xvar = "AVISIT", |
|
| 59 |
#' yvar = "AVAL", |
|
| 60 |
#' xlab = "Visit", |
|
| 61 |
#' id_var = "USUBJID", |
|
| 62 |
#' ylab = "SGOT/ALT (U/L)", |
|
| 63 |
#' add_baseline_hline = TRUE |
|
| 64 |
#' ) |
|
| 65 |
#' p |
|
| 66 |
#' |
|
| 67 |
#' @export |
|
| 68 |
h_g_ipp <- function(df, |
|
| 69 |
xvar, |
|
| 70 |
yvar, |
|
| 71 |
xlab, |
|
| 72 |
ylab, |
|
| 73 |
id_var, |
|
| 74 |
title = "Individual Patient Plots", |
|
| 75 |
subtitle = "", |
|
| 76 |
caption = NULL, |
|
| 77 |
add_baseline_hline = FALSE, |
|
| 78 |
yvar_baseline = "BASE", |
|
| 79 |
ggtheme = nestcolor::theme_nest(), |
|
| 80 |
col = NULL) {
|
|
| 81 | 13x |
checkmate::assert_string(xvar) |
| 82 | 13x |
checkmate::assert_string(yvar) |
| 83 | 13x |
checkmate::assert_string(yvar_baseline) |
| 84 | 13x |
checkmate::assert_string(id_var) |
| 85 | 13x |
checkmate::assert_string(xlab) |
| 86 | 13x |
checkmate::assert_string(ylab) |
| 87 | 13x |
checkmate::assert_string(title) |
| 88 | 13x |
checkmate::assert_string(subtitle) |
| 89 | 13x |
checkmate::assert_subset(c(xvar, yvar, yvar_baseline, id_var), colnames(df)) |
| 90 | 13x |
checkmate::assert_data_frame(df) |
| 91 | 13x |
checkmate::assert_flag(add_baseline_hline) |
| 92 | 13x |
checkmate::assert_character(col, null.ok = TRUE) |
| 93 | ||
| 94 | 13x |
p <- ggplot2::ggplot( |
| 95 | 13x |
data = df, |
| 96 | 13x |
mapping = ggplot2::aes( |
| 97 | 13x |
x = .data[[xvar]], |
| 98 | 13x |
y = .data[[yvar]], |
| 99 | 13x |
group = .data[[id_var]], |
| 100 | 13x |
colour = .data[[id_var]] |
| 101 |
) |
|
| 102 |
) + |
|
| 103 | 13x |
ggplot2::geom_line(linewidth = 0.4) + |
| 104 | 13x |
ggplot2::geom_point(size = 2) + |
| 105 | 13x |
ggplot2::labs( |
| 106 | 13x |
x = xlab, |
| 107 | 13x |
y = ylab, |
| 108 | 13x |
title = title, |
| 109 | 13x |
subtitle = subtitle, |
| 110 | 13x |
caption = caption |
| 111 |
) + |
|
| 112 | 13x |
ggtheme |
| 113 | ||
| 114 | 13x |
if (add_baseline_hline) {
|
| 115 | 12x |
baseline_df <- df[, c(id_var, yvar_baseline)] |
| 116 | 12x |
baseline_df <- unique(baseline_df) |
| 117 | ||
| 118 | 12x |
p <- p + |
| 119 | 12x |
ggplot2::geom_hline( |
| 120 | 12x |
data = baseline_df, |
| 121 | 12x |
mapping = ggplot2::aes( |
| 122 | 12x |
yintercept = .data[[yvar_baseline]], |
| 123 | 12x |
colour = .data[[id_var]] |
| 124 |
), |
|
| 125 | 12x |
linetype = "dotdash", |
| 126 | 12x |
linewidth = 0.4 |
| 127 |
) + |
|
| 128 | 12x |
ggplot2::geom_text( |
| 129 | 12x |
data = baseline_df, |
| 130 | 12x |
mapping = ggplot2::aes( |
| 131 | 12x |
x = 1, |
| 132 | 12x |
y = .data[[yvar_baseline]], |
| 133 | 12x |
label = .data[[id_var]], |
| 134 | 12x |
colour = .data[[id_var]] |
| 135 |
), |
|
| 136 | 12x |
nudge_y = 0.025 * (max(df[, yvar], na.rm = TRUE) - min(df[, yvar], na.rm = TRUE)), |
| 137 | 12x |
vjust = "right", |
| 138 | 12x |
size = 2 |
| 139 |
) |
|
| 140 | ||
| 141 | 12x |
if (!is.null(col)) {
|
| 142 | 1x |
p <- p + |
| 143 | 1x |
ggplot2::scale_color_manual(values = col) |
| 144 |
} |
|
| 145 |
} |
|
| 146 | 13x |
p |
| 147 |
} |
|
| 148 | ||
| 149 |
#' @describeIn g_ipp Plotting function for individual patient plots which, depending on user |
|
| 150 |
#' preference, renders a single graphic or compiles a list of graphics that show trends in individual's parameter |
|
| 151 |
#' values over time. |
|
| 152 |
#' |
|
| 153 |
#' @return A `ggplot` object or a list of `ggplot` objects. |
|
| 154 |
#' |
|
| 155 |
#' @examples |
|
| 156 |
#' library(dplyr) |
|
| 157 |
#' |
|
| 158 |
#' # Select a small sample of data to plot. |
|
| 159 |
#' adlb <- tern_ex_adlb %>% |
|
| 160 |
#' filter(PARAMCD == "ALT", !(AVISIT %in% c("SCREENING", "BASELINE"))) %>%
|
|
| 161 |
#' slice(1:36) |
|
| 162 |
#' |
|
| 163 |
#' plot_list <- g_ipp( |
|
| 164 |
#' df = adlb, |
|
| 165 |
#' xvar = "AVISIT", |
|
| 166 |
#' yvar = "AVAL", |
|
| 167 |
#' xlab = "Visit", |
|
| 168 |
#' ylab = "SGOT/ALT (U/L)", |
|
| 169 |
#' title = "Individual Patient Plots", |
|
| 170 |
#' add_baseline_hline = TRUE, |
|
| 171 |
#' plotting_choices = "split_by_max_obs", |
|
| 172 |
#' max_obs_per_plot = 5 |
|
| 173 |
#' ) |
|
| 174 |
#' plot_list |
|
| 175 |
#' |
|
| 176 |
#' @export |
|
| 177 |
g_ipp <- function(df, |
|
| 178 |
xvar, |
|
| 179 |
yvar, |
|
| 180 |
xlab, |
|
| 181 |
ylab, |
|
| 182 |
id_var = "USUBJID", |
|
| 183 |
title = "Individual Patient Plots", |
|
| 184 |
subtitle = "", |
|
| 185 |
caption = NULL, |
|
| 186 |
add_baseline_hline = FALSE, |
|
| 187 |
yvar_baseline = "BASE", |
|
| 188 |
ggtheme = nestcolor::theme_nest(), |
|
| 189 |
plotting_choices = c("all_in_one", "split_by_max_obs", "separate_by_obs"),
|
|
| 190 |
max_obs_per_plot = 4, |
|
| 191 |
col = NULL) {
|
|
| 192 | 3x |
checkmate::assert_count(max_obs_per_plot) |
| 193 | 3x |
checkmate::assert_subset(plotting_choices, c("all_in_one", "split_by_max_obs", "separate_by_obs"))
|
| 194 | 3x |
checkmate::assert_character(col, null.ok = TRUE) |
| 195 | ||
| 196 | 3x |
plotting_choices <- match.arg(plotting_choices) |
| 197 | ||
| 198 | 3x |
if (plotting_choices == "all_in_one") {
|
| 199 | 1x |
p <- h_g_ipp( |
| 200 | 1x |
df = df, |
| 201 | 1x |
xvar = xvar, |
| 202 | 1x |
yvar = yvar, |
| 203 | 1x |
xlab = xlab, |
| 204 | 1x |
ylab = ylab, |
| 205 | 1x |
id_var = id_var, |
| 206 | 1x |
title = title, |
| 207 | 1x |
subtitle = subtitle, |
| 208 | 1x |
caption = caption, |
| 209 | 1x |
add_baseline_hline = add_baseline_hline, |
| 210 | 1x |
yvar_baseline = yvar_baseline, |
| 211 | 1x |
ggtheme = ggtheme, |
| 212 | 1x |
col = col |
| 213 |
) |
|
| 214 | ||
| 215 | 1x |
return(p) |
| 216 | 2x |
} else if (plotting_choices == "split_by_max_obs") {
|
| 217 | 1x |
id_vec <- unique(df[[id_var]]) |
| 218 | 1x |
id_list <- split( |
| 219 | 1x |
id_vec, |
| 220 | 1x |
rep(1:ceiling(length(id_vec) / max_obs_per_plot), |
| 221 | 1x |
each = max_obs_per_plot, |
| 222 | 1x |
length.out = length(id_vec) |
| 223 |
) |
|
| 224 |
) |
|
| 225 | ||
| 226 | 1x |
df_list <- list() |
| 227 | 1x |
plot_list <- list() |
| 228 | ||
| 229 | 1x |
for (i in seq_along(id_list)) {
|
| 230 | 2x |
df_list[[i]] <- df[df[[id_var]] %in% id_list[[i]], ] |
| 231 | ||
| 232 | 2x |
plots <- h_g_ipp( |
| 233 | 2x |
df = df_list[[i]], |
| 234 | 2x |
xvar = xvar, |
| 235 | 2x |
yvar = yvar, |
| 236 | 2x |
xlab = xlab, |
| 237 | 2x |
ylab = ylab, |
| 238 | 2x |
id_var = id_var, |
| 239 | 2x |
title = title, |
| 240 | 2x |
subtitle = subtitle, |
| 241 | 2x |
caption = caption, |
| 242 | 2x |
add_baseline_hline = add_baseline_hline, |
| 243 | 2x |
yvar_baseline = yvar_baseline, |
| 244 | 2x |
ggtheme = ggtheme, |
| 245 | 2x |
col = col |
| 246 |
) |
|
| 247 | ||
| 248 | 2x |
plot_list[[i]] <- plots |
| 249 |
} |
|
| 250 | 1x |
return(plot_list) |
| 251 |
} else {
|
|
| 252 | 1x |
ind_df <- split(df, df[[id_var]]) |
| 253 | 1x |
plot_list <- lapply( |
| 254 | 1x |
ind_df, |
| 255 | 1x |
function(x) {
|
| 256 | 8x |
h_g_ipp( |
| 257 | 8x |
df = x, |
| 258 | 8x |
xvar = xvar, |
| 259 | 8x |
yvar = yvar, |
| 260 | 8x |
xlab = xlab, |
| 261 | 8x |
ylab = ylab, |
| 262 | 8x |
id_var = id_var, |
| 263 | 8x |
title = title, |
| 264 | 8x |
subtitle = subtitle, |
| 265 | 8x |
caption = caption, |
| 266 | 8x |
add_baseline_hline = add_baseline_hline, |
| 267 | 8x |
yvar_baseline = yvar_baseline, |
| 268 | 8x |
ggtheme = ggtheme, |
| 269 | 8x |
col = col |
| 270 |
) |
|
| 271 |
} |
|
| 272 |
) |
|
| 273 | ||
| 274 | 1x |
return(plot_list) |
| 275 |
} |
|
| 276 |
} |
| 1 |
#' Helper functions for tabulating survival duration by subgroup |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Helper functions that tabulate in a data frame statistics such as median survival |
|
| 6 |
#' time and hazard ratio for population subgroups. |
|
| 7 |
#' |
|
| 8 |
#' @inheritParams argument_convention |
|
| 9 |
#' @inheritParams survival_coxph_pairwise |
|
| 10 |
#' @inheritParams survival_duration_subgroups |
|
| 11 |
#' @param arm (`factor`)\cr the treatment group variable. |
|
| 12 |
#' |
|
| 13 |
#' @details Main functionality is to prepare data for use in a layout-creating function. |
|
| 14 |
#' |
|
| 15 |
#' @examples |
|
| 16 |
#' library(dplyr) |
|
| 17 |
#' library(forcats) |
|
| 18 |
#' |
|
| 19 |
#' adtte <- tern_ex_adtte |
|
| 20 |
#' |
|
| 21 |
#' # Save variable labels before data processing steps. |
|
| 22 |
#' adtte_labels <- formatters::var_labels(adtte) |
|
| 23 |
#' |
|
| 24 |
#' adtte_f <- adtte %>% |
|
| 25 |
#' filter( |
|
| 26 |
#' PARAMCD == "OS", |
|
| 27 |
#' ARM %in% c("B: Placebo", "A: Drug X"),
|
|
| 28 |
#' SEX %in% c("M", "F")
|
|
| 29 |
#' ) %>% |
|
| 30 |
#' mutate( |
|
| 31 |
#' # Reorder levels of ARM to display reference arm before treatment arm. |
|
| 32 |
#' ARM = droplevels(fct_relevel(ARM, "B: Placebo")), |
|
| 33 |
#' SEX = droplevels(SEX), |
|
| 34 |
#' is_event = CNSR == 0 |
|
| 35 |
#' ) |
|
| 36 |
#' labels <- c("ARM" = adtte_labels[["ARM"]], "SEX" = adtte_labels[["SEX"]], "is_event" = "Event Flag")
|
|
| 37 |
#' formatters::var_labels(adtte_f)[names(labels)] <- labels |
|
| 38 |
#' |
|
| 39 |
#' @name h_survival_duration_subgroups |
|
| 40 |
NULL |
|
| 41 | ||
| 42 |
#' @describeIn h_survival_duration_subgroups Helper to prepare a data frame of median survival times by arm. |
|
| 43 |
#' |
|
| 44 |
#' @return |
|
| 45 |
#' * `h_survtime_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, and `median`. |
|
| 46 |
#' |
|
| 47 |
#' @examples |
|
| 48 |
#' # Extract median survival time for one group. |
|
| 49 |
#' h_survtime_df( |
|
| 50 |
#' tte = adtte_f$AVAL, |
|
| 51 |
#' is_event = adtte_f$is_event, |
|
| 52 |
#' arm = adtte_f$ARM |
|
| 53 |
#' ) |
|
| 54 |
#' |
|
| 55 |
#' @export |
|
| 56 |
h_survtime_df <- function(tte, is_event, arm) {
|
|
| 57 | 79x |
checkmate::assert_numeric(tte) |
| 58 | 78x |
checkmate::assert_logical(is_event, len = length(tte)) |
| 59 | 78x |
assert_valid_factor(arm, len = length(tte)) |
| 60 | ||
| 61 | 78x |
df_tte <- data.frame( |
| 62 | 78x |
tte = tte, |
| 63 | 78x |
is_event = is_event, |
| 64 | 78x |
stringsAsFactors = FALSE |
| 65 |
) |
|
| 66 | ||
| 67 |
# Delete NAs |
|
| 68 | 78x |
non_missing_rows <- stats::complete.cases(df_tte) |
| 69 | 78x |
df_tte <- df_tte[non_missing_rows, ] |
| 70 | 78x |
arm <- arm[non_missing_rows] |
| 71 | ||
| 72 | 78x |
lst_tte <- split(df_tte, arm) |
| 73 | 78x |
lst_results <- Map(function(x, arm) {
|
| 74 | 156x |
if (nrow(x) > 0) {
|
| 75 | 152x |
s_surv <- s_surv_time(x, .var = "tte", is_event = "is_event") |
| 76 | 152x |
median_est <- unname(as.numeric(s_surv$median)) |
| 77 | 152x |
n_events <- sum(x$is_event) |
| 78 |
} else {
|
|
| 79 | 4x |
median_est <- NA |
| 80 | 4x |
n_events <- NA |
| 81 |
} |
|
| 82 | ||
| 83 | 156x |
data.frame( |
| 84 | 156x |
arm = arm, |
| 85 | 156x |
n = nrow(x), |
| 86 | 156x |
n_events = n_events, |
| 87 | 156x |
median = median_est, |
| 88 | 156x |
stringsAsFactors = FALSE |
| 89 |
) |
|
| 90 | 78x |
}, lst_tte, names(lst_tte)) |
| 91 | ||
| 92 | 78x |
df <- do.call(rbind, args = c(lst_results, make.row.names = FALSE)) |
| 93 | 78x |
df$arm <- factor(df$arm, levels = levels(arm)) |
| 94 | 78x |
df |
| 95 |
} |
|
| 96 | ||
| 97 |
#' @describeIn h_survival_duration_subgroups Summarizes median survival times by arm and across subgroups |
|
| 98 |
#' in a data frame. `variables` corresponds to the names of variables found in `data`, passed as a named list and |
|
| 99 |
#' requires elements `tte`, `is_event`, `arm` and optionally `subgroups`. `groups_lists` optionally specifies |
|
| 100 |
#' groupings for `subgroups` variables. |
|
| 101 |
#' |
|
| 102 |
#' @return |
|
| 103 |
#' * `h_survtime_subgroups_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, `median`, `subgroup`, |
|
| 104 |
#' `var`, `var_label`, and `row_type`. |
|
| 105 |
#' |
|
| 106 |
#' @examples |
|
| 107 |
#' # Extract median survival time for multiple groups. |
|
| 108 |
#' h_survtime_subgroups_df( |
|
| 109 |
#' variables = list( |
|
| 110 |
#' tte = "AVAL", |
|
| 111 |
#' is_event = "is_event", |
|
| 112 |
#' arm = "ARM", |
|
| 113 |
#' subgroups = c("SEX", "BMRKR2")
|
|
| 114 |
#' ), |
|
| 115 |
#' data = adtte_f |
|
| 116 |
#' ) |
|
| 117 |
#' |
|
| 118 |
#' # Define groupings for BMRKR2 levels. |
|
| 119 |
#' h_survtime_subgroups_df( |
|
| 120 |
#' variables = list( |
|
| 121 |
#' tte = "AVAL", |
|
| 122 |
#' is_event = "is_event", |
|
| 123 |
#' arm = "ARM", |
|
| 124 |
#' subgroups = c("SEX", "BMRKR2")
|
|
| 125 |
#' ), |
|
| 126 |
#' data = adtte_f, |
|
| 127 |
#' groups_lists = list( |
|
| 128 |
#' BMRKR2 = list( |
|
| 129 |
#' "low" = "LOW", |
|
| 130 |
#' "low/medium" = c("LOW", "MEDIUM"),
|
|
| 131 |
#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
|
|
| 132 |
#' ) |
|
| 133 |
#' ) |
|
| 134 |
#' ) |
|
| 135 |
#' |
|
| 136 |
#' @export |
|
| 137 |
h_survtime_subgroups_df <- function(variables, |
|
| 138 |
data, |
|
| 139 |
groups_lists = list(), |
|
| 140 |
label_all = "All Patients") {
|
|
| 141 | 15x |
checkmate::assert_character(variables$tte) |
| 142 | 15x |
checkmate::assert_character(variables$is_event) |
| 143 | 15x |
checkmate::assert_character(variables$arm) |
| 144 | 15x |
checkmate::assert_character(variables$subgroups, null.ok = TRUE) |
| 145 | ||
| 146 | 15x |
assert_df_with_variables(data, variables) |
| 147 | ||
| 148 | 15x |
checkmate::assert_string(label_all) |
| 149 | ||
| 150 |
# Add All Patients. |
|
| 151 | 15x |
result_all <- h_survtime_df(data[[variables$tte]], data[[variables$is_event]], data[[variables$arm]]) |
| 152 | 15x |
result_all$subgroup <- label_all |
| 153 | 15x |
result_all$var <- "ALL" |
| 154 | 15x |
result_all$var_label <- label_all |
| 155 | 15x |
result_all$row_type <- "content" |
| 156 | ||
| 157 |
# Add Subgroups. |
|
| 158 | 15x |
if (is.null(variables$subgroups)) {
|
| 159 | 3x |
result_all |
| 160 |
} else {
|
|
| 161 | 12x |
l_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists) |
| 162 | 12x |
l_result <- lapply(l_data, function(grp) {
|
| 163 | 60x |
result <- h_survtime_df(grp$df[[variables$tte]], grp$df[[variables$is_event]], grp$df[[variables$arm]]) |
| 164 | 60x |
result_labels <- grp$df_labels[rep(1, times = nrow(result)), ] |
| 165 | 60x |
cbind(result, result_labels) |
| 166 |
}) |
|
| 167 | 12x |
result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE)) |
| 168 | 12x |
result_subgroups$row_type <- "analysis" |
| 169 | 12x |
rbind( |
| 170 | 12x |
result_all, |
| 171 | 12x |
result_subgroups |
| 172 |
) |
|
| 173 |
} |
|
| 174 |
} |
|
| 175 | ||
| 176 |
#' @describeIn h_survival_duration_subgroups Helper to prepare a data frame with estimates of |
|
| 177 |
#' treatment hazard ratio. |
|
| 178 |
#' |
|
| 179 |
#' @param strata_data (`factor`, `data.frame`, or `NULL`)\cr required if stratified analysis is performed. |
|
| 180 |
#' |
|
| 181 |
#' @return |
|
| 182 |
#' * `h_coxph_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`, `lcl`, `ucl`, |
|
| 183 |
#' `conf_level`, `pval` and `pval_label`. |
|
| 184 |
#' |
|
| 185 |
#' @examples |
|
| 186 |
#' # Extract hazard ratio for one group. |
|
| 187 |
#' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM) |
|
| 188 |
#' |
|
| 189 |
#' # Extract hazard ratio for one group with stratification factor. |
|
| 190 |
#' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM, strata_data = adtte_f$STRATA1) |
|
| 191 |
#' |
|
| 192 |
#' @export |
|
| 193 |
h_coxph_df <- function(tte, is_event, arm, strata_data = NULL, control = control_coxph()) {
|
|
| 194 | 85x |
checkmate::assert_numeric(tte) |
| 195 | 85x |
checkmate::assert_logical(is_event, len = length(tte)) |
| 196 | 85x |
assert_valid_factor(arm, n.levels = 2, len = length(tte)) |
| 197 | ||
| 198 | 85x |
df_tte <- data.frame(tte = tte, is_event = is_event) |
| 199 | 85x |
strata_vars <- NULL |
| 200 | ||
| 201 | 85x |
if (!is.null(strata_data)) {
|
| 202 | 5x |
if (is.data.frame(strata_data)) {
|
| 203 | 4x |
strata_vars <- names(strata_data) |
| 204 | 4x |
checkmate::assert_data_frame(strata_data, nrows = nrow(df_tte)) |
| 205 | 4x |
assert_df_with_factors(strata_data, as.list(stats::setNames(strata_vars, strata_vars))) |
| 206 |
} else {
|
|
| 207 | 1x |
assert_valid_factor(strata_data, len = nrow(df_tte)) |
| 208 | 1x |
strata_vars <- "strata_data" |
| 209 |
} |
|
| 210 | 5x |
df_tte[strata_vars] <- strata_data |
| 211 |
} |
|
| 212 | ||
| 213 | 85x |
l_df <- split(df_tte, arm) |
| 214 | ||
| 215 | 85x |
if (nrow(l_df[[1]]) > 0 && nrow(l_df[[2]]) > 0) {
|
| 216 |
# Hazard ratio and CI. |
|
| 217 | 79x |
result <- s_coxph_pairwise( |
| 218 | 79x |
df = l_df[[2]], |
| 219 | 79x |
.ref_group = l_df[[1]], |
| 220 | 79x |
.in_ref_col = FALSE, |
| 221 | 79x |
.var = "tte", |
| 222 | 79x |
is_event = "is_event", |
| 223 | 79x |
strata = strata_vars, |
| 224 | 79x |
control = control |
| 225 |
) |
|
| 226 | ||
| 227 | 79x |
df <- data.frame( |
| 228 |
# Dummy column needed downstream to create a nested header. |
|
| 229 | 79x |
arm = " ", |
| 230 | 79x |
n_tot = unname(as.numeric(result$n_tot)), |
| 231 | 79x |
n_tot_events = unname(as.numeric(result$n_tot_events)), |
| 232 | 79x |
hr = unname(as.numeric(result$hr)), |
| 233 | 79x |
lcl = unname(result$hr_ci[1]), |
| 234 | 79x |
ucl = unname(result$hr_ci[2]), |
| 235 | 79x |
conf_level = control[["conf_level"]], |
| 236 | 79x |
pval = as.numeric(result$pvalue), |
| 237 | 79x |
pval_label = obj_label(result$pvalue), |
| 238 | 79x |
stringsAsFactors = FALSE |
| 239 |
) |
|
| 240 |
} else if ( |
|
| 241 | 6x |
(nrow(l_df[[1]]) == 0 && nrow(l_df[[2]]) > 0) || |
| 242 | 6x |
(nrow(l_df[[1]]) > 0 && nrow(l_df[[2]]) == 0) |
| 243 |
) {
|
|
| 244 | 6x |
df_tte_complete <- df_tte[stats::complete.cases(df_tte), ] |
| 245 | 6x |
df <- data.frame( |
| 246 |
# Dummy column needed downstream to create a nested header. |
|
| 247 | 6x |
arm = " ", |
| 248 | 6x |
n_tot = nrow(df_tte_complete), |
| 249 | 6x |
n_tot_events = sum(df_tte_complete$is_event), |
| 250 | 6x |
hr = NA, |
| 251 | 6x |
lcl = NA, |
| 252 | 6x |
ucl = NA, |
| 253 | 6x |
conf_level = control[["conf_level"]], |
| 254 | 6x |
pval = NA, |
| 255 | 6x |
pval_label = NA, |
| 256 | 6x |
stringsAsFactors = FALSE |
| 257 |
) |
|
| 258 |
} else {
|
|
| 259 | ! |
df <- data.frame( |
| 260 |
# Dummy column needed downstream to create a nested header. |
|
| 261 | ! |
arm = " ", |
| 262 | ! |
n_tot = 0L, |
| 263 | ! |
n_tot_events = 0L, |
| 264 | ! |
hr = NA, |
| 265 | ! |
lcl = NA, |
| 266 | ! |
ucl = NA, |
| 267 | ! |
conf_level = control[["conf_level"]], |
| 268 | ! |
pval = NA, |
| 269 | ! |
pval_label = NA, |
| 270 | ! |
stringsAsFactors = FALSE |
| 271 |
) |
|
| 272 |
} |
|
| 273 | ||
| 274 | 85x |
df |
| 275 |
} |
|
| 276 | ||
| 277 |
#' @describeIn h_survival_duration_subgroups Summarizes estimates of the treatment hazard ratio |
|
| 278 |
#' across subgroups in a data frame. `variables` corresponds to the names of variables found in |
|
| 279 |
#' `data`, passed as a named list and requires elements `tte`, `is_event`, `arm` and |
|
| 280 |
#' optionally `subgroups` and `strata`. `groups_lists` optionally specifies |
|
| 281 |
#' groupings for `subgroups` variables. |
|
| 282 |
#' |
|
| 283 |
#' @return |
|
| 284 |
#' * `h_coxph_subgroups_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`, |
|
| 285 |
#' `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`, `var_label`, and `row_type`. |
|
| 286 |
#' |
|
| 287 |
#' @examples |
|
| 288 |
#' # Extract hazard ratio for multiple groups. |
|
| 289 |
#' h_coxph_subgroups_df( |
|
| 290 |
#' variables = list( |
|
| 291 |
#' tte = "AVAL", |
|
| 292 |
#' is_event = "is_event", |
|
| 293 |
#' arm = "ARM", |
|
| 294 |
#' subgroups = c("SEX", "BMRKR2")
|
|
| 295 |
#' ), |
|
| 296 |
#' data = adtte_f |
|
| 297 |
#' ) |
|
| 298 |
#' |
|
| 299 |
#' # Define groupings of BMRKR2 levels. |
|
| 300 |
#' h_coxph_subgroups_df( |
|
| 301 |
#' variables = list( |
|
| 302 |
#' tte = "AVAL", |
|
| 303 |
#' is_event = "is_event", |
|
| 304 |
#' arm = "ARM", |
|
| 305 |
#' subgroups = c("SEX", "BMRKR2")
|
|
| 306 |
#' ), |
|
| 307 |
#' data = adtte_f, |
|
| 308 |
#' groups_lists = list( |
|
| 309 |
#' BMRKR2 = list( |
|
| 310 |
#' "low" = "LOW", |
|
| 311 |
#' "low/medium" = c("LOW", "MEDIUM"),
|
|
| 312 |
#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
|
|
| 313 |
#' ) |
|
| 314 |
#' ) |
|
| 315 |
#' ) |
|
| 316 |
#' |
|
| 317 |
#' # Extract hazard ratio for multiple groups with stratification factors. |
|
| 318 |
#' h_coxph_subgroups_df( |
|
| 319 |
#' variables = list( |
|
| 320 |
#' tte = "AVAL", |
|
| 321 |
#' is_event = "is_event", |
|
| 322 |
#' arm = "ARM", |
|
| 323 |
#' subgroups = c("SEX", "BMRKR2"),
|
|
| 324 |
#' strata = c("STRATA1", "STRATA2")
|
|
| 325 |
#' ), |
|
| 326 |
#' data = adtte_f |
|
| 327 |
#' ) |
|
| 328 |
#' |
|
| 329 |
#' @export |
|
| 330 |
h_coxph_subgroups_df <- function(variables, |
|
| 331 |
data, |
|
| 332 |
groups_lists = list(), |
|
| 333 |
control = control_coxph(), |
|
| 334 |
label_all = "All Patients") {
|
|
| 335 | 17x |
if ("strat" %in% names(variables)) {
|
| 336 | ! |
warning( |
| 337 | ! |
"Warning: the `strat` element name of the `variables` list argument to `h_coxph_subgroups_df() ", |
| 338 | ! |
"was deprecated in tern 0.9.4.\n ", |
| 339 | ! |
"Please use the name `strata` instead of `strat` in the `variables` argument." |
| 340 |
) |
|
| 341 | ! |
variables[["strata"]] <- variables[["strat"]] |
| 342 |
} |
|
| 343 | ||
| 344 | 17x |
checkmate::assert_character(variables$tte) |
| 345 | 17x |
checkmate::assert_character(variables$is_event) |
| 346 | 17x |
checkmate::assert_character(variables$arm) |
| 347 | 17x |
checkmate::assert_character(variables$subgroups, null.ok = TRUE) |
| 348 | 17x |
checkmate::assert_character(variables$strata, null.ok = TRUE) |
| 349 | 17x |
assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2) |
| 350 | 17x |
assert_df_with_variables(data, variables) |
| 351 | 17x |
checkmate::assert_string(label_all) |
| 352 | ||
| 353 |
# Add All Patients. |
|
| 354 | 17x |
result_all <- h_coxph_df( |
| 355 | 17x |
tte = data[[variables$tte]], |
| 356 | 17x |
is_event = data[[variables$is_event]], |
| 357 | 17x |
arm = data[[variables$arm]], |
| 358 | 17x |
strata_data = if (is.null(variables$strata)) NULL else data[variables$strata], |
| 359 | 17x |
control = control |
| 360 |
) |
|
| 361 | 17x |
result_all$subgroup <- label_all |
| 362 | 17x |
result_all$var <- "ALL" |
| 363 | 17x |
result_all$var_label <- label_all |
| 364 | 17x |
result_all$row_type <- "content" |
| 365 | ||
| 366 |
# Add Subgroups. |
|
| 367 | 17x |
if (is.null(variables$subgroups)) {
|
| 368 | 3x |
result_all |
| 369 |
} else {
|
|
| 370 | 14x |
l_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists) |
| 371 | ||
| 372 | 14x |
l_result <- lapply(l_data, function(grp) {
|
| 373 | 64x |
result <- h_coxph_df( |
| 374 | 64x |
tte = grp$df[[variables$tte]], |
| 375 | 64x |
is_event = grp$df[[variables$is_event]], |
| 376 | 64x |
arm = grp$df[[variables$arm]], |
| 377 | 64x |
strata_data = if (is.null(variables$strata)) NULL else grp$df[variables$strata], |
| 378 | 64x |
control = control |
| 379 |
) |
|
| 380 | 64x |
result_labels <- grp$df_labels[rep(1, times = nrow(result)), ] |
| 381 | 64x |
cbind(result, result_labels) |
| 382 |
}) |
|
| 383 | ||
| 384 | 14x |
result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE)) |
| 385 | 14x |
result_subgroups$row_type <- "analysis" |
| 386 | ||
| 387 | 14x |
rbind( |
| 388 | 14x |
result_all, |
| 389 | 14x |
result_subgroups |
| 390 |
) |
|
| 391 |
} |
|
| 392 |
} |
|
| 393 | ||
| 394 |
#' Split data frame by subgroups |
|
| 395 |
#' |
|
| 396 |
#' @description `r lifecycle::badge("stable")`
|
|
| 397 |
#' |
|
| 398 |
#' Split a data frame into a non-nested list of subsets. |
|
| 399 |
#' |
|
| 400 |
#' @inheritParams argument_convention |
|
| 401 |
#' @inheritParams survival_duration_subgroups |
|
| 402 |
#' @param data (`data.frame`)\cr dataset to split. |
|
| 403 |
#' @param subgroups (`character`)\cr names of factor variables from `data` used to create subsets. |
|
| 404 |
#' Unused levels not present in `data` are dropped. Note that the order in this vector |
|
| 405 |
#' determines the order in the downstream table. |
|
| 406 |
#' |
|
| 407 |
#' @return A list with subset data (`df`) and metadata about the subset (`df_labels`). |
|
| 408 |
#' |
|
| 409 |
#' @details Main functionality is to prepare data for use in forest plot layouts. |
|
| 410 |
#' |
|
| 411 |
#' @examples |
|
| 412 |
#' df <- data.frame( |
|
| 413 |
#' x = c(1:5), |
|
| 414 |
#' y = factor(c("A", "B", "A", "B", "A"), levels = c("A", "B", "C")),
|
|
| 415 |
#' z = factor(c("C", "C", "D", "D", "D"), levels = c("D", "C"))
|
|
| 416 |
#' ) |
|
| 417 |
#' formatters::var_labels(df) <- paste("label for", names(df))
|
|
| 418 |
#' |
|
| 419 |
#' h_split_by_subgroups( |
|
| 420 |
#' data = df, |
|
| 421 |
#' subgroups = c("y", "z")
|
|
| 422 |
#' ) |
|
| 423 |
#' |
|
| 424 |
#' h_split_by_subgroups( |
|
| 425 |
#' data = df, |
|
| 426 |
#' subgroups = c("y", "z"),
|
|
| 427 |
#' groups_lists = list( |
|
| 428 |
#' y = list("AB" = c("A", "B"), "C" = "C")
|
|
| 429 |
#' ) |
|
| 430 |
#' ) |
|
| 431 |
#' |
|
| 432 |
#' @export |
|
| 433 |
h_split_by_subgroups <- function(data, |
|
| 434 |
subgroups, |
|
| 435 |
groups_lists = list()) {
|
|
| 436 | 66x |
checkmate::assert_character(subgroups, min.len = 1, any.missing = FALSE) |
| 437 | 66x |
checkmate::assert_list(groups_lists, names = "named") |
| 438 | 66x |
checkmate::assert_subset(names(groups_lists), subgroups) |
| 439 | 66x |
assert_df_with_factors(data, as.list(stats::setNames(subgroups, subgroups))) |
| 440 | ||
| 441 | 66x |
data_labels <- unname(formatters::var_labels(data)) |
| 442 | 66x |
df_subgroups <- data[, subgroups, drop = FALSE] |
| 443 | 66x |
subgroup_labels <- formatters::var_labels(df_subgroups, fill = TRUE) |
| 444 | ||
| 445 | 66x |
l_labels <- Map(function(grp_i, name_i) {
|
| 446 | 120x |
existing_levels <- levels(droplevels(grp_i)) |
| 447 | 120x |
grp_levels <- if (name_i %in% names(groups_lists)) {
|
| 448 |
# For this variable groupings are defined. We check which groups are contained in the data. |
|
| 449 | 11x |
group_list_i <- groups_lists[[name_i]] |
| 450 | 11x |
group_has_levels <- vapply(group_list_i, function(lvls) any(lvls %in% existing_levels), TRUE) |
| 451 | 11x |
names(which(group_has_levels)) |
| 452 |
} else {
|
|
| 453 | 109x |
existing_levels |
| 454 |
} |
|
| 455 | 120x |
df_labels <- data.frame( |
| 456 | 120x |
subgroup = grp_levels, |
| 457 | 120x |
var = name_i, |
| 458 | 120x |
var_label = unname(subgroup_labels[name_i]), |
| 459 | 120x |
stringsAsFactors = FALSE # Rationale is that subgroups may not be unique. |
| 460 |
) |
|
| 461 | 66x |
}, df_subgroups, names(df_subgroups)) |
| 462 | ||
| 463 |
# Create a data frame with one row per subgroup. |
|
| 464 | 66x |
df_labels <- do.call(rbind, args = c(l_labels, make.row.names = FALSE)) |
| 465 | 66x |
row_label <- paste0(df_labels$var, ".", df_labels$subgroup) |
| 466 | 66x |
row_split_var <- factor(row_label, levels = row_label) |
| 467 | ||
| 468 |
# Create a list of data subsets. |
|
| 469 | 66x |
lapply(split(df_labels, row_split_var), function(row_i) {
|
| 470 | 294x |
which_row <- if (row_i$var %in% names(groups_lists)) {
|
| 471 | 31x |
data[[row_i$var]] %in% groups_lists[[row_i$var]][[row_i$subgroup]] |
| 472 |
} else {
|
|
| 473 | 263x |
data[[row_i$var]] == row_i$subgroup |
| 474 |
} |
|
| 475 | 294x |
df <- data[which_row, ] |
| 476 | 294x |
rownames(df) <- NULL |
| 477 | 294x |
formatters::var_labels(df) <- data_labels |
| 478 | ||
| 479 | 294x |
list( |
| 480 | 294x |
df = df, |
| 481 | 294x |
df_labels = data.frame(row_i, row.names = NULL) |
| 482 |
) |
|
| 483 |
}) |
|
| 484 |
} |
| 1 |
#' Helper functions for Cox proportional hazards regression |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Helper functions used in [fit_coxreg_univar()] and [fit_coxreg_multivar()]. |
|
| 6 |
#' |
|
| 7 |
#' @inheritParams argument_convention |
|
| 8 |
#' @inheritParams h_coxreg_univar_extract |
|
| 9 |
#' @inheritParams cox_regression_inter |
|
| 10 |
#' @inheritParams control_coxreg |
|
| 11 |
#' |
|
| 12 |
#' @seealso [cox_regression] |
|
| 13 |
#' |
|
| 14 |
#' @name h_cox_regression |
|
| 15 |
NULL |
|
| 16 | ||
| 17 |
#' @describeIn h_cox_regression Helper for Cox regression formula. Creates a list of formulas. It is used |
|
| 18 |
#' internally by [fit_coxreg_univar()] for the comparison of univariate Cox regression models. |
|
| 19 |
#' |
|
| 20 |
#' @return |
|
| 21 |
#' * `h_coxreg_univar_formulas()` returns a `character` vector coercible into formulas (e.g [stats::as.formula()]). |
|
| 22 |
#' |
|
| 23 |
#' @examples |
|
| 24 |
#' # `h_coxreg_univar_formulas` |
|
| 25 |
#' |
|
| 26 |
#' ## Simple formulas. |
|
| 27 |
#' h_coxreg_univar_formulas( |
|
| 28 |
#' variables = list( |
|
| 29 |
#' time = "time", event = "status", arm = "armcd", covariates = c("X", "y")
|
|
| 30 |
#' ) |
|
| 31 |
#' ) |
|
| 32 |
#' |
|
| 33 |
#' ## Addition of an optional strata. |
|
| 34 |
#' h_coxreg_univar_formulas( |
|
| 35 |
#' variables = list( |
|
| 36 |
#' time = "time", event = "status", arm = "armcd", covariates = c("X", "y"),
|
|
| 37 |
#' strata = "SITE" |
|
| 38 |
#' ) |
|
| 39 |
#' ) |
|
| 40 |
#' |
|
| 41 |
#' ## Inclusion of the interaction term. |
|
| 42 |
#' h_coxreg_univar_formulas( |
|
| 43 |
#' variables = list( |
|
| 44 |
#' time = "time", event = "status", arm = "armcd", covariates = c("X", "y"),
|
|
| 45 |
#' strata = "SITE" |
|
| 46 |
#' ), |
|
| 47 |
#' interaction = TRUE |
|
| 48 |
#' ) |
|
| 49 |
#' |
|
| 50 |
#' ## Only covariates fitted in separate models. |
|
| 51 |
#' h_coxreg_univar_formulas( |
|
| 52 |
#' variables = list( |
|
| 53 |
#' time = "time", event = "status", covariates = c("X", "y")
|
|
| 54 |
#' ) |
|
| 55 |
#' ) |
|
| 56 |
#' |
|
| 57 |
#' @export |
|
| 58 |
h_coxreg_univar_formulas <- function(variables, |
|
| 59 |
interaction = FALSE) {
|
|
| 60 | 50x |
checkmate::assert_list(variables, names = "named") |
| 61 | 50x |
has_arm <- "arm" %in% names(variables) |
| 62 | 50x |
arm_name <- if (has_arm) "arm" else NULL |
| 63 | ||
| 64 | 50x |
checkmate::assert_character(variables$covariates, null.ok = TRUE) |
| 65 | ||
| 66 | 50x |
checkmate::assert_flag(interaction) |
| 67 | ||
| 68 | 50x |
if (!has_arm || is.null(variables$covariates)) {
|
| 69 | 10x |
checkmate::assert_false(interaction) |
| 70 |
} |
|
| 71 | ||
| 72 | 48x |
assert_list_of_variables(variables[c(arm_name, "event", "time")]) |
| 73 | ||
| 74 | 48x |
if (!is.null(variables$covariates)) {
|
| 75 | 47x |
forms <- paste0( |
| 76 | 47x |
"survival::Surv(", variables$time, ", ", variables$event, ") ~ ",
|
| 77 | 47x |
ifelse(has_arm, variables$arm, "1"), |
| 78 | 47x |
ifelse(interaction, " * ", " + "), |
| 79 | 47x |
variables$covariates, |
| 80 | 47x |
ifelse( |
| 81 | 47x |
!is.null(variables$strata), |
| 82 | 47x |
paste0(" + strata(", paste0(variables$strata, collapse = ", "), ")"),
|
| 83 |
"" |
|
| 84 |
) |
|
| 85 |
) |
|
| 86 |
} else {
|
|
| 87 | 1x |
forms <- NULL |
| 88 |
} |
|
| 89 | 48x |
nams <- variables$covariates |
| 90 | 48x |
if (has_arm) {
|
| 91 | 41x |
ref <- paste0( |
| 92 | 41x |
"survival::Surv(", variables$time, ", ", variables$event, ") ~ ",
|
| 93 | 41x |
variables$arm, |
| 94 | 41x |
ifelse( |
| 95 | 41x |
!is.null(variables$strata), |
| 96 | 41x |
paste0( |
| 97 | 41x |
" + strata(", paste0(variables$strata, collapse = ", "), ")"
|
| 98 |
), |
|
| 99 |
"" |
|
| 100 |
) |
|
| 101 |
) |
|
| 102 | 41x |
forms <- c(ref, forms) |
| 103 | 41x |
nams <- c("ref", nams)
|
| 104 |
} |
|
| 105 | 48x |
stats::setNames(forms, nams) |
| 106 |
} |
|
| 107 | ||
| 108 |
#' @describeIn h_cox_regression Helper for multivariate Cox regression formula. Creates a formulas |
|
| 109 |
#' string. It is used internally by [fit_coxreg_multivar()] for the comparison of multivariate Cox |
|
| 110 |
#' regression models. Interactions will not be included in multivariate Cox regression model. |
|
| 111 |
#' |
|
| 112 |
#' @return |
|
| 113 |
#' * `h_coxreg_multivar_formula()` returns a `string` coercible into a formula (e.g [stats::as.formula()]). |
|
| 114 |
#' |
|
| 115 |
#' @examples |
|
| 116 |
#' # `h_coxreg_multivar_formula` |
|
| 117 |
#' |
|
| 118 |
#' h_coxreg_multivar_formula( |
|
| 119 |
#' variables = list( |
|
| 120 |
#' time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE")
|
|
| 121 |
#' ) |
|
| 122 |
#' ) |
|
| 123 |
#' |
|
| 124 |
#' # Addition of an optional strata. |
|
| 125 |
#' h_coxreg_multivar_formula( |
|
| 126 |
#' variables = list( |
|
| 127 |
#' time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE"),
|
|
| 128 |
#' strata = "SITE" |
|
| 129 |
#' ) |
|
| 130 |
#' ) |
|
| 131 |
#' |
|
| 132 |
#' # Example without treatment arm. |
|
| 133 |
#' h_coxreg_multivar_formula( |
|
| 134 |
#' variables = list( |
|
| 135 |
#' time = "AVAL", event = "event", covariates = c("RACE", "AGE"),
|
|
| 136 |
#' strata = "SITE" |
|
| 137 |
#' ) |
|
| 138 |
#' ) |
|
| 139 |
#' |
|
| 140 |
#' @export |
|
| 141 |
h_coxreg_multivar_formula <- function(variables) {
|
|
| 142 | 89x |
checkmate::assert_list(variables, names = "named") |
| 143 | 89x |
has_arm <- "arm" %in% names(variables) |
| 144 | 89x |
arm_name <- if (has_arm) "arm" else NULL |
| 145 | ||
| 146 | 89x |
checkmate::assert_character(variables$covariates, null.ok = TRUE) |
| 147 | ||
| 148 | 89x |
assert_list_of_variables(variables[c(arm_name, "event", "time")]) |
| 149 | ||
| 150 | 89x |
y <- paste0( |
| 151 | 89x |
"survival::Surv(", variables$time, ", ", variables$event, ") ~ ",
|
| 152 | 89x |
ifelse(has_arm, variables$arm, "1") |
| 153 |
) |
|
| 154 | 89x |
if (length(variables$covariates) > 0) {
|
| 155 | 26x |
y <- paste(y, paste(variables$covariates, collapse = " + "), sep = " + ") |
| 156 |
} |
|
| 157 | 89x |
if (!is.null(variables$strata)) {
|
| 158 | 5x |
y <- paste0(y, " + strata(", paste0(variables$strata, collapse = ", "), ")")
|
| 159 |
} |
|
| 160 | 89x |
y |
| 161 |
} |
|
| 162 | ||
| 163 |
#' @describeIn h_cox_regression Utility function to help tabulate the result of |
|
| 164 |
#' a univariate Cox regression model. |
|
| 165 |
#' |
|
| 166 |
#' @param effect (`string`)\cr the treatment variable. |
|
| 167 |
#' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()]. |
|
| 168 |
#' |
|
| 169 |
#' @return |
|
| 170 |
#' * `h_coxreg_univar_extract()` returns a `data.frame` with variables `effect`, `term`, `term_label`, `level`, |
|
| 171 |
#' `n`, `hr`, `lcl`, `ucl`, and `pval`. |
|
| 172 |
#' |
|
| 173 |
#' @examples |
|
| 174 |
#' library(survival) |
|
| 175 |
#' |
|
| 176 |
#' dta_simple <- data.frame( |
|
| 177 |
#' time = c(5, 5, 10, 10, 5, 5, 10, 10), |
|
| 178 |
#' status = c(0, 0, 1, 0, 0, 1, 1, 1), |
|
| 179 |
#' armcd = factor(LETTERS[c(1, 1, 1, 1, 2, 2, 2, 2)], levels = c("A", "B")),
|
|
| 180 |
#' var1 = c(45, 55, 65, 75, 55, 65, 85, 75), |
|
| 181 |
#' var2 = c("F", "M", "F", "M", "F", "M", "F", "U")
|
|
| 182 |
#' ) |
|
| 183 |
#' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple) |
|
| 184 |
#' result <- h_coxreg_univar_extract( |
|
| 185 |
#' effect = "armcd", covar = "armcd", mod = mod, data = dta_simple |
|
| 186 |
#' ) |
|
| 187 |
#' result |
|
| 188 |
#' |
|
| 189 |
#' @export |
|
| 190 |
h_coxreg_univar_extract <- function(effect, |
|
| 191 |
covar, |
|
| 192 |
data, |
|
| 193 |
mod, |
|
| 194 |
control = control_coxreg()) {
|
|
| 195 | 66x |
checkmate::assert_string(covar) |
| 196 | 66x |
checkmate::assert_string(effect) |
| 197 | 66x |
checkmate::assert_class(mod, "coxph") |
| 198 | 66x |
test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method] |
| 199 | ||
| 200 | 66x |
mod_aov <- muffled_car_anova(mod, test_statistic) |
| 201 | 66x |
msum <- summary(mod, conf.int = control$conf_level) |
| 202 | 66x |
sum_cox <- broom::tidy(msum) |
| 203 | ||
| 204 |
# Combine results together. |
|
| 205 | 66x |
effect_aov <- mod_aov[effect, , drop = TRUE] |
| 206 | 66x |
pval <- effect_aov[[grep(pattern = "Pr", x = names(effect_aov)), drop = TRUE]] |
| 207 | 66x |
sum_main <- sum_cox[grepl(effect, sum_cox$level), ] |
| 208 | ||
| 209 | 66x |
term_label <- if (effect == covar) {
|
| 210 | 34x |
paste0( |
| 211 | 34x |
levels(data[[covar]])[2], |
| 212 | 34x |
" vs control (",
|
| 213 | 34x |
levels(data[[covar]])[1], |
| 214 |
")" |
|
| 215 |
) |
|
| 216 |
} else {
|
|
| 217 | 32x |
unname(labels_or_names(data[covar])) |
| 218 |
} |
|
| 219 | 66x |
data.frame( |
| 220 | 66x |
effect = ifelse(covar == effect, "Treatment:", "Covariate:"), |
| 221 | 66x |
term = covar, |
| 222 | 66x |
term_label = term_label, |
| 223 | 66x |
level = levels(data[[effect]])[2], |
| 224 | 66x |
n = mod[["n"]], |
| 225 | 66x |
hr = unname(sum_main["exp(coef)"]), |
| 226 | 66x |
lcl = unname(sum_main[grep("lower", names(sum_main))]),
|
| 227 | 66x |
ucl = unname(sum_main[grep("upper", names(sum_main))]),
|
| 228 | 66x |
pval = pval, |
| 229 | 66x |
stringsAsFactors = FALSE |
| 230 |
) |
|
| 231 |
} |
|
| 232 | ||
| 233 |
#' @describeIn h_cox_regression Tabulation of multivariate Cox regressions. Utility function to help |
|
| 234 |
#' tabulate the result of a multivariate Cox regression model for a treatment/covariate variable. |
|
| 235 |
#' |
|
| 236 |
#' @return |
|
| 237 |
#' * `h_coxreg_multivar_extract()` returns a `data.frame` with variables `pval`, `hr`, `lcl`, `ucl`, `level`, |
|
| 238 |
#' `n`, `term`, and `term_label`. |
|
| 239 |
#' |
|
| 240 |
#' @examples |
|
| 241 |
#' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple) |
|
| 242 |
#' result <- h_coxreg_multivar_extract( |
|
| 243 |
#' var = "var1", mod = mod, data = dta_simple |
|
| 244 |
#' ) |
|
| 245 |
#' result |
|
| 246 |
#' |
|
| 247 |
#' @export |
|
| 248 |
h_coxreg_multivar_extract <- function(var, |
|
| 249 |
data, |
|
| 250 |
mod, |
|
| 251 |
control = control_coxreg()) {
|
|
| 252 | 132x |
test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method] |
| 253 | 132x |
mod_aov <- muffled_car_anova(mod, test_statistic) |
| 254 | ||
| 255 | 132x |
msum <- summary(mod, conf.int = control$conf_level) |
| 256 | 132x |
sum_anova <- broom::tidy(mod_aov) |
| 257 | 132x |
sum_cox <- broom::tidy(msum) |
| 258 | ||
| 259 | 132x |
ret_anova <- sum_anova[sum_anova$term == var, c("term", "p.value")]
|
| 260 | 132x |
names(ret_anova)[2] <- "pval" |
| 261 | 132x |
if (is.factor(data[[var]])) {
|
| 262 | 53x |
ret_cox <- sum_cox[startsWith(prefix = var, x = sum_cox$level), !(names(sum_cox) %in% "exp(-coef)")] |
| 263 |
} else {
|
|
| 264 | 79x |
ret_cox <- sum_cox[(var == sum_cox$level), !(names(sum_cox) %in% "exp(-coef)")] |
| 265 |
} |
|
| 266 | 132x |
names(ret_cox)[1:4] <- c("pval", "hr", "lcl", "ucl")
|
| 267 | 132x |
varlab <- unname(labels_or_names(data[var])) |
| 268 | 132x |
ret_cox$term <- varlab |
| 269 | ||
| 270 | 132x |
if (is.numeric(data[[var]])) {
|
| 271 | 79x |
ret <- ret_cox |
| 272 | 79x |
ret$term_label <- ret$term |
| 273 | 53x |
} else if (length(levels(data[[var]])) <= 2) {
|
| 274 | 34x |
ret_anova$pval <- NA |
| 275 | 34x |
ret_anova$term_label <- paste0(varlab, " (reference = ", levels(data[[var]])[1], ")") |
| 276 | 34x |
ret_cox$level <- gsub(var, "", ret_cox$level) |
| 277 | 34x |
ret_cox$term_label <- ret_cox$level |
| 278 | 34x |
ret <- dplyr::bind_rows(ret_anova, ret_cox) |
| 279 |
} else {
|
|
| 280 | 19x |
ret_anova$term_label <- paste0(varlab, " (reference = ", levels(data[[var]])[1], ")") |
| 281 | 19x |
ret_cox$level <- gsub(var, "", ret_cox$level) |
| 282 | 19x |
ret_cox$term_label <- ret_cox$level |
| 283 | 19x |
ret <- dplyr::bind_rows(ret_anova, ret_cox) |
| 284 |
} |
|
| 285 | ||
| 286 | 132x |
as.data.frame(ret) |
| 287 |
} |
| 1 |
#' Univariate formula special term |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' The special term `univariate` indicate that the model should be fitted individually for |
|
| 6 |
#' every variable included in univariate. |
|
| 7 |
#' |
|
| 8 |
#' @param x (`character`)\cr a vector of variable names separated by commas. |
|
| 9 |
#' |
|
| 10 |
#' @return When used within a model formula, produces univariate models for each variable provided. |
|
| 11 |
#' |
|
| 12 |
#' @details |
|
| 13 |
#' If provided alongside with pairwise specification, the model |
|
| 14 |
#' `y ~ ARM + univariate(SEX, AGE, RACE)` lead to the study and comparison of the models |
|
| 15 |
#' + `y ~ ARM` |
|
| 16 |
#' + `y ~ ARM + SEX` |
|
| 17 |
#' + `y ~ ARM + AGE` |
|
| 18 |
#' + `y ~ ARM + RACE` |
|
| 19 |
#' |
|
| 20 |
#' @export |
|
| 21 |
univariate <- function(x) {
|
|
| 22 | 2x |
structure(x, varname = deparse(substitute(x))) |
| 23 |
} |
|
| 24 | ||
| 25 |
# Get the right-hand-term of a formula |
|
| 26 |
rht <- function(x) {
|
|
| 27 | 4x |
checkmate::assert_formula(x) |
| 28 | 4x |
y <- as.character(rev(x)[[1]]) |
| 29 | 4x |
return(y) |
| 30 |
} |
|
| 31 | ||
| 32 |
#' Hazard ratio estimation in interactions |
|
| 33 |
#' |
|
| 34 |
#' This function estimates the hazard ratios between arms when an interaction variable is given with |
|
| 35 |
#' specific values. |
|
| 36 |
#' |
|
| 37 |
#' @param variable,given (`character(2)`)\cr names of the two variables in the interaction. We seek the estimation of |
|
| 38 |
#' the levels of `variable` given the levels of `given`. |
|
| 39 |
#' @param lvl_var,lvl_given (`character`)\cr corresponding levels given by [levels()]. |
|
| 40 |
#' @param mmat (named `numeric`) a vector filled with `0`s used as a template to obtain the design matrix. |
|
| 41 |
#' @param coef (`numeric`)\cr vector of estimated coefficients. |
|
| 42 |
#' @param vcov (`matrix`)\cr variance-covariance matrix of underlying model. |
|
| 43 |
#' @param conf_level (`proportion`)\cr confidence level of estimate intervals. |
|
| 44 |
#' |
|
| 45 |
#' @details Given the cox regression investigating the effect of Arm (A, B, C; reference A) |
|
| 46 |
#' and Sex (F, M; reference Female). The model is abbreviated: y ~ Arm + Sex + Arm x Sex. |
|
| 47 |
#' The cox regression estimates the coefficients along with a variance-covariance matrix for: |
|
| 48 |
#' |
|
| 49 |
#' - b1 (arm b), b2 (arm c) |
|
| 50 |
#' - b3 (sex m) |
|
| 51 |
#' - b4 (arm b: sex m), b5 (arm c: sex m) |
|
| 52 |
#' |
|
| 53 |
#' Given that I want an estimation of the Hazard Ratio for arm C/sex M, the estimation |
|
| 54 |
#' will be given in reference to arm A/Sex M by exp(b2 + b3 + b5)/ exp(b3) = exp(b2 + b5), |
|
| 55 |
#' therefore the interaction coefficient is given by b2 + b5 while the standard error is obtained |
|
| 56 |
#' as $1.96 * sqrt(Var b2 + Var b5 + 2 * covariance (b2,b5))$ for a confidence level of 0.95. |
|
| 57 |
#' |
|
| 58 |
#' @return A list of matrices (one per level of variable) with rows corresponding to the combinations of |
|
| 59 |
#' `variable` and `given`, with columns: |
|
| 60 |
#' * `coef_hat`: Estimation of the coefficient. |
|
| 61 |
#' * `coef_se`: Standard error of the estimation. |
|
| 62 |
#' * `hr`: Hazard ratio. |
|
| 63 |
#' * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio. |
|
| 64 |
#' |
|
| 65 |
#' @seealso [s_cox_multivariate()]. |
|
| 66 |
#' |
|
| 67 |
#' @examples |
|
| 68 |
#' library(dplyr) |
|
| 69 |
#' library(survival) |
|
| 70 |
#' |
|
| 71 |
#' ADSL <- tern_ex_adsl %>% |
|
| 72 |
#' filter(SEX %in% c("F", "M"))
|
|
| 73 |
#' |
|
| 74 |
#' adtte <- tern_ex_adtte %>% filter(PARAMCD == "PFS") |
|
| 75 |
#' adtte$ARMCD <- droplevels(adtte$ARMCD) |
|
| 76 |
#' adtte$SEX <- droplevels(adtte$SEX) |
|
| 77 |
#' |
|
| 78 |
#' mod <- coxph( |
|
| 79 |
#' formula = Surv(time = AVAL, event = 1 - CNSR) ~ (SEX + ARMCD)^2, |
|
| 80 |
#' data = adtte |
|
| 81 |
#' ) |
|
| 82 |
#' |
|
| 83 |
#' mmat <- stats::model.matrix(mod)[1, ] |
|
| 84 |
#' mmat[!mmat == 0] <- 0 |
|
| 85 |
#' |
|
| 86 |
#' @keywords internal |
|
| 87 |
estimate_coef <- function(variable, given, |
|
| 88 |
lvl_var, lvl_given, |
|
| 89 |
coef, |
|
| 90 |
mmat, |
|
| 91 |
vcov, |
|
| 92 |
conf_level = 0.95) {
|
|
| 93 | 8x |
var_lvl <- paste0(variable, lvl_var[-1]) # [-1]: reference level |
| 94 | 8x |
giv_lvl <- paste0(given, lvl_given) |
| 95 | ||
| 96 | 8x |
design_mat <- expand.grid(variable = var_lvl, given = giv_lvl) |
| 97 | 8x |
design_mat <- design_mat[order(design_mat$variable, design_mat$given), ] |
| 98 | 8x |
design_mat <- within( |
| 99 | 8x |
data = design_mat, |
| 100 | 8x |
expr = {
|
| 101 | 8x |
inter <- paste0(variable, ":", given) |
| 102 | 8x |
rev_inter <- paste0(given, ":", variable) |
| 103 |
} |
|
| 104 |
) |
|
| 105 | ||
| 106 | 8x |
split_by_variable <- design_mat$variable |
| 107 | 8x |
interaction_names <- paste(design_mat$variable, design_mat$given, sep = "/") |
| 108 | ||
| 109 | 8x |
design_mat <- apply( |
| 110 | 8x |
X = design_mat, MARGIN = 1, FUN = function(x) {
|
| 111 | 27x |
mmat[names(mmat) %in% x[-which(names(x) == "given")]] <- 1 |
| 112 | 27x |
return(mmat) |
| 113 |
} |
|
| 114 |
) |
|
| 115 | 8x |
colnames(design_mat) <- interaction_names |
| 116 | ||
| 117 | 8x |
betas <- as.matrix(coef) |
| 118 | ||
| 119 | 8x |
coef_hat <- t(design_mat) %*% betas |
| 120 | 8x |
dimnames(coef_hat)[2] <- "coef" |
| 121 | ||
| 122 | 8x |
coef_se <- apply(design_mat, 2, function(x) {
|
| 123 | 27x |
vcov_el <- as.logical(x) |
| 124 | 27x |
y <- vcov[vcov_el, vcov_el] |
| 125 | 27x |
y <- sum(y) |
| 126 | 27x |
y <- sqrt(y) |
| 127 | 27x |
return(y) |
| 128 |
}) |
|
| 129 | ||
| 130 | 8x |
q_norm <- stats::qnorm((1 + conf_level) / 2) |
| 131 | 8x |
y <- cbind(coef_hat, `se(coef)` = coef_se) |
| 132 | ||
| 133 | 8x |
y <- apply(y, 1, function(x) {
|
| 134 | 27x |
x["hr"] <- exp(x["coef"]) |
| 135 | 27x |
x["lcl"] <- exp(x["coef"] - q_norm * x["se(coef)"]) |
| 136 | 27x |
x["ucl"] <- exp(x["coef"] + q_norm * x["se(coef)"]) |
| 137 | ||
| 138 | 27x |
return(x) |
| 139 |
}) |
|
| 140 | ||
| 141 | 8x |
y <- t(y) |
| 142 | 8x |
y <- by(y, split_by_variable, identity) |
| 143 | 8x |
y <- lapply(y, as.matrix) |
| 144 | ||
| 145 | 8x |
attr(y, "details") <- paste0( |
| 146 | 8x |
"Estimations of ", variable, |
| 147 | 8x |
" hazard ratio given the level of ", given, " compared to ", |
| 148 | 8x |
variable, " level ", lvl_var[1], "." |
| 149 |
) |
|
| 150 | 8x |
return(y) |
| 151 |
} |
|
| 152 | ||
| 153 |
#' `tryCatch` around `car::Anova` |
|
| 154 |
#' |
|
| 155 |
#' Captures warnings when executing [car::Anova]. |
|
| 156 |
#' |
|
| 157 |
#' @inheritParams car::Anova |
|
| 158 |
#' |
|
| 159 |
#' @return A list with item `aov` for the result of the model and `error_text` for the captured warnings. |
|
| 160 |
#' |
|
| 161 |
#' @examples |
|
| 162 |
#' # `car::Anova` on cox regression model including strata and expected |
|
| 163 |
#' # a likelihood ratio test triggers a warning as only Wald method is |
|
| 164 |
#' # accepted. |
|
| 165 |
#' |
|
| 166 |
#' library(survival) |
|
| 167 |
#' |
|
| 168 |
#' mod <- coxph( |
|
| 169 |
#' formula = Surv(time = futime, event = fustat) ~ factor(rx) + strata(ecog.ps), |
|
| 170 |
#' data = ovarian |
|
| 171 |
#' ) |
|
| 172 |
#' |
|
| 173 |
#' @keywords internal |
|
| 174 |
try_car_anova <- function(mod, |
|
| 175 |
test.statistic) { # nolint
|
|
| 176 | 2x |
y <- tryCatch( |
| 177 | 2x |
withCallingHandlers( |
| 178 | 2x |
expr = {
|
| 179 | 2x |
warn_text <- c() |
| 180 | 2x |
list( |
| 181 | 2x |
aov = car::Anova( |
| 182 | 2x |
mod, |
| 183 | 2x |
test.statistic = test.statistic, |
| 184 | 2x |
type = "III" |
| 185 |
), |
|
| 186 | 2x |
warn_text = warn_text |
| 187 |
) |
|
| 188 |
}, |
|
| 189 | 2x |
warning = function(w) {
|
| 190 |
# If a warning is detected it is handled as "w". |
|
| 191 | ! |
warn_text <<- trimws(paste0("Warning in `try_car_anova`: ", w))
|
| 192 | ||
| 193 |
# A warning is sometimes expected, then, we want to restart |
|
| 194 |
# the execution while ignoring the warning. |
|
| 195 | ! |
invokeRestart("muffleWarning")
|
| 196 |
} |
|
| 197 |
), |
|
| 198 | 2x |
finally = {
|
| 199 |
} |
|
| 200 |
) |
|
| 201 | ||
| 202 | 2x |
return(y) |
| 203 |
} |
|
| 204 | ||
| 205 |
#' Fit a Cox regression model and ANOVA |
|
| 206 |
#' |
|
| 207 |
#' The functions derives the effect p-values using [car::Anova()] from [survival::coxph()] results. |
|
| 208 |
#' |
|
| 209 |
#' @inheritParams t_coxreg |
|
| 210 |
#' |
|
| 211 |
#' @return A list with items `mod` (results of [survival::coxph()]), `msum` (result of `summary`) and |
|
| 212 |
#' `aov` (result of [car::Anova()]). |
|
| 213 |
#' |
|
| 214 |
#' @noRd |
|
| 215 |
fit_n_aov <- function(formula, |
|
| 216 |
data = data, |
|
| 217 |
conf_level = conf_level, |
|
| 218 |
pval_method = c("wald", "likelihood"),
|
|
| 219 |
...) {
|
|
| 220 | 1x |
pval_method <- match.arg(pval_method) |
| 221 | ||
| 222 | 1x |
environment(formula) <- environment() |
| 223 | 1x |
suppressWarnings({
|
| 224 |
# We expect some warnings due to coxph which fails strict programming. |
|
| 225 | 1x |
mod <- survival::coxph(formula, data = data, ...) |
| 226 | 1x |
msum <- summary(mod, conf.int = conf_level) |
| 227 |
}) |
|
| 228 | ||
| 229 | 1x |
aov <- try_car_anova( |
| 230 | 1x |
mod, |
| 231 | 1x |
test.statistic = switch(pval_method, |
| 232 | 1x |
"wald" = "Wald", |
| 233 | 1x |
"likelihood" = "LR" |
| 234 |
) |
|
| 235 |
) |
|
| 236 | ||
| 237 | 1x |
warn_attr <- aov$warn_text |
| 238 | ! |
if (!is.null(aov$warn_text)) message(warn_attr) |
| 239 | ||
| 240 | 1x |
aov <- aov$aov |
| 241 | 1x |
y <- list(mod = mod, msum = msum, aov = aov) |
| 242 | 1x |
attr(y, "message") <- warn_attr |
| 243 | ||
| 244 | 1x |
return(y) |
| 245 |
} |
|
| 246 | ||
| 247 |
# argument_checks |
|
| 248 |
check_formula <- function(formula) {
|
|
| 249 | 1x |
if (!(inherits(formula, "formula"))) {
|
| 250 | 1x |
stop("Check `formula`. A formula should resemble `Surv(time = AVAL, event = 1 - CNSR) ~ study_arm(ARMCD)`.")
|
| 251 |
} |
|
| 252 | ||
| 253 | ! |
invisible() |
| 254 |
} |
|
| 255 | ||
| 256 |
check_covariate_formulas <- function(covariates) {
|
|
| 257 | 1x |
if (!all(vapply(X = covariates, FUN = inherits, what = "formula", FUN.VALUE = TRUE)) || is.null(covariates)) {
|
| 258 | 1x |
stop("Check `covariates`, it should be a list of right-hand-term formulas, e.g. list(Age = ~AGE).")
|
| 259 |
} |
|
| 260 | ||
| 261 | ! |
invisible() |
| 262 |
} |
|
| 263 | ||
| 264 |
name_covariate_names <- function(covariates) {
|
|
| 265 | 1x |
miss_names <- names(covariates) == "" |
| 266 | 1x |
no_names <- is.null(names(covariates)) |
| 267 | ! |
if (any(miss_names)) names(covariates)[miss_names] <- vapply(covariates[miss_names], FUN = rht, FUN.VALUE = "name") |
| 268 | ! |
if (no_names) names(covariates) <- vapply(covariates, FUN = rht, FUN.VALUE = "name") |
| 269 | 1x |
return(covariates) |
| 270 |
} |
|
| 271 | ||
| 272 |
check_increments <- function(increments, covariates) {
|
|
| 273 | 1x |
if (!is.null(increments)) {
|
| 274 | 1x |
covariates <- vapply(covariates, FUN = rht, FUN.VALUE = "name") |
| 275 | 1x |
lapply( |
| 276 | 1x |
X = names(increments), FUN = function(x) {
|
| 277 | 3x |
if (!x %in% covariates) {
|
| 278 | 1x |
warning( |
| 279 | 1x |
paste( |
| 280 | 1x |
"Check `increments`, the `increment` for ", x, |
| 281 | 1x |
"doesn't match any names in investigated covariate(s)." |
| 282 |
) |
|
| 283 |
) |
|
| 284 |
} |
|
| 285 |
} |
|
| 286 |
) |
|
| 287 |
} |
|
| 288 | ||
| 289 | 1x |
invisible() |
| 290 |
} |
|
| 291 | ||
| 292 |
#' Multivariate Cox model - summarized results |
|
| 293 |
#' |
|
| 294 |
#' Analyses based on multivariate Cox model are usually not performed for the Controlled Substance Reporting or |
|
| 295 |
#' regulatory documents but serve exploratory purposes only (e.g., for publication). In practice, the model usually |
|
| 296 |
#' includes only the main effects (without interaction terms). It produces the hazard ratio estimates for each of the |
|
| 297 |
#' covariates included in the model. |
|
| 298 |
#' The analysis follows the same principles (e.g., stratified vs. unstratified analysis and tie handling) as the |
|
| 299 |
#' usual Cox model analysis. Since there is usually no pre-specified hypothesis testing for such analysis, |
|
| 300 |
#' the p.values need to be interpreted with caution. (**Statistical Analysis of Clinical Trials Data with R**, |
|
| 301 |
#' `NEST's bookdown`) |
|
| 302 |
#' |
|
| 303 |
#' @param formula (`formula`)\cr a formula corresponding to the investigated [survival::Surv()] survival model |
|
| 304 |
#' including covariates. |
|
| 305 |
#' @param data (`data.frame`)\cr a data frame which includes the variable in formula and covariates. |
|
| 306 |
#' @param conf_level (`proportion`)\cr the confidence level for the hazard ratio interval estimations. Default is 0.95. |
|
| 307 |
#' @param pval_method (`string`)\cr the method used for the estimation of p-values, should be one of |
|
| 308 |
#' `"wald"` (default) or `"likelihood"`. |
|
| 309 |
#' @param ... optional parameters passed to [survival::coxph()]. Can include `ties`, a character string specifying the |
|
| 310 |
#' method for tie handling, one of `exact` (default), `efron`, `breslow`. |
|
| 311 |
#' |
|
| 312 |
#' @return A `list` with elements `mod`, `msum`, `aov`, and `coef_inter`. |
|
| 313 |
#' |
|
| 314 |
#' @details The output is limited to single effect terms. Work in ongoing for estimation of interaction terms |
|
| 315 |
#' but is out of scope as defined by the Global Data Standards Repository |
|
| 316 |
#' (**`GDS_Standard_TLG_Specs_Tables_2.doc`**). |
|
| 317 |
#' |
|
| 318 |
#' @seealso [estimate_coef()]. |
|
| 319 |
#' |
|
| 320 |
#' @examples |
|
| 321 |
#' library(dplyr) |
|
| 322 |
#' |
|
| 323 |
#' adtte <- tern_ex_adtte |
|
| 324 |
#' adtte_f <- subset(adtte, PARAMCD == "OS") # _f: filtered |
|
| 325 |
#' adtte_f <- filter( |
|
| 326 |
#' adtte_f, |
|
| 327 |
#' PARAMCD == "OS" & |
|
| 328 |
#' SEX %in% c("F", "M") &
|
|
| 329 |
#' RACE %in% c("ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE")
|
|
| 330 |
#' ) |
|
| 331 |
#' adtte_f$SEX <- droplevels(adtte_f$SEX) |
|
| 332 |
#' adtte_f$RACE <- droplevels(adtte_f$RACE) |
|
| 333 |
#' |
|
| 334 |
#' @keywords internal |
|
| 335 |
s_cox_multivariate <- function(formula, data, |
|
| 336 |
conf_level = 0.95, |
|
| 337 |
pval_method = c("wald", "likelihood"),
|
|
| 338 |
...) {
|
|
| 339 | 1x |
tf <- stats::terms(formula, specials = c("strata"))
|
| 340 | 1x |
covariates <- rownames(attr(tf, "factors"))[-c(1, unlist(attr(tf, "specials")))] |
| 341 | 1x |
lapply( |
| 342 | 1x |
X = covariates, |
| 343 | 1x |
FUN = function(x) {
|
| 344 | 3x |
if (is.character(data[[x]])) {
|
| 345 | 1x |
data[[x]] <<- as.factor(data[[x]]) |
| 346 |
} |
|
| 347 | 3x |
invisible() |
| 348 |
} |
|
| 349 |
) |
|
| 350 | 1x |
pval_method <- match.arg(pval_method) |
| 351 | ||
| 352 |
# Results directly exported from environment(fit_n_aov) to environment(s_function_draft) |
|
| 353 | 1x |
y <- fit_n_aov( |
| 354 | 1x |
formula = formula, |
| 355 | 1x |
data = data, |
| 356 | 1x |
conf_level = conf_level, |
| 357 | 1x |
pval_method = pval_method, |
| 358 |
... |
|
| 359 |
) |
|
| 360 | 1x |
mod <- y$mod |
| 361 | 1x |
aov <- y$aov |
| 362 | 1x |
msum <- y$msum |
| 363 | 1x |
list2env(as.list(y), environment()) |
| 364 | ||
| 365 | 1x |
all_term_labs <- attr(mod$terms, "term.labels") |
| 366 | 1x |
term_labs <- all_term_labs[which(attr(mod$terms, "order") == 1)] |
| 367 | 1x |
names(term_labs) <- term_labs |
| 368 | ||
| 369 | 1x |
coef_inter <- NULL |
| 370 | 1x |
if (any(attr(mod$terms, "order") > 1)) {
|
| 371 | 1x |
for_inter <- all_term_labs[attr(mod$terms, "order") > 1] |
| 372 | 1x |
names(for_inter) <- for_inter |
| 373 | 1x |
mmat <- stats::model.matrix(mod)[1, ] |
| 374 | 1x |
mmat[!mmat == 0] <- 0 |
| 375 | 1x |
mcoef <- stats::coef(mod) |
| 376 | 1x |
mvcov <- stats::vcov(mod) |
| 377 | ||
| 378 | 1x |
estimate_coef_local <- function(variable, given) {
|
| 379 | 6x |
estimate_coef( |
| 380 | 6x |
variable, given, |
| 381 | 6x |
coef = mcoef, mmat = mmat, vcov = mvcov, conf_level = conf_level, |
| 382 | 6x |
lvl_var = levels(data[[variable]]), lvl_given = levels(data[[given]]) |
| 383 |
) |
|
| 384 |
} |
|
| 385 | ||
| 386 | 1x |
coef_inter <- lapply( |
| 387 | 1x |
for_inter, function(x) {
|
| 388 | 3x |
y <- attr(mod$terms, "factors")[, x] |
| 389 | 3x |
y <- names(y[y > 0]) |
| 390 | 3x |
Map(estimate_coef_local, variable = y, given = rev(y)) |
| 391 |
} |
|
| 392 |
) |
|
| 393 |
} |
|
| 394 | ||
| 395 | 1x |
list(mod = mod, msum = msum, aov = aov, coef_inter = coef_inter) |
| 396 |
} |
| 1 |
#' Helper functions for tabulating binary response by subgroup |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Helper functions that tabulate in a data frame statistics such as response rate |
|
| 6 |
#' and odds ratio for population subgroups. |
|
| 7 |
#' |
|
| 8 |
#' @inheritParams argument_convention |
|
| 9 |
#' @inheritParams response_subgroups |
|
| 10 |
#' @param arm (`factor`)\cr the treatment group variable. |
|
| 11 |
#' |
|
| 12 |
#' @details Main functionality is to prepare data for use in a layout-creating function. |
|
| 13 |
#' |
|
| 14 |
#' @examples |
|
| 15 |
#' library(dplyr) |
|
| 16 |
#' library(forcats) |
|
| 17 |
#' |
|
| 18 |
#' adrs <- tern_ex_adrs |
|
| 19 |
#' adrs_labels <- formatters::var_labels(adrs) |
|
| 20 |
#' |
|
| 21 |
#' adrs_f <- adrs %>% |
|
| 22 |
#' filter(PARAMCD == "BESRSPI") %>% |
|
| 23 |
#' filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
|
|
| 24 |
#' droplevels() %>% |
|
| 25 |
#' mutate( |
|
| 26 |
#' # Reorder levels of factor to make the placebo group the reference arm. |
|
| 27 |
#' ARM = fct_relevel(ARM, "B: Placebo"), |
|
| 28 |
#' rsp = AVALC == "CR" |
|
| 29 |
#' ) |
|
| 30 |
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response") |
|
| 31 |
#' |
|
| 32 |
#' @name h_response_subgroups |
|
| 33 |
NULL |
|
| 34 | ||
| 35 |
#' @describeIn h_response_subgroups Helper to prepare a data frame of binary responses by arm. |
|
| 36 |
#' |
|
| 37 |
#' @return |
|
| 38 |
#' * `h_proportion_df()` returns a `data.frame` with columns `arm`, `n`, `n_rsp`, and `prop`. |
|
| 39 |
#' |
|
| 40 |
#' @examples |
|
| 41 |
#' h_proportion_df( |
|
| 42 |
#' c(TRUE, FALSE, FALSE), |
|
| 43 |
#' arm = factor(c("A", "A", "B"), levels = c("A", "B"))
|
|
| 44 |
#' ) |
|
| 45 |
#' |
|
| 46 |
#' @export |
|
| 47 |
h_proportion_df <- function(rsp, arm) {
|
|
| 48 | 79x |
checkmate::assert_logical(rsp) |
| 49 | 78x |
assert_valid_factor(arm, len = length(rsp)) |
| 50 | 78x |
non_missing_rsp <- !is.na(rsp) |
| 51 | 78x |
rsp <- rsp[non_missing_rsp] |
| 52 | 78x |
arm <- arm[non_missing_rsp] |
| 53 | ||
| 54 | 78x |
lst_rsp <- split(rsp, arm) |
| 55 | 78x |
lst_results <- Map(function(x, arm) {
|
| 56 | 156x |
if (length(x) > 0) {
|
| 57 | 154x |
s_prop <- s_proportion(df = x) |
| 58 | 154x |
data.frame( |
| 59 | 154x |
arm = arm, |
| 60 | 154x |
n = length(x), |
| 61 | 154x |
n_rsp = unname(s_prop$n_prop[1]), |
| 62 | 154x |
prop = unname(s_prop$n_prop[2]), |
| 63 | 154x |
stringsAsFactors = FALSE |
| 64 |
) |
|
| 65 |
} else {
|
|
| 66 | 2x |
data.frame( |
| 67 | 2x |
arm = arm, |
| 68 | 2x |
n = 0L, |
| 69 | 2x |
n_rsp = NA, |
| 70 | 2x |
prop = NA, |
| 71 | 2x |
stringsAsFactors = FALSE |
| 72 |
) |
|
| 73 |
} |
|
| 74 | 78x |
}, lst_rsp, names(lst_rsp)) |
| 75 | ||
| 76 | 78x |
df <- do.call(rbind, args = c(lst_results, make.row.names = FALSE)) |
| 77 | 78x |
df$arm <- factor(df$arm, levels = levels(arm)) |
| 78 | 78x |
df |
| 79 |
} |
|
| 80 | ||
| 81 |
#' @describeIn h_response_subgroups Summarizes proportion of binary responses by arm and across subgroups |
|
| 82 |
#' in a data frame. `variables` corresponds to the names of variables found in `data`, passed as a named list and |
|
| 83 |
#' requires elements `rsp`, `arm` and optionally `subgroups`. `groups_lists` optionally specifies |
|
| 84 |
#' groupings for `subgroups` variables. |
|
| 85 |
#' |
|
| 86 |
#' @return |
|
| 87 |
#' * `h_proportion_subgroups_df()` returns a `data.frame` with columns `arm`, `n`, `n_rsp`, `prop`, `subgroup`, |
|
| 88 |
#' `var`, `var_label`, and `row_type`. |
|
| 89 |
#' |
|
| 90 |
#' @examples |
|
| 91 |
#' h_proportion_subgroups_df( |
|
| 92 |
#' variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
|
|
| 93 |
#' data = adrs_f |
|
| 94 |
#' ) |
|
| 95 |
#' |
|
| 96 |
#' # Define groupings for BMRKR2 levels. |
|
| 97 |
#' h_proportion_subgroups_df( |
|
| 98 |
#' variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
|
|
| 99 |
#' data = adrs_f, |
|
| 100 |
#' groups_lists = list( |
|
| 101 |
#' BMRKR2 = list( |
|
| 102 |
#' "low" = "LOW", |
|
| 103 |
#' "low/medium" = c("LOW", "MEDIUM"),
|
|
| 104 |
#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
|
|
| 105 |
#' ) |
|
| 106 |
#' ) |
|
| 107 |
#' ) |
|
| 108 |
#' |
|
| 109 |
#' @export |
|
| 110 |
h_proportion_subgroups_df <- function(variables, |
|
| 111 |
data, |
|
| 112 |
groups_lists = list(), |
|
| 113 |
label_all = "All Patients") {
|
|
| 114 | 17x |
checkmate::assert_character(variables$rsp) |
| 115 | 17x |
checkmate::assert_character(variables$arm) |
| 116 | 17x |
checkmate::assert_character(variables$subgroups, null.ok = TRUE) |
| 117 | 17x |
assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2) |
| 118 | 17x |
assert_df_with_variables(data, variables) |
| 119 | 17x |
checkmate::assert_string(label_all) |
| 120 | ||
| 121 |
# Add All Patients. |
|
| 122 | 17x |
result_all <- h_proportion_df(data[[variables$rsp]], data[[variables$arm]]) |
| 123 | 17x |
result_all$subgroup <- label_all |
| 124 | 17x |
result_all$var <- "ALL" |
| 125 | 17x |
result_all$var_label <- label_all |
| 126 | 17x |
result_all$row_type <- "content" |
| 127 | ||
| 128 |
# Add Subgroups. |
|
| 129 | 17x |
if (is.null(variables$subgroups)) {
|
| 130 | 3x |
result_all |
| 131 |
} else {
|
|
| 132 | 14x |
l_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists) |
| 133 | ||
| 134 | 14x |
l_result <- lapply(l_data, function(grp) {
|
| 135 | 58x |
result <- h_proportion_df(grp$df[[variables$rsp]], grp$df[[variables$arm]]) |
| 136 | 58x |
result_labels <- grp$df_labels[rep(1, times = nrow(result)), ] |
| 137 | 58x |
cbind(result, result_labels) |
| 138 |
}) |
|
| 139 | 14x |
result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE)) |
| 140 | 14x |
result_subgroups$row_type <- "analysis" |
| 141 | ||
| 142 | 14x |
rbind( |
| 143 | 14x |
result_all, |
| 144 | 14x |
result_subgroups |
| 145 |
) |
|
| 146 |
} |
|
| 147 |
} |
|
| 148 | ||
| 149 |
#' @describeIn h_response_subgroups Helper to prepare a data frame with estimates of |
|
| 150 |
#' the odds ratio between a treatment and a control arm. |
|
| 151 |
#' |
|
| 152 |
#' @inheritParams response_subgroups |
|
| 153 |
#' @param strata_data (`factor`, `data.frame`, or `NULL`)\cr required if stratified analysis is performed. |
|
| 154 |
#' |
|
| 155 |
#' @return |
|
| 156 |
#' * `h_odds_ratio_df()` returns a `data.frame` with columns `arm`, `n_tot`, `or`, `lcl`, `ucl`, `conf_level`, and |
|
| 157 |
#' optionally `pval` and `pval_label`. |
|
| 158 |
#' |
|
| 159 |
#' @examples |
|
| 160 |
#' # Unstratatified analysis. |
|
| 161 |
#' h_odds_ratio_df( |
|
| 162 |
#' c(TRUE, FALSE, FALSE, TRUE), |
|
| 163 |
#' arm = factor(c("A", "A", "B", "B"), levels = c("A", "B"))
|
|
| 164 |
#' ) |
|
| 165 |
#' |
|
| 166 |
#' # Include p-value. |
|
| 167 |
#' h_odds_ratio_df(adrs_f$rsp, adrs_f$ARM, method = "chisq") |
|
| 168 |
#' |
|
| 169 |
#' # Stratatified analysis. |
|
| 170 |
#' h_odds_ratio_df( |
|
| 171 |
#' rsp = adrs_f$rsp, |
|
| 172 |
#' arm = adrs_f$ARM, |
|
| 173 |
#' strata_data = adrs_f[, c("STRATA1", "STRATA2")],
|
|
| 174 |
#' method = "cmh" |
|
| 175 |
#' ) |
|
| 176 |
#' |
|
| 177 |
#' @export |
|
| 178 |
h_odds_ratio_df <- function(rsp, arm, strata_data = NULL, conf_level = 0.95, method = NULL) {
|
|
| 179 | 84x |
assert_valid_factor(arm, n.levels = 2, len = length(rsp)) |
| 180 | ||
| 181 | 84x |
df_rsp <- data.frame( |
| 182 | 84x |
rsp = rsp, |
| 183 | 84x |
arm = arm |
| 184 |
) |
|
| 185 | ||
| 186 | 84x |
if (!is.null(strata_data)) {
|
| 187 | 11x |
strata_var <- interaction(strata_data, drop = TRUE) |
| 188 | 11x |
strata_name <- "strata" |
| 189 | ||
| 190 | 11x |
assert_valid_factor(strata_var, len = nrow(df_rsp)) |
| 191 | ||
| 192 | 11x |
df_rsp[[strata_name]] <- strata_var |
| 193 |
} else {
|
|
| 194 | 73x |
strata_name <- NULL |
| 195 |
} |
|
| 196 | ||
| 197 | 84x |
l_df <- split(df_rsp, arm) |
| 198 | ||
| 199 | 84x |
if (nrow(l_df[[1]]) > 0 && nrow(l_df[[2]]) > 0) {
|
| 200 |
# Odds ratio and CI. |
|
| 201 | 82x |
result_odds_ratio <- s_odds_ratio( |
| 202 | 82x |
df = l_df[[2]], |
| 203 | 82x |
.var = "rsp", |
| 204 | 82x |
.ref_group = l_df[[1]], |
| 205 | 82x |
.in_ref_col = FALSE, |
| 206 | 82x |
.df_row = df_rsp, |
| 207 | 82x |
variables = list(arm = "arm", strata = strata_name), |
| 208 | 82x |
conf_level = conf_level |
| 209 |
) |
|
| 210 | ||
| 211 | 82x |
df <- data.frame( |
| 212 |
# Dummy column needed downstream to create a nested header. |
|
| 213 | 82x |
arm = " ", |
| 214 | 82x |
n_tot = unname(result_odds_ratio$n_tot["n_tot"]), |
| 215 | 82x |
or = unname(result_odds_ratio$or_ci["est"]), |
| 216 | 82x |
lcl = unname(result_odds_ratio$or_ci["lcl"]), |
| 217 | 82x |
ucl = unname(result_odds_ratio$or_ci["ucl"]), |
| 218 | 82x |
conf_level = conf_level, |
| 219 | 82x |
stringsAsFactors = FALSE |
| 220 |
) |
|
| 221 | ||
| 222 | 82x |
if (!is.null(method)) {
|
| 223 |
# Test for difference. |
|
| 224 | 44x |
result_test <- s_test_proportion_diff( |
| 225 | 44x |
df = l_df[[2]], |
| 226 | 44x |
.var = "rsp", |
| 227 | 44x |
.ref_group = l_df[[1]], |
| 228 | 44x |
.in_ref_col = FALSE, |
| 229 | 44x |
variables = list(strata = strata_name), |
| 230 | 44x |
method = method |
| 231 |
) |
|
| 232 | ||
| 233 | 44x |
df$pval <- as.numeric(result_test$pval) |
| 234 | 44x |
df$pval_label <- obj_label(result_test$pval) |
| 235 |
} |
|
| 236 | ||
| 237 |
# In those cases cannot go through the model so will obtain n_tot from data. |
|
| 238 |
} else if ( |
|
| 239 | 2x |
(nrow(l_df[[1]]) == 0 && nrow(l_df[[2]]) > 0) || |
| 240 | 2x |
(nrow(l_df[[1]]) > 0 && nrow(l_df[[2]]) == 0) |
| 241 |
) {
|
|
| 242 | 2x |
df <- data.frame( |
| 243 |
# Dummy column needed downstream to create a nested header. |
|
| 244 | 2x |
arm = " ", |
| 245 | 2x |
n_tot = sum(stats::complete.cases(df_rsp)), |
| 246 | 2x |
or = NA, |
| 247 | 2x |
lcl = NA, |
| 248 | 2x |
ucl = NA, |
| 249 | 2x |
conf_level = conf_level, |
| 250 | 2x |
stringsAsFactors = FALSE |
| 251 |
) |
|
| 252 | 2x |
if (!is.null(method)) {
|
| 253 | 2x |
df$pval <- NA |
| 254 | 2x |
df$pval_label <- NA |
| 255 |
} |
|
| 256 |
} else {
|
|
| 257 | ! |
df <- data.frame( |
| 258 |
# Dummy column needed downstream to create a nested header. |
|
| 259 | ! |
arm = " ", |
| 260 | ! |
n_tot = 0L, |
| 261 | ! |
or = NA, |
| 262 | ! |
lcl = NA, |
| 263 | ! |
ucl = NA, |
| 264 | ! |
conf_level = conf_level, |
| 265 | ! |
stringsAsFactors = FALSE |
| 266 |
) |
|
| 267 | ||
| 268 | ! |
if (!is.null(method)) {
|
| 269 | ! |
df$pval <- NA |
| 270 | ! |
df$pval_label <- NA |
| 271 |
} |
|
| 272 |
} |
|
| 273 | ||
| 274 | 84x |
df |
| 275 |
} |
|
| 276 | ||
| 277 |
#' @describeIn h_response_subgroups Summarizes estimates of the odds ratio between a treatment and a control |
|
| 278 |
#' arm across subgroups in a data frame. `variables` corresponds to the names of variables found in |
|
| 279 |
#' `data`, passed as a named list and requires elements `rsp`, `arm` and optionally `subgroups` |
|
| 280 |
#' and `strata`. `groups_lists` optionally specifies groupings for `subgroups` variables. |
|
| 281 |
#' |
|
| 282 |
#' @return |
|
| 283 |
#' * `h_odds_ratio_subgroups_df()` returns a `data.frame` with columns `arm`, `n_tot`, `or`, `lcl`, `ucl`, |
|
| 284 |
#' `conf_level`, `subgroup`, `var`, `var_label`, and `row_type`. |
|
| 285 |
#' |
|
| 286 |
#' @examples |
|
| 287 |
#' # Unstratified analysis. |
|
| 288 |
#' h_odds_ratio_subgroups_df( |
|
| 289 |
#' variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
|
|
| 290 |
#' data = adrs_f |
|
| 291 |
#' ) |
|
| 292 |
#' |
|
| 293 |
#' # Stratified analysis. |
|
| 294 |
#' h_odds_ratio_subgroups_df( |
|
| 295 |
#' variables = list( |
|
| 296 |
#' rsp = "rsp", |
|
| 297 |
#' arm = "ARM", |
|
| 298 |
#' subgroups = c("SEX", "BMRKR2"),
|
|
| 299 |
#' strata = c("STRATA1", "STRATA2")
|
|
| 300 |
#' ), |
|
| 301 |
#' data = adrs_f |
|
| 302 |
#' ) |
|
| 303 |
#' |
|
| 304 |
#' # Define groupings of BMRKR2 levels. |
|
| 305 |
#' h_odds_ratio_subgroups_df( |
|
| 306 |
#' variables = list( |
|
| 307 |
#' rsp = "rsp", |
|
| 308 |
#' arm = "ARM", |
|
| 309 |
#' subgroups = c("SEX", "BMRKR2")
|
|
| 310 |
#' ), |
|
| 311 |
#' data = adrs_f, |
|
| 312 |
#' groups_lists = list( |
|
| 313 |
#' BMRKR2 = list( |
|
| 314 |
#' "low" = "LOW", |
|
| 315 |
#' "low/medium" = c("LOW", "MEDIUM"),
|
|
| 316 |
#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
|
|
| 317 |
#' ) |
|
| 318 |
#' ) |
|
| 319 |
#' ) |
|
| 320 |
#' |
|
| 321 |
#' @export |
|
| 322 |
h_odds_ratio_subgroups_df <- function(variables, |
|
| 323 |
data, |
|
| 324 |
groups_lists = list(), |
|
| 325 |
conf_level = 0.95, |
|
| 326 |
method = NULL, |