1 |
#' Tabulate biomarker effects on survival by subgroup |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Tabulate the estimated effects of multiple continuous biomarker variables |
|
6 |
#' across population subgroups. |
|
7 |
#' |
|
8 |
#' @inheritParams fit_coxreg_multivar |
|
9 |
#' @inheritParams survival_duration_subgroups |
|
10 |
#' @inheritParams argument_convention |
|
11 |
#' @param df (`data.frame`)\cr containing all analysis variables, as returned by |
|
12 |
#' [extract_survival_biomarkers()]. |
|
13 |
#' @param vars (`character`)\cr the names of statistics to be reported among: |
|
14 |
#' * `n_tot_events`: Total number of events per group. |
|
15 |
#' * `n_tot`: Total number of observations per group. |
|
16 |
#' * `median`: Median survival time. |
|
17 |
#' * `hr`: Hazard ratio. |
|
18 |
#' * `ci`: Confidence interval of hazard ratio. |
|
19 |
#' * `pval`: p-value of the effect. |
|
20 |
#' Note, one of the statistics `n_tot` and `n_tot_events`, as well as both `hr` and `ci` are required. |
|
21 |
#' |
|
22 |
#' @details These functions create a layout starting from a data frame which contains |
|
23 |
#' the required statistics. The tables are then typically used as input for forest plots. |
|
24 |
#' |
|
25 |
#' @examples |
|
26 |
#' library(dplyr) |
|
27 |
#' |
|
28 |
#' adtte <- tern_ex_adtte |
|
29 |
#' |
|
30 |
#' # Save variable labels before data processing steps. |
|
31 |
#' adtte_labels <- formatters::var_labels(adtte) |
|
32 |
#' |
|
33 |
#' adtte_f <- adtte %>% |
|
34 |
#' filter(PARAMCD == "OS") %>% |
|
35 |
#' mutate( |
|
36 |
#' AVALU = as.character(AVALU), |
|
37 |
#' is_event = CNSR == 0 |
|
38 |
#' ) |
|
39 |
#' labels <- c("AVALU" = adtte_labels[["AVALU"]], "is_event" = "Event Flag") |
|
40 |
#' formatters::var_labels(adtte_f)[names(labels)] <- labels |
|
41 |
#' |
|
42 |
#' # Typical analysis of two continuous biomarkers `BMRKR1` and `AGE`, |
|
43 |
#' # in multiple regression models containing one covariate `RACE`, |
|
44 |
#' # as well as one stratification variable `STRATA1`. The subgroups |
|
45 |
#' # are defined by the levels of `BMRKR2`. |
|
46 |
#' |
|
47 |
#' df <- extract_survival_biomarkers( |
|
48 |
#' variables = list( |
|
49 |
#' tte = "AVAL", |
|
50 |
#' is_event = "is_event", |
|
51 |
#' biomarkers = c("BMRKR1", "AGE"), |
|
52 |
#' strata = "STRATA1", |
|
53 |
#' covariates = "SEX", |
|
54 |
#' subgroups = "BMRKR2" |
|
55 |
#' ), |
|
56 |
#' label_all = "Total Patients", |
|
57 |
#' data = adtte_f |
|
58 |
#' ) |
|
59 |
#' df |
|
60 |
#' |
|
61 |
#' # Here we group the levels of `BMRKR2` manually. |
|
62 |
#' df_grouped <- extract_survival_biomarkers( |
|
63 |
#' variables = list( |
|
64 |
#' tte = "AVAL", |
|
65 |
#' is_event = "is_event", |
|
66 |
#' biomarkers = c("BMRKR1", "AGE"), |
|
67 |
#' strata = "STRATA1", |
|
68 |
#' covariates = "SEX", |
|
69 |
#' subgroups = "BMRKR2" |
|
70 |
#' ), |
|
71 |
#' data = adtte_f, |
|
72 |
#' groups_lists = list( |
|
73 |
#' BMRKR2 = list( |
|
74 |
#' "low" = "LOW", |
|
75 |
#' "low/medium" = c("LOW", "MEDIUM"), |
|
76 |
#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH") |
|
77 |
#' ) |
|
78 |
#' ) |
|
79 |
#' ) |
|
80 |
#' df_grouped |
|
81 |
#' |
|
82 |
#' @name survival_biomarkers_subgroups |
|
83 |
#' @order 1 |
|
84 |
NULL |
|
85 | ||
86 |
#' Prepare survival data estimates for multiple biomarkers in a single data frame |
|
87 |
#' |
|
88 |
#' @description `r lifecycle::badge("stable")` |
|
89 |
#' |
|
90 |
#' Prepares estimates for number of events, patients and median survival times, as well as hazard ratio estimates, |
|
91 |
#' confidence intervals and p-values, for multiple biomarkers across population subgroups in a single data frame. |
|
92 |
#' `variables` corresponds to the names of variables found in `data`, passed as a named `list` and requires elements |
|
93 |
#' `tte`, `is_event`, `biomarkers` (vector of continuous biomarker variables), and optionally `subgroups` and `strata`. |
|
94 |
#' `groups_lists` optionally specifies groupings for `subgroups` variables. |
|
95 |
#' |
|
96 |
#' @inheritParams argument_convention |
|
97 |
#' @inheritParams fit_coxreg_multivar |
|
98 |
#' @inheritParams survival_duration_subgroups |
|
99 |
#' |
|
100 |
#' @return A `data.frame` with columns `biomarker`, `biomarker_label`, `n_tot`, `n_tot_events`, |
|
101 |
#' `median`, `hr`, `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`, |
|
102 |
#' `var_label`, and `row_type`. |
|
103 |
#' |
|
104 |
#' @seealso [h_coxreg_mult_cont_df()] which is used internally, [tabulate_survival_biomarkers()]. |
|
105 |
#' |
|
106 |
#' @export |
|
107 |
extract_survival_biomarkers <- function(variables, |
|
108 |
data, |
|
109 |
groups_lists = list(), |
|
110 |
control = control_coxreg(), |
|
111 |
label_all = "All Patients") { |
|
112 | 6x |
if ("strat" %in% names(variables)) { |
113 | ! |
warning( |
114 | ! |
"Warning: the `strat` element name of the `variables` list argument to `extract_survival_biomarkers() ", |
115 | ! |
"was deprecated in tern 0.9.3.\n ", |
116 | ! |
"Please use the name `strata` instead of `strat` in the `variables` argument." |
117 |
) |
|
118 | ! |
variables[["strata"]] <- variables[["strat"]] |
119 |
} |
|
120 | ||
121 | 6x |
checkmate::assert_list(variables) |
122 | 6x |
checkmate::assert_character(variables$subgroups, null.ok = TRUE) |
123 | 6x |
checkmate::assert_string(label_all) |
124 | ||
125 |
# Start with all patients. |
|
126 | 6x |
result_all <- h_coxreg_mult_cont_df( |
127 | 6x |
variables = variables, |
128 | 6x |
data = data, |
129 | 6x |
control = control |
130 |
) |
|
131 | 6x |
result_all$subgroup <- label_all |
132 | 6x |
result_all$var <- "ALL" |
133 | 6x |
result_all$var_label <- label_all |
134 | 6x |
result_all$row_type <- "content" |
135 | 6x |
if (is.null(variables$subgroups)) { |
136 |
# Only return result for all patients. |
|
137 | 1x |
result_all |
138 |
} else { |
|
139 |
# Add subgroups results. |
|
140 | 5x |
l_data <- h_split_by_subgroups( |
141 | 5x |
data, |
142 | 5x |
variables$subgroups, |
143 | 5x |
groups_lists = groups_lists |
144 |
) |
|
145 | 5x |
l_result <- lapply(l_data, function(grp) { |
146 | 25x |
result <- h_coxreg_mult_cont_df( |
147 | 25x |
variables = variables, |
148 | 25x |
data = grp$df, |
149 | 25x |
control = control |
150 |
) |
|
151 | 25x |
result_labels <- grp$df_labels[rep(1, times = nrow(result)), ] |
152 | 25x |
cbind(result, result_labels) |
153 |
}) |
|
154 | 5x |
result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE)) |
155 | 5x |
result_subgroups$row_type <- "analysis" |
156 | 5x |
rbind( |
157 | 5x |
result_all, |
158 | 5x |
result_subgroups |
159 |
) |
|
160 |
} |
|
161 |
} |
|
162 | ||
163 |
#' @describeIn survival_biomarkers_subgroups Table-creating function which creates a table |
|
164 |
#' summarizing biomarker effects on survival by subgroup. |
|
165 |
#' |
|
166 |
#' @param label_all `r lifecycle::badge("deprecated")`\cr please assign the `label_all` parameter within the |
|
167 |
#' [extract_survival_biomarkers()] function when creating `df`. |
|
168 |
#' |
|
169 |
#' @return An `rtables` table summarizing biomarker effects on survival by subgroup. |
|
170 |
#' |
|
171 |
#' @note In contrast to [tabulate_survival_subgroups()] this tabulation function does |
|
172 |
#' not start from an input layout `lyt`. This is because internally the table is |
|
173 |
#' created by combining multiple subtables. |
|
174 |
#' |
|
175 |
#' @seealso [h_tab_surv_one_biomarker()] which is used internally, [extract_survival_biomarkers()]. |
|
176 |
#' |
|
177 |
#' @examples |
|
178 |
#' ## Table with default columns. |
|
179 |
#' tabulate_survival_biomarkers(df) |
|
180 |
#' |
|
181 |
#' ## Table with a manually chosen set of columns: leave out "pval", reorder. |
|
182 |
#' tab <- tabulate_survival_biomarkers( |
|
183 |
#' df = df, |
|
184 |
#' vars = c("n_tot_events", "ci", "n_tot", "median", "hr"), |
|
185 |
#' time_unit = as.character(adtte_f$AVALU[1]) |
|
186 |
#' ) |
|
187 |
#' |
|
188 |
#' ## Finally produce the forest plot. |
|
189 |
#' \donttest{ |
|
190 |
#' g_forest(tab, xlim = c(0.8, 1.2)) |
|
191 |
#' } |
|
192 |
#' |
|
193 |
#' @export |
|
194 |
#' @order 2 |
|
195 |
tabulate_survival_biomarkers <- function(df, |
|
196 |
vars = c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"), |
|
197 |
groups_lists = list(), |
|
198 |
control = control_coxreg(), |
|
199 |
label_all = lifecycle::deprecated(), |
|
200 |
time_unit = NULL, |
|
201 |
na_str = default_na_str(), |
|
202 |
.indent_mods = 0L) { |
|
203 | 5x |
if (lifecycle::is_present(label_all)) { |
204 | 1x |
lifecycle::deprecate_warn( |
205 | 1x |
"0.9.5", "tabulate_survival_biomarkers(label_all)", |
206 | 1x |
details = paste( |
207 | 1x |
"Please assign the `label_all` parameter within the", |
208 | 1x |
"`extract_survival_biomarkers()` function when creating `df`." |
209 |
) |
|
210 |
) |
|
211 |
} |
|
212 | ||
213 | 5x |
checkmate::assert_data_frame(df) |
214 | 5x |
checkmate::assert_character(df$biomarker) |
215 | 5x |
checkmate::assert_character(df$biomarker_label) |
216 | 5x |
checkmate::assert_subset(vars, get_stats("tabulate_survival_biomarkers")) |
217 | ||
218 | 5x |
extra_args <- list(groups_lists = groups_lists, control = control) |
219 | ||
220 | 5x |
df_subs <- split(df, f = df$biomarker) |
221 | 5x |
tabs <- lapply(df_subs, FUN = function(df_sub) { |
222 | 9x |
tab_sub <- h_tab_surv_one_biomarker( |
223 | 9x |
df = df_sub, |
224 | 9x |
vars = vars, |
225 | 9x |
time_unit = time_unit, |
226 | 9x |
na_str = na_str, |
227 | 9x |
.indent_mods = .indent_mods, |
228 | 9x |
extra_args = extra_args |
229 |
) |
|
230 |
# Insert label row as first row in table. |
|
231 | 9x |
label_at_path(tab_sub, path = row_paths(tab_sub)[[1]][1]) <- df_sub$biomarker_label[1] |
232 | 9x |
tab_sub |
233 |
}) |
|
234 | 5x |
result <- do.call(rbind, tabs) |
235 | ||
236 | 5x |
n_tot_ids <- grep("^n_tot", vars) |
237 | 5x |
hr_id <- match("hr", vars) |
238 | 5x |
ci_id <- match("ci", vars) |
239 | 5x |
structure( |
240 | 5x |
result, |
241 | 5x |
forest_header = paste0(c("Higher", "Lower"), "\nBetter"), |
242 | 5x |
col_x = hr_id, |
243 | 5x |
col_ci = ci_id, |
244 | 5x |
col_symbol_size = n_tot_ids[1] |
245 |
) |
|
246 |
} |
1 |
#' Helper functions for multivariate logistic regression |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Helper functions used in calculations for logistic regression. |
|
6 |
#' |
|
7 |
#' @inheritParams argument_convention |
|
8 |
#' @param fit_glm (`glm`)\cr logistic regression model fitted by [stats::glm()] with "binomial" family. |
|
9 |
#' Limited functionality is also available for conditional logistic regression models fitted by |
|
10 |
#' [survival::clogit()], currently this is used only by [extract_rsp_biomarkers()]. |
|
11 |
#' @param x (`character`)\cr a variable or interaction term in `fit_glm` (depending on the helper function used). |
|
12 |
#' |
|
13 |
#' @examples |
|
14 |
#' library(dplyr) |
|
15 |
#' library(broom) |
|
16 |
#' |
|
17 |
#' adrs_f <- tern_ex_adrs %>% |
|
18 |
#' filter(PARAMCD == "BESRSPI") %>% |
|
19 |
#' filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>% |
|
20 |
#' mutate( |
|
21 |
#' Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0), |
|
22 |
#' RACE = factor(RACE), |
|
23 |
#' SEX = factor(SEX) |
|
24 |
#' ) |
|
25 |
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response") |
|
26 |
#' mod1 <- fit_logistic( |
|
27 |
#' data = adrs_f, |
|
28 |
#' variables = list( |
|
29 |
#' response = "Response", |
|
30 |
#' arm = "ARMCD", |
|
31 |
#' covariates = c("AGE", "RACE") |
|
32 |
#' ) |
|
33 |
#' ) |
|
34 |
#' mod2 <- fit_logistic( |
|
35 |
#' data = adrs_f, |
|
36 |
#' variables = list( |
|
37 |
#' response = "Response", |
|
38 |
#' arm = "ARMCD", |
|
39 |
#' covariates = c("AGE", "RACE"), |
|
40 |
#' interaction = "AGE" |
|
41 |
#' ) |
|
42 |
#' ) |
|
43 |
#' |
|
44 |
#' @name h_logistic_regression |
|
45 |
NULL |
|
46 | ||
47 |
#' @describeIn h_logistic_regression Helper function to extract interaction variable names from a fitted |
|
48 |
#' model assuming only one interaction term. |
|
49 |
#' |
|
50 |
#' @return Vector of names of interaction variables. |
|
51 |
#' |
|
52 |
#' @export |
|
53 |
h_get_interaction_vars <- function(fit_glm) { |
|
54 | 27x |
checkmate::assert_class(fit_glm, "glm") |
55 | 27x |
terms_name <- attr(stats::terms(fit_glm), "term.labels") |
56 | 27x |
terms_order <- attr(stats::terms(fit_glm), "order") |
57 | 27x |
interaction_term <- terms_name[terms_order == 2] |
58 | 27x |
checkmate::assert_string(interaction_term) |
59 | 27x |
strsplit(interaction_term, split = ":")[[1]] |
60 |
} |
|
61 | ||
62 |
#' @describeIn h_logistic_regression Helper function to get the right coefficient name from the |
|
63 |
#' interaction variable names and the given levels. The main value here is that the order |
|
64 |
#' of first and second variable is checked in the `interaction_vars` input. |
|
65 |
#' |
|
66 |
#' @param interaction_vars (`character(2)`)\cr interaction variable names. |
|
67 |
#' @param first_var_with_level (`character(2)`)\cr the first variable name with the interaction level. |
|
68 |
#' @param second_var_with_level (`character(2)`)\cr the second variable name with the interaction level. |
|
69 |
#' |
|
70 |
#' @return Name of coefficient. |
|
71 |
#' |
|
72 |
#' @export |
|
73 |
h_interaction_coef_name <- function(interaction_vars, |
|
74 |
first_var_with_level, |
|
75 |
second_var_with_level) { |
|
76 | 45x |
checkmate::assert_character(interaction_vars, len = 2, any.missing = FALSE) |
77 | 45x |
checkmate::assert_character(first_var_with_level, len = 2, any.missing = FALSE) |
78 | 45x |
checkmate::assert_character(second_var_with_level, len = 2, any.missing = FALSE) |
79 | 45x |
checkmate::assert_subset(c(first_var_with_level[1], second_var_with_level[1]), interaction_vars) |
80 | ||
81 | 45x |
first_name <- paste(first_var_with_level, collapse = "") |
82 | 45x |
second_name <- paste(second_var_with_level, collapse = "") |
83 | 45x |
if (first_var_with_level[1] == interaction_vars[1]) { |
84 | 34x |
paste(first_name, second_name, sep = ":") |
85 | 11x |
} else if (second_var_with_level[1] == interaction_vars[1]) { |
86 | 11x |
paste(second_name, first_name, sep = ":") |
87 |
} |
|
88 |
} |
|
89 | ||
90 |
#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates |
|
91 |
#' for the case when both the odds ratio and the interaction variable are categorical. |
|
92 |
#' |
|
93 |
#' @param odds_ratio_var (`string`)\cr the odds ratio variable. |
|
94 |
#' @param interaction_var (`string`)\cr the interaction variable. |
|
95 |
#' |
|
96 |
#' @return Odds ratio. |
|
97 |
#' |
|
98 |
#' @export |
|
99 |
h_or_cat_interaction <- function(odds_ratio_var, |
|
100 |
interaction_var, |
|
101 |
fit_glm, |
|
102 |
conf_level = 0.95) { |
|
103 | 7x |
interaction_vars <- h_get_interaction_vars(fit_glm) |
104 | 7x |
checkmate::assert_string(odds_ratio_var) |
105 | 7x |
checkmate::assert_string(interaction_var) |
106 | 7x |
checkmate::assert_subset(c(odds_ratio_var, interaction_var), interaction_vars) |
107 | 7x |
checkmate::assert_vector(interaction_vars, len = 2) |
108 | ||
109 | 7x |
xs_level <- fit_glm$xlevels |
110 | 7x |
xs_coef <- stats::coef(fit_glm) |
111 | 7x |
xs_vcov <- stats::vcov(fit_glm) |
112 | 7x |
y <- list() |
113 | 7x |
for (var_level in xs_level[[odds_ratio_var]][-1]) { |
114 | 12x |
x <- list() |
115 | 12x |
for (ref_level in xs_level[[interaction_var]]) { |
116 | 32x |
coef_names <- paste0(odds_ratio_var, var_level) |
117 | 32x |
if (ref_level != xs_level[[interaction_var]][1]) { |
118 | 20x |
interaction_coef_name <- h_interaction_coef_name( |
119 | 20x |
interaction_vars, |
120 | 20x |
c(odds_ratio_var, var_level), |
121 | 20x |
c(interaction_var, ref_level) |
122 |
) |
|
123 | 20x |
coef_names <- c( |
124 | 20x |
coef_names, |
125 | 20x |
interaction_coef_name |
126 |
) |
|
127 |
} |
|
128 | 32x |
if (length(coef_names) > 1) { |
129 | 20x |
ones <- t(c(1, 1)) |
130 | 20x |
est <- as.numeric(ones %*% xs_coef[coef_names]) |
131 | 20x |
se <- sqrt(as.numeric(ones %*% xs_vcov[coef_names, coef_names] %*% t(ones))) |
132 |
} else { |
|
133 | 12x |
est <- xs_coef[coef_names] |
134 | 12x |
se <- sqrt(as.numeric(xs_vcov[coef_names, coef_names])) |
135 |
} |
|
136 | 32x |
or <- exp(est) |
137 | 32x |
ci <- exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se) |
138 | 32x |
x[[ref_level]] <- list(or = or, ci = ci) |
139 |
} |
|
140 | 12x |
y[[var_level]] <- x |
141 |
} |
|
142 | 7x |
y |
143 |
} |
|
144 | ||
145 |
#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates |
|
146 |
#' for the case when either the odds ratio or the interaction variable is continuous. |
|
147 |
#' |
|
148 |
#' @param at (`numeric` or `NULL`)\cr optional values for the interaction variable. Otherwise |
|
149 |
#' the median is used. |
|
150 |
#' |
|
151 |
#' @return Odds ratio. |
|
152 |
#' |
|
153 |
#' @note We don't provide a function for the case when both variables are continuous because |
|
154 |
#' this does not arise in this table, as the treatment arm variable will always be involved |
|
155 |
#' and categorical. |
|
156 |
#' |
|
157 |
#' @export |
|
158 |
h_or_cont_interaction <- function(odds_ratio_var, |
|
159 |
interaction_var, |
|
160 |
fit_glm, |
|
161 |
at = NULL, |
|
162 |
conf_level = 0.95) { |
|
163 | 9x |
interaction_vars <- h_get_interaction_vars(fit_glm) |
164 | 9x |
checkmate::assert_string(odds_ratio_var) |
165 | 9x |
checkmate::assert_string(interaction_var) |
166 | 9x |
checkmate::assert_subset(c(odds_ratio_var, interaction_var), interaction_vars) |
167 | 9x |
checkmate::assert_vector(interaction_vars, len = 2) |
168 | 9x |
checkmate::assert_numeric(at, min.len = 1, null.ok = TRUE, any.missing = FALSE) |
169 | 9x |
xs_level <- fit_glm$xlevels |
170 | 9x |
xs_coef <- stats::coef(fit_glm) |
171 | 9x |
xs_vcov <- stats::vcov(fit_glm) |
172 | 9x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
173 | 9x |
model_data <- fit_glm$model |
174 | 9x |
if (!is.null(at)) { |
175 | 2x |
checkmate::assert_set_equal(xs_class[interaction_var], "numeric") |
176 |
} |
|
177 | 9x |
y <- list() |
178 | 9x |
if (xs_class[interaction_var] == "numeric") { |
179 | 6x |
if (is.null(at)) { |
180 | 4x |
at <- ceiling(stats::median(model_data[[interaction_var]])) |
181 |
} |
|
182 | ||
183 | 6x |
for (var_level in xs_level[[odds_ratio_var]][-1]) { |
184 | 12x |
x <- list() |
185 | 12x |
for (increment in at) { |
186 | 18x |
coef_names <- paste0(odds_ratio_var, var_level) |
187 | 18x |
if (increment != 0) { |
188 | 18x |
interaction_coef_name <- h_interaction_coef_name( |
189 | 18x |
interaction_vars, |
190 | 18x |
c(odds_ratio_var, var_level), |
191 | 18x |
c(interaction_var, "") |
192 |
) |
|
193 | 18x |
coef_names <- c( |
194 | 18x |
coef_names, |
195 | 18x |
interaction_coef_name |
196 |
) |
|
197 |
} |
|
198 | 18x |
if (length(coef_names) > 1) { |
199 | 18x |
xvec <- t(c(1, increment)) |
200 | 18x |
est <- as.numeric(xvec %*% xs_coef[coef_names]) |
201 | 18x |
se <- sqrt(as.numeric(xvec %*% xs_vcov[coef_names, coef_names] %*% t(xvec))) |
202 |
} else { |
|
203 | ! |
est <- xs_coef[coef_names] |
204 | ! |
se <- sqrt(as.numeric(xs_vcov[coef_names, coef_names])) |
205 |
} |
|
206 | 18x |
or <- exp(est) |
207 | 18x |
ci <- exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se) |
208 | 18x |
x[[as.character(increment)]] <- list(or = or, ci = ci) |
209 |
} |
|
210 | 12x |
y[[var_level]] <- x |
211 |
} |
|
212 |
} else { |
|
213 | 3x |
checkmate::assert_set_equal(xs_class[odds_ratio_var], "numeric") |
214 | 3x |
checkmate::assert_set_equal(xs_class[interaction_var], "factor") |
215 | 3x |
for (var_level in xs_level[[interaction_var]]) { |
216 | 9x |
coef_names <- odds_ratio_var |
217 | 9x |
if (var_level != xs_level[[interaction_var]][1]) { |
218 | 6x |
interaction_coef_name <- h_interaction_coef_name( |
219 | 6x |
interaction_vars, |
220 | 6x |
c(odds_ratio_var, ""), |
221 | 6x |
c(interaction_var, var_level) |
222 |
) |
|
223 | 6x |
coef_names <- c( |
224 | 6x |
coef_names, |
225 | 6x |
interaction_coef_name |
226 |
) |
|
227 |
} |
|
228 | 9x |
if (length(coef_names) > 1) { |
229 | 6x |
xvec <- t(c(1, 1)) |
230 | 6x |
est <- as.numeric(xvec %*% xs_coef[coef_names]) |
231 | 6x |
se <- sqrt(as.numeric(xvec %*% xs_vcov[coef_names, coef_names] %*% t(xvec))) |
232 |
} else { |
|
233 | 3x |
est <- xs_coef[coef_names] |
234 | 3x |
se <- sqrt(as.numeric(xs_vcov[coef_names, coef_names])) |
235 |
} |
|
236 | 9x |
or <- exp(est) |
237 | 9x |
ci <- exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se) |
238 | 9x |
y[[var_level]] <- list(or = or, ci = ci) |
239 |
} |
|
240 |
} |
|
241 | 9x |
y |
242 |
} |
|
243 | ||
244 |
#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates |
|
245 |
#' in case of an interaction. This is a wrapper for [h_or_cont_interaction()] and |
|
246 |
#' [h_or_cat_interaction()]. |
|
247 |
#' |
|
248 |
#' @return Odds ratio. |
|
249 |
#' |
|
250 |
#' @export |
|
251 |
h_or_interaction <- function(odds_ratio_var, |
|
252 |
interaction_var, |
|
253 |
fit_glm, |
|
254 |
at = NULL, |
|
255 |
conf_level = 0.95) { |
|
256 | 13x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
257 | 13x |
if (any(xs_class[c(odds_ratio_var, interaction_var)] == "numeric")) { |
258 | 7x |
h_or_cont_interaction( |
259 | 7x |
odds_ratio_var, |
260 | 7x |
interaction_var, |
261 | 7x |
fit_glm, |
262 | 7x |
at = at, |
263 | 7x |
conf_level = conf_level |
264 |
) |
|
265 | 6x |
} else if (all(xs_class[c(odds_ratio_var, interaction_var)] == "factor")) { |
266 | 6x |
h_or_cat_interaction( |
267 | 6x |
odds_ratio_var, |
268 | 6x |
interaction_var, |
269 | 6x |
fit_glm, |
270 | 6x |
conf_level = conf_level |
271 |
) |
|
272 |
} else { |
|
273 | ! |
stop("wrong interaction variable class, the interaction variable is not a numeric nor a factor") |
274 |
} |
|
275 |
} |
|
276 | ||
277 |
#' @describeIn h_logistic_regression Helper function to construct term labels from simple terms and the table |
|
278 |
#' of numbers of patients. |
|
279 |
#' |
|
280 |
#' @param terms (`character`)\cr simple terms. |
|
281 |
#' @param table (`table`)\cr table containing numbers for terms. |
|
282 |
#' |
|
283 |
#' @return Term labels containing numbers of patients. |
|
284 |
#' |
|
285 |
#' @export |
|
286 |
h_simple_term_labels <- function(terms, |
|
287 |
table) { |
|
288 | 45x |
checkmate::assert_true(is.table(table)) |
289 | 45x |
checkmate::assert_multi_class(terms, classes = c("factor", "character")) |
290 | 45x |
terms <- as.character(terms) |
291 | 45x |
term_n <- table[terms] |
292 | 45x |
paste0(terms, ", n = ", term_n) |
293 |
} |
|
294 | ||
295 |
#' @describeIn h_logistic_regression Helper function to construct term labels from interaction terms and the table |
|
296 |
#' of numbers of patients. |
|
297 |
#' |
|
298 |
#' @param terms1 (`character`)\cr terms for first dimension (rows). |
|
299 |
#' @param terms2 (`character`)\cr terms for second dimension (rows). |
|
300 |
#' @param any (`flag`)\cr whether any of `term1` and `term2` can be fulfilled to count the |
|
301 |
#' number of patients. In that case they can only be scalar (strings). |
|
302 |
#' |
|
303 |
#' @return Term labels containing numbers of patients. |
|
304 |
#' |
|
305 |
#' @export |
|
306 |
h_interaction_term_labels <- function(terms1, |
|
307 |
terms2, |
|
308 |
table, |
|
309 |
any = FALSE) { |
|
310 | 8x |
checkmate::assert_true(is.table(table)) |
311 | 8x |
checkmate::assert_flag(any) |
312 | 8x |
checkmate::assert_multi_class(terms1, classes = c("factor", "character")) |
313 | 8x |
checkmate::assert_multi_class(terms2, classes = c("factor", "character")) |
314 | 8x |
terms1 <- as.character(terms1) |
315 | 8x |
terms2 <- as.character(terms2) |
316 | 8x |
if (any) { |
317 | 4x |
checkmate::assert_scalar(terms1) |
318 | 4x |
checkmate::assert_scalar(terms2) |
319 | 4x |
paste0( |
320 | 4x |
terms1, " or ", terms2, ", n = ", |
321 |
# Note that we double count in the initial sum the cell [terms1, terms2], therefore subtract. |
|
322 | 4x |
sum(c(table[terms1, ], table[, terms2])) - table[terms1, terms2] |
323 |
) |
|
324 |
} else { |
|
325 | 4x |
term_n <- table[cbind(terms1, terms2)] |
326 | 4x |
paste0(terms1, " * ", terms2, ", n = ", term_n) |
327 |
} |
|
328 |
} |
|
329 | ||
330 |
#' @describeIn h_logistic_regression Helper function to tabulate the main effect |
|
331 |
#' results of a (conditional) logistic regression model. |
|
332 |
#' |
|
333 |
#' @return Tabulated main effect results from a logistic regression model. |
|
334 |
#' |
|
335 |
#' @examples |
|
336 |
#' h_glm_simple_term_extract("AGE", mod1) |
|
337 |
#' h_glm_simple_term_extract("ARMCD", mod1) |
|
338 |
#' |
|
339 |
#' @export |
|
340 |
h_glm_simple_term_extract <- function(x, fit_glm) { |
|
341 | 73x |
checkmate::assert_multi_class(fit_glm, c("glm", "clogit")) |
342 | 73x |
checkmate::assert_string(x) |
343 | ||
344 | 73x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
345 | 73x |
xs_level <- fit_glm$xlevels |
346 | 73x |
xs_coef <- summary(fit_glm)$coefficients |
347 | 73x |
stats <- if (inherits(fit_glm, "glm")) { |
348 | 61x |
c("estimate" = "Estimate", "std_error" = "Std. Error", "pvalue" = "Pr(>|z|)") |
349 |
} else { |
|
350 | 12x |
c("estimate" = "coef", "std_error" = "se(coef)", "pvalue" = "Pr(>|z|)") |
351 |
} |
|
352 |
# Make sure x is not an interaction term. |
|
353 | 73x |
checkmate::assert_subset(x, names(xs_class)) |
354 | 73x |
x_sel <- if (xs_class[x] == "numeric") x else paste0(x, xs_level[[x]][-1]) |
355 | 73x |
x_stats <- as.data.frame(xs_coef[x_sel, stats, drop = FALSE], stringsAsFactors = FALSE) |
356 | 73x |
colnames(x_stats) <- names(stats) |
357 | 73x |
x_stats$estimate <- as.list(x_stats$estimate) |
358 | 73x |
x_stats$std_error <- as.list(x_stats$std_error) |
359 | 73x |
x_stats$pvalue <- as.list(x_stats$pvalue) |
360 | 73x |
x_stats$df <- as.list(1) |
361 | 73x |
if (xs_class[x] == "numeric") { |
362 | 58x |
x_stats$term <- x |
363 | 58x |
x_stats$term_label <- if (inherits(fit_glm, "glm")) { |
364 | 46x |
formatters::var_labels(fit_glm$data[x], fill = TRUE) |
365 |
} else { |
|
366 |
# We just fill in here with the `term` itself as we don't have the data available. |
|
367 | 12x |
x |
368 |
} |
|
369 | 58x |
x_stats$is_variable_summary <- FALSE |
370 | 58x |
x_stats$is_term_summary <- TRUE |
371 |
} else { |
|
372 | 15x |
checkmate::assert_class(fit_glm, "glm") |
373 |
# The reason is that we don't have the original data set in the `clogit` object |
|
374 |
# and therefore cannot determine the `x_numbers` here. |
|
375 | 15x |
x_numbers <- table(fit_glm$data[[x]]) |
376 | 15x |
x_stats$term <- xs_level[[x]][-1] |
377 | 15x |
x_stats$term_label <- h_simple_term_labels(x_stats$term, x_numbers) |
378 | 15x |
x_stats$is_variable_summary <- FALSE |
379 | 15x |
x_stats$is_term_summary <- TRUE |
380 | 15x |
main_effects <- car::Anova(fit_glm, type = 3, test.statistic = "Wald") |
381 | 15x |
x_main <- data.frame( |
382 | 15x |
pvalue = main_effects[x, "Pr(>Chisq)", drop = TRUE], |
383 | 15x |
term = xs_level[[x]][1], |
384 | 15x |
term_label = paste("Reference", h_simple_term_labels(xs_level[[x]][1], x_numbers)), |
385 | 15x |
df = main_effects[x, "Df", drop = TRUE], |
386 | 15x |
stringsAsFactors = FALSE |
387 |
) |
|
388 | 15x |
x_main$pvalue <- as.list(x_main$pvalue) |
389 | 15x |
x_main$df <- as.list(x_main$df) |
390 | 15x |
x_main$estimate <- list(numeric(0)) |
391 | 15x |
x_main$std_error <- list(numeric(0)) |
392 | 15x |
if (length(xs_level[[x]][-1]) == 1) { |
393 | 6x |
x_main$pvalue <- list(numeric(0)) |
394 | 6x |
x_main$df <- list(numeric(0)) |
395 |
} |
|
396 | 15x |
x_main$is_variable_summary <- TRUE |
397 | 15x |
x_main$is_term_summary <- FALSE |
398 | 15x |
x_stats <- rbind(x_main, x_stats) |
399 |
} |
|
400 | 73x |
x_stats$variable <- x |
401 | 73x |
x_stats$variable_label <- if (inherits(fit_glm, "glm")) { |
402 | 61x |
formatters::var_labels(fit_glm$data[x], fill = TRUE) |
403 |
} else { |
|
404 | 12x |
x |
405 |
} |
|
406 | 73x |
x_stats$interaction <- "" |
407 | 73x |
x_stats$interaction_label <- "" |
408 | 73x |
x_stats$reference <- "" |
409 | 73x |
x_stats$reference_label <- "" |
410 | 73x |
rownames(x_stats) <- NULL |
411 | 73x |
x_stats[c( |
412 | 73x |
"variable", |
413 | 73x |
"variable_label", |
414 | 73x |
"term", |
415 | 73x |
"term_label", |
416 | 73x |
"interaction", |
417 | 73x |
"interaction_label", |
418 | 73x |
"reference", |
419 | 73x |
"reference_label", |
420 | 73x |
"estimate", |
421 | 73x |
"std_error", |
422 | 73x |
"df", |
423 | 73x |
"pvalue", |
424 | 73x |
"is_variable_summary", |
425 | 73x |
"is_term_summary" |
426 |
)] |
|
427 |
} |
|
428 | ||
429 |
#' @describeIn h_logistic_regression Helper function to tabulate the interaction term |
|
430 |
#' results of a logistic regression model. |
|
431 |
#' |
|
432 |
#' @return Tabulated interaction term results from a logistic regression model. |
|
433 |
#' |
|
434 |
#' @examples |
|
435 |
#' h_glm_interaction_extract("ARMCD:AGE", mod2) |
|
436 |
#' |
|
437 |
#' @export |
|
438 |
h_glm_interaction_extract <- function(x, fit_glm) { |
|
439 | 6x |
vars <- h_get_interaction_vars(fit_glm) |
440 | 6x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
441 | ||
442 | 6x |
checkmate::assert_string(x) |
443 | ||
444 |
# Only take two-way interaction |
|
445 | 6x |
checkmate::assert_vector(vars, len = 2) |
446 | ||
447 |
# Only consider simple case: first variable in interaction is arm, a categorical variable |
|
448 | 6x |
checkmate::assert_disjunct(xs_class[vars[1]], "numeric") |
449 | ||
450 | 6x |
xs_level <- fit_glm$xlevels |
451 | 6x |
xs_coef <- summary(fit_glm)$coefficients |
452 | 6x |
main_effects <- car::Anova(fit_glm, type = 3, test.statistic = "Wald") |
453 | 6x |
stats <- c("estimate" = "Estimate", "std_error" = "Std. Error", "pvalue" = "Pr(>|z|)") |
454 | 6x |
v1_comp <- xs_level[[vars[1]]][-1] |
455 | 6x |
if (xs_class[vars[2]] == "numeric") { |
456 | 3x |
x_stats <- as.data.frame( |
457 | 3x |
xs_coef[paste0(vars[1], v1_comp, ":", vars[2]), stats, drop = FALSE], |
458 | 3x |
stringsAsFactors = FALSE |
459 |
) |
|
460 | 3x |
colnames(x_stats) <- names(stats) |
461 | 3x |
x_stats$term <- v1_comp |
462 | 3x |
x_numbers <- table(fit_glm$data[[vars[1]]]) |
463 | 3x |
x_stats$term_label <- h_simple_term_labels(v1_comp, x_numbers) |
464 | 3x |
v1_ref <- xs_level[[vars[1]]][1] |
465 | 3x |
term_main <- v1_ref |
466 | 3x |
ref_label <- h_simple_term_labels(v1_ref, x_numbers) |
467 | 3x |
} else if (xs_class[vars[2]] != "numeric") { |
468 | 3x |
v2_comp <- xs_level[[vars[2]]][-1] |
469 | 3x |
v1_v2_grid <- expand.grid(v1 = v1_comp, v2 = v2_comp) |
470 | 3x |
x_sel <- paste( |
471 | 3x |
paste0(vars[1], v1_v2_grid$v1), |
472 | 3x |
paste0(vars[2], v1_v2_grid$v2), |
473 | 3x |
sep = ":" |
474 |
) |
|
475 | 3x |
x_stats <- as.data.frame(xs_coef[x_sel, stats, drop = FALSE], stringsAsFactors = FALSE) |
476 | 3x |
colnames(x_stats) <- names(stats) |
477 | 3x |
x_stats$term <- paste(v1_v2_grid$v1, "*", v1_v2_grid$v2) |
478 | 3x |
x_numbers <- table(fit_glm$data[[vars[1]]], fit_glm$data[[vars[2]]]) |
479 | 3x |
x_stats$term_label <- h_interaction_term_labels(v1_v2_grid$v1, v1_v2_grid$v2, x_numbers) |
480 | 3x |
v1_ref <- xs_level[[vars[1]]][1] |
481 | 3x |
v2_ref <- xs_level[[vars[2]]][1] |
482 | 3x |
term_main <- paste(vars[1], vars[2], sep = " * ") |
483 | 3x |
ref_label <- h_interaction_term_labels(v1_ref, v2_ref, x_numbers, any = TRUE) |
484 |
} |
|
485 | 6x |
x_stats$df <- as.list(1) |
486 | 6x |
x_stats$pvalue <- as.list(x_stats$pvalue) |
487 | 6x |
x_stats$is_variable_summary <- FALSE |
488 | 6x |
x_stats$is_term_summary <- TRUE |
489 | 6x |
x_main <- data.frame( |
490 | 6x |
pvalue = main_effects[x, "Pr(>Chisq)", drop = TRUE], |
491 | 6x |
term = term_main, |
492 | 6x |
term_label = paste("Reference", ref_label), |
493 | 6x |
df = main_effects[x, "Df", drop = TRUE], |
494 | 6x |
stringsAsFactors = FALSE |
495 |
) |
|
496 | 6x |
x_main$pvalue <- as.list(x_main$pvalue) |
497 | 6x |
x_main$df <- as.list(x_main$df) |
498 | 6x |
x_main$estimate <- list(numeric(0)) |
499 | 6x |
x_main$std_error <- list(numeric(0)) |
500 | 6x |
x_main$is_variable_summary <- TRUE |
501 | 6x |
x_main$is_term_summary <- FALSE |
502 | ||
503 | 6x |
x_stats <- rbind(x_main, x_stats) |
504 | 6x |
x_stats$variable <- x |
505 | 6x |
x_stats$variable_label <- paste( |
506 | 6x |
"Interaction of", |
507 | 6x |
formatters::var_labels(fit_glm$data[vars[1]], fill = TRUE), |
508 |
"*", |
|
509 | 6x |
formatters::var_labels(fit_glm$data[vars[2]], fill = TRUE) |
510 |
) |
|
511 | 6x |
x_stats$interaction <- "" |
512 | 6x |
x_stats$interaction_label <- "" |
513 | 6x |
x_stats$reference <- "" |
514 | 6x |
x_stats$reference_label <- "" |
515 | 6x |
rownames(x_stats) <- NULL |
516 | 6x |
x_stats[c( |
517 | 6x |
"variable", |
518 | 6x |
"variable_label", |
519 | 6x |
"term", |
520 | 6x |
"term_label", |
521 | 6x |
"interaction", |
522 | 6x |
"interaction_label", |
523 | 6x |
"reference", |
524 | 6x |
"reference_label", |
525 | 6x |
"estimate", |
526 | 6x |
"std_error", |
527 | 6x |
"df", |
528 | 6x |
"pvalue", |
529 | 6x |
"is_variable_summary", |
530 | 6x |
"is_term_summary" |
531 |
)] |
|
532 |
} |
|
533 | ||
534 |
#' @describeIn h_logistic_regression Helper function to tabulate the interaction |
|
535 |
#' results of a logistic regression model. This basically is a wrapper for |
|
536 |
#' [h_or_interaction()] and [h_glm_simple_term_extract()] which puts the results |
|
537 |
#' in the right data frame format. |
|
538 |
#' |
|
539 |
#' @return A `data.frame` of tabulated interaction term results from a logistic regression model. |
|
540 |
#' |
|
541 |
#' @examples |
|
542 |
#' h_glm_inter_term_extract("AGE", "ARMCD", mod2) |
|
543 |
#' |
|
544 |
#' @export |
|
545 |
h_glm_inter_term_extract <- function(odds_ratio_var, |
|
546 |
interaction_var, |
|
547 |
fit_glm, |
|
548 |
...) { |
|
549 |
# First obtain the main effects. |
|
550 | 11x |
main_stats <- h_glm_simple_term_extract(odds_ratio_var, fit_glm) |
551 | 11x |
main_stats$is_reference_summary <- FALSE |
552 | 11x |
main_stats$odds_ratio <- NA |
553 | 11x |
main_stats$lcl <- NA |
554 | 11x |
main_stats$ucl <- NA |
555 | ||
556 |
# Then we get the odds ratio estimates and put into df form. |
|
557 | 11x |
or_numbers <- h_or_interaction(odds_ratio_var, interaction_var, fit_glm, ...) |
558 | 11x |
is_num_or_var <- attr(fit_glm$terms, "dataClasses")[odds_ratio_var] == "numeric" |
559 | ||
560 | 11x |
if (is_num_or_var) { |
561 |
# Numeric OR variable case. |
|
562 | 3x |
references <- names(or_numbers) |
563 | 3x |
n_ref <- length(references) |
564 | ||
565 | 3x |
extract_from_list <- function(l, name, pos = 1) { |
566 | 9x |
unname(unlist( |
567 | 9x |
lapply(or_numbers, function(x) { |
568 | 27x |
x[[name]][pos] |
569 |
}) |
|
570 |
)) |
|
571 |
} |
|
572 | 3x |
or_stats <- data.frame( |
573 | 3x |
variable = odds_ratio_var, |
574 | 3x |
variable_label = unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE)), |
575 | 3x |
term = odds_ratio_var, |
576 | 3x |
term_label = unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE)), |
577 | 3x |
interaction = interaction_var, |
578 | 3x |
interaction_label = unname(formatters::var_labels(fit_glm$data[interaction_var], fill = TRUE)), |
579 | 3x |
reference = references, |
580 | 3x |
reference_label = references, |
581 | 3x |
estimate = NA, |
582 | 3x |
std_error = NA, |
583 | 3x |
odds_ratio = extract_from_list(or_numbers, "or"), |
584 | 3x |
lcl = extract_from_list(or_numbers, "ci", pos = "lcl"), |
585 | 3x |
ucl = extract_from_list(or_numbers, "ci", pos = "ucl"), |
586 | 3x |
df = NA, |
587 | 3x |
pvalue = NA, |
588 | 3x |
is_variable_summary = FALSE, |
589 | 3x |
is_term_summary = FALSE, |
590 | 3x |
is_reference_summary = TRUE |
591 |
) |
|
592 |
} else { |
|
593 |
# Categorical OR variable case. |
|
594 | 8x |
references <- names(or_numbers[[1]]) |
595 | 8x |
n_ref <- length(references) |
596 | ||
597 | 8x |
extract_from_list <- function(l, name, pos = 1) { |
598 | 24x |
unname(unlist( |
599 | 24x |
lapply(or_numbers, function(x) { |
600 | 42x |
lapply(x, function(y) y[[name]][pos]) |
601 |
}) |
|
602 |
)) |
|
603 |
} |
|
604 | 8x |
or_stats <- data.frame( |
605 | 8x |
variable = odds_ratio_var, |
606 | 8x |
variable_label = unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE)), |
607 | 8x |
term = rep(names(or_numbers), each = n_ref), |
608 | 8x |
term_label = h_simple_term_labels(rep(names(or_numbers), each = n_ref), table(fit_glm$data[[odds_ratio_var]])), |
609 | 8x |
interaction = interaction_var, |
610 | 8x |
interaction_label = unname(formatters::var_labels(fit_glm$data[interaction_var], fill = TRUE)), |
611 | 8x |
reference = unlist(lapply(or_numbers, names)), |
612 | 8x |
reference_label = unlist(lapply(or_numbers, names)), |
613 | 8x |
estimate = NA, |
614 | 8x |
std_error = NA, |
615 | 8x |
odds_ratio = extract_from_list(or_numbers, "or"), |
616 | 8x |
lcl = extract_from_list(or_numbers, "ci", pos = "lcl"), |
617 | 8x |
ucl = extract_from_list(or_numbers, "ci", pos = "ucl"), |
618 | 8x |
df = NA, |
619 | 8x |
pvalue = NA, |
620 | 8x |
is_variable_summary = FALSE, |
621 | 8x |
is_term_summary = FALSE, |
622 | 8x |
is_reference_summary = TRUE |
623 |
) |
|
624 |
} |
|
625 | ||
626 | 11x |
df <- rbind( |
627 | 11x |
main_stats[, names(or_stats)], |
628 | 11x |
or_stats |
629 |
) |
|
630 | 11x |
df[order(-df$is_variable_summary, df$term, -df$is_term_summary, df$reference), ] |
631 |
} |
|
632 | ||
633 |
#' @describeIn h_logistic_regression Helper function to tabulate the results including |
|
634 |
#' odds ratios and confidence intervals of simple terms. |
|
635 |
#' |
|
636 |
#' @return Tabulated statistics for the given variable(s) from the logistic regression model. |
|
637 |
#' |
|
638 |
#' @examples |
|
639 |
#' h_logistic_simple_terms("AGE", mod1) |
|
640 |
#' |
|
641 |
#' @export |
|
642 |
h_logistic_simple_terms <- function(x, fit_glm, conf_level = 0.95) { |
|
643 | 52x |
checkmate::assert_multi_class(fit_glm, c("glm", "clogit")) |
644 | 52x |
if (inherits(fit_glm, "glm")) { |
645 | 41x |
checkmate::assert_set_equal(fit_glm$family$family, "binomial") |
646 |
} |
|
647 | 52x |
terms_name <- attr(stats::terms(fit_glm), "term.labels") |
648 | 52x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
649 | 52x |
interaction <- terms_name[which(!terms_name %in% names(xs_class))] |
650 | 52x |
checkmate::assert_subset(x, terms_name) |
651 | 52x |
if (length(interaction) != 0) { |
652 |
# Make sure any item in x is not part of interaction term |
|
653 | 1x |
checkmate::assert_disjunct(x, unlist(strsplit(interaction, ":"))) |
654 |
} |
|
655 | 52x |
x_stats <- lapply(x, h_glm_simple_term_extract, fit_glm) |
656 | 52x |
x_stats <- do.call(rbind, x_stats) |
657 | 52x |
q_norm <- stats::qnorm((1 + conf_level) / 2) |
658 | 52x |
x_stats$odds_ratio <- lapply(x_stats$estimate, exp) |
659 | 52x |
x_stats$lcl <- Map(function(or, se) exp(log(or) - q_norm * se), x_stats$odds_ratio, x_stats$std_error) |
660 | 52x |
x_stats$ucl <- Map(function(or, se) exp(log(or) + q_norm * se), x_stats$odds_ratio, x_stats$std_error) |
661 | 52x |
x_stats$ci <- Map(function(lcl, ucl) c(lcl, ucl), lcl = x_stats$lcl, ucl = x_stats$ucl) |
662 | 52x |
x_stats |
663 |
} |
|
664 | ||
665 |
#' @describeIn h_logistic_regression Helper function to tabulate the results including |
|
666 |
#' odds ratios and confidence intervals of interaction terms. |
|
667 |
#' |
|
668 |
#' @return Tabulated statistics for the given variable(s) from the logistic regression model. |
|
669 |
#' |
|
670 |
#' @examples |
|
671 |
#' h_logistic_inter_terms(c("RACE", "AGE", "ARMCD", "AGE:ARMCD"), mod2) |
|
672 |
#' |
|
673 |
#' @export |
|
674 |
h_logistic_inter_terms <- function(x, |
|
675 |
fit_glm, |
|
676 |
conf_level = 0.95, |
|
677 |
at = NULL) { |
|
678 |
# Find out the interaction variables and interaction term. |
|
679 | 4x |
inter_vars <- h_get_interaction_vars(fit_glm) |
680 | 4x |
checkmate::assert_vector(inter_vars, len = 2) |
681 | ||
682 | ||
683 | 4x |
inter_term_index <- intersect(grep(inter_vars[1], x), grep(inter_vars[2], x)) |
684 | 4x |
inter_term <- x[inter_term_index] |
685 | ||
686 |
# For the non-interaction vars we need the standard stuff. |
|
687 | 4x |
normal_terms <- setdiff(x, union(inter_vars, inter_term)) |
688 | ||
689 | 4x |
x_stats <- lapply(normal_terms, h_glm_simple_term_extract, fit_glm) |
690 | 4x |
x_stats <- do.call(rbind, x_stats) |
691 | 4x |
q_norm <- stats::qnorm((1 + conf_level) / 2) |
692 | 4x |
x_stats$odds_ratio <- lapply(x_stats$estimate, exp) |
693 | 4x |
x_stats$lcl <- Map(function(or, se) exp(log(or) - q_norm * se), x_stats$odds_ratio, x_stats$std_error) |
694 | 4x |
x_stats$ucl <- Map(function(or, se) exp(log(or) + q_norm * se), x_stats$odds_ratio, x_stats$std_error) |
695 | 4x |
normal_stats <- x_stats |
696 | 4x |
normal_stats$is_reference_summary <- FALSE |
697 | ||
698 |
# Now the interaction term itself. |
|
699 | 4x |
inter_term_stats <- h_glm_interaction_extract(inter_term, fit_glm) |
700 | 4x |
inter_term_stats$odds_ratio <- NA |
701 | 4x |
inter_term_stats$lcl <- NA |
702 | 4x |
inter_term_stats$ucl <- NA |
703 | 4x |
inter_term_stats$is_reference_summary <- FALSE |
704 | ||
705 | 4x |
is_intervar1_numeric <- attr(fit_glm$terms, "dataClasses")[inter_vars[1]] == "numeric" |
706 | ||
707 |
# Interaction stuff. |
|
708 | 4x |
inter_stats_one <- h_glm_inter_term_extract( |
709 | 4x |
inter_vars[1], |
710 | 4x |
inter_vars[2], |
711 | 4x |
fit_glm, |
712 | 4x |
conf_level = conf_level, |
713 | 4x |
at = `if`(is_intervar1_numeric, NULL, at) |
714 |
) |
|
715 | 4x |
inter_stats_two <- h_glm_inter_term_extract( |
716 | 4x |
inter_vars[2], |
717 | 4x |
inter_vars[1], |
718 | 4x |
fit_glm, |
719 | 4x |
conf_level = conf_level, |
720 | 4x |
at = `if`(is_intervar1_numeric, at, NULL) |
721 |
) |
|
722 | ||
723 |
# Now just combine everything in one data frame. |
|
724 | 4x |
col_names <- c( |
725 | 4x |
"variable", |
726 | 4x |
"variable_label", |
727 | 4x |
"term", |
728 | 4x |
"term_label", |
729 | 4x |
"interaction", |
730 | 4x |
"interaction_label", |
731 | 4x |
"reference", |
732 | 4x |
"reference_label", |
733 | 4x |
"estimate", |
734 | 4x |
"std_error", |
735 | 4x |
"df", |
736 | 4x |
"pvalue", |
737 | 4x |
"odds_ratio", |
738 | 4x |
"lcl", |
739 | 4x |
"ucl", |
740 | 4x |
"is_variable_summary", |
741 | 4x |
"is_term_summary", |
742 | 4x |
"is_reference_summary" |
743 |
) |
|
744 | 4x |
df <- rbind( |
745 | 4x |
inter_stats_one[, col_names], |
746 | 4x |
inter_stats_two[, col_names], |
747 | 4x |
inter_term_stats[, col_names] |
748 |
) |
|
749 | 4x |
if (length(normal_terms) > 0) { |
750 | 4x |
df <- rbind( |
751 | 4x |
normal_stats[, col_names], |
752 | 4x |
df |
753 |
) |
|
754 |
} |
|
755 | 4x |
df$ci <- combine_vectors(df$lcl, df$ucl) |
756 | 4x |
df |
757 |
} |
1 |
#' Cox proportional hazards regression |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Fits a Cox regression model and estimates hazard ratio to describe the effect size in a survival analysis. |
|
6 |
#' |
|
7 |
#' @inheritParams argument_convention |
|
8 |
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("summarize_coxreg")` |
|
9 |
#' to see available statistics for this function. |
|
10 |
#' |
|
11 |
#' @details Cox models are the most commonly used methods to estimate the magnitude of |
|
12 |
#' the effect in survival analysis. It assumes proportional hazards: the ratio |
|
13 |
#' of the hazards between groups (e.g., two arms) is constant over time. |
|
14 |
#' This ratio is referred to as the "hazard ratio" (HR) and is one of the |
|
15 |
#' most commonly reported metrics to describe the effect size in survival |
|
16 |
#' analysis (NEST Team, 2020). |
|
17 |
#' |
|
18 |
#' @seealso [fit_coxreg] for relevant fitting functions, [h_cox_regression] for relevant |
|
19 |
#' helper functions, and [tidy_coxreg] for custom tidy methods. |
|
20 |
#' |
|
21 |
#' @examples |
|
22 |
#' library(survival) |
|
23 |
#' |
|
24 |
#' # Testing dataset [survival::bladder]. |
|
25 |
#' set.seed(1, kind = "Mersenne-Twister") |
|
26 |
#' dta_bladder <- with( |
|
27 |
#' data = bladder[bladder$enum < 5, ], |
|
28 |
#' tibble::tibble( |
|
29 |
#' TIME = stop, |
|
30 |
#' STATUS = event, |
|
31 |
#' ARM = as.factor(rx), |
|
32 |
#' COVAR1 = as.factor(enum) %>% formatters::with_label("A Covariate Label"), |
|
33 |
#' COVAR2 = factor( |
|
34 |
#' sample(as.factor(enum)), |
|
35 |
#' levels = 1:4, labels = c("F", "F", "M", "M") |
|
36 |
#' ) %>% formatters::with_label("Sex (F/M)") |
|
37 |
#' ) |
|
38 |
#' ) |
|
39 |
#' dta_bladder$AGE <- sample(20:60, size = nrow(dta_bladder), replace = TRUE) |
|
40 |
#' dta_bladder$STUDYID <- factor("X") |
|
41 |
#' |
|
42 |
#' u1_variables <- list( |
|
43 |
#' time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2") |
|
44 |
#' ) |
|
45 |
#' |
|
46 |
#' u2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2")) |
|
47 |
#' |
|
48 |
#' m1_variables <- list( |
|
49 |
#' time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2") |
|
50 |
#' ) |
|
51 |
#' |
|
52 |
#' m2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2")) |
|
53 |
#' |
|
54 |
#' @name cox_regression |
|
55 |
#' @order 1 |
|
56 |
NULL |
|
57 | ||
58 |
#' @describeIn cox_regression Statistics function that transforms results tabulated |
|
59 |
#' from [fit_coxreg_univar()] or [fit_coxreg_multivar()] into a list. |
|
60 |
#' |
|
61 |
#' @param model_df (`data.frame`)\cr contains the resulting model fit from a [fit_coxreg] |
|
62 |
#' function with tidying applied via [broom::tidy()]. |
|
63 |
#' @param .stats (`character`)\cr the names of statistics to be reported among: |
|
64 |
#' * `n`: number of observations (univariate only) |
|
65 |
#' * `hr`: hazard ratio |
|
66 |
#' * `ci`: confidence interval |
|
67 |
#' * `pval`: p-value of the treatment effect |
|
68 |
#' * `pval_inter`: p-value of the interaction effect between the treatment and the covariate (univariate only) |
|
69 |
#' @param .which_vars (`character`)\cr which rows should statistics be returned for from the given model. |
|
70 |
#' Defaults to `"all"`. Other options include `"var_main"` for main effects, `"inter"` for interaction effects, |
|
71 |
#' and `"multi_lvl"` for multivariate model covariate level rows. When `.which_vars` is `"all"`, specific |
|
72 |
#' variables can be selected by specifying `.var_nms`. |
|
73 |
#' @param .var_nms (`character`)\cr the `term` value of rows in `df` for which `.stats` should be returned. Typically |
|
74 |
#' this is the name of a variable. If using variable labels, `var` should be a vector of both the desired |
|
75 |
#' variable name and the variable label in that order to see all `.stats` related to that variable. When `.which_vars` |
|
76 |
#' is `"var_main"`, `.var_nms` should be only the variable name. |
|
77 |
#' |
|
78 |
#' @return |
|
79 |
#' * `s_coxreg()` returns the selected statistic for from the Cox regression model for the selected variable(s). |
|
80 |
#' |
|
81 |
#' @examples |
|
82 |
#' # s_coxreg |
|
83 |
#' |
|
84 |
#' # Univariate |
|
85 |
#' univar_model <- fit_coxreg_univar(variables = u1_variables, data = dta_bladder) |
|
86 |
#' df1 <- broom::tidy(univar_model) |
|
87 |
#' |
|
88 |
#' s_coxreg(model_df = df1, .stats = "hr") |
|
89 |
#' |
|
90 |
#' # Univariate with interactions |
|
91 |
#' univar_model_inter <- fit_coxreg_univar( |
|
92 |
#' variables = u1_variables, control = control_coxreg(interaction = TRUE), data = dta_bladder |
|
93 |
#' ) |
|
94 |
#' df1_inter <- broom::tidy(univar_model_inter) |
|
95 |
#' |
|
96 |
#' s_coxreg(model_df = df1_inter, .stats = "hr", .which_vars = "inter", .var_nms = "COVAR1") |
|
97 |
#' |
|
98 |
#' # Univariate without treatment arm - only "COVAR2" covariate effects |
|
99 |
#' univar_covs_model <- fit_coxreg_univar(variables = u2_variables, data = dta_bladder) |
|
100 |
#' df1_covs <- broom::tidy(univar_covs_model) |
|
101 |
#' |
|
102 |
#' s_coxreg(model_df = df1_covs, .stats = "hr", .var_nms = c("COVAR2", "Sex (F/M)")) |
|
103 |
#' |
|
104 |
#' # Multivariate. |
|
105 |
#' multivar_model <- fit_coxreg_multivar(variables = m1_variables, data = dta_bladder) |
|
106 |
#' df2 <- broom::tidy(multivar_model) |
|
107 |
#' |
|
108 |
#' s_coxreg(model_df = df2, .stats = "pval", .which_vars = "var_main", .var_nms = "COVAR1") |
|
109 |
#' s_coxreg( |
|
110 |
#' model_df = df2, .stats = "pval", .which_vars = "multi_lvl", |
|
111 |
#' .var_nms = c("COVAR1", "A Covariate Label") |
|
112 |
#' ) |
|
113 |
#' |
|
114 |
#' # Multivariate without treatment arm - only "COVAR1" main effect |
|
115 |
#' multivar_covs_model <- fit_coxreg_multivar(variables = m2_variables, data = dta_bladder) |
|
116 |
#' df2_covs <- broom::tidy(multivar_covs_model) |
|
117 |
#' |
|
118 |
#' s_coxreg(model_df = df2_covs, .stats = "hr") |
|
119 |
#' |
|
120 |
#' @export |
|
121 |
s_coxreg <- function(model_df, .stats, .which_vars = "all", .var_nms = NULL) { |
|
122 | 194x |
assert_df_with_variables(model_df, list(term = "term", stat = .stats)) |
123 | 194x |
checkmate::assert_multi_class(model_df$term, classes = c("factor", "character")) |
124 | 194x |
model_df$term <- as.character(model_df$term) |
125 | 194x |
.var_nms <- .var_nms[!is.na(.var_nms)] |
126 | ||
127 | 192x |
if (length(.var_nms) > 0) model_df <- model_df[model_df$term %in% .var_nms, ] |
128 | 39x |
if (.which_vars == "multi_lvl") model_df$term <- tail(.var_nms, 1) |
129 | ||
130 |
# We need a list with names corresponding to the stats to display of equal length to the list of stats. |
|
131 | 194x |
y <- split(model_df, f = model_df$term, drop = FALSE) |
132 | 194x |
y <- stats::setNames(y, nm = rep(.stats, length(y))) |
133 | ||
134 | 194x |
if (.which_vars == "var_main") { |
135 | 84x |
y <- lapply(y, function(x) x[1, ]) # only main effect |
136 | 110x |
} else if (.which_vars %in% c("inter", "multi_lvl")) { |
137 | 80x |
y <- lapply(y, function(x) if (nrow(y[[1]]) > 1) x[-1, ] else x) # exclude main effect |
138 |
} |
|
139 | ||
140 | 194x |
lapply( |
141 | 194x |
X = y, |
142 | 194x |
FUN = function(x) { |
143 | 198x |
z <- as.list(x[[.stats]]) |
144 | 198x |
stats::setNames(z, nm = x$term_label) |
145 |
} |
|
146 |
) |
|
147 |
} |
|
148 | ||
149 |
#' @describeIn cox_regression Analysis function which is used as `afun` in [rtables::analyze()] |
|
150 |
#' and `cfun` in [rtables::summarize_row_groups()] within `summarize_coxreg()`. |
|
151 |
#' |
|
152 |
#' @param eff (`flag`)\cr whether treatment effect should be calculated. Defaults to `FALSE`. |
|
153 |
#' @param var_main (`flag`)\cr whether main effects should be calculated. Defaults to `FALSE`. |
|
154 |
#' @param na_str (`string`)\cr custom string to replace all `NA` values with. Defaults to `""`. |
|
155 |
#' @param cache_env (`environment`)\cr an environment object used to cache the regression model in order to |
|
156 |
#' avoid repeatedly fitting the same model for every row in the table. Defaults to `NULL` (no caching). |
|
157 |
#' @param varlabels (`list`)\cr a named list corresponds to the names of variables found in data, passed |
|
158 |
#' as a named list and corresponding to time, event, arm, strata, and covariates terms. If arm is missing |
|
159 |
#' from variables, then only Cox model(s) including the covariates will be fitted and the corresponding |
|
160 |
#' effect estimates will be tabulated later. |
|
161 |
#' |
|
162 |
#' @return |
|
163 |
#' * `a_coxreg()` returns formatted [rtables::CellValue()]. |
|
164 |
#' |
|
165 |
#' @examples |
|
166 |
#' a_coxreg( |
|
167 |
#' df = dta_bladder, |
|
168 |
#' labelstr = "Label 1", |
|
169 |
#' variables = u1_variables, |
|
170 |
#' .spl_context = list(value = "COVAR1"), |
|
171 |
#' .stats = "n", |
|
172 |
#' .formats = "xx" |
|
173 |
#' ) |
|
174 |
#' |
|
175 |
#' a_coxreg( |
|
176 |
#' df = dta_bladder, |
|
177 |
#' labelstr = "", |
|
178 |
#' variables = u1_variables, |
|
179 |
#' .spl_context = list(value = "COVAR2"), |
|
180 |
#' .stats = "pval", |
|
181 |
#' .formats = "xx.xxxx" |
|
182 |
#' ) |
|
183 |
#' |
|
184 |
#' @export |
|
185 |
a_coxreg <- function(df, |
|
186 |
labelstr, |
|
187 |
eff = FALSE, |
|
188 |
var_main = FALSE, |
|
189 |
multivar = FALSE, |
|
190 |
variables, |
|
191 |
at = list(), |
|
192 |
control = control_coxreg(), |
|
193 |
.spl_context, |
|
194 |
.stats, |
|
195 |
.formats, |
|
196 |
.indent_mods = NULL, |
|
197 |
na_str = "", |
|
198 |
cache_env = NULL) { |
|
199 | 191x |
cov_no_arm <- !multivar && !"arm" %in% names(variables) && control$interaction # special case: univar no arm |
200 | 191x |
cov <- tail(.spl_context$value, 1) # current variable/covariate |
201 | 191x |
var_lbl <- formatters::var_labels(df)[cov] # check for df labels |
202 | 191x |
if (length(labelstr) > 1) { |
203 | ! |
labelstr <- if (cov %in% names(labelstr)) labelstr[[cov]] else var_lbl # use df labels if none |
204 | 191x |
} else if (!is.na(var_lbl) && labelstr == cov && cov %in% variables$covariates) { |
205 | 62x |
labelstr <- var_lbl |
206 |
} |
|
207 | 191x |
if (eff || multivar || cov_no_arm) { |
208 | 82x |
control$interaction <- FALSE |
209 |
} else { |
|
210 | 109x |
variables$covariates <- cov |
211 | 40x |
if (var_main) control$interaction <- TRUE |
212 |
} |
|
213 | ||
214 | 191x |
if (is.null(cache_env[[cov]])) { |
215 | 30x |
if (!multivar) { |
216 | 23x |
model <- fit_coxreg_univar(variables = variables, data = df, at = at, control = control) %>% broom::tidy() |
217 |
} else { |
|
218 | 7x |
model <- fit_coxreg_multivar(variables = variables, data = df, control = control) %>% broom::tidy() |
219 |
} |
|
220 | 30x |
cache_env[[cov]] <- model |
221 |
} else { |
|
222 | 161x |
model <- cache_env[[cov]] |
223 |
} |
|
224 | 109x |
if (!multivar && !var_main) model[, "pval_inter"] <- NA_real_ |
225 | ||
226 | 191x |
if (cov_no_arm || (!cov_no_arm && !"arm" %in% names(variables) && is.numeric(df[[cov]]))) { |
227 | 15x |
multivar <- TRUE |
228 | 3x |
if (!cov_no_arm) var_main <- TRUE |
229 |
} |
|
230 | ||
231 | 191x |
vars_coxreg <- list(which_vars = "all", var_nms = NULL) |
232 | 191x |
if (eff) { |
233 | 40x |
if (multivar && !var_main) { # multivar treatment level |
234 | 6x |
var_lbl_arm <- formatters::var_labels(df)[[variables$arm]] |
235 | 6x |
vars_coxreg[c("var_nms", "which_vars")] <- list(c(variables$arm, var_lbl_arm), "multi_lvl") |
236 |
} else { # treatment effect |
|
237 | 34x |
vars_coxreg["var_nms"] <- variables$arm |
238 | 6x |
if (var_main) vars_coxreg["which_vars"] <- "var_main" |
239 |
} |
|
240 |
} else { |
|
241 | 151x |
if (!multivar || (multivar && var_main && !is.numeric(df[[cov]]))) { # covariate effect/level |
242 | 118x |
vars_coxreg[c("var_nms", "which_vars")] <- list(cov, "var_main") |
243 | 33x |
} else if (multivar) { # multivar covariate level |
244 | 33x |
vars_coxreg[c("var_nms", "which_vars")] <- list(c(cov, var_lbl), "multi_lvl") |
245 | 6x |
if (var_main) model[cov, .stats] <- NA_real_ |
246 |
} |
|
247 | 40x |
if (!multivar && !var_main && control$interaction) vars_coxreg["which_vars"] <- "inter" # interaction effect |
248 |
} |
|
249 | 191x |
var_vals <- s_coxreg(model, .stats, .which_vars = vars_coxreg$which_vars, .var_nms = vars_coxreg$var_nms)[[1]] |
250 | 191x |
var_names <- if (all(grepl("\\(reference = ", names(var_vals))) && labelstr != tail(.spl_context$value, 1)) { |
251 | 21x |
paste(c(labelstr, tail(strsplit(names(var_vals), " ")[[1]], 3)), collapse = " ") # "reference" main effect labels |
252 | 191x |
} else if ((!multivar && !eff && !(!var_main && control$interaction) && nchar(labelstr) > 0) || |
253 | 191x |
(multivar && var_main && is.numeric(df[[cov]]))) { # nolint |
254 | 47x |
labelstr # other main effect labels |
255 | 191x |
} else if (multivar && !eff && !var_main && is.numeric(df[[cov]])) { |
256 | 6x |
"All" # multivar numeric covariate |
257 |
} else { |
|
258 | 117x |
names(var_vals) |
259 |
} |
|
260 | 191x |
in_rows( |
261 | 191x |
.list = var_vals, .names = var_names, .labels = var_names, .indent_mods = .indent_mods, |
262 | 191x |
.formats = stats::setNames(rep(.formats, length(var_names)), var_names), |
263 | 191x |
.format_na_strs = stats::setNames(rep(na_str, length(var_names)), var_names) |
264 |
) |
|
265 |
} |
|
266 | ||
267 |
#' @describeIn cox_regression Layout-creating function which creates a Cox regression summary table |
|
268 |
#' layout. This function is a wrapper for several `rtables` layouting functions. This function |
|
269 |
#' is a wrapper for [rtables::analyze_colvars()] and [rtables::summarize_row_groups()]. |
|
270 |
#' |
|
271 |
#' @inheritParams fit_coxreg_univar |
|
272 |
#' @param multivar (`flag`)\cr whether multivariate Cox regression should run (defaults to `FALSE`), otherwise |
|
273 |
#' univariate Cox regression will run. |
|
274 |
#' @param common_var (`string`)\cr the name of a factor variable in the dataset which takes the same value |
|
275 |
#' for all rows. This should be created during pre-processing if no such variable currently exists. |
|
276 |
#' @param .section_div (`string` or `NA`)\cr string which should be repeated as a section divider between sections. |
|
277 |
#' Defaults to `NA` for no section divider. If a vector of two strings are given, the first will be used between |
|
278 |
#' treatment and covariate sections and the second between different covariates. |
|
279 |
#' |
|
280 |
#' @return |
|
281 |
#' * `summarize_coxreg()` returns a layout object suitable for passing to further layouting functions, |
|
282 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add a Cox regression table |
|
283 |
#' containing the chosen statistics to the table layout. |
|
284 |
#' |
|
285 |
#' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()] which also take the `variables`, `data`, |
|
286 |
#' `at` (univariate only), and `control` arguments but return unformatted univariate and multivariate |
|
287 |
#' Cox regression models, respectively. |
|
288 |
#' |
|
289 |
#' @examples |
|
290 |
#' # summarize_coxreg |
|
291 |
#' |
|
292 |
#' result_univar <- basic_table() %>% |
|
293 |
#' summarize_coxreg(variables = u1_variables) %>% |
|
294 |
#' build_table(dta_bladder) |
|
295 |
#' result_univar |
|
296 |
#' |
|
297 |
#' result_univar_covs <- basic_table() %>% |
|
298 |
#' summarize_coxreg( |
|
299 |
#' variables = u2_variables, |
|
300 |
#' ) %>% |
|
301 |
#' build_table(dta_bladder) |
|
302 |
#' result_univar_covs |
|
303 |
#' |
|
304 |
#' result_multivar <- basic_table() %>% |
|
305 |
#' summarize_coxreg( |
|
306 |
#' variables = m1_variables, |
|
307 |
#' multivar = TRUE, |
|
308 |
#' ) %>% |
|
309 |
#' build_table(dta_bladder) |
|
310 |
#' result_multivar |
|
311 |
#' |
|
312 |
#' result_multivar_covs <- basic_table() %>% |
|
313 |
#' summarize_coxreg( |
|
314 |
#' variables = m2_variables, |
|
315 |
#' multivar = TRUE, |
|
316 |
#' varlabels = c("Covariate 1", "Covariate 2") # custom labels |
|
317 |
#' ) %>% |
|
318 |
#' build_table(dta_bladder) |
|
319 |
#' result_multivar_covs |
|
320 |
#' |
|
321 |
#' @export |
|
322 |
#' @order 2 |
|
323 |
summarize_coxreg <- function(lyt, |
|
324 |
variables, |
|
325 |
control = control_coxreg(), |
|
326 |
at = list(), |
|
327 |
multivar = FALSE, |
|
328 |
common_var = "STUDYID", |
|
329 |
.stats = c("n", "hr", "ci", "pval", "pval_inter"), |
|
330 |
.formats = c( |
|
331 |
n = "xx", hr = "xx.xx", ci = "(xx.xx, xx.xx)", |
|
332 |
pval = "x.xxxx | (<0.0001)", pval_inter = "x.xxxx | (<0.0001)" |
|
333 |
), |
|
334 |
varlabels = NULL, |
|
335 |
.indent_mods = NULL, |
|
336 |
na_str = "", |
|
337 |
.section_div = NA_character_) { |
|
338 | 11x |
if (multivar && control$interaction) { |
339 | 1x |
warning(paste( |
340 | 1x |
"Interactions are not available for multivariate cox regression using summarize_coxreg.", |
341 | 1x |
"The model will be calculated without interaction effects." |
342 |
)) |
|
343 |
} |
|
344 | 11x |
if (control$interaction && !"arm" %in% names(variables)) { |
345 | 1x |
stop("To include interactions please specify 'arm' in variables.") |
346 |
} |
|
347 | ||
348 | 10x |
.stats <- if (!"arm" %in% names(variables) || multivar) { # only valid statistics |
349 | 4x |
intersect(c("hr", "ci", "pval"), .stats) |
350 | 10x |
} else if (control$interaction) { |
351 | 4x |
intersect(c("n", "hr", "ci", "pval", "pval_inter"), .stats) |
352 |
} else { |
|
353 | 2x |
intersect(c("n", "hr", "ci", "pval"), .stats) |
354 |
} |
|
355 | 10x |
stat_labels <- c( |
356 | 10x |
n = "n", hr = "Hazard Ratio", ci = paste0(control$conf_level * 100, "% CI"), |
357 | 10x |
pval = "p-value", pval_inter = "Interaction p-value" |
358 |
) |
|
359 | 10x |
stat_labels <- stat_labels[names(stat_labels) %in% .stats] |
360 | 10x |
.formats <- .formats[names(.formats) %in% .stats] |
361 | 10x |
env <- new.env() # create caching environment |
362 | ||
363 | 10x |
lyt <- lyt %>% |
364 | 10x |
split_cols_by_multivar( |
365 | 10x |
vars = rep(common_var, length(.stats)), |
366 | 10x |
varlabels = stat_labels, |
367 | 10x |
extra_args = list( |
368 | 10x |
.stats = .stats, .formats = .formats, .indent_mods = .indent_mods, na_str = rep(na_str, length(.stats)), |
369 | 10x |
cache_env = replicate(length(.stats), list(env)) |
370 |
) |
|
371 |
) |
|
372 | ||
373 | 10x |
if ("arm" %in% names(variables)) { # treatment effect |
374 | 8x |
lyt <- lyt %>% |
375 | 8x |
split_rows_by( |
376 | 8x |
common_var, |
377 | 8x |
split_label = "Treatment:", |
378 | 8x |
label_pos = "visible", |
379 | 8x |
child_labels = "hidden", |
380 | 8x |
section_div = head(.section_div, 1) |
381 |
) |
|
382 | 8x |
if (!multivar) { |
383 | 6x |
lyt <- lyt %>% |
384 | 6x |
analyze_colvars( |
385 | 6x |
afun = a_coxreg, |
386 | 6x |
na_str = na_str, |
387 | 6x |
extra_args = list( |
388 | 6x |
variables = variables, control = control, multivar = multivar, eff = TRUE, var_main = multivar, |
389 | 6x |
labelstr = "" |
390 |
) |
|
391 |
) |
|
392 |
} else { # treatment level effects |
|
393 | 2x |
lyt <- lyt %>% |
394 | 2x |
summarize_row_groups( |
395 | 2x |
cfun = a_coxreg, |
396 | 2x |
na_str = na_str, |
397 | 2x |
extra_args = list( |
398 | 2x |
variables = variables, control = control, multivar = multivar, eff = TRUE, var_main = multivar |
399 |
) |
|
400 |
) %>% |
|
401 | 2x |
analyze_colvars( |
402 | 2x |
afun = a_coxreg, |
403 | 2x |
na_str = na_str, |
404 | 2x |
extra_args = list(eff = TRUE, control = control, variables = variables, multivar = multivar, labelstr = "") |
405 |
) |
|
406 |
} |
|
407 |
} |
|
408 | ||
409 | 10x |
if ("covariates" %in% names(variables)) { # covariate main effects |
410 | 10x |
lyt <- lyt %>% |
411 | 10x |
split_rows_by_multivar( |
412 | 10x |
vars = variables$covariates, |
413 | 10x |
varlabels = varlabels, |
414 | 10x |
split_label = "Covariate:", |
415 | 10x |
nested = FALSE, |
416 | 10x |
child_labels = if (multivar || control$interaction || !"arm" %in% names(variables)) "default" else "hidden", |
417 | 10x |
section_div = tail(.section_div, 1) |
418 |
) |
|
419 | 10x |
if (multivar || control$interaction || !"arm" %in% names(variables)) { |
420 | 8x |
lyt <- lyt %>% |
421 | 8x |
summarize_row_groups( |
422 | 8x |
cfun = a_coxreg, |
423 | 8x |
na_str = na_str, |
424 | 8x |
extra_args = list( |
425 | 8x |
variables = variables, at = at, control = control, multivar = multivar, |
426 | 8x |
var_main = if (multivar) multivar else control$interaction |
427 |
) |
|
428 |
) |
|
429 |
} else { |
|
430 | ! |
if (!is.null(varlabels)) names(varlabels) <- variables$covariates |
431 | 2x |
lyt <- lyt %>% |
432 | 2x |
analyze_colvars( |
433 | 2x |
afun = a_coxreg, |
434 | 2x |
na_str = na_str, |
435 | 2x |
extra_args = list( |
436 | 2x |
variables = variables, at = at, control = control, multivar = multivar, |
437 | 2x |
var_main = if (multivar) multivar else control$interaction, |
438 | 2x |
labelstr = if (is.null(varlabels)) "" else varlabels |
439 |
) |
|
440 |
) |
|
441 |
} |
|
442 | ||
443 | 2x |
if (!"arm" %in% names(variables)) control$interaction <- TRUE # special case: univar no arm |
444 | 10x |
if (multivar || control$interaction) { # covariate level effects |
445 | 8x |
lyt <- lyt %>% |
446 | 8x |
analyze_colvars( |
447 | 8x |
afun = a_coxreg, |
448 | 8x |
na_str = na_str, |
449 | 8x |
extra_args = list(variables = variables, at = at, control = control, multivar = multivar, labelstr = ""), |
450 | 8x |
indent_mod = if (!"arm" %in% names(variables) || multivar) 0L else -1L |
451 |
) |
|
452 |
} |
|
453 |
} |
|
454 | ||
455 | 10x |
lyt |
456 |
} |
1 |
#' Odds ratio estimation |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Compares bivariate responses between two groups in terms of odds ratios |
|
6 |
#' along with a confidence interval. |
|
7 |
#' |
|
8 |
#' @inheritParams split_cols_by_groups |
|
9 |
#' @inheritParams argument_convention |
|
10 |
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("estimate_odds_ratio")` |
|
11 |
#' to see available statistics for this function. |
|
12 |
#' |
|
13 |
#' @details This function uses either logistic regression for unstratified |
|
14 |
#' analyses, or conditional logistic regression for stratified analyses. |
|
15 |
#' The Wald confidence interval with the specified confidence level is |
|
16 |
#' calculated. |
|
17 |
#' |
|
18 |
#' @note For stratified analyses, there is currently no implementation for conditional |
|
19 |
#' likelihood confidence intervals, therefore the likelihood confidence interval is not |
|
20 |
#' yet available as an option. Besides, when `rsp` contains only responders or non-responders, |
|
21 |
#' then the result values will be `NA`, because no odds ratio estimation is possible. |
|
22 |
#' |
|
23 |
#' @seealso Relevant helper function [h_odds_ratio()]. |
|
24 |
#' |
|
25 |
#' @name odds_ratio |
|
26 |
#' @order 1 |
|
27 |
NULL |
|
28 | ||
29 |
#' @describeIn odds_ratio Statistics function which estimates the odds ratio |
|
30 |
#' between a treatment and a control. A `variables` list with `arm` and `strata` |
|
31 |
#' variable names must be passed if a stratified analysis is required. |
|
32 |
#' |
|
33 |
#' @return |
|
34 |
#' * `s_odds_ratio()` returns a named list with the statistics `or_ci` |
|
35 |
#' (containing `est`, `lcl`, and `ucl`) and `n_tot`. |
|
36 |
#' |
|
37 |
#' @examples |
|
38 |
#' # Unstratified analysis. |
|
39 |
#' s_odds_ratio( |
|
40 |
#' df = subset(dta, grp == "A"), |
|
41 |
#' .var = "rsp", |
|
42 |
#' .ref_group = subset(dta, grp == "B"), |
|
43 |
#' .in_ref_col = FALSE, |
|
44 |
#' .df_row = dta |
|
45 |
#' ) |
|
46 |
#' |
|
47 |
#' # Stratified analysis. |
|
48 |
#' s_odds_ratio( |
|
49 |
#' df = subset(dta, grp == "A"), |
|
50 |
#' .var = "rsp", |
|
51 |
#' .ref_group = subset(dta, grp == "B"), |
|
52 |
#' .in_ref_col = FALSE, |
|
53 |
#' .df_row = dta, |
|
54 |
#' variables = list(arm = "grp", strata = "strata") |
|
55 |
#' ) |
|
56 |
#' |
|
57 |
#' @export |
|
58 |
s_odds_ratio <- function(df, |
|
59 |
.var, |
|
60 |
.ref_group, |
|
61 |
.in_ref_col, |
|
62 |
.df_row, |
|
63 |
variables = list(arm = NULL, strata = NULL), |
|
64 |
conf_level = 0.95, |
|
65 |
groups_list = NULL) { |
|
66 | 70x |
y <- list(or_ci = "", n_tot = "") |
67 | ||
68 | 70x |
if (!.in_ref_col) { |
69 | 70x |
assert_proportion_value(conf_level) |
70 | 70x |
assert_df_with_variables(df, list(rsp = .var)) |
71 | 70x |
assert_df_with_variables(.ref_group, list(rsp = .var)) |
72 | ||
73 | 70x |
if (is.null(variables$strata)) { |
74 | 57x |
data <- data.frame( |
75 | 57x |
rsp = c(.ref_group[[.var]], df[[.var]]), |
76 | 57x |
grp = factor( |
77 | 57x |
rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))), |
78 | 57x |
levels = c("ref", "Not-ref") |
79 |
) |
|
80 |
) |
|
81 | 57x |
y <- or_glm(data, conf_level = conf_level) |
82 |
} else { |
|
83 | 13x |
assert_df_with_variables(.df_row, c(list(rsp = .var), variables)) |
84 | ||
85 |
# The group variable prepared for clogit must be synchronised with combination groups definition. |
|
86 | 13x |
if (is.null(groups_list)) { |
87 | 12x |
ref_grp <- as.character(unique(.ref_group[[variables$arm]])) |
88 | 12x |
trt_grp <- as.character(unique(df[[variables$arm]])) |
89 | 12x |
grp <- stats::relevel(factor(.df_row[[variables$arm]]), ref = ref_grp) |
90 |
} else { |
|
91 |
# If more than one level in reference col. |
|
92 | 1x |
reference <- as.character(unique(.ref_group[[variables$arm]])) |
93 | 1x |
grp_ref_flag <- vapply( |
94 | 1x |
X = groups_list, |
95 | 1x |
FUN.VALUE = TRUE, |
96 | 1x |
FUN = function(x) all(reference %in% x) |
97 |
) |
|
98 | 1x |
ref_grp <- names(groups_list)[grp_ref_flag] |
99 | ||
100 |
# If more than one level in treatment col. |
|
101 | 1x |
treatment <- as.character(unique(df[[variables$arm]])) |
102 | 1x |
grp_trt_flag <- vapply( |
103 | 1x |
X = groups_list, |
104 | 1x |
FUN.VALUE = TRUE, |
105 | 1x |
FUN = function(x) all(treatment %in% x) |
106 |
) |
|
107 | 1x |
trt_grp <- names(groups_list)[grp_trt_flag] |
108 | ||
109 | 1x |
grp <- combine_levels(.df_row[[variables$arm]], levels = reference, new_level = ref_grp) |
110 | 1x |
grp <- combine_levels(grp, levels = treatment, new_level = trt_grp) |
111 |
} |
|
112 | ||
113 |
# The reference level in `grp` must be the same as in the `rtables` column split. |
|
114 | 13x |
data <- data.frame( |
115 | 13x |
rsp = .df_row[[.var]], |
116 | 13x |
grp = grp, |
117 | 13x |
strata = interaction(.df_row[variables$strata]) |
118 |
) |
|
119 | 13x |
y_all <- or_clogit(data, conf_level = conf_level) |
120 | 13x |
checkmate::assert_string(trt_grp) |
121 | 13x |
checkmate::assert_subset(trt_grp, names(y_all$or_ci)) |
122 | 12x |
y$or_ci <- y_all$or_ci[[trt_grp]] |
123 | 12x |
y$n_tot <- y_all$n_tot |
124 |
} |
|
125 |
} |
|
126 | ||
127 | 69x |
y$or_ci <- formatters::with_label( |
128 | 69x |
x = y$or_ci, |
129 | 69x |
label = paste0("Odds Ratio (", 100 * conf_level, "% CI)") |
130 |
) |
|
131 | ||
132 | 69x |
y$n_tot <- formatters::with_label( |
133 | 69x |
x = y$n_tot, |
134 | 69x |
label = "Total n" |
135 |
) |
|
136 | ||
137 | 69x |
y |
138 |
} |
|
139 | ||
140 |
#' @describeIn odds_ratio Formatted analysis function which is used as `afun` in `estimate_odds_ratio()`. |
|
141 |
#' |
|
142 |
#' @return |
|
143 |
#' * `a_odds_ratio()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
144 |
#' |
|
145 |
#' @examples |
|
146 |
#' a_odds_ratio( |
|
147 |
#' df = subset(dta, grp == "A"), |
|
148 |
#' .var = "rsp", |
|
149 |
#' .ref_group = subset(dta, grp == "B"), |
|
150 |
#' .in_ref_col = FALSE, |
|
151 |
#' .df_row = dta |
|
152 |
#' ) |
|
153 |
#' |
|
154 |
#' @export |
|
155 |
a_odds_ratio <- make_afun( |
|
156 |
s_odds_ratio, |
|
157 |
.formats = c(or_ci = "xx.xx (xx.xx - xx.xx)"), |
|
158 |
.indent_mods = c(or_ci = 1L) |
|
159 |
) |
|
160 | ||
161 |
#' @describeIn odds_ratio Layout-creating function which can take statistics function arguments |
|
162 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()]. |
|
163 |
#' |
|
164 |
#' @param ... arguments passed to `s_odds_ratio()`. |
|
165 |
#' |
|
166 |
#' @return |
|
167 |
#' * `estimate_odds_ratio()` returns a layout object suitable for passing to further layouting functions, |
|
168 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing |
|
169 |
#' the statistics from `s_odds_ratio()` to the table layout. |
|
170 |
#' |
|
171 |
#' @examples |
|
172 |
#' set.seed(12) |
|
173 |
#' dta <- data.frame( |
|
174 |
#' rsp = sample(c(TRUE, FALSE), 100, TRUE), |
|
175 |
#' grp = factor(rep(c("A", "B"), each = 50), levels = c("A", "B")), |
|
176 |
#' strata = factor(sample(c("C", "D"), 100, TRUE)) |
|
177 |
#' ) |
|
178 |
#' |
|
179 |
#' l <- basic_table() %>% |
|
180 |
#' split_cols_by(var = "grp", ref_group = "B") %>% |
|
181 |
#' estimate_odds_ratio(vars = "rsp") |
|
182 |
#' |
|
183 |
#' build_table(l, df = dta) |
|
184 |
#' |
|
185 |
#' @export |
|
186 |
#' @order 2 |
|
187 |
estimate_odds_ratio <- function(lyt, |
|
188 |
vars, |
|
189 |
variables = list(arm = NULL, strata = NULL), |
|
190 |
conf_level = 0.95, |
|
191 |
groups_list = NULL, |
|
192 |
na_str = default_na_str(), |
|
193 |
nested = TRUE, |
|
194 |
..., |
|
195 |
show_labels = "hidden", |
|
196 |
table_names = vars, |
|
197 |
.stats = "or_ci", |
|
198 |
.formats = NULL, |
|
199 |
.labels = NULL, |
|
200 |
.indent_mods = NULL) { |
|
201 | 4x |
extra_args <- list(variables = variables, conf_level = conf_level, groups_list = groups_list, ...) |
202 | ||
203 | 4x |
afun <- make_afun( |
204 | 4x |
a_odds_ratio, |
205 | 4x |
.stats = .stats, |
206 | 4x |
.formats = .formats, |
207 | 4x |
.labels = .labels, |
208 | 4x |
.indent_mods = .indent_mods |
209 |
) |
|
210 | ||
211 | 4x |
analyze( |
212 | 4x |
lyt, |
213 | 4x |
vars, |
214 | 4x |
afun = afun, |
215 | 4x |
na_str = na_str, |
216 | 4x |
nested = nested, |
217 | 4x |
extra_args = extra_args, |
218 | 4x |
show_labels = show_labels, |
219 | 4x |
table_names = table_names |
220 |
) |
|
221 |
} |
|
222 | ||
223 |
#' Helper functions for odds ratio estimation |
|
224 |
#' |
|
225 |
#' @description `r lifecycle::badge("stable")` |
|
226 |
#' |
|
227 |
#' Functions to calculate odds ratios in [estimate_odds_ratio()]. |
|
228 |
#' |
|
229 |
#' @inheritParams argument_convention |
|
230 |
#' @param data (`data.frame`)\cr data frame containing at least the variables `rsp` and `grp`, and optionally |
|
231 |
#' `strata` for [or_clogit()]. |
|
232 |
#' |
|
233 |
#' @return A named `list` of elements `or_ci` and `n_tot`. |
|
234 |
#' |
|
235 |
#' @seealso [odds_ratio] |
|
236 |
#' |
|
237 |
#' @name h_odds_ratio |
|
238 |
NULL |
|
239 | ||
240 |
#' @describeIn h_odds_ratio Estimates the odds ratio based on [stats::glm()]. Note that there must be |
|
241 |
#' exactly 2 groups in `data` as specified by the `grp` variable. |
|
242 |
#' |
|
243 |
#' @examples |
|
244 |
#' # Data with 2 groups. |
|
245 |
#' data <- data.frame( |
|
246 |
#' rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1)), |
|
247 |
#' grp = letters[c(1, 1, 1, 2, 2, 2, 1, 2)], |
|
248 |
#' strata = letters[c(1, 2, 1, 2, 2, 2, 1, 2)], |
|
249 |
#' stringsAsFactors = TRUE |
|
250 |
#' ) |
|
251 |
#' |
|
252 |
#' # Odds ratio based on glm. |
|
253 |
#' or_glm(data, conf_level = 0.95) |
|
254 |
#' |
|
255 |
#' @export |
|
256 |
or_glm <- function(data, conf_level) { |
|
257 | 62x |
checkmate::assert_logical(data$rsp) |
258 | 62x |
assert_proportion_value(conf_level) |
259 | 62x |
assert_df_with_variables(data, list(rsp = "rsp", grp = "grp")) |
260 | 62x |
checkmate::assert_multi_class(data$grp, classes = c("factor", "character")) |
261 | ||
262 | 62x |
data$grp <- as_factor_keep_attributes(data$grp) |
263 | 62x |
assert_df_with_factors(data, list(val = "grp"), min.levels = 2, max.levels = 2) |
264 | 62x |
formula <- stats::as.formula("rsp ~ grp") |
265 | 62x |
model_fit <- stats::glm( |
266 | 62x |
formula = formula, data = data, |
267 | 62x |
family = stats::binomial(link = "logit") |
268 |
) |
|
269 | ||
270 |
# Note that here we need to discard the intercept. |
|
271 | 62x |
or <- exp(stats::coef(model_fit)[-1]) |
272 | 62x |
or_ci <- exp( |
273 | 62x |
stats::confint.default(model_fit, level = conf_level)[-1, , drop = FALSE] |
274 |
) |
|
275 | ||
276 | 62x |
values <- stats::setNames(c(or, or_ci), c("est", "lcl", "ucl")) |
277 | 62x |
n_tot <- stats::setNames(nrow(model_fit$model), "n_tot") |
278 | ||
279 | 62x |
list(or_ci = values, n_tot = n_tot) |
280 |
} |
|
281 | ||
282 |
#' @describeIn h_odds_ratio Estimates the odds ratio based on [survival::clogit()]. This is done for |
|
283 |
#' the whole data set including all groups, since the results are not the same as when doing |
|
284 |
#' pairwise comparisons between the groups. |
|
285 |
#' |
|
286 |
#' @examples |
|
287 |
#' # Data with 3 groups. |
|
288 |
#' data <- data.frame( |
|
289 |
#' rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0)), |
|
290 |
#' grp = letters[c(1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3)], |
|
291 |
#' strata = LETTERS[c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)], |
|
292 |
#' stringsAsFactors = TRUE |
|
293 |
#' ) |
|
294 |
#' |
|
295 |
#' # Odds ratio based on stratified estimation by conditional logistic regression. |
|
296 |
#' or_clogit(data, conf_level = 0.95) |
|
297 |
#' |
|
298 |
#' @export |
|
299 |
or_clogit <- function(data, conf_level) { |
|
300 | 16x |
checkmate::assert_logical(data$rsp) |
301 | 16x |
assert_proportion_value(conf_level) |
302 | 16x |
assert_df_with_variables(data, list(rsp = "rsp", grp = "grp", strata = "strata")) |
303 | 16x |
checkmate::assert_multi_class(data$grp, classes = c("factor", "character")) |
304 | 16x |
checkmate::assert_multi_class(data$strata, classes = c("factor", "character")) |
305 | ||
306 | 16x |
data$grp <- as_factor_keep_attributes(data$grp) |
307 | 16x |
data$strata <- as_factor_keep_attributes(data$strata) |
308 | ||
309 |
# Deviation from convention: `survival::strata` must be simply `strata`. |
|
310 | 16x |
formula <- stats::as.formula("rsp ~ grp + strata(strata)") |
311 | 16x |
model_fit <- clogit_with_tryCatch(formula = formula, data = data) |
312 | ||
313 |
# Create a list with one set of OR estimates and CI per coefficient, i.e. |
|
314 |
# comparison of one group vs. the reference group. |
|
315 | 16x |
coef_est <- stats::coef(model_fit) |
316 | 16x |
ci_est <- stats::confint(model_fit, level = conf_level) |
317 | 16x |
or_ci <- list() |
318 | 16x |
for (coef_name in names(coef_est)) { |
319 | 18x |
grp_name <- gsub("^grp", "", x = coef_name) |
320 | 18x |
or_ci[[grp_name]] <- stats::setNames( |
321 | 18x |
object = exp(c(coef_est[coef_name], ci_est[coef_name, , drop = TRUE])), |
322 | 18x |
nm = c("est", "lcl", "ucl") |
323 |
) |
|
324 |
} |
|
325 | 16x |
list(or_ci = or_ci, n_tot = c(n_tot = model_fit$n)) |
326 |
} |
1 |
#' Incidence rate |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Estimate the event rate adjusted for person-years at risk, otherwise known |
|
6 |
#' as incidence rate. Primary analysis variable is the person-years at risk. |
|
7 |
#' |
|
8 |
#' @inheritParams argument_convention |
|
9 |
#' @param control (`list`)\cr parameters for estimation details, specified by using |
|
10 |
#' the helper function [control_incidence_rate()]. Possible parameter options are: |
|
11 |
#' * `conf_level` (`proportion`)\cr confidence level for the estimated incidence rate. |
|
12 |
#' * `conf_type` (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar` |
|
13 |
#' for confidence interval type. |
|
14 |
#' * `input_time_unit` (`string`)\cr `day`, `week`, `month`, or `year` (default) |
|
15 |
#' indicating time unit for data input. |
|
16 |
#' * `num_pt_year` (`numeric`)\cr time unit for desired output (in person-years). |
|
17 |
#' @param n_events (`integer(1)`)\cr number of events observed. |
|
18 |
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("estimate_incidence_rate")` |
|
19 |
#' to see available statistics for this function. |
|
20 |
#' |
|
21 |
#' @seealso [control_incidence_rate()] and helper functions [h_incidence_rate]. |
|
22 |
#' |
|
23 |
#' @name incidence_rate |
|
24 |
#' @order 1 |
|
25 |
NULL |
|
26 | ||
27 |
#' @describeIn incidence_rate Statistics function which estimates the incidence rate and the |
|
28 |
#' associated confidence interval. |
|
29 |
#' |
|
30 |
#' @return |
|
31 |
#' * `s_incidence_rate()` returns the following statistics: |
|
32 |
#' - `person_years`: Total person-years at risk. |
|
33 |
#' - `n_events`: Total number of events observed. |
|
34 |
#' - `rate`: Estimated incidence rate. |
|
35 |
#' - `rate_ci`: Confidence interval for the incidence rate. |
|
36 |
#' |
|
37 |
#' @keywords internal |
|
38 |
s_incidence_rate <- function(df, |
|
39 |
.var, |
|
40 |
n_events, |
|
41 |
is_event, |
|
42 |
control = control_incidence_rate()) { |
|
43 | 1x |
if (!missing(is_event)) { |
44 | ! |
warning("argument is_event will be deprecated. Please use n_events.") |
45 | ||
46 | ! |
if (missing(n_events)) { |
47 | ! |
assert_df_with_variables(df, list(tte = .var, is_event = is_event)) |
48 | ! |
checkmate::assert_string(.var) |
49 | ! |
checkmate::assert_logical(df[[is_event]], any.missing = FALSE) |
50 | ! |
checkmate::assert_numeric(df[[.var]], any.missing = FALSE) |
51 | ! |
n_events <- is_event |
52 |
} |
|
53 |
} else { |
|
54 | 1x |
assert_df_with_variables(df, list(tte = .var, n_events = n_events)) |
55 | 1x |
checkmate::assert_string(.var) |
56 | 1x |
checkmate::assert_numeric(df[[.var]], any.missing = FALSE) |
57 | 1x |
checkmate::assert_integer(df[[n_events]], any.missing = FALSE) |
58 |
} |
|
59 | ||
60 | 1x |
input_time_unit <- control$input_time_unit |
61 | 1x |
num_pt_year <- control$num_pt_year |
62 | 1x |
conf_level <- control$conf_level |
63 | 1x |
person_years <- sum(df[[.var]], na.rm = TRUE) * ( |
64 | 1x |
1 * (input_time_unit == "year") + |
65 | 1x |
1 / 12 * (input_time_unit == "month") + |
66 | 1x |
1 / 52.14 * (input_time_unit == "week") + |
67 | 1x |
1 / 365.24 * (input_time_unit == "day") |
68 |
) |
|
69 | 1x |
n_events <- sum(df[[n_events]], na.rm = TRUE) |
70 | ||
71 | 1x |
result <- h_incidence_rate( |
72 | 1x |
person_years, |
73 | 1x |
n_events, |
74 | 1x |
control |
75 |
) |
|
76 | 1x |
list( |
77 | 1x |
person_years = formatters::with_label(person_years, "Total patient-years at risk"), |
78 | 1x |
n_events = formatters::with_label(n_events, "Number of adverse events observed"), |
79 | 1x |
rate = formatters::with_label(result$rate, paste("AE rate per", num_pt_year, "patient-years")), |
80 | 1x |
rate_ci = formatters::with_label(result$rate_ci, f_conf_level(conf_level)) |
81 |
) |
|
82 |
} |
|
83 | ||
84 |
#' @describeIn incidence_rate Formatted analysis function which is used as `afun` |
|
85 |
#' in `estimate_incidence_rate()`. |
|
86 |
#' |
|
87 |
#' @return |
|
88 |
#' * `a_incidence_rate()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
89 |
#' |
|
90 |
#' @keywords internal |
|
91 |
a_incidence_rate <- make_afun( |
|
92 |
s_incidence_rate, |
|
93 |
.formats = c( |
|
94 |
"person_years" = "xx.x", |
|
95 |
"n_events" = "xx", |
|
96 |
"rate" = "xx.xx", |
|
97 |
"rate_ci" = "(xx.xx, xx.xx)" |
|
98 |
) |
|
99 |
) |
|
100 | ||
101 |
#' @describeIn incidence_rate Layout-creating function which can take statistics function arguments |
|
102 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()]. |
|
103 |
#' |
|
104 |
#' @return |
|
105 |
#' * `estimate_incidence_rate()` returns a layout object suitable for passing to further layouting functions, |
|
106 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing |
|
107 |
#' the statistics from `s_incidence_rate()` to the table layout. |
|
108 |
#' |
|
109 |
#' @examples |
|
110 |
#' library(dplyr) |
|
111 |
#' |
|
112 |
#' df <- data.frame( |
|
113 |
#' USUBJID = as.character(seq(6)), |
|
114 |
#' CNSR = c(0, 1, 1, 0, 0, 0), |
|
115 |
#' AVAL = c(10.1, 20.4, 15.3, 20.8, 18.7, 23.4), |
|
116 |
#' ARM = factor(c("A", "A", "A", "B", "B", "B")) |
|
117 |
#' ) %>% |
|
118 |
#' mutate(is_event = CNSR == 0) %>% |
|
119 |
#' mutate(n_events = as.integer(is_event)) |
|
120 |
#' |
|
121 |
#' basic_table() %>% |
|
122 |
#' split_cols_by("ARM") %>% |
|
123 |
#' add_colcounts() %>% |
|
124 |
#' estimate_incidence_rate( |
|
125 |
#' vars = "AVAL", |
|
126 |
#' n_events = "n_events", |
|
127 |
#' control = control_incidence_rate( |
|
128 |
#' input_time_unit = "month", |
|
129 |
#' num_pt_year = 100 |
|
130 |
#' ) |
|
131 |
#' ) %>% |
|
132 |
#' build_table(df) |
|
133 |
#' |
|
134 |
#' @export |
|
135 |
#' @order 2 |
|
136 |
estimate_incidence_rate <- function(lyt, |
|
137 |
vars, |
|
138 |
n_events, |
|
139 |
control = control_incidence_rate(), |
|
140 |
na_str = default_na_str(), |
|
141 |
nested = TRUE, |
|
142 |
..., |
|
143 |
show_labels = "hidden", |
|
144 |
table_names = vars, |
|
145 |
.stats = NULL, |
|
146 |
.formats = NULL, |
|
147 |
.labels = NULL, |
|
148 |
.indent_mods = NULL) { |
|
149 | 1x |
extra_args <- list(n_events = n_events, control = control, ...) |
150 | ||
151 | 1x |
afun <- make_afun( |
152 | 1x |
a_incidence_rate, |
153 | 1x |
.stats = .stats, |
154 | 1x |
.formats = .formats, |
155 | 1x |
.labels = .labels, |
156 | 1x |
.indent_mods = .indent_mods |
157 |
) |
|
158 | ||
159 | 1x |
analyze( |
160 | 1x |
lyt, |
161 | 1x |
vars, |
162 | 1x |
show_labels = show_labels, |
163 | 1x |
table_names = table_names, |
164 | 1x |
afun = afun, |
165 | 1x |
na_str = na_str, |
166 | 1x |
nested = nested, |
167 | 1x |
extra_args = extra_args |
168 |
) |
|
169 |
} |
|
170 | ||
171 |
#' Helper functions for incidence rate |
|
172 |
#' |
|
173 |
#' @description `r lifecycle::badge("stable")` |
|
174 |
#' |
|
175 |
#' @param control (`list`)\cr parameters for estimation details, specified by using |
|
176 |
#' the helper function [control_incidence_rate()]. Possible parameter options are: |
|
177 |
#' * `conf_level`: (`proportion`)\cr confidence level for the estimated incidence rate. |
|
178 |
#' * `conf_type`: (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar` |
|
179 |
#' for confidence interval type. |
|
180 |
#' * `input_time_unit`: (`string`)\cr `day`, `week`, `month`, or `year` (default) |
|
181 |
#' indicating time unit for data input. |
|
182 |
#' * `num_pt_year`: (`numeric`)\cr time unit for desired output (in person-years). |
|
183 |
#' @param person_years (`numeric(1)`)\cr total person-years at risk. |
|
184 |
#' @param alpha (`numeric(1)`)\cr two-sided alpha-level for confidence interval. |
|
185 |
#' @param n_events (`integer(1)`)\cr number of events observed. |
|
186 |
#' |
|
187 |
#' @return Estimated incidence rate, `rate`, and associated confidence interval, `rate_ci`. |
|
188 |
#' |
|
189 |
#' @seealso [incidence_rate] |
|
190 |
#' |
|
191 |
#' @name h_incidence_rate |
|
192 |
NULL |
|
193 | ||
194 |
#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and |
|
195 |
#' associated confidence interval based on the normal approximation for the |
|
196 |
#' incidence rate. Unit is one person-year. |
|
197 |
#' |
|
198 |
#' @examples |
|
199 |
#' h_incidence_rate_normal(200, 2) |
|
200 |
#' |
|
201 |
#' @export |
|
202 |
h_incidence_rate_normal <- function(person_years, |
|
203 |
n_events, |
|
204 |
alpha = 0.05) { |
|
205 | 1x |
checkmate::assert_number(person_years) |
206 | 1x |
checkmate::assert_number(n_events) |
207 | 1x |
assert_proportion_value(alpha) |
208 | ||
209 | 1x |
est <- n_events / person_years |
210 | 1x |
se <- sqrt(est / person_years) |
211 | 1x |
ci <- est + c(-1, 1) * stats::qnorm(1 - alpha / 2) * se |
212 | ||
213 | 1x |
list(rate = est, rate_ci = ci) |
214 |
} |
|
215 | ||
216 |
#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and |
|
217 |
#' associated confidence interval based on the normal approximation for the |
|
218 |
#' logarithm of the incidence rate. Unit is one person-year. |
|
219 |
#' |
|
220 |
#' @examples |
|
221 |
#' h_incidence_rate_normal_log(200, 2) |
|
222 |
#' |
|
223 |
#' @export |
|
224 |
h_incidence_rate_normal_log <- function(person_years, |
|
225 |
n_events, |
|
226 |
alpha = 0.05) { |
|
227 | 5x |
checkmate::assert_number(person_years) |
228 | 5x |
checkmate::assert_number(n_events) |
229 | 5x |
assert_proportion_value(alpha) |
230 | ||
231 | 5x |
rate_est <- n_events / person_years |
232 | 5x |
rate_se <- sqrt(rate_est / person_years) |
233 | 5x |
lrate_est <- log(rate_est) |
234 | 5x |
lrate_se <- rate_se / rate_est |
235 | 5x |
ci <- exp(lrate_est + c(-1, 1) * stats::qnorm(1 - alpha / 2) * lrate_se) |
236 | ||
237 | 5x |
list(rate = rate_est, rate_ci = ci) |
238 |
} |
|
239 | ||
240 |
#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and |
|
241 |
#' associated exact confidence interval. Unit is one person-year. |
|
242 |
#' |
|
243 |
#' @examples |
|
244 |
#' h_incidence_rate_exact(200, 2) |
|
245 |
#' |
|
246 |
#' @export |
|
247 |
h_incidence_rate_exact <- function(person_years, |
|
248 |
n_events, |
|
249 |
alpha = 0.05) { |
|
250 | 1x |
checkmate::assert_number(person_years) |
251 | 1x |
checkmate::assert_number(n_events) |
252 | 1x |
assert_proportion_value(alpha) |
253 | ||
254 | 1x |
est <- n_events / person_years |
255 | 1x |
lcl <- stats::qchisq(p = (alpha) / 2, df = 2 * n_events) / (2 * person_years) |
256 | 1x |
ucl <- stats::qchisq(p = 1 - (alpha) / 2, df = 2 * n_events + 2) / (2 * person_years) |
257 | ||
258 | 1x |
list(rate = est, rate_ci = c(lcl, ucl)) |
259 |
} |
|
260 | ||
261 |
#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and |
|
262 |
#' associated Byar's confidence interval. Unit is one person-year. |
|
263 |
#' |
|
264 |
#' @examples |
|
265 |
#' h_incidence_rate_byar(200, 2) |
|
266 |
#' |
|
267 |
#' @export |
|
268 |
h_incidence_rate_byar <- function(person_years, |
|
269 |
n_events, |
|
270 |
alpha = 0.05) { |
|
271 | 1x |
checkmate::assert_number(person_years) |
272 | 1x |
checkmate::assert_number(n_events) |
273 | 1x |
assert_proportion_value(alpha) |
274 | ||
275 | 1x |
est <- n_events / person_years |
276 | 1x |
seg_1 <- n_events + 0.5 |
277 | 1x |
seg_2 <- 1 - 1 / (9 * (n_events + 0.5)) |
278 | 1x |
seg_3 <- stats::qnorm(1 - alpha / 2) * sqrt(1 / (n_events + 0.5)) / 3 |
279 | 1x |
lcl <- seg_1 * ((seg_2 - seg_3)^3) / person_years |
280 | 1x |
ucl <- seg_1 * ((seg_2 + seg_3) ^ 3) / person_years # styler: off |
281 | ||
282 | 1x |
list(rate = est, rate_ci = c(lcl, ucl)) |
283 |
} |
|
284 | ||
285 |
#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and |
|
286 |
#' associated confidence interval. |
|
287 |
#' |
|
288 |
#' @keywords internal |
|
289 |
h_incidence_rate <- function(person_years, |
|
290 |
n_events, |
|
291 |
control = control_incidence_rate()) { |
|
292 | 4x |
alpha <- 1 - control$conf_level |
293 | 4x |
est <- switch(control$conf_type, |
294 | 4x |
normal = h_incidence_rate_normal(person_years, n_events, alpha), |
295 | 4x |
normal_log = h_incidence_rate_normal_log(person_years, n_events, alpha), |
296 | 4x |
exact = h_incidence_rate_exact(person_years, n_events, alpha), |
297 | 4x |
byar = h_incidence_rate_byar(person_years, n_events, alpha) |
298 |
) |
|
299 | ||
300 | 4x |
num_pt_year <- control$num_pt_year |
301 | 4x |
list( |
302 | 4x |
rate = est$rate * num_pt_year, |
303 | 4x |
rate_ci = est$rate_ci * num_pt_year |
304 |
) |
|
305 |
} |
1 |
#' Get default statistical methods and their associated formats, labels, and indent modifiers |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("experimental")` |
|
4 |
#' |
|
5 |
#' Utility functions to get valid statistic methods for different method groups |
|
6 |
#' (`.stats`) and their associated formats (`.formats`), labels (`.labels`), and indent modifiers |
|
7 |
#' (`.indent_mods`). This utility is used across `tern`, but some of its working principles can be |
|
8 |
#' seen in [analyze_vars()]. See notes to understand why this is experimental. |
|
9 |
#' |
|
10 |
#' @param stats (`character`)\cr statistical methods to get defaults for. |
|
11 |
#' |
|
12 |
#' @details |
|
13 |
#' Current choices for `type` are `counts` and `numeric` for [analyze_vars()] and affect `get_stats()`. |
|
14 |
#' |
|
15 |
#' @note |
|
16 |
#' These defaults are experimental because we use the names of functions to retrieve the default |
|
17 |
#' statistics. This should be generalized in groups of methods according to more reasonable groupings. |
|
18 |
#' |
|
19 |
#' @name default_stats_formats_labels |
|
20 |
NULL |
|
21 | ||
22 |
#' @describeIn default_stats_formats_labels Get statistics available for a given method |
|
23 |
#' group (analyze function). |
|
24 |
#' |
|
25 |
#' @param method_groups (`character`)\cr indicates the statistical method group (`tern` analyze function) |
|
26 |
#' to retrieve default statistics for. A character vector can be used to specify more than one statistical |
|
27 |
#' method group. |
|
28 |
#' @param stats_in (`character`)\cr statistics to retrieve for the selected method group. |
|
29 |
#' @param add_pval (`flag`)\cr should `"pval"` (or `"pval_counts"` if `method_groups` contains |
|
30 |
#' `"analyze_vars_counts"`) be added to the statistical methods? |
|
31 |
#' |
|
32 |
#' @return |
|
33 |
#' * `get_stats()` returns a `character` vector of statistical methods. |
|
34 |
#' |
|
35 |
#' @examples |
|
36 |
#' # analyze_vars is numeric |
|
37 |
#' num_stats <- get_stats("analyze_vars_numeric") # also the default |
|
38 |
#' |
|
39 |
#' # Other type |
|
40 |
#' cnt_stats <- get_stats("analyze_vars_counts") |
|
41 |
#' |
|
42 |
#' # Weirdly taking the pval from count_occurrences |
|
43 |
#' only_pval <- get_stats("count_occurrences", add_pval = TRUE, stats_in = "pval") |
|
44 |
#' |
|
45 |
#' # All count_occurrences |
|
46 |
#' all_cnt_occ <- get_stats("count_occurrences") |
|
47 |
#' |
|
48 |
#' # Multiple |
|
49 |
#' get_stats(c("count_occurrences", "analyze_vars_counts")) |
|
50 |
#' |
|
51 |
#' @export |
|
52 |
get_stats <- function(method_groups = "analyze_vars_numeric", stats_in = NULL, add_pval = FALSE) { |
|
53 | 405x |
checkmate::assert_character(method_groups) |
54 | 405x |
checkmate::assert_character(stats_in, null.ok = TRUE) |
55 | 405x |
checkmate::assert_flag(add_pval) |
56 | ||
57 |
# Default is still numeric |
|
58 | 405x |
if (any(method_groups == "analyze_vars")) { |
59 | 2x |
method_groups[method_groups == "analyze_vars"] <- "analyze_vars_numeric" |
60 |
} |
|
61 | ||
62 | 405x |
type_tmp <- ifelse(any(grepl("counts", method_groups)), "counts", "numeric") # for pval checks |
63 | ||
64 |
# Defaults for loop |
|
65 | 405x |
out <- NULL |
66 | ||
67 |
# Loop for multiple method groups |
|
68 | 405x |
for (mgi in method_groups) { |
69 | 415x |
out_tmp <- if (mgi %in% names(tern_default_stats)) { |
70 | 415x |
tern_default_stats[[mgi]] |
71 |
} else { |
|
72 | ! |
stop("The selected method group (", mgi, ") has no default statistical method.") |
73 |
} |
|
74 | 415x |
out <- unique(c(out, out_tmp)) |
75 |
} |
|
76 | ||
77 |
# If you added pval to the stats_in you certainly want it |
|
78 | 405x |
if (!is.null(stats_in) && any(grepl("^pval", stats_in))) { |
79 | 22x |
stats_in_pval_value <- stats_in[grepl("^pval", stats_in)] |
80 | ||
81 |
# Must be only one value between choices |
|
82 | 22x |
checkmate::assert_choice(stats_in_pval_value, c("pval", "pval_counts")) |
83 | ||
84 |
# Mismatch with counts and numeric |
|
85 | 21x |
if (any(grepl("counts", method_groups)) && stats_in_pval_value != "pval_counts" || |
86 | 21x |
any(grepl("numeric", method_groups)) && stats_in_pval_value != "pval") { # nolint |
87 | 2x |
stop( |
88 | 2x |
"Inserted p-value (", stats_in_pval_value, ") is not valid for type ", |
89 | 2x |
type_tmp, ". Use ", paste(ifelse(stats_in_pval_value == "pval", "pval_counts", "pval")), |
90 | 2x |
" instead." |
91 |
) |
|
92 |
} |
|
93 | ||
94 |
# Lets add it even if present (thanks to unique) |
|
95 | 19x |
add_pval <- TRUE |
96 |
} |
|
97 | ||
98 |
# Mainly used in "analyze_vars" but it could be necessary elsewhere |
|
99 | 402x |
if (isTRUE(add_pval)) { |
100 | 23x |
if (any(grepl("counts", method_groups))) { |
101 | 10x |
out <- unique(c(out, "pval_counts")) |
102 |
} else { |
|
103 | 13x |
out <- unique(c(out, "pval")) |
104 |
} |
|
105 |
} |
|
106 | ||
107 |
# Filtering for stats_in (character vector) |
|
108 | 402x |
if (!is.null(stats_in)) { |
109 | 377x |
out <- intersect(stats_in, out) # It orders them too |
110 |
} |
|
111 | ||
112 |
# If intersect did not find matches (and no pval?) -> error |
|
113 | 402x |
if (length(out) == 0) { |
114 | 2x |
stop( |
115 | 2x |
"The selected method group(s) (", paste0(method_groups, collapse = ", "), ")", |
116 | 2x |
" do not have the required default statistical methods:\n", |
117 | 2x |
paste0(stats_in, collapse = " ") |
118 |
) |
|
119 |
} |
|
120 | ||
121 | 400x |
out |
122 |
} |
|
123 | ||
124 |
#' @describeIn default_stats_formats_labels Get formats corresponding to a list of statistics. |
|
125 |
#' |
|
126 |
#' @param formats_in (named `vector`)\cr inserted formats to replace defaults. It can be a |
|
127 |
#' character vector from [formatters::list_valid_format_labels()] or a custom format function. |
|
128 |
#' |
|
129 |
#' @return |
|
130 |
#' * `get_formats_from_stats()` returns a named vector of formats (if present in either |
|
131 |
#' `tern_default_formats` or `formats_in`, otherwise `NULL`). Values can be taken from |
|
132 |
#' [formatters::list_valid_format_labels()] or a custom function (e.g. [formatting_functions]). |
|
133 |
#' |
|
134 |
#' @note Formats in `tern` and `rtables` can be functions that take in the table cell value and |
|
135 |
#' return a string. This is well documented in `vignette("custom_appearance", package = "rtables")`. |
|
136 |
#' |
|
137 |
#' @examples |
|
138 |
#' # Defaults formats |
|
139 |
#' get_formats_from_stats(num_stats) |
|
140 |
#' get_formats_from_stats(cnt_stats) |
|
141 |
#' get_formats_from_stats(only_pval) |
|
142 |
#' get_formats_from_stats(all_cnt_occ) |
|
143 |
#' |
|
144 |
#' # Addition of customs |
|
145 |
#' get_formats_from_stats(all_cnt_occ, formats_in = c("fraction" = c("xx"))) |
|
146 |
#' get_formats_from_stats(all_cnt_occ, formats_in = list("fraction" = c("xx.xx", "xx"))) |
|
147 |
#' |
|
148 |
#' @seealso [formatting_functions] |
|
149 |
#' |
|
150 |
#' @export |
|
151 |
get_formats_from_stats <- function(stats, formats_in = NULL) { |
|
152 | 403x |
checkmate::assert_character(stats, min.len = 1) |
153 |
# It may be a list if there is a function in the formats |
|
154 | 403x |
if (checkmate::test_list(formats_in, null.ok = TRUE)) { |
155 | 361x |
checkmate::assert_list(formats_in, null.ok = TRUE) |
156 |
# Or it may be a vector of characters |
|
157 |
} else { |
|
158 | 42x |
checkmate::assert_character(formats_in, null.ok = TRUE) |
159 |
} |
|
160 | ||
161 |
# Extract global defaults |
|
162 | 403x |
which_fmt <- match(stats, names(tern_default_formats)) |
163 | ||
164 |
# Select only needed formats from stats |
|
165 | 403x |
ret <- vector("list", length = length(stats)) # Returning a list is simpler |
166 | 403x |
ret[!is.na(which_fmt)] <- tern_default_formats[which_fmt[!is.na(which_fmt)]] |
167 | ||
168 | 403x |
out <- setNames(ret, stats) |
169 | ||
170 |
# Modify some with custom formats |
|
171 | 403x |
if (!is.null(formats_in)) { |
172 |
# Stats is the main |
|
173 | 44x |
common_names <- intersect(names(out), names(formats_in)) |
174 | 44x |
out[common_names] <- formats_in[common_names] |
175 |
} |
|
176 | ||
177 | 403x |
out |
178 |
} |
|
179 | ||
180 |
#' @describeIn default_stats_formats_labels Get labels corresponding to a list of statistics. |
|
181 |
#' |
|
182 |
#' @param labels_in (named `character`)\cr inserted labels to replace defaults. |
|
183 |
#' @param row_nms (`character`)\cr row names. Levels of a `factor` or `character` variable, each |
|
184 |
#' of which the statistics in `.stats` will be calculated for. If this parameter is set, these |
|
185 |
#' variable levels will be used as the defaults, and the names of the given custom values should |
|
186 |
#' correspond to levels (or have format `statistic.level`) instead of statistics. Can also be |
|
187 |
#' variable names if rows correspond to different variables instead of levels. Defaults to `NULL`. |
|
188 |
#' |
|
189 |
#' @return |
|
190 |
#' * `get_labels_from_stats()` returns a named `character` vector of labels (if present in either |
|
191 |
#' `tern_default_labels` or `labels_in`, otherwise `NULL`). |
|
192 |
#' |
|
193 |
#' @examples |
|
194 |
#' # Defaults labels |
|
195 |
#' get_labels_from_stats(num_stats) |
|
196 |
#' get_labels_from_stats(cnt_stats) |
|
197 |
#' get_labels_from_stats(only_pval) |
|
198 |
#' get_labels_from_stats(all_cnt_occ) |
|
199 |
#' |
|
200 |
#' # Addition of customs |
|
201 |
#' get_labels_from_stats(all_cnt_occ, labels_in = c("fraction" = "Fraction")) |
|
202 |
#' get_labels_from_stats(all_cnt_occ, labels_in = list("fraction" = c("Some more fractions"))) |
|
203 |
#' |
|
204 |
#' @export |
|
205 |
get_labels_from_stats <- function(stats, labels_in = NULL, row_nms = NULL) { |
|
206 | 387x |
checkmate::assert_character(stats, min.len = 1, null.ok = TRUE) |
207 | 387x |
checkmate::assert_character(row_nms, null.ok = TRUE) |
208 |
# It may be a list |
|
209 | 387x |
if (checkmate::test_list(labels_in, null.ok = TRUE)) { |
210 | 338x |
checkmate::assert_list(labels_in, null.ok = TRUE) |
211 |
# Or it may be a vector of characters |
|
212 |
} else { |
|
213 | 49x |
checkmate::assert_character(labels_in, null.ok = TRUE) |
214 |
} |
|
215 | ||
216 | 387x |
if (!is.null(row_nms)) { |
217 | 43x |
ret <- rep(row_nms, length(stats)) |
218 | 43x |
out <- setNames(ret, paste(rep(stats, each = length(row_nms)), ret, sep = ".")) |
219 | ||
220 | 43x |
if (!is.null(labels_in)) { |
221 | 1x |
lvl_lbls <- intersect(names(labels_in), row_nms) |
222 | 1x |
for (i in lvl_lbls) out[paste(stats, i, sep = ".")] <- labels_in[[i]] |
223 |
} |
|
224 |
} else { |
|
225 | 344x |
which_lbl <- match(stats, names(tern_default_labels)) |
226 | ||
227 | 344x |
ret <- vector("character", length = length(stats)) # it needs to be a character vector |
228 | 344x |
ret[!is.na(which_lbl)] <- tern_default_labels[which_lbl[!is.na(which_lbl)]] |
229 | ||
230 | 344x |
out <- setNames(ret, stats) |
231 |
} |
|
232 | ||
233 |
# Modify some with custom labels |
|
234 | 387x |
if (!is.null(labels_in)) { |
235 |
# Stats is the main |
|
236 | 49x |
common_names <- intersect(names(out), names(labels_in)) |
237 | 49x |
out[common_names] <- labels_in[common_names] |
238 |
} |
|
239 | ||
240 | 387x |
out |
241 |
} |
|
242 | ||
243 |
#' @describeIn default_stats_formats_labels Format indent modifiers for a given vector/list of statistics. |
|
244 |
#' |
|
245 |
#' @param indents_in (named `vector`)\cr inserted indent modifiers to replace defaults (default is `0L`). |
|
246 |
#' |
|
247 |
#' @return |
|
248 |
#' * `get_indents_from_stats()` returns a single indent modifier value to apply to all rows |
|
249 |
#' or a named numeric vector of indent modifiers (if present, otherwise `NULL`). |
|
250 |
#' |
|
251 |
#' @examples |
|
252 |
#' get_indents_from_stats(all_cnt_occ, indents_in = 3L) |
|
253 |
#' get_indents_from_stats(all_cnt_occ, indents_in = list(count = 2L, count_fraction = 5L)) |
|
254 |
#' get_indents_from_stats( |
|
255 |
#' all_cnt_occ, |
|
256 |
#' indents_in = list(a = 2L, count.a = 1L, count.b = 5L), row_nms = c("a", "b") |
|
257 |
#' ) |
|
258 |
#' |
|
259 |
#' @export |
|
260 |
get_indents_from_stats <- function(stats, indents_in = NULL, row_nms = NULL) { |
|
261 | 367x |
checkmate::assert_character(stats, min.len = 1) |
262 | 367x |
checkmate::assert_character(row_nms, null.ok = TRUE) |
263 |
# It may be a list |
|
264 | 367x |
if (checkmate::test_list(indents_in, null.ok = TRUE)) { |
265 | 336x |
checkmate::assert_list(indents_in, null.ok = TRUE) |
266 |
# Or it may be a vector of integers |
|
267 |
} else { |
|
268 | 31x |
checkmate::assert_integerish(indents_in, null.ok = TRUE) |
269 |
} |
|
270 | ||
271 | 367x |
if (is.null(names(indents_in)) && length(indents_in) == 1) { |
272 | 8x |
out <- rep(indents_in, length(stats) * if (!is.null(row_nms)) length(row_nms) else 1) |
273 | 8x |
return(out) |
274 |
} |
|
275 | ||
276 | 359x |
if (!is.null(row_nms)) { |
277 | 37x |
ret <- rep(0L, length(stats) * length(row_nms)) |
278 | 37x |
out <- setNames(ret, paste(rep(stats, each = length(row_nms)), rep(row_nms, length(stats)), sep = ".")) |
279 | ||
280 | 37x |
if (!is.null(indents_in)) { |
281 | 1x |
lvl_lbls <- intersect(names(indents_in), row_nms) |
282 | 1x |
for (i in lvl_lbls) out[paste(stats, i, sep = ".")] <- indents_in[[i]] |
283 |
} |
|
284 |
} else { |
|
285 | 322x |
ret <- rep(0L, length(stats)) |
286 | 322x |
out <- setNames(ret, stats) |
287 |
} |
|
288 | ||
289 |
# Modify some with custom labels |
|
290 | 359x |
if (!is.null(indents_in)) { |
291 |
# Stats is the main |
|
292 | 24x |
common_names <- intersect(names(out), names(indents_in)) |
293 | 24x |
out[common_names] <- indents_in[common_names] |
294 |
} |
|
295 | ||
296 | 359x |
out |
297 |
} |
|
298 | ||
299 |
#' Update labels according to control specifications |
|
300 |
#' |
|
301 |
#' @description `r lifecycle::badge("stable")` |
|
302 |
#' |
|
303 |
#' Given a list of statistic labels and and a list of control parameters, updates labels with a relevant |
|
304 |
#' control specification. For example, if control has element `conf_level` set to `0.9`, the default |
|
305 |
#' label for statistic `mean_ci` will be updated to `"Mean 90% CI"`. Any labels that are supplied |
|
306 |
#' via `labels_custom` will not be updated regardless of `control`. |
|
307 |
#' |
|
308 |
#' @param labels_default (named `character`)\cr a named vector of statistic labels to modify |
|
309 |
#' according to the control specifications. Labels that are explicitly defined in `labels_custom` will |
|
310 |
#' not be affected. |
|
311 |
#' @param labels_custom (named `character`)\cr named vector of labels that are customized by |
|
312 |
#' the user and should not be affected by `control`. |
|
313 |
#' @param control (named `list`)\cr list of control parameters to apply to adjust default labels. |
|
314 |
#' |
|
315 |
#' @return A named character vector of labels with control specifications applied to relevant labels. |
|
316 |
#' |
|
317 |
#' @examples |
|
318 |
#' control <- list(conf_level = 0.80, quantiles = c(0.1, 0.83), test_mean = 0.57) |
|
319 |
#' get_labels_from_stats(c("mean_ci", "quantiles", "mean_pval")) %>% |
|
320 |
#' labels_use_control(control = control) |
|
321 |
#' |
|
322 |
#' @export |
|
323 |
labels_use_control <- function(labels_default, control, labels_custom = NULL) { |
|
324 | 18x |
if ("conf_level" %in% names(control)) { |
325 | 18x |
labels_default <- sapply( |
326 | 18x |
names(labels_default), |
327 | 18x |
function(x) { |
328 | 74x |
if (!x %in% names(labels_custom)) { |
329 | 73x |
gsub(labels_default[[x]], pattern = "[0-9]+% CI", replacement = f_conf_level(control[["conf_level"]])) |
330 |
} else { |
|
331 | 1x |
labels_default[[x]] |
332 |
} |
|
333 |
} |
|
334 |
) |
|
335 |
} |
|
336 | 18x |
if ("quantiles" %in% names(control) && "quantiles" %in% names(labels_default) && |
337 | 18x |
!"quantiles" %in% names(labels_custom)) { # nolint |
338 | 14x |
labels_default["quantiles"] <- gsub( |
339 | 14x |
"[0-9]+% and [0-9]+", paste0(control[["quantiles"]][1] * 100, "% and ", control[["quantiles"]][2] * 100, ""), |
340 | 14x |
labels_default["quantiles"] |
341 |
) |
|
342 |
} |
|
343 | 18x |
if ("test_mean" %in% names(control) && "mean_pval" %in% names(labels_default) && |
344 | 18x |
!"mean_pval" %in% names(labels_custom)) { # nolint |
345 | 1x |
labels_default["mean_pval"] <- gsub( |
346 | 1x |
"p-value \\(H0: mean = [0-9\\.]+\\)", f_pval(control[["test_mean"]]), labels_default["mean_pval"] |
347 |
) |
|
348 |
} |
|
349 | ||
350 | 18x |
labels_default |
351 |
} |
|
352 | ||
353 |
#' @describeIn default_stats_formats_labels Named list of available statistics by method group for `tern`. |
|
354 |
#' |
|
355 |
#' @format |
|
356 |
#' * `tern_default_stats` is a named list of available statistics, with each element |
|
357 |
#' named for their corresponding statistical method group. |
|
358 |
#' |
|
359 |
#' @export |
|
360 |
tern_default_stats <- list( |
|
361 |
abnormal = c("fraction"), |
|
362 |
abnormal_by_baseline = c("fraction"), |
|
363 |
abnormal_by_marked = c("count_fraction", "count_fraction_fixed_dp"), |
|
364 |
abnormal_by_worst_grade = c("count_fraction", "count_fraction_fixed_dp"), |
|
365 |
abnormal_by_worst_grade_worsen = c("fraction"), |
|
366 |
analyze_patients_exposure_in_cols = c("n_patients", "sum_exposure"), |
|
367 |
analyze_vars_counts = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "n_blq"), |
|
368 |
analyze_vars_numeric = c( |
|
369 |
"n", "sum", "mean", "sd", "se", "mean_sd", "mean_se", "mean_ci", "mean_sei", "mean_sdi", "mean_pval", |
|
370 |
"median", "mad", "median_ci", "quantiles", "iqr", "range", "min", "max", "median_range", "cv", |
|
371 |
"geom_mean", "geom_mean_ci", "geom_cv" |
|
372 |
), |
|
373 |
count_cumulative = c("count_fraction", "count_fraction_fixed_dp"), |
|
374 |
count_missed_doses = c("n", "count_fraction", "count_fraction_fixed_dp"), |
|
375 |
count_occurrences = c("count", "count_fraction", "count_fraction_fixed_dp", "fraction"), |
|
376 |
count_occurrences_by_grade = c("count_fraction", "count_fraction_fixed_dp"), |
|
377 |
count_patients_with_event = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "n_blq"), |
|
378 |
count_patients_with_flags = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "n_blq"), |
|
379 |
count_values = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "n_blq"), |
|
380 |
coxph_pairwise = c("pvalue", "hr", "hr_ci", "n_tot", "n_tot_events"), |
|
381 |
estimate_incidence_rate = c("person_years", "n_events", "rate", "rate_ci"), |
|
382 |
estimate_multinomial_response = c("n_prop", "prop_ci"), |
|
383 |
estimate_odds_ratio = c("or_ci", "n_tot"), |
|
384 |
estimate_proportion = c("n_prop", "prop_ci"), |
|
385 |
estimate_proportion_diff = c("diff", "diff_ci"), |
|
386 |
summarize_ancova = c("n", "lsmean", "lsmean_diff", "lsmean_diff_ci", "pval"), |
|
387 |
summarize_coxreg = c("n", "hr", "ci", "pval", "pval_inter"), |
|
388 |
summarize_glm_count = c("n", "rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"), |
|
389 |
summarize_num_patients = c("unique", "nonunique", "unique_count"), |
|
390 |
summarize_patients_events_in_cols = c("unique", "all"), |
|
391 |
surv_time = c("median", "median_ci", "quantiles", "range_censor", "range_event", "range"), |
|
392 |
surv_timepoint = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci", "rate_diff", "rate_diff_ci", "ztest_pval"), |
|
393 |
tabulate_rsp_biomarkers = c("n_tot", "n_rsp", "prop", "or", "ci", "pval"), |
|
394 |
tabulate_rsp_subgroups = c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval"), |
|
395 |
tabulate_survival_biomarkers = c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"), |
|
396 |
tabulate_survival_subgroups = c("n_tot_events", "n_events", "n_tot", "n", "median", "hr", "ci", "pval"), |
|
397 |
test_proportion_diff = c("pval") |
|
398 |
) |
|
399 | ||
400 |
#' @describeIn default_stats_formats_labels Named vector of default formats for `tern`. |
|
401 |
#' |
|
402 |
#' @format |
|
403 |
#' * `tern_default_formats` is a named vector of available default formats, with each element |
|
404 |
#' named for their corresponding statistic. |
|
405 |
#' |
|
406 |
#' @export |
|
407 |
tern_default_formats <- c( |
|
408 |
fraction = format_fraction_fixed_dp, |
|
409 |
unique = format_count_fraction_fixed_dp, |
|
410 |
nonunique = "xx", |
|
411 |
unique_count = "xx", |
|
412 |
n = "xx.", |
|
413 |
count = "xx.", |
|
414 |
count_fraction = format_count_fraction, |
|
415 |
count_fraction_fixed_dp = format_count_fraction_fixed_dp, |
|
416 |
n_blq = "xx.", |
|
417 |
sum = "xx.x", |
|
418 |
mean = "xx.x", |
|
419 |
sd = "xx.x", |
|
420 |
se = "xx.x", |
|
421 |
mean_sd = "xx.x (xx.x)", |
|
422 |
mean_se = "xx.x (xx.x)", |
|
423 |
mean_ci = "(xx.xx, xx.xx)", |
|
424 |
mean_sei = "(xx.xx, xx.xx)", |
|
425 |
mean_sdi = "(xx.xx, xx.xx)", |
|
426 |
mean_pval = "x.xxxx | (<0.0001)", |
|
427 |
median = "xx.x", |
|
428 |
mad = "xx.x", |
|
429 |
median_ci = "(xx.xx, xx.xx)", |
|
430 |
quantiles = "xx.x - xx.x", |
|
431 |
iqr = "xx.x", |
|
432 |
range = "xx.x - xx.x", |
|
433 |
min = "xx.x", |
|
434 |
max = "xx.x", |
|
435 |
median_range = "xx.x (xx.x - xx.x)", |
|
436 |
cv = "xx.x", |
|
437 |
geom_mean = "xx.x", |
|
438 |
geom_mean_ci = "(xx.xx, xx.xx)", |
|
439 |
geom_cv = "xx.x", |
|
440 |
pval = "x.xxxx | (<0.0001)", |
|
441 |
pval_counts = "x.xxxx | (<0.0001)", |
|
442 |
range_censor = "xx.x to xx.x", |
|
443 |
range_event = "xx.x to xx.x" |
|
444 |
) |
|
445 | ||
446 |
#' @describeIn default_stats_formats_labels Named `character` vector of default labels for `tern`. |
|
447 |
#' |
|
448 |
#' @format |
|
449 |
#' * `tern_default_labels` is a named `character` vector of available default labels, with each element |
|
450 |
#' named for their corresponding statistic. |
|
451 |
#' |
|
452 |
#' @export |
|
453 |
tern_default_labels <- c( |
|
454 |
fraction = "fraction", |
|
455 |
unique = "Number of patients with at least one event", |
|
456 |
nonunique = "Number of events", |
|
457 |
n = "n", |
|
458 |
count = "count", |
|
459 |
count_fraction = "count_fraction", |
|
460 |
count_fraction_fixed_dp = "count_fraction", |
|
461 |
n_blq = "n_blq", |
|
462 |
sum = "Sum", |
|
463 |
mean = "Mean", |
|
464 |
sd = "SD", |
|
465 |
se = "SE", |
|
466 |
mean_sd = "Mean (SD)", |
|
467 |
mean_se = "Mean (SE)", |
|
468 |
mean_ci = "Mean 95% CI", |
|
469 |
mean_sei = "Mean -/+ 1xSE", |
|
470 |
mean_sdi = "Mean -/+ 1xSD", |
|
471 |
mean_pval = "Mean p-value (H0: mean = 0)", |
|
472 |
median = "Median", |
|
473 |
mad = "Median Absolute Deviation", |
|
474 |
median_ci = "Median 95% CI", |
|
475 |
quantiles = "25% and 75%-ile", |
|
476 |
iqr = "IQR", |
|
477 |
range = "Min - Max", |
|
478 |
min = "Minimum", |
|
479 |
max = "Maximum", |
|
480 |
median_range = "Median (Min - Max)", |
|
481 |
cv = "CV (%)", |
|
482 |
geom_mean = "Geometric Mean", |
|
483 |
geom_mean_ci = "Geometric Mean 95% CI", |
|
484 |
geom_cv = "CV % Geometric Mean", |
|
485 |
pval = "p-value (t-test)", # Default for numeric |
|
486 |
pval_counts = "p-value (chi-squared test)" # Default for counts |
|
487 |
) |
|
488 | ||
489 |
# To deprecate --------- |
|
490 | ||
491 |
#' @describeIn default_stats_formats_labels Quick function to retrieve default formats for summary statistics: |
|
492 |
#' [analyze_vars()] and [analyze_vars_in_cols()] principally. |
|
493 |
#' |
|
494 |
#' @param type (`string`)\cr `"numeric"` or `"counts"`. |
|
495 |
#' |
|
496 |
#' @return |
|
497 |
#' * `summary_formats()` returns a named `vector` of default statistic formats for the given data type. |
|
498 |
#' |
|
499 |
#' @examples |
|
500 |
#' summary_formats() |
|
501 |
#' summary_formats(type = "counts", include_pval = TRUE) |
|
502 |
#' |
|
503 |
#' @export |
|
504 |
summary_formats <- function(type = "numeric", include_pval = FALSE) { |
|
505 | 1x |
met_grp <- paste0(c("analyze_vars", type), collapse = "_") |
506 | 1x |
get_formats_from_stats(get_stats(met_grp, add_pval = include_pval)) |
507 |
} |
|
508 | ||
509 |
#' @describeIn default_stats_formats_labels Quick function to retrieve default labels for summary statistics. |
|
510 |
#' Returns labels of descriptive statistics which are understood by `rtables`. Similar to `summary_formats`. |
|
511 |
#' |
|
512 |
#' @param include_pval (`flag`)\cr same as the `add_pval` argument in [get_stats()]. |
|
513 |
#' |
|
514 |
#' @return |
|
515 |
#' * `summary_labels` returns a named `vector` of default statistic labels for the given data type. |
|
516 |
#' |
|
517 |
#' @examples |
|
518 |
#' summary_labels() |
|
519 |
#' summary_labels(type = "counts", include_pval = TRUE) |
|
520 |
#' |
|
521 |
#' @export |
|
522 |
summary_labels <- function(type = "numeric", include_pval = FALSE) { |
|
523 | 1x |
met_grp <- paste0(c("analyze_vars", type), collapse = "_") |
524 | 1x |
get_labels_from_stats(get_stats(met_grp, add_pval = include_pval)) |
525 |
} |
1 |
#' Confidence intervals for a difference of binomials |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("experimental")` |
|
4 |
#' |
|
5 |
#' Several confidence intervals for the difference between proportions. |
|
6 |
#' |
|
7 |
#' @name desctools_binom |
|
8 |
NULL |
|
9 | ||
10 |
#' Recycle list of parameters |
|
11 |
#' |
|
12 |
#' This function recycles all supplied elements to the maximal dimension. |
|
13 |
#' |
|
14 |
#' @param ... (`any`)\cr elements to recycle. |
|
15 |
#' |
|
16 |
#' @return A `list`. |
|
17 |
#' |
|
18 |
#' @keywords internal |
|
19 |
#' @noRd |
|
20 |
h_recycle <- function(...) { |
|
21 | 64x |
lst <- list(...) |
22 | 64x |
maxdim <- max(lengths(lst)) |
23 | 64x |
res <- lapply(lst, rep, length.out = maxdim) |
24 | 64x |
attr(res, "maxdim") <- maxdim |
25 | 64x |
return(res) |
26 |
} |
|
27 | ||
28 |
#' @describeIn desctools_binom Several confidence intervals for the difference between proportions. |
|
29 |
#' |
|
30 |
#' @return A `matrix` of 3 values: |
|
31 |
#' * `est`: estimate of proportion difference. |
|
32 |
#' * `lwr.ci`: estimate of lower end of the confidence interval. |
|
33 |
#' * `upr.ci`: estimate of upper end of the confidence interval. |
|
34 |
#' |
|
35 |
#' @keywords internal |
|
36 |
desctools_binom <- function(x1, |
|
37 |
n1, |
|
38 |
x2, |
|
39 |
n2, |
|
40 |
conf.level = 0.95, # nolint |
|
41 |
sides = c("two.sided", "left", "right"), |
|
42 |
method = c( |
|
43 |
"ac", "wald", "waldcc", "score", "scorecc", "mn", "mee", "blj", "ha", "hal", "jp" |
|
44 |
)) { |
|
45 | 20x |
if (missing(sides)) { |
46 | 20x |
sides <- match.arg(sides) |
47 |
} |
|
48 | 20x |
if (missing(method)) { |
49 | 1x |
method <- match.arg(method) |
50 |
} |
|
51 | 20x |
iBinomDiffCI <- function(x1, n1, x2, n2, conf.level, sides, method) { # nolint |
52 | 20x |
if (sides != "two.sided") { |
53 | ! |
conf.level <- 1 - 2 * (1 - conf.level) # nolint |
54 |
} |
|
55 | 20x |
alpha <- 1 - conf.level |
56 | 20x |
kappa <- stats::qnorm(1 - alpha / 2) |
57 | 20x |
p1_hat <- x1 / n1 |
58 | 20x |
p2_hat <- x2 / n2 |
59 | 20x |
est <- p1_hat - p2_hat |
60 | 20x |
switch(method, |
61 | 20x |
wald = { |
62 | 2x |
vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2 |
63 | 2x |
term2 <- kappa * sqrt(vd) |
64 | 2x |
ci_lwr <- max(-1, est - term2) |
65 | 2x |
ci_upr <- min(1, est + term2) |
66 |
}, |
|
67 | 20x |
waldcc = { |
68 | 4x |
vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2 |
69 | 4x |
term2 <- kappa * sqrt(vd) |
70 | 4x |
term2 <- term2 + 0.5 * (1 / n1 + 1 / n2) |
71 | 4x |
ci_lwr <- max(-1, est - term2) |
72 | 4x |
ci_upr <- min(1, est + term2) |
73 |
}, |
|
74 | 20x |
ac = { |
75 | 2x |
n1 <- n1 + 2 |
76 | 2x |
n2 <- n2 + 2 |
77 | 2x |
x1 <- x1 + 1 |
78 | 2x |
x2 <- x2 + 1 |
79 | 2x |
p1_hat <- x1 / n1 |
80 | 2x |
p2_hat <- x2 / n2 |
81 | 2x |
est1 <- p1_hat - p2_hat |
82 | 2x |
vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2 |
83 | 2x |
term2 <- kappa * sqrt(vd) |
84 | 2x |
ci_lwr <- max(-1, est1 - term2) |
85 | 2x |
ci_upr <- min(1, est1 + term2) |
86 |
}, |
|
87 | 20x |
exact = { |
88 | ! |
ci_lwr <- NA |
89 | ! |
ci_upr <- NA |
90 |
}, |
|
91 | 20x |
score = { |
92 | 2x |
w1 <- desctools_binomci( |
93 | 2x |
x = x1, n = n1, conf.level = conf.level, |
94 | 2x |
method = "wilson" |
95 |
) |
|
96 | 2x |
w2 <- desctools_binomci( |
97 | 2x |
x = x2, n = n2, conf.level = conf.level, |
98 | 2x |
method = "wilson" |
99 |
) |
|
100 | 2x |
l1 <- w1[2] |
101 | 2x |
u1 <- w1[3] |
102 | 2x |
l2 <- w2[2] |
103 | 2x |
u2 <- w2[3] |
104 | 2x |
ci_lwr <- est - kappa * sqrt(l1 * (1 - l1) / n1 + u2 * (1 - u2) / n2) |
105 | 2x |
ci_upr <- est + kappa * sqrt(u1 * (1 - u1) / n1 + l2 * (1 - l2) / n2) |
106 |
}, |
|
107 | 20x |
scorecc = { |
108 | 1x |
w1 <- desctools_binomci( |
109 | 1x |
x = x1, n = n1, conf.level = conf.level, |
110 | 1x |
method = "wilsoncc" |
111 |
) |
|
112 | 1x |
w2 <- desctools_binomci( |
113 | 1x |
x = x2, n = n2, conf.level = conf.level, |
114 | 1x |
method = "wilsoncc" |
115 |
) |
|
116 | 1x |
l1 <- w1[2] |
117 | 1x |
u1 <- w1[3] |
118 | 1x |
l2 <- w2[2] |
119 | 1x |
u2 <- w2[3] |
120 | 1x |
ci_lwr <- max(-1, est - sqrt((p1_hat - l1)^2 + (u2 - p2_hat)^2)) |
121 | 1x |
ci_upr <- min(1, est + sqrt((u1 - p1_hat)^2 + (p2_hat - l2)^2)) |
122 |
}, |
|
123 | 20x |
mee = { |
124 | 1x |
.score <- function(p1, n1, p2, n2, dif) { |
125 | ! |
if (dif > 1) dif <- 1 |
126 | ! |
if (dif < -1) dif <- -1 |
127 | 24x |
diff <- p1 - p2 - dif |
128 | 24x |
if (abs(diff) == 0) { |
129 | ! |
res <- 0 |
130 |
} else { |
|
131 | 24x |
t <- n2 / n1 |
132 | 24x |
a <- 1 + t |
133 | 24x |
b <- -(1 + t + p1 + t * p2 + dif * (t + 2)) |
134 | 24x |
c <- dif * dif + dif * (2 * p1 + t + 1) + p1 + t * p2 |
135 | 24x |
d <- -p1 * dif * (1 + dif) |
136 | 24x |
v <- (b / a / 3)^3 - b * c / (6 * a * a) + d / a / 2 |
137 | 24x |
if (abs(v) < .Machine$double.eps) v <- 0 |
138 | 24x |
s <- sqrt((b / a / 3)^2 - c / a / 3) |
139 | 24x |
u <- ifelse(v > 0, 1, -1) * s |
140 | 24x |
w <- (3.141592654 + acos(v / u^3)) / 3 |
141 | 24x |
p1d <- 2 * u * cos(w) - b / a / 3 |
142 | 24x |
p2d <- p1d - dif |
143 | 24x |
n <- n1 + n2 |
144 | 24x |
res <- (p1d * (1 - p1d) / n1 + p2d * (1 - p2d) / n2) |
145 |
} |
|
146 | 24x |
return(sqrt(res)) |
147 |
} |
|
148 | 1x |
pval <- function(delta) { |
149 | 24x |
z <- (est - delta) / .score(p1_hat, n1, p2_hat, n2, delta) |
150 | 24x |
2 * min(stats::pnorm(z), 1 - stats::pnorm(z)) |
151 |
} |
|
152 | 1x |
ci_lwr <- max(-1, stats::uniroot(function(delta) { |
153 | 12x |
pval(delta) - alpha |
154 | 1x |
}, interval = c(-1 + 1e-06, est - 1e-06))$root) |
155 | 1x |
ci_upr <- min(1, stats::uniroot(function(delta) { |
156 | 12x |
pval(delta) - alpha |
157 | 1x |
}, interval = c(est + 1e-06, 1 - 1e-06))$root) |
158 |
}, |
|
159 | 20x |
blj = { |
160 | 1x |
p1_dash <- (x1 + 0.5) / (n1 + 1) |
161 | 1x |
p2_dash <- (x2 + 0.5) / (n2 + 1) |
162 | 1x |
vd <- p1_dash * (1 - p1_dash) / n1 + p2_dash * (1 - p2_dash) / n2 |
163 | 1x |
term2 <- kappa * sqrt(vd) |
164 | 1x |
est_dash <- p1_dash - p2_dash |
165 | 1x |
ci_lwr <- max(-1, est_dash - term2) |
166 | 1x |
ci_upr <- min(1, est_dash + term2) |
167 |
}, |
|
168 | 20x |
ha = { |
169 | 4x |
term2 <- 1 / |
170 | 4x |
(2 * min(n1, n2)) + kappa * sqrt(p1_hat * (1 - p1_hat) / (n1 - 1) + p2_hat * (1 - p2_hat) / (n2 - 1)) |
171 | 4x |
ci_lwr <- max(-1, est - term2) |
172 | 4x |
ci_upr <- min(1, est + term2) |
173 |
}, |
|
174 | 20x |
mn = { |
175 | 1x |
.conf <- function(x1, n1, x2, n2, z, lower = FALSE) { |
176 | 2x |
p1 <- x1 / n1 |
177 | 2x |
p2 <- x2 / n2 |
178 | 2x |
p_hat <- p1 - p2 |
179 | 2x |
dp <- 1 + ifelse(lower, 1, -1) * p_hat |
180 | 2x |
i <- 1 |
181 | 2x |
while (i <= 50) { |
182 | 46x |
dp <- 0.5 * dp |
183 | 46x |
y <- p_hat + ifelse(lower, -1, 1) * dp |
184 | 46x |
score <- .score(p1, n1, p2, n2, y) |
185 | 46x |
if (score < z) { |
186 | 20x |
p_hat <- y |
187 |
} |
|
188 | 46x |
if ((dp < 1e-07) || (abs(z - score) < 1e-06)) { |
189 | 2x |
(break)() |
190 |
} else { |
|
191 | 44x |
i <- i + 1 |
192 |
} |
|
193 |
} |
|
194 | 2x |
return(y) |
195 |
} |
|
196 | 1x |
.score <- function(p1, n1, p2, n2, dif) { |
197 | 46x |
diff <- p1 - p2 - dif |
198 | 46x |
if (abs(diff) == 0) { |
199 | ! |
res <- 0 |
200 |
} else { |
|
201 | 46x |
t <- n2 / n1 |
202 | 46x |
a <- 1 + t |
203 | 46x |
b <- -(1 + t + p1 + t * p2 + dif * (t + 2)) |
204 | 46x |
c <- dif * dif + dif * (2 * p1 + t + 1) + p1 + t * p2 |
205 | 46x |
d <- -p1 * dif * (1 + dif) |
206 | 46x |
v <- (b / a / 3)^3 - b * c / (6 * a * a) + d / a / 2 |
207 | 46x |
s <- sqrt((b / a / 3)^2 - c / a / 3) |
208 | 46x |
u <- ifelse(v > 0, 1, -1) * s |
209 | 46x |
w <- (3.141592654 + acos(v / u^3)) / 3 |
210 | 46x |
p1d <- 2 * u * cos(w) - b / a / 3 |
211 | 46x |
p2d <- p1d - dif |
212 | 46x |
n <- n1 + n2 |
213 | 46x |
var <- (p1d * (1 - p1d) / n1 + p2d * (1 - p2d) / n2) * n / (n - 1) |
214 | 46x |
res <- diff^2 / var |
215 |
} |
|
216 | 46x |
return(res) |
217 |
} |
|
218 | 1x |
z <- stats::qchisq(conf.level, 1) |
219 | 1x |
ci_lwr <- max(-1, .conf(x1, n1, x2, n2, z, TRUE)) |
220 | 1x |
ci_upr <- min(1, .conf(x1, n1, x2, n2, z, FALSE)) |
221 |
}, |
|
222 | 20x |
beal = { |
223 | ! |
a <- p1_hat + p2_hat |
224 | ! |
b <- p1_hat - p2_hat |
225 | ! |
u <- ((1 / n1) + (1 / n2)) / 4 |
226 | ! |
v <- ((1 / n1) - (1 / n2)) / 4 |
227 | ! |
V <- u * ((2 - a) * a - b^2) + 2 * v * (1 - a) * b # nolint |
228 | ! |
z <- stats::qchisq(p = 1 - alpha / 2, df = 1) |
229 | ! |
A <- sqrt(z * (V + z * u^2 * (2 - a) * a + z * v^2 * (1 - a)^2)) # nolint |
230 | ! |
B <- (b + z * v * (1 - a)) / (1 + z * u) # nolint |
231 | ! |
ci_lwr <- max(-1, B - A / (1 + z * u)) |
232 | ! |
ci_upr <- min(1, B + A / (1 + z * u)) |
233 |
}, |
|
234 | 20x |
hal = { |
235 | 1x |
psi <- (p1_hat + p2_hat) / 2 |
236 | 1x |
u <- (1 / n1 + 1 / n2) / 4 |
237 | 1x |
v <- (1 / n1 - 1 / n2) / 4 |
238 | 1x |
z <- kappa |
239 | 1x |
theta <- ((p1_hat - p2_hat) + z^2 * v * (1 - 2 * psi)) / (1 + z^2 * u) |
240 | 1x |
w <- z / (1 + z^2 * u) * sqrt(u * (4 * psi * (1 - psi) - (p1_hat - p2_hat)^2) + 2 * v * (1 - 2 * psi) * |
241 | 1x |
(p1_hat - p2_hat) + 4 * z^2 * u^2 * (1 - psi) * psi + z^2 * v^2 * (1 - 2 * psi)^2) # nolint |
242 | 1x |
c(theta + w, theta - w) |
243 | 1x |
ci_lwr <- max(-1, theta - w) |
244 | 1x |
ci_upr <- min(1, theta + w) |
245 |
}, |
|
246 | 20x |
jp = { |
247 | 1x |
psi <- 0.5 * ((x1 + 0.5) / (n1 + 1) + (x2 + 0.5) / (n2 + 1)) |
248 | 1x |
u <- (1 / n1 + 1 / n2) / 4 |
249 | 1x |
v <- (1 / n1 - 1 / n2) / 4 |
250 | 1x |
z <- kappa |
251 | 1x |
theta <- ((p1_hat - p2_hat) + z^2 * v * (1 - 2 * psi)) / (1 + z^2 * u) |
252 | 1x |
w <- z / (1 + z^2 * u) * sqrt(u * (4 * psi * (1 - psi) - (p1_hat - p2_hat)^2) + 2 * v * (1 - 2 * psi) * |
253 | 1x |
(p1_hat - p2_hat) + 4 * z^2 * u^2 * (1 - psi) * psi + z^2 * v^2 * (1 - 2 * psi)^2) # nolint |
254 | 1x |
c(theta + w, theta - w) |
255 | 1x |
ci_lwr <- max(-1, theta - w) |
256 | 1x |
ci_upr <- min(1, theta + w) |
257 |
}, |
|
258 |
) |
|
259 | 20x |
ci <- c( |
260 | 20x |
est = est, lwr.ci = min(ci_lwr, ci_upr), |
261 | 20x |
upr.ci = max(ci_lwr, ci_upr) |
262 |
) |
|
263 | 20x |
if (sides == "left") { |
264 | ! |
ci[3] <- 1 |
265 | 20x |
} else if (sides == "right") { |
266 | ! |
ci[2] <- -1 |
267 |
} |
|
268 | 20x |
return(ci) |
269 |
} |
|
270 | 20x |
method <- match.arg(arg = method, several.ok = TRUE) |
271 | 20x |
sides <- match.arg(arg = sides, several.ok = TRUE) |
272 | 20x |
lst <- h_recycle( |
273 | 20x |
x1 = x1, n1 = n1, x2 = x2, n2 = n2, conf.level = conf.level, |
274 | 20x |
sides = sides, method = method |
275 |
) |
|
276 | 20x |
res <- t(sapply(1:attr(lst, "maxdim"), function(i) { |
277 | 20x |
iBinomDiffCI( |
278 | 20x |
x1 = lst$x1[i], |
279 | 20x |
n1 = lst$n1[i], x2 = lst$x2[i], n2 = lst$n2[i], conf.level = lst$conf.level[i], |
280 | 20x |
sides = lst$sides[i], method = lst$method[i] |
281 |
) |
|
282 |
})) |
|
283 | 20x |
lgn <- h_recycle(x1 = if (is.null(names(x1))) { |
284 | 20x |
paste("x1", seq_along(x1), sep = ".") |
285 |
} else { |
|
286 | ! |
names(x1) |
287 | 20x |
}, n1 = if (is.null(names(n1))) { |
288 | 20x |
paste("n1", seq_along(n1), sep = ".") |
289 |
} else { |
|
290 | ! |
names(n1) |
291 | 20x |
}, x2 = if (is.null(names(x2))) { |
292 | 20x |
paste("x2", seq_along(x2), sep = ".") |
293 |
} else { |
|
294 | ! |
names(x2) |
295 | 20x |
}, n2 = if (is.null(names(n2))) { |
296 | 20x |
paste("n2", seq_along(n2), sep = ".") |
297 |
} else { |
|
298 | ! |
names(n2) |
299 | 20x |
}, conf.level = conf.level, sides = sides, method = method) |
300 | 20x |
xn <- apply(as.data.frame(lgn[sapply(lgn, function(x) { |
301 | 140x |
length(unique(x)) != |
302 | 140x |
1 |
303 | 20x |
})]), 1, paste, collapse = ":") |
304 | 20x |
rownames(res) <- xn |
305 | 20x |
return(res) |
306 |
} |
|
307 | ||
308 |
#' @describeIn desctools_binom Compute confidence intervals for binomial proportions. |
|
309 |
#' |
|
310 |
#' @param x (`integer(1)`)\cr number of successes. |
|
311 |
#' @param n (`integer(1)`)\cr number of trials. |
|
312 |
#' @param conf.level (`proportion`)\cr confidence level, defaults to 0.95. |
|
313 |
#' @param sides (`string`)\cr side of the confidence interval to compute. Must be one of `"two-sided"` (default), |
|
314 |
#' `"left"`, or `"right"`. |
|
315 |
#' @param method (`string`)\cr method to use. Can be one out of: `"wald"`, `"wilson"`, `"wilsoncc"`, |
|
316 |
#' `"agresti-coull"`, `"jeffreys"`, `"modified wilson"`, `"modified jeffreys"`, `"clopper-pearson"`, `"arcsine"`, |
|
317 |
#' `"logit"`, `"witting"`, `"pratt"`, `"midp"`, `"lik"`, and `"blaker"`. |
|
318 |
#' |
|
319 |
#' @return A `matrix` with 3 columns containing: |
|
320 |
#' * `est`: estimate of proportion difference. |
|
321 |
#' * `lwr.ci`: lower end of the confidence interval. |
|
322 |
#' * `upr.ci`: upper end of the confidence interval. |
|
323 |
#' |
|
324 |
#' @keywords internal |
|
325 |
desctools_binomci <- function(x, |
|
326 |
n, |
|
327 |
conf.level = 0.95, # nolint |
|
328 |
sides = c("two.sided", "left", "right"), |
|
329 |
method = c( |
|
330 |
"wilson", "wald", "waldcc", "agresti-coull", |
|
331 |
"jeffreys", "modified wilson", "wilsoncc", "modified jeffreys", |
|
332 |
"clopper-pearson", "arcsine", "logit", "witting", "pratt", |
|
333 |
"midp", "lik", "blaker" |
|
334 |
), |
|
335 |
rand = 123, |
|
336 |
tol = 1e-05) { |
|
337 | 24x |
if (missing(method)) { |
338 | 1x |
method <- "wilson" |
339 |
} |
|
340 | 24x |
if (missing(sides)) { |
341 | 23x |
sides <- "two.sided" |
342 |
} |
|
343 | 24x |
iBinomCI <- function(x, n, conf.level = 0.95, sides = c("two.sided", "left", "right"), # nolint |
344 | 24x |
method = c( |
345 | 24x |
"wilson", "wilsoncc", "wald", |
346 | 24x |
"waldcc", "agresti-coull", "jeffreys", "modified wilson", |
347 | 24x |
"modified jeffreys", "clopper-pearson", "arcsine", "logit", |
348 | 24x |
"witting", "pratt", "midp", "lik", "blaker" |
349 |
), |
|
350 | 24x |
rand = 123, |
351 | 24x |
tol = 1e-05) { |
352 | 24x |
if (length(x) != 1) { |
353 | ! |
stop("'x' has to be of length 1 (number of successes)") |
354 |
} |
|
355 | 24x |
if (length(n) != 1) { |
356 | ! |
stop("'n' has to be of length 1 (number of trials)") |
357 |
} |
|
358 | 24x |
if (length(conf.level) != 1) { |
359 | ! |
stop("'conf.level' has to be of length 1 (confidence level)") |
360 |
} |
|
361 | 24x |
if (conf.level < 0.5 || conf.level > 1) { |
362 | ! |
stop("'conf.level' has to be in [0.5, 1]") |
363 |
} |
|
364 | 24x |
sides <- match.arg(sides, choices = c( |
365 | 24x |
"two.sided", "left", |
366 | 24x |
"right" |
367 | 24x |
), several.ok = FALSE) |
368 | 24x |
if (sides != "two.sided") { |
369 | 1x |
conf.level <- 1 - 2 * (1 - conf.level) # nolint |
370 |
} |
|
371 | 24x |
alpha <- 1 - conf.level |
372 | 24x |
kappa <- stats::qnorm(1 - alpha / 2) |
373 | 24x |
p_hat <- x / n |
374 | 24x |
q_hat <- 1 - p_hat |
375 | 24x |
est <- p_hat |
376 | 24x |
switch(match.arg(arg = method, choices = c( |
377 | 24x |
"wilson", |
378 | 24x |
"wald", "waldcc", "wilsoncc", "agresti-coull", "jeffreys", |
379 | 24x |
"modified wilson", "modified jeffreys", "clopper-pearson", |
380 | 24x |
"arcsine", "logit", "witting", "pratt", "midp", "lik", |
381 | 24x |
"blaker" |
382 |
)), |
|
383 | 24x |
wald = { |
384 | 1x |
term2 <- kappa * sqrt(p_hat * q_hat) / sqrt(n) |
385 | 1x |
ci_lwr <- max(0, p_hat - term2) |
386 | 1x |
ci_upr <- min(1, p_hat + term2) |
387 |
}, |
|
388 | 24x |
waldcc = { |
389 | 1x |
term2 <- kappa * sqrt(p_hat * q_hat) / sqrt(n) |
390 | 1x |
term2 <- term2 + 1 / (2 * n) |
391 | 1x |
ci_lwr <- max(0, p_hat - term2) |
392 | 1x |
ci_upr <- min(1, p_hat + term2) |
393 |
}, |
|
394 | 24x |
wilson = { |
395 | 6x |
term1 <- (x + kappa^2 / 2) / (n + kappa^2) |
396 | 6x |
term2 <- kappa * sqrt(n) / (n + kappa^2) * sqrt(p_hat * q_hat + kappa^2 / (4 * n)) |
397 | 6x |
ci_lwr <- max(0, term1 - term2) |
398 | 6x |
ci_upr <- min(1, term1 + term2) |
399 |
}, |
|
400 | 24x |
wilsoncc = { |
401 | 3x |
lci <- ( |
402 | 3x |
2 * x + kappa^2 - 1 - kappa * sqrt(kappa^2 - 2 - 1 / n + 4 * p_hat * (n * q_hat + 1)) |
403 | 3x |
) / (2 * (n + kappa^2)) |
404 | 3x |
uci <- ( |
405 | 3x |
2 * x + kappa^2 + 1 + kappa * sqrt(kappa^2 + 2 - 1 / n + 4 * p_hat * (n * q_hat - 1)) |
406 | 3x |
) / (2 * (n + kappa^2)) |
407 | 3x |
ci_lwr <- max(0, ifelse(p_hat == 0, 0, lci)) |
408 | 3x |
ci_upr <- min(1, ifelse(p_hat == 1, 1, uci)) |
409 |
}, |
|
410 | 24x |
`agresti-coull` = { |
411 | 1x |
x_tilde <- x + kappa^2 / 2 |
412 | 1x |
n_tilde <- n + kappa^2 |
413 | 1x |
p_tilde <- x_tilde / n_tilde |
414 | 1x |
q_tilde <- 1 - p_tilde |
415 | 1x |
est <- p_tilde |
416 | 1x |
term2 <- kappa * sqrt(p_tilde * q_tilde) / sqrt(n_tilde) |
417 | 1x |
ci_lwr <- max(0, p_tilde - term2) |
418 | 1x |
ci_upr <- min(1, p_tilde + term2) |
419 |
}, |
|
420 | 24x |
jeffreys = { |
421 | 1x |
if (x == 0) { |
422 | ! |
ci_lwr <- 0 |
423 |
} else { |
|
424 | 1x |
ci_lwr <- stats::qbeta( |
425 | 1x |
alpha / 2, |
426 | 1x |
x + 0.5, n - x + 0.5 |
427 |
) |
|
428 |
} |
|
429 | 1x |
if (x == n) { |
430 | ! |
ci_upr <- 1 |
431 |
} else { |
|
432 | 1x |
ci_upr <- stats::qbeta(1 - alpha / 2, x + 0.5, n - x + 0.5) |
433 |
} |
|
434 |
}, |
|
435 | 24x |
`modified wilson` = { |
436 | 1x |
term1 <- (x + kappa^2 / 2) / (n + kappa^2) |
437 | 1x |
term2 <- kappa * sqrt(n) / (n + kappa^2) * sqrt(p_hat * q_hat + kappa^2 / (4 * n)) |
438 | 1x |
if ((n <= 50 & x %in% c(1, 2)) | (n >= 51 & x %in% c(1:3))) { |
439 | ! |
ci_lwr <- 0.5 * stats::qchisq(alpha, 2 * x) / n |
440 |
} else { |
|
441 | 1x |
ci_lwr <- max(0, term1 - term2) |
442 |
} |
|
443 | 1x |
if ((n <= 50 & x %in% c(n - 1, n - 2)) | (n >= 51 & x %in% c(n - (1:3)))) { |
444 | ! |
ci_upr <- 1 - 0.5 * stats::qchisq( |
445 | ! |
alpha, |
446 | ! |
2 * (n - x) |
447 | ! |
) / n |
448 |
} else { |
|
449 | 1x |
ci_upr <- min(1, term1 + term2) |
450 |
} |
|
451 |
}, |
|
452 | 24x |
`modified jeffreys` = { |
453 | 1x |
if (x == n) { |
454 | ! |
ci_lwr <- (alpha / 2)^(1 / n) |
455 |
} else { |
|
456 | 1x |
if (x <= 1) { |
457 | ! |
ci_lwr <- 0 |
458 |
} else { |
|
459 | 1x |
ci_lwr <- stats::qbeta( |
460 | 1x |
alpha / 2, |
461 | 1x |
x + 0.5, n - x + 0.5 |
462 |
) |
|
463 |
} |
|
464 |
} |
|
465 | 1x |
if (x == 0) { |
466 | ! |
ci_upr <- 1 - (alpha / 2)^(1 / n) |
467 |
} else { |
|
468 | 1x |
if (x >= n - 1) { |
469 | ! |
ci_upr <- 1 |
470 |
} else { |
|
471 | 1x |
ci_upr <- stats::qbeta(1 - alpha / 2, x + 0.5, n - x + 0.5) |
472 |
} |
|
473 |
} |
|
474 |
}, |
|
475 | 24x |
`clopper-pearson` = { |
476 | 1x |
ci_lwr <- stats::qbeta(alpha / 2, x, n - x + 1) |
477 | 1x |
ci_upr <- stats::qbeta(1 - alpha / 2, x + 1, n - x) |
478 |
}, |
|
479 | 24x |
arcsine = { |
480 | 1x |
p_tilde <- (x + 0.375) / (n + 0.75) |
481 | 1x |
est <- p_tilde |
482 | 1x |
ci_lwr <- sin(asin(sqrt(p_tilde)) - 0.5 * kappa / sqrt(n))^2 |
483 | 1x |
ci_upr <- sin(asin(sqrt(p_tilde)) + 0.5 * kappa / sqrt(n))^2 |
484 |
}, |
|
485 | 24x |
logit = { |
486 | 1x |
lambda_hat <- log(x / (n - x)) |
487 | 1x |
V_hat <- n / (x * (n - x)) # nolint |
488 | 1x |
lambda_lower <- lambda_hat - kappa * sqrt(V_hat) |
489 | 1x |
lambda_upper <- lambda_hat + kappa * sqrt(V_hat) |
490 | 1x |
ci_lwr <- exp(lambda_lower) / (1 + exp(lambda_lower)) |
491 | 1x |
ci_upr <- exp(lambda_upper) / (1 + exp(lambda_upper)) |
492 |
}, |
|
493 | 24x |
witting = { |
494 | 1x |
set.seed(rand) |
495 | 1x |
x_tilde <- x + stats::runif(1, min = 0, max = 1) |
496 | 1x |
pbinom_abscont <- function(q, size, prob) { |
497 | 22x |
v <- trunc(q) |
498 | 22x |
term1 <- stats::pbinom(v - 1, size = size, prob = prob) |
499 | 22x |
term2 <- (q - v) * stats::dbinom(v, size = size, prob = prob) |
500 | 22x |
return(term1 + term2) |
501 |
} |
|
502 | 1x |
qbinom_abscont <- function(p, size, x) { |
503 | 2x |
fun <- function(prob, size, x, p) { |
504 | 22x |
pbinom_abscont(x, size, prob) - p |
505 |
} |
|
506 | 2x |
stats::uniroot(fun, |
507 | 2x |
interval = c(0, 1), size = size, |
508 | 2x |
x = x, p = p |
509 | 2x |
)$root |
510 |
} |
|
511 | 1x |
ci_lwr <- qbinom_abscont(1 - alpha, size = n, x = x_tilde) |
512 | 1x |
ci_upr <- qbinom_abscont(alpha, size = n, x = x_tilde) |
513 |
}, |
|
514 | 24x |
pratt = { |
515 | 1x |
if (x == 0) { |
516 | ! |
ci_lwr <- 0 |
517 | ! |
ci_upr <- 1 - alpha^(1 / n) |
518 | 1x |
} else if (x == 1) { |
519 | ! |
ci_lwr <- 1 - (1 - alpha / 2)^(1 / n) |
520 | ! |
ci_upr <- 1 - (alpha / 2)^(1 / n) |
521 | 1x |
} else if (x == (n - 1)) { |
522 | ! |
ci_lwr <- (alpha / 2)^(1 / n) |
523 | ! |
ci_upr <- (1 - alpha / 2)^(1 / n) |
524 | 1x |
} else if (x == n) { |
525 | ! |
ci_lwr <- alpha^(1 / n) |
526 | ! |
ci_upr <- 1 |
527 |
} else { |
|
528 | 1x |
z <- stats::qnorm(1 - alpha / 2) |
529 | 1x |
A <- ((x + 1) / (n - x))^2 # nolint |
530 | 1x |
B <- 81 * (x + 1) * (n - x) - 9 * n - 8 # nolint |
531 | 1x |
C <- (0 - 3) * z * sqrt(9 * (x + 1) * (n - x) * (9 * n + 5 - z^2) + n + 1) # nolint |
532 | 1x |
D <- 81 * (x + 1)^2 - 9 * (x + 1) * (2 + z^2) + 1 # nolint |
533 | 1x |
E <- 1 + A * ((B + C) / D)^3 # nolint |
534 | 1x |
ci_upr <- 1 / E |
535 | 1x |
A <- (x / (n - x - 1))^2 # nolint |
536 | 1x |
B <- 81 * x * (n - x - 1) - 9 * n - 8 # nolint |
537 | 1x |
C <- 3 * z * sqrt(9 * x * (n - x - 1) * (9 * n + 5 - z^2) + n + 1) # nolint |
538 | 1x |
D <- 81 * x^2 - 9 * x * (2 + z^2) + 1 # nolint |
539 | 1x |
E <- 1 + A * ((B + C) / D)^3 # nolint |
540 | 1x |
ci_lwr <- 1 / E |
541 |
} |
|
542 |
}, |
|
543 | 24x |
midp = { |
544 | 1x |
f_low <- function(pi, x, n) { |
545 | 12x |
1 / 2 * stats::dbinom(x, size = n, prob = pi) + stats::pbinom(x, |
546 | 12x |
size = n, prob = pi, lower.tail = FALSE |
547 |
) - |
|
548 | 12x |
(1 - conf.level) / 2 |
549 |
} |
|
550 | 1x |
f_up <- function(pi, x, n) { |
551 | 12x |
1 / 2 * stats::dbinom(x, size = n, prob = pi) + stats::pbinom(x - 1, size = n, prob = pi) - (1 - conf.level) / 2 |
552 |
} |
|
553 | 1x |
ci_lwr <- 0 |
554 | 1x |
ci_upr <- 1 |
555 | 1x |
if (x != 0) { |
556 | 1x |
ci_lwr <- stats::uniroot(f_low, |
557 | 1x |
interval = c(0, p_hat), |
558 | 1x |
x = x, n = n |
559 | 1x |
)$root |
560 |
} |
|
561 | 1x |
if (x != n) { |
562 | 1x |
ci_upr <- stats::uniroot(f_up, interval = c( |
563 | 1x |
p_hat, |
564 | 1x |
1 |
565 | 1x |
), x = x, n = n)$root |
566 |
} |
|
567 |
}, |
|
568 | 24x |
lik = { |
569 | 2x |
ci_lwr <- 0 |
570 | 2x |
ci_upr <- 1 |
571 | 2x |
z <- stats::qnorm(1 - alpha * 0.5) |
572 | 2x |
tol <- .Machine$double.eps^0.5 |
573 | 2x |
BinDev <- function(y, x, mu, wt, bound = 0, tol = .Machine$double.eps^0.5, # nolint |
574 |
...) { |
|
575 | 40x |
ll_y <- ifelse(y %in% c(0, 1), 0, stats::dbinom(x, wt, |
576 | 40x |
y, |
577 | 40x |
log = TRUE |
578 |
)) |
|
579 | 40x |
ll_mu <- ifelse(mu %in% c(0, 1), 0, stats::dbinom(x, |
580 | 40x |
wt, mu, |
581 | 40x |
log = TRUE |
582 |
)) |
|
583 | 40x |
res <- ifelse(abs(y - mu) < tol, 0, sign(y - mu) * sqrt(-2 * (ll_y - ll_mu))) |
584 | 40x |
return(res - bound) |
585 |
} |
|
586 | 2x |
if (x != 0 && tol < p_hat) { |
587 | 2x |
ci_lwr <- if (BinDev( |
588 | 2x |
tol, x, p_hat, n, -z, |
589 | 2x |
tol |
590 | 2x |
) <= 0) { |
591 | 2x |
stats::uniroot( |
592 | 2x |
f = BinDev, interval = c(tol, if (p_hat < tol || p_hat == 1) { |
593 | ! |
1 - tol |
594 |
} else { |
|
595 | 2x |
p_hat |
596 | 2x |
}), bound = -z, |
597 | 2x |
x = x, mu = p_hat, wt = n |
598 | 2x |
)$root |
599 |
} |
|
600 |
} |
|
601 | 2x |
if (x != n && p_hat < (1 - tol)) { |
602 | 2x |
ci_upr <- if ( |
603 | 2x |
BinDev(y = 1 - tol, x = x, mu = ifelse(p_hat > 1 - tol, tol, p_hat), wt = n, bound = z, tol = tol) < 0) { # nolint |
604 | ! |
ci_lwr <- if (BinDev( |
605 | ! |
tol, x, if (p_hat < tol || p_hat == 1) { |
606 | ! |
1 - tol |
607 |
} else { |
|
608 | ! |
p_hat |
609 | ! |
}, n, |
610 | ! |
-z, tol |
611 | ! |
) <= 0) { |
612 | ! |
stats::uniroot( |
613 | ! |
f = BinDev, interval = c(tol, p_hat), |
614 | ! |
bound = -z, x = x, mu = p_hat, wt = n |
615 | ! |
)$root |
616 |
} |
|
617 |
} else { |
|
618 | 2x |
stats::uniroot( |
619 | 2x |
f = BinDev, interval = c(if (p_hat > 1 - tol) { |
620 | ! |
tol |
621 |
} else { |
|
622 | 2x |
p_hat |
623 | 2x |
}, 1 - tol), bound = z, |
624 | 2x |
x = x, mu = p_hat, wt = n |
625 | 2x |
)$root |
626 |
} |
|
627 |
} |
|
628 |
}, |
|
629 | 24x |
blaker = { |
630 | 1x |
acceptbin <- function(x, n, p) { |
631 | 3954x |
p1 <- 1 - stats::pbinom(x - 1, n, p) |
632 | 3954x |
p2 <- stats::pbinom(x, n, p) |
633 | 3954x |
a1 <- p1 + stats::pbinom(stats::qbinom(p1, n, p) - 1, n, p) |
634 | 3954x |
a2 <- p2 + 1 - stats::pbinom( |
635 | 3954x |
stats::qbinom(1 - p2, n, p), n, |
636 | 3954x |
p |
637 |
) |
|
638 | 3954x |
return(min(a1, a2)) |
639 |
} |
|
640 | 1x |
ci_lwr <- 0 |
641 | 1x |
ci_upr <- 1 |
642 | 1x |
if (x != 0) { |
643 | 1x |
ci_lwr <- stats::qbeta((1 - conf.level) / 2, x, n - x + 1) |
644 | 1x |
while (acceptbin(x, n, ci_lwr + tol) < (1 - conf.level)) { |
645 | 1976x |
ci_lwr <- ci_lwr + tol |
646 |
} |
|
647 |
} |
|
648 | 1x |
if (x != n) { |
649 | 1x |
ci_upr <- stats::qbeta(1 - (1 - conf.level) / 2, x + 1, n - x) |
650 | 1x |
while (acceptbin(x, n, ci_upr - tol) < (1 - conf.level)) { |
651 | 1976x |
ci_upr <- ci_upr - tol |
652 |
} |
|
653 |
} |
|
654 |
} |
|
655 |
) |
|
656 | 24x |
ci <- c(est = est, lwr.ci = max(0, ci_lwr), upr.ci = min( |
657 | 24x |