1 |
#' Count patients with toxicity grades that have worsened from baseline by highest grade post-baseline |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' The analyze function [count_abnormal_lab_worsen_by_baseline()] creates a layout element to count patients with |
|
6 |
#' analysis toxicity grades which have worsened from baseline, categorized by highest (worst) grade post-baseline. |
|
7 |
#' |
|
8 |
#' This function analyzes primary analysis variable `var` which indicates analysis toxicity grades. Additional |
|
9 |
#' analysis variables that can be supplied as a list via the `variables` parameter are `id` (defaults to `USUBJID`), |
|
10 |
#' a variable to indicate unique subject identifiers, `baseline_var` (defaults to `BTOXGR`), a variable to indicate |
|
11 |
#' baseline toxicity grades, and `direction_var` (defaults to `GRADDIR`), a variable to indicate toxicity grade |
|
12 |
#' directions of interest to include (e.g. `"H"` (high), `"L"` (low), or `"B"` (both)). |
|
13 |
#' |
|
14 |
#' For the direction(s) specified in `direction_var`, patient counts by worst grade for patients who have |
|
15 |
#' worsened from baseline are calculated as follows: |
|
16 |
#' * `1` to `4`: The number of patients who have worsened from their baseline grades with worst |
|
17 |
#' grades 1-4, respectively. |
|
18 |
#' * `Any`: The total number of patients who have worsened from their baseline grades. |
|
19 |
#' |
|
20 |
#' Fractions are calculated by dividing the above counts by the number of patients who's analysis toxicity grades |
|
21 |
#' have worsened from baseline toxicity grades during treatment. |
|
22 |
#' |
|
23 |
#' Prior to using this function in your table layout you must use [rtables::split_rows_by()] to create a row |
|
24 |
#' split on variable `direction_var`. |
|
25 |
#' |
|
26 |
#' @inheritParams argument_convention |
|
27 |
#' @param variables (named `list` of `string`)\cr list of additional analysis variables including: |
|
28 |
#' * `id` (`string`)\cr subject variable name. |
|
29 |
#' * `baseline_var` (`string`)\cr name of the data column containing baseline toxicity variable. |
|
30 |
#' * `direction_var` (`string`)\cr see `direction_var` for more details. |
|
31 |
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("abnormal_by_worst_grade_worsen")` |
|
32 |
#' to see all available statistics. |
|
33 |
#' |
|
34 |
#' @seealso Relevant helper functions [h_adlb_worsen()] and [h_worsen_counter()] which are used within |
|
35 |
#' [s_count_abnormal_lab_worsen_by_baseline()] to process input data. |
|
36 |
#' |
|
37 |
#' @name abnormal_by_worst_grade_worsen |
|
38 |
#' @order 1 |
|
39 |
NULL |
|
40 | ||
41 |
#' Helper function to prepare ADLB with worst labs |
|
42 |
#' |
|
43 |
#' @description `r lifecycle::badge("stable")` |
|
44 |
#' |
|
45 |
#' Helper function to prepare a `df` for generate the patient count shift table. |
|
46 |
#' |
|
47 |
#' @param adlb (`data.frame`)\cr ADLB data frame. |
|
48 |
#' @param worst_flag_low (named `vector`)\cr worst low post-baseline lab grade flag variable. See how this is |
|
49 |
#' implemented in the following examples. |
|
50 |
#' @param worst_flag_high (named `vector`)\cr worst high post-baseline lab grade flag variable. See how this is |
|
51 |
#' implemented in the following examples. |
|
52 |
#' @param direction_var (`string`)\cr name of the direction variable specifying the direction of the shift table of |
|
53 |
#' interest. Only lab records flagged by `L`, `H` or `B` are included in the shift table. |
|
54 |
#' * `L`: low direction only |
|
55 |
#' * `H`: high direction only |
|
56 |
#' * `B`: both low and high directions |
|
57 |
#' |
|
58 |
#' @return `h_adlb_worsen()` returns the `adlb` `data.frame` containing only the |
|
59 |
#' worst labs specified according to `worst_flag_low` or `worst_flag_high` for the |
|
60 |
#' direction specified according to `direction_var`. For instance, for a lab that is |
|
61 |
#' needed for the low direction only, only records flagged by `worst_flag_low` are |
|
62 |
#' selected. For a lab that is needed for both low and high directions, the worst |
|
63 |
#' low records are selected for the low direction, and the worst high record are selected |
|
64 |
#' for the high direction. |
|
65 |
#' |
|
66 |
#' @seealso [abnormal_by_worst_grade_worsen] |
|
67 |
#' |
|
68 |
#' @examples |
|
69 |
#' library(dplyr) |
|
70 |
#' |
|
71 |
#' # The direction variable, GRADDR, is based on metadata |
|
72 |
#' adlb <- tern_ex_adlb %>% |
|
73 |
#' mutate( |
|
74 |
#' GRADDR = case_when( |
|
75 |
#' PARAMCD == "ALT" ~ "B", |
|
76 |
#' PARAMCD == "CRP" ~ "L", |
|
77 |
#' PARAMCD == "IGA" ~ "H" |
|
78 |
#' ) |
|
79 |
#' ) %>% |
|
80 |
#' filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "") |
|
81 |
#' |
|
82 |
#' df <- h_adlb_worsen( |
|
83 |
#' adlb, |
|
84 |
#' worst_flag_low = c("WGRLOFL" = "Y"), |
|
85 |
#' worst_flag_high = c("WGRHIFL" = "Y"), |
|
86 |
#' direction_var = "GRADDR" |
|
87 |
#' ) |
|
88 |
#' |
|
89 |
#' @export |
|
90 |
h_adlb_worsen <- function(adlb, |
|
91 |
worst_flag_low = NULL, |
|
92 |
worst_flag_high = NULL, |
|
93 |
direction_var) { |
|
94 | 5x |
checkmate::assert_string(direction_var) |
95 | 5x |
checkmate::assert_subset(as.character(unique(adlb[[direction_var]])), c("B", "L", "H")) |
96 | 5x |
assert_df_with_variables(adlb, list("Col" = direction_var)) |
97 | ||
98 | 5x |
if (any(unique(adlb[[direction_var]]) == "H")) { |
99 | 4x |
assert_df_with_variables(adlb, list("High" = names(worst_flag_high))) |
100 |
} |
|
101 | ||
102 | 5x |
if (any(unique(adlb[[direction_var]]) == "L")) { |
103 | 4x |
assert_df_with_variables(adlb, list("Low" = names(worst_flag_low))) |
104 |
} |
|
105 | ||
106 | 5x |
if (any(unique(adlb[[direction_var]]) == "B")) { |
107 | 3x |
assert_df_with_variables( |
108 | 3x |
adlb, |
109 | 3x |
list( |
110 | 3x |
"Low" = names(worst_flag_low), |
111 | 3x |
"High" = names(worst_flag_high) |
112 |
) |
|
113 |
) |
|
114 |
} |
|
115 | ||
116 |
# extract patients with worst post-baseline lab, either low or high or both |
|
117 | 5x |
worst_flag <- c(worst_flag_low, worst_flag_high) |
118 | 5x |
col_names <- names(worst_flag) |
119 | 5x |
filter_values <- worst_flag |
120 | 5x |
temp <- Map( |
121 | 5x |
function(x, y) which(adlb[[x]] == y), |
122 | 5x |
col_names, |
123 | 5x |
filter_values |
124 |
) |
|
125 | 5x |
position_satisfy_filters <- Reduce(union, temp) |
126 | ||
127 |
# select variables of interest |
|
128 | 5x |
adlb_f <- adlb[position_satisfy_filters, ] |
129 | ||
130 |
# generate subsets for different directionality |
|
131 | 5x |
adlb_f_h <- adlb_f[which(adlb_f[[direction_var]] == "H"), ] |
132 | 5x |
adlb_f_l <- adlb_f[which(adlb_f[[direction_var]] == "L"), ] |
133 | 5x |
adlb_f_b <- adlb_f[which(adlb_f[[direction_var]] == "B"), ] |
134 | ||
135 |
# for labs requiring both high and low, data is duplicated and will be stacked on top of each other |
|
136 | 5x |
adlb_f_b_h <- adlb_f_b |
137 | 5x |
adlb_f_b_l <- adlb_f_b |
138 | ||
139 |
# extract data with worst lab |
|
140 | 5x |
if (!is.null(worst_flag_high) && !is.null(worst_flag_low)) { |
141 |
# change H to High, L to Low |
|
142 | 3x |
adlb_f_h[[direction_var]] <- rep("High", nrow(adlb_f_h)) |
143 | 3x |
adlb_f_l[[direction_var]] <- rep("Low", nrow(adlb_f_l)) |
144 | ||
145 |
# change, B to High and Low |
|
146 | 3x |
adlb_f_b_h[[direction_var]] <- rep("High", nrow(adlb_f_b_h)) |
147 | 3x |
adlb_f_b_l[[direction_var]] <- rep("Low", nrow(adlb_f_b_l)) |
148 | ||
149 | 3x |
adlb_out_h <- adlb_f_h[which(adlb_f_h[[names(worst_flag_high)]] == worst_flag_high), ] |
150 | 3x |
adlb_out_b_h <- adlb_f_b_h[which(adlb_f_b_h[[names(worst_flag_high)]] == worst_flag_high), ] |
151 | 3x |
adlb_out_l <- adlb_f_l[which(adlb_f_l[[names(worst_flag_low)]] == worst_flag_low), ] |
152 | 3x |
adlb_out_b_l <- adlb_f_b_l[which(adlb_f_b_l[[names(worst_flag_low)]] == worst_flag_low), ] |
153 | ||
154 | 3x |
out <- rbind(adlb_out_h, adlb_out_b_h, adlb_out_l, adlb_out_b_l) |
155 | 2x |
} else if (!is.null(worst_flag_high)) { |
156 | 1x |
adlb_f_h[[direction_var]] <- rep("High", nrow(adlb_f_h)) |
157 | 1x |
adlb_f_b_h[[direction_var]] <- rep("High", nrow(adlb_f_b_h)) |
158 | ||
159 | 1x |
adlb_out_h <- adlb_f_h[which(adlb_f_h[[names(worst_flag_high)]] == worst_flag_high), ] |
160 | 1x |
adlb_out_b_h <- adlb_f_b_h[which(adlb_f_b_h[[names(worst_flag_high)]] == worst_flag_high), ] |
161 | ||
162 | 1x |
out <- rbind(adlb_out_h, adlb_out_b_h) |
163 | 1x |
} else if (!is.null(worst_flag_low)) { |
164 | 1x |
adlb_f_l[[direction_var]] <- rep("Low", nrow(adlb_f_l)) |
165 | 1x |
adlb_f_b_l[[direction_var]] <- rep("Low", nrow(adlb_f_b_l)) |
166 | ||
167 | 1x |
adlb_out_l <- adlb_f_l[which(adlb_f_l[[names(worst_flag_low)]] == worst_flag_low), ] |
168 | 1x |
adlb_out_b_l <- adlb_f_b_l[which(adlb_f_b_l[[names(worst_flag_low)]] == worst_flag_low), ] |
169 | ||
170 | 1x |
out <- rbind(adlb_out_l, adlb_out_b_l) |
171 |
} |
|
172 | ||
173 |
# label |
|
174 | 5x |
formatters::var_labels(out) <- formatters::var_labels(adlb_f, fill = FALSE) |
175 |
# NA |
|
176 | 5x |
out |
177 |
} |
|
178 | ||
179 |
#' Helper function to analyze patients for `s_count_abnormal_lab_worsen_by_baseline()` |
|
180 |
#' |
|
181 |
#' @description `r lifecycle::badge("stable")` |
|
182 |
#' |
|
183 |
#' Helper function to count the number of patients and the fraction of patients according to |
|
184 |
#' highest post-baseline lab grade variable `.var`, baseline lab grade variable `baseline_var`, |
|
185 |
#' and the direction of interest specified in `direction_var`. |
|
186 |
#' |
|
187 |
#' @inheritParams argument_convention |
|
188 |
#' @inheritParams h_adlb_worsen |
|
189 |
#' @param baseline_var (`string`)\cr name of the baseline lab grade variable. |
|
190 |
#' |
|
191 |
#' @return The counts and fraction of patients |
|
192 |
#' whose worst post-baseline lab grades are worse than their baseline grades, for |
|
193 |
#' post-baseline worst grades "1", "2", "3", "4" and "Any". |
|
194 |
#' |
|
195 |
#' @seealso [abnormal_by_worst_grade_worsen] |
|
196 |
#' |
|
197 |
#' @examples |
|
198 |
#' library(dplyr) |
|
199 |
#' |
|
200 |
#' # The direction variable, GRADDR, is based on metadata |
|
201 |
#' adlb <- tern_ex_adlb %>% |
|
202 |
#' mutate( |
|
203 |
#' GRADDR = case_when( |
|
204 |
#' PARAMCD == "ALT" ~ "B", |
|
205 |
#' PARAMCD == "CRP" ~ "L", |
|
206 |
#' PARAMCD == "IGA" ~ "H" |
|
207 |
#' ) |
|
208 |
#' ) %>% |
|
209 |
#' filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "") |
|
210 |
#' |
|
211 |
#' df <- h_adlb_worsen( |
|
212 |
#' adlb, |
|
213 |
#' worst_flag_low = c("WGRLOFL" = "Y"), |
|
214 |
#' worst_flag_high = c("WGRHIFL" = "Y"), |
|
215 |
#' direction_var = "GRADDR" |
|
216 |
#' ) |
|
217 |
#' |
|
218 |
#' # `h_worsen_counter` |
|
219 |
#' h_worsen_counter( |
|
220 |
#' df %>% filter(PARAMCD == "CRP" & GRADDR == "Low"), |
|
221 |
#' id = "USUBJID", |
|
222 |
#' .var = "ATOXGR", |
|
223 |
#' baseline_var = "BTOXGR", |
|
224 |
#' direction_var = "GRADDR" |
|
225 |
#' ) |
|
226 |
#' |
|
227 |
#' @export |
|
228 |
h_worsen_counter <- function(df, id, .var, baseline_var, direction_var) { |
|
229 | 17x |
checkmate::assert_string(id) |
230 | 17x |
checkmate::assert_string(.var) |
231 | 17x |
checkmate::assert_string(baseline_var) |
232 | 17x |
checkmate::assert_scalar(unique(df[[direction_var]])) |
233 | 17x |
checkmate::assert_subset(unique(df[[direction_var]]), c("High", "Low")) |
234 | 17x |
assert_df_with_variables(df, list(val = c(id, .var, baseline_var, direction_var))) |
235 | ||
236 |
# remove post-baseline missing |
|
237 | 17x |
df <- df[df[[.var]] != "<Missing>", ] |
238 | ||
239 |
# obtain directionality |
|
240 | 17x |
direction <- unique(df[[direction_var]]) |
241 | ||
242 | 17x |
if (direction == "Low") { |
243 | 10x |
grade <- -1:-4 |
244 | 10x |
worst_grade <- -4 |
245 | 7x |
} else if (direction == "High") { |
246 | 7x |
grade <- 1:4 |
247 | 7x |
worst_grade <- 4 |
248 |
} |
|
249 | ||
250 | 17x |
if (nrow(df) > 0) { |
251 | 17x |
by_grade <- lapply(grade, function(i) { |
252 |
# filter baseline values that is less than i or <Missing> |
|
253 | 68x |
df_temp <- df[df[[baseline_var]] %in% c((i + sign(i) * -1):(-1 * worst_grade), "<Missing>"), ] |
254 |
# num: number of patients with post-baseline worst lab equal to i |
|
255 | 68x |
num <- length(unique(df_temp[df_temp[[.var]] %in% i, id, drop = TRUE])) |
256 |
# denom: number of patients with baseline values less than i or <missing> and post-baseline in the same direction |
|
257 | 68x |
denom <- length(unique(df_temp[[id]])) |
258 | 68x |
rm(df_temp) |
259 | 68x |
c(num = num, denom = denom) |
260 |
}) |
|
261 |
} else { |
|
262 | ! |
by_grade <- lapply(1, function(i) { |
263 | ! |
c(num = 0, denom = 0) |
264 |
}) |
|
265 |
} |
|
266 | ||
267 | 17x |
names(by_grade) <- as.character(seq_along(by_grade)) |
268 | ||
269 |
# baseline grade less 4 or missing |
|
270 | 17x |
df_temp <- df[!df[[baseline_var]] %in% worst_grade, ] |
271 | ||
272 |
# denom: number of patients with baseline values less than 4 or <missing> and post-baseline in the same direction |
|
273 | 17x |
denom <- length(unique(df_temp[, id, drop = TRUE])) |
274 | ||
275 |
# condition 1: missing baseline and in the direction of abnormality |
|
276 | 17x |
con1 <- which(df_temp[[baseline_var]] == "<Missing>" & df_temp[[.var]] %in% grade) |
277 | 17x |
df_temp_nm <- df_temp[which(df_temp[[baseline_var]] != "<Missing>" & df_temp[[.var]] %in% grade), ] |
278 | ||
279 |
# condition 2: if post-baseline values are present then post-baseline values must be worse than baseline |
|
280 | 17x |
if (direction == "Low") { |
281 | 10x |
con2 <- which(as.numeric(as.character(df_temp_nm[[.var]])) < as.numeric(as.character(df_temp_nm[[baseline_var]]))) |
282 |
} else { |
|
283 | 7x |
con2 <- which(as.numeric(as.character(df_temp_nm[[.var]])) > as.numeric(as.character(df_temp_nm[[baseline_var]]))) |
284 |
} |
|
285 | ||
286 |
# number of patients satisfy either conditions 1 or 2 |
|
287 | 17x |
num <- length(unique(df_temp[union(con1, con2), id, drop = TRUE])) |
288 | ||
289 | 17x |
list(fraction = c(by_grade, list("Any" = c(num = num, denom = denom)))) |
290 |
} |
|
291 | ||
292 |
#' @describeIn abnormal_by_worst_grade_worsen Statistics function for patients whose worst post-baseline |
|
293 |
#' lab grades are worse than their baseline grades. |
|
294 |
#' |
|
295 |
#' @return |
|
296 |
#' * `s_count_abnormal_lab_worsen_by_baseline()` returns the counts and fraction of patients whose worst |
|
297 |
#' post-baseline lab grades are worse than their baseline grades, for post-baseline worst grades |
|
298 |
#' "1", "2", "3", "4" and "Any". |
|
299 |
#' |
|
300 |
#' @keywords internal |
|
301 |
s_count_abnormal_lab_worsen_by_baseline <- function(df, # nolint |
|
302 |
.var = "ATOXGR", |
|
303 |
variables = list( |
|
304 |
id = "USUBJID", |
|
305 |
baseline_var = "BTOXGR", |
|
306 |
direction_var = "GRADDR" |
|
307 |
)) { |
|
308 | 1x |
checkmate::assert_string(.var) |
309 | 1x |
checkmate::assert_set_equal(names(variables), c("id", "baseline_var", "direction_var")) |
310 | 1x |
checkmate::assert_string(variables$id) |
311 | 1x |
checkmate::assert_string(variables$baseline_var) |
312 | 1x |
checkmate::assert_string(variables$direction_var) |
313 | 1x |
assert_df_with_variables(df, c(aval = .var, variables[1:3])) |
314 | 1x |
assert_list_of_variables(variables) |
315 | ||
316 | 1x |
h_worsen_counter(df, variables$id, .var, variables$baseline_var, variables$direction_var) |
317 |
} |
|
318 | ||
319 |
#' @describeIn abnormal_by_worst_grade_worsen Formatted analysis function which is used as `afun` |
|
320 |
#' in `count_abnormal_lab_worsen_by_baseline()`. |
|
321 |
#' |
|
322 |
#' @return |
|
323 |
#' * `a_count_abnormal_lab_worsen_by_baseline()` returns the corresponding list with |
|
324 |
#' formatted [rtables::CellValue()]. |
|
325 |
#' |
|
326 |
#' @keywords internal |
|
327 |
a_count_abnormal_lab_worsen_by_baseline <- make_afun( # nolint |
|
328 |
s_count_abnormal_lab_worsen_by_baseline, |
|
329 |
.formats = c(fraction = format_fraction), |
|
330 |
.ungroup_stats = "fraction" |
|
331 |
) |
|
332 | ||
333 |
#' @describeIn abnormal_by_worst_grade_worsen Layout-creating function which can take statistics function |
|
334 |
#' arguments and additional format arguments. This function is a wrapper for [rtables::analyze()]. |
|
335 |
#' |
|
336 |
#' @return |
|
337 |
#' * `count_abnormal_lab_worsen_by_baseline()` returns a layout object suitable for passing to further layouting |
|
338 |
#' functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted |
|
339 |
#' rows containing the statistics from `s_count_abnormal_lab_worsen_by_baseline()` to the table layout. |
|
340 |
#' |
|
341 |
#' @examples |
|
342 |
#' library(dplyr) |
|
343 |
#' |
|
344 |
#' # The direction variable, GRADDR, is based on metadata |
|
345 |
#' adlb <- tern_ex_adlb %>% |
|
346 |
#' mutate( |
|
347 |
#' GRADDR = case_when( |
|
348 |
#' PARAMCD == "ALT" ~ "B", |
|
349 |
#' PARAMCD == "CRP" ~ "L", |
|
350 |
#' PARAMCD == "IGA" ~ "H" |
|
351 |
#' ) |
|
352 |
#' ) %>% |
|
353 |
#' filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "") |
|
354 |
#' |
|
355 |
#' df <- h_adlb_worsen( |
|
356 |
#' adlb, |
|
357 |
#' worst_flag_low = c("WGRLOFL" = "Y"), |
|
358 |
#' worst_flag_high = c("WGRHIFL" = "Y"), |
|
359 |
#' direction_var = "GRADDR" |
|
360 |
#' ) |
|
361 |
#' |
|
362 |
#' basic_table() %>% |
|
363 |
#' split_cols_by("ARMCD") %>% |
|
364 |
#' add_colcounts() %>% |
|
365 |
#' split_rows_by("PARAMCD") %>% |
|
366 |
#' split_rows_by("GRADDR") %>% |
|
367 |
#' count_abnormal_lab_worsen_by_baseline( |
|
368 |
#' var = "ATOXGR", |
|
369 |
#' variables = list( |
|
370 |
#' id = "USUBJID", |
|
371 |
#' baseline_var = "BTOXGR", |
|
372 |
#' direction_var = "GRADDR" |
|
373 |
#' ) |
|
374 |
#' ) %>% |
|
375 |
#' append_topleft("Direction of Abnormality") %>% |
|
376 |
#' build_table(df = df, alt_counts_df = tern_ex_adsl) |
|
377 |
#' |
|
378 |
#' @export |
|
379 |
#' @order 2 |
|
380 |
count_abnormal_lab_worsen_by_baseline <- function(lyt, # nolint |
|
381 |
var, |
|
382 |
variables = list( |
|
383 |
id = "USUBJID", |
|
384 |
baseline_var = "BTOXGR", |
|
385 |
direction_var = "GRADDR" |
|
386 |
), |
|
387 |
na_str = default_na_str(), |
|
388 |
nested = TRUE, |
|
389 |
..., |
|
390 |
table_names = NULL, |
|
391 |
.stats = NULL, |
|
392 |
.formats = NULL, |
|
393 |
.labels = NULL, |
|
394 |
.indent_mods = NULL) { |
|
395 | 1x |
checkmate::assert_string(var) |
396 | ||
397 | 1x |
extra_args <- list(variables = variables, ...) |
398 | ||
399 | 1x |
afun <- make_afun( |
400 | 1x |
a_count_abnormal_lab_worsen_by_baseline, |
401 | 1x |
.stats = .stats, |
402 | 1x |
.formats = .formats, |
403 | 1x |
.labels = .labels, |
404 | 1x |
.indent_mods = .indent_mods |
405 |
) |
|
406 | ||
407 | 1x |
lyt <- analyze( |
408 | 1x |
lyt = lyt, |
409 | 1x |
vars = var, |
410 | 1x |
afun = afun, |
411 | 1x |
na_str = na_str, |
412 | 1x |
nested = nested, |
413 | 1x |
extra_args = extra_args, |
414 | 1x |
show_labels = "hidden" |
415 |
) |
|
416 | ||
417 | 1x |
lyt |
418 |
} |
1 |
#' Helper function for deriving analysis datasets for select laboratory tables |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Helper function that merges ADSL and ADLB datasets so that missing lab test records are inserted in the |
|
6 |
#' output dataset. Remember that `na_level` must match the needed pre-processing |
|
7 |
#' done with [df_explicit_na()] to have the desired output. |
|
8 |
#' |
|
9 |
#' @param adsl (`data.frame`)\cr ADSL data frame. |
|
10 |
#' @param adlb (`data.frame`)\cr ADLB data frame. |
|
11 |
#' @param worst_flag (named `character`)\cr worst post-baseline lab flag variable. See how this is implemented in the |
|
12 |
#' following examples. |
|
13 |
#' @param by_visit (`flag`)\cr defaults to `FALSE` to generate worst grade per patient. |
|
14 |
#' If worst grade per patient per visit is specified for `worst_flag`, then |
|
15 |
#' `by_visit` should be `TRUE` to generate worst grade patient per visit. |
|
16 |
#' @param no_fillin_visits (named `character`)\cr visits that are not considered for post-baseline worst toxicity |
|
17 |
#' grade. Defaults to `c("SCREENING", "BASELINE")`. |
|
18 |
#' |
|
19 |
#' @return `df` containing variables shared between `adlb` and `adsl` along with variables `PARAM`, `PARAMCD`, |
|
20 |
#' `ATOXGR`, and `BTOXGR` relevant for analysis. Optionally, `AVISIT` are `AVISITN` are included when |
|
21 |
#' `by_visit = TRUE` and `no_fillin_visits = c("SCREENING", "BASELINE")`. |
|
22 |
#' |
|
23 |
#' @details In the result data missing records will be created for the following situations: |
|
24 |
#' * Patients who are present in `adsl` but have no lab data in `adlb` (both baseline and post-baseline). |
|
25 |
#' * Patients who do not have any post-baseline lab values. |
|
26 |
#' * Patients without any post-baseline values flagged as the worst. |
|
27 |
#' |
|
28 |
#' @examples |
|
29 |
#' # `h_adsl_adlb_merge_using_worst_flag` |
|
30 |
#' adlb_out <- h_adsl_adlb_merge_using_worst_flag( |
|
31 |
#' tern_ex_adsl, |
|
32 |
#' tern_ex_adlb, |
|
33 |
#' worst_flag = c("WGRHIFL" = "Y") |
|
34 |
#' ) |
|
35 |
#' |
|
36 |
#' # `h_adsl_adlb_merge_using_worst_flag` by visit example |
|
37 |
#' adlb_out_by_visit <- h_adsl_adlb_merge_using_worst_flag( |
|
38 |
#' tern_ex_adsl, |
|
39 |
#' tern_ex_adlb, |
|
40 |
#' worst_flag = c("WGRLOVFL" = "Y"), |
|
41 |
#' by_visit = TRUE |
|
42 |
#' ) |
|
43 |
#' |
|
44 |
#' @export |
|
45 |
h_adsl_adlb_merge_using_worst_flag <- function(adsl, # nolint |
|
46 |
adlb, |
|
47 |
worst_flag = c("WGRHIFL" = "Y"), |
|
48 |
by_visit = FALSE, |
|
49 |
no_fillin_visits = c("SCREENING", "BASELINE")) { |
|
50 | 5x |
col_names <- names(worst_flag) |
51 | 5x |
filter_values <- worst_flag |
52 | ||
53 | 5x |
temp <- Map( |
54 | 5x |
function(x, y) which(adlb[[x]] == y), |
55 | 5x |
col_names, |
56 | 5x |
filter_values |
57 |
) |
|
58 | ||
59 | 5x |
position_satisfy_filters <- Reduce(intersect, temp) |
60 | ||
61 | 5x |
adsl_adlb_common_columns <- intersect(colnames(adsl), colnames(adlb)) |
62 | 5x |
columns_from_adlb <- c("USUBJID", "PARAM", "PARAMCD", "AVISIT", "AVISITN", "ATOXGR", "BTOXGR") |
63 | ||
64 | 5x |
adlb_f <- adlb[position_satisfy_filters, ] %>% |
65 | 5x |
dplyr::filter(!.data[["AVISIT"]] %in% no_fillin_visits) |
66 | 5x |
adlb_f <- adlb_f[, columns_from_adlb] |
67 | ||
68 | 5x |
avisits_grid <- adlb %>% |
69 | 5x |
dplyr::filter(!.data[["AVISIT"]] %in% no_fillin_visits) %>% |
70 | 5x |
dplyr::pull(.data[["AVISIT"]]) %>% |
71 | 5x |
unique() |
72 | ||
73 | 5x |
if (by_visit) { |
74 | 1x |
adsl_lb <- expand.grid( |
75 | 1x |
USUBJID = unique(adsl$USUBJID), |
76 | 1x |
AVISIT = avisits_grid, |
77 | 1x |
PARAMCD = unique(adlb$PARAMCD) |
78 |
) |
|
79 | ||
80 | 1x |
adsl_lb <- adsl_lb %>% |
81 | 1x |
dplyr::left_join(unique(adlb[c("AVISIT", "AVISITN")]), by = "AVISIT") %>% |
82 | 1x |
dplyr::left_join(unique(adlb[c("PARAM", "PARAMCD")]), by = "PARAMCD") |
83 | ||
84 | 1x |
adsl1 <- adsl[, adsl_adlb_common_columns] |
85 | 1x |
adsl_lb <- adsl1 %>% merge(adsl_lb, by = "USUBJID") |
86 | ||
87 | 1x |
by_variables_from_adlb <- c("USUBJID", "AVISIT", "AVISITN", "PARAMCD", "PARAM") |
88 | ||
89 | 1x |
adlb_btoxgr <- adlb %>% |
90 | 1x |
dplyr::select(c("USUBJID", "PARAMCD", "BTOXGR")) %>% |
91 | 1x |
unique() %>% |
92 | 1x |
dplyr::rename("BTOXGR_MAP" = "BTOXGR") |
93 | ||
94 | 1x |
adlb_out <- merge( |
95 | 1x |
adlb_f, |
96 | 1x |
adsl_lb, |
97 | 1x |
by = by_variables_from_adlb, |
98 | 1x |
all = TRUE, |
99 | 1x |
sort = FALSE |
100 |
) |
|
101 | 1x |
adlb_out <- adlb_out %>% |
102 | 1x |
dplyr::left_join(adlb_btoxgr, by = c("USUBJID", "PARAMCD")) %>% |
103 | 1x |
dplyr::mutate(BTOXGR = .data$BTOXGR_MAP) %>% |
104 | 1x |
dplyr::select(-"BTOXGR_MAP") |
105 | ||
106 | 1x |
adlb_var_labels <- c( |
107 | 1x |
formatters::var_labels(adlb[by_variables_from_adlb]), |
108 | 1x |
formatters::var_labels(adlb[columns_from_adlb[!columns_from_adlb %in% by_variables_from_adlb]]), |
109 | 1x |
formatters::var_labels(adsl[adsl_adlb_common_columns[adsl_adlb_common_columns != "USUBJID"]]) |
110 |
) |
|
111 |
} else { |
|
112 | 4x |
adsl_lb <- expand.grid( |
113 | 4x |
USUBJID = unique(adsl$USUBJID), |
114 | 4x |
PARAMCD = unique(adlb$PARAMCD) |
115 |
) |
|
116 | ||
117 | 4x |
adsl_lb <- adsl_lb %>% dplyr::left_join(unique(adlb[c("PARAM", "PARAMCD")]), by = "PARAMCD") |
118 | ||
119 | 4x |
adsl1 <- adsl[, adsl_adlb_common_columns] |
120 | 4x |
adsl_lb <- adsl1 %>% merge(adsl_lb, by = "USUBJID") |
121 | ||
122 | 4x |
by_variables_from_adlb <- c("USUBJID", "PARAMCD", "PARAM") |
123 | ||
124 | 4x |
adlb_out <- merge( |
125 | 4x |
adlb_f, |
126 | 4x |
adsl_lb, |
127 | 4x |
by = by_variables_from_adlb, |
128 | 4x |
all = TRUE, |
129 | 4x |
sort = FALSE |
130 |
) |
|
131 | ||
132 | 4x |
adlb_var_labels <- c( |
133 | 4x |
formatters::var_labels(adlb[by_variables_from_adlb]), |
134 | 4x |
formatters::var_labels(adlb[columns_from_adlb[!columns_from_adlb %in% by_variables_from_adlb]]), |
135 | 4x |
formatters::var_labels(adsl[adsl_adlb_common_columns[adsl_adlb_common_columns != "USUBJID"]]) |
136 |
) |
|
137 |
} |
|
138 | ||
139 | 5x |
adlb_out$ATOXGR <- as.factor(adlb_out$ATOXGR) |
140 | 5x |
adlb_out$BTOXGR <- as.factor(adlb_out$BTOXGR) |
141 | ||
142 | 5x |
formatters::var_labels(adlb_out) <- adlb_var_labels |
143 | ||
144 | 5x |
adlb_out |
145 |
} |
1 |
#' Count patients with marked laboratory abnormalities |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' The analyze function [count_abnormal_by_marked()] creates a layout element to count patients with marked laboratory |
|
6 |
#' abnormalities for each direction of abnormality, categorized by parameter value. |
|
7 |
#' |
|
8 |
#' This function analyzes primary analysis variable `var` which indicates whether a single, replicated, |
|
9 |
#' or last marked laboratory abnormality was observed. Levels of `var` to include for each marked lab |
|
10 |
#' abnormality (`single` and `last_replicated`) can be supplied via the `category` parameter. Additional |
|
11 |
#' analysis variables that can be supplied as a list via the `variables` parameter are `id` (defaults |
|
12 |
#' to `USUBJID`), a variable to indicate unique subject identifiers, `param` (defaults to `PARAM`), a |
|
13 |
#' variable to indicate parameter values, and `direction` (defaults to `abn_dir`), a variable to indicate |
|
14 |
#' abnormality directions. |
|
15 |
#' |
|
16 |
#' For each combination of `param` and `direction` levels, marked lab abnormality counts are calculated |
|
17 |
#' as follows: |
|
18 |
#' * `Single, not last` & `Last or replicated`: The number of patients with `Single, not last` |
|
19 |
#' and `Last or replicated` values, respectively. |
|
20 |
#' * `Any`: The number of patients with either single or replicated marked abnormalities. |
|
21 |
#' |
|
22 |
#' Fractions are calculated by dividing the above counts by the number of patients with at least one |
|
23 |
#' valid measurement recorded during the analysis. |
|
24 |
#' |
|
25 |
#' Prior to using this function in your table layout you must use [rtables::split_rows_by()] to create two |
|
26 |
#' row splits, one on variable `param` and one on variable `direction`. |
|
27 |
#' |
|
28 |
#' @inheritParams argument_convention |
|
29 |
#' @param category (`list`)\cr a list with different marked category names for single |
|
30 |
#' and last or replicated. |
|
31 |
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("abnormal_by_marked")` |
|
32 |
#' to see available statistics for this function. |
|
33 |
#' |
|
34 |
#' @note `Single, not last` and `Last or replicated` levels are mutually exclusive. If a patient has |
|
35 |
#' abnormalities that meet both the `Single, not last` and `Last or replicated` criteria, then the |
|
36 |
#' patient will be counted only under the `Last or replicated` category. |
|
37 |
#' |
|
38 |
#' @name abnormal_by_marked |
|
39 |
#' @order 1 |
|
40 |
NULL |
|
41 | ||
42 |
#' @describeIn abnormal_by_marked Statistics function for patients with marked lab abnormalities. |
|
43 |
#' |
|
44 |
#' @return |
|
45 |
#' * `s_count_abnormal_by_marked()` returns statistic `count_fraction` with `Single, not last`, |
|
46 |
#' `Last or replicated`, and `Any` results. |
|
47 |
#' |
|
48 |
#' @keywords internal |
|
49 |
s_count_abnormal_by_marked <- function(df, |
|
50 |
.var = "AVALCAT1", |
|
51 |
.spl_context, |
|
52 |
category = list(single = "SINGLE", last_replicated = c("LAST", "REPLICATED")), |
|
53 |
variables = list(id = "USUBJID", param = "PARAM", direction = "abn_dir")) { |
|
54 | 3x |
checkmate::assert_string(.var) |
55 | 3x |
checkmate::assert_list(variables) |
56 | 3x |
checkmate::assert_list(category) |
57 | 3x |
checkmate::assert_subset(names(category), c("single", "last_replicated")) |
58 | 3x |
checkmate::assert_subset(names(variables), c("id", "param", "direction")) |
59 | 3x |
checkmate::assert_vector(unique(df[[variables$direction]]), max.len = 1) |
60 | ||
61 | 2x |
assert_df_with_variables(df, c(aval = .var, variables)) |
62 | 2x |
checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character")) |
63 | 2x |
checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character")) |
64 | ||
65 | ||
66 | 2x |
first_row <- .spl_context[.spl_context$split == variables[["param"]], ] |
67 |
# Patients in the denominator have at least one post-baseline visit. |
|
68 | 2x |
subj <- first_row$full_parent_df[[1]][[variables[["id"]]]] |
69 | 2x |
subj_cur_col <- subj[first_row$cur_col_subset[[1]]] |
70 |
# Some subjects may have a record for high and low directions but |
|
71 |
# should be counted only once. |
|
72 | 2x |
denom <- length(unique(subj_cur_col)) |
73 | ||
74 | 2x |
if (denom != 0) { |
75 | 2x |
subjects_last_replicated <- unique( |
76 | 2x |
df[df[[.var]] %in% category[["last_replicated"]], variables$id, drop = TRUE] |
77 |
) |
|
78 | 2x |
subjects_single <- unique( |
79 | 2x |
df[df[[.var]] %in% category[["single"]], variables$id, drop = TRUE] |
80 |
) |
|
81 |
# Subjects who have both single and last/replicated abnormalities are counted in only the last/replicated group. |
|
82 | 2x |
subjects_single <- setdiff(subjects_single, subjects_last_replicated) |
83 | 2x |
n_single <- length(subjects_single) |
84 | 2x |
n_last_replicated <- length(subjects_last_replicated) |
85 | 2x |
n_any <- n_single + n_last_replicated |
86 | 2x |
result <- list(count_fraction = list( |
87 | 2x |
"Single, not last" = c(n_single, n_single / denom), |
88 | 2x |
"Last or replicated" = c(n_last_replicated, n_last_replicated / denom), |
89 | 2x |
"Any Abnormality" = c(n_any, n_any / denom) |
90 |
)) |
|
91 |
} else { |
|
92 | ! |
result <- list(count_fraction = list( |
93 | ! |
"Single, not last" = c(0, 0), |
94 | ! |
"Last or replicated" = c(0, 0), |
95 | ! |
"Any Abnormality" = c(0, 0) |
96 |
)) |
|
97 |
} |
|
98 | ||
99 | 2x |
result |
100 |
} |
|
101 | ||
102 |
#' @describeIn abnormal_by_marked Formatted analysis function which is used as `afun` |
|
103 |
#' in `count_abnormal_by_marked()`. |
|
104 |
#' |
|
105 |
#' @return |
|
106 |
#' * `a_count_abnormal_by_marked()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
107 |
#' |
|
108 |
#' @keywords internal |
|
109 |
a_count_abnormal_by_marked <- make_afun( |
|
110 |
s_count_abnormal_by_marked, |
|
111 |
.formats = c(count_fraction = format_count_fraction) |
|
112 |
) |
|
113 | ||
114 |
#' @describeIn abnormal_by_marked Layout-creating function which can take statistics function arguments |
|
115 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()]. |
|
116 |
#' |
|
117 |
#' @return |
|
118 |
#' * `count_abnormal_by_marked()` returns a layout object suitable for passing to further layouting functions, |
|
119 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing |
|
120 |
#' the statistics from `s_count_abnormal_by_marked()` to the table layout. |
|
121 |
#' |
|
122 |
#' @examples |
|
123 |
#' library(dplyr) |
|
124 |
#' |
|
125 |
#' df <- data.frame( |
|
126 |
#' USUBJID = as.character(c(rep(1, 5), rep(2, 5), rep(1, 5), rep(2, 5))), |
|
127 |
#' ARMCD = factor(c(rep("ARM A", 5), rep("ARM B", 5), rep("ARM A", 5), rep("ARM B", 5))), |
|
128 |
#' ANRIND = factor(c( |
|
129 |
#' "NORMAL", "HIGH", "HIGH", "HIGH HIGH", "HIGH", |
|
130 |
#' "HIGH", "HIGH", "HIGH HIGH", "NORMAL", "HIGH HIGH", "NORMAL", "LOW", "LOW", "LOW LOW", "LOW", |
|
131 |
#' "LOW", "LOW", "LOW LOW", "NORMAL", "LOW LOW" |
|
132 |
#' )), |
|
133 |
#' ONTRTFL = rep(c("", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"), 2), |
|
134 |
#' PARAMCD = factor(c(rep("CRP", 10), rep("ALT", 10))), |
|
135 |
#' AVALCAT1 = factor(rep(c("", "", "", "SINGLE", "REPLICATED", "", "", "LAST", "", "SINGLE"), 2)), |
|
136 |
#' stringsAsFactors = FALSE |
|
137 |
#' ) |
|
138 |
#' |
|
139 |
#' df <- df %>% |
|
140 |
#' mutate(abn_dir = factor( |
|
141 |
#' case_when( |
|
142 |
#' ANRIND == "LOW LOW" ~ "Low", |
|
143 |
#' ANRIND == "HIGH HIGH" ~ "High", |
|
144 |
#' TRUE ~ "" |
|
145 |
#' ), |
|
146 |
#' levels = c("Low", "High") |
|
147 |
#' )) |
|
148 |
#' |
|
149 |
#' # Select only post-baseline records. |
|
150 |
#' df <- df %>% filter(ONTRTFL == "Y") |
|
151 |
#' df_crp <- df %>% |
|
152 |
#' filter(PARAMCD == "CRP") %>% |
|
153 |
#' droplevels() |
|
154 |
#' full_parent_df <- list(df_crp, "not_needed") |
|
155 |
#' cur_col_subset <- list(rep(TRUE, nrow(df_crp)), "not_needed") |
|
156 |
#' spl_context <- data.frame( |
|
157 |
#' split = c("PARAMCD", "GRADE_DIR"), |
|
158 |
#' full_parent_df = I(full_parent_df), |
|
159 |
#' cur_col_subset = I(cur_col_subset) |
|
160 |
#' ) |
|
161 |
#' |
|
162 |
#' map <- unique( |
|
163 |
#' df[df$abn_dir %in% c("Low", "High") & df$AVALCAT1 != "", c("PARAMCD", "abn_dir")] |
|
164 |
#' ) %>% |
|
165 |
#' lapply(as.character) %>% |
|
166 |
#' as.data.frame() %>% |
|
167 |
#' arrange(PARAMCD, abn_dir) |
|
168 |
#' |
|
169 |
#' basic_table() %>% |
|
170 |
#' split_cols_by("ARMCD") %>% |
|
171 |
#' split_rows_by("PARAMCD") %>% |
|
172 |
#' summarize_num_patients( |
|
173 |
#' var = "USUBJID", |
|
174 |
#' .stats = "unique_count" |
|
175 |
#' ) %>% |
|
176 |
#' split_rows_by( |
|
177 |
#' "abn_dir", |
|
178 |
#' split_fun = trim_levels_to_map(map) |
|
179 |
#' ) %>% |
|
180 |
#' count_abnormal_by_marked( |
|
181 |
#' var = "AVALCAT1", |
|
182 |
#' variables = list( |
|
183 |
#' id = "USUBJID", |
|
184 |
#' param = "PARAMCD", |
|
185 |
#' direction = "abn_dir" |
|
186 |
#' ) |
|
187 |
#' ) %>% |
|
188 |
#' build_table(df = df) |
|
189 |
#' |
|
190 |
#' basic_table() %>% |
|
191 |
#' split_cols_by("ARMCD") %>% |
|
192 |
#' split_rows_by("PARAMCD") %>% |
|
193 |
#' summarize_num_patients( |
|
194 |
#' var = "USUBJID", |
|
195 |
#' .stats = "unique_count" |
|
196 |
#' ) %>% |
|
197 |
#' split_rows_by( |
|
198 |
#' "abn_dir", |
|
199 |
#' split_fun = trim_levels_in_group("abn_dir") |
|
200 |
#' ) %>% |
|
201 |
#' count_abnormal_by_marked( |
|
202 |
#' var = "AVALCAT1", |
|
203 |
#' variables = list( |
|
204 |
#' id = "USUBJID", |
|
205 |
#' param = "PARAMCD", |
|
206 |
#' direction = "abn_dir" |
|
207 |
#' ) |
|
208 |
#' ) %>% |
|
209 |
#' build_table(df = df) |
|
210 |
#' |
|
211 |
#' @export |
|
212 |
#' @order 2 |
|
213 |
count_abnormal_by_marked <- function(lyt, |
|
214 |
var, |
|
215 |
category = list(single = "SINGLE", last_replicated = c("LAST", "REPLICATED")), |
|
216 |
variables = list(id = "USUBJID", param = "PARAM", direction = "abn_dir"), |
|
217 |
na_str = default_na_str(), |
|
218 |
nested = TRUE, |
|
219 |
..., |
|
220 |
.stats = NULL, |
|
221 |
.formats = NULL, |
|
222 |
.labels = NULL, |
|
223 |
.indent_mods = NULL) { |
|
224 | 1x |
checkmate::assert_string(var) |
225 | ||
226 | 1x |
extra_args <- list(category = category, variables = variables, ...) |
227 | ||
228 | 1x |
afun <- make_afun( |
229 | 1x |
a_count_abnormal_by_marked, |
230 | 1x |
.stats = .stats, |
231 | 1x |
.formats = .formats, |
232 | 1x |
.labels = .labels, |
233 | 1x |
.indent_mods = .indent_mods, |
234 | 1x |
.ungroup_stats = "count_fraction" |
235 |
) |
|
236 | ||
237 | 1x |
lyt <- analyze( |
238 | 1x |
lyt = lyt, |
239 | 1x |
vars = var, |
240 | 1x |
afun = afun, |
241 | 1x |
na_str = na_str, |
242 | 1x |
nested = nested, |
243 | 1x |
show_labels = "hidden", |
244 | 1x |
extra_args = extra_args |
245 |
) |
|
246 | 1x |
lyt |
247 |
} |
1 |
#' Formatting functions |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' See below for the list of formatting functions created in `tern` to work with `rtables`. |
|
6 |
#' |
|
7 |
#' Other available formats can be listed via [`formatters::list_valid_format_labels()`]. Additional |
|
8 |
#' custom formats can be created via the [`formatters::sprintf_format()`] function. |
|
9 |
#' |
|
10 |
#' @family formatting functions |
|
11 |
#' @name formatting_functions |
|
12 |
NULL |
|
13 | ||
14 |
#' Format fraction and percentage |
|
15 |
#' |
|
16 |
#' @description `r lifecycle::badge("stable")` |
|
17 |
#' |
|
18 |
#' Formats a fraction together with ratio in percent. |
|
19 |
#' |
|
20 |
#' @param x (named `integer`)\cr vector with elements `num` and `denom`. |
|
21 |
#' @param ... not used. Required for `rtables` interface. |
|
22 |
#' |
|
23 |
#' @return A string in the format `num / denom (ratio %)`. If `num` is 0, the format is `num / denom`. |
|
24 |
#' |
|
25 |
#' @examples |
|
26 |
#' format_fraction(x = c(num = 2L, denom = 3L)) |
|
27 |
#' format_fraction(x = c(num = 0L, denom = 3L)) |
|
28 |
#' |
|
29 |
#' @family formatting functions |
|
30 |
#' @export |
|
31 |
format_fraction <- function(x, ...) { |
|
32 | 4x |
attr(x, "label") <- NULL |
33 | ||
34 | 4x |
checkmate::assert_vector(x) |
35 | 4x |
checkmate::assert_count(x["num"]) |
36 | 2x |
checkmate::assert_count(x["denom"]) |
37 | ||
38 | 2x |
result <- if (x["num"] == 0) { |
39 | 1x |
paste0(x["num"], "/", x["denom"]) |
40 |
} else { |
|
41 | 1x |
paste0( |
42 | 1x |
x["num"], "/", x["denom"], |
43 | 1x |
" (", round(x["num"] / x["denom"] * 100, 1), "%)" |
44 |
) |
|
45 |
} |
|
46 | ||
47 | 2x |
return(result) |
48 |
} |
|
49 | ||
50 |
#' Format fraction and percentage with fixed single decimal place |
|
51 |
#' |
|
52 |
#' @description `r lifecycle::badge("stable")` |
|
53 |
#' |
|
54 |
#' Formats a fraction together with ratio in percent with fixed single decimal place. |
|
55 |
#' Includes trailing zero in case of whole number percentages to always keep one decimal place. |
|
56 |
#' |
|
57 |
#' @inheritParams format_fraction |
|
58 |
#' |
|
59 |
#' @return A string in the format `num / denom (ratio %)`. If `num` is 0, the format is `num / denom`. |
|
60 |
#' |
|
61 |
#' @examples |
|
62 |
#' format_fraction_fixed_dp(x = c(num = 1L, denom = 2L)) |
|
63 |
#' format_fraction_fixed_dp(x = c(num = 1L, denom = 4L)) |
|
64 |
#' format_fraction_fixed_dp(x = c(num = 0L, denom = 3L)) |
|
65 |
#' |
|
66 |
#' @family formatting functions |
|
67 |
#' @export |
|
68 |
format_fraction_fixed_dp <- function(x, ...) { |
|
69 | 3x |
attr(x, "label") <- NULL |
70 | 3x |
checkmate::assert_vector(x) |
71 | 3x |
checkmate::assert_count(x["num"]) |
72 | 3x |
checkmate::assert_count(x["denom"]) |
73 | ||
74 | 3x |
result <- if (x["num"] == 0) { |
75 | 1x |
paste0(x["num"], "/", x["denom"]) |
76 |
} else { |
|
77 | 2x |
paste0( |
78 | 2x |
x["num"], "/", x["denom"], |
79 | 2x |
" (", sprintf("%.1f", round(x["num"] / x["denom"] * 100, 1)), "%)" |
80 |
) |
|
81 |
} |
|
82 | 3x |
return(result) |
83 |
} |
|
84 | ||
85 |
#' Format count and fraction |
|
86 |
#' |
|
87 |
#' @description `r lifecycle::badge("stable")` |
|
88 |
#' |
|
89 |
#' Formats a count together with fraction with special consideration when count is `0`. |
|
90 |
#' |
|
91 |
#' @param x (`numeric(2)`)\cr vector of length 2 with count and fraction, respectively. |
|
92 |
#' @param ... not used. Required for `rtables` interface. |
|
93 |
#' |
|
94 |
#' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`. |
|
95 |
#' |
|
96 |
#' @examples |
|
97 |
#' format_count_fraction(x = c(2, 0.6667)) |
|
98 |
#' format_count_fraction(x = c(0, 0)) |
|
99 |
#' |
|
100 |
#' @family formatting functions |
|
101 |
#' @export |
|
102 |
format_count_fraction <- function(x, ...) { |
|
103 | 3x |
attr(x, "label") <- NULL |
104 | ||
105 | 3x |
if (any(is.na(x))) { |
106 | 1x |
return("NA") |
107 |
} |
|
108 | ||
109 | 2x |
checkmate::assert_vector(x) |
110 | 2x |
checkmate::assert_integerish(x[1]) |
111 | 2x |
assert_proportion_value(x[2], include_boundaries = TRUE) |
112 | ||
113 | 2x |
result <- if (x[1] == 0) { |
114 | 1x |
"0" |
115 |
} else { |
|
116 | 1x |
paste0(x[1], " (", round(x[2] * 100, 1), "%)") |
117 |
} |
|
118 | ||
119 | 2x |
return(result) |
120 |
} |
|
121 | ||
122 |
#' Format count and percentage with fixed single decimal place |
|
123 |
#' |
|
124 |
#' @description `r lifecycle::badge("experimental")` |
|
125 |
#' |
|
126 |
#' Formats a count together with fraction with special consideration when count is `0`. |
|
127 |
#' |
|
128 |
#' @inheritParams format_count_fraction |
|
129 |
#' |
|
130 |
#' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`. |
|
131 |
#' |
|
132 |
#' @examples |
|
133 |
#' format_count_fraction_fixed_dp(x = c(2, 0.6667)) |
|
134 |
#' format_count_fraction_fixed_dp(x = c(2, 0.5)) |
|
135 |
#' format_count_fraction_fixed_dp(x = c(0, 0)) |
|
136 |
#' |
|
137 |
#' @family formatting functions |
|
138 |
#' @export |
|
139 |
format_count_fraction_fixed_dp <- function(x, ...) { |
|
140 | 503x |
attr(x, "label") <- NULL |
141 | ||
142 | 503x |
if (any(is.na(x))) { |
143 | ! |
return("NA") |
144 |
} |
|
145 | ||
146 | 503x |
checkmate::assert_vector(x) |
147 | 503x |
checkmate::assert_integerish(x[1]) |
148 | 503x |
assert_proportion_value(x[2], include_boundaries = TRUE) |
149 | ||
150 | 503x |
result <- if (x[1] == 0) { |
151 | 1x |
"0" |
152 | 503x |
} else if (.is_equal_float(x[2], 1)) { |
153 | 500x |
sprintf("%d (100%%)", x[1]) |
154 |
} else { |
|
155 | 2x |
sprintf("%d (%.1f%%)", x[1], x[2] * 100) |
156 |
} |
|
157 | ||
158 | 503x |
return(result) |
159 |
} |
|
160 | ||
161 |
#' Format count and fraction with special case for count < 10 |
|
162 |
#' |
|
163 |
#' @description `r lifecycle::badge("stable")` |
|
164 |
#' |
|
165 |
#' Formats a count together with fraction with special consideration when count is less than 10. |
|
166 |
#' |
|
167 |
#' @inheritParams format_count_fraction |
|
168 |
#' |
|
169 |
#' @return A string in the format `count (fraction %)`. If `count` is less than 10, only `count` is printed. |
|
170 |
#' |
|
171 |
#' @examples |
|
172 |
#' format_count_fraction_lt10(x = c(275, 0.9673)) |
|
173 |
#' format_count_fraction_lt10(x = c(2, 0.6667)) |
|
174 |
#' format_count_fraction_lt10(x = c(9, 1)) |
|
175 |
#' |
|
176 |
#' @family formatting functions |
|
177 |
#' @export |
|
178 |
format_count_fraction_lt10 <- function(x, ...) { |
|
179 | 7x |
attr(x, "label") <- NULL |
180 | ||
181 | 7x |
if (any(is.na(x))) { |
182 | 1x |
return("NA") |
183 |
} |
|
184 | ||
185 | 6x |
checkmate::assert_vector(x) |
186 | 6x |
checkmate::assert_integerish(x[1]) |
187 | 6x |
assert_proportion_value(x[2], include_boundaries = TRUE) |
188 | ||
189 | 6x |
result <- if (x[1] < 10) { |
190 | 3x |
paste0(x[1]) |
191 |
} else { |
|
192 | 3x |
paste0(x[1], " (", round(x[2] * 100, 1), "%)") |
193 |
} |
|
194 | ||
195 | 6x |
return(result) |
196 |
} |
|
197 | ||
198 |
#' Format XX as a formatting function |
|
199 |
#' |
|
200 |
#' Translate a string where x and dots are interpreted as number place |
|
201 |
#' holders, and others as formatting elements. |
|
202 |
#' |
|
203 |
#' @param str (`string`)\cr template. |
|
204 |
#' |
|
205 |
#' @return An `rtables` formatting function. |
|
206 |
#' |
|
207 |
#' @examples |
|
208 |
#' test <- list(c(1.658, 0.5761), c(1e1, 785.6)) |
|
209 |
#' |
|
210 |
#' z <- format_xx("xx (xx.x)") |
|
211 |
#' sapply(test, z) |
|
212 |
#' |
|
213 |
#' z <- format_xx("xx.x - xx.x") |
|
214 |
#' sapply(test, z) |
|
215 |
#' |
|
216 |
#' z <- format_xx("xx.x, incl. xx.x% NE") |
|
217 |
#' sapply(test, z) |
|
218 |
#' |
|
219 |
#' @family formatting functions |
|
220 |
#' @export |
|
221 |
format_xx <- function(str) { |
|
222 |
# Find position in the string. |
|
223 | 1x |
positions <- gregexpr(pattern = "x+\\.x+|x+", text = str, perl = TRUE) |
224 | 1x |
x_positions <- regmatches(x = str, m = positions)[[1]] |
225 | ||
226 |
# Roundings depends on the number of x behind [.]. |
|
227 | 1x |
roundings <- lapply( |
228 | 1x |
X = x_positions, |
229 | 1x |
function(x) { |
230 | 2x |
y <- strsplit(split = "\\.", x = x)[[1]] |
231 | 2x |
rounding <- function(x) { |
232 | 4x |
round(x, digits = ifelse(length(y) > 1, nchar(y[2]), 0)) |
233 |
} |
|
234 | 2x |
return(rounding) |
235 |
} |
|
236 |
) |
|
237 | ||
238 | 1x |
rtable_format <- function(x, output) { |
239 | 2x |
values <- Map(y = x, fun = roundings, function(y, fun) fun(y)) |
240 | 2x |
regmatches(x = str, m = positions)[[1]] <- values |
241 | 2x |
return(str) |
242 |
} |
|
243 | ||
244 | 1x |
return(rtable_format) |
245 |
} |
|
246 | ||
247 |
#' Format numeric values by significant figures |
|
248 |
#' |
|
249 |
#' Format numeric values to print with a specified number of significant figures. |
|
250 |
#' |
|
251 |
#' @param sigfig (`integer(1)`)\cr number of significant figures to display. |
|
252 |
#' @param format (`string`)\cr the format label (string) to apply when printing the value. Decimal |
|
253 |
#' places in string are ignored in favor of formatting by significant figures. Formats options are: |
|
254 |
#' `"xx"`, `"xx / xx"`, `"(xx, xx)"`, `"xx - xx"`, and `"xx (xx)"`. |
|
255 |
#' @param num_fmt (`string`)\cr numeric format modifiers to apply to the value. Defaults to `"fg"` for |
|
256 |
#' standard significant figures formatting - fixed (non-scientific notation) format (`"f"`) |
|
257 |
#' and `sigfig` equal to number of significant figures instead of decimal places (`"g"`). See the |
|
258 |
#' [formatC()] `format` argument for more options. |
|
259 |
#' |
|
260 |
#' @return An `rtables` formatting function. |
|
261 |
#' |
|
262 |
#' @examples |
|
263 |
#' fmt_3sf <- format_sigfig(3) |
|
264 |
#' fmt_3sf(1.658) |
|
265 |
#' fmt_3sf(1e1) |
|
266 |
#' |
|
267 |
#' fmt_5sf <- format_sigfig(5) |
|
268 |
#' fmt_5sf(0.57) |
|
269 |
#' fmt_5sf(0.000025645) |
|
270 |
#' |
|
271 |
#' @family formatting functions |
|
272 |
#' @export |
|
273 |
format_sigfig <- function(sigfig, format = "xx", num_fmt = "fg") { |
|
274 | 3x |
checkmate::assert_integerish(sigfig) |
275 | 3x |
format <- gsub("xx\\.|xx\\.x+", "xx", format) |
276 | 3x |
checkmate::assert_choice(format, c("xx", "xx / xx", "(xx, xx)", "xx - xx", "xx (xx)")) |
277 | 3x |
function(x, ...) { |
278 | ! |
if (!is.numeric(x)) stop("`format_sigfig` cannot be used for non-numeric values. Please choose another format.") |
279 | 12x |
num <- formatC(signif(x, digits = sigfig), digits = sigfig, format = num_fmt, flag = "#") |
280 | 12x |
num <- gsub("\\.$", "", num) # remove trailing "." |
281 | ||
282 | 12x |
format_value(num, format) |
283 |
} |
|
284 |
} |
|
285 | ||
286 |
#' Format fraction with lower threshold |
|
287 |
#' |
|
288 |
#' @description `r lifecycle::badge("stable")` |
|
289 |
#' |
|
290 |
#' Formats a fraction when the second element of the input `x` is the fraction. It applies |
|
291 |
#' a lower threshold, below which it is just stated that the fraction is smaller than that. |
|
292 |
#' |
|
293 |
#' @param threshold (`proportion`)\cr lower threshold. |
|
294 |
#' |
|
295 |
#' @return An `rtables` formatting function that takes numeric input `x` where the second |
|
296 |
#' element is the fraction that is formatted. If the fraction is above or equal to the threshold, |
|
297 |
#' then it is displayed in percentage. If it is positive but below the threshold, it returns, |
|
298 |
#' e.g. "<1" if the threshold is `0.01`. If it is zero, then just "0" is returned. |
|
299 |
#' |
|
300 |
#' @examples |
|
301 |
#' format_fun <- format_fraction_threshold(0.05) |
|
302 |
#' format_fun(x = c(20, 0.1)) |
|
303 |
#' format_fun(x = c(2, 0.01)) |
|
304 |
#' format_fun(x = c(0, 0)) |
|
305 |
#' |
|
306 |
#' @family formatting functions |
|
307 |
#' @export |
|
308 |
format_fraction_threshold <- function(threshold) { |
|
309 | 1x |
assert_proportion_value(threshold) |
310 | 1x |
string_below_threshold <- paste0("<", round(threshold * 100)) |
311 | 1x |
function(x, ...) { |
312 | 3x |
assert_proportion_value(x[2], include_boundaries = TRUE) |
313 | 3x |
ifelse( |
314 | 3x |
x[2] > 0.01, |
315 | 3x |
round(x[2] * 100), |
316 | 3x |
ifelse( |
317 | 3x |
x[2] == 0, |
318 | 3x |
"0", |
319 | 3x |
string_below_threshold |
320 |
) |
|
321 |
) |
|
322 |
} |
|
323 |
} |
|
324 | ||
325 |
#' Format extreme values |
|
326 |
#' |
|
327 |
#' @description `r lifecycle::badge("stable")` |
|
328 |
#' |
|
329 |
#' `rtables` formatting functions that handle extreme values. |
|
330 |
#' |
|
331 |
#' @param digits (`integer(1)`)\cr number of decimal places to display. |
|
332 |
#' |
|
333 |
#' @details For each input, apply a format to the specified number of `digits`. If the value is |
|
334 |
#' below a threshold, it returns "<0.01" e.g. if the number of `digits` is 2. If the value is |
|
335 |
#' above a threshold, it returns ">999.99" e.g. if the number of `digits` is 2. |
|
336 |
#' If it is zero, then returns "0.00". |
|
337 |
#' |
|
338 |
#' @family formatting functions |
|
339 |
#' @name extreme_format |
|
340 |
NULL |
|
341 | ||
342 |
#' @describeIn extreme_format Internal helper function to calculate the threshold and create formatted strings |
|
343 |
#' used in Formatting Functions. Returns a list with elements `threshold` and `format_string`. |
|
344 |
#' |
|
345 |
#' @return |
|
346 |
#' * `h_get_format_threshold()` returns a `list` of 2 elements: `threshold`, with `low` and `high` thresholds, |
|
347 |
#' and `format_string`, with thresholds formatted as strings. |
|
348 |
#' |
|
349 |
#' @examples |
|
350 |
#' h_get_format_threshold(2L) |
|
351 |
#' |
|
352 |
#' @export |
|
353 |
h_get_format_threshold <- function(digits = 2L) { |
|
354 | 2113x |
checkmate::assert_integerish(digits) |
355 | ||
356 | 2113x |
low_threshold <- 1 / (10 ^ digits) # styler: off |
357 | 2113x |
high_threshold <- 1000 - (1 / (10 ^ digits)) # styler: off |
358 | ||
359 | 2113x |
string_below_threshold <- paste0("<", low_threshold) |
360 | 2113x |
string_above_threshold <- paste0(">", high_threshold) |
361 | ||
362 | 2113x |
list( |
363 | 2113x |
"threshold" = c(low = low_threshold, high = high_threshold), |
364 | 2113x |
"format_string" = c(low = string_below_threshold, high = string_above_threshold) |
365 |
) |
|
366 |
} |
|
367 | ||
368 |
#' @describeIn extreme_format Internal helper function to apply a threshold format to a value. |
|
369 |
#' Creates a formatted string to be used in Formatting Functions. |
|
370 |
#' |
|
371 |
#' @param x (`numeric(1)`)\cr value to format. |
|
372 |
#' |
|
373 |
#' @return |
|
374 |
#' * `h_format_threshold()` returns the given value, or if the value is not within the digit threshold the relation |
|
375 |
#' of the given value to the digit threshold, as a formatted string. |
|
376 |
#' |
|
377 |
#' @examples |
|
378 |
#' h_format_threshold(0.001) |
|
379 |
#' h_format_threshold(1000) |
|
380 |
#' |
|
381 |
#' @export |
|
382 |
h_format_threshold <- function(x, digits = 2L) { |
|
383 | 2115x |
if (is.na(x)) { |
384 | 4x |
return(x) |
385 |
} |
|
386 | ||
387 | 2111x |
checkmate::assert_numeric(x, lower = 0) |
388 | ||
389 | 2111x |
l_fmt <- h_get_format_threshold(digits) |
390 | ||
391 | 2111x |
result <- if (x < l_fmt$threshold["low"] && 0 < x) { |
392 | 44x |
l_fmt$format_string["low"] |
393 | 2111x |
} else if (x > l_fmt$threshold["high"]) { |
394 | 99x |
l_fmt$format_string["high"] |
395 |
} else { |
|
396 | 1968x |
sprintf(fmt = paste0("%.", digits, "f"), x) |
397 |
} |
|
398 | ||
399 | 2111x |
unname(result) |
400 |
} |
|
401 | ||
402 |
#' Format a single extreme value |
|
403 |
#' |
|
404 |
#' @description `r lifecycle::badge("stable")` |
|
405 |
#' |
|
406 |
#' Create a formatting function for a single extreme value. |
|
407 |
#' |
|
408 |
#' @inheritParams extreme_format |
|
409 |
#' |
|
410 |
#' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme value. |
|
411 |
#' |
|
412 |
#' @examples |
|
413 |
#' format_fun <- format_extreme_values(2L) |
|
414 |
#' format_fun(x = 0.127) |
|
415 |
#' format_fun(x = Inf) |
|
416 |
#' format_fun(x = 0) |
|
417 |
#' format_fun(x = 0.009) |
|
418 |
#' |
|
419 |
#' @family formatting functions |
|
420 |
#' @export |
|
421 |
format_extreme_values <- function(digits = 2L) { |
|
422 | 63x |
function(x, ...) { |
423 | 657x |
checkmate::assert_scalar(x, na.ok = TRUE) |
424 | ||
425 | 657x |
h_format_threshold(x = x, digits = digits) |
426 |
} |
|
427 |
} |
|
428 | ||
429 |
#' Format extreme values part of a confidence interval |
|
430 |
#' |
|
431 |
#' @description `r lifecycle::badge("stable")` |
|
432 |
#' |
|
433 |
#' Formatting Function for extreme values part of a confidence interval. Values |
|
434 |
#' are formatted as e.g. "(xx.xx, xx.xx)" if the number of `digits` is 2. |
|
435 |
#' |
|
436 |
#' @inheritParams extreme_format |
|
437 |
#' |
|
438 |
#' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme |
|
439 |
#' values confidence interval. |
|
440 |
#' |
|
441 |
#' @examples |
|
442 |
#' format_fun <- format_extreme_values_ci(2L) |
|
443 |
#' format_fun(x = c(0.127, Inf)) |
|
444 |
#' format_fun(x = c(0, 0.009)) |
|
445 |
#' |
|
446 |
#' @family formatting functions |
|
447 |
#' @export |
|
448 |
format_extreme_values_ci <- function(digits = 2L) { |
|
449 | 71x |
function(x, ...) { |
450 | 726x |
checkmate::assert_vector(x, len = 2) |
451 | 726x |
l_result <- h_format_threshold(x = x[1], digits = digits) |
452 | 726x |
h_result <- h_format_threshold(x = x[2], digits = digits) |
453 | ||
454 | 726x |
paste0("(", l_result, ", ", h_result, ")") |
455 |
} |
|
456 |
} |
|
457 | ||
458 |
#' Format automatically using data significant digits |
|
459 |
#' |
|
460 |
#' @description `r lifecycle::badge("stable")` |
|
461 |
#' |
|
462 |
#' Formatting function for the majority of default methods used in [analyze_vars()]. |
|
463 |
#' For non-derived values, the significant digits of data is used (e.g. range), while derived |
|
464 |
#' values have one more digits (measure of location and dispersion like mean, standard deviation). |
|
465 |
#' This function can be called internally with "auto" like, for example, |
|
466 |
#' `.formats = c("mean" = "auto")`. See details to see how this works with the inner function. |
|
467 |
#' |
|
468 |
#' @param dt_var (`numeric`)\cr variable data the statistics were calculated from. Used only to |
|
469 |
#' find significant digits. In [analyze_vars] this comes from `.df_row` (see |
|
470 |
#' [rtables::additional_fun_params]), and it is the row data after the above row splits. No |
|
471 |
#' column split is considered. |
|
472 |
#' @param x_stat (`string`)\cr string indicating the current statistical method used. |
|
473 |
#' |
|
474 |
#' @return A string that `rtables` prints in a table cell. |
|
475 |
#' |
|
476 |
#' @details |
|
477 |
#' The internal function is needed to work with `rtables` default structure for |
|
478 |
#' format functions, i.e. `function(x, ...)`, where is x are results from statistical evaluation. |
|
479 |
#' It can be more than one element (e.g. for `.stats = "mean_sd"`). |
|
480 |
#' |
|
481 |
#' @examples |
|
482 |
#' x_todo <- c(0.001, 0.2, 0.0011000, 3, 4) |
|
483 |
#' res <- c(mean(x_todo[1:3]), sd(x_todo[1:3])) |
|
484 |
#' |
|
485 |
#' # x is the result coming into the formatting function -> res!! |
|
486 |
#' format_auto(dt_var = x_todo, x_stat = "mean_sd")(x = res) |
|
487 |
#' format_auto(x_todo, "range")(x = range(x_todo)) |
|
488 |
#' no_sc_x <- c(0.0000001, 1) |
|
489 |
#' format_auto(no_sc_x, "range")(x = no_sc_x) |
|
490 |
#' |
|
491 |
#' @family formatting functions |
|
492 |
#' @export |
|
493 |
format_auto <- function(dt_var, x_stat) { |
|
494 | 10x |
function(x = "", ...) { |
495 | 18x |
checkmate::assert_numeric(x, min.len = 1) |
496 | 18x |
checkmate::assert_numeric(dt_var, min.len = 1) |
497 |
# Defaults - they may be a param in the future |
|
498 | 18x |
der_stats <- c( |
499 | 18x |
"mean", "sd", "se", "median", "geom_mean", "quantiles", "iqr", |
500 | 18x |
"mean_sd", "mean_se", "mean_se", "mean_ci", "mean_sei", "mean_sdi", |
501 | 18x |
"median_ci" |
502 |
) |
|
503 | 18x |
nonder_stats <- c("n", "range", "min", "max") |
504 | ||
505 |
# Safenet for miss-modifications |
|
506 | 18x |
stopifnot(length(intersect(der_stats, nonder_stats)) == 0) # nolint |
507 | 18x |
checkmate::assert_choice(x_stat, c(der_stats, nonder_stats)) |
508 | ||
509 |
# Finds the max number of digits in data |
|
510 | 18x |
detect_dig <- vapply(dt_var, count_decimalplaces, FUN.VALUE = numeric(1)) %>% |
511 | 18x |
max() |
512 | ||
513 | 18x |
if (x_stat %in% der_stats) { |
514 | 8x |
detect_dig <- detect_dig + 1 |
515 |
} |
|
516 | ||
517 |
# Render input |
|
518 | 18x |
str_vals <- formatC(x, digits = detect_dig, format = "f") |
519 | 18x |
def_fmt <- get_formats_from_stats(x_stat)[[x_stat]] |
520 | 18x |
str_fmt <- str_extract(def_fmt, invert = FALSE)[[1]] |
521 | 18x |
if (length(str_fmt) != length(str_vals)) { |
522 | 2x |
stop( |
523 | 2x |
"Number of inserted values as result (", length(str_vals), |
524 | 2x |
") is not the same as there should be in the default tern formats for ", |
525 | 2x |
x_stat, " (-> ", def_fmt, " needs ", length(str_fmt), " values). ", |
526 | 2x |
"See tern_default_formats to check all of them." |
527 |
) |
|
528 |
} |
|
529 | ||
530 |
# Squashing them together |
|
531 | 16x |
inv_str_fmt <- str_extract(def_fmt, invert = TRUE)[[1]] |
532 | 16x |
stopifnot(length(inv_str_fmt) == length(str_vals) + 1) # nolint |
533 | ||
534 | 16x |
out <- vector("character", length = length(inv_str_fmt) + length(str_vals)) |
535 | 16x |
is_even <- seq_along(out) %% 2 == 0 |
536 | 16x |
out[is_even] <- str_vals |
537 | 16x |
out[!is_even] <- inv_str_fmt |
538 | ||
539 | 16x |
return(paste0(out, collapse = "")) |
540 |
} |
|
541 |
} |
|
542 | ||
543 |
# Utility function that could be useful in general |
|
544 |
str_extract <- function(string, pattern = "xx|xx\\.|xx\\.x+", invert = FALSE) { |
|
545 | 34x |
regmatches(string, gregexpr(pattern, string), invert = invert) |
546 |
} |
|
547 | ||
548 |
# Helper function |
|
549 |
count_decimalplaces <- function(dec) { |
|
550 | 161x |
if (is.na(dec)) { |
551 | 6x |
return(0) |
552 | 155x |
} else if (abs(dec - round(dec)) > .Machine$double.eps^0.5) { # For precision |
553 | 122x |
nchar(strsplit(format(dec, scientific = FALSE, trim = FALSE), ".", fixed = TRUE)[[1]][[2]]) |
554 |
} else { |
|
555 | 33x |
return(0) |
556 |
} |
|
557 |
} |
|
558 | ||
559 |
#' Apply automatic formatting |
|
560 |
#' |
|
561 |
#' Checks if any of the listed formats in `.formats` are `"auto"`, and replaces `"auto"` with |
|
562 |
#' the correct implementation of `format_auto` for the given statistics, data, and variable. |
|
563 |
#' |
|
564 |
#' @inheritParams argument_convention |
|
565 |
#' @param x_stats (named `list`)\cr a named list of statistics where each element corresponds |
|
566 |
#' to an element in `.formats`, with matching names. |
|
567 |
#' |
|
568 |
#' @keywords internal |
|
569 |
apply_auto_formatting <- function(.formats, x_stats, .df_row, .var) { |
|
570 | 420x |
is_auto_fmt <- vapply(.formats, function(ii) is.character(ii) && ii == "auto", logical(1)) |
571 | 420x |
if (any(is_auto_fmt)) { |
572 | 3x |
auto_stats <- x_stats[is_auto_fmt] |
573 | 3x |
var_df <- .df_row[[.var]] # xxx this can be extended for the WHOLE data or single facets |
574 | 3x |
.formats[is_auto_fmt] <- lapply(names(auto_stats), format_auto, dt_var = var_df) |
575 |
} |
|
576 | 420x |
.formats |
577 |
} |
1 |
#' Univariate formula special term |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' The special term `univariate` indicate that the model should be fitted individually for |
|
6 |
#' every variable included in univariate. |
|
7 |
#' |
|
8 |
#' @param x (`character`)\cr a vector of variable names separated by commas. |
|
9 |
#' |
|
10 |
#' @return When used within a model formula, produces univariate models for each variable provided. |
|
11 |
#' |
|
12 |
#' @details |
|
13 |
#' If provided alongside with pairwise specification, the model |
|
14 |
#' `y ~ ARM + univariate(SEX, AGE, RACE)` lead to the study and comparison of the models |
|
15 |
#' + `y ~ ARM` |
|
16 |
#' + `y ~ ARM + SEX` |
|
17 |
#' + `y ~ ARM + AGE` |
|
18 |
#' + `y ~ ARM + RACE` |
|
19 |
#' |
|
20 |
#' @export |
|
21 |
univariate <- function(x) { |
|
22 | 2x |
structure(x, varname = deparse(substitute(x))) |
23 |
} |
|
24 | ||
25 |
# Get the right-hand-term of a formula |
|
26 |
rht <- function(x) { |
|
27 | 4x |
checkmate::assert_formula(x) |
28 | 4x |
y <- as.character(rev(x)[[1]]) |
29 | 4x |
return(y) |
30 |
} |
|
31 | ||
32 |
#' Hazard ratio estimation in interactions |
|
33 |
#' |
|
34 |
#' This function estimates the hazard ratios between arms when an interaction variable is given with |
|
35 |
#' specific values. |
|
36 |
#' |
|
37 |
#' @param variable,given (`character(2)`)\cr names of the two variables in the interaction. We seek the estimation of |
|
38 |
#' the levels of `variable` given the levels of `given`. |
|
39 |
#' @param lvl_var,lvl_given (`character`)\cr corresponding levels given by [levels()]. |
|
40 |
#' @param mmat (named `numeric`) a vector filled with `0`s used as a template to obtain the design matrix. |
|
41 |
#' @param coef (`numeric`)\cr vector of estimated coefficients. |
|
42 |
#' @param vcov (`matrix`)\cr variance-covariance matrix of underlying model. |
|
43 |
#' @param conf_level (`proportion`)\cr confidence level of estimate intervals. |
|
44 |
#' |
|
45 |
#' @details Given the cox regression investigating the effect of Arm (A, B, C; reference A) |
|
46 |
#' and Sex (F, M; reference Female). The model is abbreviated: y ~ Arm + Sex + Arm x Sex. |
|
47 |
#' The cox regression estimates the coefficients along with a variance-covariance matrix for: |
|
48 |
#' |
|
49 |
#' - b1 (arm b), b2 (arm c) |
|
50 |
#' - b3 (sex m) |
|
51 |
#' - b4 (arm b: sex m), b5 (arm c: sex m) |
|
52 |
#' |
|
53 |
#' Given that I want an estimation of the Hazard Ratio for arm C/sex M, the estimation |
|
54 |
#' will be given in reference to arm A/Sex M by exp(b2 + b3 + b5)/ exp(b3) = exp(b2 + b5), |
|
55 |
#' therefore the interaction coefficient is given by b2 + b5 while the standard error is obtained |
|
56 |
#' as $1.96 * sqrt(Var b2 + Var b5 + 2 * covariance (b2,b5))$ for a confidence level of 0.95. |
|
57 |
#' |
|
58 |
#' @return A list of matrices (one per level of variable) with rows corresponding to the combinations of |
|
59 |
#' `variable` and `given`, with columns: |
|
60 |
#' * `coef_hat`: Estimation of the coefficient. |
|
61 |
#' * `coef_se`: Standard error of the estimation. |
|
62 |
#' * `hr`: Hazard ratio. |
|
63 |
#' * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio. |
|
64 |
#' |
|
65 |
#' @seealso [s_cox_multivariate()]. |
|
66 |
#' |
|
67 |
#' @examples |
|
68 |
#' library(dplyr) |
|
69 |
#' library(survival) |
|
70 |
#' |
|
71 |
#' ADSL <- tern_ex_adsl %>% |
|
72 |
#' filter(SEX %in% c("F", "M")) |
|
73 |
#' |
|
74 |
#' adtte <- tern_ex_adtte %>% filter(PARAMCD == "PFS") |
|
75 |
#' adtte$ARMCD <- droplevels(adtte$ARMCD) |
|
76 |
#' adtte$SEX <- droplevels(adtte$SEX) |
|
77 |
#' |
|
78 |
#' mod <- coxph( |
|
79 |
#' formula = Surv(time = AVAL, event = 1 - CNSR) ~ (SEX + ARMCD)^2, |
|
80 |
#' data = adtte |
|
81 |
#' ) |
|
82 |
#' |
|
83 |
#' mmat <- stats::model.matrix(mod)[1, ] |
|
84 |
#' mmat[!mmat == 0] <- 0 |
|
85 |
#' |
|
86 |
#' @keywords internal |
|
87 |
estimate_coef <- function(variable, given, |
|
88 |
lvl_var, lvl_given, |
|
89 |
coef, |
|
90 |
mmat, |
|
91 |
vcov, |
|
92 |
conf_level = 0.95) { |
|
93 | 8x |
var_lvl <- paste0(variable, lvl_var[-1]) # [-1]: reference level |
94 | 8x |
giv_lvl <- paste0(given, lvl_given) |
95 | ||
96 | 8x |
design_mat <- expand.grid(variable = var_lvl, given = giv_lvl) |
97 | 8x |
design_mat <- design_mat[order(design_mat$variable, design_mat$given), ] |
98 | 8x |
design_mat <- within( |
99 | 8x |
data = design_mat, |
100 | 8x |
expr = { |
101 | 8x |
inter <- paste0(variable, ":", given) |
102 | 8x |
rev_inter <- paste0(given, ":", variable) |
103 |
} |
|
104 |
) |
|
105 | ||
106 | 8x |
split_by_variable <- design_mat$variable |
107 | 8x |
interaction_names <- paste(design_mat$variable, design_mat$given, sep = "/") |
108 | ||
109 | 8x |
design_mat <- apply( |
110 | 8x |
X = design_mat, MARGIN = 1, FUN = function(x) { |
111 | 27x |
mmat[names(mmat) %in% x[-which(names(x) == "given")]] <- 1 |
112 | 27x |
return(mmat) |
113 |
} |
|
114 |
) |
|
115 | 8x |
colnames(design_mat) <- interaction_names |
116 | ||
117 | 8x |
betas <- as.matrix(coef) |
118 | ||
119 | 8x |
coef_hat <- t(design_mat) %*% betas |
120 | 8x |
dimnames(coef_hat)[2] <- "coef" |
121 | ||
122 | 8x |
coef_se <- apply(design_mat, 2, function(x) { |
123 | 27x |
vcov_el <- as.logical(x) |
124 | 27x |
y <- vcov[vcov_el, vcov_el] |
125 | 27x |
y <- sum(y) |
126 | 27x |
y <- sqrt(y) |
127 | 27x |
return(y) |
128 |
}) |
|
129 | ||
130 | 8x |
q_norm <- stats::qnorm((1 + conf_level) / 2) |
131 | 8x |
y <- cbind(coef_hat, `se(coef)` = coef_se) |
132 | ||
133 | 8x |
y <- apply(y, 1, function(x) { |
134 | 27x |
x["hr"] <- exp(x["coef"]) |
135 | 27x |
x["lcl"] <- exp(x["coef"] - q_norm * x["se(coef)"]) |
136 | 27x |
x["ucl"] <- exp(x["coef"] + q_norm * x["se(coef)"]) |
137 | ||
138 | 27x |
return(x) |
139 |
}) |
|
140 | ||
141 | 8x |
y <- t(y) |
142 | 8x |
y <- by(y, split_by_variable, identity) |
143 | 8x |
y <- lapply(y, as.matrix) |
144 | ||
145 | 8x |
attr(y, "details") <- paste0( |
146 | 8x |
"Estimations of ", variable, |
147 | 8x |
" hazard ratio given the level of ", given, " compared to ", |
148 | 8x |
variable, " level ", lvl_var[1], "." |
149 |
) |
|
150 | 8x |
return(y) |
151 |
} |
|
152 | ||
153 |
#' `tryCatch` around `car::Anova` |
|
154 |
#' |
|
155 |
#' Captures warnings when executing [car::Anova]. |
|
156 |
#' |
|
157 |
#' @inheritParams car::Anova |
|
158 |
#' |
|
159 |
#' @return A list with item `aov` for the result of the model and `error_text` for the captured warnings. |
|
160 |
#' |
|
161 |
#' @examples |
|
162 |
#' # `car::Anova` on cox regression model including strata and expected |
|
163 |
#' # a likelihood ratio test triggers a warning as only Wald method is |
|
164 |
#' # accepted. |
|
165 |
#' |
|
166 |
#' library(survival) |
|
167 |
#' |
|
168 |
#' mod <- coxph( |
|
169 |
#' formula = Surv(time = futime, event = fustat) ~ factor(rx) + strata(ecog.ps), |
|
170 |
#' data = ovarian |
|
171 |
#' ) |
|
172 |
#' |
|
173 |
#' @keywords internal |
|
174 |
try_car_anova <- function(mod, |
|
175 |
test.statistic) { # nolint |
|
176 | 2x |
y <- tryCatch( |
177 | 2x |
withCallingHandlers( |
178 | 2x |
expr = { |
179 | 2x |
warn_text <- c() |
180 | 2x |
list( |
181 | 2x |
aov = car::Anova( |
182 | 2x |
mod, |
183 | 2x |
test.statistic = test.statistic, |
184 | 2x |
type = "III" |
185 |
), |
|
186 | 2x |
warn_text = warn_text |
187 |
) |
|
188 |
}, |
|
189 | 2x |
warning = function(w) { |
190 |
# If a warning is detected it is handled as "w". |
|
191 | ! |
warn_text <<- trimws(paste0("Warning in `try_car_anova`: ", w)) |
192 | ||
193 |
# A warning is sometimes expected, then, we want to restart |
|
194 |
# the execution while ignoring the warning. |
|
195 | ! |
invokeRestart("muffleWarning") |
196 |
} |
|
197 |
), |
|
198 | 2x |
finally = { |
199 |
} |
|
200 |
) |
|
201 | ||
202 | 2x |
return(y) |
203 |
} |
|
204 | ||
205 |
#' Fit a Cox regression model and ANOVA |
|
206 |
#' |
|
207 |
#' The functions derives the effect p-values using [car::Anova()] from [survival::coxph()] results. |
|
208 |
#' |
|
209 |
#' @inheritParams t_coxreg |
|
210 |
#' |
|
211 |
#' @return A list with items `mod` (results of [survival::coxph()]), `msum` (result of `summary`) and |
|
212 |
#' `aov` (result of [car::Anova()]). |
|
213 |
#' |
|
214 |
#' @noRd |
|
215 |
fit_n_aov <- function(formula, |
|
216 |
data = data, |
|
217 |
conf_level = conf_level, |
|
218 |
pval_method = c("wald", "likelihood"), |
|
219 |
...) { |
|
220 | 1x |
pval_method <- match.arg(pval_method) |
221 | ||
222 | 1x |
environment(formula) <- environment() |
223 | 1x |
suppressWarnings({ |
224 |
# We expect some warnings due to coxph which fails strict programming. |
|
225 | 1x |
mod <- survival::coxph(formula, data = data, ...) |
226 | 1x |
msum <- summary(mod, conf.int = conf_level) |
227 |
}) |
|
228 | ||
229 | 1x |
aov <- try_car_anova( |
230 | 1x |
mod, |
231 | 1x |
test.statistic = switch(pval_method, |
232 | 1x |
"wald" = "Wald", |
233 | 1x |
"likelihood" = "LR" |
234 |
) |
|
235 |
) |
|
236 | ||
237 | 1x |
warn_attr <- aov$warn_text |
238 | ! |
if (!is.null(aov$warn_text)) message(warn_attr) |
239 | ||
240 | 1x |
aov <- aov$aov |
241 | 1x |
y <- list(mod = mod, msum = msum, aov = aov) |
242 | 1x |
attr(y, "message") <- warn_attr |
243 | ||
244 | 1x |
return(y) |
245 |
} |
|
246 | ||
247 |
# argument_checks |
|
248 |
check_formula <- function(formula) { |
|
249 | 1x |
if (!(inherits(formula, "formula"))) { |
250 | 1x |
stop("Check `formula`. A formula should resemble `Surv(time = AVAL, event = 1 - CNSR) ~ study_arm(ARMCD)`.") |
251 |
} |
|
252 | ||
253 | ! |
invisible() |
254 |
} |
|
255 | ||
256 |
check_covariate_formulas <- function(covariates) { |
|
257 | 1x |
if (!all(vapply(X = covariates, FUN = inherits, what = "formula", FUN.VALUE = TRUE)) || is.null(covariates)) { |
258 | 1x |
stop("Check `covariates`, it should be a list of right-hand-term formulas, e.g. list(Age = ~AGE).") |
259 |
} |
|
260 | ||
261 | ! |
invisible() |
262 |
} |
|
263 | ||
264 |
name_covariate_names <- function(covariates) { |
|
265 | 1x |
miss_names <- names(covariates) == "" |
266 | 1x |
no_names <- is.null(names(covariates)) |
267 | ! |
if (any(miss_names)) names(covariates)[miss_names] <- vapply(covariates[miss_names], FUN = rht, FUN.VALUE = "name") |
268 | ! |
if (no_names) names(covariates) <- vapply(covariates, FUN = rht, FUN.VALUE = "name") |
269 | 1x |
return(covariates) |
270 |
} |
|
271 | ||
272 |
check_increments <- function(increments, covariates) { |
|
273 | 1x |
if (!is.null(increments)) { |
274 | 1x |
covariates <- vapply(covariates, FUN = rht, FUN.VALUE = "name") |
275 | 1x |
lapply( |
276 | 1x |
X = names(increments), FUN = function(x) { |
277 | 3x |
if (!x %in% covariates) { |
278 | 1x |
warning( |
279 | 1x |
paste( |
280 | 1x |
"Check `increments`, the `increment` for ", x, |
281 | 1x |
"doesn't match any names in investigated covariate(s)." |
282 |
) |
|
283 |
) |
|
284 |
} |
|
285 |
} |
|
286 |
) |
|
287 |
} |
|
288 | ||
289 | 1x |
invisible() |
290 |
} |
|
291 | ||
292 |
#' Multivariate Cox model - summarized results |
|
293 |
#' |
|
294 |
#' Analyses based on multivariate Cox model are usually not performed for the Controlled Substance Reporting or |
|
295 |
#' regulatory documents but serve exploratory purposes only (e.g., for publication). In practice, the model usually |
|
296 |
#' includes only the main effects (without interaction terms). It produces the hazard ratio estimates for each of the |
|
297 |
#' covariates included in the model. |
|
298 |
#' The analysis follows the same principles (e.g., stratified vs. unstratified analysis and tie handling) as the |
|
299 |
#' usual Cox model analysis. Since there is usually no pre-specified hypothesis testing for such analysis, |
|
300 |
#' the p.values need to be interpreted with caution. (**Statistical Analysis of Clinical Trials Data with R**, |
|
301 |
#' `NEST's bookdown`) |
|
302 |
#' |
|
303 |
#' @param formula (`formula`)\cr a formula corresponding to the investigated [survival::Surv()] survival model |
|
304 |
#' including covariates. |
|
305 |
#' @param data (`data.frame`)\cr a data frame which includes the variable in formula and covariates. |
|
306 |
#' @param conf_level (`proportion`)\cr the confidence level for the hazard ratio interval estimations. Default is 0.95. |
|
307 |
#' @param pval_method (`string`)\cr the method used for the estimation of p-values, should be one of |
|
308 |
#' `"wald"` (default) or `"likelihood"`. |
|
309 |
#' @param ... optional parameters passed to [survival::coxph()]. Can include `ties`, a character string specifying the |
|
310 |
#' method for tie handling, one of `exact` (default), `efron`, `breslow`. |
|
311 |
#' |
|
312 |
#' @return A `list` with elements `mod`, `msum`, `aov`, and `coef_inter`. |
|
313 |
#' |
|
314 |
#' @details The output is limited to single effect terms. Work in ongoing for estimation of interaction terms |
|
315 |
#' but is out of scope as defined by the Global Data Standards Repository |
|
316 |
#' (**`GDS_Standard_TLG_Specs_Tables_2.doc`**). |
|
317 |
#' |
|
318 |
#' @seealso [estimate_coef()]. |
|
319 |
#' |
|
320 |
#' @examples |
|
321 |
#' library(dplyr) |
|
322 |
#' |
|
323 |
#' adtte <- tern_ex_adtte |
|
324 |
#' adtte_f <- subset(adtte, PARAMCD == "OS") # _f: filtered |
|
325 |
#' adtte_f <- filter( |
|
326 |
#' adtte_f, |
|
327 |
#' PARAMCD == "OS" & |
|
328 |
#' SEX %in% c("F", "M") & |
|
329 |
#' RACE %in% c("ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE") |
|
330 |
#' ) |
|
331 |
#' adtte_f$SEX <- droplevels(adtte_f$SEX) |
|
332 |
#' adtte_f$RACE <- droplevels(adtte_f$RACE) |
|
333 |
#' |
|
334 |
#' @keywords internal |
|
335 |
s_cox_multivariate <- function(formula, data, |
|
336 |
conf_level = 0.95, |
|
337 |
pval_method = c("wald", "likelihood"), |
|
338 |
...) { |
|
339 | 1x |
tf <- stats::terms(formula, specials = c("strata")) |
340 | 1x |
covariates <- rownames(attr(tf, "factors"))[-c(1, unlist(attr(tf, "specials")))] |
341 | 1x |
lapply( |
342 | 1x |
X = covariates, |
343 | 1x |
FUN = function(x) { |
344 | 3x |
if (is.character(data[[x]])) { |
345 | 1x |
data[[x]] <<- as.factor(data[[x]]) |
346 |
} |
|
347 | 3x |
invisible() |
348 |
} |
|
349 |
) |
|
350 | 1x |
pval_method <- match.arg(pval_method) |
351 | ||
352 |
# Results directly exported from environment(fit_n_aov) to environment(s_function_draft) |
|
353 | 1x |
y <- fit_n_aov( |
354 | 1x |
formula = formula, |
355 | 1x |
data = data, |
356 | 1x |
conf_level = conf_level, |
357 | 1x |
pval_method = pval_method, |
358 |
... |
|
359 |
) |
|
360 | 1x |
mod <- y$mod |
361 | 1x |
aov <- y$aov |
362 | 1x |
msum <- y$msum |
363 | 1x |
list2env(as.list(y), environment()) |
364 | ||
365 | 1x |
all_term_labs <- attr(mod$terms, "term.labels") |
366 | 1x |
term_labs <- all_term_labs[which(attr(mod$terms, "order") == 1)] |
367 | 1x |
names(term_labs) <- term_labs |
368 | ||
369 | 1x |
coef_inter <- NULL |
370 | 1x |
if (any(attr(mod$terms, "order") > 1)) { |
371 | 1x |
for_inter <- all_term_labs[attr(mod$terms, "order") > 1] |
372 | 1x |
names(for_inter) <- for_inter |
373 | 1x |
mmat <- stats::model.matrix(mod)[1, ] |
374 | 1x |
mmat[!mmat == 0] <- 0 |
375 | 1x |
mcoef <- stats::coef(mod) |
376 | 1x |
mvcov <- stats::vcov(mod) |
377 | ||
378 | 1x |
estimate_coef_local <- function(variable, given) { |
379 | 6x |
estimate_coef( |
380 | 6x |
variable, given, |
381 | 6x |
coef = mcoef, mmat = mmat, vcov = mvcov, conf_level = conf_level, |
382 | 6x |
lvl_var = levels(data[[variable]]), lvl_given = levels(data[[given]]) |
383 |
) |
|
384 |
} |
|
385 | ||
386 | 1x |
coef_inter <- lapply( |
387 | 1x |
for_inter, function(x) { |
388 | 3x |
y <- attr(mod$terms, "factors")[, x] |
389 | 3x |
y <- names(y[y > 0]) |
390 | 3x |
Map(estimate_coef_local, variable = y, given = rev(y)) |
391 |
} |
|
392 |
) |
|
393 |
} |
|
394 | ||
395 | 1x |
list(mod = mod, msum = msum, aov = aov, coef_inter = coef_inter) |
396 |
} |
1 |
#' Proportion estimation |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' The analyze function [estimate_proportion()] creates a layout element to estimate the proportion of responders |
|
6 |
#' within a studied population. The primary analysis variable, `vars`, indicates whether a response has occurred for |
|
7 |
#' each record. See the `method` parameter for options of methods to use when constructing the confidence interval of |
|
8 |
#' the proportion. Additionally, a stratification variable can be supplied via the `strata` element of the `variables` |
|
9 |
#' argument. |
|
10 |
#' |
|
11 |
#' @inheritParams prop_strat_wilson |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("estimate_proportion")` |
|
14 |
#' to see available statistics for this function. |
|
15 |
#' @param method (`string`)\cr the method used to construct the confidence interval |
|
16 |
#' for proportion of successful outcomes; one of `waldcc`, `wald`, `clopper-pearson`, |
|
17 |
#' `wilson`, `wilsonc`, `strat_wilson`, `strat_wilsonc`, `agresti-coull` or `jeffreys`. |
|
18 |
#' @param long (`flag`)\cr whether a long description is required. |
|
19 |
#' |
|
20 |
#' @seealso [h_proportions] |
|
21 |
#' |
|
22 |
#' @name estimate_proportion |
|
23 |
#' @order 1 |
|
24 |
NULL |
|
25 | ||
26 |
#' @describeIn estimate_proportion Statistics function estimating a |
|
27 |
#' proportion along with its confidence interval. |
|
28 |
#' |
|
29 |
#' @param df (`logical` or `data.frame`)\cr if only a logical vector is used, |
|
30 |
#' it indicates whether each subject is a responder or not. `TRUE` represents |
|
31 |
#' a successful outcome. If a `data.frame` is provided, also the `strata` variable |
|
32 |
#' names must be provided in `variables` as a list element with the strata strings. |
|
33 |
#' In the case of `data.frame`, the logical vector of responses must be indicated as a |
|
34 |
#' variable name in `.var`. |
|
35 |
#' |
|
36 |
#' @return |
|
37 |
#' * `s_proportion()` returns statistics `n_prop` (`n` and proportion) and `prop_ci` (proportion CI) for a |
|
38 |
#' given variable. |
|
39 |
#' |
|
40 |
#' @examples |
|
41 |
#' # Case with only logical vector. |
|
42 |
#' rsp_v <- c(1, 0, 1, 0, 1, 1, 0, 0) |
|
43 |
#' s_proportion(rsp_v) |
|
44 |
#' |
|
45 |
#' # Example for Stratified Wilson CI |
|
46 |
#' nex <- 100 # Number of example rows |
|
47 |
#' dta <- data.frame( |
|
48 |
#' "rsp" = sample(c(TRUE, FALSE), nex, TRUE), |
|
49 |
#' "grp" = sample(c("A", "B"), nex, TRUE), |
|
50 |
#' "f1" = sample(c("a1", "a2"), nex, TRUE), |
|
51 |
#' "f2" = sample(c("x", "y", "z"), nex, TRUE), |
|
52 |
#' stringsAsFactors = TRUE |
|
53 |
#' ) |
|
54 |
#' |
|
55 |
#' s_proportion( |
|
56 |
#' df = dta, |
|
57 |
#' .var = "rsp", |
|
58 |
#' variables = list(strata = c("f1", "f2")), |
|
59 |
#' conf_level = 0.90, |
|
60 |
#' method = "strat_wilson" |
|
61 |
#' ) |
|
62 |
#' |
|
63 |
#' @export |
|
64 |
s_proportion <- function(df, |
|
65 |
.var, |
|
66 |
conf_level = 0.95, |
|
67 |
method = c( |
|
68 |
"waldcc", "wald", "clopper-pearson", |
|
69 |
"wilson", "wilsonc", "strat_wilson", "strat_wilsonc", |
|
70 |
"agresti-coull", "jeffreys" |
|
71 |
), |
|
72 |
weights = NULL, |
|
73 |
max_iterations = 50, |
|
74 |
variables = list(strata = NULL), |
|
75 |
long = FALSE) { |
|
76 | 167x |
method <- match.arg(method) |
77 | 167x |
checkmate::assert_flag(long) |
78 | 167x |
assert_proportion_value(conf_level) |
79 | ||
80 | 167x |
if (!is.null(variables$strata)) { |
81 |
# Checks for strata |
|
82 | ! |
if (missing(df)) stop("When doing stratified analysis a data.frame with specific columns is needed.") |
83 | ! |
strata_colnames <- variables$strata |
84 | ! |
checkmate::assert_character(strata_colnames, null.ok = FALSE) |
85 | ! |
strata_vars <- stats::setNames(as.list(strata_colnames), strata_colnames) |
86 | ! |
assert_df_with_variables(df, strata_vars) |
87 | ||
88 | ! |
strata <- interaction(df[strata_colnames]) |
89 | ! |
strata <- as.factor(strata) |
90 | ||
91 |
# Pushing down checks to prop_strat_wilson |
|
92 | 167x |
} else if (checkmate::test_subset(method, c("strat_wilson", "strat_wilsonc"))) { |
93 | ! |
stop("To use stratified methods you need to specify the strata variables.") |
94 |
} |
|
95 | 167x |
if (checkmate::test_atomic_vector(df)) { |
96 | 167x |
rsp <- as.logical(df) |
97 |
} else { |
|
98 | ! |
rsp <- as.logical(df[[.var]]) |
99 |
} |
|
100 | 167x |
n <- sum(rsp) |
101 | 167x |
p_hat <- mean(rsp) |
102 | ||
103 | 167x |
prop_ci <- switch(method, |
104 | 167x |
"clopper-pearson" = prop_clopper_pearson(rsp, conf_level), |
105 | 167x |
"wilson" = prop_wilson(rsp, conf_level), |
106 | 167x |
"wilsonc" = prop_wilson(rsp, conf_level, correct = TRUE), |
107 | 167x |
"strat_wilson" = prop_strat_wilson(rsp, |
108 | 167x |
strata, |
109 | 167x |
weights, |
110 | 167x |
conf_level, |
111 | 167x |
max_iterations, |
112 | 167x |
correct = FALSE |
113 | 167x |
)$conf_int, |
114 | 167x |
"strat_wilsonc" = prop_strat_wilson(rsp, |
115 | 167x |
strata, |
116 | 167x |
weights, |
117 | 167x |
conf_level, |
118 | 167x |
max_iterations, |
119 | 167x |
correct = TRUE |
120 | 167x |
)$conf_int, |
121 | 167x |
"wald" = prop_wald(rsp, conf_level), |
122 | 167x |
"waldcc" = prop_wald(rsp, conf_level, correct = TRUE), |
123 | 167x |
"agresti-coull" = prop_agresti_coull(rsp, conf_level), |
124 | 167x |
"jeffreys" = prop_jeffreys(rsp, conf_level) |
125 |
) |
|
126 | ||
127 | 167x |
list( |
128 | 167x |
"n_prop" = formatters::with_label(c(n, p_hat), "Responders"), |
129 | 167x |
"prop_ci" = formatters::with_label( |
130 | 167x |
x = 100 * prop_ci, label = d_proportion(conf_level, method, long = long) |
131 |
) |
|
132 |
) |
|
133 |
} |
|
134 | ||
135 |
#' @describeIn estimate_proportion Formatted analysis function which is used as `afun` |
|
136 |
#' in `estimate_proportion()`. |
|
137 |
#' |
|
138 |
#' @return |
|
139 |
#' * `a_proportion()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
140 |
#' |
|
141 |
#' @export |
|
142 |
a_proportion <- make_afun( |
|
143 |
s_proportion, |
|
144 |
.formats = c(n_prop = "xx (xx.x%)", prop_ci = "(xx.x, xx.x)") |
|
145 |
) |
|
146 | ||
147 |
#' @describeIn estimate_proportion Layout-creating function which can take statistics function arguments |
|
148 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()]. |
|
149 |
#' |
|
150 |
#' @return |
|
151 |
#' * `estimate_proportion()` returns a layout object suitable for passing to further layouting functions, |
|
152 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing |
|
153 |
#' the statistics from `s_proportion()` to the table layout. |
|
154 |
#' |
|
155 |
#' @examples |
|
156 |
#' dta_test <- data.frame( |
|
157 |
#' USUBJID = paste0("S", 1:12), |
|
158 |
#' ARM = rep(LETTERS[1:3], each = 4), |
|
159 |
#' AVAL = rep(LETTERS[1:3], each = 4) |
|
160 |
#' ) |
|
161 |
#' |
|
162 |
#' basic_table() %>% |
|
163 |
#' split_cols_by("ARM") %>% |
|
164 |
#' estimate_proportion(vars = "AVAL") %>% |
|
165 |
#' build_table(df = dta_test) |
|
166 |
#' |
|
167 |
#' @export |
|
168 |
#' @order 2 |
|
169 |
estimate_proportion <- function(lyt, |
|
170 |
vars, |
|
171 |
conf_level = 0.95, |
|
172 |
method = c( |
|
173 |
"waldcc", "wald", "clopper-pearson", |
|
174 |
"wilson", "wilsonc", "strat_wilson", "strat_wilsonc", |
|
175 |
"agresti-coull", "jeffreys" |
|
176 |
), |
|
177 |
weights = NULL, |
|
178 |
max_iterations = 50, |
|
179 |
variables = list(strata = NULL), |
|
180 |
long = FALSE, |
|
181 |
na_str = default_na_str(), |
|
182 |
nested = TRUE, |
|
183 |
..., |
|
184 |
show_labels = "hidden", |
|
185 |
table_names = vars, |
|
186 |
.stats = NULL, |
|
187 |
.formats = NULL, |
|
188 |
.labels = NULL, |
|
189 |
.indent_mods = NULL) { |
|
190 | 3x |
extra_args <- list( |
191 | 3x |
conf_level = conf_level, method = method, weights = weights, max_iterations = max_iterations, |
192 | 3x |
variables = variables, long = long, ... |
193 |
) |
|
194 | ||
195 | 3x |
afun <- make_afun( |
196 | 3x |
a_proportion, |
197 | 3x |
.stats = .stats, |
198 | 3x |
.formats = .formats, |
199 | 3x |
.labels = .labels, |
200 | 3x |
.indent_mods = .indent_mods |
201 |
) |
|
202 | 3x |
analyze( |
203 | 3x |
lyt, |
204 | 3x |
vars, |
205 | 3x |
afun = afun, |
206 | 3x |
na_str = na_str, |
207 | 3x |
nested = nested, |
208 | 3x |
extra_args = extra_args, |
209 | 3x |
show_labels = show_labels, |
210 | 3x |
table_names = table_names |
211 |
) |
|
212 |
} |
|
213 | ||
214 |
#' Helper functions for calculating proportion confidence intervals |
|
215 |
#' |
|
216 |
#' @description `r lifecycle::badge("stable")` |
|
217 |
#' |
|
218 |
#' Functions to calculate different proportion confidence intervals for use in [estimate_proportion()]. |
|
219 |
#' |
|
220 |
#' @inheritParams argument_convention |
|
221 |
#' @inheritParams estimate_proportion |
|
222 |
#' |
|
223 |
#' @return Confidence interval of a proportion. |
|
224 |
#' |
|
225 |
#' @seealso [estimate_proportion], descriptive function [d_proportion()], |
|
226 |
#' and helper functions [strata_normal_quantile()] and [update_weights_strat_wilson()]. |
|
227 |
#' |
|
228 |
#' @name h_proportions |
|
229 |
NULL |
|
230 | ||
231 |
#' @describeIn h_proportions Calculates the Wilson interval by calling [stats::prop.test()]. |
|
232 |
#' Also referred to as Wilson score interval. |
|
233 |
#' |
|
234 |
#' @examples |
|
235 |
#' rsp <- c( |
|
236 |
#' TRUE, TRUE, TRUE, TRUE, TRUE, |
|
237 |
#' FALSE, FALSE, FALSE, FALSE, FALSE |
|
238 |
#' ) |
|
239 |
#' prop_wilson(rsp, conf_level = 0.9) |
|
240 |
#' |
|
241 |
#' @export |
|
242 |
prop_wilson <- function(rsp, conf_level, correct = FALSE) { |
|
243 | 5x |
y <- stats::prop.test( |
244 | 5x |
sum(rsp), |
245 | 5x |
length(rsp), |
246 | 5x |
correct = correct, |
247 | 5x |
conf.level = conf_level |
248 |
) |
|
249 | ||
250 | 5x |
as.numeric(y$conf.int) |
251 |
} |
|
252 | ||
253 |
#' @describeIn h_proportions Calculates the stratified Wilson confidence |
|
254 |
#' interval for unequal proportions as described in \insertCite{Yan2010-jt;textual}{tern} |
|
255 |
#' |
|
256 |
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`. |
|
257 |
#' @param weights (`numeric` or `NULL`)\cr weights for each level of the strata. If `NULL`, they are |
|
258 |
#' estimated using the iterative algorithm proposed in \insertCite{Yan2010-jt;textual}{tern} that |
|
259 |
#' minimizes the weighted squared length of the confidence interval. |
|
260 |
#' @param max_iterations (`count`)\cr maximum number of iterations for the iterative procedure used |
|
261 |
#' to find estimates of optimal weights. |
|
262 |
#' @param correct (`flag`)\cr whether to include the continuity correction. For further information, see for example |
|
263 |
#' for [stats::prop.test()]. |
|
264 |
#' |
|
265 |
#' @references |
|
266 |
#' \insertRef{Yan2010-jt}{tern} |
|
267 |
#' |
|
268 |
#' @examples |
|
269 |
#' # Stratified Wilson confidence interval with unequal probabilities |
|
270 |
#' |
|
271 |
#' set.seed(1) |
|
272 |
#' rsp <- sample(c(TRUE, FALSE), 100, TRUE) |
|
273 |
#' strata_data <- data.frame( |
|
274 |
#' "f1" = sample(c("a", "b"), 100, TRUE), |
|
275 |
#' "f2" = sample(c("x", "y", "z"), 100, TRUE), |
|
276 |
#' stringsAsFactors = TRUE |
|
277 |
#' ) |
|
278 |
#' strata <- interaction(strata_data) |
|
279 |
#' n_strata <- ncol(table(rsp, strata)) # Number of strata |
|
280 |
#' |
|
281 |
#' prop_strat_wilson( |
|
282 |
#' rsp = rsp, strata = strata, |
|
283 |
#' conf_level = 0.90 |
|
284 |
#' ) |
|
285 |
#' |
|
286 |
#' # Not automatic setting of weights |
|
287 |
#' prop_strat_wilson( |
|
288 |
#' rsp = rsp, strata = strata, |
|
289 |
#' weights = rep(1 / n_strata, n_strata), |
|
290 |
#' conf_level = 0.90 |
|
291 |
#' ) |
|
292 |
#' |
|
293 |
#' @export |
|
294 |
prop_strat_wilson <- function(rsp, |
|
295 |
strata, |
|
296 |
weights = NULL, |
|
297 |
conf_level = 0.95, |
|
298 |
max_iterations = NULL, |
|
299 |
correct = FALSE) { |
|
300 | 20x |
checkmate::assert_logical(rsp, any.missing = FALSE) |
301 | 20x |
checkmate::assert_factor(strata, len = length(rsp)) |
302 | 20x |
assert_proportion_value(conf_level) |
303 | ||
304 | 20x |
tbl <- table(rsp, strata) |
305 | 20x |
n_strata <- length(unique(strata)) |
306 | ||
307 |
# Checking the weights and maximum number of iterations. |
|
308 | 20x |
do_iter <- FALSE |
309 | 20x |
if (is.null(weights)) { |
310 | 6x |
weights <- rep(1 / n_strata, n_strata) # Initialization for iterative procedure |
311 | 6x |
do_iter <- TRUE |
312 | ||
313 |
# Iteration parameters |
|
314 | 2x |
if (is.null(max_iterations)) max_iterations <- 10 |
315 | 6x |
checkmate::assert_int(max_iterations, na.ok = FALSE, null.ok = FALSE, lower = 1) |
316 |
} |
|
317 | 20x |
checkmate::assert_numeric(weights, lower = 0, upper = 1, any.missing = FALSE, len = n_strata) |
318 | 20x |
sum_weights <- checkmate::assert_int(sum(weights)) |
319 | ! |
if (as.integer(sum_weights + 0.5) != 1L) stop("Sum of weights must be 1L.") |
320 | ||
321 | 20x |
xs <- tbl["TRUE", ] |
322 | 20x |
ns <- colSums(tbl) |
323 | 20x |
use_stratum <- (ns > 0) |
324 | 20x |
ns <- ns[use_stratum] |
325 | 20x |
xs <- xs[use_stratum] |
326 | 20x |
ests <- xs / ns |
327 | 20x |
vars <- ests * (1 - ests) / ns |
328 | ||
329 | 20x |
strata_qnorm <- strata_normal_quantile(vars, weights, conf_level) |
330 | ||
331 |
# Iterative setting of weights if they were not set externally |
|
332 | 20x |
weights_new <- if (do_iter) { |
333 | 6x |
update_weights_strat_wilson(vars, strata_qnorm, weights, ns, max_iterations, conf_level)$weights |
334 |
} else { |
|
335 | 14x |
weights |
336 |
} |
|
337 | ||
338 | 20x |
strata_conf_level <- 2 * stats::pnorm(strata_qnorm) - 1 |
339 | ||
340 | 20x |
ci_by_strata <- Map( |
341 | 20x |
function(x, n) { |
342 |
# Classic Wilson's confidence interval |
|
343 | 139x |
suppressWarnings(stats::prop.test(x, n, correct = correct, conf.level = strata_conf_level)$conf.int) |
344 |
}, |
|
345 | 20x |
x = xs, |
346 | 20x |
n = ns |
347 |
) |
|
348 | 20x |
lower_by_strata <- sapply(ci_by_strata, "[", 1L) |
349 | 20x |
upper_by_strata <- sapply(ci_by_strata, "[", 2L) |
350 | ||
351 | 20x |
lower <- sum(weights_new * lower_by_strata) |
352 | 20x |
upper <- sum(weights_new * upper_by_strata) |
353 | ||
354 |
# Return values |
|
355 | 20x |
if (do_iter) { |
356 | 6x |
list( |
357 | 6x |
conf_int = c( |
358 | 6x |
lower = lower, |
359 | 6x |
upper = upper |
360 |
), |
|
361 | 6x |
weights = weights_new |
362 |
) |
|
363 |
} else { |
|
364 | 14x |
list( |
365 | 14x |
conf_int = c( |
366 | 14x |
lower = lower, |
367 | 14x |
upper = upper |
368 |
) |
|
369 |
) |
|
370 |
} |
|
371 |
} |
|
372 | ||
373 |
#' @describeIn h_proportions Calculates the Clopper-Pearson interval by calling [stats::binom.test()]. |
|
374 |
#' Also referred to as the `exact` method. |
|
375 |
#' |
|
376 |
#' @examples |
|
377 |
#' prop_clopper_pearson(rsp, conf_level = .95) |
|
378 |
#' |
|
379 |
#' @export |
|
380 |
prop_clopper_pearson <- function(rsp, |
|
381 |
conf_level) { |
|
382 | 1x |
y <- stats::binom.test( |
383 | 1x |
x = sum(rsp), |
384 | 1x |
n = length(rsp), |
385 | 1x |
conf.level = conf_level |
386 |
) |
|
387 | 1x |
as.numeric(y$conf.int) |
388 |
} |
|
389 | ||
390 |
#' @describeIn h_proportions Calculates the Wald interval by following the usual textbook definition |
|
391 |
#' for a single proportion confidence interval using the normal approximation. |
|
392 |
#' |
|
393 |
#' @param correct (`flag`)\cr whether to apply continuity correction. |
|
394 |
#' |
|
395 |
#' @examples |
|
396 |
#' prop_wald(rsp, conf_level = 0.95) |
|
397 |
#' prop_wald(rsp, conf_level = 0.95, correct = TRUE) |
|
398 |
#' |
|
399 |
#' @export |
|
400 |
prop_wald <- function(rsp, conf_level, correct = FALSE) { |
|
401 | 163x |
n <- length(rsp) |
402 | 163x |
p_hat <- mean(rsp) |
403 | 163x |
z <- stats::qnorm((1 + conf_level) / 2) |
404 | 163x |
q_hat <- 1 - p_hat |
405 | 163x |
correct <- if (correct) 1 / (2 * n) else 0 |
406 | ||
407 | 163x |
err <- z * sqrt(p_hat * q_hat) / sqrt(n) + correct |
408 | 163x |
l_ci <- max(0, p_hat - err) |
409 | 163x |
u_ci <- min(1, p_hat + err) |
410 | ||
411 | 163x |
c(l_ci, u_ci) |
412 |
} |
|
413 | ||
414 |
#' @describeIn h_proportions Calculates the Agresti-Coull interval. Constructed (for 95% CI) by adding two successes |
|
415 |
#' and two failures to the data and then using the Wald formula to construct a CI. |
|
416 |
#' |
|
417 |
#' @examples |
|
418 |
#' prop_agresti_coull(rsp, conf_level = 0.95) |
|
419 |
#' |
|
420 |
#' @export |
|
421 |
prop_agresti_coull <- function(rsp, conf_level) { |
|
422 | 3x |
n <- length(rsp) |
423 | 3x |
x_sum <- sum(rsp) |
424 | 3x |
z <- stats::qnorm((1 + conf_level) / 2) |
425 | ||
426 |
# Add here both z^2 / 2 successes and failures. |
|
427 | 3x |
x_sum_tilde <- x_sum + z^2 / 2 |
428 | 3x |
n_tilde <- n + z^2 |
429 | ||
430 |
# Then proceed as with the Wald interval. |
|
431 | 3x |
p_tilde <- x_sum_tilde / n_tilde |
432 | 3x |
q_tilde <- 1 - p_tilde |
433 | 3x |
err <- z * sqrt(p_tilde * q_tilde) / sqrt(n_tilde) |
434 | 3x |
l_ci <- max(0, p_tilde - err) |
435 | 3x |
u_ci <- min(1, p_tilde + err) |
436 | ||
437 | 3x |
c(l_ci, u_ci) |
438 |
} |
|
439 | ||
440 |
#' @describeIn h_proportions Calculates the Jeffreys interval, an equal-tailed interval based on the |
|
441 |
#' non-informative Jeffreys prior for a binomial proportion. |
|
442 |
#' |
|
443 |
#' @examples |
|
444 |
#' prop_jeffreys(rsp, conf_level = 0.95) |
|
445 |
#' |
|
446 |
#' @export |
|
447 |
prop_jeffreys <- function(rsp, |
|
448 |
conf_level) { |
|
449 | 5x |
n <- length(rsp) |
450 | 5x |
x_sum <- sum(rsp) |
451 | ||
452 | 5x |
alpha <- 1 - conf_level |
453 | 5x |
l_ci <- ifelse( |
454 | 5x |
x_sum == 0, |
455 | 5x |
0, |
456 | 5x |
stats::qbeta(alpha / 2, x_sum + 0.5, n - x_sum + 0.5) |
457 |
) |
|
458 | ||
459 | 5x |
u_ci <- ifelse( |
460 | 5x |
x_sum == n, |
461 | 5x |
1, |
462 | 5x |
stats::qbeta(1 - alpha / 2, x_sum + 0.5, n - x_sum + 0.5) |
463 |
) |
|
464 | ||
465 | 5x |
c(l_ci, u_ci) |
466 |
} |
|
467 | ||
468 |
#' Description of the proportion summary |
|
469 |
#' |
|
470 |
#' @description `r lifecycle::badge("stable")` |
|
471 |
#' |
|
472 |
#' This is a helper function that describes the analysis in [s_proportion()]. |
|
473 |
#' |
|
474 |
#' @inheritParams s_proportion |
|
475 |
#' @param long (`flag`)\cr whether a long or a short (default) description is required. |
|
476 |
#' |
|
477 |
#' @return String describing the analysis. |
|
478 |
#' |
|
479 |
#' @export |
|
480 |
d_proportion <- function(conf_level, |
|
481 |
method, |
|
482 |
long = FALSE) { |
|
483 | 179x |
label <- paste0(conf_level * 100, "% CI") |
484 | ||
485 | ! |
if (long) label <- paste(label, "for Response Rates") |
486 | ||
487 | 179x |
method_part <- switch(method, |
488 | 179x |
"clopper-pearson" = "Clopper-Pearson", |
489 | 179x |
"waldcc" = "Wald, with correction", |
490 | 179x |
"wald" = "Wald, without correction", |
491 | 179x |
"wilson" = "Wilson, without correction", |
492 | 179x |
"strat_wilson" = "Stratified Wilson, without correction", |
493 | 179x |
"wilsonc" = "Wilson, with correction", |
494 | 179x |
"strat_wilsonc" = "Stratified Wilson, with correction", |
495 | 179x |
"agresti-coull" = "Agresti-Coull", |
496 | 179x |
"jeffreys" = "Jeffreys", |
497 | 179x |
stop(paste(method, "does not have a description")) |
498 |
) |
|
499 | ||
500 | 179x |
paste0(label, " (", method_part, ")") |
501 |
} |
|
502 | ||
503 |
#' Helper function for the estimation of stratified quantiles |
|
504 |
#' |
|
505 |
#' @description `r lifecycle::badge("stable")` |
|
506 |
#' |
|
507 |
#' This function wraps the estimation of stratified percentiles when we assume |
|
508 |
#' the approximation for large numbers. This is necessary only in the case |
|
509 |
#' proportions for each strata are unequal. |
|
510 |
#' |
|
511 |
#' @inheritParams argument_convention |
|
512 |
#' @inheritParams prop_strat_wilson |
|
513 |
#' |
|
514 |
#' @return Stratified quantile. |
|
515 |
#' |
|
516 |
#' @seealso [prop_strat_wilson()] |
|
517 |
#' |
|
518 |
#' @examples |
|
519 |
#' strata_data <- table(data.frame( |
|
520 |
#' "f1" = sample(c(TRUE, FALSE), 100, TRUE), |
|
521 |
#' "f2" = sample(c("x", "y", "z"), 100, TRUE), |
|
522 |
#' stringsAsFactors = TRUE |
|
523 |
#' )) |
|
524 |
#' ns <- colSums(strata_data) |
|
525 |
#' ests <- strata_data["TRUE", ] / ns |
|
526 |
#' vars <- ests * (1 - ests) / ns |
|
527 |
#' weights <- rep(1 / length(ns), length(ns)) |
|
528 |
#' |
|
529 |
#' strata_normal_quantile(vars, weights, 0.95) |
|
530 |
#' |
|
531 |
#' @export |
|
532 |
strata_normal_quantile <- function(vars, weights, conf_level) { |
|
533 | 43x |
summands <- weights^2 * vars |
534 |
# Stratified quantile |
|
535 | 43x |
sqrt(sum(summands)) / sum(sqrt(summands)) * stats::qnorm((1 + conf_level) / 2) |
536 |
} |
|
537 | ||
538 |
#' Helper function for the estimation of weights for `prop_strat_wilson()` |
|
539 |
#' |
|
540 |
#' @description `r lifecycle::badge("stable")` |
|
541 |
#' |
|
542 |
#' This function wraps the iteration procedure that allows you to estimate |
|
543 |
#' the weights for each proportional strata. This assumes to minimize the |
|
544 |
#' weighted squared length of the confidence interval. |
|
545 |
#' |
|
546 |
#' @inheritParams prop_strat_wilson |
|
547 |
#' @param vars (`numeric`)\cr normalized proportions for each strata. |
|
548 |
#' @param strata_qnorm (`numeric(1)`)\cr initial estimation with identical weights of the quantiles. |
|
549 |
#' @param initial_weights (`numeric`)\cr initial weights used to calculate `strata_qnorm`. This can |
|
550 |
#' be optimized in the future if we need to estimate better initial weights. |
|
551 |
#' @param n_per_strata (`numeric`)\cr number of elements in each strata. |
|
552 |
#' @param max_iterations (`integer(1)`)\cr maximum number of iterations to be tried. Convergence is always checked. |
|
553 |
#' @param tol (`numeric(1)`)\cr tolerance threshold for convergence. |
|
554 |
#' |
|
555 |
#' @return A `list` of 3 elements: `n_it`, `weights`, and `diff_v`. |
|
556 |
#' |
|
557 |
#' @seealso For references and details see [prop_strat_wilson()]. |
|
558 |
#' |
|
559 |
#' @examples |
|
560 |
#' vs <- c(0.011, 0.013, 0.012, 0.014, 0.017, 0.018) |
|
561 |
#' sq <- 0.674 |
|
562 |
#' ws <- rep(1 / length(vs), length(vs)) |
|
563 |
#' ns <- c(22, 18, 17, 17, 14, 12) |
|
564 |
#' |
|
565 |
#' update_weights_strat_wilson(vs, sq, ws, ns, 100, 0.95, 0.001) |
|
566 |
#' |
|
567 |
#' @export |
|
568 |
update_weights_strat_wilson <- function(vars, |
|
569 |
strata_qnorm, |
|
570 |
initial_weights, |
|
571 |
n_per_strata, |
|
572 |
max_iterations = 50, |
|
573 |
conf_level = 0.95, |
|
574 |
tol = 0.001) { |
|
575 | 9x |
it <- 0 |
576 | 9x |
diff_v <- NULL |
577 | ||
578 | 9x |
while (it < max_iterations) { |
579 | 21x |
it <- it + 1 |
580 | 21x |
weights_new_t <- (1 + strata_qnorm^2 / n_per_strata)^2 |
581 | 21x |
weights_new_b <- (vars + strata_qnorm^2 / (4 * n_per_strata^2)) |
582 | 21x |
weights_new <- weights_new_t / weights_new_b |
583 | 21x |
weights_new <- weights_new / sum(weights_new) |
584 | 21x |
strata_qnorm <- strata_normal_quantile(vars, weights_new, conf_level) |
585 | 21x |
diff_v <- c(diff_v, sum(abs(weights_new - initial_weights))) |
586 | 8x |
if (diff_v[length(diff_v)] < tol) break |
587 | 13x |
initial_weights <- weights_new |
588 |
} |
|
589 | ||
590 | 9x |
if (it == max_iterations) { |
591 | 1x |
warning("The heuristic to find weights did not converge with max_iterations = ", max_iterations) |
592 |
} |
|
593 | ||
594 | 9x |
list( |
595 | 9x |
"n_it" = it, |
596 | 9x |
"weights" = weights_new, |
597 | 9x |
"diff_v" = diff_v |
598 |
) |
|
599 |
} |
1 |
# Utility functions to cooperate with {rtables} package |
|
2 | ||
3 |
#' Convert table into matrix of strings |
|
4 |
#' |
|
5 |
#' @description `r lifecycle::badge("stable")` |
|
6 |
#' |
|
7 |
#' Helper function to use mostly within tests. `with_spaces`parameter allows |
|
8 |
#' to test not only for content but also indentation and table structure. |
|
9 |
#' `print_txt_to_copy` instead facilitate the testing development by returning a well |
|
10 |
#' formatted text that needs only to be copied and pasted in the expected output. |
|
11 |
#' |
|
12 |
#' @inheritParams formatters::toString |
|
13 |
#' @param x (`VTableTree`)\cr `rtables` table object. |
|
14 |
#' @param with_spaces (`flag`)\cr whether the tested table should keep the indentation and other relevant spaces. |
|
15 |
#' @param print_txt_to_copy (`flag`)\cr utility to have a way to copy the input table directly |
|
16 |
#' into the expected variable instead of copying it too manually. |
|
17 |
#' |
|
18 |
#' @return A `matrix` of `string`s. If `print_txt_to_copy = TRUE` the well formatted printout of the |
|
19 |
#' table will be printed to console, ready to be copied as a expected value. |
|
20 |
#' |
|
21 |
#' @examples |
|
22 |
#' tbl <- basic_table() %>% |
|
23 |
#' split_rows_by("SEX") %>% |
|
24 |
#' split_cols_by("ARM") %>% |
|
25 |
#' analyze("AGE") %>% |
|
26 |
#' build_table(tern_ex_adsl) |
|
27 |
#' |
|
28 |
#' to_string_matrix(tbl, widths = ceiling(propose_column_widths(tbl) / 2)) |
|
29 |
#' |
|
30 |
#' @export |
|
31 |
to_string_matrix <- function(x, widths = NULL, max_width = NULL, |
|
32 |
hsep = formatters::default_hsep(), |
|
33 |
with_spaces = TRUE, print_txt_to_copy = FALSE) { |
|
34 | 11x |
checkmate::assert_flag(with_spaces) |
35 | 11x |
checkmate::assert_flag(print_txt_to_copy) |
36 | 11x |
checkmate::assert_int(max_width, null.ok = TRUE) |
37 | ||
38 | 11x |
if (inherits(x, "MatrixPrintForm")) { |
39 | ! |
tx <- x |
40 |
} else { |
|
41 | 11x |
tx <- matrix_form(x, TRUE) |
42 |
} |
|
43 | ||
44 | 11x |
tf_wrap <- FALSE |
45 | 11x |
if (!is.null(max_width)) { |
46 | ! |
tf_wrap <- TRUE |
47 |
} |
|
48 | ||
49 |
# Producing the matrix to test |
|
50 | 11x |
if (with_spaces) { |
51 | 2x |
out <- strsplit(toString(tx, widths = widths, tf_wrap = tf_wrap, max_width = max_width, hsep = hsep), "\n")[[1]] |
52 |
} else { |
|
53 | 9x |
out <- tx$strings |
54 |
} |
|
55 | ||
56 |
# Printing to console formatted output that needs to be copied in "expected" |
|
57 | 11x |
if (print_txt_to_copy) { |
58 | 2x |
out_tmp <- out |
59 | 2x |
if (!with_spaces) { |
60 | 1x |
out_tmp <- apply(out, 1, paste0, collapse = '", "') |
61 |
} |
|
62 | 2x |
cat(paste0('c(\n "', paste0(out_tmp, collapse = '",\n "'), '"\n)')) |
63 |
} |
|
64 | ||
65 |
# Return values |
|
66 | 11x |
return(out) |
67 |
} |
|
68 | ||
69 |
#' Blank for missing input |
|
70 |
#' |
|
71 |
#' Helper function to use in tabulating model results. |
|
72 |
#' |
|
73 |
#' @param x (`vector`)\cr input for a cell. |
|
74 |
#' |
|
75 |
#' @return An empty `character` vector if all entries in `x` are missing (`NA`), otherwise |
|
76 |
#' the unlisted version of `x`. |
|
77 |
#' |
|
78 |
#' @keywords internal |
|
79 |
unlist_and_blank_na <- function(x) { |
|
80 | 267x |
unl <- unlist(x) |
81 | 267x |
if (all(is.na(unl))) { |
82 | 161x |
character() |
83 |
} else { |
|
84 | 106x |
unl |
85 |
} |
|
86 |
} |
|
87 | ||
88 |
#' Constructor for content functions given a data frame with flag input |
|
89 |
#' |
|
90 |
#' This can be useful for tabulating model results. |
|
91 |
#' |
|
92 |
#' @param analysis_var (`string`)\cr variable name for the column containing values to be returned by the |
|
93 |
#' content function. |
|
94 |
#' @param flag_var (`string`)\cr variable name for the logical column identifying which row should be returned. |
|
95 |
#' @param format (`string`)\cr `rtables` format to use. |
|
96 |
#' |
|
97 |
#' @return A content function which gives `df$analysis_var` at the row identified by |
|
98 |
#' `.df_row$flag` in the given format. |
|
99 |
#' |
|
100 |
#' @keywords internal |
|
101 |
cfun_by_flag <- function(analysis_var, |
|
102 |
flag_var, |
|
103 |
format = "xx", |
|
104 |
.indent_mods = NULL) { |
|
105 | 61x |
checkmate::assert_string(analysis_var) |
106 | 61x |
checkmate::assert_string(flag_var) |
107 | 61x |
function(df, labelstr) { |
108 | 265x |
row_index <- which(df[[flag_var]]) |
109 | 265x |
x <- unlist_and_blank_na(df[[analysis_var]][row_index]) |
110 | 265x |
formatters::with_label( |
111 | 265x |
rcell(x, format = format, indent_mod = .indent_mods), |
112 | 265x |
labelstr |
113 |
) |
|
114 |
} |
|
115 |
} |
|
116 | ||
117 |
#' Content row function to add row total to labels |
|
118 |
#' |
|
119 |
#' This takes the label of the latest row split level and adds the row total from `df` in parentheses. |
|
120 |
#' This function differs from [c_label_n_alt()] by taking row counts from `df` rather than |
|
121 |
#' `alt_counts_df`, and is used by [add_rowcounts()] when `alt_counts` is set to `FALSE`. |
|
122 |
#' |
|
123 |
#' @inheritParams argument_convention |
|
124 |
#' |
|
125 |
#' @return A list with formatted [rtables::CellValue()] with the row count value and the correct label. |
|
126 |
#' |
|
127 |
#' @note It is important here to not use `df` but rather `.N_row` in the implementation, because |
|
128 |
#' the former is already split by columns and will refer to the first column of the data only. |
|
129 |
#' |
|
130 |
#' @seealso [c_label_n_alt()] which performs the same function but retrieves row counts from |
|
131 |
#' `alt_counts_df` instead of `df`. |
|
132 |
#' |
|
133 |
#' @keywords internal |
|
134 |
c_label_n <- function(df, |
|
135 |
labelstr, |
|
136 |
.N_row) { # nolint |
|
137 | 273x |
label <- paste0(labelstr, " (N=", .N_row, ")") |
138 | 273x |
in_rows( |
139 | 273x |
.list = list(row_count = formatters::with_label(c(.N_row, .N_row), label)), |
140 | 273x |
.formats = c(row_count = function(x, ...) "") |
141 |
) |
|
142 |
} |
|
143 | ||
144 |
#' Content row function to add `alt_counts_df` row total to labels |
|
145 |
#' |
|
146 |
#' This takes the label of the latest row split level and adds the row total from `alt_counts_df` |
|
147 |
#' in parentheses. This function differs from [c_label_n()] by taking row counts from `alt_counts_df` |
|
148 |
#' rather than `df`, and is used by [add_rowcounts()] when `alt_counts` is set to `TRUE`. |
|
149 |
#' |
|
150 |
#' @inheritParams argument_convention |
|
151 |
#' |
|
152 |
#' @return A list with formatted [rtables::CellValue()] with the row count value and the correct label. |
|
153 |
#' |
|
154 |
#' @seealso [c_label_n()] which performs the same function but retrieves row counts from `df` instead |
|
155 |
#' of `alt_counts_df`. |
|
156 |
#' |
|
157 |
#' @keywords internal |
|
158 |
c_label_n_alt <- function(df, |
|
159 |
labelstr, |
|
160 |
.alt_df_row) { |
|
161 | 7x |
N_row_alt <- nrow(.alt_df_row) # nolint |
162 | 7x |
label <- paste0(labelstr, " (N=", N_row_alt, ")") |
163 | 7x |
in_rows( |
164 | 7x |
.list = list(row_count = formatters::with_label(c(N_row_alt, N_row_alt), label)), |
165 | 7x |
.formats = c(row_count = function(x, ...) "") |
166 |
) |
|
167 |
} |
|
168 | ||
169 |
#' Layout-creating function to add row total counts |
|
170 |
#' |
|
171 |
#' @description `r lifecycle::badge("stable")` |
|
172 |
#' |
|
173 |
#' This works analogously to [rtables::add_colcounts()] but on the rows. This function |
|
174 |
#' is a wrapper for [rtables::summarize_row_groups()]. |
|
175 |
#' |
|
176 |
#' @inheritParams argument_convention |
|
177 |
#' @param alt_counts (`flag`)\cr whether row counts should be taken from `alt_counts_df` (`TRUE`) |
|
178 |
#' or from `df` (`FALSE`). Defaults to `FALSE`. |
|
179 |
#' |
|
180 |
#' @return A modified layout where the latest row split labels now have the row-wise |
|
181 |
#' total counts (i.e. without column-based subsetting) attached in parentheses. |
|
182 |
#' |
|
183 |
#' @note Row count values are contained in these row count rows but are not displayed |
|
184 |
#' so that they are not considered zero rows by default when pruning. |
|
185 |
#' |
|
186 |
#' @examples |
|
187 |
#' basic_table() %>% |
|
188 |
#' split_cols_by("ARM") %>% |
|
189 |
#' add_colcounts() %>% |
|
190 |
#' split_rows_by("RACE", split_fun = drop_split_levels) %>% |
|
191 |
#' add_rowcounts() %>% |
|
192 |
#' analyze("AGE", afun = list_wrap_x(summary), format = "xx.xx") %>% |
|
193 |
#' build_table(DM) |
|
194 |
#' |
|
195 |
#' @export |
|
196 |
add_rowcounts <- function(lyt, alt_counts = FALSE) { |
|
197 | 7x |
summarize_row_groups( |
198 | 7x |
lyt, |
199 | 7x |
cfun = if (alt_counts) c_label_n_alt else c_label_n |
200 |
) |
|
201 |
} |
|
202 | ||
203 |
#' Obtain column indices |
|
204 |
#' |
|
205 |
#' @description `r lifecycle::badge("stable")` |
|
206 |
#' |
|
207 |
#' Helper function to extract column indices from a `VTableTree` for a given |
|
208 |
#' vector of column names. |
|
209 |
#' |
|
210 |
#' @param table_tree (`VTableTree`)\cr `rtables` table object to extract the indices from. |
|
211 |
#' @param col_names (`character`)\cr vector of column names. |
|
212 |
#' |
|
213 |
#' @return A vector of column indices. |
|
214 |
#' |
|
215 |
#' @export |
|
216 |
h_col_indices <- function(table_tree, col_names) { |
|
217 | 1256x |
checkmate::assert_class(table_tree, "VTableNodeInfo") |
218 | 1256x |
checkmate::assert_subset(col_names, names(attr(col_info(table_tree), "cextra_args")), empty.ok = FALSE) |
219 | 1256x |
match(col_names, names(attr(col_info(table_tree), "cextra_args"))) |
220 |
} |
|
221 | ||
222 |
#' Labels or names of list elements |
|
223 |
#' |
|
224 |
#' Internal helper function for working with nested statistic function results which typically |
|
225 |
#' don't have labels but names that we can use. |
|
226 |
#' |
|
227 |
#' @param x (`list`)\cr a list. |
|
228 |
#' |
|
229 |
#' @return A `character` vector with the labels or names for the list elements. |
|
230 |
#' |
|
231 |
#' @keywords internal |
|
232 |
labels_or_names <- function(x) { |
|
233 | 190x |
checkmate::assert_multi_class(x, c("data.frame", "list")) |
234 | 190x |
labs <- sapply(x, obj_label) |
235 | 190x |
nams <- rlang::names2(x) |
236 | 190x |
label_is_null <- sapply(labs, is.null) |
237 | 190x |
result <- unlist(ifelse(label_is_null, nams, labs)) |
238 | 190x |
return(result) |
239 |
} |
|
240 | ||
241 |
#' Convert to `rtable` |
|
242 |
#' |
|
243 |
#' @description `r lifecycle::badge("stable")` |
|
244 |
#' |
|
245 |
#' This is a new generic function to convert objects to `rtable` tables. |
|
246 |
#' |
|
247 |
#' @param x (`data.frame`)\cr the object which should be converted to an `rtable`. |
|
248 |
#' @param ... additional arguments for methods. |
|
249 |
#' |
|
250 |
#' @return An `rtables` table object. Note that the concrete class will depend on the method used. |
|
251 |
#' |
|
252 |
#' @export |
|
253 |
as.rtable <- function(x, ...) { # nolint |
|
254 | 3x |
UseMethod("as.rtable", x) |
255 |
} |
|
256 | ||
257 |
#' @describeIn as.rtable Method for converting a `data.frame` that contains numeric columns to `rtable`. |
|
258 |
#' |
|
259 |
#' @param format (`string` or `function`)\cr the format which should be used for the columns. |
|
260 |
#' |
|
261 |
#' @method as.rtable data.frame |
|
262 |
#' |
|
263 |
#' @examples |
|
264 |
#' x <- data.frame( |
|
265 |
#' a = 1:10, |
|
266 |
#' b = rnorm(10) |
|
267 |
#' ) |
|
268 |
#' as.rtable(x) |
|
269 |
#' |
|
270 |
#' @export |
|
271 |
as.rtable.data.frame <- function(x, format = "xx.xx", ...) { |
|
272 | 3x |
checkmate::assert_numeric(unlist(x)) |
273 | 2x |
do.call( |
274 | 2x |
rtable, |
275 | 2x |
c( |
276 | 2x |
list( |
277 | 2x |
header = labels_or_names(x), |
278 | 2x |
format = format |
279 |
), |
|
280 | 2x |
Map( |
281 | 2x |
function(row, row_name) { |
282 | 20x |
do.call( |
283 | 20x |
rrow, |
284 | 20x |
c(as.list(unname(row)), |
285 | 20x |
row.name = row_name |
286 |
) |
|
287 |
) |
|
288 |
}, |
|
289 | 2x |
row = as.data.frame(t(x)), |
290 | 2x |
row_name = rownames(x) |
291 |
) |
|
292 |
) |
|
293 |
) |
|
294 |
} |
|
295 | ||
296 |
#' Split parameters |
|
297 |
#' |
|
298 |
#' @description `r lifecycle::badge("stable")` |
|
299 |
#' |
|
300 |
#' It divides the data in the vector `param` into the groups defined by `f` based on specified `values`. It is relevant |
|
301 |
#' in `rtables` layers so as to distribute parameters `.stats` or' `.formats` into lists with items corresponding to |
|
302 |
#' specific analysis function. |
|
303 |
#' |
|
304 |
#' @param param (`vector`)\cr the parameter to be split. |
|
305 |
#' @param value (`vector`)\cr the value used to split. |
|
306 |
#' @param f (`list`)\cr the reference to make the split. |
|
307 |
#' |
|
308 |
#' @return A named `list` with the same element names as `f`, each containing the elements specified in `.stats`. |
|
309 |
#' |
|
310 |
#' @examples |
|
311 |
#' f <- list( |
|
312 |
#' surv = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci"), |
|
313 |
#' surv_diff = c("rate_diff", "rate_diff_ci", "ztest_pval") |
|
314 |
#' ) |
|
315 |
#' |
|
316 |
#' .stats <- c("pt_at_risk", "rate_diff") |
|
317 |
#' h_split_param(.stats, .stats, f = f) |
|
318 |
#' |
|
319 |
#' # $surv |
|
320 |
#' # [1] "pt_at_risk" |
|
321 |
#' # |
|
322 |
#' # $surv_diff |
|
323 |
#' # [1] "rate_diff" |
|
324 |
#' |
|
325 |
#' .formats <- c("pt_at_risk" = "xx", "event_free_rate" = "xxx") |
|
326 |
#' h_split_param(.formats, names(.formats), f = f) |
|
327 |
#' |
|
328 |
#' # $surv |
|
329 |
#' # pt_at_risk event_free_rate |
|
330 |
#' # "xx" "xxx" |
|
331 |
#' # |
|
332 |
#' # $surv_diff |
|
333 |
#' # NULL |
|
334 |
#' |
|
335 |
#' @export |
|
336 |
h_split_param <- function(param, |
|
337 |
value, |
|
338 |
f) { |
|
339 | 26x |
y <- lapply(f, function(x) param[value %in% x]) |
340 | 26x |
lapply(y, function(x) if (length(x) == 0) NULL else x) |
341 |
} |
|
342 | ||
343 |
#' Get selected statistics names |
|
344 |
#' |
|
345 |
#' Helper function to be used for creating `afun`. |
|
346 |
#' |
|
347 |
#' @param .stats (`vector` or `NULL`)\cr input to the layout creating function. Note that `NULL` means |
|
348 |
#' in this context that all default statistics should be used. |
|
349 |
#' @param all_stats (`character`)\cr all statistics which can be selected here potentially. |
|
350 |
#' |
|
351 |
#' @return A `character` vector with the selected statistics. |
|
352 |
#' |
|
353 |
#' @keywords internal |
|
354 |
afun_selected_stats <- function(.stats, all_stats) { |
|
355 | 2x |
checkmate::assert_character(.stats, null.ok = TRUE) |
356 | 2x |
checkmate::assert_character(all_stats) |
357 | 2x |
if (is.null(.stats)) { |
358 | 1x |
all_stats |
359 |
} else { |
|
360 | 1x |
intersect(.stats, all_stats) |
361 |
} |
|
362 |
} |
|
363 | ||
364 |
#' Add variable labels to top left corner in table |
|
365 |
#' |
|
366 |
#' @description `r lifecycle::badge("stable")` |
|
367 |
#' |
|
368 |
#' Helper layout-creating function to append the variable labels of a given variables vector |
|
369 |
#' from a given dataset in the top left corner. If a variable label is not found then the |
|
370 |
#' variable name itself is used instead. Multiple variable labels are concatenated with slashes. |
|
371 |
#' |
|
372 |
#' @inheritParams argument_convention |
|
373 |
#' @param vars (`character`)\cr variable names of which the labels are to be looked up in `df`. |
|
374 |
#' @param indent (`integer(1)`)\cr non-negative number of nested indent space, default to 0L which means no indent. |
|
375 |
#' 1L means two spaces indent, 2L means four spaces indent and so on. |
|
376 |
#' |
|
377 |
#' @return A modified layout with the new variable label(s) added to the top-left material. |
|
378 |
#' |
|
379 |
#' @note This is not an optimal implementation of course, since we are using here the data set |
|
380 |
#' itself during the layout creation. When we have a more mature `rtables` implementation then |
|
381 |
#' this will also be improved or not necessary anymore. |
|
382 |
#' |
|
383 |
#' @examples |
|
384 |
#' lyt <- basic_table() %>% |
|
385 |
#' split_cols_by("ARM") %>% |
|
386 |
#' add_colcounts() %>% |
|
387 |
#' split_rows_by("SEX") %>% |
|
388 |
#' append_varlabels(DM, "SEX") %>% |
|
389 |
#' analyze("AGE", afun = mean) %>% |
|
390 |
#' append_varlabels(DM, "AGE", indent = 1) |
|
391 |
#' build_table(lyt, DM) |
|
392 |
#' |
|
393 |
#' lyt <- basic_table() %>% |
|
394 |
#' split_cols_by("ARM") %>% |
|
395 |
#' split_rows_by("SEX") %>% |
|
396 |
#' analyze("AGE", afun = mean) %>% |
|
397 |
#' append_varlabels(DM, c("SEX", "AGE")) |
|
398 |
#' build_table(lyt, DM) |
|
399 |
#' |
|
400 |
#' @export |
|
401 |
append_varlabels <- function(lyt, df, vars, indent = 0L) { |
|
402 | 3x |
if (checkmate::test_flag(indent)) { |
403 | ! |
warning("indent argument is now accepting integers. Boolean indent will be converted to integers.") |
404 | ! |
indent <- as.integer(indent) |
405 |
} |
|
406 | ||
407 | 3x |
checkmate::assert_data_frame(df) |
408 | 3x |
checkmate::assert_character(vars) |
409 | 3x |
checkmate::assert_count(indent) |
410 | ||
411 | 3x |
lab <- formatters::var_labels(df[vars], fill = TRUE) |
412 | 3x |
lab <- paste(lab, collapse = " / ") |
413 | 3x |
space <- paste(rep(" ", indent * 2), collapse = "") |
414 | 3x |
lab <- paste0(space, lab) |
415 | ||
416 | 3x |
append_topleft(lyt, lab) |
417 |
} |
|
418 | ||
419 |
#' Default string replacement for `NA` values |
|
420 |
#' |
|
421 |
#' @description `r lifecycle::badge("stable")` |
|
422 |
#' |
|
423 |
#' The default string used to represent `NA` values. This value is used as the default |
|
424 |
#' value for the `na_str` argument throughout the `tern` package, and printed in place |
|
425 |
#' of `NA` values in output tables. If not specified for each `tern` function by the user |
|
426 |
#' via the `na_str` argument, or in the R environment options via [set_default_na_str()], |
|
427 |
#' then `NA` is used. |
|
428 |
#' |
|
429 |
#' @param na_str (`string`)\cr single string value to set in the R environment options as |
|
430 |
#' the default value to replace `NA`s. Use `getOption("tern_default_na_str")` to check the |
|
431 |
#' current value set in the R environment (defaults to `NULL` if not set). |
|
432 |
#' |
|
433 |
#' @name default_na_str |
|
434 |
NULL |
|
435 | ||
436 |
#' @describeIn default_na_str Accessor for default `NA` value replacement string. |
|
437 |
#' |
|
438 |
#' @return |
|
439 |
#' * `default_na_str` returns the current value if an R environment option has been set |
|
440 |
#' for `"tern_default_na_str"`, or `NA_character_` otherwise. |
|
441 |
#' |
|
442 |
#' @examples |
|
443 |
#' # Default settings |
|
444 |
#' default_na_str() |
|
445 |
#' getOption("tern_default_na_str") |
|
446 |
#' |
|
447 |
#' # Set custom value |
|
448 |
#' set_default_na_str("<Missing>") |
|
449 |
#' |
|
450 |
#' # Settings after value has been set |
|
451 |
#' default_na_str() |
|
452 |
#' getOption("tern_default_na_str") |
|
453 |
#' |
|
454 |
#' @export |
|
455 |
default_na_str <- function() { |
|
456 | 330x |
getOption("tern_default_na_str", default = NA_character_) |
457 |
} |
|
458 | ||
459 |
#' @describeIn default_na_str Setter for default `NA` value replacement string. Sets the |
|
460 |
#' option `"tern_default_na_str"` within the R environment. |
|
461 |
#' |
|
462 |
#' @return |
|
463 |
#' * `set_default_na_str` has no return value. |
|
464 |
#' |
|
465 |
#' @export |
|
466 |
set_default_na_str <- function(na_str) { |
|
467 | 3x |
checkmate::assert_character(na_str, len = 1, null.ok = TRUE) |
468 | 3x |
options("tern_default_na_str" = na_str) |
469 |
} |
1 |
#' Helper functions for subgroup treatment effect pattern (STEP) calculations |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Helper functions that are used internally for the STEP calculations. |
|
6 |
#' |
|
7 |
#' @inheritParams argument_convention |
|
8 |
#' |
|
9 |
#' @name h_step |
|
10 |
#' @include control_step.R |
|
11 |
NULL |
|
12 | ||
13 |
#' @describeIn h_step Creates the windows for STEP, based on the control settings |
|
14 |
#' provided. |
|
15 |
#' |
|
16 |
#' @param x (`numeric`)\cr biomarker value(s) to use (without `NA`). |
|
17 |
#' @param control (named `list`)\cr output from `control_step()`. |
|
18 |
#' |
|
19 |
#' @return |
|
20 |
#' * `h_step_window()` returns a list containing the window-selection matrix `sel` |
|
21 |
#' and the interval information matrix `interval`. |
|
22 |
#' |
|
23 |
#' @export |
|
24 |
h_step_window <- function(x, |
|
25 |
control = control_step()) { |
|
26 | 12x |
checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE) |
27 | 12x |
checkmate::assert_list(control, names = "named") |
28 | ||
29 | 12x |
sel <- matrix(FALSE, length(x), control$num_points) |
30 | 12x |
out <- matrix(0, control$num_points, 3) |
31 | 12x |
colnames(out) <- paste("Interval", c("Center", "Lower", "Upper")) |
32 | 12x |
if (control$use_percentile) { |
33 |
# Create windows according to percentile cutoffs. |
|
34 | 9x |
out <- cbind(out, out) |
35 | 9x |
colnames(out)[1:3] <- paste("Percentile", c("Center", "Lower", "Upper")) |
36 | 9x |
xs <- seq(0, 1, length.out = control$num_points + 2)[-1] |
37 | 9x |
for (i in seq_len(control$num_points)) { |
38 | 185x |
out[i, 2:3] <- c( |
39 | 185x |
max(xs[i] - control$bandwidth, 0), |
40 | 185x |
min(xs[i] + control$bandwidth, 1) |
41 |
) |
|
42 | 185x |
out[i, 5:6] <- stats::quantile(x, out[i, 2:3]) |
43 | 185x |
sel[, i] <- x >= out[i, 5] & x <= out[i, 6] |
44 |
} |
|
45 |
# Center is the middle point of the percentile window. |
|
46 | 9x |
out[, 1] <- xs[-control$num_points - 1] |
47 | 9x |
out[, 4] <- stats::quantile(x, out[, 1]) |
48 |
} else { |
|
49 |
# Create windows according to cutoffs. |
|
50 | 3x |
m <- c(min(x), max(x)) |
51 | 3x |
xs <- seq(m[1], m[2], length.out = control$num_points + 2)[-1] |
52 | 3x |
for (i in seq_len(control$num_points)) { |
53 | 11x |
out[i, 2:3] <- c( |
54 | 11x |
max(xs[i] - control$bandwidth, m[1]), |
55 | 11x |
min(xs[i] + control$bandwidth, m[2]) |
56 |
) |
|
57 | 11x |
sel[, i] <- x >= out[i, 2] & x <= out[i, 3] |
58 |
} |
|
59 |
# Center is the same as the point for predicting. |
|
60 | 3x |
out[, 1] <- xs[-control$num_points - 1] |
61 |
} |
|
62 | 12x |
list(sel = sel, interval = out) |
63 |
} |
|
64 | ||
65 |
#' @describeIn h_step Calculates the estimated treatment effect estimate |
|
66 |
#' on the linear predictor scale and corresponding standard error from a STEP `model` fitted |
|
67 |
#' on `data` given `variables` specification, for a single biomarker value `x`. |
|
68 |
#' This works for both `coxph` and `glm` models, i.e. for calculating log hazard ratio or log odds |
|
69 |
#' ratio estimates. |
|
70 |
#' |
|
71 |
#' @param model (`coxph` or `glm`)\cr the regression model object. |
|
72 |
#' |
|
73 |
#' @return |
|
74 |
#' * `h_step_trt_effect()` returns a vector with elements `est` and `se`. |
|
75 |
#' |
|
76 |
#' @export |
|
77 |
h_step_trt_effect <- function(data, |
|
78 |
model, |
|
79 |
variables, |
|
80 |
x) { |
|
81 | 208x |
checkmate::assert_multi_class(model, c("coxph", "glm")) |
82 | 208x |
checkmate::assert_number(x) |
83 | 208x |
assert_df_with_variables(data, variables) |
84 | 208x |
checkmate::assert_factor(data[[variables$arm]], n.levels = 2) |
85 | ||
86 | 208x |
newdata <- data[c(1, 1), ] |
87 | 208x |
newdata[, variables$biomarker] <- x |
88 | 208x |
newdata[, variables$arm] <- levels(data[[variables$arm]]) |
89 | 208x |
model_terms <- stats::delete.response(stats::terms(model)) |
90 | 208x |
model_frame <- stats::model.frame(model_terms, data = newdata, xlev = model$xlevels) |
91 | 208x |
mat <- stats::model.matrix(model_terms, data = model_frame, contrasts.arg = model$contrasts) |
92 | 208x |
coefs <- stats::coef(model) |
93 |
# Note: It is important to use the coef subset from matrix, otherwise intercept and |
|
94 |
# strata are included for coxph() models. |
|
95 | 208x |
mat <- mat[, names(coefs)] |
96 | 208x |
mat_diff <- diff(mat) |
97 | 208x |
est <- mat_diff %*% coefs |
98 | 208x |
var <- mat_diff %*% stats::vcov(model) %*% t(mat_diff) |
99 | 208x |
se <- sqrt(var) |
100 | 208x |
c( |
101 | 208x |
est = est, |
102 | 208x |
se = se |
103 |
) |
|
104 |
} |
|
105 | ||
106 |
#' @describeIn h_step Builds the model formula used in survival STEP calculations. |
|
107 |
#' |
|
108 |
#' @return |
|
109 |
#' * `h_step_survival_formula()` returns a model formula. |
|
110 |
#' |
|
111 |
#' @export |
|
112 |
h_step_survival_formula <- function(variables, |
|
113 |
control = control_step()) { |
|
114 | 10x |
checkmate::assert_character(variables$covariates, null.ok = TRUE) |
115 | ||
116 | 10x |
assert_list_of_variables(variables[c("arm", "biomarker", "event", "time")]) |
117 | 10x |
form <- paste0("Surv(", variables$time, ", ", variables$event, ") ~ ", variables$arm) |
118 | 10x |
if (control$degree > 0) { |
119 | 5x |
form <- paste0(form, " * stats::poly(", variables$biomarker, ", degree = ", control$degree, ", raw = TRUE)") |
120 |
} |
|
121 | 10x |
if (!is.null(variables$covariates)) { |
122 | 6x |
form <- paste(form, "+", paste(variables$covariates, collapse = "+")) |
123 |
} |
|
124 | 10x |
if (!is.null(variables$strata)) { |
125 | 2x |
form <- paste0(form, " + strata(", paste0(variables$strata, collapse = ", "), ")") |
126 |
} |
|
127 | 10x |
stats::as.formula(form) |
128 |
} |
|
129 | ||
130 |
#' @describeIn h_step Estimates the model with `formula` built based on |
|
131 |
#' `variables` in `data` for a given `subset` and `control` parameters for the |
|
132 |
#' Cox regression. |
|
133 |
#' |
|
134 |
#' @param formula (`formula`)\cr the regression model formula. |
|
135 |
#' @param subset (`logical`)\cr subset vector. |
|
136 |
#' |
|
137 |
#' @return |
|
138 |
#' * `h_step_survival_est()` returns a matrix of number of observations `n`, |
|
139 |
#' `events`, log hazard ratio estimates `loghr`, standard error `se`, |
|
140 |
#' and Wald confidence interval bounds `ci_lower` and `ci_upper`. One row is |
|
141 |
#' included for each biomarker value in `x`. |
|
142 |
#' |
|
143 |
#' @export |
|
144 |
h_step_survival_est <- function(formula, |
|
145 |
data, |
|
146 |
variables, |
|
147 |
x, |
|
148 |
subset = rep(TRUE, nrow(data)), |
|
149 |
control = control_coxph()) { |
|
150 | 55x |
checkmate::assert_formula(formula) |
151 | 55x |
assert_df_with_variables(data, variables) |
152 | 55x |
checkmate::assert_logical(subset, min.len = 1, any.missing = FALSE) |
153 | 55x |
checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE) |
154 | 55x |
checkmate::assert_list(control, names = "named") |
155 | ||
156 |
# Note: `subset` in `coxph` needs to be an expression referring to `data` variables. |
|
157 | 55x |
data$.subset <- subset |
158 | 55x |
coxph_warnings <- NULL |
159 | 55x |
tryCatch( |
160 | 55x |
withCallingHandlers( |
161 | 55x |
expr = { |
162 | 55x |
fit <- survival::coxph( |
163 | 55x |
formula = formula, |
164 | 55x |
data = data, |
165 | 55x |
subset = .subset, |
166 | 55x |
ties = control$ties |
167 |
) |
|
168 |
}, |
|
169 | 55x |
warning = function(w) { |
170 | 1x |
coxph_warnings <<- c(coxph_warnings, w) |
171 | 1x |
invokeRestart("muffleWarning") |
172 |
} |
|
173 |
), |
|
174 | 55x |
finally = { |
175 |
} |
|
176 |
) |
|
177 | 55x |
if (!is.null(coxph_warnings)) { |
178 | 1x |
warning(paste( |
179 | 1x |
"Fit warnings occurred, please consider using a simpler model, or", |
180 | 1x |
"larger `bandwidth`, less `num_points` in `control_step()` settings" |
181 |
)) |
|
182 |
} |
|
183 |
# Produce a matrix with one row per `x` and columns `est` and `se`. |
|
184 | 55x |
estimates <- t(vapply( |
185 | 55x |
X = x, |
186 | 55x |
FUN = h_step_trt_effect, |
187 | 55x |
FUN.VALUE = c(1, 2), |
188 | 55x |
data = data, |
189 | 55x |
model = fit, |
190 | 55x |
variables = variables |
191 |
)) |
|
192 | 55x |
q_norm <- stats::qnorm((1 + control$conf_level) / 2) |
193 | 55x |
cbind( |
194 | 55x |
n = fit$n, |
195 | 55x |
events = fit$nevent, |
196 | 55x |
loghr = estimates[, "est"], |
197 | 55x |
se = estimates[, "se"], |
198 | 55x |
ci_lower = estimates[, "est"] - q_norm * estimates[, "se"], |
199 | 55x |
ci_upper = estimates[, "est"] + q_norm * estimates[, "se"] |
200 |
) |
|
201 |
} |
|
202 | ||
203 |
#' @describeIn h_step Builds the model formula used in response STEP calculations. |
|
204 |
#' |
|
205 |
#' @return |
|
206 |
#' * `h_step_rsp_formula()` returns a model formula. |
|
207 |
#' |
|
208 |
#' @export |
|
209 |
h_step_rsp_formula <- function(variables, |
|
210 |
control = c(control_step(), control_logistic())) { |
|
211 | 14x |
checkmate::assert_character(variables$covariates, null.ok = TRUE) |
212 | 14x |
assert_list_of_variables(variables[c("arm", "biomarker", "response")]) |
213 | 14x |
response_definition <- sub( |
214 | 14x |
pattern = "response", |
215 | 14x |
replacement = variables$response, |
216 | 14x |
x = control$response_definition, |
217 | 14x |
fixed = TRUE |
218 |
) |
|
219 | 14x |
form <- paste0(response_definition, " ~ ", variables$arm) |
220 | 14x |
if (control$degree > 0) { |
221 | 8x |
form <- paste0(form, " * stats::poly(", variables$biomarker, ", degree = ", control$degree, ", raw = TRUE)") |
222 |
} |
|
223 | 14x |
if (!is.null(variables$covariates)) { |
224 | 8x |
form <- paste(form, "+", paste(variables$covariates, collapse = "+")) |
225 |
} |
|
226 | 14x |
if (!is.null(variables$strata)) { |
227 | 5x |
strata_arg <- if (length(variables$strata) > 1) { |
228 | 2x |
paste0("I(interaction(", paste0(variables$strata, collapse = ", "), "))") |
229 |
} else { |
|
230 | 3x |
variables$strata |
231 |
} |
|
232 | 5x |
form <- paste0(form, "+ strata(", strata_arg, ")") |
233 |
} |
|
234 | 14x |
stats::as.formula(form) |
235 |
} |
|
236 | ||
237 |
#' @describeIn h_step Estimates the model with `formula` built based on |
|
238 |
#' `variables` in `data` for a given `subset` and `control` parameters for the |
|
239 |
#' logistic regression. |
|
240 |
#' |
|
241 |
#' @param formula (`formula`)\cr the regression model formula. |
|
242 |
#' @param subset (`logical`)\cr subset vector. |
|
243 |
#' |
|
244 |
#' @return |
|
245 |
#' * `h_step_rsp_est()` returns a matrix of number of observations `n`, log odds |
|
246 |
#' ratio estimates `logor`, standard error `se`, and Wald confidence interval bounds |
|
247 |
#' `ci_lower` and `ci_upper`. One row is included for each biomarker value in `x`. |
|
248 |
#' |
|
249 |
#' @export |
|
250 |
h_step_rsp_est <- function(formula, |
|
251 |
data, |
|
252 |
variables, |
|
253 |
x, |
|
254 |
subset = rep(TRUE, nrow(data)), |
|
255 |
control = control_logistic()) { |
|
256 | 58x |
checkmate::assert_formula(formula) |
257 | 58x |
assert_df_with_variables(data, variables) |
258 | 58x |
checkmate::assert_logical(subset, min.len = 1, any.missing = FALSE) |
259 | 58x |
checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE) |
260 | 58x |
checkmate::assert_list(control, names = "named") |
261 |
# Note: `subset` in `glm` needs to be an expression referring to `data` variables. |
|
262 | 58x |
data$.subset <- subset |
263 | 58x |
fit_warnings <- NULL |
264 | 58x |
tryCatch( |
265 | 58x |
withCallingHandlers( |
266 | 58x |
expr = { |
267 | 58x |
fit <- if (is.null(variables$strata)) { |
268 | 54x |
stats::glm( |
269 | 54x |
formula = formula, |
270 | 54x |
data = data, |
271 | 54x |
subset = .subset, |
272 | 54x |
family = stats::binomial("logit") |
273 |
) |
|
274 |
} else { |
|
275 |
# clogit needs coxph and strata imported |
|
276 | 4x |
survival::clogit( |
277 | 4x |
formula = formula, |
278 | 4x |
data = data, |
279 | 4x |
subset = .subset |
280 |
) |
|
281 |
} |
|
282 |
}, |
|
283 | 58x |
warning = function(w) { |
284 | 19x |
fit_warnings <<- c(fit_warnings, w) |
285 | 19x |
invokeRestart("muffleWarning") |
286 |
} |
|
287 |
), |
|
288 | 58x |
finally = { |
289 |
} |
|
290 |
) |
|
291 | 58x |
if (!is.null(fit_warnings)) { |
292 | 13x |
warning(paste( |
293 | 13x |
"Fit warnings occurred, please consider using a simpler model, or", |
294 | 13x |
"larger `bandwidth`, less `num_points` in `control_step()` settings" |
295 |
)) |
|
296 |
} |
|
297 |
# Produce a matrix with one row per `x` and columns `est` and `se`. |
|
298 | 58x |
estimates <- t(vapply( |
299 | 58x |
X = x, |
300 | 58x |
FUN = h_step_trt_effect, |
301 | 58x |
FUN.VALUE = c(1, 2), |
302 | 58x |
data = data, |
303 | 58x |
model = fit, |
304 | 58x |
variables = variables |
305 |
)) |
|
306 | 58x |
q_norm <- stats::qnorm((1 + control$conf_level) / 2) |
307 | 58x |
cbind( |
308 | 58x |
n = length(fit$y), |
309 | 58x |
logor = estimates[, "est"], |
310 | 58x |
se = estimates[, "se"], |
311 | 58x |
ci_lower = estimates[, "est"] - q_norm * estimates[, "se"], |
312 | 58x |
ci_upper = estimates[, "est"] + q_norm * estimates[, "se"] |
313 |
) |
|
314 |
} |
1 |
#' Confidence intervals for a difference of binomials |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("experimental")` |
|
4 |
#' |
|
5 |
#' Several confidence intervals for the difference between proportions. |
|
6 |
#' |
|
7 |
#' @name desctools_binom |
|
8 |
NULL |
|
9 | ||
10 |
#' Recycle list of parameters |
|
11 |
#' |
|
12 |
#' This function recycles all supplied elements to the maximal dimension. |
|
13 |
#' |
|
14 |
#' @param ... (`any`)\cr elements to recycle. |
|
15 |
#' |
|
16 |
#' @return A `list`. |
|
17 |
#' |
|
18 |
#' @keywords internal |
|
19 |
#' @noRd |
|
20 |
h_recycle <- function(...) { |
|
21 | 78x |
lst <- list(...) |
22 | 78x |
maxdim <- max(lengths(lst)) |
23 | 78x |
res <- lapply(lst, rep, length.out = maxdim) |
24 | 78x |
attr(res, "maxdim") <- maxdim |
25 | 78x |
return(res) |
26 |
} |
|
27 | ||
28 |
#' @describeIn desctools_binom Several confidence intervals for the difference between proportions. |
|
29 |
#' |
|
30 |
#' @return A `matrix` of 3 values: |
|
31 |
#' * `est`: estimate of proportion difference. |
|
32 |
#' * `lwr.ci`: estimate of lower end of the confidence interval. |
|
33 |
#' * `upr.ci`: estimate of upper end of the confidence interval. |
|
34 |
#' |
|
35 |
#' @keywords internal |
|
36 |
desctools_binom <- function(x1, |
|
37 |
n1, |
|
38 |
x2, |
|
39 |
n2, |
|
40 |
conf.level = 0.95, # nolint |
|
41 |
sides = c("two.sided", "left", "right"), |
|
42 |
method = c( |
|
43 |
"ac", "wald", "waldcc", "score", "scorecc", "mn", "mee", "blj", "ha", "hal", "jp" |
|
44 |
)) { |
|
45 | 26x |
if (missing(sides)) { |
46 | 26x |
sides <- match.arg(sides) |
47 |
} |
|
48 | 26x |
if (missing(method)) { |
49 | 1x |
method <- match.arg(method) |
50 |
} |
|
51 | 26x |
iBinomDiffCI <- function(x1, n1, x2, n2, conf.level, sides, method) { # nolint |
52 | 26x |
if (sides != "two.sided") { |
53 | ! |
conf.level <- 1 - 2 * (1 - conf.level) # nolint |
54 |
} |
|
55 | 26x |
alpha <- 1 - conf.level |
56 | 26x |
kappa <- stats::qnorm(1 - alpha / 2) |
57 | 26x |
p1_hat <- x1 / n1 |
58 | 26x |
p2_hat <- x2 / n2 |
59 | 26x |
est <- p1_hat - p2_hat |
60 | 26x |
switch(method, |
61 | 26x |
wald = { |
62 | 4x |
vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2 |
63 | 4x |
term2 <- kappa * sqrt(vd) |
64 | 4x |
ci_lwr <- max(-1, est - term2) |
65 | 4x |
ci_upr <- min(1, est + term2) |
66 |
}, |
|
67 | 26x |
waldcc = { |
68 | 6x |
vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2 |
69 | 6x |
term2 <- kappa * sqrt(vd) |
70 | 6x |
term2 <- term2 + 0.5 * (1 / n1 + 1 / n2) |
71 | 6x |
ci_lwr <- max(-1, est - term2) |
72 | 6x |
ci_upr <- min(1, est + term2) |
73 |
}, |
|
74 | 26x |
ac = { |
75 | 2x |
n1 <- n1 + 2 |
76 | 2x |
n2 <- n2 + 2 |
77 | 2x |
x1 <- x1 + 1 |
78 | 2x |
x2 <- x2 + 1 |
79 | 2x |
p1_hat <- x1 / n1 |
80 | 2x |
p2_hat <- x2 / n2 |
81 | 2x |
est1 <- p1_hat - p2_hat |
82 | 2x |
vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2 |
83 | 2x |
term2 <- kappa * sqrt(vd) |
84 | 2x |
ci_lwr <- max(-1, est1 - term2) |
85 | 2x |
ci_upr <- min(1, est1 + term2) |
86 |
}, |
|
87 | 26x |
exact = { |
88 | ! |
ci_lwr <- NA |
89 | ! |
ci_upr <- NA |
90 |
}, |
|
91 | 26x |
score = { |
92 | 3x |
w1 <- desctools_binomci( |
93 | 3x |
x = x1, n = n1, conf.level = conf.level, |
94 | 3x |
method = "wilson" |
95 |
) |
|
96 | 3x |
w2 <- desctools_binomci( |
97 | 3x |
x = x2, n = n2, conf.level = conf.level, |
98 | 3x |
method = "wilson" |
99 |
) |
|
100 | 3x |
l1 <- w1[2] |
101 | 3x |
u1 <- w1[3] |
102 | 3x |
l2 <- w2[2] |
103 | 3x |
u2 <- w2[3] |
104 | 3x |
ci_lwr <- est - kappa * sqrt(l1 * (1 - l1) / n1 + u2 * (1 - u2) / n2) |
105 | 3x |
ci_upr <- est + kappa * sqrt(u1 * (1 - u1) / n1 + l2 * (1 - l2) / n2) |
106 |
}, |
|
107 | 26x |
scorecc = { |
108 | 1x |
w1 <- desctools_binomci( |
109 | 1x |
x = x1, n = n1, conf.level = conf.level, |
110 | 1x |
method = "wilsoncc" |
111 |
) |
|
112 | 1x |
w2 <- desctools_binomci( |
113 | 1x |
x = x2, n = n2, conf.level = conf.level, |
114 | 1x |
method = "wilsoncc" |
115 |
) |
|
116 | 1x |
l1 <- w1[2] |
117 | 1x |
u1 <- w1[3] |
118 | 1x |
l2 <- w2[2] |
119 | 1x |
u2 <- w2[3] |
120 | 1x |
ci_lwr <- max(-1, est - sqrt((p1_hat - l1)^2 + (u2 - p2_hat)^2)) |
121 | 1x |
ci_upr <- min(1, est + sqrt((u1 - p1_hat)^2 + (p2_hat - l2)^2)) |
122 |
}, |
|
123 | 26x |
mee = { |
124 | 1x |
.score <- function(p1, n1, p2, n2, dif) { |
125 | ! |
if (dif > 1) dif <- 1 |
126 | ! |
if (dif < -1) dif <- -1 |
127 | 24x |
diff <- p1 - p2 - dif |
128 | 24x |
if (abs(diff) == 0) { |
129 | ! |
res <- 0 |
130 |
} else { |
|
131 | 24x |
t <- n2 / n1 |
132 | 24x |
a <- 1 + t |
133 | 24x |
b <- -(1 + t + p1 + t * p2 + dif * (t + 2)) |
134 | 24x |
c <- dif * dif + dif * (2 * p1 + t + 1) + p1 + t * p2 |
135 | 24x |
d <- -p1 * dif * (1 + dif) |
136 | 24x |
v <- (b / a / 3)^3 - b * c / (6 * a * a) + d / a / 2 |
137 | 24x |
if (abs(v) < .Machine$double.eps) v <- 0 |
138 | 24x |
s <- sqrt((b / a / 3)^2 - c / a / 3) |
139 | 24x |
u <- ifelse(v > 0, 1, -1) * s |
140 | 24x |
w <- (3.141592654 + acos(v / u^3)) / 3 |
141 | 24x |
p1d <- 2 * u * cos(w) - b / a / 3 |