1 |
#' Tabulate survival duration by subgroup
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The [tabulate_survival_subgroups()] function creates a layout element to tabulate survival duration by subgroup,
|
|
6 |
#' returning statistics including median survival time and hazard ratio for each population subgroup. The table is
|
|
7 |
#' created from `df`, a list of data frames returned by [extract_survival_subgroups()], with the statistics to include
|
|
8 |
#' specified via the `vars` parameter.
|
|
9 |
#'
|
|
10 |
#' A forest plot can be created from the resulting table using the [g_forest()] function.
|
|
11 |
#'
|
|
12 |
#' @inheritParams argument_convention
|
|
13 |
#' @inheritParams survival_coxph_pairwise
|
|
14 |
#' @param df (`list`)\cr list of data frames containing all analysis variables. List should be
|
|
15 |
#' created using [extract_survival_subgroups()].
|
|
16 |
#' @param vars (`character`)\cr the names of statistics to be reported among:
|
|
17 |
#' * `n_tot_events`: Total number of events per group.
|
|
18 |
#' * `n_events`: Number of events per group.
|
|
19 |
#' * `n_tot`: Total number of observations per group.
|
|
20 |
#' * `n`: Number of observations per group.
|
|
21 |
#' * `median`: Median survival time.
|
|
22 |
#' * `hr`: Hazard ratio.
|
|
23 |
#' * `ci`: Confidence interval of hazard ratio.
|
|
24 |
#' * `pval`: p-value of the effect.
|
|
25 |
#' Note, one of the statistics `n_tot` and `n_tot_events`, as well as both `hr` and `ci`
|
|
26 |
#' are required.
|
|
27 |
#' @param time_unit (`string`)\cr label with unit of median survival time. Default `NULL` skips displaying unit.
|
|
28 |
#'
|
|
29 |
#' @details These functions create a layout starting from a data frame which contains
|
|
30 |
#' the required statistics. Tables typically used as part of forest plot.
|
|
31 |
#'
|
|
32 |
#' @seealso [extract_survival_subgroups()]
|
|
33 |
#'
|
|
34 |
#' @examples
|
|
35 |
#' library(dplyr)
|
|
36 |
#'
|
|
37 |
#' adtte <- tern_ex_adtte
|
|
38 |
#'
|
|
39 |
#' # Save variable labels before data processing steps.
|
|
40 |
#' adtte_labels <- formatters::var_labels(adtte)
|
|
41 |
#'
|
|
42 |
#' adtte_f <- adtte %>%
|
|
43 |
#' filter(
|
|
44 |
#' PARAMCD == "OS",
|
|
45 |
#' ARM %in% c("B: Placebo", "A: Drug X"),
|
|
46 |
#' SEX %in% c("M", "F")
|
|
47 |
#' ) %>%
|
|
48 |
#' mutate(
|
|
49 |
#' # Reorder levels of ARM to display reference arm before treatment arm.
|
|
50 |
#' ARM = droplevels(forcats::fct_relevel(ARM, "B: Placebo")),
|
|
51 |
#' SEX = droplevels(SEX),
|
|
52 |
#' AVALU = as.character(AVALU),
|
|
53 |
#' is_event = CNSR == 0
|
|
54 |
#' )
|
|
55 |
#' labels <- c(
|
|
56 |
#' "ARM" = adtte_labels[["ARM"]],
|
|
57 |
#' "SEX" = adtte_labels[["SEX"]],
|
|
58 |
#' "AVALU" = adtte_labels[["AVALU"]],
|
|
59 |
#' "is_event" = "Event Flag"
|
|
60 |
#' )
|
|
61 |
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
|
|
62 |
#'
|
|
63 |
#' df <- extract_survival_subgroups(
|
|
64 |
#' variables = list(
|
|
65 |
#' tte = "AVAL",
|
|
66 |
#' is_event = "is_event",
|
|
67 |
#' arm = "ARM", subgroups = c("SEX", "BMRKR2")
|
|
68 |
#' ),
|
|
69 |
#' label_all = "Total Patients",
|
|
70 |
#' data = adtte_f
|
|
71 |
#' )
|
|
72 |
#' df
|
|
73 |
#'
|
|
74 |
#' df_grouped <- extract_survival_subgroups(
|
|
75 |
#' variables = list(
|
|
76 |
#' tte = "AVAL",
|
|
77 |
#' is_event = "is_event",
|
|
78 |
#' arm = "ARM", subgroups = c("SEX", "BMRKR2")
|
|
79 |
#' ),
|
|
80 |
#' data = adtte_f,
|
|
81 |
#' groups_lists = list(
|
|
82 |
#' BMRKR2 = list(
|
|
83 |
#' "low" = "LOW",
|
|
84 |
#' "low/medium" = c("LOW", "MEDIUM"),
|
|
85 |
#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
|
|
86 |
#' )
|
|
87 |
#' )
|
|
88 |
#' )
|
|
89 |
#' df_grouped
|
|
90 |
#'
|
|
91 |
#' @name survival_duration_subgroups
|
|
92 |
#' @order 1
|
|
93 |
NULL
|
|
94 | ||
95 |
#' Prepare survival data for population subgroups in data frames
|
|
96 |
#'
|
|
97 |
#' @description `r lifecycle::badge("stable")`
|
|
98 |
#'
|
|
99 |
#' Prepares estimates of median survival times and treatment hazard ratios for population subgroups in
|
|
100 |
#' data frames. Simple wrapper for [h_survtime_subgroups_df()] and [h_coxph_subgroups_df()]. Result is a `list`
|
|
101 |
#' of two `data.frame`s: `survtime` and `hr`. `variables` corresponds to the names of variables found in `data`,
|
|
102 |
#' passed as a named `list` and requires elements `tte`, `is_event`, `arm` and optionally `subgroups` and `strata`.
|
|
103 |
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
|
|
104 |
#'
|
|
105 |
#' @inheritParams argument_convention
|
|
106 |
#' @inheritParams survival_duration_subgroups
|
|
107 |
#' @inheritParams survival_coxph_pairwise
|
|
108 |
#'
|
|
109 |
#' @return A named `list` of two elements:
|
|
110 |
#' * `survtime`: A `data.frame` containing columns `arm`, `n`, `n_events`, `median`, `subgroup`, `var`,
|
|
111 |
#' `var_label`, and `row_type`.
|
|
112 |
#' * `hr`: A `data.frame` containing columns `arm`, `n_tot`, `n_tot_events`, `hr`, `lcl`, `ucl`, `conf_level`,
|
|
113 |
#' `pval`, `pval_label`, `subgroup`, `var`, `var_label`, and `row_type`.
|
|
114 |
#'
|
|
115 |
#' @seealso [survival_duration_subgroups]
|
|
116 |
#'
|
|
117 |
#' @export
|
|
118 |
extract_survival_subgroups <- function(variables, |
|
119 |
data,
|
|
120 |
groups_lists = list(), |
|
121 |
control = control_coxph(), |
|
122 |
label_all = "All Patients") { |
|
123 | 12x |
if ("strat" %in% names(variables)) { |
124 | ! |
warning( |
125 | ! |
"Warning: the `strat` element name of the `variables` list argument to `extract_survival_subgroups() ",
|
126 | ! |
"was deprecated in tern 0.9.4.\n ",
|
127 | ! |
"Please use the name `strata` instead of `strat` in the `variables` argument."
|
128 |
)
|
|
129 | ! |
variables[["strata"]] <- variables[["strat"]] |
130 |
}
|
|
131 | ||
132 | 12x |
df_survtime <- h_survtime_subgroups_df( |
133 | 12x |
variables,
|
134 | 12x |
data,
|
135 | 12x |
groups_lists = groups_lists, |
136 | 12x |
label_all = label_all |
137 |
)
|
|
138 | 12x |
df_hr <- h_coxph_subgroups_df( |
139 | 12x |
variables,
|
140 | 12x |
data,
|
141 | 12x |
groups_lists = groups_lists, |
142 | 12x |
control = control, |
143 | 12x |
label_all = label_all |
144 |
)
|
|
145 | ||
146 | 12x |
list(survtime = df_survtime, hr = df_hr) |
147 |
}
|
|
148 | ||
149 |
#' @describeIn survival_duration_subgroups Formatted analysis function which is used as
|
|
150 |
#' `afun` in `tabulate_survival_subgroups()`.
|
|
151 |
#'
|
|
152 |
#' @return
|
|
153 |
#' * `a_survival_subgroups()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
154 |
#'
|
|
155 |
#' @keywords internal
|
|
156 |
a_survival_subgroups <- function(df, |
|
157 |
labelstr = "", |
|
158 |
...,
|
|
159 |
.stats = NULL, |
|
160 |
.stat_names = NULL, |
|
161 |
.formats = NULL, |
|
162 |
.labels = NULL, |
|
163 |
.indent_mods = NULL) { |
|
164 |
# Check for additional parameters to the statistics function
|
|
165 | 335x |
dots_extra_args <- list(...) |
166 | 335x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
167 | 335x |
dots_extra_args$.additional_fun_parameters <- NULL |
168 | 335x |
cur_col_stat <- extra_afun_params$.var %||% .stats |
169 | ||
170 |
# Uniquely name & label rows
|
|
171 | 335x |
var_lvls <- if ("biomarker" %in% names(dots_extra_args) && "biomarker" %in% names(df)) { |
172 | 126x |
if ("overall" %in% names(dots_extra_args)) { # label rows for (nested) biomarker tables - e.g. "AGE", "BMRKR1" |
173 | 54x |
as.character(df$biomarker) |
174 | 335x |
} else { # data rows for (nested) biomarker tables - e.g. "AGE.LOW", "BMRKR1.Total Patients" |
175 | 72x |
paste(as.character(df$biomarker), as.character(df$subgroup), sep = ".") |
176 |
}
|
|
177 | 335x |
} else { # data rows for non-biomarker tables - e.g. "Total Patients", "F", "M" |
178 | 209x |
make.unique(as.character(df$subgroup)) |
179 |
}
|
|
180 | ||
181 |
# if empty, return NA
|
|
182 | 335x |
if (nrow(df) == 0) { |
183 | 1x |
return(in_rows(.list = list(NA) %>% stats::setNames(cur_col_stat))) |
184 |
}
|
|
185 | ||
186 |
# Main statistics taken from df
|
|
187 | 334x |
x_stats <- as.list(df) |
188 | ||
189 |
# Fill in formatting defaults
|
|
190 | 334x |
.stats <- get_stats("tabulate_survival_subgroups", stats_in = cur_col_stat) |
191 | 334x |
levels_per_stats <- rep(list(var_lvls), length(.stats)) %>% setNames(.stats) |
192 | 334x |
.formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) |
193 | 334x |
.labels <- get_labels_from_stats( |
194 | 334x |
.stats, .labels, levels_per_stats, |
195 |
# default labels are pre-determined in extract_*() function
|
|
196 | 334x |
tern_defaults = as.list(as.character(df$subgroup)) %>% setNames(var_lvls) |
197 |
)
|
|
198 | 334x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) |
199 | ||
200 | 334x |
x_stats <- lapply( |
201 | 334x |
.stats,
|
202 | 334x |
function(x) x_stats[[x]] %>% stats::setNames(var_lvls) |
203 |
) %>% |
|
204 | 334x |
stats::setNames(.stats) %>% |
205 | 334x |
.unlist_keep_nulls() |
206 | ||
207 |
# Auto format handling
|
|
208 | 334x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
209 | ||
210 |
# Get and check statistical names
|
|
211 | 334x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
212 | ||
213 | 334x |
in_rows( |
214 | 334x |
.list = x_stats, |
215 | 334x |
.formats = .formats, |
216 | 334x |
.names = names(.labels), |
217 | 334x |
.stat_names = .stat_names, |
218 | 334x |
.labels = .labels %>% .unlist_keep_nulls(), |
219 | 334x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
220 |
)
|
|
221 |
}
|
|
222 | ||
223 |
#' @describeIn survival_duration_subgroups Table-creating function which creates a table
|
|
224 |
#' summarizing survival by subgroup. This function is a wrapper for [rtables::analyze_colvars()]
|
|
225 |
#' and [rtables::summarize_row_groups()].
|
|
226 |
#'
|
|
227 |
#' @param label_all `r lifecycle::badge("deprecated")`\cr please assign the `label_all` parameter within the
|
|
228 |
#' [extract_survival_subgroups()] function when creating `df`.
|
|
229 |
#' @param riskdiff (`list`)\cr if a risk (proportion) difference column should be added, a list of settings to apply
|
|
230 |
#' within the column. See [control_riskdiff()] for details. If `NULL`, no risk difference column will be added. If
|
|
231 |
#' `riskdiff$arm_x` and `riskdiff$arm_y` are `NULL`, the first level of `df$survtime$arm` will be used as `arm_x`
|
|
232 |
#' and the second level as `arm_y`.
|
|
233 |
#'
|
|
234 |
#' @return An `rtables` table summarizing survival by subgroup.
|
|
235 |
#'
|
|
236 |
#' @examples
|
|
237 |
#' ## Table with default columns.
|
|
238 |
#' basic_table() %>%
|
|
239 |
#' tabulate_survival_subgroups(df, time_unit = adtte_f$AVALU[1])
|
|
240 |
#'
|
|
241 |
#' ## Table with a manually chosen set of columns: adding "pval".
|
|
242 |
#' basic_table() %>%
|
|
243 |
#' tabulate_survival_subgroups(
|
|
244 |
#' df = df,
|
|
245 |
#' vars = c("n_tot_events", "n_events", "median", "hr", "ci", "pval"),
|
|
246 |
#' time_unit = adtte_f$AVALU[1]
|
|
247 |
#' )
|
|
248 |
#'
|
|
249 |
#' @export
|
|
250 |
#' @order 2
|
|
251 |
tabulate_survival_subgroups <- function(lyt, |
|
252 |
df,
|
|
253 |
vars = c("n_tot_events", "n_events", "median", "hr", "ci"), |
|
254 |
groups_lists = list(), |
|
255 |
label_all = lifecycle::deprecated(), |
|
256 |
time_unit = NULL, |
|
257 |
riskdiff = NULL, |
|
258 |
na_str = default_na_str(), |
|
259 |
...,
|
|
260 |
.stat_names = NULL, |
|
261 |
.formats = NULL, |
|
262 |
.labels = NULL, |
|
263 |
.indent_mods = NULL) { |
|
264 | 11x |
checkmate::assert_list(riskdiff, null.ok = TRUE) |
265 | 11x |
checkmate::assert_true(any(c("n_tot", "n_tot_events") %in% vars)) |
266 | 11x |
checkmate::assert_true(all(c("hr", "ci") %in% vars)) |
267 | 11x |
if ("pval" %in% vars && !"pval" %in% names(df$hr)) { |
268 | ! |
warning( |
269 | ! |
'The "pval" statistic has been selected but is not present in "df" so it will not be included in the output ',
|
270 | ! |
'table. To include the "pval" statistic, please specify a p-value test when generating "df" via ',
|
271 | ! |
'the "method" argument to `extract_survival_subgroups()`. If method = "cmh", strata must also be specified via ',
|
272 | ! |
'the "variables" argument to `extract_survival_subgroups()`.'
|
273 |
)
|
|
274 |
}
|
|
275 | ||
276 | 11x |
if (lifecycle::is_present(label_all)) { |
277 | 1x |
lifecycle::deprecate_warn( |
278 | 1x |
"0.9.5", "tabulate_survival_subgroups(label_all)", |
279 | 1x |
details = |
280 | 1x |
"Please assign the `label_all` parameter within the `extract_survival_subgroups()` function when creating `df`."
|
281 |
)
|
|
282 |
}
|
|
283 | ||
284 |
# Process standard extra arguments
|
|
285 | 11x |
extra_args <- list(".stats" = vars) |
286 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
287 | 1x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
288 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
289 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
290 | ||
291 |
# Create "ci" column from "lcl" and "ucl"
|
|
292 | 11x |
df$hr$ci <- combine_vectors(df$hr$lcl, df$hr$ucl) |
293 | ||
294 |
# Extract additional parameters from df
|
|
295 | 11x |
conf_level <- df$hr$conf_level[1] |
296 | 11x |
method <- if ("pval_label" %in% names(df$hr)) df$hr$pval_label[1] else NULL |
297 | 11x |
colvars <- d_survival_subgroups_colvars(vars, conf_level = conf_level, method = method, time_unit = time_unit) |
298 | 11x |
survtime_vars <- intersect(colvars$vars, c("n", "n_events", "median")) |
299 | 11x |
hr_vars <- intersect(names(colvars$labels), c("n_tot", "n_tot_events", "hr", "ci", "pval")) |
300 | 11x |
colvars_survtime <- list(vars = survtime_vars, labels = colvars$labels[survtime_vars]) |
301 | 11x |
colvars_hr <- list(vars = hr_vars, labels = colvars$labels[hr_vars]) |
302 | ||
303 |
# Process additional arguments to the statistic function
|
|
304 | 11x |
extra_args <- c( |
305 | 11x |
extra_args,
|
306 | 11x |
groups_lists = list(groups_lists), conf_level = conf_level, method = method, |
307 |
...
|
|
308 |
)
|
|
309 | ||
310 |
# Adding additional info from layout to analysis function
|
|
311 | 11x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
312 | 11x |
formals(a_survival_subgroups) <- c(formals(a_survival_subgroups), extra_args[[".additional_fun_parameters"]]) |
313 | ||
314 |
# Add risk difference column
|
|
315 | 11x |
if (!is.null(riskdiff)) { |
316 | 2x |
if (is.null(riskdiff$arm_x)) riskdiff$arm_x <- levels(df$survtime$arm)[1] |
317 | 2x |
if (is.null(riskdiff$arm_y)) riskdiff$arm_y <- levels(df$survtime$arm)[2] |
318 | 2x |
colvars_hr$vars <- c(colvars_hr$vars, "riskdiff") |
319 | 2x |
colvars_hr$labels <- c(colvars_hr$labels, riskdiff = riskdiff$col_label) |
320 | 2x |
arm_cols <- paste(rep(c("n_events", "n_events", "n", "n")), c(riskdiff$arm_x, riskdiff$arm_y), sep = "_") |
321 | ||
322 | 2x |
df_prop_diff <- df$survtime %>% |
323 | 2x |
dplyr::select(-"median") %>% |
324 | 2x |
tidyr::pivot_wider( |
325 | 2x |
id_cols = c("subgroup", "var", "var_label", "row_type"), |
326 | 2x |
names_from = "arm", |
327 | 2x |
values_from = c("n", "n_events") |
328 |
) %>% |
|
329 | 2x |
dplyr::rowwise() %>% |
330 | 2x |
dplyr::mutate( |
331 | 2x |
riskdiff = stat_propdiff_ci( |
332 | 2x |
x = as.list(.data[[arm_cols[1]]]), |
333 | 2x |
y = as.list(.data[[arm_cols[2]]]), |
334 | 2x |
N_x = .data[[arm_cols[3]]], |
335 | 2x |
N_y = .data[[arm_cols[4]]], |
336 | 2x |
pct = riskdiff$pct |
337 |
)
|
|
338 |
) %>% |
|
339 | 2x |
dplyr::select(-dplyr::all_of(arm_cols)) |
340 | ||
341 | 2x |
df$hr <- df$hr %>% |
342 | 2x |
dplyr::left_join( |
343 | 2x |
df_prop_diff,
|
344 | 2x |
by = c("subgroup", "var", "var_label", "row_type") |
345 |
)
|
|
346 |
}
|
|
347 | ||
348 |
# Add columns from table_survtime (optional)
|
|
349 | 11x |
if (length(colvars_survtime$vars) > 0) { |
350 | 10x |
lyt_survtime <- split_cols_by(lyt = lyt, var = "arm") |
351 | 10x |
lyt_survtime <- split_cols_by_multivar( |
352 | 10x |
lyt = lyt_survtime, |
353 | 10x |
vars = colvars_survtime$vars, |
354 | 10x |
varlabels = colvars_survtime$labels |
355 |
)
|
|
356 | ||
357 |
# Add "All Patients" row
|
|
358 | 10x |
lyt_survtime <- split_rows_by( |
359 | 10x |
lyt = lyt_survtime, |
360 | 10x |
var = "row_type", |
361 | 10x |
split_fun = keep_split_levels("content"), |
362 | 10x |
nested = FALSE, |
363 | 10x |
child_labels = "hidden" |
364 |
)
|
|
365 | 10x |
lyt_survtime <- analyze_colvars( |
366 | 10x |
lyt = lyt_survtime, |
367 | 10x |
afun = a_survival_subgroups, |
368 | 10x |
na_str = na_str, |
369 | 10x |
extra_args = extra_args |
370 |
)
|
|
371 | ||
372 |
# Add analysis rows
|
|
373 | 10x |
if ("analysis" %in% df$survtime$row_type) { |
374 | 9x |
lyt_survtime <- split_rows_by( |
375 | 9x |
lyt = lyt_survtime, |
376 | 9x |
var = "row_type", |
377 | 9x |
split_fun = keep_split_levels("analysis"), |
378 | 9x |
nested = FALSE, |
379 | 9x |
child_labels = "hidden" |
380 |
)
|
|
381 | 9x |
lyt_survtime <- split_rows_by(lyt = lyt_survtime, var = "var_label", nested = TRUE) |
382 | 9x |
lyt_survtime <- analyze_colvars( |
383 | 9x |
lyt = lyt_survtime, |
384 | 9x |
afun = a_survival_subgroups, |
385 | 9x |
na_str = na_str, |
386 | 9x |
inclNAs = TRUE, |
387 | 9x |
extra_args = extra_args |
388 |
)
|
|
389 |
}
|
|
390 | ||
391 | 10x |
table_survtime <- build_table(lyt_survtime, df = df$survtime) |
392 |
} else { |
|
393 | 1x |
table_survtime <- NULL |
394 |
}
|
|
395 | ||
396 |
# Add columns from table_hr ("n_tot_events" or "n_tot", "hr" and "ci" required)
|
|
397 | 11x |
lyt_hr <- split_cols_by(lyt = lyt, var = "arm") |
398 | 11x |
lyt_hr <- split_cols_by_multivar( |
399 | 11x |
lyt = lyt_hr, |
400 | 11x |
vars = colvars_hr$vars, |
401 | 11x |
varlabels = colvars_hr$labels |
402 |
)
|
|
403 | ||
404 |
# Add "All Patients" row
|
|
405 | 11x |
lyt_hr <- split_rows_by( |
406 | 11x |
lyt = lyt_hr, |
407 | 11x |
var = "row_type", |
408 | 11x |
split_fun = keep_split_levels("content"), |
409 | 11x |
nested = FALSE, |
410 | 11x |
child_labels = "hidden" |
411 |
)
|
|
412 | 11x |
lyt_hr <- analyze_colvars( |
413 | 11x |
lyt = lyt_hr, |
414 | 11x |
afun = a_survival_subgroups, |
415 | 11x |
na_str = na_str, |
416 | 11x |
extra_args = extra_args |
417 |
) %>% |
|
418 | 11x |
append_topleft("Baseline Risk Factors") |
419 | ||
420 |
# Add analysis rows
|
|
421 | 11x |
if ("analysis" %in% df$survtime$row_type) { |
422 | 10x |
lyt_hr <- split_rows_by( |
423 | 10x |
lyt = lyt_hr, |
424 | 10x |
var = "row_type", |
425 | 10x |
split_fun = keep_split_levels("analysis"), |
426 | 10x |
nested = FALSE, |
427 | 10x |
child_labels = "hidden" |
428 |
)
|
|
429 | 10x |
lyt_hr <- split_rows_by(lyt = lyt_hr, var = "var_label", nested = TRUE) |
430 | 10x |
lyt_hr <- analyze_colvars( |
431 | 10x |
lyt = lyt_hr, |
432 | 10x |
afun = a_survival_subgroups, |
433 | 10x |
na_str = na_str, |
434 | 10x |
inclNAs = TRUE, |
435 | 10x |
extra_args = extra_args |
436 |
)
|
|
437 |
}
|
|
438 | ||
439 | 11x |
table_hr <- build_table(lyt_hr, df = df$hr) |
440 | ||
441 |
# Join tables, add forest plot attributes
|
|
442 | 11x |
n_tot_ids <- grep("^n_tot", colvars_hr$vars) |
443 | 11x |
if (is.null(table_survtime)) { |
444 | 1x |
result <- table_hr |
445 | 1x |
hr_id <- match("hr", colvars_hr$vars) |
446 | 1x |
ci_id <- match("ci", colvars_hr$vars) |
447 |
} else { |
|
448 | 10x |
result <- cbind_rtables(table_hr[, n_tot_ids], table_survtime, table_hr[, -n_tot_ids]) |
449 | 10x |
hr_id <- length(n_tot_ids) + ncol(table_survtime) + match("hr", colvars_hr$vars[-n_tot_ids]) |
450 | 10x |
ci_id <- length(n_tot_ids) + ncol(table_survtime) + match("ci", colvars_hr$vars[-n_tot_ids]) |
451 | 10x |
n_tot_ids <- seq_along(n_tot_ids) |
452 |
}
|
|
453 | 11x |
structure( |
454 | 11x |
result,
|
455 | 11x |
forest_header = paste0(rev(levels(df$survtime$arm)), "\nBetter"), |
456 | 11x |
col_x = hr_id, |
457 | 11x |
col_ci = ci_id, |
458 | 11x |
col_symbol_size = n_tot_ids[1] # for scaling the symbol sizes in forest plots |
459 |
)
|
|
460 |
}
|
|
461 | ||
462 |
#' Labels for column variables in survival duration by subgroup table
|
|
463 |
#'
|
|
464 |
#' @description `r lifecycle::badge("stable")`
|
|
465 |
#'
|
|
466 |
#' Internal function to check variables included in [tabulate_survival_subgroups()] and create column labels.
|
|
467 |
#'
|
|
468 |
#' @inheritParams tabulate_survival_subgroups
|
|
469 |
#' @inheritParams argument_convention
|
|
470 |
#' @param method (`string`)\cr p-value method for testing hazard ratio = 1.
|
|
471 |
#'
|
|
472 |
#' @return A `list` of variables and their labels to tabulate.
|
|
473 |
#'
|
|
474 |
#' @note At least one of `n_tot` and `n_tot_events` must be provided in `vars`.
|
|
475 |
#'
|
|
476 |
#' @export
|
|
477 |
d_survival_subgroups_colvars <- function(vars, |
|
478 |
conf_level,
|
|
479 |
method,
|
|
480 |
time_unit = NULL) { |
|
481 | 18x |
checkmate::assert_character(vars) |
482 | 18x |
checkmate::assert_string(time_unit, null.ok = TRUE) |
483 | 18x |
checkmate::assert_subset(c("hr", "ci"), vars) |
484 | 18x |
checkmate::assert_true(any(c("n_tot", "n_tot_events") %in% vars)) |
485 | 18x |
checkmate::assert_subset( |
486 | 18x |
vars,
|
487 | 18x |
c("n", "n_events", "median", "n_tot", "n_tot_events", "hr", "ci", "pval") |
488 |
)
|
|
489 | ||
490 | 18x |
propcase_time_label <- if (!is.null(time_unit)) { |
491 | 17x |
paste0("Median (", time_unit, ")") |
492 |
} else { |
|
493 | 1x |
"Median"
|
494 |
}
|
|
495 | ||
496 | 18x |
varlabels <- c( |
497 | 18x |
n = "n", |
498 | 18x |
n_events = "Events", |
499 | 18x |
median = propcase_time_label, |
500 | 18x |
n_tot = "Total n", |
501 | 18x |
n_tot_events = "Total Events", |
502 | 18x |
hr = "Hazard Ratio", |
503 | 18x |
ci = paste0(100 * conf_level, "% Wald CI"), |
504 | 18x |
pval = method |
505 |
)
|
|
506 | ||
507 | 18x |
colvars <- vars |
508 | ||
509 | 18x |
list( |
510 | 18x |
vars = colvars, |
511 | 18x |
labels = varlabels[vars] |
512 |
)
|
|
513 |
}
|
1 |
#' Cumulative counts of numeric variable by thresholds
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [count_cumulative()] creates a layout element to calculate cumulative counts of values in a
|
|
6 |
#' numeric variable that are less than, less or equal to, greater than, or greater or equal to user-specified
|
|
7 |
#' threshold values.
|
|
8 |
#'
|
|
9 |
#' This function analyzes numeric variable `vars` against the threshold values supplied to the `thresholds`
|
|
10 |
#' argument as a numeric vector. Whether counts should include the threshold values, and whether to count
|
|
11 |
#' values lower or higher than the threshold values can be set via the `include_eq` and `lower_tail`
|
|
12 |
#' parameters, respectively.
|
|
13 |
#'
|
|
14 |
#' @inheritParams h_count_cumulative
|
|
15 |
#' @inheritParams argument_convention
|
|
16 |
#' @param thresholds (`numeric`)\cr vector of cutoff values for the counts.
|
|
17 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
18 |
#'
|
|
19 |
#' Options are: ``r shQuote(get_stats("count_cumulative"), type = "sh")``
|
|
20 |
#'
|
|
21 |
#' @seealso Relevant helper function [h_count_cumulative()], and descriptive function [d_count_cumulative()].
|
|
22 |
#'
|
|
23 |
#' @name count_cumulative
|
|
24 |
#' @order 1
|
|
25 |
NULL
|
|
26 | ||
27 |
#' Helper function for `s_count_cumulative()`
|
|
28 |
#'
|
|
29 |
#' @description `r lifecycle::badge("stable")`
|
|
30 |
#'
|
|
31 |
#' Helper function to calculate count and fraction of `x` values in the lower or upper tail given a threshold.
|
|
32 |
#'
|
|
33 |
#' @inheritParams argument_convention
|
|
34 |
#' @param threshold (`numeric(1)`)\cr a cutoff value as threshold to count values of `x`.
|
|
35 |
#' @param lower_tail (`flag`)\cr whether to count lower tail, default is `TRUE`.
|
|
36 |
#' @param include_eq (`flag`)\cr whether to include value equal to the `threshold` in
|
|
37 |
#' count, default is `TRUE`.
|
|
38 |
#'
|
|
39 |
#' @return A named vector with items:
|
|
40 |
#' * `count`: the count of values less than, less or equal to, greater than, or greater or equal to a threshold
|
|
41 |
#' of user specification.
|
|
42 |
#' * `fraction`: the fraction of the count.
|
|
43 |
#'
|
|
44 |
#' @seealso [count_cumulative]
|
|
45 |
#'
|
|
46 |
#' @examples
|
|
47 |
#' set.seed(1, kind = "Mersenne-Twister")
|
|
48 |
#' x <- c(sample(1:10, 10), NA)
|
|
49 |
#' .N_col <- length(x)
|
|
50 |
#'
|
|
51 |
#' h_count_cumulative(x, 5, denom = .N_col)
|
|
52 |
#' h_count_cumulative(x, 5, lower_tail = FALSE, include_eq = FALSE, na_rm = FALSE, denom = .N_col)
|
|
53 |
#' h_count_cumulative(x, 0, lower_tail = FALSE, denom = .N_col)
|
|
54 |
#' h_count_cumulative(x, 100, lower_tail = FALSE, denom = .N_col)
|
|
55 |
#'
|
|
56 |
#' @export
|
|
57 |
h_count_cumulative <- function(x, |
|
58 |
threshold,
|
|
59 |
lower_tail = TRUE, |
|
60 |
include_eq = TRUE, |
|
61 |
na_rm = TRUE, |
|
62 |
denom) { |
|
63 | 48x |
checkmate::assert_numeric(x) |
64 | 48x |
checkmate::assert_numeric(threshold) |
65 | 48x |
checkmate::assert_numeric(denom) |
66 | 48x |
checkmate::assert_flag(lower_tail) |
67 | 48x |
checkmate::assert_flag(include_eq) |
68 | 48x |
checkmate::assert_flag(na_rm) |
69 | ||
70 | 48x |
is_keep <- if (na_rm) !is.na(x) else rep(TRUE, length(x)) |
71 | 48x |
count <- if (lower_tail && include_eq) { |
72 | 19x |
length(x[is_keep & x <= threshold]) |
73 | 48x |
} else if (lower_tail && !include_eq) { |
74 | ! |
length(x[is_keep & x < threshold]) |
75 | 48x |
} else if (!lower_tail && include_eq) { |
76 | 14x |
length(x[is_keep & x >= threshold]) |
77 | 48x |
} else if (!lower_tail && !include_eq) { |
78 | 15x |
length(x[is_keep & x > threshold]) |
79 |
}
|
|
80 | ||
81 | 48x |
result <- c( |
82 | 48x |
count = count, |
83 | 48x |
fraction = if (count == 0 && denom == 0) 0 else count / denom |
84 |
)
|
|
85 | 48x |
result
|
86 |
}
|
|
87 | ||
88 |
#' Description of cumulative count
|
|
89 |
#'
|
|
90 |
#' @description `r lifecycle::badge("stable")`
|
|
91 |
#'
|
|
92 |
#' This is a helper function that describes the analysis in [s_count_cumulative()].
|
|
93 |
#'
|
|
94 |
#' @inheritParams h_count_cumulative
|
|
95 |
#'
|
|
96 |
#' @return Labels for [s_count_cumulative()].
|
|
97 |
#'
|
|
98 |
#' @export
|
|
99 |
d_count_cumulative <- function(threshold, lower_tail = TRUE, include_eq = TRUE) { |
|
100 | 46x |
checkmate::assert_numeric(threshold) |
101 | 46x |
lg <- if (lower_tail) "<" else ">" |
102 | 46x |
eq <- if (include_eq) "=" else "" |
103 | 46x |
paste0(lg, eq, " ", threshold) |
104 |
}
|
|
105 | ||
106 |
#' @describeIn count_cumulative Statistics function that produces a named list given a numeric vector of thresholds.
|
|
107 |
#'
|
|
108 |
#' @return
|
|
109 |
#' * `s_count_cumulative()` returns a named list of `count_fraction`s: a list with each `thresholds` value as a
|
|
110 |
#' component, each component containing a vector for the count and fraction.
|
|
111 |
#'
|
|
112 |
#' @keywords internal
|
|
113 |
s_count_cumulative <- function(x, |
|
114 |
thresholds,
|
|
115 |
lower_tail = TRUE, |
|
116 |
include_eq = TRUE, |
|
117 |
denom = c("N_col", "n", "N_row"), |
|
118 |
.N_col, # nolint |
|
119 |
.N_row, # nolint |
|
120 |
na_rm = TRUE, |
|
121 |
...) { |
|
122 | 23x |
checkmate::assert_numeric(thresholds, min.len = 1, any.missing = FALSE) |
123 | ||
124 | 23x |
denom <- match.arg(denom) %>% |
125 | 23x |
switch( |
126 | 23x |
n = length(x), |
127 | 23x |
N_row = .N_row, |
128 | 23x |
N_col = .N_col |
129 |
)
|
|
130 | ||
131 | 23x |
count_fraction_list <- Map(function(thres) { |
132 | 46x |
result <- h_count_cumulative(x, thres, lower_tail, include_eq, na_rm = na_rm, denom = denom) |
133 | 46x |
label <- d_count_cumulative(thres, lower_tail, include_eq) |
134 | 46x |
formatters::with_label(result, label) |
135 | 23x |
}, thresholds) |
136 | ||
137 | 23x |
names(count_fraction_list) <- thresholds |
138 | 23x |
list(count_fraction = count_fraction_list) |
139 |
}
|
|
140 | ||
141 |
#' @describeIn count_cumulative Formatted analysis function which is used as `afun`
|
|
142 |
#' in `count_cumulative()`.
|
|
143 |
#'
|
|
144 |
#' @return
|
|
145 |
#' * `a_count_cumulative()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
146 |
#'
|
|
147 |
#' @keywords internal
|
|
148 |
a_count_cumulative <- function(x, |
|
149 |
...,
|
|
150 |
.stats = NULL, |
|
151 |
.stat_names = NULL, |
|
152 |
.formats = NULL, |
|
153 |
.labels = NULL, |
|
154 |
.indent_mods = NULL) { |
|
155 | 14x |
dots_extra_args <- list(...) |
156 | ||
157 |
# Check if there are user-defined functions
|
|
158 | 14x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
159 | 14x |
.stats <- default_and_custom_stats_list$all_stats |
160 | 14x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
161 | ||
162 |
# Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params)
|
|
163 | 14x |
extra_afun_params <- retrieve_extra_afun_params( |
164 | 14x |
names(dots_extra_args$.additional_fun_parameters) |
165 |
)
|
|
166 | 14x |
dots_extra_args$.additional_fun_parameters <- NULL # After extraction we do not need them anymore |
167 | ||
168 |
# Main statistical functions application
|
|
169 | 14x |
x_stats <- .apply_stat_functions( |
170 | 14x |
default_stat_fnc = s_count_cumulative, |
171 | 14x |
custom_stat_fnc_list = custom_stat_functions, |
172 | 14x |
args_list = c( |
173 | 14x |
x = list(x), |
174 | 14x |
extra_afun_params,
|
175 | 14x |
dots_extra_args
|
176 |
)
|
|
177 |
)
|
|
178 | ||
179 |
# Fill in with stats defaults if needed
|
|
180 | 14x |
.stats <- get_stats("count_cumulative", |
181 | 14x |
stats_in = .stats, |
182 | 14x |
custom_stats_in = names(custom_stat_functions) |
183 |
)
|
|
184 | ||
185 | 14x |
x_stats <- x_stats[.stats] |
186 | 14x |
levels_per_stats <- lapply(x_stats, names) |
187 | ||
188 |
# Fill in formats/indents/labels with custom input and defaults
|
|
189 | 14x |
.formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) |
190 | 14x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) |
191 | 14x |
.labels <- get_labels_from_stats( |
192 | 14x |
.stats, .labels, levels_per_stats, |
193 | 14x |
label_attr_from_stats = sapply(.unlist_keep_nulls(x_stats), attr, "label") |
194 |
)
|
|
195 | ||
196 |
# Unlist stats
|
|
197 | 14x |
x_stats <- x_stats %>% |
198 | 14x |
.unlist_keep_nulls() %>% |
199 | 14x |
setNames(names(.formats)) |
200 | ||
201 |
# Auto format handling
|
|
202 | 14x |
.formats <- apply_auto_formatting( |
203 | 14x |
.formats,
|
204 | 14x |
x_stats,
|
205 | 14x |
extra_afun_params$.df_row, |
206 | 14x |
extra_afun_params$.var |
207 |
)
|
|
208 | ||
209 |
# Get and check statistical names from defaults
|
|
210 | 14x |
.stat_names <- get_stat_names(x_stats, .stat_names) # note is x_stats |
211 | ||
212 | 14x |
in_rows( |
213 | 14x |
.list = x_stats, |
214 | 14x |
.formats = .formats, |
215 | 14x |
.names = names(.labels), |
216 | 14x |
.stat_names = .stat_names, |
217 | 14x |
.labels = .labels %>% .unlist_keep_nulls(), |
218 | 14x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
219 |
)
|
|
220 |
}
|
|
221 | ||
222 |
#' @describeIn count_cumulative Layout-creating function which can take statistics function arguments
|
|
223 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
224 |
#'
|
|
225 |
#' @return
|
|
226 |
#' * `count_cumulative()` returns a layout object suitable for passing to further layouting functions,
|
|
227 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
228 |
#' the statistics from `s_count_cumulative()` to the table layout.
|
|
229 |
#'
|
|
230 |
#' @examples
|
|
231 |
#' basic_table() %>%
|
|
232 |
#' split_cols_by("ARM") %>%
|
|
233 |
#' add_colcounts() %>%
|
|
234 |
#' count_cumulative(
|
|
235 |
#' vars = "AGE",
|
|
236 |
#' thresholds = c(40, 60)
|
|
237 |
#' ) %>%
|
|
238 |
#' build_table(tern_ex_adsl)
|
|
239 |
#'
|
|
240 |
#' @export
|
|
241 |
#' @order 2
|
|
242 |
count_cumulative <- function(lyt, |
|
243 |
vars,
|
|
244 |
thresholds,
|
|
245 |
lower_tail = TRUE, |
|
246 |
include_eq = TRUE, |
|
247 |
var_labels = vars, |
|
248 |
show_labels = "visible", |
|
249 |
na_str = default_na_str(), |
|
250 |
nested = TRUE, |
|
251 |
table_names = vars, |
|
252 |
...,
|
|
253 |
na_rm = TRUE, |
|
254 |
.stats = c("count_fraction"), |
|
255 |
.stat_names = NULL, |
|
256 |
.formats = NULL, |
|
257 |
.labels = NULL, |
|
258 |
.indent_mods = NULL) { |
|
259 |
# Depending on main functions
|
|
260 | 6x |
extra_args <- list( |
261 | 6x |
"na_rm" = na_rm, |
262 | 6x |
"thresholds" = thresholds, |
263 | 6x |
"lower_tail" = lower_tail, |
264 | 6x |
"include_eq" = include_eq, |
265 |
...
|
|
266 |
)
|
|
267 | ||
268 |
# Needed defaults
|
|
269 | 6x |
if (!is.null(.stats)) extra_args[[".stats"]] <- .stats |
270 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
271 | ! |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
272 | 1x |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
273 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
274 | ||
275 |
# Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params)
|
|
276 | 6x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
277 | 6x |
formals(a_count_cumulative) <- c( |
278 | 6x |
formals(a_count_cumulative), |
279 | 6x |
extra_args[[".additional_fun_parameters"]] |
280 |
)
|
|
281 | ||
282 |
# Main {rtables} structural call
|
|
283 | 6x |
analyze( |
284 | 6x |
lyt,
|
285 | 6x |
vars,
|
286 | 6x |
afun = a_count_cumulative, |
287 | 6x |
na_str = na_str, |
288 | 6x |
inclNAs = !na_rm, |
289 | 6x |
table_names = table_names, |
290 | 6x |
var_labels = var_labels, |
291 | 6x |
show_labels = show_labels, |
292 | 6x |
nested = nested, |
293 | 6x |
extra_args = extra_args |
294 |
)
|
|
295 |
}
|
1 |
#' Univariate formula special term
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The special term `univariate` indicate that the model should be fitted individually for
|
|
6 |
#' every variable included in univariate.
|
|
7 |
#'
|
|
8 |
#' @param x (`character`)\cr a vector of variable names separated by commas.
|
|
9 |
#'
|
|
10 |
#' @return When used within a model formula, produces univariate models for each variable provided.
|
|
11 |
#'
|
|
12 |
#' @details
|
|
13 |
#' If provided alongside with pairwise specification, the model
|
|
14 |
#' `y ~ ARM + univariate(SEX, AGE, RACE)` lead to the study and comparison of the models
|
|
15 |
#' + `y ~ ARM`
|
|
16 |
#' + `y ~ ARM + SEX`
|
|
17 |
#' + `y ~ ARM + AGE`
|
|
18 |
#' + `y ~ ARM + RACE`
|
|
19 |
#'
|
|
20 |
#' @export
|
|
21 |
univariate <- function(x) { |
|
22 | 2x |
structure(x, varname = deparse(substitute(x))) |
23 |
}
|
|
24 | ||
25 |
# Get the right-hand-term of a formula
|
|
26 |
rht <- function(x) { |
|
27 | 4x |
checkmate::assert_formula(x) |
28 | 4x |
y <- as.character(rev(x)[[1]]) |
29 | 4x |
return(y) |
30 |
}
|
|
31 | ||
32 |
#' Hazard ratio estimation in interactions
|
|
33 |
#'
|
|
34 |
#' This function estimates the hazard ratios between arms when an interaction variable is given with
|
|
35 |
#' specific values.
|
|
36 |
#'
|
|
37 |
#' @param variable,given (`character(2)`)\cr names of the two variables in the interaction. We seek the estimation of
|
|
38 |
#' the levels of `variable` given the levels of `given`.
|
|
39 |
#' @param lvl_var,lvl_given (`character`)\cr corresponding levels given by [levels()].
|
|
40 |
#' @param mmat (named `numeric`) a vector filled with `0`s used as a template to obtain the design matrix.
|
|
41 |
#' @param coef (`numeric`)\cr vector of estimated coefficients.
|
|
42 |
#' @param vcov (`matrix`)\cr variance-covariance matrix of underlying model.
|
|
43 |
#' @param conf_level (`proportion`)\cr confidence level of estimate intervals.
|
|
44 |
#'
|
|
45 |
#' @details Given the cox regression investigating the effect of Arm (A, B, C; reference A)
|
|
46 |
#' and Sex (F, M; reference Female). The model is abbreviated: y ~ Arm + Sex + Arm x Sex.
|
|
47 |
#' The cox regression estimates the coefficients along with a variance-covariance matrix for:
|
|
48 |
#'
|
|
49 |
#' - b1 (arm b), b2 (arm c)
|
|
50 |
#' - b3 (sex m)
|
|
51 |
#' - b4 (arm b: sex m), b5 (arm c: sex m)
|
|
52 |
#'
|
|
53 |
#' Given that I want an estimation of the Hazard Ratio for arm C/sex M, the estimation
|
|
54 |
#' will be given in reference to arm A/Sex M by exp(b2 + b3 + b5)/ exp(b3) = exp(b2 + b5),
|
|
55 |
#' therefore the interaction coefficient is given by b2 + b5 while the standard error is obtained
|
|
56 |
#' as $1.96 * sqrt(Var b2 + Var b5 + 2 * covariance (b2,b5))$ for a confidence level of 0.95.
|
|
57 |
#'
|
|
58 |
#' @return A list of matrices (one per level of variable) with rows corresponding to the combinations of
|
|
59 |
#' `variable` and `given`, with columns:
|
|
60 |
#' * `coef_hat`: Estimation of the coefficient.
|
|
61 |
#' * `coef_se`: Standard error of the estimation.
|
|
62 |
#' * `hr`: Hazard ratio.
|
|
63 |
#' * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio.
|
|
64 |
#'
|
|
65 |
#' @seealso [s_cox_multivariate()].
|
|
66 |
#'
|
|
67 |
#' @examples
|
|
68 |
#' library(dplyr)
|
|
69 |
#' library(survival)
|
|
70 |
#'
|
|
71 |
#' ADSL <- tern_ex_adsl %>%
|
|
72 |
#' filter(SEX %in% c("F", "M"))
|
|
73 |
#'
|
|
74 |
#' adtte <- tern_ex_adtte %>% filter(PARAMCD == "PFS")
|
|
75 |
#' adtte$ARMCD <- droplevels(adtte$ARMCD)
|
|
76 |
#' adtte$SEX <- droplevels(adtte$SEX)
|
|
77 |
#'
|
|
78 |
#' mod <- coxph(
|
|
79 |
#' formula = Surv(time = AVAL, event = 1 - CNSR) ~ (SEX + ARMCD)^2,
|
|
80 |
#' data = adtte
|
|
81 |
#' )
|
|
82 |
#'
|
|
83 |
#' mmat <- stats::model.matrix(mod)[1, ]
|
|
84 |
#' mmat[!mmat == 0] <- 0
|
|
85 |
#'
|
|
86 |
#' @keywords internal
|
|
87 |
estimate_coef <- function(variable, given, |
|
88 |
lvl_var, lvl_given, |
|
89 |
coef,
|
|
90 |
mmat,
|
|
91 |
vcov,
|
|
92 |
conf_level = 0.95) { |
|
93 | 8x |
var_lvl <- paste0(variable, lvl_var[-1]) # [-1]: reference level |
94 | 8x |
giv_lvl <- paste0(given, lvl_given) |
95 | ||
96 | 8x |
design_mat <- expand.grid(variable = var_lvl, given = giv_lvl) |
97 | 8x |
design_mat <- design_mat[order(design_mat$variable, design_mat$given), ] |
98 | 8x |
design_mat <- within( |
99 | 8x |
data = design_mat, |
100 | 8x |
expr = { |
101 | 8x |
inter <- paste0(variable, ":", given) |
102 | 8x |
rev_inter <- paste0(given, ":", variable) |
103 |
}
|
|
104 |
)
|
|
105 | ||
106 | 8x |
split_by_variable <- design_mat$variable |
107 | 8x |
interaction_names <- paste(design_mat$variable, design_mat$given, sep = "/") |
108 | ||
109 | 8x |
design_mat <- apply( |
110 | 8x |
X = design_mat, MARGIN = 1, FUN = function(x) { |
111 | 27x |
mmat[names(mmat) %in% x[-which(names(x) == "given")]] <- 1 |
112 | 27x |
return(mmat) |
113 |
}
|
|
114 |
)
|
|
115 | 8x |
colnames(design_mat) <- interaction_names |
116 | ||
117 | 8x |
betas <- as.matrix(coef) |
118 | ||
119 | 8x |
coef_hat <- t(design_mat) %*% betas |
120 | 8x |
dimnames(coef_hat)[2] <- "coef" |
121 | ||
122 | 8x |
coef_se <- apply(design_mat, 2, function(x) { |
123 | 27x |
vcov_el <- as.logical(x) |
124 | 27x |
y <- vcov[vcov_el, vcov_el] |
125 | 27x |
y <- sum(y) |
126 | 27x |
y <- sqrt(y) |
127 | 27x |
return(y) |
128 |
}) |
|
129 | ||
130 | 8x |
q_norm <- stats::qnorm((1 + conf_level) / 2) |
131 | 8x |
y <- cbind(coef_hat, `se(coef)` = coef_se) |
132 | ||
133 | 8x |
y <- apply(y, 1, function(x) { |
134 | 27x |
x["hr"] <- exp(x["coef"]) |
135 | 27x |
x["lcl"] <- exp(x["coef"] - q_norm * x["se(coef)"]) |
136 | 27x |
x["ucl"] <- exp(x["coef"] + q_norm * x["se(coef)"]) |
137 | ||
138 | 27x |
return(x) |
139 |
}) |
|
140 | ||
141 | 8x |
y <- t(y) |
142 | 8x |
y <- by(y, split_by_variable, identity) |
143 | 8x |
y <- lapply(y, as.matrix) |
144 | ||
145 | 8x |
attr(y, "details") <- paste0( |
146 | 8x |
"Estimations of ", variable, |
147 | 8x |
" hazard ratio given the level of ", given, " compared to ", |
148 | 8x |
variable, " level ", lvl_var[1], "." |
149 |
)
|
|
150 | 8x |
return(y) |
151 |
}
|
|
152 | ||
153 |
#' `tryCatch` around `car::Anova`
|
|
154 |
#'
|
|
155 |
#' Captures warnings when executing [car::Anova].
|
|
156 |
#'
|
|
157 |
#' @inheritParams car::Anova
|
|
158 |
#'
|
|
159 |
#' @return A list with item `aov` for the result of the model and `error_text` for the captured warnings.
|
|
160 |
#'
|
|
161 |
#' @examples
|
|
162 |
#' # `car::Anova` on cox regression model including strata and expected
|
|
163 |
#' # a likelihood ratio test triggers a warning as only Wald method is
|
|
164 |
#' # accepted.
|
|
165 |
#'
|
|
166 |
#' library(survival)
|
|
167 |
#'
|
|
168 |
#' mod <- coxph(
|
|
169 |
#' formula = Surv(time = futime, event = fustat) ~ factor(rx) + strata(ecog.ps),
|
|
170 |
#' data = ovarian
|
|
171 |
#' )
|
|
172 |
#'
|
|
173 |
#' @keywords internal
|
|
174 |
try_car_anova <- function(mod, |
|
175 |
test.statistic) { # nolint |
|
176 | 2x |
y <- tryCatch( |
177 | 2x |
withCallingHandlers( |
178 | 2x |
expr = { |
179 | 2x |
warn_text <- c() |
180 | 2x |
list( |
181 | 2x |
aov = car::Anova( |
182 | 2x |
mod,
|
183 | 2x |
test.statistic = test.statistic, |
184 | 2x |
type = "III" |
185 |
),
|
|
186 | 2x |
warn_text = warn_text |
187 |
)
|
|
188 |
},
|
|
189 | 2x |
warning = function(w) { |
190 |
# If a warning is detected it is handled as "w".
|
|
191 | ! |
warn_text <<- trimws(paste0("Warning in `try_car_anova`: ", w)) |
192 | ||
193 |
# A warning is sometimes expected, then, we want to restart
|
|
194 |
# the execution while ignoring the warning.
|
|
195 | ! |
invokeRestart("muffleWarning") |
196 |
}
|
|
197 |
),
|
|
198 | 2x |
finally = { |
199 |
}
|
|
200 |
)
|
|
201 | ||
202 | 2x |
return(y) |
203 |
}
|
|
204 | ||
205 |
#' Fit a Cox regression model and ANOVA
|
|
206 |
#'
|
|
207 |
#' The functions derives the effect p-values using [car::Anova()] from [survival::coxph()] results.
|
|
208 |
#'
|
|
209 |
#' @inheritParams t_coxreg
|
|
210 |
#'
|
|
211 |
#' @return A list with items `mod` (results of [survival::coxph()]), `msum` (result of `summary`) and
|
|
212 |
#' `aov` (result of [car::Anova()]).
|
|
213 |
#'
|
|
214 |
#' @noRd
|
|
215 |
fit_n_aov <- function(formula, |
|
216 |
data = data, |
|
217 |
conf_level = conf_level, |
|
218 |
pval_method = c("wald", "likelihood"), |
|
219 |
...) { |
|
220 | 1x |
pval_method <- match.arg(pval_method) |
221 | ||
222 | 1x |
environment(formula) <- environment() |
223 | 1x |
suppressWarnings({ |
224 |
# We expect some warnings due to coxph which fails strict programming.
|
|
225 | 1x |
mod <- survival::coxph(formula, data = data, ...) |
226 | 1x |
msum <- summary(mod, conf.int = conf_level) |
227 |
}) |
|
228 | ||
229 | 1x |
aov <- try_car_anova( |
230 | 1x |
mod,
|
231 | 1x |
test.statistic = switch(pval_method, |
232 | 1x |
"wald" = "Wald", |
233 | 1x |
"likelihood" = "LR" |
234 |
)
|
|
235 |
)
|
|
236 | ||
237 | 1x |
warn_attr <- aov$warn_text |
238 | ! |
if (!is.null(aov$warn_text)) message(warn_attr) |
239 | ||
240 | 1x |
aov <- aov$aov |
241 | 1x |
y <- list(mod = mod, msum = msum, aov = aov) |
242 | 1x |
attr(y, "message") <- warn_attr |
243 | ||
244 | 1x |
return(y) |
245 |
}
|
|
246 | ||
247 |
# argument_checks
|
|
248 |
check_formula <- function(formula) { |
|
249 | 1x |
if (!(inherits(formula, "formula"))) { |
250 | 1x |
stop("Check `formula`. A formula should resemble `Surv(time = AVAL, event = 1 - CNSR) ~ study_arm(ARMCD)`.") |
251 |
}
|
|
252 | ||
253 | ! |
invisible() |
254 |
}
|
|
255 | ||
256 |
check_covariate_formulas <- function(covariates) { |
|
257 | 1x |
if (!all(vapply(X = covariates, FUN = inherits, what = "formula", FUN.VALUE = TRUE)) || is.null(covariates)) { |
258 | 1x |
stop("Check `covariates`, it should be a list of right-hand-term formulas, e.g. list(Age = ~AGE).") |
259 |
}
|
|
260 | ||
261 | ! |
invisible() |
262 |
}
|
|
263 | ||
264 |
name_covariate_names <- function(covariates) { |
|
265 | 1x |
miss_names <- names(covariates) == "" |
266 | 1x |
no_names <- is.null(names(covariates)) |
267 | ! |
if (any(miss_names)) names(covariates)[miss_names] <- vapply(covariates[miss_names], FUN = rht, FUN.VALUE = "name") |
268 | ! |
if (no_names) names(covariates) <- vapply(covariates, FUN = rht, FUN.VALUE = "name") |
269 | 1x |
return(covariates) |
270 |
}
|
|
271 | ||
272 |
check_increments <- function(increments, covariates) { |
|
273 | 1x |
if (!is.null(increments)) { |
274 | 1x |
covariates <- vapply(covariates, FUN = rht, FUN.VALUE = "name") |
275 | 1x |
lapply( |
276 | 1x |
X = names(increments), FUN = function(x) { |
277 | 3x |
if (!x %in% covariates) { |
278 | 1x |
warning( |
279 | 1x |
paste( |
280 | 1x |
"Check `increments`, the `increment` for ", x, |
281 | 1x |
"doesn't match any names in investigated covariate(s)."
|
282 |
)
|
|
283 |
)
|
|
284 |
}
|
|
285 |
}
|
|
286 |
)
|
|
287 |
}
|
|
288 | ||
289 | 1x |
invisible() |
290 |
}
|
|
291 | ||
292 |
#' Multivariate Cox model - summarized results
|
|
293 |
#'
|
|
294 |
#' Analyses based on multivariate Cox model are usually not performed for the Controlled Substance Reporting or
|
|
295 |
#' regulatory documents but serve exploratory purposes only (e.g., for publication). In practice, the model usually
|
|
296 |
#' includes only the main effects (without interaction terms). It produces the hazard ratio estimates for each of the
|
|
297 |
#' covariates included in the model.
|
|
298 |
#' The analysis follows the same principles (e.g., stratified vs. unstratified analysis and tie handling) as the
|
|
299 |
#' usual Cox model analysis. Since there is usually no pre-specified hypothesis testing for such analysis,
|
|
300 |
#' the p.values need to be interpreted with caution. (**Statistical Analysis of Clinical Trials Data with R**,
|
|
301 |
#' `NEST's bookdown`)
|
|
302 |
#'
|
|
303 |
#' @param formula (`formula`)\cr a formula corresponding to the investigated [survival::Surv()] survival model
|
|
304 |
#' including covariates.
|
|
305 |
#' @param data (`data.frame`)\cr a data frame which includes the variable in formula and covariates.
|
|
306 |
#' @param conf_level (`proportion`)\cr the confidence level for the hazard ratio interval estimations. Default is 0.95.
|
|
307 |
#' @param pval_method (`string`)\cr the method used for the estimation of p-values, should be one of
|
|
308 |
#' `"wald"` (default) or `"likelihood"`.
|
|
309 |
#' @param ... optional parameters passed to [survival::coxph()]. Can include `ties`, a character string specifying the
|
|
310 |
#' method for tie handling, one of `exact` (default), `efron`, `breslow`.
|
|
311 |
#'
|
|
312 |
#' @return A `list` with elements `mod`, `msum`, `aov`, and `coef_inter`.
|
|
313 |
#'
|
|
314 |
#' @details The output is limited to single effect terms. Work in ongoing for estimation of interaction terms
|
|
315 |
#' but is out of scope as defined by the Global Data Standards Repository
|
|
316 |
#' (**`GDS_Standard_TLG_Specs_Tables_2.doc`**).
|
|
317 |
#'
|
|
318 |
#' @seealso [estimate_coef()].
|
|
319 |
#'
|
|
320 |
#' @examples
|
|
321 |
#' library(dplyr)
|
|
322 |
#'
|
|
323 |
#' adtte <- tern_ex_adtte
|
|
324 |
#' adtte_f <- subset(adtte, PARAMCD == "OS") # _f: filtered
|
|
325 |
#' adtte_f <- filter(
|
|
326 |
#' adtte_f,
|
|
327 |
#' PARAMCD == "OS" &
|
|
328 |
#' SEX %in% c("F", "M") &
|
|
329 |
#' RACE %in% c("ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE")
|
|
330 |
#' )
|
|
331 |
#' adtte_f$SEX <- droplevels(adtte_f$SEX)
|
|
332 |
#' adtte_f$RACE <- droplevels(adtte_f$RACE)
|
|
333 |
#'
|
|
334 |
#' @keywords internal
|
|
335 |
s_cox_multivariate <- function(formula, data, |
|
336 |
conf_level = 0.95, |
|
337 |
pval_method = c("wald", "likelihood"), |
|
338 |
...) { |
|
339 | 1x |
tf <- stats::terms(formula, specials = c("strata")) |
340 | 1x |
covariates <- rownames(attr(tf, "factors"))[-c(1, unlist(attr(tf, "specials")))] |
341 | 1x |
lapply( |
342 | 1x |
X = covariates, |
343 | 1x |
FUN = function(x) { |
344 | 3x |
if (is.character(data[[x]])) { |
345 | 1x |
data[[x]] <<- as.factor(data[[x]]) |
346 |
}
|
|
347 | 3x |
invisible() |
348 |
}
|
|
349 |
)
|
|
350 | 1x |
pval_method <- match.arg(pval_method) |
351 | ||
352 |
# Results directly exported from environment(fit_n_aov) to environment(s_function_draft)
|
|
353 | 1x |
y <- fit_n_aov( |
354 | 1x |
formula = formula, |
355 | 1x |
data = data, |
356 | 1x |
conf_level = conf_level, |
357 | 1x |
pval_method = pval_method, |
358 |
...
|
|
359 |
)
|
|
360 | 1x |
mod <- y$mod |
361 | 1x |
aov <- y$aov |
362 | 1x |
msum <- y$msum |
363 | 1x |
list2env(as.list(y), environment()) |
364 | ||
365 | 1x |
all_term_labs <- attr(mod$terms, "term.labels") |
366 | 1x |
term_labs <- all_term_labs[which(attr(mod$terms, "order") == 1)] |
367 | 1x |
names(term_labs) <- term_labs |
368 | ||
369 | 1x |
coef_inter <- NULL |
370 | 1x |
if (any(attr(mod$terms, "order") > 1)) { |
371 | 1x |
for_inter <- all_term_labs[attr(mod$terms, "order") > 1] |
372 | 1x |
names(for_inter) <- for_inter |
373 | 1x |
mmat <- stats::model.matrix(mod)[1, ] |
374 | 1x |
mmat[!mmat == 0] <- 0 |
375 | 1x |
mcoef <- stats::coef(mod) |
376 | 1x |
mvcov <- stats::vcov(mod) |
377 | ||
378 | 1x |
estimate_coef_local <- function(variable, given) { |
379 | 6x |
estimate_coef( |
380 | 6x |
variable, given, |
381 | 6x |
coef = mcoef, mmat = mmat, vcov = mvcov, conf_level = conf_level, |
382 | 6x |
lvl_var = levels(data[[variable]]), lvl_given = levels(data[[given]]) |
383 |
)
|
|
384 |
}
|
|
385 | ||
386 | 1x |
coef_inter <- lapply( |
387 | 1x |
for_inter, function(x) { |
388 | 3x |
y <- attr(mod$terms, "factors")[, x] |
389 | 3x |
y <- names(y[y > 0]) |
390 | 3x |
Map(estimate_coef_local, variable = y, given = rev(y)) |
391 |
}
|
|
392 |
)
|
|
393 |
}
|
|
394 | ||
395 | 1x |
list(mod = mod, msum = msum, aov = aov, coef_inter = coef_inter) |
396 |
}
|
1 |
#' Split function to configure risk difference column
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Wrapper function for [rtables::add_combo_levels()] which configures settings for the risk difference
|
|
6 |
#' column to be added to an `rtables` object. To add a risk difference column to a table, this function
|
|
7 |
#' should be used as `split_fun` in calls to [rtables::split_cols_by()], followed by setting argument
|
|
8 |
#' `riskdiff` to `TRUE` in all following analyze function calls.
|
|
9 |
#'
|
|
10 |
#' @param arm_x (`string`)\cr name of reference arm to use in risk difference calculations.
|
|
11 |
#' @param arm_y (`character`)\cr names of one or more arms to compare to reference arm in risk difference
|
|
12 |
#' calculations. A new column will be added for each value of `arm_y`.
|
|
13 |
#' @param col_label (`character`)\cr labels to use when rendering the risk difference column within the table.
|
|
14 |
#' If more than one comparison arm is specified in `arm_y`, default labels will specify which two arms are
|
|
15 |
#' being compared (reference arm vs. comparison arm).
|
|
16 |
#' @param pct (`flag`)\cr whether output should be returned as percentages. Defaults to `TRUE`.
|
|
17 |
#'
|
|
18 |
#' @return A closure suitable for use as a split function (`split_fun`) within [rtables::split_cols_by()]
|
|
19 |
#' when creating a table layout.
|
|
20 |
#'
|
|
21 |
#' @seealso [stat_propdiff_ci()] for details on risk difference calculation.
|
|
22 |
#'
|
|
23 |
#' @examples
|
|
24 |
#' adae <- tern_ex_adae
|
|
25 |
#' adae$AESEV <- factor(adae$AESEV)
|
|
26 |
#'
|
|
27 |
#' lyt <- basic_table() %>%
|
|
28 |
#' split_cols_by("ARMCD", split_fun = add_riskdiff(arm_x = "ARM A", arm_y = c("ARM B", "ARM C"))) %>%
|
|
29 |
#' count_occurrences_by_grade(
|
|
30 |
#' var = "AESEV",
|
|
31 |
#' riskdiff = TRUE
|
|
32 |
#' )
|
|
33 |
#'
|
|
34 |
#' tbl <- build_table(lyt, df = adae)
|
|
35 |
#' tbl
|
|
36 |
#'
|
|
37 |
#' @export
|
|
38 |
add_riskdiff <- function(arm_x, |
|
39 |
arm_y,
|
|
40 |
col_label = paste0( |
|
41 |
"Risk Difference (%) (95% CI)", if (length(arm_y) > 1) paste0("\n", arm_x, " vs. ", arm_y) |
|
42 |
),
|
|
43 |
pct = TRUE) { |
|
44 | 19x |
checkmate::assert_character(arm_x, len = 1) |
45 | 19x |
checkmate::assert_character(arm_y, min.len = 1) |
46 | 19x |
checkmate::assert_character(col_label, len = length(arm_y)) |
47 | ||
48 | 19x |
combodf <- tibble::tribble(~valname, ~label, ~levelcombo, ~exargs) |
49 | 19x |
for (i in seq_len(length(arm_y))) { |
50 | 20x |
combodf <- rbind( |
51 | 20x |
combodf,
|
52 | 20x |
tibble::tribble( |
53 | 20x |
~valname, ~label, ~levelcombo, ~exargs, |
54 | 20x |
paste("riskdiff", arm_x, arm_y[i], sep = "_"), col_label[i], c(arm_x, arm_y[i]), list() |
55 |
)
|
|
56 |
)
|
|
57 |
}
|
|
58 | 19x |
if (pct) combodf$valname <- paste0(combodf$valname, "_pct") |
59 | 19x |
add_combo_levels(combodf) |
60 |
}
|
|
61 | ||
62 |
#' Analysis function to calculate risk difference column values
|
|
63 |
#'
|
|
64 |
#' In the risk difference column, this function uses the statistics function associated with `afun` to
|
|
65 |
#' calculates risk difference values from arm X (reference group) and arm Y. These arms are specified
|
|
66 |
#' when configuring the risk difference column which is done using the [add_riskdiff()] split function in
|
|
67 |
#' the previous call to [rtables::split_cols_by()]. For all other columns, applies `afun` as usual. This
|
|
68 |
#' function utilizes the [stat_propdiff_ci()] function to perform risk difference calculations.
|
|
69 |
#'
|
|
70 |
#' @inheritParams argument_convention
|
|
71 |
#' @param afun (named `list`)\cr a named list containing one name-value pair where the name corresponds to
|
|
72 |
#' the name of the statistics function that should be used in calculations and the value is the corresponding
|
|
73 |
#' analysis function.
|
|
74 |
#'
|
|
75 |
#' @return A list of formatted [rtables::CellValue()].
|
|
76 |
#'
|
|
77 |
#' @seealso
|
|
78 |
#' * [stat_propdiff_ci()] for details on risk difference calculation.
|
|
79 |
#' * Split function [add_riskdiff()] which, when used as `split_fun` within [rtables::split_cols_by()] with
|
|
80 |
#' `riskdiff` argument set to `TRUE` in subsequent analyze functions calls, adds a risk difference column
|
|
81 |
#' to a table layout.
|
|
82 |
#'
|
|
83 |
#' @keywords internal
|
|
84 |
afun_riskdiff <- function(df, |
|
85 |
labelstr = "", |
|
86 |
afun,
|
|
87 |
...,
|
|
88 |
.stats = NULL, |
|
89 |
.stat_names = NULL, |
|
90 |
.formats = NULL, |
|
91 |
.labels = NULL, |
|
92 |
.indent_mods = NULL) { |
|
93 | 146x |
if (!any(grepl("riskdiff", names(.spl_context)))) { |
94 | ! |
stop( |
95 | ! |
"Please set up levels to use in risk difference calculations using the `add_riskdiff` ",
|
96 | ! |
"split function within `split_cols_by`. See ?add_riskdiff for details."
|
97 |
)
|
|
98 |
}
|
|
99 | 146x |
checkmate::assert_list(afun, len = 1, types = "function") |
100 | 146x |
checkmate::assert_named(afun) |
101 | ||
102 | 146x |
sfun <- names(afun) |
103 | 146x |
dots_extra_args <- list(...)[intersect(names(list(...)), names(formals(sfun)))] |
104 | 146x |
extra_args <- list( |
105 | 146x |
.var = .var, .df_row = .df_row, .N_col = .N_col, .N_row = .N_row, .stats = .stats, .formats = .formats, |
106 | 146x |
.labels = .labels, .indent_mods = .indent_mods |
107 |
)
|
|
108 | 146x |
cur_split <- tail(.spl_context$cur_col_split_val[[1]], 1) |
109 | ||
110 | 146x |
if (!grepl("^riskdiff", cur_split)) { |
111 |
# Apply basic afun (no risk difference) in all other columns
|
|
112 | 108x |
do.call(afun[[1]], args = c(list(df = df, labelstr = labelstr), extra_args, dots_extra_args)) |
113 |
} else { |
|
114 | 38x |
arm_x <- strsplit(cur_split, "_")[[1]][2] |
115 | 38x |
arm_y <- strsplit(cur_split, "_")[[1]][3] |
116 | 38x |
if (length(.spl_context$cur_col_split[[1]]) > 1) { # Different split name for nested column splits |
117 | 8x |
arm_spl_x <- gsub("riskdiff", "", paste0(strsplit(.spl_context$cur_col_id[1], "_")[[1]][c(1, 2)], collapse = "")) |
118 | 8x |
arm_spl_y <- gsub("riskdiff", "", paste0(strsplit(.spl_context$cur_col_id[1], "_")[[1]][c(1, 3)], collapse = "")) |
119 |
} else { |
|
120 | 30x |
arm_spl_x <- arm_x |
121 | 30x |
arm_spl_y <- arm_y |
122 |
}
|
|
123 | 38x |
N_col_x <- .all_col_counts[[arm_spl_x]] # nolint |
124 | 38x |
N_col_y <- .all_col_counts[[arm_spl_y]] # nolint |
125 | 38x |
cur_var <- tail(.spl_context$cur_col_split[[1]], 1) |
126 | ||
127 |
# Apply statistics function to arm X and arm Y data
|
|
128 | 38x |
s_args <- c(dots_extra_args, extra_args[intersect(setdiff(names(extra_args), ".N_col"), names(formals(sfun)))]) |
129 | 38x |
s_x <- do.call(sfun, args = c(list(df = df[df[[cur_var]] == arm_x, ], .N_col = N_col_x), s_args)) |
130 | 38x |
s_y <- do.call(sfun, args = c(list(df = df[df[[cur_var]] == arm_y, ], .N_col = N_col_y), s_args)) |
131 | ||
132 |
# Get statistic name and row names
|
|
133 | 38x |
stat <- ifelse("count_fraction" %in% names(s_x), "count_fraction", "unique") |
134 | 38x |
if ("flag_variables" %in% names(s_args)) { |
135 | 2x |
var_nms <- s_args$flag_variables |
136 | 36x |
} else if (is.list(s_x[[stat]]) && !is.null(names(s_x[[stat]]))) { |
137 | 24x |
var_nms <- names(s_x[[stat]]) |
138 |
} else { |
|
139 | 12x |
var_nms <- "" |
140 | 12x |
s_x[[stat]] <- list(s_x[[stat]]) |
141 | 12x |
s_y[[stat]] <- list(s_y[[stat]]) |
142 |
}
|
|
143 | ||
144 |
# Calculate risk difference for each row, repeated if multiple statistics in table
|
|
145 | 38x |
pct <- tail(strsplit(cur_split, "_")[[1]], 1) == "pct" |
146 | 38x |
rd_ci <- rep(stat_propdiff_ci( |
147 | 38x |
lapply(s_x[[stat]], `[`, 1), lapply(s_y[[stat]], `[`, 1), |
148 | 38x |
N_col_x, N_col_y, |
149 | 38x |
list_names = var_nms, |
150 | 38x |
pct = pct |
151 | 38x |
), max(1, length(.stats))) |
152 | ||
153 | 38x |
in_rows(.list = rd_ci, .formats = "xx.x (xx.x - xx.x)", .indent_mods = .indent_mods) |
154 |
}
|
|
155 |
}
|
|
156 | ||
157 |
#' Control function for risk difference column
|
|
158 |
#'
|
|
159 |
#' @description `r lifecycle::badge("stable")`
|
|
160 |
#'
|
|
161 |
#' Sets a list of parameters to use when generating a risk (proportion) difference column. Used as input to the
|
|
162 |
#' `riskdiff` parameter of [tabulate_rsp_subgroups()] and [tabulate_survival_subgroups()].
|
|
163 |
#'
|
|
164 |
#' @inheritParams add_riskdiff
|
|
165 |
#' @param format (`string` or `function`)\cr the format label (string) or formatting function to apply to the risk
|
|
166 |
#' difference statistic. See the `3d` string options in [formatters::list_valid_format_labels()] for possible format
|
|
167 |
#' strings. Defaults to `"xx.x (xx.x - xx.x)"`.
|
|
168 |
#'
|
|
169 |
#' @return A `list` of items with names corresponding to the arguments.
|
|
170 |
#'
|
|
171 |
#' @seealso [add_riskdiff()], [tabulate_rsp_subgroups()], and [tabulate_survival_subgroups()].
|
|
172 |
#'
|
|
173 |
#' @examples
|
|
174 |
#' control_riskdiff()
|
|
175 |
#' control_riskdiff(arm_x = "ARM A", arm_y = "ARM B")
|
|
176 |
#'
|
|
177 |
#' @export
|
|
178 |
control_riskdiff <- function(arm_x = NULL, |
|
179 |
arm_y = NULL, |
|
180 |
format = "xx.x (xx.x - xx.x)", |
|
181 |
col_label = "Risk Difference (%) (95% CI)", |
|
182 |
pct = TRUE) { |
|
183 | 4x |
checkmate::assert_character(arm_x, len = 1, null.ok = TRUE) |
184 | 4x |
checkmate::assert_character(arm_y, min.len = 1, null.ok = TRUE) |
185 | 4x |
checkmate::assert_character(format, len = 1) |
186 | 4x |
checkmate::assert_character(col_label) |
187 | 4x |
checkmate::assert_flag(pct) |
188 | ||
189 | 4x |
list(arm_x = arm_x, arm_y = arm_y, format = format, col_label = col_label, pct = pct) |
190 |
}
|
1 |
#' Helper functions for multivariate logistic regression
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Helper functions used in calculations for logistic regression.
|
|
6 |
#'
|
|
7 |
#' @inheritParams argument_convention
|
|
8 |
#' @param fit_glm (`glm`)\cr logistic regression model fitted by [stats::glm()] with "binomial" family.
|
|
9 |
#' Limited functionality is also available for conditional logistic regression models fitted by
|
|
10 |
#' [survival::clogit()], currently this is used only by [extract_rsp_biomarkers()].
|
|
11 |
#' @param x (`character`)\cr a variable or interaction term in `fit_glm` (depending on the helper function used).
|
|
12 |
#'
|
|
13 |
#' @examples
|
|
14 |
#' library(dplyr)
|
|
15 |
#' library(broom)
|
|
16 |
#'
|
|
17 |
#' adrs_f <- tern_ex_adrs %>%
|
|
18 |
#' filter(PARAMCD == "BESRSPI") %>%
|
|
19 |
#' filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
|
|
20 |
#' mutate(
|
|
21 |
#' Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
|
|
22 |
#' RACE = factor(RACE),
|
|
23 |
#' SEX = factor(SEX)
|
|
24 |
#' )
|
|
25 |
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
|
|
26 |
#' mod1 <- fit_logistic(
|
|
27 |
#' data = adrs_f,
|
|
28 |
#' variables = list(
|
|
29 |
#' response = "Response",
|
|
30 |
#' arm = "ARMCD",
|
|
31 |
#' covariates = c("AGE", "RACE")
|
|
32 |
#' )
|
|
33 |
#' )
|
|
34 |
#' mod2 <- fit_logistic(
|
|
35 |
#' data = adrs_f,
|
|
36 |
#' variables = list(
|
|
37 |
#' response = "Response",
|
|
38 |
#' arm = "ARMCD",
|
|
39 |
#' covariates = c("AGE", "RACE"),
|
|
40 |
#' interaction = "AGE"
|
|
41 |
#' )
|
|
42 |
#' )
|
|
43 |
#'
|
|
44 |
#' @name h_logistic_regression
|
|
45 |
NULL
|
|
46 | ||
47 |
#' @describeIn h_logistic_regression Helper function to extract interaction variable names from a fitted
|
|
48 |
#' model assuming only one interaction term.
|
|
49 |
#'
|
|
50 |
#' @return Vector of names of interaction variables.
|
|
51 |
#'
|
|
52 |
#' @export
|
|
53 |
h_get_interaction_vars <- function(fit_glm) { |
|
54 | 34x |
checkmate::assert_class(fit_glm, "glm") |
55 | 34x |
terms_name <- attr(stats::terms(fit_glm), "term.labels") |
56 | 34x |
terms_order <- attr(stats::terms(fit_glm), "order") |
57 | 34x |
interaction_term <- terms_name[terms_order == 2] |
58 | 34x |
checkmate::assert_string(interaction_term) |
59 | 34x |
strsplit(interaction_term, split = ":")[[1]] |
60 |
}
|
|
61 | ||
62 |
#' @describeIn h_logistic_regression Helper function to get the right coefficient name from the
|
|
63 |
#' interaction variable names and the given levels. The main value here is that the order
|
|
64 |
#' of first and second variable is checked in the `interaction_vars` input.
|
|
65 |
#'
|
|
66 |
#' @param interaction_vars (`character(2)`)\cr interaction variable names.
|
|
67 |
#' @param first_var_with_level (`character(2)`)\cr the first variable name with the interaction level.
|
|
68 |
#' @param second_var_with_level (`character(2)`)\cr the second variable name with the interaction level.
|
|
69 |
#'
|
|
70 |
#' @return Name of coefficient.
|
|
71 |
#'
|
|
72 |
#' @export
|
|
73 |
h_interaction_coef_name <- function(interaction_vars, |
|
74 |
first_var_with_level,
|
|
75 |
second_var_with_level) { |
|
76 | 55x |
checkmate::assert_character(interaction_vars, len = 2, any.missing = FALSE) |
77 | 55x |
checkmate::assert_character(first_var_with_level, len = 2, any.missing = FALSE) |
78 | 55x |
checkmate::assert_character(second_var_with_level, len = 2, any.missing = FALSE) |
79 | 55x |
checkmate::assert_subset(c(first_var_with_level[1], second_var_with_level[1]), interaction_vars) |
80 | ||
81 | 55x |
first_name <- paste(first_var_with_level, collapse = "") |
82 | 55x |
second_name <- paste(second_var_with_level, collapse = "") |
83 | 55x |
if (first_var_with_level[1] == interaction_vars[1]) { |
84 | 36x |
paste(first_name, second_name, sep = ":") |
85 | 19x |
} else if (second_var_with_level[1] == interaction_vars[1]) { |
86 | 19x |
paste(second_name, first_name, sep = ":") |
87 |
}
|
|
88 |
}
|
|
89 | ||
90 |
#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates
|
|
91 |
#' for the case when both the odds ratio and the interaction variable are categorical.
|
|
92 |
#'
|
|
93 |
#' @param odds_ratio_var (`string`)\cr the odds ratio variable.
|
|
94 |
#' @param interaction_var (`string`)\cr the interaction variable.
|
|
95 |
#'
|
|
96 |
#' @return Odds ratio.
|
|
97 |
#'
|
|
98 |
#' @export
|
|
99 |
h_or_cat_interaction <- function(odds_ratio_var, |
|
100 |
interaction_var,
|
|
101 |
fit_glm,
|
|
102 |
conf_level = 0.95) { |
|
103 | 8x |
interaction_vars <- h_get_interaction_vars(fit_glm) |
104 | 8x |
checkmate::assert_string(odds_ratio_var) |
105 | 8x |
checkmate::assert_string(interaction_var) |
106 | 8x |
checkmate::assert_subset(c(odds_ratio_var, interaction_var), interaction_vars) |
107 | 8x |
checkmate::assert_vector(interaction_vars, len = 2) |
108 | ||
109 | 8x |
xs_level <- fit_glm$xlevels |
110 | 8x |
xs_coef <- stats::coef(fit_glm) |
111 | 8x |
xs_vcov <- stats::vcov(fit_glm) |
112 | 8x |
y <- list() |
113 | 8x |
for (var_level in xs_level[[odds_ratio_var]][-1]) { |
114 | 14x |
x <- list() |
115 | 14x |
for (ref_level in xs_level[[interaction_var]]) { |
116 | 38x |
coef_names <- paste0(odds_ratio_var, var_level) |
117 | 38x |
if (ref_level != xs_level[[interaction_var]][1]) { |
118 | 24x |
interaction_coef_name <- h_interaction_coef_name( |
119 | 24x |
interaction_vars,
|
120 | 24x |
c(odds_ratio_var, var_level), |
121 | 24x |
c(interaction_var, ref_level) |
122 |
)
|
|
123 | 24x |
coef_names <- c( |
124 | 24x |
coef_names,
|
125 | 24x |
interaction_coef_name
|
126 |
)
|
|
127 |
}
|
|
128 | 38x |
if (length(coef_names) > 1) { |
129 | 24x |
ones <- t(c(1, 1)) |
130 | 24x |
est <- as.numeric(ones %*% xs_coef[coef_names]) |
131 | 24x |
se <- sqrt(as.numeric(ones %*% xs_vcov[coef_names, coef_names] %*% t(ones))) |
132 |
} else { |
|
133 | 14x |
est <- xs_coef[coef_names] |
134 | 14x |
se <- sqrt(as.numeric(xs_vcov[coef_names, coef_names])) |
135 |
}
|
|
136 | 38x |
or <- exp(est) |
137 | 38x |
ci <- exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se) |
138 | 38x |
x[[ref_level]] <- list(or = or, ci = ci) |
139 |
}
|
|
140 | 14x |
y[[var_level]] <- x |
141 |
}
|
|
142 | 8x |
y
|
143 |
}
|
|
144 | ||
145 |
#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates
|
|
146 |
#' for the case when either the odds ratio or the interaction variable is continuous.
|
|
147 |
#'
|
|
148 |
#' @param at (`numeric` or `NULL`)\cr optional values for the interaction variable. Otherwise
|
|
149 |
#' the median is used.
|
|
150 |
#'
|
|
151 |
#' @return Odds ratio.
|
|
152 |
#'
|
|
153 |
#' @note We don't provide a function for the case when both variables are continuous because
|
|
154 |
#' this does not arise in this table, as the treatment arm variable will always be involved
|
|
155 |
#' and categorical.
|
|
156 |
#'
|
|
157 |
#' @export
|
|
158 |
h_or_cont_interaction <- function(odds_ratio_var, |
|
159 |
interaction_var,
|
|
160 |
fit_glm,
|
|
161 |
at = NULL, |
|
162 |
conf_level = 0.95) { |
|
163 | 13x |
interaction_vars <- h_get_interaction_vars(fit_glm) |
164 | 13x |
checkmate::assert_string(odds_ratio_var) |
165 | 13x |
checkmate::assert_string(interaction_var) |
166 | 13x |
checkmate::assert_subset(c(odds_ratio_var, interaction_var), interaction_vars) |
167 | 13x |
checkmate::assert_vector(interaction_vars, len = 2) |
168 | 13x |
checkmate::assert_numeric(at, min.len = 1, null.ok = TRUE, any.missing = FALSE) |
169 | 13x |
xs_level <- fit_glm$xlevels |
170 | 13x |
xs_coef <- stats::coef(fit_glm) |
171 | 13x |
xs_vcov <- stats::vcov(fit_glm) |
172 | 13x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
173 | 13x |
model_data <- fit_glm$model |
174 | 13x |
if (!is.null(at)) { |
175 | 3x |
checkmate::assert_set_equal(xs_class[interaction_var], "numeric") |
176 |
}
|
|
177 | 12x |
y <- list() |
178 | 12x |
if (xs_class[interaction_var] == "numeric") { |
179 | 7x |
if (is.null(at)) { |
180 | 5x |
at <- ceiling(stats::median(model_data[[interaction_var]])) |
181 |
}
|
|
182 | ||
183 | 7x |
for (var_level in xs_level[[odds_ratio_var]][-1]) { |
184 | 14x |
x <- list() |
185 | 14x |
for (increment in at) { |
186 | 20x |
coef_names <- paste0(odds_ratio_var, var_level) |
187 | 20x |
if (increment != 0) { |
188 | 20x |
interaction_coef_name <- h_interaction_coef_name( |
189 | 20x |
interaction_vars,
|
190 | 20x |
c(odds_ratio_var, var_level), |
191 | 20x |
c(interaction_var, "") |
192 |
)
|
|
193 | 20x |
coef_names <- c( |
194 | 20x |
coef_names,
|
195 | 20x |
interaction_coef_name
|
196 |
)
|
|
197 |
}
|
|
198 | 20x |
if (length(coef_names) > 1) { |
199 | 20x |
xvec <- t(c(1, increment)) |
200 | 20x |
est <- as.numeric(xvec %*% xs_coef[coef_names]) |
201 | 20x |
se <- sqrt(as.numeric(xvec %*% xs_vcov[coef_names, coef_names] %*% t(xvec))) |
202 |
} else { |
|
203 | ! |
est <- xs_coef[coef_names] |
204 | ! |
se <- sqrt(as.numeric(xs_vcov[coef_names, coef_names])) |
205 |
}
|
|
206 | 20x |
or <- exp(est) |
207 | 20x |
ci <- exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se) |
208 | 20x |
x[[as.character(increment)]] <- list(or = or, ci = ci) |
209 |
}
|
|
210 | 14x |
y[[var_level]] <- x |
211 |
}
|
|
212 |
} else { |
|
213 | 5x |
checkmate::assert_set_equal(xs_class[odds_ratio_var], "numeric") |
214 | 5x |
checkmate::assert_set_equal(xs_class[interaction_var], "factor") |
215 | 5x |
for (var_level in xs_level[[interaction_var]]) { |
216 | 15x |
coef_names <- odds_ratio_var |
217 | 15x |
if (var_level != xs_level[[interaction_var]][1]) { |
218 | 10x |
interaction_coef_name <- h_interaction_coef_name( |
219 | 10x |
interaction_vars,
|
220 | 10x |
c(odds_ratio_var, ""), |
221 | 10x |
c(interaction_var, var_level) |
222 |
)
|
|
223 | 10x |
coef_names <- c( |
224 | 10x |
coef_names,
|
225 | 10x |
interaction_coef_name
|
226 |
)
|
|
227 |
}
|
|
228 | 15x |
if (length(coef_names) > 1) { |
229 | 10x |
xvec <- t(c(1, 1)) |
230 | 10x |
est <- as.numeric(xvec %*% xs_coef[coef_names]) |
231 | 10x |
se <- sqrt(as.numeric(xvec %*% xs_vcov[coef_names, coef_names] %*% t(xvec))) |
232 |
} else { |
|
233 | 5x |
est <- xs_coef[coef_names] |
234 | 5x |
se <- sqrt(as.numeric(xs_vcov[coef_names, coef_names])) |
235 |
}
|
|
236 | 15x |
or <- exp(est) |
237 | 15x |
ci <- exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se) |
238 | 15x |
y[[var_level]] <- list(or = or, ci = ci) |
239 |
}
|
|
240 |
}
|
|
241 | 12x |
y
|
242 |
}
|
|
243 | ||
244 |
#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates
|
|
245 |
#' in case of an interaction. This is a wrapper for [h_or_cont_interaction()] and
|
|
246 |
#' [h_or_cat_interaction()].
|
|
247 |
#'
|
|
248 |
#' @return Odds ratio.
|
|
249 |
#'
|
|
250 |
#' @export
|
|
251 |
h_or_interaction <- function(odds_ratio_var, |
|
252 |
interaction_var,
|
|
253 |
fit_glm,
|
|
254 |
at = NULL, |
|
255 |
conf_level = 0.95) { |
|
256 | 15x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
257 | 15x |
if (any(xs_class[c(odds_ratio_var, interaction_var)] == "numeric")) { |
258 | 9x |
h_or_cont_interaction( |
259 | 9x |
odds_ratio_var,
|
260 | 9x |
interaction_var,
|
261 | 9x |
fit_glm,
|
262 | 9x |
at = at, |
263 | 9x |
conf_level = conf_level |
264 |
)
|
|
265 | 6x |
} else if (all(xs_class[c(odds_ratio_var, interaction_var)] == "factor")) { |
266 | 6x |
h_or_cat_interaction( |
267 | 6x |
odds_ratio_var,
|
268 | 6x |
interaction_var,
|
269 | 6x |
fit_glm,
|
270 | 6x |
conf_level = conf_level |
271 |
)
|
|
272 |
} else { |
|
273 | ! |
stop("wrong interaction variable class, the interaction variable is not a numeric nor a factor") |
274 |
}
|
|
275 |
}
|
|
276 | ||
277 |
#' @describeIn h_logistic_regression Helper function to construct term labels from simple terms and the table
|
|
278 |
#' of numbers of patients.
|
|
279 |
#'
|
|
280 |
#' @param terms (`character`)\cr simple terms.
|
|
281 |
#' @param table (`table`)\cr table containing numbers for terms.
|
|
282 |
#'
|
|
283 |
#' @return Term labels containing numbers of patients.
|
|
284 |
#'
|
|
285 |
#' @export
|
|
286 |
h_simple_term_labels <- function(terms, |
|
287 |
table) { |
|
288 | 54x |
checkmate::assert_true(is.table(table)) |
289 | 54x |
checkmate::assert_multi_class(terms, classes = c("factor", "character")) |
290 | 54x |
terms <- as.character(terms) |
291 | 54x |
term_n <- table[terms] |
292 | 54x |
paste0(terms, ", n = ", term_n) |
293 |
}
|
|
294 | ||
295 |
#' @describeIn h_logistic_regression Helper function to construct term labels from interaction terms and the table
|
|
296 |
#' of numbers of patients.
|
|
297 |
#'
|
|
298 |
#' @param terms1 (`character`)\cr terms for first dimension (rows).
|
|
299 |
#' @param terms2 (`character`)\cr terms for second dimension (rows).
|
|
300 |
#' @param any (`flag`)\cr whether any of `term1` and `term2` can be fulfilled to count the
|
|
301 |
#' number of patients. In that case they can only be scalar (strings).
|
|
302 |
#'
|
|
303 |
#' @return Term labels containing numbers of patients.
|
|
304 |
#'
|
|
305 |
#' @export
|
|
306 |
h_interaction_term_labels <- function(terms1, |
|
307 |
terms2,
|
|
308 |
table,
|
|
309 |
any = FALSE) { |
|
310 | 8x |
checkmate::assert_true(is.table(table)) |
311 | 8x |
checkmate::assert_flag(any) |
312 | 8x |
checkmate::assert_multi_class(terms1, classes = c("factor", "character")) |
313 | 8x |
checkmate::assert_multi_class(terms2, classes = c("factor", "character")) |
314 | 8x |
terms1 <- as.character(terms1) |
315 | 8x |
terms2 <- as.character(terms2) |
316 | 8x |
if (any) { |
317 | 4x |
checkmate::assert_scalar(terms1) |
318 | 4x |
checkmate::assert_scalar(terms2) |
319 | 4x |
paste0( |
320 | 4x |
terms1, " or ", terms2, ", n = ", |
321 |
# Note that we double count in the initial sum the cell [terms1, terms2], therefore subtract.
|
|
322 | 4x |
sum(c(table[terms1, ], table[, terms2])) - table[terms1, terms2] |
323 |
)
|
|
324 |
} else { |
|
325 | 4x |
term_n <- table[cbind(terms1, terms2)] |
326 | 4x |
paste0(terms1, " * ", terms2, ", n = ", term_n) |
327 |
}
|
|
328 |
}
|
|
329 | ||
330 |
#' @describeIn h_logistic_regression Helper function to tabulate the main effect
|
|
331 |
#' results of a (conditional) logistic regression model.
|
|
332 |
#'
|
|
333 |
#' @return Tabulated main effect results from a logistic regression model.
|
|
334 |
#'
|
|
335 |
#' @examples
|
|
336 |
#' h_glm_simple_term_extract("AGE", mod1)
|
|
337 |
#' h_glm_simple_term_extract("ARMCD", mod1)
|
|
338 |
#'
|
|
339 |
#' @export
|
|
340 |
h_glm_simple_term_extract <- function(x, fit_glm) { |
|
341 | 78x |
checkmate::assert_multi_class(fit_glm, c("glm", "clogit")) |
342 | 78x |
checkmate::assert_string(x) |
343 | ||
344 | 78x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
345 | 78x |
xs_level <- fit_glm$xlevels |
346 | 78x |
xs_coef <- summary(fit_glm)$coefficients |
347 | 78x |
stats <- if (inherits(fit_glm, "glm")) { |
348 | 66x |
c("estimate" = "Estimate", "std_error" = "Std. Error", "pvalue" = "Pr(>|z|)") |
349 |
} else { |
|
350 | 12x |
c("estimate" = "coef", "std_error" = "se(coef)", "pvalue" = "Pr(>|z|)") |
351 |
}
|
|
352 |
# Make sure x is not an interaction term.
|
|
353 | 78x |
checkmate::assert_subset(x, names(xs_class)) |
354 | 78x |
x_sel <- if (xs_class[x] == "numeric") x else paste0(x, xs_level[[x]][-1]) |
355 | 78x |
x_stats <- as.data.frame(xs_coef[x_sel, stats, drop = FALSE], stringsAsFactors = FALSE) |
356 | 78x |
colnames(x_stats) <- names(stats) |
357 | 78x |
x_stats$estimate <- as.list(x_stats$estimate) |
358 | 78x |
x_stats$std_error <- as.list(x_stats$std_error) |
359 | 78x |
x_stats$pvalue <- as.list(x_stats$pvalue) |
360 | 78x |
x_stats$df <- as.list(1) |
361 | 78x |
if (xs_class[x] == "numeric") { |
362 | 60x |
x_stats$term <- x |
363 | 60x |
x_stats$term_label <- if (inherits(fit_glm, "glm")) { |
364 | 48x |
formatters::var_labels(fit_glm$data[x], fill = TRUE) |
365 |
} else { |
|
366 |
# We just fill in here with the `term` itself as we don't have the data available.
|
|
367 | 12x |
x
|
368 |
}
|
|
369 | 60x |
x_stats$is_variable_summary <- FALSE |
370 | 60x |
x_stats$is_term_summary <- TRUE |
371 |
} else { |
|
372 | 18x |
checkmate::assert_class(fit_glm, "glm") |
373 |
# The reason is that we don't have the original data set in the `clogit` object
|
|
374 |
# and therefore cannot determine the `x_numbers` here.
|
|
375 | 18x |
x_numbers <- table(fit_glm$data[[x]]) |
376 | 18x |
x_stats$term <- xs_level[[x]][-1] |
377 | 18x |
x_stats$term_label <- h_simple_term_labels(x_stats$term, x_numbers) |
378 | 18x |
x_stats$is_variable_summary <- FALSE |
379 | 18x |
x_stats$is_term_summary <- TRUE |
380 | 18x |
main_effects <- car::Anova(fit_glm, type = 3, test.statistic = "Wald") |
381 | 18x |
x_main <- data.frame( |
382 | 18x |
pvalue = main_effects[x, "Pr(>Chisq)", drop = TRUE], |
383 | 18x |
term = xs_level[[x]][1], |
384 | 18x |
term_label = paste("Reference", h_simple_term_labels(xs_level[[x]][1], x_numbers)), |
385 | 18x |
df = main_effects[x, "Df", drop = TRUE], |
386 | 18x |
stringsAsFactors = FALSE |
387 |
)
|
|
388 | 18x |
x_main$pvalue <- as.list(x_main$pvalue) |
389 | 18x |
x_main$df <- as.list(x_main$df) |
390 | 18x |
x_main$estimate <- list(numeric(0)) |
391 | 18x |
x_main$std_error <- list(numeric(0)) |
392 | 18x |
if (length(xs_level[[x]][-1]) == 1) { |
393 | 8x |
x_main$pvalue <- list(numeric(0)) |
394 | 8x |
x_main$df <- list(numeric(0)) |
395 |
}
|
|
396 | 18x |
x_main$is_variable_summary <- TRUE |
397 | 18x |
x_main$is_term_summary <- FALSE |
398 | 18x |
x_stats <- rbind(x_main, x_stats) |
399 |
}
|
|
400 | 78x |
x_stats$variable <- x |
401 | 78x |
x_stats$variable_label <- if (inherits(fit_glm, "glm")) { |
402 | 66x |
formatters::var_labels(fit_glm$data[x], fill = TRUE) |
403 |
} else { |
|
404 | 12x |
x
|
405 |
}
|
|
406 | 78x |
x_stats$interaction <- "" |
407 | 78x |
x_stats$interaction_label <- "" |
408 | 78x |
x_stats$reference <- "" |
409 | 78x |
x_stats$reference_label <- "" |
410 | 78x |
rownames(x_stats) <- NULL |
411 | 78x |
x_stats[c( |
412 | 78x |
"variable",
|
413 | 78x |
"variable_label",
|
414 | 78x |
"term",
|
415 | 78x |
"term_label",
|
416 | 78x |
"interaction",
|
417 | 78x |
"interaction_label",
|
418 | 78x |
"reference",
|
419 | 78x |
"reference_label",
|
420 | 78x |
"estimate",
|
421 | 78x |
"std_error",
|
422 | 78x |
"df",
|
423 | 78x |
"pvalue",
|
424 | 78x |
"is_variable_summary",
|
425 | 78x |
"is_term_summary"
|
426 |
)] |
|
427 |
}
|
|
428 | ||
429 |
#' @describeIn h_logistic_regression Helper function to tabulate the interaction term
|
|
430 |
#' results of a logistic regression model.
|
|
431 |
#'
|
|
432 |
#' @return Tabulated interaction term results from a logistic regression model.
|
|
433 |
#'
|
|
434 |
#' @examples
|
|
435 |
#' h_glm_interaction_extract("ARMCD:AGE", mod2)
|
|
436 |
#'
|
|
437 |
#' @export
|
|
438 |
h_glm_interaction_extract <- function(x, fit_glm) { |
|
439 | 7x |
vars <- h_get_interaction_vars(fit_glm) |
440 | 7x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
441 | ||
442 | 7x |
checkmate::assert_string(x) |
443 | ||
444 |
# Only take two-way interaction
|
|
445 | 7x |
checkmate::assert_vector(vars, len = 2) |
446 | ||
447 |
# Only consider simple case: first variable in interaction is arm, a categorical variable
|
|
448 | 7x |
checkmate::assert_disjunct(xs_class[vars[1]], "numeric") |
449 | ||
450 | 7x |
xs_level <- fit_glm$xlevels |
451 | 7x |
xs_coef <- summary(fit_glm)$coefficients |
452 | 7x |
main_effects <- car::Anova(fit_glm, type = 3, test.statistic = "Wald") |
453 | 7x |
stats <- c("estimate" = "Estimate", "std_error" = "Std. Error", "pvalue" = "Pr(>|z|)") |
454 | 7x |
v1_comp <- xs_level[[vars[1]]][-1] |
455 | 7x |
if (xs_class[vars[2]] == "numeric") { |
456 | 4x |
x_stats <- as.data.frame( |
457 | 4x |
xs_coef[paste0(vars[1], v1_comp, ":", vars[2]), stats, drop = FALSE], |
458 | 4x |
stringsAsFactors = FALSE |
459 |
)
|
|
460 | 4x |
colnames(x_stats) <- names(stats) |
461 | 4x |
x_stats$term <- v1_comp |
462 | 4x |
x_numbers <- table(fit_glm$data[[vars[1]]]) |
463 | 4x |
x_stats$term_label <- h_simple_term_labels(v1_comp, x_numbers) |
464 | 4x |
v1_ref <- xs_level[[vars[1]]][1] |
465 | 4x |
term_main <- v1_ref |
466 | 4x |
ref_label <- h_simple_term_labels(v1_ref, x_numbers) |
467 | 3x |
} else if (xs_class[vars[2]] != "numeric") { |
468 | 3x |
v2_comp <- xs_level[[vars[2]]][-1] |
469 | 3x |
v1_v2_grid <- expand.grid(v1 = v1_comp, v2 = v2_comp) |
470 | 3x |
x_sel <- paste( |
471 | 3x |
paste0(vars[1], v1_v2_grid$v1), |
472 | 3x |
paste0(vars[2], v1_v2_grid$v2), |
473 | 3x |
sep = ":" |
474 |
)
|
|
475 | 3x |
x_stats <- as.data.frame(xs_coef[x_sel, stats, drop = FALSE], stringsAsFactors = FALSE) |
476 | 3x |
colnames(x_stats) <- names(stats) |
477 | 3x |
x_stats$term <- paste(v1_v2_grid$v1, "*", v1_v2_grid$v2) |
478 | 3x |
x_numbers <- table(fit_glm$data[[vars[1]]], fit_glm$data[[vars[2]]]) |
479 | 3x |
x_stats$term_label <- h_interaction_term_labels(v1_v2_grid$v1, v1_v2_grid$v2, x_numbers) |
480 | 3x |
v1_ref <- xs_level[[vars[1]]][1] |
481 | 3x |
v2_ref <- xs_level[[vars[2]]][1] |
482 | 3x |
term_main <- paste(vars[1], vars[2], sep = " * ") |
483 | 3x |
ref_label <- h_interaction_term_labels(v1_ref, v2_ref, x_numbers, any = TRUE) |
484 |
}
|
|
485 | 7x |
x_stats$df <- as.list(1) |
486 | 7x |
x_stats$pvalue <- as.list(x_stats$pvalue) |
487 | 7x |
x_stats$is_variable_summary <- FALSE |
488 | 7x |
x_stats$is_term_summary <- TRUE |
489 | 7x |
x_main <- data.frame( |
490 | 7x |
pvalue = main_effects[x, "Pr(>Chisq)", drop = TRUE], |
491 | 7x |
term = term_main, |
492 | 7x |
term_label = paste("Reference", ref_label), |
493 | 7x |
df = main_effects[x, "Df", drop = TRUE], |
494 | 7x |
stringsAsFactors = FALSE |
495 |
)
|
|
496 | 7x |
x_main$pvalue <- as.list(x_main$pvalue) |
497 | 7x |
x_main$df <- as.list(x_main$df) |
498 | 7x |
x_main$estimate <- list(numeric(0)) |
499 | 7x |
x_main$std_error <- list(numeric(0)) |
500 | 7x |
x_main$is_variable_summary <- TRUE |
501 | 7x |
x_main$is_term_summary <- FALSE |
502 | ||
503 | 7x |
x_stats <- rbind(x_main, x_stats) |
504 | 7x |
x_stats$variable <- x |
505 | 7x |
x_stats$variable_label <- paste( |
506 | 7x |
"Interaction of",
|
507 | 7x |
formatters::var_labels(fit_glm$data[vars[1]], fill = TRUE), |
508 |
"*",
|
|
509 | 7x |
formatters::var_labels(fit_glm$data[vars[2]], fill = TRUE) |
510 |
)
|
|
511 | 7x |
x_stats$interaction <- "" |
512 | 7x |
x_stats$interaction_label <- "" |
513 | 7x |
x_stats$reference <- "" |
514 | 7x |
x_stats$reference_label <- "" |
515 | 7x |
rownames(x_stats) <- NULL |
516 | 7x |
x_stats[c( |
517 | 7x |
"variable",
|
518 | 7x |
"variable_label",
|
519 | 7x |
"term",
|
520 | 7x |
"term_label",
|
521 | 7x |
"interaction",
|
522 | 7x |
"interaction_label",
|
523 | 7x |
"reference",
|
524 | 7x |
"reference_label",
|
525 | 7x |
"estimate",
|
526 | 7x |
"std_error",
|
527 | 7x |
"df",
|
528 | 7x |
"pvalue",
|
529 | 7x |
"is_variable_summary",
|
530 | 7x |
"is_term_summary"
|
531 |
)] |
|
532 |
}
|
|
533 | ||
534 |
#' @describeIn h_logistic_regression Helper function to tabulate the interaction
|
|
535 |
#' results of a logistic regression model. This basically is a wrapper for
|
|
536 |
#' [h_or_interaction()] and [h_glm_simple_term_extract()] which puts the results
|
|
537 |
#' in the right data frame format.
|
|
538 |
#'
|
|
539 |
#' @return A `data.frame` of tabulated interaction term results from a logistic regression model.
|
|
540 |
#'
|
|
541 |
#' @examples
|
|
542 |
#' h_glm_inter_term_extract("AGE", "ARMCD", mod2)
|
|
543 |
#'
|
|
544 |
#' @export
|
|
545 |
h_glm_inter_term_extract <- function(odds_ratio_var, |
|
546 |
interaction_var,
|
|
547 |
fit_glm,
|
|
548 |
...) { |
|
549 |
# First obtain the main effects.
|
|
550 | 13x |
main_stats <- h_glm_simple_term_extract(odds_ratio_var, fit_glm) |
551 | 13x |
main_stats$is_reference_summary <- FALSE |
552 | 13x |
main_stats$odds_ratio <- NA |
553 | 13x |
main_stats$lcl <- NA |
554 | 13x |
main_stats$ucl <- NA |
555 | ||
556 |
# Then we get the odds ratio estimates and put into df form.
|
|
557 | 13x |
or_numbers <- h_or_interaction(odds_ratio_var, interaction_var, fit_glm, ...) |
558 | 13x |
is_num_or_var <- attr(fit_glm$terms, "dataClasses")[odds_ratio_var] == "numeric" |
559 | ||
560 | 13x |
if (is_num_or_var) { |
561 |
# Numeric OR variable case.
|
|
562 | 4x |
references <- names(or_numbers) |
563 | 4x |
n_ref <- length(references) |
564 | ||
565 | 4x |
extract_from_list <- function(l, name, pos = 1) { |
566 | 12x |
unname(unlist( |
567 | 12x |
lapply(or_numbers, function(x) { |
568 | 36x |
x[[name]][pos] |
569 |
}) |
|
570 |
)) |
|
571 |
}
|
|
572 | 4x |
or_stats <- data.frame( |
573 | 4x |
variable = odds_ratio_var, |
574 | 4x |
variable_label = unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE)), |
575 | 4x |
term = odds_ratio_var, |
576 | 4x |
term_label = unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE)), |
577 | 4x |
interaction = interaction_var, |
578 | 4x |
interaction_label = unname(formatters::var_labels(fit_glm$data[interaction_var], fill = TRUE)), |
579 | 4x |
reference = references, |
580 | 4x |
reference_label = references, |
581 | 4x |
estimate = NA, |
582 | 4x |
std_error = NA, |
583 | 4x |
odds_ratio = extract_from_list(or_numbers, "or"), |
584 | 4x |
lcl = extract_from_list(or_numbers, "ci", pos = "lcl"), |
585 | 4x |
ucl = extract_from_list(or_numbers, "ci", pos = "ucl"), |
586 | 4x |
df = NA, |
587 | 4x |
pvalue = NA, |
588 | 4x |
is_variable_summary = FALSE, |
589 | 4x |
is_term_summary = FALSE, |
590 | 4x |
is_reference_summary = TRUE |
591 |
)
|
|
592 |
} else { |
|
593 |
# Categorical OR variable case.
|
|
594 | 9x |
references <- names(or_numbers[[1]]) |
595 | 9x |
n_ref <- length(references) |
596 | ||
597 | 9x |
extract_from_list <- function(l, name, pos = 1) { |
598 | 27x |
unname(unlist( |
599 | 27x |
lapply(or_numbers, function(x) { |
600 | 48x |
lapply(x, function(y) y[[name]][pos]) |
601 |
}) |
|
602 |
)) |
|
603 |
}
|
|
604 | 9x |
or_stats <- data.frame( |
605 | 9x |
variable = odds_ratio_var, |
606 | 9x |
variable_label = unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE)), |
607 | 9x |
term = rep(names(or_numbers), each = n_ref), |
608 | 9x |
term_label = h_simple_term_labels(rep(names(or_numbers), each = n_ref), table(fit_glm$data[[odds_ratio_var]])), |
609 | 9x |
interaction = interaction_var, |
610 | 9x |
interaction_label = unname(formatters::var_labels(fit_glm$data[interaction_var], fill = TRUE)), |
611 | 9x |
reference = unlist(lapply(or_numbers, names)), |
612 | 9x |
reference_label = unlist(lapply(or_numbers, names)), |
613 | 9x |
estimate = NA, |
614 | 9x |
std_error = NA, |
615 | 9x |
odds_ratio = extract_from_list(or_numbers, "or"), |
616 | 9x |
lcl = extract_from_list(or_numbers, "ci", pos = "lcl"), |
617 | 9x |
ucl = extract_from_list(or_numbers, "ci", pos = "ucl"), |
618 | 9x |
df = NA, |
619 | 9x |
pvalue = NA, |
620 | 9x |
is_variable_summary = FALSE, |
621 | 9x |
is_term_summary = FALSE, |
622 | 9x |
is_reference_summary = TRUE |
623 |
)
|
|
624 |
}
|
|
625 | ||
626 | 13x |
df <- rbind( |
627 | 13x |
main_stats[, names(or_stats)], |
628 | 13x |
or_stats
|
629 |
)
|
|
630 | 13x |
df[order(-df$is_variable_summary, df$term, -df$is_term_summary, df$reference), ] |
631 |
}
|
|
632 | ||
633 |
#' @describeIn h_logistic_regression Helper function to tabulate the results including
|
|
634 |
#' odds ratios and confidence intervals of simple terms.
|
|
635 |
#'
|
|
636 |
#' @return Tabulated statistics for the given variable(s) from the logistic regression model.
|
|
637 |
#'
|
|
638 |
#' @examples
|
|
639 |
#' h_logistic_simple_terms("AGE", mod1)
|
|
640 |
#'
|
|
641 |
#' @export
|
|
642 |
h_logistic_simple_terms <- function(x, fit_glm, conf_level = 0.95) { |
|
643 | 53x |
checkmate::assert_multi_class(fit_glm, c("glm", "clogit")) |
644 | 53x |
if (inherits(fit_glm, "glm")) { |
645 | 42x |
checkmate::assert_set_equal(fit_glm$family$family, "binomial") |
646 |
}
|
|
647 | 53x |
terms_name <- attr(stats::terms(fit_glm), "term.labels") |
648 | 53x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
649 | 53x |
interaction <- terms_name[which(!terms_name %in% names(xs_class))] |
650 | 53x |
checkmate::assert_subset(x, terms_name) |
651 | 53x |
if (length(interaction) != 0) { |
652 |
# Make sure any item in x is not part of interaction term
|
|
653 | 2x |
checkmate::assert_disjunct(x, unlist(strsplit(interaction, ":"))) |
654 |
}
|
|
655 | 53x |
x_stats <- lapply(x, h_glm_simple_term_extract, fit_glm) |
656 | 53x |
x_stats <- do.call(rbind, x_stats) |
657 | 53x |
q_norm <- stats::qnorm((1 + conf_level) / 2) |
658 | 53x |
x_stats$odds_ratio <- lapply(x_stats$estimate, exp) |
659 | 53x |
x_stats$lcl <- Map(function(or, se) exp(log(or) - q_norm * se), x_stats$odds_ratio, x_stats$std_error) |
660 | 53x |
x_stats$ucl <- Map(function(or, se) exp(log(or) + q_norm * se), x_stats$odds_ratio, x_stats$std_error) |
661 | 53x |
x_stats$ci <- Map(function(lcl, ucl) c(lcl, ucl), lcl = x_stats$lcl, ucl = x_stats$ucl) |
662 | 53x |
x_stats
|
663 |
}
|
|
664 | ||
665 |
#' @describeIn h_logistic_regression Helper function to tabulate the results including
|
|
666 |
#' odds ratios and confidence intervals of interaction terms.
|
|
667 |
#'
|
|
668 |
#' @return Tabulated statistics for the given variable(s) from the logistic regression model.
|
|
669 |
#'
|
|
670 |
#' @examples
|
|
671 |
#' h_logistic_inter_terms(c("RACE", "AGE", "ARMCD", "AGE:ARMCD"), mod2)
|
|
672 |
#'
|
|
673 |
#' @export
|
|
674 |
h_logistic_inter_terms <- function(x, |
|
675 |
fit_glm,
|
|
676 |
conf_level = 0.95, |
|
677 |
at = NULL) { |
|
678 |
# Find out the interaction variables and interaction term.
|
|
679 | 5x |
inter_vars <- h_get_interaction_vars(fit_glm) |
680 | 5x |
checkmate::assert_vector(inter_vars, len = 2) |
681 | ||
682 | ||
683 | 5x |
inter_term_index <- intersect(grep(inter_vars[1], x), grep(inter_vars[2], x)) |
684 | 5x |
inter_term <- x[inter_term_index] |
685 | ||
686 |
# For the non-interaction vars we need the standard stuff.
|
|
687 | 5x |
normal_terms <- setdiff(x, union(inter_vars, inter_term)) |
688 | ||
689 | 5x |
x_stats <- lapply(normal_terms, h_glm_simple_term_extract, fit_glm) |
690 | 5x |
x_stats <- do.call(rbind, x_stats) |
691 | 5x |
q_norm <- stats::qnorm((1 + conf_level) / 2) |
692 | 5x |
x_stats$odds_ratio <- lapply(x_stats$estimate, exp) |
693 | 5x |
x_stats$lcl <- Map(function(or, se) exp(log(or) - q_norm * se), x_stats$odds_ratio, x_stats$std_error) |
694 | 5x |
x_stats$ucl <- Map(function(or, se) exp(log(or) + q_norm * se), x_stats$odds_ratio, x_stats$std_error) |
695 | 5x |
normal_stats <- x_stats |
696 | 5x |
normal_stats$is_reference_summary <- FALSE |
697 | ||
698 |
# Now the interaction term itself.
|
|
699 | 5x |
inter_term_stats <- h_glm_interaction_extract(inter_term, fit_glm) |
700 | 5x |
inter_term_stats$odds_ratio <- NA |
701 | 5x |
inter_term_stats$lcl <- NA |
702 | 5x |
inter_term_stats$ucl <- NA |
703 | 5x |
inter_term_stats$is_reference_summary <- FALSE |
704 | ||
705 | 5x |
is_intervar1_numeric <- attr(fit_glm$terms, "dataClasses")[inter_vars[1]] == "numeric" |
706 | ||
707 |
# Interaction stuff.
|
|
708 | 5x |
inter_stats_one <- h_glm_inter_term_extract( |
709 | 5x |
inter_vars[1], |
710 | 5x |
inter_vars[2], |
711 | 5x |
fit_glm,
|
712 | 5x |
conf_level = conf_level, |
713 | 5x |
at = `if`(is_intervar1_numeric, NULL, at) |
714 |
)
|
|
715 | 5x |
inter_stats_two <- h_glm_inter_term_extract( |
716 | 5x |
inter_vars[2], |
717 | 5x |
inter_vars[1], |
718 | 5x |
fit_glm,
|
719 | 5x |
conf_level = conf_level, |
720 | 5x |
at = `if`(is_intervar1_numeric, at, NULL) |
721 |
)
|
|
722 | ||
723 |
# Now just combine everything in one data frame.
|
|
724 | 5x |
col_names <- c( |
725 | 5x |
"variable",
|
726 | 5x |
"variable_label",
|
727 | 5x |
"term",
|
728 | 5x |
"term_label",
|
729 | 5x |
"interaction",
|
730 | 5x |
"interaction_label",
|
731 | 5x |
"reference",
|
732 | 5x |
"reference_label",
|
733 | 5x |
"estimate",
|
734 | 5x |
"std_error",
|
735 | 5x |
"df",
|
736 | 5x |
"pvalue",
|
737 | 5x |
"odds_ratio",
|
738 | 5x |
"lcl",
|
739 | 5x |
"ucl",
|
740 | 5x |
"is_variable_summary",
|
741 | 5x |
"is_term_summary",
|
742 | 5x |
"is_reference_summary"
|
743 |
)
|
|
744 | 5x |
df <- rbind( |
745 | 5x |
inter_stats_one[, col_names], |
746 | 5x |
inter_stats_two[, col_names], |
747 | 5x |
inter_term_stats[, col_names] |
748 |
)
|
|
749 | 5x |
if (length(normal_terms) > 0) { |
750 | 5x |
df <- rbind( |
751 | 5x |
normal_stats[, col_names], |
752 | 5x |
df
|
753 |
)
|
|
754 |
}
|
|
755 | 5x |
df$ci <- combine_vectors(df$lcl, df$ucl) |
756 | 5x |
df
|
757 |
}
|
1 |
#' Cox regression helper function for interactions
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Test and estimate the effect of a treatment in interaction with a covariate.
|
|
6 |
#' The effect is estimated as the HR of the tested treatment for a given level
|
|
7 |
#' of the covariate, in comparison to the treatment control.
|
|
8 |
#'
|
|
9 |
#' @inheritParams argument_convention
|
|
10 |
#' @param x (`numeric` or `factor`)\cr the values of the covariate to be tested.
|
|
11 |
#' @param effect (`string`)\cr the name of the effect to be tested and estimated.
|
|
12 |
#' @param covar (`string`)\cr the name of the covariate in the model.
|
|
13 |
#' @param mod (`coxph`)\cr the Cox regression model.
|
|
14 |
#' @param label (`string`)\cr the label to be returned as `term_label`.
|
|
15 |
#' @param control (`list`)\cr a list of controls as returned by [control_coxreg()].
|
|
16 |
#' @param ... see methods.
|
|
17 |
#'
|
|
18 |
#' @examples
|
|
19 |
#' library(survival)
|
|
20 |
#'
|
|
21 |
#' set.seed(1, kind = "Mersenne-Twister")
|
|
22 |
#'
|
|
23 |
#' # Testing dataset [survival::bladder].
|
|
24 |
#' dta_bladder <- with(
|
|
25 |
#' data = bladder[bladder$enum < 5, ],
|
|
26 |
#' data.frame(
|
|
27 |
#' time = stop,
|
|
28 |
#' status = event,
|
|
29 |
#' armcd = as.factor(rx),
|
|
30 |
#' covar1 = as.factor(enum),
|
|
31 |
#' covar2 = factor(
|
|
32 |
#' sample(as.factor(enum)),
|
|
33 |
#' levels = 1:4,
|
|
34 |
#' labels = c("F", "F", "M", "M")
|
|
35 |
#' )
|
|
36 |
#' )
|
|
37 |
#' )
|
|
38 |
#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
|
|
39 |
#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
|
|
40 |
#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
|
|
41 |
#'
|
|
42 |
#' plot(
|
|
43 |
#' survfit(Surv(time, status) ~ armcd + covar1, data = dta_bladder),
|
|
44 |
#' lty = 2:4,
|
|
45 |
#' xlab = "Months",
|
|
46 |
#' col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
|
|
47 |
#' )
|
|
48 |
#'
|
|
49 |
#' @name cox_regression_inter
|
|
50 |
NULL
|
|
51 | ||
52 |
#' @describeIn cox_regression_inter S3 generic helper function to determine interaction effect.
|
|
53 |
#'
|
|
54 |
#' @return
|
|
55 |
#' * `h_coxreg_inter_effect()` returns a `data.frame` of covariate interaction effects consisting of the following
|
|
56 |
#' variables: `effect`, `term`, `term_label`, `level`, `n`, `hr`, `lcl`, `ucl`, `pval`, and `pval_inter`.
|
|
57 |
#'
|
|
58 |
#' @export
|
|
59 |
h_coxreg_inter_effect <- function(x, |
|
60 |
effect,
|
|
61 |
covar,
|
|
62 |
mod,
|
|
63 |
label,
|
|
64 |
control,
|
|
65 |
...) { |
|
66 | 29x |
UseMethod("h_coxreg_inter_effect", x) |
67 |
}
|
|
68 | ||
69 |
#' @describeIn cox_regression_inter Method for `numeric` class. Estimates the interaction with a `numeric` covariate.
|
|
70 |
#'
|
|
71 |
#' @method h_coxreg_inter_effect numeric
|
|
72 |
#'
|
|
73 |
#' @param at (`list`)\cr a list with items named after the covariate, every
|
|
74 |
#' item is a vector of levels at which the interaction should be estimated.
|
|
75 |
#'
|
|
76 |
#' @export
|
|
77 |
h_coxreg_inter_effect.numeric <- function(x, |
|
78 |
effect,
|
|
79 |
covar,
|
|
80 |
mod,
|
|
81 |
label,
|
|
82 |
control,
|
|
83 |
at,
|
|
84 |
...) { |
|
85 | 7x |
betas <- stats::coef(mod) |
86 | 7x |
attrs <- attr(stats::terms(mod), "term.labels") |
87 | 7x |
term_indices <- grep( |
88 | 7x |
pattern = effect, |
89 | 7x |
x = attrs[!grepl("strata\\(", attrs)] |
90 |
)
|
|
91 | 7x |
checkmate::assert_vector(term_indices, len = 2) |
92 | 7x |
betas <- betas[term_indices] |
93 | 7x |
betas_var <- diag(stats::vcov(mod))[term_indices] |
94 | 7x |
betas_cov <- stats::vcov(mod)[term_indices[1], term_indices[2]] |
95 | 7x |
xval <- if (is.null(at[[covar]])) { |
96 | 6x |
stats::median(x) |
97 |
} else { |
|
98 | 1x |
at[[covar]] |
99 |
}
|
|
100 | 7x |
effect_index <- !grepl(covar, names(betas)) |
101 | 7x |
coef_hat <- betas[effect_index] + xval * betas[!effect_index] |
102 | 7x |
coef_se <- sqrt( |
103 | 7x |
betas_var[effect_index] + |
104 | 7x |
xval ^ 2 * betas_var[!effect_index] + # styler: off |
105 | 7x |
2 * xval * betas_cov |
106 |
)
|
|
107 | 7x |
q_norm <- stats::qnorm((1 + control$conf_level) / 2) |
108 | 7x |
data.frame( |
109 | 7x |
effect = "Covariate:", |
110 | 7x |
term = rep(covar, length(xval)), |
111 | 7x |
term_label = paste0(" ", xval), |
112 | 7x |
level = as.character(xval), |
113 | 7x |
n = NA, |
114 | 7x |
hr = exp(coef_hat), |
115 | 7x |
lcl = exp(coef_hat - q_norm * coef_se), |
116 | 7x |
ucl = exp(coef_hat + q_norm * coef_se), |
117 | 7x |
pval = NA, |
118 | 7x |
pval_inter = NA, |
119 | 7x |
stringsAsFactors = FALSE |
120 |
)
|
|
121 |
}
|
|
122 | ||
123 |
#' @describeIn cox_regression_inter Method for `factor` class. Estimate the interaction with a `factor` covariate.
|
|
124 |
#'
|
|
125 |
#' @method h_coxreg_inter_effect factor
|
|
126 |
#'
|
|
127 |
#' @param data (`data.frame`)\cr the data frame on which the model was fit.
|
|
128 |
#'
|
|
129 |
#' @export
|
|
130 |
h_coxreg_inter_effect.factor <- function(x, |
|
131 |
effect,
|
|
132 |
covar,
|
|
133 |
mod,
|
|
134 |
label,
|
|
135 |
control,
|
|
136 |
data,
|
|
137 |
...) { |
|
138 | 17x |
lvl_given <- levels(x) |
139 | 17x |
y <- h_coxreg_inter_estimations( |
140 | 17x |
variable = effect, given = covar, |
141 | 17x |
lvl_var = levels(data[[effect]]), |
142 | 17x |
lvl_given = lvl_given, |
143 | 17x |
mod = mod, |
144 | 17x |
conf_level = 0.95 |
145 | 17x |
)[[1]] |
146 | ||
147 | 17x |
data.frame( |
148 | 17x |
effect = "Covariate:", |
149 | 17x |
term = rep(covar, nrow(y)), |
150 | 17x |
term_label = paste0(" ", lvl_given), |
151 | 17x |
level = lvl_given, |
152 | 17x |
n = NA, |
153 | 17x |
hr = y[, "hr"], |
154 | 17x |
lcl = y[, "lcl"], |
155 | 17x |
ucl = y[, "ucl"], |
156 | 17x |
pval = NA, |
157 | 17x |
pval_inter = NA, |
158 | 17x |
stringsAsFactors = FALSE |
159 |
)
|
|
160 |
}
|
|
161 | ||
162 |
#' @describeIn cox_regression_inter Method for `character` class. Estimate the interaction with a `character` covariate.
|
|
163 |
#' This makes an automatic conversion to `factor` and then forwards to the method for factors.
|
|
164 |
#'
|
|
165 |
#' @method h_coxreg_inter_effect character
|
|
166 |
#'
|
|
167 |
#' @note
|
|
168 |
#' * Automatic conversion of character to factor does not guarantee results can be generated correctly. It is
|
|
169 |
#' therefore better to always pre-process the dataset such that factors are manually created from character
|
|
170 |
#' variables before passing the dataset to [rtables::build_table()].
|
|
171 |
#'
|
|
172 |
#' @export
|
|
173 |
h_coxreg_inter_effect.character <- function(x, |
|
174 |
effect,
|
|
175 |
covar,
|
|
176 |
mod,
|
|
177 |
label,
|
|
178 |
control,
|
|
179 |
data,
|
|
180 |
...) { |
|
181 | 5x |
y <- as.factor(x) |
182 | ||
183 | 5x |
h_coxreg_inter_effect( |
184 | 5x |
x = y, |
185 | 5x |
effect = effect, |
186 | 5x |
covar = covar, |
187 | 5x |
mod = mod, |
188 | 5x |
label = label, |
189 | 5x |
control = control, |
190 | 5x |
data = data, |
191 |
...
|
|
192 |
)
|
|
193 |
}
|
|
194 | ||
195 |
#' @describeIn cox_regression_inter A higher level function to get
|
|
196 |
#' the results of the interaction test and the estimated values.
|
|
197 |
#'
|
|
198 |
#' @return
|
|
199 |
#' * `h_coxreg_extract_interaction()` returns the result of an interaction test and the estimated values. If
|
|
200 |
#' no interaction, [h_coxreg_univar_extract()] is applied instead.
|
|
201 |
#'
|
|
202 |
#' @examples
|
|
203 |
#' mod <- coxph(Surv(time, status) ~ armcd * covar1, data = dta_bladder)
|
|
204 |
#' h_coxreg_extract_interaction(
|
|
205 |
#' mod = mod, effect = "armcd", covar = "covar1", data = dta_bladder,
|
|
206 |
#' control = control_coxreg()
|
|
207 |
#' )
|
|
208 |
#'
|
|
209 |
#' @export
|
|
210 |
h_coxreg_extract_interaction <- function(effect, |
|
211 |
covar,
|
|
212 |
mod,
|
|
213 |
data,
|
|
214 |
at,
|
|
215 |
control) { |
|
216 | 31x |
if (!any(attr(stats::terms(mod), "order") == 2)) { |
217 | 12x |
y <- h_coxreg_univar_extract( |
218 | 12x |
effect = effect, covar = covar, mod = mod, data = data, control = control |
219 |
)
|
|
220 | 12x |
y$pval_inter <- NA |
221 | 12x |
y
|
222 |
} else { |
|
223 | 19x |
test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method] |
224 | ||
225 |
# Test the main treatment effect.
|
|
226 | 19x |
mod_aov <- muffled_car_anova(mod, test_statistic) |
227 | 19x |
sum_anova <- broom::tidy(mod_aov) |
228 | 19x |
pval <- sum_anova[sum_anova$term == effect, ][["p.value"]] |
229 | ||
230 |
# Test the interaction effect.
|
|
231 | 19x |
pval_inter <- sum_anova[grep(":", sum_anova$term), ][["p.value"]] |
232 | 19x |
covar_test <- data.frame( |
233 | 19x |
effect = "Covariate:", |
234 | 19x |
term = covar, |
235 | 19x |
term_label = unname(labels_or_names(data[covar])), |
236 | 19x |
level = "", |
237 | 19x |
n = mod$n, hr = NA, lcl = NA, ucl = NA, pval = pval, |
238 | 19x |
pval_inter = pval_inter, |
239 | 19x |
stringsAsFactors = FALSE |
240 |
)
|
|
241 |
# Estimate the interaction.
|
|
242 | 19x |
y <- h_coxreg_inter_effect( |
243 | 19x |
data[[covar]], |
244 | 19x |
covar = covar, |
245 | 19x |
effect = effect, |
246 | 19x |
mod = mod, |
247 | 19x |
label = unname(labels_or_names(data[covar])), |
248 | 19x |
at = at, |
249 | 19x |
control = control, |
250 | 19x |
data = data |
251 |
)
|
|
252 | 19x |
rbind(covar_test, y) |
253 |
}
|
|
254 |
}
|
|
255 | ||
256 |
#' @describeIn cox_regression_inter Hazard ratio estimation in interactions.
|
|
257 |
#'
|
|
258 |
#' @param variable,given (`string`)\cr the name of variables in interaction. We seek the estimation
|
|
259 |
#' of the levels of `variable` given the levels of `given`.
|
|
260 |
#' @param lvl_var,lvl_given (`character`)\cr corresponding levels as given by [levels()].
|
|
261 |
#' @param mod (`coxph`)\cr a fitted Cox regression model (see [survival::coxph()]).
|
|
262 |
#'
|
|
263 |
#' @details Given the cox regression investigating the effect of Arm (A, B, C; reference A)
|
|
264 |
#' and Sex (F, M; reference Female) and the model being abbreviated: y ~ Arm + Sex + Arm:Sex.
|
|
265 |
#' The cox regression estimates the coefficients along with a variance-covariance matrix for:
|
|
266 |
#'
|
|
267 |
#' - b1 (arm b), b2 (arm c)
|
|
268 |
#' - b3 (sex m)
|
|
269 |
#' - b4 (arm b: sex m), b5 (arm c: sex m)
|
|
270 |
#'
|
|
271 |
#' The estimation of the Hazard Ratio for arm C/sex M is given in reference
|
|
272 |
#' to arm A/Sex M by exp(b2 + b3 + b5)/ exp(b3) = exp(b2 + b5).
|
|
273 |
#' The interaction coefficient is deduced by b2 + b5 while the standard error
|
|
274 |
#' is obtained as $sqrt(Var b2 + Var b5 + 2 * covariance (b2,b5))$.
|
|
275 |
#'
|
|
276 |
#' @return
|
|
277 |
#' * `h_coxreg_inter_estimations()` returns a list of matrices (one per level of variable) with rows corresponding
|
|
278 |
#' to the combinations of `variable` and `given`, with columns:
|
|
279 |
#' * `coef_hat`: Estimation of the coefficient.
|
|
280 |
#' * `coef_se`: Standard error of the estimation.
|
|
281 |
#' * `hr`: Hazard ratio.
|
|
282 |
#' * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio.
|
|
283 |
#'
|
|
284 |
#' @examples
|
|
285 |
#' mod <- coxph(Surv(time, status) ~ armcd * covar1, data = dta_bladder)
|
|
286 |
#' result <- h_coxreg_inter_estimations(
|
|
287 |
#' variable = "armcd", given = "covar1",
|
|
288 |
#' lvl_var = levels(dta_bladder$armcd),
|
|
289 |
#' lvl_given = levels(dta_bladder$covar1),
|
|
290 |
#' mod = mod, conf_level = .95
|
|
291 |
#' )
|
|
292 |
#' result
|
|
293 |
#'
|
|
294 |
#' @export
|
|
295 |
h_coxreg_inter_estimations <- function(variable, |
|
296 |
given,
|
|
297 |
lvl_var,
|
|
298 |
lvl_given,
|
|
299 |
mod,
|
|
300 |
conf_level = 0.95) { |
|
301 | 18x |
var_lvl <- paste0(variable, lvl_var[-1]) # [-1]: reference level |
302 | 18x |
giv_lvl <- paste0(given, lvl_given) |
303 | 18x |
design_mat <- expand.grid(variable = var_lvl, given = giv_lvl) |
304 | 18x |
design_mat <- design_mat[order(design_mat$variable, design_mat$given), ] |
305 | 18x |
design_mat <- within( |
306 | 18x |
data = design_mat, |
307 | 18x |
expr = { |
308 | 18x |
inter <- paste0(variable, ":", given) |
309 | 18x |
rev_inter <- paste0(given, ":", variable) |
310 |
}
|
|
311 |
)
|
|
312 | 18x |
split_by_variable <- design_mat$variable |
313 | 18x |
interaction_names <- paste(design_mat$variable, design_mat$given, sep = "/") |
314 | ||
315 | 18x |
mmat <- stats::model.matrix(mod)[1, ] |
316 | 18x |
mmat[!mmat == 0] <- 0 |
317 | ||
318 | 18x |
design_mat <- apply( |
319 | 18x |
X = design_mat, MARGIN = 1, FUN = function(x) { |
320 | 52x |
mmat[names(mmat) %in% x[-which(names(x) == "given")]] <- 1 |
321 | 52x |
mmat
|
322 |
}
|
|
323 |
)
|
|
324 | 18x |
colnames(design_mat) <- interaction_names |
325 | ||
326 | 18x |
coef <- stats::coef(mod) |
327 | 18x |
vcov <- stats::vcov(mod) |
328 | 18x |
betas <- as.matrix(coef) |
329 | 18x |
coef_hat <- t(design_mat) %*% betas |
330 | 18x |
dimnames(coef_hat)[2] <- "coef" |
331 | 18x |
coef_se <- apply( |
332 | 18x |
design_mat, 2, |
333 | 18x |
function(x) { |
334 | 52x |
vcov_el <- as.logical(x) |
335 | 52x |
y <- vcov[vcov_el, vcov_el] |
336 | 52x |
y <- sum(y) |
337 | 52x |
y <- sqrt(y) |
338 | 52x |
return(y) |
339 |
}
|
|
340 |
)
|
|
341 | 18x |
q_norm <- stats::qnorm((1 + conf_level) / 2) |
342 | 18x |
y <- cbind(coef_hat, `se(coef)` = coef_se) |
343 | 18x |
y <- apply(y, 1, function(x) { |
344 | 52x |
x["hr"] <- exp(x["coef"]) |
345 | 52x |
x["lcl"] <- exp(x["coef"] - q_norm * x["se(coef)"]) |
346 | 52x |
x["ucl"] <- exp(x["coef"] + q_norm * x["se(coef)"]) |
347 | 52x |
x
|
348 |
}) |
|
349 | 18x |
y <- t(y) |
350 | 18x |
y <- by(y, split_by_variable, identity) |
351 | 18x |
y <- lapply(y, as.matrix) |
352 | 18x |
attr(y, "details") <- paste0( |
353 | 18x |
"Estimations of ", variable, |
354 | 18x |
" hazard ratio given the level of ", given, " compared to ", |
355 | 18x |
variable, " level ", lvl_var[1], "." |
356 |
)
|
|
357 | 18x |
y
|
358 |
}
|
1 |
#' Count patients by most extreme post-baseline toxicity grade per direction of abnormality
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [count_abnormal_by_worst_grade()] creates a layout element to count patients by highest (worst)
|
|
6 |
#' analysis toxicity grade post-baseline for each direction, categorized by parameter value.
|
|
7 |
#'
|
|
8 |
#' This function analyzes primary analysis variable `var` which indicates toxicity grades. Additional
|
|
9 |
#' analysis variables that can be supplied as a list via the `variables` parameter are `id` (defaults to
|
|
10 |
#' `USUBJID`), a variable to indicate unique subject identifiers, `param` (defaults to `PARAM`), a variable
|
|
11 |
#' to indicate parameter values, and `grade_dir` (defaults to `GRADE_DIR`), a variable to indicate directions
|
|
12 |
#' (e.g. High or Low) for each toxicity grade supplied in `var`.
|
|
13 |
#'
|
|
14 |
#' For each combination of `param` and `grade_dir` levels, patient counts by worst
|
|
15 |
#' grade are calculated as follows:
|
|
16 |
#' * `1` to `4`: The number of patients with worst grades 1-4, respectively.
|
|
17 |
#' * `Any`: The number of patients with at least one abnormality (i.e. grade is not 0).
|
|
18 |
#'
|
|
19 |
#' Fractions are calculated by dividing the above counts by the number of patients with at least one
|
|
20 |
#' valid measurement recorded during treatment.
|
|
21 |
#'
|
|
22 |
#' Pre-processing is crucial when using this function and can be done automatically using the
|
|
23 |
#' [h_adlb_abnormal_by_worst_grade()] helper function. See the description of this function for details on the
|
|
24 |
#' necessary pre-processing steps.
|
|
25 |
#'
|
|
26 |
#' Prior to using this function in your table layout you must use [rtables::split_rows_by()] to create two row
|
|
27 |
#' splits, one on variable `param` and one on variable `grade_dir`.
|
|
28 |
#'
|
|
29 |
#' @inheritParams argument_convention
|
|
30 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
31 |
#'
|
|
32 |
#' Options are: ``r shQuote(get_stats("abnormal_by_worst_grade"), type = "sh")``
|
|
33 |
#'
|
|
34 |
#' @seealso [h_adlb_abnormal_by_worst_grade()] which pre-processes ADLB data frames to be used in
|
|
35 |
#' [count_abnormal_by_worst_grade()].
|
|
36 |
#'
|
|
37 |
#' @name abnormal_by_worst_grade
|
|
38 |
#' @order 1
|
|
39 |
NULL
|
|
40 | ||
41 |
#' @describeIn abnormal_by_worst_grade Statistics function which counts patients by worst grade.
|
|
42 |
#'
|
|
43 |
#' @return
|
|
44 |
#' * `s_count_abnormal_by_worst_grade()` returns the single statistic `count_fraction` with grades 1 to 4 and
|
|
45 |
#' "Any" results.
|
|
46 |
#'
|
|
47 |
#' @keywords internal
|
|
48 |
s_count_abnormal_by_worst_grade <- function(df, |
|
49 |
.var = "GRADE_ANL", |
|
50 |
.spl_context,
|
|
51 |
variables = list( |
|
52 |
id = "USUBJID", |
|
53 |
param = "PARAM", |
|
54 |
grade_dir = "GRADE_DIR" |
|
55 |
),
|
|
56 |
...) { |
|
57 | 5x |
checkmate::assert_string(.var) |
58 | 5x |
assert_valid_factor(df[[.var]]) |
59 | 5x |
assert_valid_factor(df[[variables$param]]) |
60 | 4x |
assert_valid_factor(df[[variables$grade_dir]]) |
61 | 4x |
assert_df_with_variables(df, c(a = .var, variables)) |
62 | 4x |
checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character")) |
63 | ||
64 |
# To verify that the `split_rows_by` are performed with correct variables.
|
|
65 | 4x |
checkmate::assert_subset(c(variables[["param"]], variables[["grade_dir"]]), .spl_context$split) |
66 | 4x |
first_row <- .spl_context[.spl_context$split == variables[["param"]], ] |
67 | 4x |
x_lvls <- c(setdiff(levels(df[[.var]]), "0"), "Any") |
68 | 4x |
result <- split(numeric(0), factor(x_lvls)) |
69 | ||
70 | 4x |
subj <- first_row$full_parent_df[[1]][[variables[["id"]]]] |
71 | 4x |
subj_cur_col <- subj[first_row$cur_col_subset[[1]]] |
72 |
# Some subjects may have a record for high and low directions but
|
|
73 |
# should be counted only once.
|
|
74 | 4x |
denom <- length(unique(subj_cur_col)) |
75 | ||
76 | 4x |
for (lvl in x_lvls) { |
77 | 20x |
if (lvl != "Any") { |
78 | 16x |
df_lvl <- df[df[[.var]] == lvl, ] |
79 |
} else { |
|
80 | 4x |
df_lvl <- df[df[[.var]] != 0, ] |
81 |
}
|
|
82 | 20x |
num <- length(unique(df_lvl[[variables[["id"]]]])) |
83 | 20x |
fraction <- ifelse(denom == 0, 0, num / denom) |
84 | 20x |
result[[lvl]] <- formatters::with_label(c(count = num, fraction = fraction), lvl) |
85 |
}
|
|
86 | ||
87 | 4x |
result <- list(count_fraction = result) |
88 | 4x |
result
|
89 |
}
|
|
90 | ||
91 |
#' @describeIn abnormal_by_worst_grade Formatted analysis function which is used as `afun`
|
|
92 |
#' in `count_abnormal_by_worst_grade()`.
|
|
93 |
#'
|
|
94 |
#' @return
|
|
95 |
#' * `a_count_abnormal_by_worst_grade()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
96 |
#'
|
|
97 |
#' @keywords internal
|
|
98 |
a_count_abnormal_by_worst_grade <- function(df, |
|
99 |
...,
|
|
100 |
.stats = NULL, |
|
101 |
.stat_names = NULL, |
|
102 |
.formats = NULL, |
|
103 |
.labels = NULL, |
|
104 |
.indent_mods = NULL) { |
|
105 |
# Check for additional parameters to the statistics function
|
|
106 | 4x |
dots_extra_args <- list(...) |
107 | 4x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
108 | 4x |
dots_extra_args$.additional_fun_parameters <- NULL |
109 | ||
110 |
# Check for user-defined functions
|
|
111 | 4x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
112 | 4x |
.stats <- default_and_custom_stats_list$all_stats |
113 | 4x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
114 | ||
115 |
# Apply statistics function
|
|
116 | 4x |
x_stats <- .apply_stat_functions( |
117 | 4x |
default_stat_fnc = s_count_abnormal_by_worst_grade, |
118 | 4x |
custom_stat_fnc_list = custom_stat_functions, |
119 | 4x |
args_list = c( |
120 | 4x |
df = list(df), |
121 | 4x |
extra_afun_params,
|
122 | 4x |
dots_extra_args
|
123 |
)
|
|
124 |
)
|
|
125 | ||
126 |
# Fill in formatting defaults
|
|
127 | 3x |
.stats <- get_stats("abnormal_by_worst_grade", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) |
128 | 3x |
levels_per_stats <- lapply(x_stats, names) |
129 | 3x |
.formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) |
130 | 3x |
.labels <- get_labels_from_stats(.stats, .labels, levels_per_stats) |
131 | 3x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) |
132 | ||
133 | 3x |
x_stats <- x_stats[.stats] %>% |
134 | 3x |
.unlist_keep_nulls() %>% |
135 | 3x |
setNames(names(.formats)) |
136 | ||
137 |
# Auto format handling
|
|
138 | 3x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
139 | ||
140 |
# Get and check statistical names
|
|
141 | 3x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
142 | ||
143 | 3x |
in_rows( |
144 | 3x |
.list = x_stats, |
145 | 3x |
.formats = .formats, |
146 | 3x |
.names = .labels %>% .unlist_keep_nulls(), |
147 | 3x |
.stat_names = .stat_names, |
148 | 3x |
.labels = .labels %>% .unlist_keep_nulls(), |
149 | 3x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
150 |
)
|
|
151 |
}
|
|
152 | ||
153 |
#' @describeIn abnormal_by_worst_grade Layout-creating function which can take statistics function arguments
|
|
154 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
155 |
#'
|
|
156 |
#' @return
|
|
157 |
#' * `count_abnormal_by_worst_grade()` returns a layout object suitable for passing to further layouting functions,
|
|
158 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
159 |
#' the statistics from `s_count_abnormal_by_worst_grade()` to the table layout.
|
|
160 |
#'
|
|
161 |
#' @examples
|
|
162 |
#' library(dplyr)
|
|
163 |
#' library(forcats)
|
|
164 |
#' adlb <- tern_ex_adlb
|
|
165 |
#'
|
|
166 |
#' # Data is modified in order to have some parameters with grades only in one direction
|
|
167 |
#' # and simulate the real data.
|
|
168 |
#' adlb$ATOXGR[adlb$PARAMCD == "ALT" & adlb$ATOXGR %in% c("1", "2", "3", "4")] <- "-1"
|
|
169 |
#' adlb$ANRIND[adlb$PARAMCD == "ALT" & adlb$ANRIND == "HIGH"] <- "LOW"
|
|
170 |
#' adlb$WGRHIFL[adlb$PARAMCD == "ALT"] <- ""
|
|
171 |
#'
|
|
172 |
#' adlb$ATOXGR[adlb$PARAMCD == "IGA" & adlb$ATOXGR %in% c("-1", "-2", "-3", "-4")] <- "1"
|
|
173 |
#' adlb$ANRIND[adlb$PARAMCD == "IGA" & adlb$ANRIND == "LOW"] <- "HIGH"
|
|
174 |
#' adlb$WGRLOFL[adlb$PARAMCD == "IGA"] <- ""
|
|
175 |
#'
|
|
176 |
#' # Pre-processing
|
|
177 |
#' adlb_f <- adlb %>% h_adlb_abnormal_by_worst_grade()
|
|
178 |
#'
|
|
179 |
#' # Map excludes records without abnormal grade since they should not be displayed
|
|
180 |
#' # in the table.
|
|
181 |
#' map <- unique(adlb_f[adlb_f$GRADE_DIR != "ZERO", c("PARAM", "GRADE_DIR", "GRADE_ANL")]) %>%
|
|
182 |
#' lapply(as.character) %>%
|
|
183 |
#' as.data.frame() %>%
|
|
184 |
#' arrange(PARAM, desc(GRADE_DIR), GRADE_ANL)
|
|
185 |
#'
|
|
186 |
#' basic_table() %>%
|
|
187 |
#' split_cols_by("ARMCD") %>%
|
|
188 |
#' split_rows_by("PARAM") %>%
|
|
189 |
#' split_rows_by("GRADE_DIR", split_fun = trim_levels_to_map(map)) %>%
|
|
190 |
#' count_abnormal_by_worst_grade(
|
|
191 |
#' var = "GRADE_ANL",
|
|
192 |
#' variables = list(id = "USUBJID", param = "PARAM", grade_dir = "GRADE_DIR")
|
|
193 |
#' ) %>%
|
|
194 |
#' build_table(df = adlb_f)
|
|
195 |
#'
|
|
196 |
#' @export
|
|
197 |
#' @order 2
|
|
198 |
count_abnormal_by_worst_grade <- function(lyt, |
|
199 |
var,
|
|
200 |
variables = list( |
|
201 |
id = "USUBJID", |
|
202 |
param = "PARAM", |
|
203 |
grade_dir = "GRADE_DIR" |
|
204 |
),
|
|
205 |
na_str = default_na_str(), |
|
206 |
nested = TRUE, |
|
207 |
...,
|
|
208 |
.stats = "count_fraction", |
|
209 |
.stat_names = NULL, |
|
210 |
.formats = list(count_fraction = format_count_fraction), |
|
211 |
.labels = NULL, |
|
212 |
.indent_mods = NULL) { |
|
213 |
# Process standard extra arguments
|
|
214 | 2x |
extra_args <- list(".stats" = .stats) |
215 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
216 | 2x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
217 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
218 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
219 | ||
220 |
# Process additional arguments to the statistic function
|
|
221 | 2x |
extra_args <- c(extra_args, "variables" = list(variables), ...) |
222 | ||
223 |
# Append additional info from layout to the analysis function
|
|
224 | 2x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
225 | 2x |
formals(a_count_abnormal_by_worst_grade) <- c( |
226 | 2x |
formals(a_count_abnormal_by_worst_grade), extra_args[[".additional_fun_parameters"]] |
227 |
)
|
|
228 | ||
229 | 2x |
analyze( |
230 | 2x |
lyt = lyt, |
231 | 2x |
vars = var, |
232 | 2x |
afun = a_count_abnormal_by_worst_grade, |
233 | 2x |
na_str = na_str, |
234 | 2x |
nested = nested, |
235 | 2x |
extra_args = extra_args, |
236 | 2x |
show_labels = "hidden" |
237 |
)
|
|
238 |
}
|
|
239 | ||
240 |
#' Helper function to prepare ADLB for `count_abnormal_by_worst_grade()`
|
|
241 |
#'
|
|
242 |
#' @description `r lifecycle::badge("stable")`
|
|
243 |
#'
|
|
244 |
#' Helper function to prepare an ADLB data frame to be used as input in
|
|
245 |
#' [count_abnormal_by_worst_grade()]. The following pre-processing steps are applied:
|
|
246 |
#'
|
|
247 |
#' 1. `adlb` is filtered on variable `avisit` to only include post-baseline visits.
|
|
248 |
#' 2. `adlb` is filtered on variables `worst_flag_low` and `worst_flag_high` so that only
|
|
249 |
#' worst grades (in either direction) are included.
|
|
250 |
#' 3. From the standard lab grade variable `atoxgr`, the following two variables are derived
|
|
251 |
#' and added to `adlb`:
|
|
252 |
#' * A grade direction variable (e.g. `GRADE_DIR`). The variable takes value `"HIGH"` when
|
|
253 |
#' `atoxgr > 0`, `"LOW"` when `atoxgr < 0`, and `"ZERO"` otherwise.
|
|
254 |
#' * A toxicity grade variable (e.g. `GRADE_ANL`) where all negative values from `atoxgr` are
|
|
255 |
#' replaced by their absolute values.
|
|
256 |
#' 4. Unused factor levels are dropped from `adlb` via [droplevels()].
|
|
257 |
#'
|
|
258 |
#' @param adlb (`data.frame`)\cr ADLB data frame.
|
|
259 |
#' @param atoxgr (`string`)\cr name of the analysis toxicity grade variable. This must be a `factor`
|
|
260 |
#' variable.
|
|
261 |
#' @param avisit (`string`)\cr name of the analysis visit variable.
|
|
262 |
#' @param worst_flag_low (`string`)\cr name of the worst low lab grade flag variable. This variable is
|
|
263 |
#' set to `"Y"` when indicating records of worst low lab grades.
|
|
264 |
#' @param worst_flag_high (`string`)\cr name of the worst high lab grade flag variable. This variable is
|
|
265 |
#' set to `"Y"` when indicating records of worst high lab grades.
|
|
266 |
#'
|
|
267 |
#' @return `h_adlb_abnormal_by_worst_grade()` returns the `adlb` data frame with two new
|
|
268 |
#' variables: `GRADE_DIR` and `GRADE_ANL`.
|
|
269 |
#'
|
|
270 |
#' @seealso [abnormal_by_worst_grade]
|
|
271 |
#'
|
|
272 |
#' @examples
|
|
273 |
#' h_adlb_abnormal_by_worst_grade(tern_ex_adlb) %>%
|
|
274 |
#' dplyr::select(ATOXGR, GRADE_DIR, GRADE_ANL) %>%
|
|
275 |
#' head(10)
|
|
276 |
#'
|
|
277 |
#' @export
|
|
278 |
h_adlb_abnormal_by_worst_grade <- function(adlb, |
|
279 |
atoxgr = "ATOXGR", |
|
280 |
avisit = "AVISIT", |
|
281 |
worst_flag_low = "WGRLOFL", |
|
282 |
worst_flag_high = "WGRHIFL") { |
|
283 | 1x |
adlb %>% |
284 | 1x |
dplyr::filter( |
285 | 1x |
!.data[[avisit]] %in% c("SCREENING", "BASELINE"), |
286 | 1x |
.data[[worst_flag_low]] == "Y" | .data[[worst_flag_high]] == "Y" |
287 |
) %>% |
|
288 | 1x |
dplyr::mutate( |
289 | 1x |
GRADE_DIR = factor( |
290 | 1x |
dplyr::case_when( |
291 | 1x |
.data[[atoxgr]] %in% c("-1", "-2", "-3", "-4") ~ "LOW", |
292 | 1x |
.data[[atoxgr]] == "0" ~ "ZERO", |
293 | 1x |
.data[[atoxgr]] %in% c("1", "2", "3", "4") ~ "HIGH" |
294 |
),
|
|
295 | 1x |
levels = c("LOW", "ZERO", "HIGH") |
296 |
),
|
|
297 | 1x |
GRADE_ANL = forcats::fct_relevel( |
298 | 1x |
forcats::fct_recode(.data[[atoxgr]], `1` = "-1", `2` = "-2", `3` = "-3", `4` = "-4"), |
299 | 1x |
c("0", "1", "2", "3", "4") |
300 |
)
|
|
301 |
) %>% |
|
302 | 1x |
droplevels() |
303 |
}
|
1 |
#' Tabulate binary response by subgroup
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The [tabulate_rsp_subgroups()] function creates a layout element to tabulate binary response by subgroup, returning
|
|
6 |
#' statistics including response rate and odds ratio for each population subgroup. The table is created from `df`, a
|
|
7 |
#' list of data frames returned by [extract_rsp_subgroups()], with the statistics to include specified via the `vars`
|
|
8 |
#' parameter.
|
|
9 |
#'
|
|
10 |
#' A forest plot can be created from the resulting table using the [g_forest()] function.
|
|
11 |
#'
|
|
12 |
#' @inheritParams extract_rsp_subgroups
|
|
13 |
#' @inheritParams argument_convention
|
|
14 |
#'
|
|
15 |
#' @details These functions create a layout starting from a data frame which contains
|
|
16 |
#' the required statistics. Tables typically used as part of forest plot.
|
|
17 |
#'
|
|
18 |
#' @seealso [extract_rsp_subgroups()]
|
|
19 |
#'
|
|
20 |
#' @examples
|
|
21 |
#' library(dplyr)
|
|
22 |
#' library(forcats)
|
|
23 |
#'
|
|
24 |
#' adrs <- tern_ex_adrs
|
|
25 |
#' adrs_labels <- formatters::var_labels(adrs)
|
|
26 |
#'
|
|
27 |
#' adrs_f <- adrs %>%
|
|
28 |
#' filter(PARAMCD == "BESRSPI") %>%
|
|
29 |
#' filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
|
|
30 |
#' droplevels() %>%
|
|
31 |
#' mutate(
|
|
32 |
#' # Reorder levels of factor to make the placebo group the reference arm.
|
|
33 |
#' ARM = fct_relevel(ARM, "B: Placebo"),
|
|
34 |
#' rsp = AVALC == "CR"
|
|
35 |
#' )
|
|
36 |
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
|
|
37 |
#'
|
|
38 |
#' # Unstratified analysis.
|
|
39 |
#' df <- extract_rsp_subgroups(
|
|
40 |
#' variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
|
|
41 |
#' data = adrs_f
|
|
42 |
#' )
|
|
43 |
#' df
|
|
44 |
#'
|
|
45 |
#' # Stratified analysis.
|
|
46 |
#' df_strat <- extract_rsp_subgroups(
|
|
47 |
#' variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2"), strata = "STRATA1"),
|
|
48 |
#' data = adrs_f
|
|
49 |
#' )
|
|
50 |
#' df_strat
|
|
51 |
#'
|
|
52 |
#' # Grouping of the BMRKR2 levels.
|
|
53 |
#' df_grouped <- extract_rsp_subgroups(
|
|
54 |
#' variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
|
|
55 |
#' data = adrs_f,
|
|
56 |
#' groups_lists = list(
|
|
57 |
#' BMRKR2 = list(
|
|
58 |
#' "low" = "LOW",
|
|
59 |
#' "low/medium" = c("LOW", "MEDIUM"),
|
|
60 |
#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
|
|
61 |
#' )
|
|
62 |
#' )
|
|
63 |
#' )
|
|
64 |
#' df_grouped
|
|
65 |
#'
|
|
66 |
#' @name response_subgroups
|
|
67 |
#' @order 1
|
|
68 |
NULL
|
|
69 | ||
70 |
#' Prepare response data for population subgroups in data frames
|
|
71 |
#'
|
|
72 |
#' @description `r lifecycle::badge("stable")`
|
|
73 |
#'
|
|
74 |
#' Prepares response rates and odds ratios for population subgroups in data frames. Simple wrapper
|
|
75 |
#' for [h_odds_ratio_subgroups_df()] and [h_proportion_subgroups_df()]. Result is a list of two
|
|
76 |
#' `data.frames`: `prop` and `or`. `variables` corresponds to the names of variables found in `data`,
|
|
77 |
#' passed as a named `list` and requires elements `rsp`, `arm` and optionally `subgroups` and `strata`.
|
|
78 |
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
|
|
79 |
#'
|
|
80 |
#' @inheritParams argument_convention
|
|
81 |
#' @inheritParams response_subgroups
|
|
82 |
#' @param label_all (`string`)\cr label for the total population analysis.
|
|
83 |
#'
|
|
84 |
#' @return A named list of two elements:
|
|
85 |
#' * `prop`: A `data.frame` containing columns `arm`, `n`, `n_rsp`, `prop`, `subgroup`, `var`,
|
|
86 |
#' `var_label`, and `row_type`.
|
|
87 |
#' * `or`: A `data.frame` containing columns `arm`, `n_tot`, `or`, `lcl`, `ucl`, `conf_level`,
|
|
88 |
#' `subgroup`, `var`, `var_label`, and `row_type`.
|
|
89 |
#'
|
|
90 |
#' @seealso [response_subgroups]
|
|
91 |
#'
|
|
92 |
#' @export
|
|
93 |
extract_rsp_subgroups <- function(variables, |
|
94 |
data,
|
|
95 |
groups_lists = list(), |
|
96 |
conf_level = 0.95, |
|
97 |
method = NULL, |
|
98 |
label_all = "All Patients") { |
|
99 | 14x |
if ("strat" %in% names(variables)) { |
100 | ! |
warning( |
101 | ! |
"Warning: the `strat` element name of the `variables` list argument to `extract_rsp_subgroups() ",
|
102 | ! |
"was deprecated in tern 0.9.4.\n ",
|
103 | ! |
"Please use the name `strata` instead of `strat` in the `variables` argument."
|
104 |
)
|
|
105 | ! |
variables[["strata"]] <- variables[["strat"]] |
106 |
}
|
|
107 | ||
108 | 14x |
df_prop <- h_proportion_subgroups_df( |
109 | 14x |
variables,
|
110 | 14x |
data,
|
111 | 14x |
groups_lists = groups_lists, |
112 | 14x |
label_all = label_all |
113 |
)
|
|
114 | 14x |
df_or <- h_odds_ratio_subgroups_df( |
115 | 14x |
variables,
|
116 | 14x |
data,
|
117 | 14x |
groups_lists = groups_lists, |
118 | 14x |
conf_level = conf_level, |
119 | 14x |
method = method, |
120 | 14x |
label_all = label_all |
121 |
)
|
|
122 | ||
123 | 14x |
list(prop = df_prop, or = df_or) |
124 |
}
|
|
125 | ||
126 |
#' @describeIn response_subgroups Formatted analysis function which is used as `afun` in `tabulate_rsp_subgroups()`.
|
|
127 |
#'
|
|
128 |
#' @return
|
|
129 |
#' * `a_response_subgroups()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
130 |
#'
|
|
131 |
#' @keywords internal
|
|
132 |
a_response_subgroups <- function(df, |
|
133 |
labelstr = "", |
|
134 |
...,
|
|
135 |
.stats = NULL, |
|
136 |
.stat_names = NULL, |
|
137 |
.formats = NULL, |
|
138 |
.labels = NULL, |
|
139 |
.indent_mods = NULL) { |
|
140 |
# Check for additional parameters to the statistics function
|
|
141 | 375x |
dots_extra_args <- list(...) |
142 | 375x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
143 | 375x |
dots_extra_args$.additional_fun_parameters <- NULL |
144 | 375x |
cur_col_stat <- extra_afun_params$.var %||% .stats |
145 | ||
146 |
# Uniquely name & label rows
|
|
147 | 375x |
var_lvls <- if ("biomarker" %in% names(dots_extra_args) && "biomarker" %in% names(df)) { |
148 | 90x |
if ("overall" %in% names(dots_extra_args)) { # label rows for (nested) biomarker tables - e.g. "AGE", "BMRKR1" |
149 | 42x |
as.character(df$biomarker) |
150 | 375x |
} else { # data rows for (nested) biomarker tables - e.g. "AGE.LOW", "BMRKR1.Total Patients" |
151 | 48x |
paste(as.character(df$biomarker), as.character(df$subgroup), sep = ".") |
152 |
}
|
|
153 | 375x |
} else { # data rows for non-biomarker tables - e.g. "Total Patients", "F", "M" |
154 | 285x |
make.unique(as.character(df$subgroup)) |
155 |
}
|
|
156 | ||
157 |
# if empty, return NA
|
|
158 | 375x |
if (nrow(df) == 0) { |
159 | 1x |
return(in_rows(.list = list(NA) %>% stats::setNames(cur_col_stat))) |
160 |
}
|
|
161 | ||
162 |
# Main statistics taken from df
|
|
163 | 374x |
x_stats <- as.list(df) |
164 | ||
165 |
# Fill in formatting defaults
|
|
166 | 374x |
.stats <- get_stats("tabulate_rsp_subgroups", stats_in = cur_col_stat) |
167 | 374x |
levels_per_stats <- rep(list(var_lvls), length(.stats)) %>% setNames(.stats) |
168 | 374x |
.formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) |
169 | 374x |
.labels <- get_labels_from_stats( |
170 | 374x |
.stats, .labels, levels_per_stats, |
171 |
# default labels are pre-determined in extract_*() function
|
|
172 | 374x |
tern_defaults = as.list(as.character(df$subgroup)) %>% setNames(var_lvls) |
173 |
)
|
|
174 | 374x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) |
175 | ||
176 | 374x |
x_stats <- lapply( |
177 | 374x |
.stats,
|
178 | 374x |
function(x) x_stats[[x]] %>% stats::setNames(var_lvls) |
179 |
) %>% |
|
180 | 374x |
stats::setNames(.stats) %>% |
181 | 374x |
.unlist_keep_nulls() |
182 | ||
183 | 374x |
.nms <- if ("biomarker" %in% names(dots_extra_args)) var_lvls else names(.labels) |
184 | ||
185 |
# Auto format handling
|
|
186 | 374x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
187 | ||
188 |
# Get and check statistical names
|
|
189 | 374x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
190 | ||
191 | 374x |
in_rows( |
192 | 374x |
.list = x_stats, |
193 | 374x |
.formats = .formats, |
194 | 374x |
.names = .nms, |
195 | 374x |
.stat_names = .stat_names, |
196 | 374x |
.labels = .labels %>% .unlist_keep_nulls(), |
197 | 374x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
198 |
)
|
|
199 |
}
|
|
200 | ||
201 |
#' @describeIn response_subgroups Table-creating function which creates a table
|
|
202 |
#' summarizing binary response by subgroup. This function is a wrapper for [rtables::analyze_colvars()]
|
|
203 |
#' and [rtables::summarize_row_groups()].
|
|
204 |
#'
|
|
205 |
#' @param df (`list`)\cr a list of data frames containing all analysis variables. List should be
|
|
206 |
#' created using [extract_rsp_subgroups()].
|
|
207 |
#' @param vars (`character`)\cr the names of statistics to be reported among:
|
|
208 |
#' * `n`: Total number of observations per group.
|
|
209 |
#' * `n_rsp`: Number of responders per group.
|
|
210 |
#' * `prop`: Proportion of responders.
|
|
211 |
#' * `n_tot`: Total number of observations.
|
|
212 |
#' * `or`: Odds ratio.
|
|
213 |
#' * `ci` : Confidence interval of odds ratio.
|
|
214 |
#' * `pval`: p-value of the effect.
|
|
215 |
#' Note, the statistics `n_tot`, `or`, and `ci` are required.
|
|
216 |
#' @param riskdiff (`list`)\cr if a risk (proportion) difference column should be added, a list of settings to apply
|
|
217 |
#' within the column. See [control_riskdiff()] for details. If `NULL`, no risk difference column will be added. If
|
|
218 |
#' `riskdiff$arm_x` and `riskdiff$arm_y` are `NULL`, the first level of `df$prop$arm` will be used as `arm_x` and
|
|
219 |
#' the second level as `arm_y`.
|
|
220 |
#'
|
|
221 |
#' @return An `rtables` table summarizing binary response by subgroup.
|
|
222 |
#'
|
|
223 |
#' @examples
|
|
224 |
#' # Table with default columns
|
|
225 |
#' basic_table() %>%
|
|
226 |
#' tabulate_rsp_subgroups(df)
|
|
227 |
#'
|
|
228 |
#' # Table with selected columns
|
|
229 |
#' basic_table() %>%
|
|
230 |
#' tabulate_rsp_subgroups(
|
|
231 |
#' df = df,
|
|
232 |
#' vars = c("n_tot", "n", "n_rsp", "prop", "or", "ci")
|
|
233 |
#' )
|
|
234 |
#'
|
|
235 |
#' # Table with risk difference column added
|
|
236 |
#' basic_table() %>%
|
|
237 |
#' tabulate_rsp_subgroups(
|
|
238 |
#' df,
|
|
239 |
#' riskdiff = control_riskdiff(
|
|
240 |
#' arm_x = levels(df$prop$arm)[1],
|
|
241 |
#' arm_y = levels(df$prop$arm)[2]
|
|
242 |
#' )
|
|
243 |
#' )
|
|
244 |
#'
|
|
245 |
#' @export
|
|
246 |
#' @order 2
|
|
247 |
tabulate_rsp_subgroups <- function(lyt, |
|
248 |
df,
|
|
249 |
vars = c("n_tot", "n", "prop", "or", "ci"), |
|
250 |
groups_lists = list(), |
|
251 |
label_all = lifecycle::deprecated(), |
|
252 |
riskdiff = NULL, |
|
253 |
na_str = default_na_str(), |
|
254 |
...,
|
|
255 |
.stat_names = NULL, |
|
256 |
.formats = NULL, |
|
257 |
.labels = NULL, |
|
258 |
.indent_mods = NULL) { |
|
259 | 14x |
checkmate::assert_list(riskdiff, null.ok = TRUE) |
260 | 14x |
checkmate::assert_true(all(c("n_tot", "or", "ci") %in% vars)) |
261 | 14x |
if ("pval" %in% vars && !"pval" %in% names(df$or)) { |
262 | 1x |
warning( |
263 | 1x |
'The "pval" statistic has been selected but is not present in "df" so it will not be included in the output ',
|
264 | 1x |
'table. To include the "pval" statistic, please specify a p-value test when generating "df" via ',
|
265 | 1x |
'the "method" argument to `extract_rsp_subgroups()`. If method = "cmh", strata must also be specified via the ',
|
266 | 1x |
'"variables" argument to `extract_rsp_subgroups()`.'
|
267 |
)
|
|
268 |
}
|
|
269 | ||
270 | 14x |
if (lifecycle::is_present(label_all)) { |
271 | ! |
lifecycle::deprecate_warn( |
272 | ! |
"0.9.8", "tabulate_rsp_subgroups(label_all)", |
273 | ! |
details = |
274 | ! |
"Please assign the `label_all` parameter within the `extract_rsp_subgroups()` function when creating `df`."
|
275 |
)
|
|
276 |
}
|
|
277 | ||
278 |
# Process standard extra arguments
|
|
279 | 14x |
extra_args <- list(".stats" = vars) |
280 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
281 | 1x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
282 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
283 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
284 | ||
285 |
# Create "ci" column from "lcl" and "ucl"
|
|
286 | 14x |
df$or$ci <- combine_vectors(df$or$lcl, df$or$ucl) |
287 | ||
288 |
# Extract additional parameters from df
|
|
289 | 14x |
conf_level <- df$or$conf_level[1] |
290 | 14x |
method <- if ("pval_label" %in% names(df$or)) df$or$pval_label[1] else NULL |
291 | 14x |
colvars <- d_rsp_subgroups_colvars(vars, conf_level = conf_level, method = method) |
292 | 14x |
prop_vars <- intersect(colvars$vars, c("n", "prop", "n_rsp")) |
293 | 14x |
or_vars <- intersect(names(colvars$labels), c("n_tot", "or", "ci", "pval")) |
294 | 14x |
colvars_prop <- list(vars = prop_vars, labels = colvars$labels[prop_vars]) |
295 | 14x |
colvars_or <- list(vars = or_vars, labels = colvars$labels[or_vars]) |
296 | ||
297 |
# Process additional arguments to the statistic function
|
|
298 | 14x |
extra_args <- c( |
299 | 14x |
extra_args,
|
300 | 14x |
groups_lists = list(groups_lists), conf_level = conf_level, method = method, |
301 |
...
|
|
302 |
)
|
|
303 | ||
304 |
# Adding additional info from layout to analysis function
|
|
305 | 14x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
306 | 14x |
formals(a_response_subgroups) <- c(formals(a_response_subgroups), extra_args[[".additional_fun_parameters"]]) |
307 | ||
308 |
# Add risk difference column
|
|
309 | 14x |
if (!is.null(riskdiff)) { |
310 | ! |
if (is.null(riskdiff$arm_x)) riskdiff$arm_x <- levels(df$prop$arm)[1] |
311 | ! |
if (is.null(riskdiff$arm_y)) riskdiff$arm_y <- levels(df$prop$arm)[2] |
312 | 2x |
colvars_or$vars <- c(colvars_or$vars, "riskdiff") |
313 | 2x |
colvars_or$labels <- c(colvars_or$labels, riskdiff = riskdiff$col_label) |
314 | 2x |
arm_cols <- paste(rep(c("n_rsp", "n_rsp", "n", "n")), c(riskdiff$arm_x, riskdiff$arm_y), sep = "_") |
315 | ||
316 | 2x |
df_prop_diff <- df$prop %>% |
317 | 2x |
dplyr::select(-"prop") %>% |
318 | 2x |
tidyr::pivot_wider( |
319 | 2x |
id_cols = c("subgroup", "var", "var_label", "row_type"), |
320 | 2x |
names_from = "arm", |
321 | 2x |
values_from = c("n", "n_rsp") |
322 |
) %>% |
|
323 | 2x |
dplyr::rowwise() %>% |
324 | 2x |
dplyr::mutate( |
325 | 2x |
riskdiff = stat_propdiff_ci( |
326 | 2x |
x = as.list(.data[[arm_cols[1]]]), |
327 | 2x |
y = as.list(.data[[arm_cols[2]]]), |
328 | 2x |
N_x = .data[[arm_cols[3]]], |
329 | 2x |
N_y = .data[[arm_cols[4]]], |
330 | 2x |
pct = riskdiff$pct |
331 |
)
|
|
332 |
) %>% |
|
333 | 2x |
dplyr::select(-dplyr::all_of(arm_cols)) |
334 | ||
335 | 2x |
df$or <- df$or %>% |
336 | 2x |
dplyr::left_join( |
337 | 2x |
df_prop_diff,
|
338 | 2x |
by = c("subgroup", "var", "var_label", "row_type") |
339 |
)
|
|
340 |
}
|
|
341 | ||
342 |
# Add columns from table_prop (optional)
|
|
343 | 14x |
if (length(colvars_prop$vars) > 0) { |
344 | 13x |
lyt_prop <- split_cols_by(lyt = lyt, var = "arm") |
345 | 13x |
lyt_prop <- split_cols_by_multivar( |
346 | 13x |
lyt = lyt_prop, |
347 | 13x |
vars = colvars_prop$vars, |
348 | 13x |
varlabels = colvars_prop$labels |
349 |
)
|
|
350 | ||
351 |
# Add "All Patients" row
|
|
352 | 13x |
lyt_prop <- split_rows_by( |
353 | 13x |
lyt = lyt_prop, |
354 | 13x |
var = "row_type", |
355 | 13x |
split_fun = keep_split_levels("content"), |
356 | 13x |
nested = FALSE, |
357 | 13x |
child_labels = "hidden" |
358 |
)
|
|
359 | 13x |
lyt_prop <- analyze_colvars( |
360 | 13x |
lyt = lyt_prop, |
361 | 13x |
afun = a_response_subgroups, |
362 | 13x |
na_str = na_str, |
363 | 13x |
extra_args = extra_args |
364 |
)
|
|
365 | ||
366 |
# Add analysis rows
|
|
367 | 13x |
if ("analysis" %in% df$prop$row_type) { |
368 | 12x |
lyt_prop <- split_rows_by( |
369 | 12x |
lyt = lyt_prop, |
370 | 12x |
var = "row_type", |
371 | 12x |
split_fun = keep_split_levels("analysis"), |
372 | 12x |
nested = FALSE, |
373 | 12x |
child_labels = "hidden" |
374 |
)
|
|
375 | 12x |
lyt_prop <- split_rows_by(lyt = lyt_prop, var = "var_label", nested = TRUE) |
376 | 12x |
lyt_prop <- analyze_colvars( |
377 | 12x |
lyt = lyt_prop, |
378 | 12x |
afun = a_response_subgroups, |
379 | 12x |
na_str = na_str, |
380 | 12x |
inclNAs = TRUE, |
381 | 12x |
extra_args = extra_args |
382 |
)
|
|
383 |
}
|
|
384 | ||
385 | 13x |
table_prop <- build_table(lyt_prop, df = df$prop) |
386 |
} else { |
|
387 | 1x |
table_prop <- NULL |
388 |
}
|
|
389 | ||
390 |
# Add columns from table_or ("n_tot", "or", and "ci" required)
|
|
391 | 14x |
lyt_or <- split_cols_by(lyt = lyt, var = "arm") |
392 | 14x |
lyt_or <- split_cols_by_multivar( |
393 | 14x |
lyt = lyt_or, |
394 | 14x |
vars = colvars_or$vars, |
395 | 14x |
varlabels = colvars_or$labels |
396 |
)
|
|
397 | ||
398 |
# Add "All Patients" row
|
|
399 | 14x |
lyt_or <- split_rows_by( |
400 | 14x |
lyt = lyt_or, |
401 | 14x |
var = "row_type", |
402 | 14x |
split_fun = keep_split_levels("content"), |
403 | 14x |
nested = FALSE, |
404 | 14x |
child_labels = "hidden" |
405 |
)
|
|
406 | 14x |
lyt_or <- analyze_colvars( |
407 | 14x |
lyt = lyt_or, |
408 | 14x |
afun = a_response_subgroups, |
409 | 14x |
na_str = na_str, |
410 | 14x |
extra_args = extra_args |
411 |
) %>% |
|
412 | 14x |
append_topleft("Baseline Risk Factors") |
413 | ||
414 |
# Add analysis rows
|
|
415 | 14x |
if ("analysis" %in% df$or$row_type) { |
416 | 13x |
lyt_or <- split_rows_by( |
417 | 13x |
lyt = lyt_or, |
418 | 13x |
var = "row_type", |
419 | 13x |
split_fun = keep_split_levels("analysis"), |
420 | 13x |
nested = FALSE, |
421 | 13x |
child_labels = "hidden" |
422 |
)
|
|
423 | 13x |
lyt_or <- split_rows_by(lyt = lyt_or, var = "var_label", nested = TRUE) |
424 | 13x |
lyt_or <- analyze_colvars( |
425 | 13x |
lyt = lyt_or, |
426 | 13x |
afun = a_response_subgroups, |
427 | 13x |
na_str = na_str, |
428 | 13x |
inclNAs = TRUE, |
429 | 13x |
extra_args = extra_args |
430 |
)
|
|
431 |
}
|
|
432 | ||
433 | 14x |
table_or <- build_table(lyt_or, df = df$or) |
434 | ||
435 |
# Join tables, add forest plot attributes
|
|
436 | 14x |
n_tot_id <- match("n_tot", colvars_or$vars) |
437 | 14x |
if (is.null(table_prop)) { |
438 | 1x |
result <- table_or |
439 | 1x |
or_id <- match("or", colvars_or$vars) |
440 | 1x |
ci_id <- match("ci", colvars_or$vars) |
441 |
} else { |
|
442 | 13x |
result <- cbind_rtables(table_or[, n_tot_id], table_prop, table_or[, -n_tot_id]) |
443 | 13x |
or_id <- 1L + ncol(table_prop) + match("or", colvars_or$vars[-n_tot_id]) |
444 | 13x |
ci_id <- 1L + ncol(table_prop) + match("ci", colvars_or$vars[-n_tot_id]) |
445 | 13x |
n_tot_id <- 1L |
446 |
}
|
|
447 | 14x |
structure( |
448 | 14x |
result,
|
449 | 14x |
forest_header = paste0(levels(df$prop$arm), "\nBetter"), |
450 | 14x |
col_x = or_id, |
451 | 14x |
col_ci = ci_id, |
452 | 14x |
col_symbol_size = n_tot_id |
453 |
)
|
|
454 |
}
|
|
455 | ||
456 |
#' Labels for column variables in binary response by subgroup table
|
|
457 |
#'
|
|
458 |
#' @description `r lifecycle::badge("stable")`
|
|
459 |
#'
|
|
460 |
#' Internal function to check variables included in [tabulate_rsp_subgroups()] and create column labels.
|
|
461 |
#'
|
|
462 |
#' @inheritParams argument_convention
|
|
463 |
#' @inheritParams tabulate_rsp_subgroups
|
|
464 |
#'
|
|
465 |
#' @return A `list` of variables to tabulate and their labels.
|
|
466 |
#'
|
|
467 |
#' @export
|
|
468 |
d_rsp_subgroups_colvars <- function(vars, |
|
469 |
conf_level = NULL, |
|
470 |
method = NULL) { |
|
471 | 20x |
checkmate::assert_character(vars) |
472 | 20x |
checkmate::assert_subset(c("n_tot", "or", "ci"), vars) |
473 | 20x |
checkmate::assert_subset( |
474 | 20x |
vars,
|
475 | 20x |
c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval") |
476 |
)
|
|
477 | ||
478 | 20x |
varlabels <- c( |
479 | 20x |
n = "n", |
480 | 20x |
n_rsp = "Responders", |
481 | 20x |
prop = "Response (%)", |
482 | 20x |
n_tot = "Total n", |
483 | 20x |
or = "Odds Ratio" |
484 |
)
|
|
485 | 20x |
colvars <- vars |
486 | ||
487 | 20x |
if ("ci" %in% colvars) { |
488 | 20x |
checkmate::assert_false(is.null(conf_level)) |
489 | ||
490 | 20x |
varlabels <- c( |
491 | 20x |
varlabels,
|
492 | 20x |
ci = paste0(100 * conf_level, "% CI") |
493 |
)
|
|
494 |
}
|
|
495 | ||
496 | 20x |
if ("pval" %in% colvars) { |
497 | 14x |
varlabels <- c( |
498 | 14x |
varlabels,
|
499 | 14x |
pval = method |
500 |
)
|
|
501 |
}
|
|
502 | ||
503 | 20x |
list( |
504 | 20x |
vars = colvars, |
505 | 20x |
labels = varlabels[vars] |
506 |
)
|
|
507 |
}
|
1 |
#' Helper functions for accessing information from `rtables`
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' These are a couple of functions that help with accessing the data in `rtables` objects.
|
|
6 |
#' Currently these work for occurrence tables, which are defined as having a count as the first
|
|
7 |
#' element and a fraction as the second element in each cell.
|
|
8 |
#'
|
|
9 |
#' @seealso [prune_occurrences] for usage of these functions.
|
|
10 |
#'
|
|
11 |
#' @name rtables_access
|
|
12 |
NULL
|
|
13 | ||
14 |
#' @describeIn rtables_access Helper function to extract the first values from each content
|
|
15 |
#' cell and from specified columns in a `TableRow`. Defaults to all columns.
|
|
16 |
#'
|
|
17 |
#' @param table_row (`TableRow`)\cr an analysis row in a occurrence table.
|
|
18 |
#' @param col_names (`character`)\cr the names of the columns to extract from.
|
|
19 |
#' @param col_indices (`integer`)\cr the indices of the columns to extract from. If `col_names` are provided,
|
|
20 |
#' then these are inferred from the names of `table_row`. Note that this currently only works well with a single
|
|
21 |
#' column split.
|
|
22 |
#'
|
|
23 |
#' @return
|
|
24 |
#' * `h_row_first_values()` returns a `vector` of numeric values.
|
|
25 |
#'
|
|
26 |
#' @examples
|
|
27 |
#' tbl <- basic_table() %>%
|
|
28 |
#' split_cols_by("ARM") %>%
|
|
29 |
#' split_rows_by("RACE") %>%
|
|
30 |
#' analyze("AGE", function(x) {
|
|
31 |
#' list(
|
|
32 |
#' "mean (sd)" = rcell(c(mean(x), sd(x)), format = "xx.x (xx.x)"),
|
|
33 |
#' "n" = length(x),
|
|
34 |
#' "frac" = rcell(c(0.1, 0.1), format = "xx (xx)")
|
|
35 |
#' )
|
|
36 |
#' }) %>%
|
|
37 |
#' build_table(tern_ex_adsl) %>%
|
|
38 |
#' prune_table()
|
|
39 |
#' tree_row_elem <- collect_leaves(tbl[2, ])[[1]]
|
|
40 |
#' result <- max(h_row_first_values(tree_row_elem))
|
|
41 |
#' result
|
|
42 |
#'
|
|
43 |
#' @export
|
|
44 |
h_row_first_values <- function(table_row, |
|
45 |
col_names = NULL, |
|
46 |
col_indices = NULL) { |
|
47 | 745x |
col_indices <- check_names_indices(table_row, col_names, col_indices) |
48 | 744x |
checkmate::assert_integerish(col_indices) |
49 | 744x |
checkmate::assert_subset(col_indices, seq_len(ncol(table_row))) |
50 | ||
51 |
# Main values are extracted
|
|
52 | 744x |
row_vals <- row_values(table_row)[col_indices] |
53 | ||
54 |
# Main return
|
|
55 | 744x |
vapply(row_vals, function(rv) { |
56 | 2096x |
if (is.null(rv)) { |
57 | 744x |
NA_real_
|
58 |
} else { |
|
59 | 2093x |
rv[1L] |
60 |
}
|
|
61 | 744x |
}, FUN.VALUE = numeric(1)) |
62 |
}
|
|
63 | ||
64 |
#' @describeIn rtables_access Helper function that extracts row values and checks if they are
|
|
65 |
#' convertible to integers (`integerish` values).
|
|
66 |
#'
|
|
67 |
#' @return
|
|
68 |
#' * `h_row_counts()` returns a `vector` of numeric values.
|
|
69 |
#'
|
|
70 |
#' @examples
|
|
71 |
#' # Row counts (integer values)
|
|
72 |
#' # h_row_counts(tree_row_elem) # Fails because there are no integers
|
|
73 |
#' # Using values with integers
|
|
74 |
#' tree_row_elem <- collect_leaves(tbl[3, ])[[1]]
|
|
75 |
#' result <- h_row_counts(tree_row_elem)
|
|
76 |
#' # result
|
|
77 |
#'
|
|
78 |
#' @export
|
|
79 |
h_row_counts <- function(table_row, |
|
80 |
col_names = NULL, |
|
81 |
col_indices = NULL) { |
|
82 | 741x |
counts <- h_row_first_values(table_row, col_names, col_indices) |
83 | 741x |
checkmate::assert_integerish(counts) |
84 | 741x |
counts
|
85 |
}
|
|
86 | ||
87 |
#' @describeIn rtables_access Helper function to extract fractions from specified columns in a `TableRow`.
|
|
88 |
#' More specifically it extracts the second values from each content cell and checks it is a fraction.
|
|
89 |
#'
|
|
90 |
#' @return
|
|
91 |
#' * `h_row_fractions()` returns a `vector` of proportions.
|
|
92 |
#'
|
|
93 |
#' @examples
|
|
94 |
#' # Row fractions
|
|
95 |
#' tree_row_elem <- collect_leaves(tbl[4, ])[[1]]
|
|
96 |
#' h_row_fractions(tree_row_elem)
|
|
97 |
#'
|
|
98 |
#' @export
|
|
99 |
h_row_fractions <- function(table_row, |
|
100 |
col_names = NULL, |
|
101 |
col_indices = NULL) { |
|
102 | 250x |
col_indices <- check_names_indices(table_row, col_names, col_indices) |
103 | 250x |
row_vals <- row_values(table_row)[col_indices] |
104 | 250x |
fractions <- sapply(row_vals, "[", 2L) |
105 | 250x |
checkmate::assert_numeric(fractions, lower = 0, upper = 1) |
106 | 250x |
fractions
|
107 |
}
|
|
108 | ||
109 |
#' @describeIn rtables_access Helper function to extract column counts from specified columns in a table.
|
|
110 |
#'
|
|
111 |
#' @param table (`VTableNodeInfo`)\cr an occurrence table or row.
|
|
112 |
#'
|
|
113 |
#' @return
|
|
114 |
#' * `h_col_counts()` returns a `vector` of column counts.
|
|
115 |
#'
|
|
116 |
#' @export
|
|
117 |
h_col_counts <- function(table, |
|
118 |
col_names = NULL, |
|
119 |
col_indices = NULL) { |
|
120 | 307x |
col_indices <- check_names_indices(table, col_names, col_indices) |
121 | 307x |
counts <- col_counts(table)[col_indices] |
122 | 307x |
stats::setNames(counts, col_names) |
123 |
}
|
|
124 | ||
125 |
#' @describeIn rtables_access Helper function to get first row of content table of current table.
|
|
126 |
#'
|
|
127 |
#' @return
|
|
128 |
#' * `h_content_first_row()` returns a row from an `rtables` table.
|
|
129 |
#'
|
|
130 |
#' @export
|
|
131 |
h_content_first_row <- function(table) { |
|
132 | 27x |
ct <- content_table(table) |
133 | 27x |
tree_children(ct)[[1]] |
134 |
}
|
|
135 | ||
136 |
#' @describeIn rtables_access Helper function which says whether current table is a leaf in the tree.
|
|
137 |
#'
|
|
138 |
#' @return
|
|
139 |
#' * `is_leaf_table()` returns a `logical` value indicating whether current table is a leaf.
|
|
140 |
#'
|
|
141 |
#' @keywords internal
|
|
142 |
is_leaf_table <- function(table) { |
|
143 | 168x |
children <- tree_children(table) |
144 | 168x |
child_classes <- unique(sapply(children, class)) |
145 | 168x |
identical(child_classes, "ElementaryTable") |
146 |
}
|
|
147 | ||
148 |
#' @describeIn rtables_access Internal helper function that tests standard inputs for column indices.
|
|
149 |
#'
|
|
150 |
#' @return
|
|
151 |
#' * `check_names_indices` returns column indices.
|
|
152 |
#'
|
|
153 |
#' @keywords internal
|
|
154 |
check_names_indices <- function(table_row, |
|
155 |
col_names = NULL, |
|
156 |
col_indices = NULL) { |
|
157 | 1302x |
if (!is.null(col_names)) { |
158 | 1256x |
if (!is.null(col_indices)) { |
159 | 1x |
stop( |
160 | 1x |
"Inserted both col_names and col_indices when selecting row values. ",
|
161 | 1x |
"Please choose one."
|
162 |
)
|
|
163 |
}
|
|
164 | 1255x |
col_indices <- h_col_indices(table_row, col_names) |
165 |
}
|
|
166 | 1301x |
if (is.null(col_indices)) { |
167 | 39x |
ll <- ifelse(is.null(ncol(table_row)), length(table_row), ncol(table_row)) |
168 | 39x |
col_indices <- seq_len(ll) |
169 |
}
|
|
170 | ||
171 | 1301x |
return(col_indices) |
172 |
}
|
1 |
#' Count patients with abnormal analysis range values by baseline status
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [count_abnormal_by_baseline()] creates a layout element to count patients with abnormal
|
|
6 |
#' analysis range values, categorized by baseline status.
|
|
7 |
#'
|
|
8 |
#' This function analyzes primary analysis variable `var` which indicates abnormal range results. Additional
|
|
9 |
#' analysis variables that can be supplied as a list via the `variables` parameter are `id` (defaults to
|
|
10 |
#' `USUBJID`), a variable to indicate unique subject identifiers, and `baseline` (defaults to `BNRIND`), a
|
|
11 |
#' variable to indicate baseline reference ranges.
|
|
12 |
#'
|
|
13 |
#' For each direction specified via the `abnormal` parameter (e.g. High or Low), we condition on baseline
|
|
14 |
#' range result and count patients in the numerator and denominator as follows for each of the following
|
|
15 |
#' categories:
|
|
16 |
#' * `Not <abnormality>`
|
|
17 |
#' * `num`: The number of patients without abnormality at baseline (excluding those with missing baseline)
|
|
18 |
#' and with at least one abnormality post-baseline.
|
|
19 |
#' * `denom`: The number of patients without abnormality at baseline (excluding those with missing baseline).
|
|
20 |
#' * `<Abnormality>`
|
|
21 |
#' * `num`: The number of patients with abnormality as baseline and at least one abnormality post-baseline.
|
|
22 |
#' * `denom`: The number of patients with abnormality at baseline.
|
|
23 |
#' * `Total`
|
|
24 |
#' * `num`: The number of patients with at least one post-baseline record and at least one abnormality
|
|
25 |
#' post-baseline.
|
|
26 |
#' * `denom`: The number of patients with at least one post-baseline record.
|
|
27 |
#'
|
|
28 |
#' This function assumes that `df` has been filtered to only include post-baseline records.
|
|
29 |
#'
|
|
30 |
#' @inheritParams argument_convention
|
|
31 |
#' @param abnormal (`character`)\cr values identifying the abnormal range level(s) in `.var`.
|
|
32 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
33 |
#'
|
|
34 |
#' Options are: ``r shQuote(get_stats("abnormal_by_baseline"), type = "sh")``
|
|
35 |
#'
|
|
36 |
#' @note
|
|
37 |
#' * `df` should be filtered to include only post-baseline records.
|
|
38 |
#' * If the baseline variable or analysis variable contains `NA` records, it is expected that `df` has been
|
|
39 |
#' pre-processed using [df_explicit_na()] or [explicit_na()].
|
|
40 |
#'
|
|
41 |
#' @seealso Relevant description function [d_count_abnormal_by_baseline()].
|
|
42 |
#'
|
|
43 |
#' @name abnormal_by_baseline
|
|
44 |
#' @order 1
|
|
45 |
NULL
|
|
46 | ||
47 |
#' @describeIn abnormal_by_baseline Statistics function for a single `abnormal` level.
|
|
48 |
#'
|
|
49 |
#' @param na_str (`string`)\cr the explicit `na_level` argument you used in the pre-processing steps (maybe with
|
|
50 |
#' [df_explicit_na()]). The default is `"<Missing>"`.
|
|
51 |
#'
|
|
52 |
#' @return
|
|
53 |
#' * `s_count_abnormal_by_baseline()` returns statistic `fraction` which is a named list with 3 labeled elements:
|
|
54 |
#' `not_abnormal`, `abnormal`, and `total`. Each element contains a vector with `num` and `denom` patient counts.
|
|
55 |
#'
|
|
56 |
#' @keywords internal
|
|
57 |
s_count_abnormal_by_baseline <- function(df, |
|
58 |
.var,
|
|
59 |
abnormal,
|
|
60 |
na_str = "<Missing>", |
|
61 |
variables = list(id = "USUBJID", baseline = "BNRIND"), |
|
62 |
...) { |
|
63 | 11x |
checkmate::assert_string(.var) |
64 | 11x |
checkmate::assert_string(abnormal) |
65 | 11x |
checkmate::assert_string(na_str) |
66 | 11x |
assert_df_with_variables(df, c(range = .var, variables)) |
67 | 11x |
checkmate::assert_subset(names(variables), c("id", "baseline")) |
68 | 11x |
checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character")) |
69 | 11x |
checkmate::assert_multi_class(df[[variables$baseline]], classes = c("factor", "character")) |
70 | 11x |
checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character")) |
71 | ||
72 |
# If input is passed as character, changed to factor
|
|
73 | 11x |
df[[.var]] <- as_factor_keep_attributes(df[[.var]], na_level = na_str) |
74 | 11x |
df[[variables$baseline]] <- as_factor_keep_attributes(df[[variables$baseline]], na_level = na_str) |
75 | ||
76 | 11x |
assert_valid_factor(df[[.var]], any.missing = FALSE) |
77 | 10x |
assert_valid_factor(df[[variables$baseline]], any.missing = FALSE) |
78 | ||
79 |
# Keep only records with valid analysis value.
|
|
80 | 9x |
df <- df[df[[.var]] != na_str, ] |
81 | ||
82 | 9x |
anl <- data.frame( |
83 | 9x |
id = df[[variables$id]], |
84 | 9x |
var = df[[.var]], |
85 | 9x |
baseline = df[[variables$baseline]], |
86 | 9x |
stringsAsFactors = FALSE |
87 |
)
|
|
88 | ||
89 |
# Total:
|
|
90 |
# - Patients in denominator: have at least one valid measurement post-baseline.
|
|
91 |
# - Patients in numerator: have at least one abnormality.
|
|
92 | 9x |
total_denom <- length(unique(anl$id)) |
93 | 9x |
total_num <- length(unique(anl$id[anl$var == abnormal])) |
94 | ||
95 |
# Baseline NA records are counted only in total rows.
|
|
96 | 9x |
anl <- anl[anl$baseline != na_str, ] |
97 | ||
98 |
# Abnormal:
|
|
99 |
# - Patients in denominator: have abnormality at baseline.
|
|
100 |
# - Patients in numerator: have abnormality at baseline AND
|
|
101 |
# have at least one abnormality post-baseline.
|
|
102 | 9x |
abn_denom <- length(unique(anl$id[anl$baseline == abnormal])) |
103 | 9x |
abn_num <- length(unique(anl$id[anl$baseline == abnormal & anl$var == abnormal])) |
104 | ||
105 |
# Not abnormal:
|
|
106 |
# - Patients in denominator: do not have abnormality at baseline.
|
|
107 |
# - Patients in numerator: do not have abnormality at baseline AND
|
|
108 |
# have at least one abnormality post-baseline.
|
|
109 | 9x |
not_abn_denom <- length(unique(anl$id[anl$baseline != abnormal])) |
110 | 9x |
not_abn_num <- length(unique(anl$id[anl$baseline != abnormal & anl$var == abnormal])) |
111 | ||
112 | 9x |
labels <- d_count_abnormal_by_baseline(abnormal) |
113 | 9x |
list(fraction = list( |
114 | 9x |
not_abnormal = formatters::with_label(c(num = not_abn_num, denom = not_abn_denom), labels$not_abnormal), |
115 | 9x |
abnormal = formatters::with_label(c(num = abn_num, denom = abn_denom), labels$abnormal), |
116 | 9x |
total = formatters::with_label(c(num = total_num, denom = total_denom), labels$total) |
117 |
)) |
|
118 |
}
|
|
119 | ||
120 |
#' @describeIn abnormal_by_baseline Formatted analysis function which is used as `afun`
|
|
121 |
#' in `count_abnormal_by_baseline()`.
|
|
122 |
#'
|
|
123 |
#' @return
|
|
124 |
#' * `a_count_abnormal_by_baseline()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
125 |
#'
|
|
126 |
#' @keywords internal
|
|
127 |
a_count_abnormal_by_baseline <- function(df, |
|
128 |
...,
|
|
129 |
.stats = NULL, |
|
130 |
.stat_names = NULL, |
|
131 |
.formats = NULL, |
|
132 |
.labels = NULL, |
|
133 |
.indent_mods = NULL) { |
|
134 |
# Check for additional parameters to the statistics function
|
|
135 | 4x |
dots_extra_args <- list(...) |
136 | 4x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
137 | 4x |
dots_extra_args$.additional_fun_parameters <- NULL |
138 | ||
139 |
# Check for user-defined functions
|
|
140 | 4x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
141 | 4x |
.stats <- default_and_custom_stats_list$all_stats |
142 | 4x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
143 | ||
144 |
# Apply statistics function
|
|
145 | 4x |
x_stats <- .apply_stat_functions( |
146 | 4x |
default_stat_fnc = s_count_abnormal_by_baseline, |
147 | 4x |
custom_stat_fnc_list = custom_stat_functions, |
148 | 4x |
args_list = c( |
149 | 4x |
df = list(df), |
150 | 4x |
extra_afun_params,
|
151 | 4x |
dots_extra_args
|
152 |
)
|
|
153 |
)
|
|
154 | ||
155 |
# Fill in formatting defaults
|
|
156 | 4x |
.stats <- get_stats("abnormal_by_baseline", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) |
157 | 4x |
levels_per_stats <- lapply(x_stats, names) |
158 | 4x |
.formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) |
159 | 4x |
.labels <- get_labels_from_stats( |
160 | 4x |
.stats, .labels, levels_per_stats, d_count_abnormal_by_baseline(dots_extra_args$abnormal) |
161 |
)
|
|
162 | 4x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) |
163 | ||
164 | 4x |
x_stats <- x_stats[.stats] %>% |
165 | 4x |
.unlist_keep_nulls() %>% |
166 | 4x |
setNames(names(.formats)) |
167 | ||
168 |
# Auto format handling
|
|
169 | 4x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
170 | ||
171 |
# Get and check statistical names
|
|
172 | 4x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
173 | ||
174 | 4x |
in_rows( |
175 | 4x |
.list = x_stats, |
176 | 4x |
.formats = .formats, |
177 | 4x |
.names = .labels %>% .unlist_keep_nulls(), |
178 | 4x |
.stat_names = .stat_names, |
179 | 4x |
.labels = .labels %>% .unlist_keep_nulls(), |
180 | 4x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
181 |
)
|
|
182 |
}
|
|
183 | ||
184 |
#' @describeIn abnormal_by_baseline Layout-creating function which can take statistics function arguments
|
|
185 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
186 |
#'
|
|
187 |
#' @return
|
|
188 |
#' * `count_abnormal_by_baseline()` returns a layout object suitable for passing to further layouting functions,
|
|
189 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
190 |
#' the statistics from `s_count_abnormal_by_baseline()` to the table layout.
|
|
191 |
#'
|
|
192 |
#' @examples
|
|
193 |
#' df <- data.frame(
|
|
194 |
#' USUBJID = as.character(c(1:6)),
|
|
195 |
#' ANRIND = factor(c(rep("LOW", 4), "NORMAL", "HIGH")),
|
|
196 |
#' BNRIND = factor(c("LOW", "NORMAL", "HIGH", NA, "LOW", "NORMAL"))
|
|
197 |
#' )
|
|
198 |
#' df <- df_explicit_na(df)
|
|
199 |
#'
|
|
200 |
#' # Layout creating function.
|
|
201 |
#' basic_table() %>%
|
|
202 |
#' count_abnormal_by_baseline(var = "ANRIND", abnormal = c(High = "HIGH")) %>%
|
|
203 |
#' build_table(df)
|
|
204 |
#'
|
|
205 |
#' # Passing of statistics function and formatting arguments.
|
|
206 |
#' df2 <- data.frame(
|
|
207 |
#' ID = as.character(c(1, 2, 3, 4)),
|
|
208 |
#' RANGE = factor(c("NORMAL", "LOW", "HIGH", "HIGH")),
|
|
209 |
#' BLRANGE = factor(c("LOW", "HIGH", "HIGH", "NORMAL"))
|
|
210 |
#' )
|
|
211 |
#'
|
|
212 |
#' basic_table() %>%
|
|
213 |
#' count_abnormal_by_baseline(
|
|
214 |
#' var = "RANGE",
|
|
215 |
#' abnormal = c(Low = "LOW"),
|
|
216 |
#' variables = list(id = "ID", baseline = "BLRANGE"),
|
|
217 |
#' .formats = c(fraction = "xx / xx"),
|
|
218 |
#' .indent_mods = c(fraction = 2L)
|
|
219 |
#' ) %>%
|
|
220 |
#' build_table(df2)
|
|
221 |
#'
|
|
222 |
#' @export
|
|
223 |
#' @order 2
|
|
224 |
count_abnormal_by_baseline <- function(lyt, |
|
225 |
var,
|
|
226 |
abnormal,
|
|
227 |
variables = list(id = "USUBJID", baseline = "BNRIND"), |
|
228 |
na_str = "<Missing>", |
|
229 |
nested = TRUE, |
|
230 |
...,
|
|
231 |
table_names = abnormal, |
|
232 |
.stats = "fraction", |
|
233 |
.stat_names = NULL, |
|
234 |
.formats = list(fraction = format_fraction), |
|
235 |
.labels = NULL, |
|
236 |
.indent_mods = NULL) { |
|
237 | 2x |
checkmate::assert_character(abnormal, len = length(table_names), names = "named") |
238 | 2x |
checkmate::assert_string(var) |
239 | ||
240 |
# Process standard extra arguments
|
|
241 | 2x |
extra_args <- list(".stats" = .stats) |
242 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
243 | 2x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
244 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
245 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
246 | ||
247 |
# Process additional arguments to the statistic function
|
|
248 | 2x |
extra_args <- c(extra_args, "variables" = list(variables), ...) |
249 | ||
250 |
# Append additional info from layout to the analysis function
|
|
251 | 2x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
252 | 2x |
formals(a_count_abnormal_by_baseline) <- c( |
253 | 2x |
formals(a_count_abnormal_by_baseline), extra_args[[".additional_fun_parameters"]] |
254 |
)
|
|
255 | ||
256 |
# Add a new table section with label for each value in abnormal
|
|
257 | 2x |
for (i in seq_along(abnormal)) { |
258 | 4x |
extra_args[["abnormal"]] <- abnormal[i] |
259 | ||
260 | 4x |
lyt <- analyze( |
261 | 4x |
lyt = lyt, |
262 | 4x |
vars = var, |
263 | 4x |
afun = a_count_abnormal_by_baseline, |
264 | 4x |
var_labels = names(abnormal)[i], |
265 | 4x |
na_str = na_str, |
266 | 4x |
nested = nested, |
267 | 4x |
extra_args = extra_args, |
268 | 4x |
show_labels = "visible", |
269 | 4x |
table_names = table_names[i] |
270 |
)
|
|
271 |
}
|
|
272 | ||
273 | 2x |
lyt
|
274 |
}
|
|
275 | ||
276 |
#' Description function for `s_count_abnormal_by_baseline()`
|
|
277 |
#'
|
|
278 |
#' @description `r lifecycle::badge("stable")`
|
|
279 |
#'
|
|
280 |
#' Description function that produces the labels for [s_count_abnormal_by_baseline()].
|
|
281 |
#'
|
|
282 |
#' @inheritParams abnormal_by_baseline
|
|
283 |
#'
|
|
284 |
#' @return Abnormal category labels for [s_count_abnormal_by_baseline()].
|
|
285 |
#'
|
|
286 |
#' @examples
|
|
287 |
#' d_count_abnormal_by_baseline("LOW")
|
|
288 |
#'
|
|
289 |
#' @export
|
|
290 |
d_count_abnormal_by_baseline <- function(abnormal) { |
|
291 | 13x |
not_abn_name <- paste("Not", tolower(abnormal)) |
292 | 13x |
abn_name <- paste0(toupper(substr(abnormal, 1, 1)), tolower(substring(abnormal, 2))) |
293 | 13x |
total_name <- "Total" |
294 | ||
295 | 13x |
list( |
296 | 13x |
not_abnormal = not_abn_name, |
297 | 13x |
abnormal = abn_name, |
298 | 13x |
total = total_name |
299 |
)
|
|
300 |
}
|
1 |
#' Additional assertions to use with `checkmate`
|
|
2 |
#'
|
|
3 |
#' Additional assertion functions which can be used together with the `checkmate` package.
|
|
4 |
#'
|
|
5 |
#' @inheritParams checkmate::assert_factor
|
|
6 |
#' @param x (`any`)\cr object to test.
|
|
7 |
#' @param df (`data.frame`)\cr data set to test.
|
|
8 |
#' @param variables (named `list` of `character`)\cr list of variables to test.
|
|
9 |
#' @param include_boundaries (`flag`)\cr whether to include boundaries when testing
|
|
10 |
#' for proportions.
|
|
11 |
#' @param na_level (`string`)\cr the string you have been using to represent NA or
|
|
12 |
#' missing data. For `NA` values please consider using directly [is.na()] or
|
|
13 |
#' similar approaches.
|
|
14 |
#'
|
|
15 |
#' @return Nothing if assertion passes, otherwise prints the error message.
|
|
16 |
#'
|
|
17 |
#' @name assertions
|
|
18 |
NULL
|
|
19 | ||
20 |
check_list_of_variables <- function(x) { |
|
21 |
# drop NULL elements in list
|
|
22 | 2999x |
x <- Filter(Negate(is.null), x) |
23 | ||
24 | 2999x |
res <- checkmate::check_list(x, |
25 | 2999x |
names = "named", |
26 | 2999x |
min.len = 1, |
27 | 2999x |
any.missing = FALSE, |
28 | 2999x |
types = "character" |
29 |
)
|
|
30 |
# no empty strings allowed
|
|
31 | 2999x |
if (isTRUE(res)) { |
32 | 2994x |
res <- checkmate::check_character(unlist(x), min.chars = 1) |
33 |
}
|
|
34 | 2999x |
return(res) |
35 |
}
|
|
36 |
#' @describeIn assertions Checks whether `x` is a valid list of variable names.
|
|
37 |
#' `NULL` elements of the list `x` are dropped with `Filter(Negate(is.null), x)`.
|
|
38 |
#'
|
|
39 |
#' @keywords internal
|
|
40 |
assert_list_of_variables <- checkmate::makeAssertionFunction(check_list_of_variables) |
|
41 | ||
42 |
check_df_with_variables <- function(df, variables, na_level = NULL) { |
|
43 | 2682x |
checkmate::assert_data_frame(df) |
44 | 2680x |
assert_list_of_variables(variables) |
45 | ||
46 |
# flag for equal variables and column names
|
|
47 | 2678x |
err_flag <- all(unlist(variables) %in% colnames(df)) |
48 | 2678x |
checkmate::assert_flag(err_flag) |
49 | ||
50 | 2678x |
if (isFALSE(err_flag)) { |
51 | 5x |
vars <- setdiff(unlist(variables), colnames(df)) |
52 | 5x |
return(paste( |
53 | 5x |
deparse(substitute(df)), |
54 | 5x |
"does not contain all specified variables as column names. Missing from data frame:",
|
55 | 5x |
paste(vars, collapse = ", ") |
56 |
)) |
|
57 |
}
|
|
58 |
# checking if na_level is present and in which column
|
|
59 | 2673x |
if (!is.null(na_level)) { |
60 | 9x |
checkmate::assert_string(na_level) |
61 | 9x |
res <- unlist(lapply(as.list(df)[unlist(variables)], function(x) any(x == na_level))) |
62 | 9x |
if (any(res)) { |
63 | 1x |
return(paste0( |
64 | 1x |
deparse(substitute(df)), " contains explicit na_level (", na_level, |
65 | 1x |
") in the following columns: ", paste0(unlist(variables)[res], |
66 | 1x |
collapse = ", " |
67 |
)
|
|
68 |
)) |
|
69 |
}
|
|
70 |
}
|
|
71 | 2672x |
return(TRUE) |
72 |
}
|
|
73 |
#' @describeIn assertions Check whether `df` is a data frame with the analysis `variables`.
|
|
74 |
#' Please notice how this produces an error when not all variables are present in the
|
|
75 |
#' data.frame while the opposite is not required.
|
|
76 |
#'
|
|
77 |
#' @keywords internal
|
|
78 |
assert_df_with_variables <- checkmate::makeAssertionFunction(check_df_with_variables) |
|
79 | ||
80 |
check_valid_factor <- function(x, |
|
81 |
min.levels = 1, # nolint |
|
82 |
max.levels = NULL, # nolint |
|
83 |
null.ok = TRUE, # nolint |
|
84 |
any.missing = TRUE, # nolint |
|
85 |
n.levels = NULL, # nolint |
|
86 |
len = NULL) { |
|
87 |
# checks on levels insertion
|
|
88 | 1113x |
checkmate::assert_int(min.levels, lower = 1) |
89 | ||
90 |
# main factor check
|
|
91 | 1113x |
res <- checkmate::check_factor(x, |
92 | 1113x |
min.levels = min.levels, |
93 | 1113x |
null.ok = null.ok, |
94 | 1113x |
max.levels = max.levels, |
95 | 1113x |
any.missing = any.missing, |
96 | 1113x |
n.levels = n.levels |
97 |
)
|
|
98 | ||
99 |
# no empty strings allowed
|
|
100 | 1113x |
if (isTRUE(res)) { |
101 | 1099x |
res <- checkmate::check_character(levels(x), min.chars = 1) |
102 |
}
|
|
103 | ||
104 | 1113x |
return(res) |
105 |
}
|
|
106 |
#' @describeIn assertions Check whether `x` is a valid factor (i.e. has levels and no empty
|
|
107 |
#' string levels). Note that `NULL` and `NA` elements are allowed.
|
|
108 |
#'
|
|
109 |
#' @keywords internal
|
|
110 |
assert_valid_factor <- checkmate::makeAssertionFunction(check_valid_factor) |
|
111 | ||
112 |
check_df_with_factors <- function(df, |
|
113 |
variables,
|
|
114 |
min.levels = 1, # nolint |
|
115 |
max.levels = NULL, # nolint |
|
116 |
any.missing = TRUE, # nolint |
|
117 |
na_level = NULL) { |
|
118 | 254x |
res <- check_df_with_variables(df, variables, na_level) |
119 |
# checking if all the columns specified by variables are valid factors
|
|
120 | 253x |
if (isTRUE(res)) { |
121 |
# searching the data.frame with selected columns (variables) as a list
|
|
122 | 251x |
res <- lapply( |
123 | 251x |
X = as.list(df)[unlist(variables)], |
124 | 251x |
FUN = check_valid_factor, |
125 | 251x |
min.levels = min.levels, |
126 | 251x |
max.levels = max.levels, |
127 | 251x |
any.missing = any.missing |
128 |
)
|
|
129 | 251x |
res_lo <- unlist(vapply(res, Negate(isTRUE), logical(1))) |
130 | 251x |
if (any(res_lo)) { |
131 | 6x |
return(paste0( |
132 | 6x |
deparse(substitute(df)), " does not contain only factor variables among:", |
133 | 6x |
"\n* Column `", paste0(unlist(variables)[res_lo], |
134 | 6x |
"` of the data.frame -> ", res[res_lo], |
135 | 6x |
collapse = "\n* " |
136 |
)
|
|
137 |
)) |
|
138 |
} else { |
|
139 | 245x |
res <- TRUE |
140 |
}
|
|
141 |
}
|
|
142 | 247x |
return(res) |
143 |
}
|
|
144 | ||
145 |
#' @describeIn assertions Check whether `df` is a data frame where the analysis `variables`
|
|
146 |
#' are all factors. Note that the creation of `NA` by direct call of `factor()` will
|
|
147 |
#' trim `NA` levels out of the vector list itself.
|
|
148 |
#'
|
|
149 |
#' @keywords internal
|
|
150 |
assert_df_with_factors <- checkmate::makeAssertionFunction(check_df_with_factors) |
|
151 | ||
152 |
#' @describeIn assertions Check whether `x` is a proportion: number between 0 and 1.
|
|
153 |
#'
|
|
154 |
#' @keywords internal
|
|
155 |
assert_proportion_value <- function(x, include_boundaries = FALSE) { |
|
156 | 18889x |
checkmate::assert_number(x, lower = 0, upper = 1) |
157 | 18877x |
checkmate::assert_flag(include_boundaries) |
158 | 18877x |
if (isFALSE(include_boundaries)) { |
159 | 12969x |
checkmate::assert_true(x > 0) |
160 | 12967x |
checkmate::assert_true(x < 1) |
161 |
}
|
|
162 |
}
|
1 |
#' Convert list of groups to a data frame
|
|
2 |
#'
|
|
3 |
#' This converts a list of group levels into a data frame format which is expected by [rtables::add_combo_levels()].
|
|
4 |
#'
|
|
5 |
#' @param groups_list (named `list` of `character`)\cr specifies the new group levels via the names and the
|
|
6 |
#' levels that belong to it in the character vectors that are elements of the list.
|
|
7 |
#'
|
|
8 |
#' @return A `tibble` in the required format.
|
|
9 |
#'
|
|
10 |
#' @examples
|
|
11 |
#' grade_groups <- list(
|
|
12 |
#' "Any Grade (%)" = c("1", "2", "3", "4", "5"),
|
|
13 |
#' "Grade 3-4 (%)" = c("3", "4"),
|
|
14 |
#' "Grade 5 (%)" = "5"
|
|
15 |
#' )
|
|
16 |
#' groups_list_to_df(grade_groups)
|
|
17 |
#'
|
|
18 |
#' @export
|
|
19 |
groups_list_to_df <- function(groups_list) { |
|
20 | 5x |
checkmate::assert_list(groups_list, names = "named") |
21 | 5x |
lapply(groups_list, checkmate::assert_character) |
22 | 5x |
tibble::tibble( |
23 | 5x |
valname = make_names(names(groups_list)), |
24 | 5x |
label = names(groups_list), |
25 | 5x |
levelcombo = unname(groups_list), |
26 | 5x |
exargs = replicate(length(groups_list), list()) |
27 |
)
|
|
28 |
}
|
|
29 | ||
30 |
#' Reference and treatment group combination
|
|
31 |
#'
|
|
32 |
#' @description `r lifecycle::badge("stable")`
|
|
33 |
#'
|
|
34 |
#' Facilitate the re-combination of groups divided as reference and treatment groups; it helps in arranging groups of
|
|
35 |
#' columns in the `rtables` framework and teal modules.
|
|
36 |
#'
|
|
37 |
#' @param fct (`factor`)\cr the variable with levels which needs to be grouped.
|
|
38 |
#' @param ref (`character`)\cr the reference level(s).
|
|
39 |
#' @param collapse (`string`)\cr a character string to separate `fct` and `ref`.
|
|
40 |
#'
|
|
41 |
#' @return A `list` with first item `ref` (reference) and second item `trt` (treatment).
|
|
42 |
#'
|
|
43 |
#' @examples
|
|
44 |
#' groups <- combine_groups(
|
|
45 |
#' fct = DM$ARM,
|
|
46 |
#' ref = c("B: Placebo")
|
|
47 |
#' )
|
|
48 |
#'
|
|
49 |
#' basic_table() %>%
|
|
50 |
#' split_cols_by_groups("ARM", groups) %>%
|
|
51 |
#' add_colcounts() %>%
|
|
52 |
#' analyze_vars("AGE") %>%
|
|
53 |
#' build_table(DM)
|
|
54 |
#'
|
|
55 |
#' @export
|
|
56 |
combine_groups <- function(fct, |
|
57 |
ref = NULL, |
|
58 |
collapse = "/") { |
|
59 | 10x |
checkmate::assert_string(collapse) |
60 | 10x |
checkmate::assert_character(ref, min.chars = 1, any.missing = FALSE, null.ok = TRUE) |
61 | 10x |
checkmate::assert_multi_class(fct, classes = c("factor", "character")) |
62 | ||
63 | 10x |
fct <- as_factor_keep_attributes(fct) |
64 | ||
65 | 10x |
group_levels <- levels(fct) |
66 | 10x |
if (is.null(ref)) { |
67 | 6x |
ref <- group_levels[1] |
68 |
} else { |
|
69 | 4x |
checkmate::assert_subset(ref, group_levels) |
70 |
}
|
|
71 | ||
72 | 10x |
groups <- list( |
73 | 10x |
ref = group_levels[group_levels %in% ref], |
74 | 10x |
trt = group_levels[!group_levels %in% ref] |
75 |
)
|
|
76 | 10x |
stats::setNames(groups, nm = lapply(groups, paste, collapse = collapse)) |
77 |
}
|
|
78 | ||
79 |
#' Split columns by groups of levels
|
|
80 |
#'
|
|
81 |
#' @description `r lifecycle::badge("stable")`
|
|
82 |
#'
|
|
83 |
#' @inheritParams argument_convention
|
|
84 |
#' @inheritParams groups_list_to_df
|
|
85 |
#' @param ... additional arguments to [rtables::split_cols_by()] in order. For instance, to
|
|
86 |
#' control formats (`format`), add a joint column for all groups (`incl_all`).
|
|
87 |
#'
|
|
88 |
#' @return A layout object suitable for passing to further layouting functions. Adding
|
|
89 |
#' this function to an `rtable` layout will add a column split including the given
|
|
90 |
#' groups to the table layout.
|
|
91 |
#'
|
|
92 |
#' @seealso [rtables::split_cols_by()]
|
|
93 |
#'
|
|
94 |
#' @examples
|
|
95 |
#' # 1 - Basic use
|
|
96 |
#'
|
|
97 |
#' # Without group combination `split_cols_by_groups` is
|
|
98 |
#' # equivalent to [rtables::split_cols_by()].
|
|
99 |
#' basic_table() %>%
|
|
100 |
#' split_cols_by_groups("ARM") %>%
|
|
101 |
#' add_colcounts() %>%
|
|
102 |
#' analyze("AGE") %>%
|
|
103 |
#' build_table(DM)
|
|
104 |
#'
|
|
105 |
#' # Add a reference column.
|
|
106 |
#' basic_table() %>%
|
|
107 |
#' split_cols_by_groups("ARM", ref_group = "B: Placebo") %>%
|
|
108 |
#' add_colcounts() %>%
|
|
109 |
#' analyze(
|
|
110 |
#' "AGE",
|
|
111 |
#' afun = function(x, .ref_group, .in_ref_col) {
|
|
112 |
#' if (.in_ref_col) {
|
|
113 |
#' in_rows("Diff Mean" = rcell(NULL))
|
|
114 |
#' } else {
|
|
115 |
#' in_rows("Diff Mean" = rcell(mean(x) - mean(.ref_group), format = "xx.xx"))
|
|
116 |
#' }
|
|
117 |
#' }
|
|
118 |
#' ) %>%
|
|
119 |
#' build_table(DM)
|
|
120 |
#'
|
|
121 |
#' # 2 - Adding group specification
|
|
122 |
#'
|
|
123 |
#' # Manual preparation of the groups.
|
|
124 |
#' groups <- list(
|
|
125 |
#' "Arms A+B" = c("A: Drug X", "B: Placebo"),
|
|
126 |
#' "Arms A+C" = c("A: Drug X", "C: Combination")
|
|
127 |
#' )
|
|
128 |
#'
|
|
129 |
#' # Use of split_cols_by_groups without reference column.
|
|
130 |
#' basic_table() %>%
|
|
131 |
#' split_cols_by_groups("ARM", groups) %>%
|
|
132 |
#' add_colcounts() %>%
|
|
133 |
#' analyze("AGE") %>%
|
|
134 |
#' build_table(DM)
|
|
135 |
#'
|
|
136 |
#' # Including differentiated output in the reference column.
|
|
137 |
#' basic_table() %>%
|
|
138 |
#' split_cols_by_groups("ARM", groups_list = groups, ref_group = "Arms A+B") %>%
|
|
139 |
#' analyze(
|
|
140 |
#' "AGE",
|
|
141 |
#' afun = function(x, .ref_group, .in_ref_col) {
|
|
142 |
#' if (.in_ref_col) {
|
|
143 |
#' in_rows("Diff. of Averages" = rcell(NULL))
|
|
144 |
#' } else {
|
|
145 |
#' in_rows("Diff. of Averages" = rcell(mean(x) - mean(.ref_group), format = "xx.xx"))
|
|
146 |
#' }
|
|
147 |
#' }
|
|
148 |
#' ) %>%
|
|
149 |
#' build_table(DM)
|
|
150 |
#'
|
|
151 |
#' # 3 - Binary list dividing factor levels into reference and treatment
|
|
152 |
#'
|
|
153 |
#' # `combine_groups` defines reference and treatment.
|
|
154 |
#' groups <- combine_groups(
|
|
155 |
#' fct = DM$ARM,
|
|
156 |
#' ref = c("A: Drug X", "B: Placebo")
|
|
157 |
#' )
|
|
158 |
#' groups
|
|
159 |
#'
|
|
160 |
#' # Use group definition without reference column.
|
|
161 |
#' basic_table() %>%
|
|
162 |
#' split_cols_by_groups("ARM", groups_list = groups) %>%
|
|
163 |
#' add_colcounts() %>%
|
|
164 |
#' analyze("AGE") %>%
|
|
165 |
#' build_table(DM)
|
|
166 |
#'
|
|
167 |
#' # Use group definition with reference column (first item of groups).
|
|
168 |
#' basic_table() %>%
|
|
169 |
#' split_cols_by_groups("ARM", groups, ref_group = names(groups)[1]) %>%
|
|
170 |
#' add_colcounts() %>%
|
|
171 |
#' analyze(
|
|
172 |
#' "AGE",
|
|
173 |
#' afun = function(x, .ref_group, .in_ref_col) {
|
|
174 |
#' if (.in_ref_col) {
|
|
175 |
#' in_rows("Diff Mean" = rcell(NULL))
|
|
176 |
#' } else {
|
|
177 |
#' in_rows("Diff Mean" = rcell(mean(x) - mean(.ref_group), format = "xx.xx"))
|
|
178 |
#' }
|
|
179 |
#' }
|
|
180 |
#' ) %>%
|
|
181 |
#' build_table(DM)
|
|
182 |
#'
|
|
183 |
#' @export
|
|
184 |
split_cols_by_groups <- function(lyt, |
|
185 |
var,
|
|
186 |
groups_list = NULL, |
|
187 |
ref_group = NULL, |
|
188 |
...) { |
|
189 | 6x |
if (is.null(groups_list)) { |
190 | 2x |
split_cols_by( |
191 | 2x |
lyt = lyt, |
192 | 2x |
var = var, |
193 | 2x |
ref_group = ref_group, |
194 |
...
|
|
195 |
)
|
|
196 |
} else { |
|
197 | 4x |
groups_df <- groups_list_to_df(groups_list) |
198 | 4x |
if (!is.null(ref_group)) { |
199 | 3x |
ref_group <- groups_df$valname[groups_df$label == ref_group] |
200 |
}
|
|
201 | 4x |
split_cols_by( |
202 | 4x |
lyt = lyt, |
203 | 4x |
var = var, |
204 | 4x |
split_fun = add_combo_levels(groups_df, keep_levels = groups_df$valname), |
205 | 4x |
ref_group = ref_group, |
206 |
...
|
|
207 |
)
|
|
208 |
}
|
|
209 |
}
|
|
210 | ||
211 |
#' Combine counts
|
|
212 |
#'
|
|
213 |
#' Simplifies the estimation of column counts, especially when group combination is required.
|
|
214 |
#'
|
|
215 |
#' @inheritParams combine_groups
|
|
216 |
#' @inheritParams groups_list_to_df
|
|
217 |
#'
|
|
218 |
#' @return A `vector` of column counts.
|
|
219 |
#'
|
|
220 |
#' @seealso [combine_groups()]
|
|
221 |
#'
|
|
222 |
#' @examples
|
|
223 |
#' ref <- c("A: Drug X", "B: Placebo")
|
|
224 |
#' groups <- combine_groups(fct = DM$ARM, ref = ref)
|
|
225 |
#'
|
|
226 |
#' col_counts <- combine_counts(
|
|
227 |
#' fct = DM$ARM,
|
|
228 |
#' groups_list = groups
|
|
229 |
#' )
|
|
230 |
#'
|
|
231 |
#' basic_table() %>%
|
|
232 |
#' split_cols_by_groups("ARM", groups) %>%
|
|
233 |
#' add_colcounts() %>%
|
|
234 |
#' analyze_vars("AGE") %>%
|
|
235 |
#' build_table(DM, col_counts = col_counts)
|
|
236 |
#'
|
|
237 |
#' ref <- "A: Drug X"
|
|
238 |
#' groups <- combine_groups(fct = DM$ARM, ref = ref)
|
|
239 |
#' col_counts <- combine_counts(
|
|
240 |
#' fct = DM$ARM,
|
|
241 |
#' groups_list = groups
|
|
242 |
#' )
|
|
243 |
#'
|
|
244 |
#' basic_table() %>%
|
|
245 |
#' split_cols_by_groups("ARM", groups) %>%
|
|
246 |
#' add_colcounts() %>%
|
|
247 |
#' analyze_vars("AGE") %>%
|
|
248 |
#' build_table(DM, col_counts = col_counts)
|
|
249 |
#'
|
|
250 |
#' @export
|
|
251 |
combine_counts <- function(fct, groups_list = NULL) { |
|
252 | 4x |
checkmate::assert_multi_class(fct, classes = c("factor", "character")) |
253 | ||
254 | 4x |
fct <- as_factor_keep_attributes(fct) |
255 | ||
256 | 4x |
if (is.null(groups_list)) { |
257 | 1x |
y <- table(fct) |
258 | 1x |
y <- stats::setNames(as.numeric(y), nm = dimnames(y)[[1]]) |
259 |
} else { |
|
260 | 3x |
y <- vapply( |
261 | 3x |
X = groups_list, |
262 | 3x |
FUN = function(x) sum(table(fct)[x]), |
263 | 3x |
FUN.VALUE = 1 |
264 |
)
|
|
265 |
}
|
|
266 | 4x |
y
|
267 |
}
|
1 |
#' Helper functions for tabulating biomarker effects on binary response by subgroup
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Helper functions which are documented here separately to not confuse the user
|
|
6 |
#' when reading about the user-facing functions.
|
|
7 |
#'
|
|
8 |
#' @inheritParams response_biomarkers_subgroups
|
|
9 |
#' @inheritParams extract_rsp_biomarkers
|
|
10 |
#' @inheritParams argument_convention
|
|
11 |
#'
|
|
12 |
#' @examples
|
|
13 |
#' library(dplyr)
|
|
14 |
#' library(forcats)
|
|
15 |
#'
|
|
16 |
#' adrs <- tern_ex_adrs
|
|
17 |
#' adrs_labels <- formatters::var_labels(adrs)
|
|
18 |
#'
|
|
19 |
#' adrs_f <- adrs %>%
|
|
20 |
#' filter(PARAMCD == "BESRSPI") %>%
|
|
21 |
#' mutate(rsp = AVALC == "CR")
|
|
22 |
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
|
|
23 |
#'
|
|
24 |
#' @name h_response_biomarkers_subgroups
|
|
25 |
NULL
|
|
26 | ||
27 |
#' @describeIn h_response_biomarkers_subgroups helps with converting the "response" function variable list
|
|
28 |
#' to the "logistic regression" variable list. The reason is that currently there is an
|
|
29 |
#' inconsistency between the variable names accepted by `extract_rsp_subgroups()` and `fit_logistic()`.
|
|
30 |
#'
|
|
31 |
#' @param biomarker (`string`)\cr the name of the biomarker variable.
|
|
32 |
#'
|
|
33 |
#' @return
|
|
34 |
#' * `h_rsp_to_logistic_variables()` returns a named `list` of elements `response`, `arm`, `covariates`, and `strata`.
|
|
35 |
#'
|
|
36 |
#' @examples
|
|
37 |
#' # This is how the variable list is converted internally.
|
|
38 |
#' h_rsp_to_logistic_variables(
|
|
39 |
#' variables = list(
|
|
40 |
#' rsp = "RSP",
|
|
41 |
#' covariates = c("A", "B"),
|
|
42 |
#' strata = "D"
|
|
43 |
#' ),
|
|
44 |
#' biomarker = "AGE"
|
|
45 |
#' )
|
|
46 |
#'
|
|
47 |
#' @export
|
|
48 |
h_rsp_to_logistic_variables <- function(variables, biomarker) { |
|
49 | 49x |
if ("strat" %in% names(variables)) { |
50 | ! |
warning( |
51 | ! |
"Warning: the `strat` element name of the `variables` list argument to `h_rsp_to_logistic_variables() ",
|
52 | ! |
"was deprecated in tern 0.9.4.\n ",
|
53 | ! |
"Please use the name `strata` instead of `strat` in the `variables` argument."
|
54 |
)
|
|
55 | ! |
variables[["strata"]] <- variables[["strat"]] |
56 |
}
|
|
57 | 49x |
checkmate::assert_list(variables) |
58 | 49x |
checkmate::assert_string(variables$rsp) |
59 | 49x |
checkmate::assert_string(biomarker) |
60 | 49x |
list( |
61 | 49x |
response = variables$rsp, |
62 | 49x |
arm = biomarker, |
63 | 49x |
covariates = variables$covariates, |
64 | 49x |
strata = variables$strata |
65 |
)
|
|
66 |
}
|
|
67 | ||
68 |
#' @describeIn h_response_biomarkers_subgroups prepares estimates for number of responses, patients and
|
|
69 |
#' overall response rate, as well as odds ratio estimates, confidence intervals and p-values, for multiple
|
|
70 |
#' biomarkers in a given single data set.
|
|
71 |
#' `variables` corresponds to names of variables found in `data`, passed as a named list and requires elements
|
|
72 |
#' `rsp` and `biomarkers` (vector of continuous biomarker variables) and optionally `covariates`
|
|
73 |
#' and `strata`.
|
|
74 |
#'
|
|
75 |
#' @return
|
|
76 |
#' * `h_logistic_mult_cont_df()` returns a `data.frame` containing estimates and statistics for the selected biomarkers.
|
|
77 |
#'
|
|
78 |
#' @examples
|
|
79 |
#' # For a single population, estimate separately the effects
|
|
80 |
#' # of two biomarkers.
|
|
81 |
#' df <- h_logistic_mult_cont_df(
|
|
82 |
#' variables = list(
|
|
83 |
#' rsp = "rsp",
|
|
84 |
#' biomarkers = c("BMRKR1", "AGE"),
|
|
85 |
#' covariates = "SEX"
|
|
86 |
#' ),
|
|
87 |
#' data = adrs_f
|
|
88 |
#' )
|
|
89 |
#' df
|
|
90 |
#'
|
|
91 |
#' # If the data set is empty, still the corresponding rows with missings are returned.
|
|
92 |
#' h_coxreg_mult_cont_df(
|
|
93 |
#' variables = list(
|
|
94 |
#' rsp = "rsp",
|
|
95 |
#' biomarkers = c("BMRKR1", "AGE"),
|
|
96 |
#' covariates = "SEX",
|
|
97 |
#' strata = "STRATA1"
|
|
98 |
#' ),
|
|
99 |
#' data = adrs_f[NULL, ]
|
|
100 |
#' )
|
|
101 |
#'
|
|
102 |
#' @export
|
|
103 |
h_logistic_mult_cont_df <- function(variables, |
|
104 |
data,
|
|
105 |
control = control_logistic()) { |
|
106 | 28x |
if ("strat" %in% names(variables)) { |
107 | ! |
warning( |
108 | ! |
"Warning: the `strat` element name of the `variables` list argument to `h_logistic_mult_cont_df() ",
|
109 | ! |
"was deprecated in tern 0.9.4.\n ",
|
110 | ! |
"Please use the name `strata` instead of `strat` in the `variables` argument."
|
111 |
)
|
|
112 | ! |
variables[["strata"]] <- variables[["strat"]] |
113 |
}
|
|
114 | 28x |
assert_df_with_variables(data, variables) |
115 | ||
116 | 28x |
checkmate::assert_character(variables$biomarkers, min.len = 1, any.missing = FALSE) |
117 | 28x |
checkmate::assert_list(control, names = "named") |
118 | ||
119 | 28x |
conf_level <- control[["conf_level"]] |
120 | 28x |
pval_label <- "p-value (Wald)" |
121 | ||
122 |
# If there is any data, run model, otherwise return empty results.
|
|
123 | 28x |
if (nrow(data) > 0) { |
124 | 27x |
bm_cols <- match(variables$biomarkers, names(data)) |
125 | 27x |
l_result <- lapply(variables$biomarkers, function(bm) { |
126 | 48x |
model_fit <- fit_logistic( |
127 | 48x |
variables = h_rsp_to_logistic_variables(variables, bm), |
128 | 48x |
data = data, |
129 | 48x |
response_definition = control$response_definition |
130 |
)
|
|
131 | 48x |
result <- h_logistic_simple_terms( |
132 | 48x |
x = bm, |
133 | 48x |
fit_glm = model_fit, |
134 | 48x |
conf_level = control$conf_level |
135 |
)
|
|
136 | 48x |
resp_vector <- if (inherits(model_fit, "glm")) { |
137 | 38x |
model_fit$model[[variables$rsp]] |
138 |
} else { |
|
139 | 10x |
as.logical(as.matrix(model_fit$y)[, "status"]) |
140 |
}
|
|
141 | 48x |
data.frame( |
142 |
# Dummy column needed downstream to create a nested header.
|
|
143 | 48x |
biomarker = bm, |
144 | 48x |
biomarker_label = formatters::var_labels(data[bm], fill = TRUE), |
145 | 48x |
n_tot = length(resp_vector), |
146 | 48x |
n_rsp = sum(resp_vector), |
147 | 48x |
prop = mean(resp_vector), |
148 | 48x |
or = as.numeric(result[1L, "odds_ratio"]), |
149 | 48x |
lcl = as.numeric(result[1L, "lcl"]), |
150 | 48x |
ucl = as.numeric(result[1L, "ucl"]), |
151 | 48x |
conf_level = conf_level, |
152 | 48x |
pval = as.numeric(result[1L, "pvalue"]), |
153 | 48x |
pval_label = pval_label, |
154 | 48x |
stringsAsFactors = FALSE |
155 |
)
|
|
156 |
}) |
|
157 | 27x |
do.call(rbind, args = c(l_result, make.row.names = FALSE)) |
158 |
} else { |
|
159 | 1x |
data.frame( |
160 | 1x |
biomarker = variables$biomarkers, |
161 | 1x |
biomarker_label = formatters::var_labels(data[variables$biomarkers], fill = TRUE), |
162 | 1x |
n_tot = 0L, |
163 | 1x |
n_rsp = 0L, |
164 | 1x |
prop = NA, |
165 | 1x |
or = NA, |
166 | 1x |
lcl = NA, |
167 | 1x |
ucl = NA, |
168 | 1x |
conf_level = conf_level, |
169 | 1x |
pval = NA, |
170 | 1x |
pval_label = pval_label, |
171 | 1x |
row.names = seq_along(variables$biomarkers), |
172 | 1x |
stringsAsFactors = FALSE |
173 |
)
|
|
174 |
}
|
|
175 |
}
|
1 |
#' Convert `rtable` objects to `ggplot` objects
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("experimental")`
|
|
4 |
#'
|
|
5 |
#' Given a [rtables::rtable()] object, performs basic conversion to a [ggplot2::ggplot()] object built using
|
|
6 |
#' functions from the `ggplot2` package. Any table titles and/or footnotes are ignored.
|
|
7 |
#'
|
|
8 |
#' @param tbl (`VTableTree`)\cr `rtables` table object.
|
|
9 |
#' @param fontsize (`numeric(1)`)\cr font size.
|
|
10 |
#' @param colwidths (`numeric` or `NULL`)\cr a vector of column widths. Each element's position in
|
|
11 |
#' `colwidths` corresponds to the column of `tbl` in the same position. If `NULL`, column widths
|
|
12 |
#' are calculated according to maximum number of characters per column.
|
|
13 |
#' @param lbl_col_padding (`numeric`)\cr additional padding to use when calculating spacing between
|
|
14 |
#' the first (label) column and the second column of `tbl`. If `colwidths` is specified,
|
|
15 |
#' the width of the first column becomes `colwidths[1] + lbl_col_padding`. Defaults to 0.
|
|
16 |
#'
|
|
17 |
#' @return A `ggplot` object.
|
|
18 |
#'
|
|
19 |
#' @examples
|
|
20 |
#' dta <- data.frame(
|
|
21 |
#' ARM = rep(LETTERS[1:3], rep(6, 3)),
|
|
22 |
#' AVISIT = rep(paste0("V", 1:3), 6),
|
|
23 |
#' AVAL = c(9:1, rep(NA, 9))
|
|
24 |
#' )
|
|
25 |
#'
|
|
26 |
#' lyt <- basic_table() %>%
|
|
27 |
#' split_cols_by(var = "ARM") %>%
|
|
28 |
#' split_rows_by(var = "AVISIT") %>%
|
|
29 |
#' analyze_vars(vars = "AVAL")
|
|
30 |
#'
|
|
31 |
#' tbl <- build_table(lyt, df = dta)
|
|
32 |
#'
|
|
33 |
#' rtable2gg(tbl)
|
|
34 |
#'
|
|
35 |
#' rtable2gg(tbl, fontsize = 15, colwidths = c(2, 1, 1, 1))
|
|
36 |
#'
|
|
37 |
#' @export
|
|
38 |
rtable2gg <- function(tbl, fontsize = 12, colwidths = NULL, lbl_col_padding = 0) { |
|
39 | 6x |
mat <- rtables::matrix_form(tbl, indent_rownames = TRUE) |
40 | 6x |
mat_strings <- formatters::mf_strings(mat) |
41 | 6x |
mat_aligns <- formatters::mf_aligns(mat) |
42 | 6x |
mat_indent <- formatters::mf_rinfo(mat)$indent |
43 | 6x |
mat_display <- formatters::mf_display(mat) |
44 | 6x |
nlines_hdr <- formatters::mf_nlheader(mat) |
45 | 6x |
shared_hdr_rows <- which(apply(mat_display, 1, function(x) (any(!x)))) |
46 | ||
47 | 6x |
tbl_df <- data.frame(mat_strings) |
48 | 6x |
body_rows <- seq(nlines_hdr + 1, nrow(tbl_df)) |
49 | 6x |
mat_aligns <- apply(mat_aligns, 1:2, function(x) if (x == "left") 0 else if (x == "right") 1 else 0.5) |
50 | ||
51 |
# Apply indentation in first column
|
|
52 | 6x |
tbl_df[body_rows, 1] <- sapply(body_rows, function(i) { |
53 | 42x |
ind_i <- mat_indent[i - nlines_hdr] * 4 |
54 | 18x |
if (ind_i > 0) paste0(paste(rep(" ", ind_i), collapse = ""), tbl_df[i, 1]) else tbl_df[i, 1] |
55 |
}) |
|
56 | ||
57 |
# Get column widths
|
|
58 | 6x |
if (is.null(colwidths)) { |
59 | 6x |
colwidths <- apply(tbl_df, 2, function(x) max(nchar(x))) + 1 |
60 |
}
|
|
61 | 6x |
tot_width <- sum(colwidths) + lbl_col_padding |
62 | ||
63 | 6x |
if (length(shared_hdr_rows) > 0) { |
64 | 5x |
tbl_df <- tbl_df[-shared_hdr_rows, ] |
65 | 5x |
mat_aligns <- mat_aligns[-shared_hdr_rows, ] |
66 |
}
|
|
67 | ||
68 | 6x |
res <- ggplot(data = tbl_df) + |
69 | 6x |
theme_void() + |
70 | 6x |
scale_x_continuous(limits = c(0, tot_width)) + |
71 | 6x |
scale_y_continuous(limits = c(0, nrow(mat_strings))) + |
72 | 6x |
annotate( |
73 | 6x |
"segment",
|
74 | 6x |
x = 0, xend = tot_width, |
75 | 6x |
y = nrow(mat_strings) - nlines_hdr + 0.5, yend = nrow(mat_strings) - nlines_hdr + 0.5 |
76 |
)
|
|
77 | ||
78 |
# If header content spans multiple columns, center over these columns
|
|
79 | 6x |
if (length(shared_hdr_rows) > 0) { |
80 | 5x |
mat_strings[shared_hdr_rows, ] <- trimws(mat_strings[shared_hdr_rows, ]) |
81 | 5x |
for (hr in shared_hdr_rows) { |
82 | 6x |
hdr_lbls <- mat_strings[1:hr, mat_display[hr, -1]] |
83 | 6x |
hdr_lbls <- matrix(hdr_lbls[nzchar(hdr_lbls)], nrow = hr) |
84 | 6x |
for (idx_hl in seq_len(ncol(hdr_lbls))) { |
85 | 13x |
cur_lbl <- tail(hdr_lbls[, idx_hl], 1) |
86 | 13x |
which_cols <- if (hr == 1) { |
87 | 9x |
which(mat_strings[hr, ] == hdr_lbls[idx_hl]) |
88 | 13x |
} else { # for >2 col splits, only print labels for each unique combo of nested columns |
89 | 4x |
which( |
90 | 4x |
apply(mat_strings[1:hr, ], 2, function(x) all(x == hdr_lbls[1:hr, idx_hl])) |
91 |
)
|
|
92 |
}
|
|
93 | 13x |
line_pos <- c( |
94 | 13x |
sum(colwidths[1:(which_cols[1] - 1)]) + 1 + lbl_col_padding, |
95 | 13x |
sum(colwidths[1:max(which_cols)]) - 1 + lbl_col_padding |
96 |
)
|
|
97 | ||
98 | 13x |
res <- res + |
99 | 13x |
annotate( |
100 | 13x |
"text",
|
101 | 13x |
x = mean(line_pos), |
102 | 13x |
y = nrow(mat_strings) + 1 - hr, |
103 | 13x |
label = cur_lbl, |
104 | 13x |
size = fontsize / .pt |
105 |
) + |
|
106 | 13x |
annotate( |
107 | 13x |
"segment",
|
108 | 13x |
x = line_pos[1], |
109 | 13x |
xend = line_pos[2], |
110 | 13x |
y = nrow(mat_strings) - hr + 0.5, |
111 | 13x |
yend = nrow(mat_strings) - hr + 0.5 |
112 |
)
|
|
113 |
}
|
|
114 |
}
|
|
115 |
}
|
|
116 | ||
117 |
# Add table columns
|
|
118 | 6x |
for (i in seq_len(ncol(tbl_df))) { |
119 | 40x |
res <- res + annotate( |
120 | 40x |
"text",
|
121 | 40x |
x = if (i == 1) 0 else sum(colwidths[1:i]) - 0.5 * colwidths[i] + lbl_col_padding, |
122 | 40x |
y = rev(seq_len(nrow(tbl_df))), |
123 | 40x |
label = tbl_df[, i], |
124 | 40x |
hjust = mat_aligns[, i], |
125 | 40x |
size = fontsize / .pt |
126 |
)
|
|
127 |
}
|
|
128 | ||
129 | 6x |
res
|
130 |
}
|
|
131 | ||
132 |
#' Convert `data.frame` object to `ggplot` object
|
|
133 |
#'
|
|
134 |
#' @description `r lifecycle::badge("experimental")`
|
|
135 |
#'
|
|
136 |
#' Given a `data.frame` object, performs basic conversion to a [ggplot2::ggplot()] object built using
|
|
137 |
#' functions from the `ggplot2` package.
|
|
138 |
#'
|
|
139 |
#' @param df (`data.frame`)\cr a data frame.
|
|
140 |
#' @param colwidths (`numeric` or `NULL`)\cr a vector of column widths. Each element's position in
|
|
141 |
#' `colwidths` corresponds to the column of `df` in the same position. If `NULL`, column widths
|
|
142 |
#' are calculated according to maximum number of characters per column.
|
|
143 |
#' @param font_size (`numeric(1)`)\cr font size.
|
|
144 |
#' @param col_labels (`flag`)\cr whether the column names (labels) of `df` should be used as the first row
|
|
145 |
#' of the output table.
|
|
146 |
#' @param col_lab_fontface (`string`)\cr font face to apply to the first row (of column labels
|
|
147 |
#' if `col_labels = TRUE`). Defaults to `"bold"`.
|
|
148 |
#' @param hline (`flag`)\cr whether a horizontal line should be printed below the first row of the table.
|
|
149 |
#' @param bg_fill (`string`)\cr table background fill color.
|
|
150 |
#'
|
|
151 |
#' @return A `ggplot` object.
|
|
152 |
#'
|
|
153 |
#' @examples
|
|
154 |
#' \dontrun{
|
|
155 |
#' df2gg(head(iris, 5))
|
|
156 |
#'
|
|
157 |
#' df2gg(head(iris, 5), font_size = 15, colwidths = c(1, 1, 1, 1, 1))
|
|
158 |
#' }
|
|
159 |
#' @keywords internal
|
|
160 |
df2gg <- function(df, |
|
161 |
colwidths = NULL, |
|
162 |
font_size = 10, |
|
163 |
col_labels = TRUE, |
|
164 |
col_lab_fontface = "bold", |
|
165 |
hline = TRUE, |
|
166 |
bg_fill = NULL) { |
|
167 |
# convert to text
|
|
168 | 19x |
df <- as.data.frame(apply(df, 1:2, function(x) if (is.na(x)) "NA" else as.character(x))) |
169 | ||
170 | 19x |
if (col_labels) { |
171 | 10x |
df <- as.matrix(df) |
172 | 10x |
df <- rbind(colnames(df), df) |
173 |
}
|
|
174 | ||
175 |
# Get column widths
|
|
176 | 19x |
if (is.null(colwidths)) { |
177 | 1x |
colwidths <- apply(df, 2, function(x) max(nchar(x), na.rm = TRUE)) |
178 |
}
|
|
179 | 19x |
tot_width <- sum(colwidths) |
180 | ||
181 | 19x |
res <- ggplot(data = df) + |
182 | 19x |
theme_void() + |
183 | 19x |
scale_x_continuous(limits = c(0, tot_width)) + |
184 | 19x |
scale_y_continuous(limits = c(1, nrow(df))) |
185 | ||
186 | 9x |
if (!is.null(bg_fill)) res <- res + theme(plot.background = element_rect(fill = bg_fill)) |
187 | ||
188 | 19x |
if (hline) { |
189 | 10x |
res <- res + |
190 | 10x |
annotate( |
191 | 10x |
"segment",
|
192 | 10x |
x = 0 + 0.2 * colwidths[2], xend = tot_width - 0.1 * tail(colwidths, 1), |
193 | 10x |
y = nrow(df) - 0.5, yend = nrow(df) - 0.5 |
194 |
)
|
|
195 |
}
|
|
196 | ||
197 | 19x |
for (i in seq_len(ncol(df))) { |
198 | 86x |
line_pos <- c( |
199 | 86x |
if (i == 1) 0 else sum(colwidths[1:(i - 1)]), |
200 | 86x |
sum(colwidths[1:i]) |
201 |
)
|
|
202 | 86x |
res <- res + |
203 | 86x |
annotate( |
204 | 86x |
"text",
|
205 | 86x |
x = mean(line_pos), |
206 | 86x |
y = rev(seq_len(nrow(df))), |
207 | 86x |
label = df[, i], |
208 | 86x |
size = font_size / .pt, |
209 | 86x |
fontface = if (col_labels) { |
210 | 32x |
c(col_lab_fontface, rep("plain", nrow(df) - 1)) |
211 |
} else { |
|
212 | 54x |
rep("plain", nrow(df)) |
213 |
}
|
|
214 |
)
|
|
215 |
}
|
|
216 | ||
217 | 19x |
res
|
218 |
}
|
1 |
#' Count patients with toxicity grades that have worsened from baseline by highest grade post-baseline
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [count_abnormal_lab_worsen_by_baseline()] creates a layout element to count patients with
|
|
6 |
#' analysis toxicity grades which have worsened from baseline, categorized by highest (worst) grade post-baseline.
|
|
7 |
#'
|
|
8 |
#' This function analyzes primary analysis variable `var` which indicates analysis toxicity grades. Additional
|
|
9 |
#' analysis variables that can be supplied as a list via the `variables` parameter are `id` (defaults to `USUBJID`),
|
|
10 |
#' a variable to indicate unique subject identifiers, `baseline_var` (defaults to `BTOXGR`), a variable to indicate
|
|
11 |
#' baseline toxicity grades, and `direction_var` (defaults to `GRADDIR`), a variable to indicate toxicity grade
|
|
12 |
#' directions of interest to include (e.g. `"H"` (high), `"L"` (low), or `"B"` (both)).
|
|
13 |
#'
|
|
14 |
#' For the direction(s) specified in `direction_var`, patient counts by worst grade for patients who have
|
|
15 |
#' worsened from baseline are calculated as follows:
|
|
16 |
#' * `1` to `4`: The number of patients who have worsened from their baseline grades with worst
|
|
17 |
#' grades 1-4, respectively.
|
|
18 |
#' * `Any`: The total number of patients who have worsened from their baseline grades.
|
|
19 |
#'
|
|
20 |
#' Fractions are calculated by dividing the above counts by the number of patients who's analysis toxicity grades
|
|
21 |
#' have worsened from baseline toxicity grades during treatment.
|
|
22 |
#'
|
|
23 |
#' Prior to using this function in your table layout you must use [rtables::split_rows_by()] to create a row
|
|
24 |
#' split on variable `direction_var`.
|
|
25 |
#'
|
|
26 |
#' @inheritParams argument_convention
|
|
27 |
#' @param variables (named `list` of `string`)\cr list of additional analysis variables including:
|
|
28 |
#' * `id` (`string`)\cr subject variable name.
|
|
29 |
#' * `baseline_var` (`string`)\cr name of the data column containing baseline toxicity variable.
|
|
30 |
#' * `direction_var` (`string`)\cr see `direction_var` for more details.
|
|
31 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
32 |
#' @param table_names `r lifecycle::badge("deprecated")` this parameter has no effect.
|
|
33 |
#'
|
|
34 |
#' Options are: ``r shQuote(get_stats("abnormal_lab_worsen_by_baseline"), type = "sh")``
|
|
35 |
#'
|
|
36 |
#' @seealso Relevant helper functions [h_adlb_worsen()] and [h_worsen_counter()] which are used within
|
|
37 |
#' [s_count_abnormal_lab_worsen_by_baseline()] to process input data.
|
|
38 |
#'
|
|
39 |
#' @name abnormal_lab_worsen_by_baseline
|
|
40 |
#' @order 1
|
|
41 |
NULL
|
|
42 | ||
43 |
#' @describeIn abnormal_lab_worsen_by_baseline Statistics function for patients whose worst post-baseline
|
|
44 |
#' lab grades are worse than their baseline grades.
|
|
45 |
#'
|
|
46 |
#' @return
|
|
47 |
#' * `s_count_abnormal_lab_worsen_by_baseline()` returns the counts and fraction of patients whose worst
|
|
48 |
#' post-baseline lab grades are worse than their baseline grades, for post-baseline worst grades
|
|
49 |
#' "1", "2", "3", "4" and "Any".
|
|
50 |
#'
|
|
51 |
#' @keywords internal
|
|
52 |
s_count_abnormal_lab_worsen_by_baseline <- function(df, |
|
53 |
.var = "ATOXGR", |
|
54 |
variables = list( |
|
55 |
id = "USUBJID", |
|
56 |
baseline_var = "BTOXGR", |
|
57 |
direction_var = "GRADDR" |
|
58 |
),
|
|
59 |
...) { |
|
60 | 13x |
checkmate::assert_string(.var) |
61 | 13x |
checkmate::assert_set_equal(names(variables), c("id", "baseline_var", "direction_var")) |
62 | 13x |
checkmate::assert_string(variables$id) |
63 | 13x |
checkmate::assert_string(variables$baseline_var) |
64 | 13x |
checkmate::assert_string(variables$direction_var) |
65 | 13x |
assert_df_with_variables(df, c(aval = .var, variables[1:3])) |
66 | 13x |
assert_list_of_variables(variables) |
67 | ||
68 | 13x |
h_worsen_counter(df, variables$id, .var, variables$baseline_var, variables$direction_var) |
69 |
}
|
|
70 | ||
71 |
#' @describeIn abnormal_lab_worsen_by_baseline Formatted analysis function which is used as `afun`
|
|
72 |
#' in `count_abnormal_lab_worsen_by_baseline()`.
|
|
73 |
#'
|
|
74 |
#' @return
|
|
75 |
#' * `a_count_abnormal_lab_worsen_by_baseline()` returns the corresponding list with
|
|
76 |
#' formatted [rtables::CellValue()].
|
|
77 |
#'
|
|
78 |
#' @keywords internal
|
|
79 |
a_count_abnormal_lab_worsen_by_baseline <- function(df, |
|
80 |
...,
|
|
81 |
.stats = NULL, |
|
82 |
.stat_names = NULL, |
|
83 |
.formats = NULL, |
|
84 |
.labels = NULL, |
|
85 |
.indent_mods = NULL) { |
|
86 |
# Check for additional parameters to the statistics function
|
|
87 | 12x |
dots_extra_args <- list(...) |
88 | 12x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
89 | 12x |
dots_extra_args$.additional_fun_parameters <- NULL |
90 | ||
91 |
# Check for user-defined functions
|
|
92 | 12x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
93 | 12x |
.stats <- default_and_custom_stats_list$all_stats |
94 | 12x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
95 | ||
96 |
# Apply statistics function
|
|
97 | 12x |
x_stats <- .apply_stat_functions( |
98 | 12x |
default_stat_fnc = s_count_abnormal_lab_worsen_by_baseline, |
99 | 12x |
custom_stat_fnc_list = custom_stat_functions, |
100 | 12x |
args_list = c( |
101 | 12x |
df = list(df), |
102 | 12x |
extra_afun_params,
|
103 | 12x |
dots_extra_args
|
104 |
)
|
|
105 |
)
|
|
106 | ||
107 |
# Fill in formatting defaults
|
|
108 | 12x |
.stats <- get_stats( |
109 | 12x |
"abnormal_lab_worsen_by_baseline",
|
110 | 12x |
stats_in = .stats, |
111 | 12x |
custom_stats_in = names(custom_stat_functions) |
112 |
)
|
|
113 | 12x |
levels_per_stats <- lapply(x_stats, names) |
114 | 12x |
.formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) |
115 | 12x |
.labels <- get_labels_from_stats(.stats, .labels, levels_per_stats) |
116 | 12x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) |
117 | ||
118 | 12x |
x_stats <- x_stats[.stats] %>% |
119 | 12x |
.unlist_keep_nulls() %>% |
120 | 12x |
setNames(names(.formats)) |
121 | ||
122 |
# Auto format handling
|
|
123 | 12x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
124 | ||
125 |
# Get and check statistical names
|
|
126 | 12x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
127 | ||
128 | 12x |
in_rows( |
129 | 12x |
.list = x_stats, |
130 | 12x |
.formats = .formats, |
131 | 12x |
.names = .labels %>% .unlist_keep_nulls(), |
132 | 12x |
.stat_names = .stat_names, |
133 | 12x |
.labels = .labels %>% .unlist_keep_nulls(), |
134 | 12x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
135 |
)
|
|
136 |
}
|
|
137 | ||
138 |
#' @describeIn abnormal_lab_worsen_by_baseline Layout-creating function which can take statistics function
|
|
139 |
#' arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
140 |
#'
|
|
141 |
#' @return
|
|
142 |
#' * `count_abnormal_lab_worsen_by_baseline()` returns a layout object suitable for passing to further layouting
|
|
143 |
#' functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted
|
|
144 |
#' rows containing the statistics from `s_count_abnormal_lab_worsen_by_baseline()` to the table layout.
|
|
145 |
#'
|
|
146 |
#' @examples
|
|
147 |
#' library(dplyr)
|
|
148 |
#'
|
|
149 |
#' # The direction variable, GRADDR, is based on metadata
|
|
150 |
#' adlb <- tern_ex_adlb %>%
|
|
151 |
#' mutate(
|
|
152 |
#' GRADDR = case_when(
|
|
153 |
#' PARAMCD == "ALT" ~ "B",
|
|
154 |
#' PARAMCD == "CRP" ~ "L",
|
|
155 |
#' PARAMCD == "IGA" ~ "H"
|
|
156 |
#' )
|
|
157 |
#' ) %>%
|
|
158 |
#' filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
|
|
159 |
#'
|
|
160 |
#' df <- h_adlb_worsen(
|
|
161 |
#' adlb,
|
|
162 |
#' worst_flag_low = c("WGRLOFL" = "Y"),
|
|
163 |
#' worst_flag_high = c("WGRHIFL" = "Y"),
|
|
164 |
#' direction_var = "GRADDR"
|
|
165 |
#' )
|
|
166 |
#'
|
|
167 |
#' basic_table() %>%
|
|
168 |
#' split_cols_by("ARMCD") %>%
|
|
169 |
#' add_colcounts() %>%
|
|
170 |
#' split_rows_by("PARAMCD") %>%
|
|
171 |
#' split_rows_by("GRADDR") %>%
|
|
172 |
#' count_abnormal_lab_worsen_by_baseline(
|
|
173 |
#' var = "ATOXGR",
|
|
174 |
#' variables = list(
|
|
175 |
#' id = "USUBJID",
|
|
176 |
#' baseline_var = "BTOXGR",
|
|
177 |
#' direction_var = "GRADDR"
|
|
178 |
#' )
|
|
179 |
#' ) %>%
|
|
180 |
#' append_topleft("Direction of Abnormality") %>%
|
|
181 |
#' build_table(df = df, alt_counts_df = tern_ex_adsl)
|
|
182 |
#'
|
|
183 |
#' @export
|
|
184 |
#' @order 2
|
|
185 |
count_abnormal_lab_worsen_by_baseline <- function(lyt, |
|
186 |
var,
|
|
187 |
variables = list( |
|
188 |
id = "USUBJID", |
|
189 |
baseline_var = "BTOXGR", |
|
190 |
direction_var = "GRADDR" |
|
191 |
),
|
|
192 |
na_str = default_na_str(), |
|
193 |
nested = TRUE, |
|
194 |
...,
|
|
195 |
table_names = lifecycle::deprecated(), |
|
196 |
.stats = "fraction", |
|
197 |
.stat_names = NULL, |
|
198 |
.formats = list(fraction = format_fraction), |
|
199 |
.labels = NULL, |
|
200 |
.indent_mods = NULL) { |
|
201 | 1x |
checkmate::assert_string(var) |
202 | ||
203 |
# Deprecated argument warning
|
|
204 | 1x |
if (lifecycle::is_present(table_names)) { |
205 | ! |
lifecycle::deprecate_warn( |
206 | ! |
"0.9.8", "count_abnormal_lab_worsen_by_baseline(table_names)", |
207 | ! |
details = "The argument has no effect on the output." |
208 |
)
|
|
209 |
}
|
|
210 | ||
211 |
# Process standard extra arguments
|
|
212 | 1x |
extra_args <- list(".stats" = .stats) |
213 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
214 | 1x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
215 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
216 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
217 | ||
218 |
# Process additional arguments to the statistic function
|
|
219 | 1x |
extra_args <- c(extra_args, "variables" = list(variables), ...) |
220 | ||
221 |
# Append additional info from layout to the analysis function
|
|
222 | 1x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
223 | 1x |
formals(a_count_abnormal_lab_worsen_by_baseline) <- c( |
224 | 1x |
formals(a_count_abnormal_lab_worsen_by_baseline), extra_args[[".additional_fun_parameters"]] |
225 |
)
|
|
226 | ||
227 | 1x |
analyze( |
228 | 1x |
lyt = lyt, |
229 | 1x |
vars = var, |
230 | 1x |
afun = a_count_abnormal_lab_worsen_by_baseline, |
231 | 1x |
na_str = na_str, |
232 | 1x |
nested = nested, |
233 | 1x |
extra_args = extra_args, |
234 | 1x |
show_labels = "hidden" |
235 |
)
|
|
236 |
}
|
|
237 | ||
238 |
#' Helper function to prepare ADLB with worst labs
|
|
239 |
#'
|
|
240 |
#' @description `r lifecycle::badge("stable")`
|
|
241 |
#'
|
|
242 |
#' Helper function to prepare a `df` for generate the patient count shift table.
|
|
243 |
#'
|
|
244 |
#' @param adlb (`data.frame`)\cr ADLB data frame.
|
|
245 |
#' @param worst_flag_low (named `vector`)\cr worst low post-baseline lab grade flag variable. See how this is
|
|
246 |
#' implemented in the following examples.
|
|
247 |
#' @param worst_flag_high (named `vector`)\cr worst high post-baseline lab grade flag variable. See how this is
|
|
248 |
#' implemented in the following examples.
|
|
249 |
#' @param direction_var (`string`)\cr name of the direction variable specifying the direction of the shift table of
|
|
250 |
#' interest. Only lab records flagged by `L`, `H` or `B` are included in the shift table.
|
|
251 |
#' * `L`: low direction only
|
|
252 |
#' * `H`: high direction only
|
|
253 |
#' * `B`: both low and high directions
|
|
254 |
#'
|
|
255 |
#' @return `h_adlb_worsen()` returns the `adlb` `data.frame` containing only the
|
|
256 |
#' worst labs specified according to `worst_flag_low` or `worst_flag_high` for the
|
|
257 |
#' direction specified according to `direction_var`. For instance, for a lab that is
|
|
258 |
#' needed for the low direction only, only records flagged by `worst_flag_low` are
|
|
259 |
#' selected. For a lab that is needed for both low and high directions, the worst
|
|
260 |
#' low records are selected for the low direction, and the worst high record are selected
|
|
261 |
#' for the high direction.
|
|
262 |
#'
|
|
263 |
#' @seealso [abnormal_lab_worsen_by_baseline]
|
|
264 |
#'
|
|
265 |
#' @examples
|
|
266 |
#' library(dplyr)
|
|
267 |
#'
|
|
268 |
#' # The direction variable, GRADDR, is based on metadata
|
|
269 |
#' adlb <- tern_ex_adlb %>%
|
|
270 |
#' mutate(
|
|
271 |
#' GRADDR = case_when(
|
|
272 |
#' PARAMCD == "ALT" ~ "B",
|
|
273 |
#' PARAMCD == "CRP" ~ "L",
|
|
274 |
#' PARAMCD == "IGA" ~ "H"
|
|
275 |
#' )
|
|
276 |
#' ) %>%
|
|
277 |
#' filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
|
|
278 |
#'
|
|
279 |
#' df <- h_adlb_worsen(
|
|
280 |
#' adlb,
|
|
281 |
#' worst_flag_low = c("WGRLOFL" = "Y"),
|
|
282 |
#' worst_flag_high = c("WGRHIFL" = "Y"),
|
|
283 |
#' direction_var = "GRADDR"
|
|
284 |
#' )
|
|
285 |
#'
|
|
286 |
#' @export
|
|
287 |
h_adlb_worsen <- function(adlb, |
|
288 |
worst_flag_low = NULL, |
|
289 |
worst_flag_high = NULL, |
|
290 |
direction_var) { |
|
291 | 5x |
checkmate::assert_string(direction_var) |
292 | 5x |
checkmate::assert_subset(as.character(unique(adlb[[direction_var]])), c("B", "L", "H")) |
293 | 5x |
assert_df_with_variables(adlb, list("Col" = direction_var)) |
294 | ||
295 | 5x |
if (any(unique(adlb[[direction_var]]) == "H")) { |
296 | 4x |
assert_df_with_variables(adlb, list("High" = names(worst_flag_high))) |
297 |
}
|
|
298 | ||
299 | 5x |
if (any(unique(adlb[[direction_var]]) == "L")) { |
300 | 4x |
assert_df_with_variables(adlb, list("Low" = names(worst_flag_low))) |
301 |
}
|
|
302 | ||
303 | 5x |
if (any(unique(adlb[[direction_var]]) == "B")) { |
304 | 3x |
assert_df_with_variables( |
305 | 3x |
adlb,
|
306 | 3x |
list( |
307 | 3x |
"Low" = names(worst_flag_low), |
308 | 3x |
"High" = names(worst_flag_high) |
309 |
)
|
|
310 |
)
|
|
311 |
}
|
|
312 | ||
313 |
# extract patients with worst post-baseline lab, either low or high or both
|
|
314 | 5x |
worst_flag <- c(worst_flag_low, worst_flag_high) |
315 | 5x |
col_names <- names(worst_flag) |
316 | 5x |
filter_values <- worst_flag |
317 | 5x |
temp <- Map( |
318 | 5x |
function(x, y) which(adlb[[x]] == y), |
319 | 5x |
col_names,
|
320 | 5x |
filter_values
|
321 |
)
|
|
322 | 5x |
position_satisfy_filters <- Reduce(union, temp) |
323 | ||
324 |
# select variables of interest
|
|
325 | 5x |
adlb_f <- adlb[position_satisfy_filters, ] |
326 | ||
327 |
# generate subsets for different directionality
|
|
328 | 5x |
adlb_f_h <- adlb_f[which(adlb_f[[direction_var]] == "H"), ] |
329 | 5x |
adlb_f_l <- adlb_f[which(adlb_f[[direction_var]] == "L"), ] |
330 | 5x |
adlb_f_b <- adlb_f[which(adlb_f[[direction_var]] == "B"), ] |
331 | ||
332 |
# for labs requiring both high and low, data is duplicated and will be stacked on top of each other
|
|
333 | 5x |
adlb_f_b_h <- adlb_f_b |
334 | 5x |
adlb_f_b_l <- adlb_f_b |
335 | ||
336 |
# extract data with worst lab
|
|
337 | 5x |
if (!is.null(worst_flag_high) && !is.null(worst_flag_low)) { |
338 |
# change H to High, L to Low
|
|
339 | 3x |
adlb_f_h[[direction_var]] <- rep("High", nrow(adlb_f_h)) |
340 | 3x |
adlb_f_l[[direction_var]] <- rep("Low", nrow(adlb_f_l)) |
341 | ||
342 |
# change, B to High and Low
|
|
343 | 3x |
adlb_f_b_h[[direction_var]] <- rep("High", nrow(adlb_f_b_h)) |
344 | 3x |
adlb_f_b_l[[direction_var]] <- rep("Low", nrow(adlb_f_b_l)) |
345 | ||
346 | 3x |
adlb_out_h <- adlb_f_h[which(adlb_f_h[[names(worst_flag_high)]] == worst_flag_high), ] |
347 | 3x |
adlb_out_b_h <- adlb_f_b_h[which(adlb_f_b_h[[names(worst_flag_high)]] == worst_flag_high), ] |
348 | 3x |
adlb_out_l <- adlb_f_l[which(adlb_f_l[[names(worst_flag_low)]] == worst_flag_low), ] |
349 | 3x |
adlb_out_b_l <- adlb_f_b_l[which(adlb_f_b_l[[names(worst_flag_low)]] == worst_flag_low), ] |
350 | ||
351 | 3x |
out <- rbind(adlb_out_h, adlb_out_b_h, adlb_out_l, adlb_out_b_l) |
352 | 2x |
} else if (!is.null(worst_flag_high)) { |
353 | 1x |
adlb_f_h[[direction_var]] <- rep("High", nrow(adlb_f_h)) |
354 | 1x |
adlb_f_b_h[[direction_var]] <- rep("High", nrow(adlb_f_b_h)) |
355 | ||
356 | 1x |
adlb_out_h <- adlb_f_h[which(adlb_f_h[[names(worst_flag_high)]] == worst_flag_high), ] |
357 | 1x |
adlb_out_b_h <- adlb_f_b_h[which(adlb_f_b_h[[names(worst_flag_high)]] == worst_flag_high), ] |
358 | ||
359 | 1x |
out <- rbind(adlb_out_h, adlb_out_b_h) |
360 | 1x |
} else if (!is.null(worst_flag_low)) { |
361 | 1x |
adlb_f_l[[direction_var]] <- rep("Low", nrow(adlb_f_l)) |
362 | 1x |
adlb_f_b_l[[direction_var]] <- rep("Low", nrow(adlb_f_b_l)) |
363 | ||
364 | 1x |
adlb_out_l <- adlb_f_l[which(adlb_f_l[[names(worst_flag_low)]] == worst_flag_low), ] |
365 | 1x |
adlb_out_b_l <- adlb_f_b_l[which(adlb_f_b_l[[names(worst_flag_low)]] == worst_flag_low), ] |
366 | ||
367 | 1x |
out <- rbind(adlb_out_l, adlb_out_b_l) |
368 |
}
|
|
369 | ||
370 |
# label
|
|
371 | 5x |
formatters::var_labels(out) <- formatters::var_labels(adlb_f, fill = FALSE) |
372 | ||
373 | 5x |
out
|
374 |
}
|
|
375 | ||
376 |
#' Helper function to analyze patients for `s_count_abnormal_lab_worsen_by_baseline()`
|
|
377 |
#'
|
|
378 |
#' @description `r lifecycle::badge("stable")`
|
|
379 |
#'
|
|
380 |
#' Helper function to count the number of patients and the fraction of patients according to
|
|
381 |
#' highest post-baseline lab grade variable `.var`, baseline lab grade variable `baseline_var`,
|
|
382 |
#' and the direction of interest specified in `direction_var`.
|
|
383 |
#'
|
|
384 |
#' @inheritParams argument_convention
|
|
385 |
#' @inheritParams h_adlb_worsen
|
|
386 |
#' @param baseline_var (`string`)\cr name of the baseline lab grade variable.
|
|
387 |
#'
|
|
388 |
#' @return The counts and fraction of patients
|
|
389 |
#' whose worst post-baseline lab grades are worse than their baseline grades, for
|
|
390 |
#' post-baseline worst grades "1", "2", "3", "4" and "Any".
|
|
391 |
#'
|
|
392 |
#' @seealso [abnormal_lab_worsen_by_baseline]
|
|
393 |
#'
|
|
394 |
#' @examples
|
|
395 |
#' library(dplyr)
|
|
396 |
#'
|
|
397 |
#' # The direction variable, GRADDR, is based on metadata
|
|
398 |
#' adlb <- tern_ex_adlb %>%
|
|
399 |
#' mutate(
|
|
400 |
#' GRADDR = case_when(
|
|
401 |
#' PARAMCD == "ALT" ~ "B",
|
|
402 |
#' PARAMCD == "CRP" ~ "L",
|
|
403 |
#' PARAMCD == "IGA" ~ "H"
|
|
404 |
#' )
|
|
405 |
#' ) %>%
|
|
406 |
#' filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
|
|
407 |
#'
|
|
408 |
#' df <- h_adlb_worsen(
|
|
409 |
#' adlb,
|
|
410 |
#' worst_flag_low = c("WGRLOFL" = "Y"),
|
|
411 |
#' worst_flag_high = c("WGRHIFL" = "Y"),
|
|
412 |
#' direction_var = "GRADDR"
|
|
413 |
#' )
|
|
414 |
#'
|
|
415 |
#' # `h_worsen_counter`
|
|
416 |
#' h_worsen_counter(
|
|
417 |
#' df %>% filter(PARAMCD == "CRP" & GRADDR == "Low"),
|
|
418 |
#' id = "USUBJID",
|
|
419 |
#' .var = "ATOXGR",
|
|
420 |
#' baseline_var = "BTOXGR",
|
|
421 |
#' direction_var = "GRADDR"
|
|
422 |
#' )
|
|
423 |
#'
|
|
424 |
#' @export
|
|
425 |
h_worsen_counter <- function(df, id, .var, baseline_var, direction_var) { |
|
426 | 17x |
checkmate::assert_string(id) |
427 | 17x |
checkmate::assert_string(.var) |
428 | 17x |
checkmate::assert_string(baseline_var) |
429 | 17x |
checkmate::assert_scalar(unique(df[[direction_var]])) |
430 | 17x |
checkmate::assert_subset(unique(df[[direction_var]]), c("High", "Low")) |
431 | 17x |
assert_df_with_variables(df, list(val = c(id, .var, baseline_var, direction_var))) |
432 | ||
433 |
# remove post-baseline missing
|
|
434 | 17x |
df <- df[df[[.var]] != "<Missing>", ] |
435 | ||
436 |
# obtain directionality
|
|
437 | 17x |
direction <- unique(df[[direction_var]]) |
438 | ||
439 | 17x |
if (direction == "Low") { |
440 | 10x |
grade <- -1:-4 |
441 | 10x |
worst_grade <- -4 |
442 | 7x |
} else if (direction == "High") { |
443 | 7x |
grade <- 1:4 |
444 | 7x |
worst_grade <- 4 |
445 |
}
|
|
446 | ||
447 | 17x |
if (nrow(df) > 0) { |
448 | 17x |
by_grade <- lapply(grade, function(i) { |
449 |
# filter baseline values that is less than i or <Missing>
|
|
450 | 68x |
df_temp <- df[df[[baseline_var]] %in% c((i + sign(i) * -1):(-1 * worst_grade), "<Missing>"), ] |
451 |
# num: number of patients with post-baseline worst lab equal to i
|
|
452 | 68x |
num <- length(unique(df_temp[df_temp[[.var]] %in% i, id, drop = TRUE])) |
453 |
# denom: number of patients with baseline values less than i or <missing> and post-baseline in the same direction
|
|
454 | 68x |
denom <- length(unique(df_temp[[id]])) |
455 | 68x |
rm(df_temp) |
456 | 68x |
c(num = num, denom = denom) |
457 |
}) |
|
458 |
} else { |
|
459 | ! |
by_grade <- lapply(1, function(i) { |
460 | ! |
c(num = 0, denom = 0) |
461 |
}) |
|
462 |
}
|
|
463 | ||
464 | 17x |
names(by_grade) <- as.character(seq_along(by_grade)) |
465 | ||
466 |
# baseline grade less 4 or missing
|
|
467 | 17x |
df_temp <- df[!df[[baseline_var]] %in% worst_grade, ] |
468 | ||
469 |
# denom: number of patients with baseline values less than 4 or <missing> and post-baseline in the same direction
|
|
470 | 17x |
denom <- length(unique(df_temp[, id, drop = TRUE])) |
471 | ||
472 |
# condition 1: missing baseline and in the direction of abnormality
|
|
473 | 17x |
con1 <- which(df_temp[[baseline_var]] == "<Missing>" & df_temp[[.var]] %in% grade) |
474 | 17x |
df_temp_nm <- df_temp[which(df_temp[[baseline_var]] != "<Missing>" & df_temp[[.var]] %in% grade), ] |
475 | ||
476 |
# condition 2: if post-baseline values are present then post-baseline values must be worse than baseline
|
|
477 | 17x |
if (direction == "Low") { |
478 | 10x |
con2 <- which(as.numeric(as.character(df_temp_nm[[.var]])) < as.numeric(as.character(df_temp_nm[[baseline_var]]))) |
479 |
} else { |
|
480 | 7x |
con2 <- which(as.numeric(as.character(df_temp_nm[[.var]])) > as.numeric(as.character(df_temp_nm[[baseline_var]]))) |
481 |
}
|
|
482 | ||
483 |
# number of patients satisfy either conditions 1 or 2
|
|
484 | 17x |
num <- length(unique(df_temp[union(con1, con2), id, drop = TRUE])) |
485 | ||
486 | 17x |
list(fraction = c(by_grade, list("Any" = c(num = num, denom = denom)))) |
487 |
}
|
1 |
#' Odds ratio estimation
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [estimate_odds_ratio()] creates a layout element to compare bivariate responses between
|
|
6 |
#' two groups by estimating an odds ratio and its confidence interval.
|
|
7 |
#'
|
|
8 |
#' The primary analysis variable specified by `vars` is the group variable. Additional variables can be included in the
|
|
9 |
#' analysis via the `variables` argument, which accepts `arm`, an arm variable, and `strata`, a stratification variable.
|
|
10 |
#' If more than two arm levels are present, they can be combined into two groups using the `groups_list` argument.
|
|
11 |
#'
|
|
12 |
#' @inheritParams split_cols_by_groups
|
|
13 |
#' @inheritParams argument_convention
|
|
14 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
15 |
#'
|
|
16 |
#' Options are: ``r shQuote(get_stats("estimate_odds_ratio"), type = "sh")``
|
|
17 |
#' @param method (`string`)\cr whether to use the correct (`"exact"`) calculation in the conditional likelihood or one
|
|
18 |
#' of the approximations. See [survival::clogit()] for details.
|
|
19 |
#'
|
|
20 |
#' @note
|
|
21 |
#' * This function uses logistic regression for unstratified analyses, and conditional logistic regression for
|
|
22 |
#' stratified analyses. The Wald confidence interval is calculated with the specified confidence level.
|
|
23 |
#' * For stratified analyses, there is currently no implementation for conditional likelihood confidence intervals,
|
|
24 |
#' therefore the likelihood confidence interval is not available as an option.
|
|
25 |
#' * When `vars` contains only responders or non-responders no odds ratio estimation is possible so the returned
|
|
26 |
#' values will be `NA`.
|
|
27 |
#'
|
|
28 |
#' @seealso Relevant helper function [h_odds_ratio()].
|
|
29 |
#'
|
|
30 |
#' @name odds_ratio
|
|
31 |
#' @order 1
|
|
32 |
NULL
|
|
33 | ||
34 |
#' @describeIn odds_ratio Statistics function which estimates the odds ratio
|
|
35 |
#' between a treatment and a control. A `variables` list with `arm` and `strata`
|
|
36 |
#' variable names must be passed if a stratified analysis is required.
|
|
37 |
#'
|
|
38 |
#' @return
|
|
39 |
#' * `s_odds_ratio()` returns a named list with the statistics `or_ci`
|
|
40 |
#' (containing `est`, `lcl`, and `ucl`) and `n_tot`.
|
|
41 |
#'
|
|
42 |
#' @examples
|
|
43 |
#' # Unstratified analysis.
|
|
44 |
#' s_odds_ratio(
|
|
45 |
#' df = subset(dta, grp == "A"),
|
|
46 |
#' .var = "rsp",
|
|
47 |
#' .ref_group = subset(dta, grp == "B"),
|
|
48 |
#' .in_ref_col = FALSE,
|
|
49 |
#' .df_row = dta
|
|
50 |
#' )
|
|
51 |
#'
|
|
52 |
#' # Stratified analysis.
|
|
53 |
#' s_odds_ratio(
|
|
54 |
#' df = subset(dta, grp == "A"),
|
|
55 |
#' .var = "rsp",
|
|
56 |
#' .ref_group = subset(dta, grp == "B"),
|
|
57 |
#' .in_ref_col = FALSE,
|
|
58 |
#' .df_row = dta,
|
|
59 |
#' variables = list(arm = "grp", strata = "strata")
|
|
60 |
#' )
|
|
61 |
#'
|
|
62 |
#' @export
|
|
63 |
s_odds_ratio <- function(df, |
|
64 |
.var,
|
|
65 |
.ref_group,
|
|
66 |
.in_ref_col,
|
|
67 |
.df_row,
|
|
68 |
variables = list(arm = NULL, strata = NULL), |
|
69 |
conf_level = 0.95, |
|
70 |
groups_list = NULL, |
|
71 |
method = "exact", |
|
72 |
...) { |
|
73 | 99x |
y <- list(or_ci = numeric(), n_tot = numeric()) |
74 | ||
75 | 99x |
if (!.in_ref_col) { |
76 | 94x |
assert_proportion_value(conf_level) |
77 | 94x |
assert_df_with_variables(df, list(rsp = .var)) |
78 | 94x |
assert_df_with_variables(.ref_group, list(rsp = .var)) |
79 | ||
80 | 94x |
if (is.null(variables$strata)) { |
81 | 76x |
data <- data.frame( |
82 | 76x |
rsp = c(.ref_group[[.var]], df[[.var]]), |
83 | 76x |
grp = factor( |
84 | 76x |
rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))), |
85 | 76x |
levels = c("ref", "Not-ref") |
86 |
)
|
|
87 |
)
|
|
88 | 76x |
y <- or_glm(data, conf_level = conf_level) |
89 |
} else { |
|
90 | 18x |
assert_df_with_variables(.df_row, c(list(rsp = .var), variables)) |
91 | 18x |
checkmate::assert_subset(method, c("exact", "approximate", "efron", "breslow"), empty.ok = FALSE) |
92 | ||
93 |
# The group variable prepared for clogit must be synchronised with combination groups definition.
|
|
94 | 18x |
if (is.null(groups_list)) { |
95 | 16x |
ref_grp <- as.character(unique(.ref_group[[variables$arm]])) |
96 | 16x |
trt_grp <- as.character(unique(df[[variables$arm]])) |
97 | 16x |
grp <- stats::relevel(factor(.df_row[[variables$arm]]), ref = ref_grp) |
98 |
} else { |
|
99 |
# If more than one level in reference col.
|
|
100 | 2x |
reference <- as.character(unique(.ref_group[[variables$arm]])) |
101 | 2x |
grp_ref_flag <- vapply( |
102 | 2x |
X = groups_list, |
103 | 2x |
FUN.VALUE = TRUE, |
104 | 2x |
FUN = function(x) all(reference %in% x) |
105 |
)
|
|
106 | 2x |
ref_grp <- names(groups_list)[grp_ref_flag] |
107 | ||
108 |
# If more than one level in treatment col.
|
|
109 | 2x |
treatment <- as.character(unique(df[[variables$arm]])) |
110 | 2x |
grp_trt_flag <- vapply( |
111 | 2x |
X = groups_list, |
112 | 2x |
FUN.VALUE = TRUE, |
113 | 2x |
FUN = function(x) all(treatment %in% x) |
114 |
)
|
|
115 | 2x |
trt_grp <- names(groups_list)[grp_trt_flag] |
116 | ||
117 | 2x |
grp <- combine_levels(.df_row[[variables$arm]], levels = reference, new_level = ref_grp) |
118 | 2x |
grp <- combine_levels(grp, levels = treatment, new_level = trt_grp) |
119 |
}
|
|
120 | ||
121 |
# The reference level in `grp` must be the same as in the `rtables` column split.
|
|
122 | 18x |
data <- data.frame( |
123 | 18x |
rsp = .df_row[[.var]], |
124 | 18x |
grp = grp, |
125 | 18x |
strata = interaction(.df_row[variables$strata]) |
126 |
)
|
|
127 | 18x |
y_all <- or_clogit(data, conf_level = conf_level, method = method) |
128 | 18x |
checkmate::assert_string(trt_grp) |
129 | 18x |
checkmate::assert_subset(trt_grp, names(y_all$or_ci)) |
130 | 17x |
y$or_ci <- y_all$or_ci[[trt_grp]] |
131 | 17x |
y$n_tot <- y_all$n_tot |
132 |
}
|
|
133 |
}
|
|
134 | ||
135 | 98x |
if ("est" %in% names(y$or_ci) && is.na(y$or_ci[["est"]]) && method != "approximate") { |
136 | 1x |
warning( |
137 | 1x |
"Unable to compute the odds ratio estimate. Please try re-running the function with ",
|
138 | 1x |
'parameter `method` set to "approximate".'
|
139 |
)
|
|
140 |
}
|
|
141 | ||
142 | 98x |
y$or_ci <- formatters::with_label( |
143 | 98x |
x = y$or_ci, |
144 | 98x |
label = paste0("Odds Ratio (", 100 * conf_level, "% CI)") |
145 |
)
|
|
146 | ||
147 | 98x |
y$n_tot <- formatters::with_label( |
148 | 98x |
x = y$n_tot, |
149 | 98x |
label = "Total n" |
150 |
)
|
|
151 | ||
152 | 98x |
y
|
153 |
}
|
|
154 | ||
155 |
#' @describeIn odds_ratio Formatted analysis function which is used as `afun` in `estimate_odds_ratio()`.
|
|
156 |
#'
|
|
157 |
#' @return
|
|
158 |
#' * `a_odds_ratio()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
159 |
#'
|
|
160 |
#' @examples
|
|
161 |
#' a_odds_ratio(
|
|
162 |
#' df = subset(dta, grp == "A"),
|
|
163 |
#' .var = "rsp",
|
|
164 |
#' .ref_group = subset(dta, grp == "B"),
|
|
165 |
#' .in_ref_col = FALSE,
|
|
166 |
#' .df_row = dta
|
|
167 |
#' )
|
|
168 |
#'
|
|
169 |
#' @export
|
|
170 |
a_odds_ratio <- function(df, |
|
171 |
...,
|
|
172 |
.stats = NULL, |
|
173 |
.stat_names = NULL, |
|
174 |
.formats = NULL, |
|
175 |
.labels = NULL, |
|
176 |
.indent_mods = NULL) { |
|
177 |
# Check for additional parameters to the statistics function
|
|
178 | 12x |
dots_extra_args <- list(...) |
179 | 12x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
180 | 12x |
dots_extra_args$.additional_fun_parameters <- NULL |
181 | ||
182 |
# Check for user-defined functions
|
|
183 | 12x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
184 | 12x |
.stats <- default_and_custom_stats_list$all_stats |
185 | 12x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
186 | ||
187 |
# Apply statistics function
|
|
188 | 12x |
x_stats <- .apply_stat_functions( |
189 | 12x |
default_stat_fnc = s_odds_ratio, |
190 | 12x |
custom_stat_fnc_list = custom_stat_functions, |
191 | 12x |
args_list = c( |
192 | 12x |
df = list(df), |
193 | 12x |
extra_afun_params,
|
194 | 12x |
dots_extra_args
|
195 |
)
|
|
196 |
)
|
|
197 | ||
198 |
# Fill in formatting defaults
|
|
199 | 12x |
.stats <- get_stats("estimate_odds_ratio", |
200 | 12x |
stats_in = .stats, |
201 | 12x |
custom_stats_in = names(custom_stat_functions) |
202 |
)
|
|
203 | 12x |
x_stats <- x_stats[.stats] |
204 | 12x |
.formats <- get_formats_from_stats(.stats, .formats) |
205 | 12x |
.labels <- get_labels_from_stats( |
206 | 12x |
.stats, .labels, |
207 | 12x |
tern_defaults = c(lapply(x_stats, attr, "label"), tern_default_labels) |
208 |
)
|
|
209 | 12x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods) |
210 | ||
211 |
# Auto format handling
|
|
212 | 12x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
213 | ||
214 |
# Get and check statistical names
|
|
215 | 12x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
216 | ||
217 | 12x |
in_rows( |
218 | 12x |
.list = x_stats, |
219 | 12x |
.formats = .formats, |
220 | 12x |
.names = .labels %>% .unlist_keep_nulls(), |
221 | 12x |
.stat_names = .stat_names, |
222 | 12x |
.labels = .labels %>% .unlist_keep_nulls(), |
223 | 12x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
224 |
)
|
|
225 |
}
|
|
226 | ||
227 |
#' @describeIn odds_ratio Layout-creating function which can take statistics function arguments
|
|
228 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
229 |
#'
|
|
230 |
#' @return
|
|
231 |
#' * `estimate_odds_ratio()` returns a layout object suitable for passing to further layouting functions,
|
|
232 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
233 |
#' the statistics from `s_odds_ratio()` to the table layout.
|
|
234 |
#'
|
|
235 |
#' @examples
|
|
236 |
#' set.seed(12)
|
|
237 |
#' dta <- data.frame(
|
|
238 |
#' rsp = sample(c(TRUE, FALSE), 100, TRUE),
|
|
239 |
#' grp = factor(rep(c("A", "B"), each = 50), levels = c("A", "B")),
|
|
240 |
#' strata = factor(sample(c("C", "D"), 100, TRUE))
|
|
241 |
#' )
|
|
242 |
#'
|
|
243 |
#' l <- basic_table() %>%
|
|
244 |
#' split_cols_by(var = "grp", ref_group = "B") %>%
|
|
245 |
#' estimate_odds_ratio(vars = "rsp")
|
|
246 |
#'
|
|
247 |
#' build_table(l, df = dta)
|
|
248 |
#'
|
|
249 |
#' @export
|
|
250 |
#' @order 2
|
|
251 |
estimate_odds_ratio <- function(lyt, |
|
252 |
vars,
|
|
253 |
variables = list(arm = NULL, strata = NULL), |
|
254 |
conf_level = 0.95, |
|
255 |
groups_list = NULL, |
|
256 |
method = "exact", |
|
257 |
na_str = default_na_str(), |
|
258 |
nested = TRUE, |
|
259 |
...,
|
|
260 |
table_names = vars, |
|
261 |
show_labels = "hidden", |
|
262 |
var_labels = vars, |
|
263 |
.stats = "or_ci", |
|
264 |
.stat_names = NULL, |
|
265 |
.formats = NULL, |
|
266 |
.labels = NULL, |
|
267 |
.indent_mods = NULL) { |
|
268 |
# Process standard extra arguments
|
|
269 | 5x |
extra_args <- list(".stats" = .stats) |
270 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
271 | ! |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
272 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
273 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
274 | ||
275 |
# Process additional arguments to the statistic function
|
|
276 | 5x |
extra_args <- c( |
277 | 5x |
extra_args,
|
278 | 5x |
variables = list(variables), conf_level = list(conf_level), groups_list = list(groups_list), method = list(method), |
279 |
...
|
|
280 |
)
|
|
281 | ||
282 |
# Append additional info from layout to the analysis function
|
|
283 | 5x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
284 | 5x |
formals(a_odds_ratio) <- c(formals(a_odds_ratio), extra_args[[".additional_fun_parameters"]]) |
285 | ||
286 | 5x |
analyze( |
287 | 5x |
lyt = lyt, |
288 | 5x |
vars = vars, |
289 | 5x |
afun = a_odds_ratio, |
290 | 5x |
na_str = na_str, |
291 | 5x |
nested = nested, |
292 | 5x |
extra_args = extra_args, |
293 | 5x |
var_labels = var_labels, |
294 | 5x |
show_labels = show_labels, |
295 | 5x |
table_names = table_names |
296 |
)
|
|
297 |
}
|
|
298 | ||
299 |
#' Helper functions for odds ratio estimation
|
|
300 |
#'
|
|
301 |
#' @description `r lifecycle::badge("stable")`
|
|
302 |
#'
|
|
303 |
#' Functions to calculate odds ratios in [estimate_odds_ratio()].
|
|
304 |
#'
|
|
305 |
#' @inheritParams odds_ratio
|
|
306 |
#' @inheritParams argument_convention
|
|
307 |
#' @param data (`data.frame`)\cr data frame containing at least the variables `rsp` and `grp`, and optionally
|
|
308 |
#' `strata` for [or_clogit()].
|
|
309 |
#'
|
|
310 |
#' @return A named `list` of elements `or_ci` and `n_tot`.
|
|
311 |
#'
|
|
312 |
#' @seealso [odds_ratio]
|
|
313 |
#'
|
|
314 |
#' @name h_odds_ratio
|
|
315 |
NULL
|
|
316 | ||
317 |
#' @describeIn h_odds_ratio Estimates the odds ratio based on [stats::glm()]. Note that there must be
|
|
318 |
#' exactly 2 groups in `data` as specified by the `grp` variable.
|
|
319 |
#'
|
|
320 |
#' @examples
|
|
321 |
#' # Data with 2 groups.
|
|
322 |
#' data <- data.frame(
|
|
323 |
#' rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1)),
|
|
324 |
#' grp = letters[c(1, 1, 1, 2, 2, 2, 1, 2)],
|
|
325 |
#' strata = letters[c(1, 2, 1, 2, 2, 2, 1, 2)],
|
|
326 |
#' stringsAsFactors = TRUE
|
|
327 |
#' )
|
|
328 |
#'
|
|
329 |
#' # Odds ratio based on glm.
|
|
330 |
#' or_glm(data, conf_level = 0.95)
|
|
331 |
#'
|
|
332 |
#' @export
|
|
333 |
or_glm <- function(data, conf_level) { |
|
334 | 77x |
checkmate::assert_logical(data$rsp) |
335 | 77x |
assert_proportion_value(conf_level) |
336 | 77x |
assert_df_with_variables(data, list(rsp = "rsp", grp = "grp")) |
337 | 77x |
checkmate::assert_multi_class(data$grp, classes = c("factor", "character")) |
338 | ||
339 | 77x |
data$grp <- as_factor_keep_attributes(data$grp) |
340 | 77x |
assert_df_with_factors(data, list(val = "grp"), min.levels = 2, max.levels = 2) |
341 | 77x |
formula <- stats::as.formula("rsp ~ grp") |
342 | 77x |
model_fit <- stats::glm( |
343 | 77x |
formula = formula, data = data, |
344 | 77x |
family = stats::binomial(link = "logit") |
345 |
)
|
|
346 | ||
347 |
# Note that here we need to discard the intercept.
|
|
348 | 77x |
or <- exp(stats::coef(model_fit)[-1]) |
349 | 77x |
or_ci <- exp( |
350 | 77x |
stats::confint.default(model_fit, level = conf_level)[-1, , drop = FALSE] |
351 |
)
|
|
352 | ||
353 | 77x |
values <- stats::setNames(c(or, or_ci), c("est", "lcl", "ucl")) |
354 | 77x |
n_tot <- stats::setNames(nrow(model_fit$model), "n_tot") |
355 | ||
356 | 77x |
list(or_ci = values, n_tot = n_tot) |
357 |
}
|
|
358 | ||
359 |
#' @describeIn h_odds_ratio Estimates the odds ratio based on [survival::clogit()]. This is done for
|
|
360 |
#' the whole data set including all groups, since the results are not the same as when doing
|
|
361 |
#' pairwise comparisons between the groups.
|
|
362 |
#'
|
|
363 |
#' @examples
|
|
364 |
#' # Data with 3 groups.
|
|
365 |
#' data <- data.frame(
|
|
366 |
#' rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0)),
|
|
367 |
#' grp = letters[c(1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3)],
|
|
368 |
#' strata = LETTERS[c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)],
|
|
369 |
#' stringsAsFactors = TRUE
|
|
370 |
#' )
|
|
371 |
#'
|
|
372 |
#' # Odds ratio based on stratified estimation by conditional logistic regression.
|
|
373 |
#' or_clogit(data, conf_level = 0.95)
|
|
374 |
#'
|
|
375 |
#' @export
|
|
376 |
or_clogit <- function(data, conf_level, method = "exact") { |
|
377 | 19x |
checkmate::assert_logical(data$rsp) |
378 | 19x |
assert_proportion_value(conf_level) |
379 | 19x |
assert_df_with_variables(data, list(rsp = "rsp", grp = "grp", strata = "strata")) |
380 | 19x |
checkmate::assert_multi_class(data$grp, classes = c("factor", "character")) |
381 | 19x |
checkmate::assert_multi_class(data$strata, classes = c("factor", "character")) |
382 | 19x |
checkmate::assert_subset(method, c("exact", "approximate", "efron", "breslow"), empty.ok = FALSE) |
383 | ||
384 | 19x |
data$grp <- as_factor_keep_attributes(data$grp) |
385 | 19x |
data$strata <- as_factor_keep_attributes(data$strata) |
386 | ||
387 |
# Deviation from convention: `survival::strata` must be simply `strata`.
|
|
388 | 19x |
formula <- stats::as.formula("rsp ~ grp + strata(strata)") |
389 | 19x |
model_fit <- clogit_with_tryCatch(formula = formula, data = data, method = method) |
390 | ||
391 |
# Create a list with one set of OR estimates and CI per coefficient, i.e.
|
|
392 |
# comparison of one group vs. the reference group.
|
|
393 | 19x |
coef_est <- stats::coef(model_fit) |
394 | 19x |
ci_est <- stats::confint(model_fit, level = conf_level) |
395 | 19x |
or_ci <- list() |
396 | 19x |
for (coef_name in names(coef_est)) { |
397 | 21x |
grp_name <- gsub("^grp", "", x = coef_name) |
398 | 21x |
or_ci[[grp_name]] <- stats::setNames( |
399 | 21x |
object = exp(c(coef_est[coef_name], ci_est[coef_name, , drop = TRUE])), |
400 | 21x |
nm = c("est", "lcl", "ucl") |
401 |
)
|
|
402 |
}
|
|
403 | 19x |
list(or_ci = or_ci, n_tot = c(n_tot = model_fit$n)) |
404 |
}
|
1 |
#' Count specific values
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [count_values()] creates a layout element to calculate counts of specific values within a
|
|
6 |
#' variable of interest.
|
|
7 |
#'
|
|
8 |
#' This function analyzes one or more variables of interest supplied as a vector to `vars`. Values to
|
|
9 |
#' count for variable(s) in `vars` can be given as a vector via the `values` argument. One row of
|
|
10 |
#' counts will be generated for each variable.
|
|
11 |
#'
|
|
12 |
#' @inheritParams argument_convention
|
|
13 |
#' @param values (`character`)\cr specific values that should be counted.
|
|
14 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
15 |
#'
|
|
16 |
#' Options are: ``r shQuote(get_stats("count_values"), type = "sh")``
|
|
17 |
#'
|
|
18 |
#' @note
|
|
19 |
#' * For `factor` variables, `s_count_values` checks whether `values` are all included in the levels of `x`
|
|
20 |
#' and fails otherwise.
|
|
21 |
#' * For `count_values()`, variable labels are shown when there is more than one element in `vars`,
|
|
22 |
#' otherwise they are hidden.
|
|
23 |
#'
|
|
24 |
#' @name count_values
|
|
25 |
#' @order 1
|
|
26 |
NULL
|
|
27 | ||
28 |
#' @describeIn count_values S3 generic function to count values.
|
|
29 |
#'
|
|
30 |
#' @inheritParams s_summary.logical
|
|
31 |
#'
|
|
32 |
#' @return
|
|
33 |
#' * `s_count_values()` returns output of [s_summary()] for specified values of a non-numeric variable.
|
|
34 |
#'
|
|
35 |
#' @export
|
|
36 |
s_count_values <- function(x, |
|
37 |
values,
|
|
38 |
na.rm = TRUE, # nolint |
|
39 |
denom = c("n", "N_col", "N_row"), |
|
40 |
...) { |
|
41 | 207x |
UseMethod("s_count_values", x) |
42 |
}
|
|
43 | ||
44 |
#' @describeIn count_values Method for `character` class.
|
|
45 |
#'
|
|
46 |
#' @method s_count_values character
|
|
47 |
#'
|
|
48 |
#' @examples
|
|
49 |
#' # `s_count_values.character`
|
|
50 |
#' s_count_values(x = c("a", "b", "a"), values = "a")
|
|
51 |
#' s_count_values(x = c("a", "b", "a", NA, NA), values = "b", na.rm = FALSE)
|
|
52 |
#'
|
|
53 |
#' @export
|
|
54 |
s_count_values.character <- function(x, |
|
55 |
values = "Y", |
|
56 |
na.rm = TRUE, # nolint |
|
57 |
...) { |
|
58 | 200x |
checkmate::assert_character(values) |
59 | ||
60 | 200x |
if (na.rm) { |
61 | 199x |
x <- x[!is.na(x)] |
62 |
}
|
|
63 | ||
64 | 200x |
is_in_values <- x %in% values |
65 | ||
66 | 200x |
s_summary(is_in_values, na_rm = na.rm, ...) |
67 |
}
|
|
68 | ||
69 |
#' @describeIn count_values Method for `factor` class. This makes an automatic
|
|
70 |
#' conversion to `character` and then forwards to the method for characters.
|
|
71 |
#'
|
|
72 |
#' @method s_count_values factor
|
|
73 |
#'
|
|
74 |
#' @examples
|
|
75 |
#' # `s_count_values.factor`
|
|
76 |
#' s_count_values(x = factor(c("a", "b", "a")), values = "a")
|
|
77 |
#'
|
|
78 |
#' @export
|
|
79 |
s_count_values.factor <- function(x, |
|
80 |
values = "Y", |
|
81 |
...) { |
|
82 | 4x |
s_count_values(as.character(x), values = as.character(values), ...) |
83 |
}
|
|
84 | ||
85 |
#' @describeIn count_values Method for `logical` class.
|
|
86 |
#'
|
|
87 |
#' @method s_count_values logical
|
|
88 |
#'
|
|
89 |
#' @examples
|
|
90 |
#' # `s_count_values.logical`
|
|
91 |
#' s_count_values(x = c(TRUE, FALSE, TRUE))
|
|
92 |
#'
|
|
93 |
#' @export
|
|
94 |
s_count_values.logical <- function(x, values = TRUE, ...) { |
|
95 | 3x |
checkmate::assert_logical(values) |
96 | 3x |
s_count_values(as.character(x), values = as.character(values), ...) |
97 |
}
|
|
98 | ||
99 |
#' @describeIn count_values Formatted analysis function which is used as `afun`
|
|
100 |
#' in `count_values()`.
|
|
101 |
#'
|
|
102 |
#' @return
|
|
103 |
#' * `a_count_values()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
104 |
#'
|
|
105 |
#' @examples
|
|
106 |
#' # `a_count_values`
|
|
107 |
#' a_count_values(x = factor(c("a", "b", "a")), values = "a", .N_col = 10, .N_row = 10)
|
|
108 |
#'
|
|
109 |
#' @export
|
|
110 |
a_count_values <- function(x, |
|
111 |
...,
|
|
112 |
.stats = NULL, |
|
113 |
.stat_names = NULL, |
|
114 |
.formats = NULL, |
|
115 |
.labels = NULL, |
|
116 |
.indent_mods = NULL) { |
|
117 |
# Check for additional parameters to the statistics function
|
|
118 | 17x |
dots_extra_args <- list(...) |
119 | 17x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
120 | 17x |
dots_extra_args$.additional_fun_parameters <- NULL |
121 | ||
122 |
# Check for user-defined functions
|
|
123 | 17x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
124 | 17x |
.stats <- default_and_custom_stats_list$all_stats |
125 | 17x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
126 | ||
127 |
# Main statistic calculations
|
|
128 | 17x |
x_stats <- .apply_stat_functions( |
129 | 17x |
default_stat_fnc = s_count_values, |
130 | 17x |
custom_stat_fnc_list = custom_stat_functions, |
131 | 17x |
args_list = c( |
132 | 17x |
x = list(x), |
133 | 17x |
extra_afun_params,
|
134 | 17x |
dots_extra_args
|
135 |
)
|
|
136 |
)
|
|
137 | ||
138 |
# Fill in formatting defaults
|
|
139 | 17x |
.stats <- get_stats("analyze_vars_counts", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) |
140 | 17x |
.formats <- get_formats_from_stats(.stats, .formats) |
141 | 17x |
.labels <- get_labels_from_stats(.stats, .labels) |
142 | 17x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods) |
143 | ||
144 | 17x |
x_stats <- x_stats[.stats] |
145 | ||
146 |
# Auto format handling
|
|
147 | 17x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
148 | ||
149 |
# Get and check statistical names
|
|
150 | 17x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
151 | ||
152 | 17x |
in_rows( |
153 | 17x |
.list = x_stats, |
154 | 17x |
.formats = .formats, |
155 | 17x |
.names = names(.labels), |
156 | 17x |
.stat_names = .stat_names, |
157 | 17x |
.labels = .labels %>% .unlist_keep_nulls(), |
158 | 17x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
159 |
)
|
|
160 |
}
|
|
161 | ||
162 |
#' @describeIn count_values Layout-creating function which can take statistics function arguments
|
|
163 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
164 |
#'
|
|
165 |
#' @return
|
|
166 |
#' * `count_values()` returns a layout object suitable for passing to further layouting functions,
|
|
167 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
168 |
#' the statistics from `s_count_values()` to the table layout.
|
|
169 |
#'
|
|
170 |
#' @examples
|
|
171 |
#' # `count_values`
|
|
172 |
#' basic_table() %>%
|
|
173 |
#' count_values("Species", values = "setosa") %>%
|
|
174 |
#' build_table(iris)
|
|
175 |
#'
|
|
176 |
#' @export
|
|
177 |
#' @order 2
|
|
178 |
count_values <- function(lyt, |
|
179 |
vars,
|
|
180 |
values,
|
|
181 |
na_str = default_na_str(), |
|
182 |
na_rm = TRUE, |
|
183 |
nested = TRUE, |
|
184 |
...,
|
|
185 |
table_names = vars, |
|
186 |
.stats = "count_fraction", |
|
187 |
.stat_names = NULL, |
|
188 |
.formats = c(count_fraction = "xx (xx.xx%)", count = "xx"), |
|
189 |
.labels = c(count_fraction = paste(values, collapse = ", ")), |
|
190 |
.indent_mods = NULL) { |
|
191 |
# Process standard extra arguments
|
|
192 | 8x |
extra_args <- list(".stats" = .stats) |
193 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
194 | 8x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
195 | 8x |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
196 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
197 | ||
198 |
# Process additional arguments to the statistic function
|
|
199 | 8x |
extra_args <- c( |
200 | 8x |
extra_args,
|
201 | 8x |
na_rm = na_rm, values = list(values), |
202 |
...
|
|
203 |
)
|
|
204 | ||
205 |
# Adding additional info from layout to analysis function
|
|
206 | 8x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
207 | 8x |
formals(a_count_values) <- c(formals(a_count_values), extra_args[[".additional_fun_parameters"]]) |
208 | ||
209 | 8x |
analyze( |
210 | 8x |
lyt,
|
211 | 8x |
vars,
|
212 | 8x |
afun = a_count_values, |
213 | 8x |
na_str = na_str, |
214 | 8x |
nested = nested, |
215 | 8x |
extra_args = extra_args, |
216 | 8x |
show_labels = ifelse(length(vars) > 1, "visible", "hidden"), |
217 | 8x |
table_names = table_names |
218 |
)
|
|
219 |
}
|
1 |
#' Control functions for Kaplan-Meier plot annotation tables
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Auxiliary functions for controlling arguments for formatting the annotation tables that can be added to plots
|
|
6 |
#' generated via [g_km()].
|
|
7 |
#'
|
|
8 |
#' @param x (`proportion`)\cr x-coordinate for center of annotation table.
|
|
9 |
#' @param y (`proportion`)\cr y-coordinate for center of annotation table.
|
|
10 |
#' @param w (`proportion`)\cr relative width of the annotation table.
|
|
11 |
#' @param h (`proportion`)\cr relative height of the annotation table.
|
|
12 |
#' @param fill (`flag` or `character`)\cr whether the annotation table should have a background fill color.
|
|
13 |
#' Can also be a color code to use as the background fill color. If `TRUE`, color code defaults to `"#00000020"`.
|
|
14 |
#'
|
|
15 |
#' @return A list of components with the same names as the arguments.
|
|
16 |
#'
|
|
17 |
#' @seealso [g_km()]
|
|
18 |
#'
|
|
19 |
#' @name control_annot
|
|
20 |
NULL
|
|
21 | ||
22 |
#' @describeIn control_annot Control function for formatting the median survival time annotation table. This annotation
|
|
23 |
#' table can be added in [g_km()] by setting `annot_surv_med=TRUE`, and can be configured using the
|
|
24 |
#' `control_surv_med_annot()` function by setting it as the `control_annot_surv_med` argument.
|
|
25 |
#'
|
|
26 |
#' @examples
|
|
27 |
#' control_surv_med_annot()
|
|
28 |
#'
|
|
29 |
#' @export
|
|
30 |
control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) { |
|
31 | 22x |
assert_proportion_value(x) |
32 | 22x |
assert_proportion_value(y) |
33 | 22x |
assert_proportion_value(w) |
34 | 22x |
assert_proportion_value(h) |
35 | ||
36 | 22x |
list(x = x, y = y, w = w, h = h, fill = fill) |
37 |
}
|
|
38 | ||
39 |
#' @describeIn control_annot Control function for formatting the Cox-PH annotation table. This annotation table can be
|
|
40 |
#' added in [g_km()] by setting `annot_coxph=TRUE`, and can be configured using the `control_coxph_annot()` function
|
|
41 |
#' by setting it as the `control_annot_coxph` argument.
|
|
42 |
#'
|
|
43 |
#' @param ref_lbls (`flag`)\cr whether the reference group should be explicitly printed in labels for the
|
|
44 |
#' annotation table. If `FALSE` (default), only comparison groups will be printed in the table labels.
|
|
45 |
#'
|
|
46 |
#' @examples
|
|
47 |
#' control_coxph_annot()
|
|
48 |
#'
|
|
49 |
#' @export
|
|
50 |
control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = TRUE, ref_lbls = FALSE) { |
|
51 | 11x |
checkmate::assert_logical(ref_lbls, any.missing = FALSE) |
52 | ||
53 | 11x |
res <- c(control_surv_med_annot(x = x, y = y, w = w, h = h), list(ref_lbls = ref_lbls)) |
54 | 11x |
res
|
55 |
}
|
|
56 | ||
57 |
#' Helper function to calculate x-tick positions
|
|
58 |
#'
|
|
59 |
#' @description `r lifecycle::badge("stable")`
|
|
60 |
#'
|
|
61 |
#' Calculate the positions of ticks on the x-axis. However, if `xticks` already
|
|
62 |
#' exists it is kept as is. It is based on the same function `ggplot2` relies on,
|
|
63 |
#' and is required in the graphic and the patient-at-risk annotation table.
|
|
64 |
#'
|
|
65 |
#' @inheritParams g_km
|
|
66 |
#' @inheritParams h_ggkm
|
|
67 |
#'
|
|
68 |
#' @return A vector of positions to use for x-axis ticks on a `ggplot` object.
|
|
69 |
#'
|
|
70 |
#' @examples
|
|
71 |
#' library(dplyr)
|
|
72 |
#' library(survival)
|
|
73 |
#'
|
|
74 |
#' data <- tern_ex_adtte %>%
|
|
75 |
#' filter(PARAMCD == "OS") %>%
|
|
76 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
|
|
77 |
#' h_data_plot()
|
|
78 |
#'
|
|
79 |
#' h_xticks(data)
|
|
80 |
#' h_xticks(data, xticks = seq(0, 3000, 500))
|
|
81 |
#' h_xticks(data, xticks = 500)
|
|
82 |
#' h_xticks(data, xticks = 500, max_time = 6000)
|
|
83 |
#' h_xticks(data, xticks = c(0, 500), max_time = 300)
|
|
84 |
#' h_xticks(data, xticks = 500, max_time = 300)
|
|
85 |
#'
|
|
86 |
#' @export
|
|
87 |
h_xticks <- function(data, xticks = NULL, max_time = NULL) { |
|
88 | 18x |
if (is.null(xticks)) { |
89 | 13x |
if (is.null(max_time)) { |
90 | 11x |
labeling::extended(range(data$time)[1], range(data$time)[2], m = 5) |
91 |
} else { |
|
92 | 2x |
labeling::extended(range(data$time)[1], max(range(data$time)[2], max_time), m = 5) |
93 |
}
|
|
94 | 5x |
} else if (checkmate::test_number(xticks)) { |
95 | 2x |
if (is.null(max_time)) { |
96 | 1x |
seq(0, max(data$time), xticks) |
97 |
} else { |
|
98 | 1x |
seq(0, max(data$time, max_time), xticks) |
99 |
}
|
|
100 | 3x |
} else if (is.numeric(xticks)) { |
101 | 2x |
xticks
|
102 |
} else { |
|
103 | 1x |
stop( |
104 | 1x |
paste( |
105 | 1x |
"xticks should be either `NULL`",
|
106 | 1x |
"or a single number (interval between x ticks)",
|
107 | 1x |
"or a numeric vector (position of ticks on the x axis)"
|
108 |
)
|
|
109 |
)
|
|
110 |
}
|
|
111 |
}
|
|
112 | ||
113 |
#' Helper function for survival estimations
|
|
114 |
#'
|
|
115 |
#' @description `r lifecycle::badge("stable")`
|
|
116 |
#'
|
|
117 |
#' Transform a survival fit to a table with groups in rows characterized by N, median and confidence interval.
|
|
118 |
#'
|
|
119 |
#' @inheritParams h_data_plot
|
|
120 |
#'
|
|
121 |
#' @return A summary table with statistics `N`, `Median`, and `XX% CI` (`XX` taken from `fit_km`).
|
|
122 |
#'
|
|
123 |
#' @examples
|
|
124 |
#' library(dplyr)
|
|
125 |
#' library(survival)
|
|
126 |
#'
|
|
127 |
#' adtte <- tern_ex_adtte %>% filter(PARAMCD == "OS")
|
|
128 |
#' fit <- survfit(
|
|
129 |
#' formula = Surv(AVAL, 1 - CNSR) ~ ARMCD,
|
|
130 |
#' data = adtte
|
|
131 |
#' )
|
|
132 |
#' h_tbl_median_surv(fit_km = fit)
|
|
133 |
#'
|
|
134 |
#' @export
|
|
135 |
h_tbl_median_surv <- function(fit_km, armval = "All") { |
|
136 | 10x |
y <- if (is.null(fit_km$strata)) { |
137 | ! |
as.data.frame(t(summary(fit_km)$table), row.names = armval) |
138 |
} else { |
|
139 | 10x |
tbl <- summary(fit_km)$table |
140 | 10x |
rownames_lst <- strsplit(sub("=", "equals", rownames(tbl)), "equals") |
141 | 10x |
rownames(tbl) <- matrix(unlist(rownames_lst), ncol = 2, byrow = TRUE)[, 2] |
142 | 10x |
as.data.frame(tbl) |
143 |
}
|
|
144 | 10x |
conf.int <- summary(fit_km)$conf.int # nolint |
145 | 10x |
y$records <- round(y$records) |
146 | 10x |
y$median <- signif(y$median, 4) |
147 | 10x |
y$`CI` <- paste0( |
148 | 10x |
"(", signif(y[[paste0(conf.int, "LCL")]], 4), ", ", signif(y[[paste0(conf.int, "UCL")]], 4), ")" |
149 |
)
|
|
150 | 10x |
stats::setNames( |
151 | 10x |
y[c("records", "median", "CI")], |
152 | 10x |
c("N", "Median", f_conf_level(conf.int)) |
153 |
)
|
|
154 |
}
|
|
155 | ||
156 |
#' Helper function for generating a pairwise Cox-PH table
|
|
157 |
#'
|
|
158 |
#' @description `r lifecycle::badge("stable")`
|
|
159 |
#'
|
|
160 |
#' Create a `data.frame` of pairwise stratified or unstratified Cox-PH analysis results.
|
|
161 |
#'
|
|
162 |
#' @inheritParams g_km
|
|
163 |
#' @param annot_coxph_ref_lbls (`flag`)\cr whether the reference group should be explicitly printed in labels for the
|
|
164 |
#' `annot_coxph` table. If `FALSE` (default), only comparison groups will be printed in `annot_coxph` table labels.
|
|
165 |
#'
|
|
166 |
#' @return A `data.frame` containing statistics `HR`, `XX% CI` (`XX` taken from `control_coxph_pw`),
|
|
167 |
#' and `p-value (log-rank)`.
|
|
168 |
#'
|
|
169 |
#' @examples
|
|
170 |
#' library(dplyr)
|
|
171 |
#'
|
|
172 |
#' adtte <- tern_ex_adtte %>%
|
|
173 |
#' filter(PARAMCD == "OS") %>%
|
|
174 |
#' mutate(is_event = CNSR == 0)
|
|
175 |
#'
|
|
176 |
#' h_tbl_coxph_pairwise(
|
|
177 |
#' df = adtte,
|
|
178 |
#' variables = list(tte = "AVAL", is_event = "is_event", arm = "ARM"),
|
|
179 |
#' control_coxph_pw = control_coxph(conf_level = 0.9)
|
|
180 |
#' )
|
|
181 |
#'
|
|
182 |
#' @export
|
|
183 |
h_tbl_coxph_pairwise <- function(df, |
|
184 |
variables,
|
|
185 |
ref_group_coxph = NULL, |
|
186 |
control_coxph_pw = control_coxph(), |
|
187 |
annot_coxph_ref_lbls = FALSE) { |
|
188 | 4x |
if ("strat" %in% names(variables)) { |
189 | ! |
warning( |
190 | ! |
"Warning: the `strat` element name of the `variables` list argument to `h_tbl_coxph_pairwise() ",
|
191 | ! |
"was deprecated in tern 0.9.4.\n ",
|
192 | ! |
"Please use the name `strata` instead of `strat` in the `variables` argument."
|
193 |
)
|
|
194 | ! |
variables[["strata"]] <- variables[["strat"]] |
195 |
}
|
|
196 | ||
197 | 4x |
assert_df_with_variables(df, variables) |
198 | 4x |
checkmate::assert_choice(ref_group_coxph, levels(df[[variables$arm]]), null.ok = TRUE) |
199 | 4x |
checkmate::assert_flag(annot_coxph_ref_lbls) |
200 | ||
201 | 4x |
arm <- variables$arm |
202 | 4x |
df[[arm]] <- factor(df[[arm]]) |
203 | ||
204 | 4x |
ref_group <- if (!is.null(ref_group_coxph)) ref_group_coxph else levels(df[[variables$arm]])[1] |
205 | 4x |
comp_group <- setdiff(levels(df[[arm]]), ref_group) |
206 | ||
207 | 4x |
results <- Map(function(comp) { |
208 | 8x |
res <- s_coxph_pairwise( |
209 | 8x |
df = df[df[[arm]] == comp, , drop = FALSE], |
210 | 8x |
.ref_group = df[df[[arm]] == ref_group, , drop = FALSE], |
211 | 8x |
.in_ref_col = FALSE, |
212 | 8x |
.var = variables$tte, |
213 | 8x |
is_event = variables$is_event, |
214 | 8x |
strata = variables$strata, |
215 | 8x |
control = control_coxph_pw |
216 |
)
|
|
217 | 8x |
res_df <- data.frame( |
218 | 8x |
hr = format(round(res$hr, 2), nsmall = 2), |
219 | 8x |
hr_ci = paste0( |
220 | 8x |
"(", format(round(res$hr_ci[1], 2), nsmall = 2), ", ", |
221 | 8x |
format(round(res$hr_ci[2], 2), nsmall = 2), ")" |
222 |
),
|
|
223 | 8x |
pvalue = if (res$pvalue < 0.0001) "<0.0001" else format(round(res$pvalue, 4), 4), |
224 | 8x |
stringsAsFactors = FALSE |
225 |
)
|
|
226 | 8x |
colnames(res_df) <- c("HR", vapply(res[c("hr_ci", "pvalue")], obj_label, FUN.VALUE = "character")) |
227 | 8x |
row.names(res_df) <- comp |
228 | 8x |
res_df
|
229 | 4x |
}, comp_group) |
230 | 1x |
if (annot_coxph_ref_lbls) names(results) <- paste(comp_group, "vs.", ref_group) |
231 | ||
232 | 4x |
do.call(rbind, results) |
233 |
}
|
|
234 | ||
235 |
#' Helper function to tidy survival fit data
|
|
236 |
#'
|
|
237 |
#' @description `r lifecycle::badge("stable")`
|
|
238 |
#'
|
|
239 |
#' Convert the survival fit data into a data frame designed for plotting
|
|
240 |
#' within `g_km`.
|
|
241 |
#'
|
|
242 |
#' This starts from the [broom::tidy()] result, and then:
|
|
243 |
#' * Post-processes the `strata` column into a factor.
|
|
244 |
#' * Extends each stratum by an additional first row with time 0 and probability 1 so that
|
|
245 |
#' downstream plot lines start at those coordinates.
|
|
246 |
#' * Adds a `censor` column.
|
|
247 |
#' * Filters the rows before `max_time`.
|
|
248 |
#'
|
|
249 |
#' @inheritParams g_km
|
|
250 |
#' @param fit_km (`survfit`)\cr result of [survival::survfit()].
|
|
251 |
#' @param armval (`string`)\cr used as strata name when treatment arm variable only has one level. Default is `"All"`.
|
|
252 |
#'
|
|
253 |
#' @return A `tibble` with columns `time`, `n.risk`, `n.event`, `n.censor`, `estimate`, `std.error`, `conf.high`,
|
|
254 |
#' `conf.low`, `strata`, and `censor`.
|
|
255 |
#'
|
|
256 |
#' @examples
|
|
257 |
#' library(dplyr)
|
|
258 |
#' library(survival)
|
|
259 |
#'
|
|
260 |
#' # Test with multiple arms
|
|
261 |
#' tern_ex_adtte %>%
|
|
262 |
#' filter(PARAMCD == "OS") %>%
|
|
263 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
|
|
264 |
#' h_data_plot()
|
|
265 |
#'
|
|
266 |
#' # Test with single arm
|
|
267 |
#' tern_ex_adtte %>%
|
|
268 |
#' filter(PARAMCD == "OS", ARMCD == "ARM B") %>%
|
|
269 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
|
|
270 |
#' h_data_plot(armval = "ARM B")
|
|
271 |
#'
|
|
272 |
#' @export
|
|
273 |
h_data_plot <- function(fit_km, |
|
274 |
armval = "All", |
|
275 |
max_time = NULL) { |
|
276 | 18x |
y <- broom::tidy(fit_km) |
277 | ||
278 | 18x |
if (!is.null(fit_km$strata)) { |
279 | 18x |
fit_km_var_level <- strsplit(sub("=", "equals", names(fit_km$strata)), "equals") |
280 | 18x |
strata_levels <- vapply(fit_km_var_level, FUN = "[", FUN.VALUE = "a", i = 2) |
281 | 18x |
strata_var_level <- strsplit(sub("=", "equals", y$strata), "equals") |
282 | 18x |
y$strata <- factor( |
283 | 18x |
vapply(strata_var_level, FUN = "[", FUN.VALUE = "a", i = 2), |
284 | 18x |
levels = strata_levels |
285 |
)
|
|
286 |
} else { |
|
287 | ! |
y$strata <- armval |
288 |
}
|
|
289 | ||
290 | 18x |
y_by_strata <- split(y, y$strata) |
291 | 18x |
y_by_strata_extended <- lapply( |
292 | 18x |
y_by_strata,
|
293 | 18x |
FUN = function(tbl) { |
294 | 53x |
first_row <- tbl[1L, ] |
295 | 53x |
first_row$time <- 0 |
296 | 53x |
first_row$n.risk <- sum(first_row[, c("n.risk", "n.event", "n.censor")]) |
297 | 53x |
first_row$n.event <- first_row$n.censor <- 0 |
298 | 53x |
first_row$estimate <- first_row$conf.high <- first_row$conf.low <- 1 |
299 | 53x |
first_row$std.error <- 0 |
300 | 53x |
rbind( |
301 | 53x |
first_row,
|
302 | 53x |
tbl
|
303 |
)
|
|
304 |
}
|
|
305 |
)
|
|
306 | 18x |
y <- do.call(rbind, y_by_strata_extended) |
307 | ||
308 | 18x |
y$censor <- ifelse(y$n.censor > 0, y$estimate, NA) |
309 | 18x |
if (!is.null(max_time)) { |
310 | 1x |
y <- y[y$time <= max(max_time), ] |
311 |
}
|
|
312 | 18x |
y
|
313 |
}
|
|
314 | ||
315 |
## Deprecated Functions ----
|
|
316 | ||
317 |
#' Helper function to create a KM plot
|
|
318 |
#'
|
|
319 |
#' @description `r lifecycle::badge("deprecated")`
|
|
320 |
#'
|
|
321 |
#' Draw the Kaplan-Meier plot using `ggplot2`.
|
|
322 |
#'
|
|
323 |
#' @inheritParams g_km
|
|
324 |
#' @param data (`data.frame`)\cr survival data as pre-processed by `h_data_plot`.
|
|
325 |
#'
|
|
326 |
#' @return A `ggplot` object.
|
|
327 |
#'
|
|
328 |
#' @examples
|
|
329 |
#' \donttest{
|
|
330 |
#' library(dplyr)
|
|
331 |
#' library(survival)
|
|
332 |
#'
|
|
333 |
#' fit_km <- tern_ex_adtte %>%
|
|
334 |
#' filter(PARAMCD == "OS") %>%
|
|
335 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
|
|
336 |
#' data_plot <- h_data_plot(fit_km = fit_km)
|
|
337 |
#' xticks <- h_xticks(data = data_plot)
|
|
338 |
#' gg <- h_ggkm(
|
|
339 |
#' data = data_plot,
|
|
340 |
#' censor_show = TRUE,
|
|
341 |
#' xticks = xticks,
|
|
342 |
#' xlab = "Days",
|
|
343 |
#' yval = "Survival",
|
|
344 |
#' ylab = "Survival Probability",
|
|
345 |
#' title = "Survival"
|
|
346 |
#' )
|
|
347 |
#' gg
|
|
348 |
#' }
|
|
349 |
#'
|
|
350 |
#' @export
|
|
351 |
h_ggkm <- function(data, |
|
352 |
xticks = NULL, |
|
353 |
yval = "Survival", |
|
354 |
censor_show,
|
|
355 |
xlab,
|
|
356 |
ylab,
|
|
357 |
ylim = NULL, |
|
358 |
title,
|
|
359 |
footnotes = NULL, |
|
360 |
max_time = NULL, |
|
361 |
lwd = 1, |
|
362 |
lty = NULL, |
|
363 |
pch = 3, |
|
364 |
size = 2, |
|
365 |
col = NULL, |
|
366 |
ci_ribbon = FALSE, |
|
367 |
ggtheme = nestcolor::theme_nest()) { |
|
368 | 1x |
lifecycle::deprecate_warn( |
369 | 1x |
"0.9.4",
|
370 | 1x |
"h_ggkm()",
|
371 | 1x |
details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." |
372 |
)
|
|
373 | 1x |
checkmate::assert_numeric(lty, null.ok = TRUE) |
374 | 1x |
checkmate::assert_character(col, null.ok = TRUE) |
375 | ||
376 | 1x |
if (is.null(ylim)) { |
377 | 1x |
data_lims <- data |
378 | ! |
if (yval == "Failure") data_lims[["estimate"]] <- 1 - data_lims[["estimate"]] |
379 | 1x |
if (!is.null(max_time)) { |
380 | ! |
y_lwr <- min(data_lims[data_lims$time < max_time, ][["estimate"]]) |
381 | ! |
y_upr <- max(data_lims[data_lims$time < max_time, ][["estimate"]]) |
382 |
} else { |
|
383 | 1x |
y_lwr <- min(data_lims[["estimate"]]) |
384 | 1x |
y_upr <- max(data_lims[["estimate"]]) |
385 |
}
|
|
386 | 1x |
ylim <- c(y_lwr, y_upr) |
387 |
}
|
|
388 | 1x |
checkmate::assert_numeric(ylim, finite = TRUE, any.missing = FALSE, len = 2, sorted = TRUE) |
389 | ||
390 |
# change estimates of survival to estimates of failure (1 - survival)
|
|
391 | 1x |
if (yval == "Failure") { |
392 | ! |
data$estimate <- 1 - data$estimate |
393 | ! |
data[c("conf.high", "conf.low")] <- list(1 - data$conf.low, 1 - data$conf.high) |
394 | ! |
data$censor <- 1 - data$censor |
395 |
}
|
|
396 | ||
397 | 1x |
gg <- { |
398 | 1x |
ggplot2::ggplot( |
399 | 1x |
data = data, |
400 | 1x |
mapping = ggplot2::aes( |
401 | 1x |
x = .data[["time"]], |
402 | 1x |
y = .data[["estimate"]], |
403 | 1x |
ymin = .data[["conf.low"]], |
404 | 1x |
ymax = .data[["conf.high"]], |
405 | 1x |
color = .data[["strata"]], |
406 | 1x |
fill = .data[["strata"]] |
407 |
)
|
|
408 |
) + |
|
409 | 1x |
ggplot2::geom_hline(yintercept = 0) |
410 |
}
|
|
411 | ||
412 | 1x |
if (ci_ribbon) { |
413 | ! |
gg <- gg + ggplot2::geom_ribbon(alpha = .3, lty = 0) |
414 |
}
|
|
415 | ||
416 | 1x |
gg <- if (is.null(lty)) { |
417 | 1x |
gg + |
418 | 1x |
ggplot2::geom_step(linewidth = lwd) |
419 | 1x |
} else if (checkmate::test_number(lty)) { |
420 | ! |
gg + |
421 | ! |
ggplot2::geom_step(linewidth = lwd, lty = lty) |
422 | 1x |
} else if (is.numeric(lty)) { |
423 | ! |
gg + |
424 | ! |
ggplot2::geom_step(mapping = ggplot2::aes(linetype = .data[["strata"]]), linewidth = lwd) + |
425 | ! |
ggplot2::scale_linetype_manual(values = lty) |
426 |
}
|
|
427 | ||
428 | 1x |
gg <- gg + |
429 | 1x |
ggplot2::coord_cartesian(ylim = ylim) + |
430 | 1x |
ggplot2::labs(x = xlab, y = ylab, title = title, caption = footnotes) |
431 | ||
432 | 1x |
if (!is.null(col)) { |
433 | ! |
gg <- gg + |
434 | ! |
ggplot2::scale_color_manual(values = col) + |
435 | ! |
ggplot2::scale_fill_manual(values = col) |
436 |
}
|
|
437 | 1x |
if (censor_show) { |
438 | 1x |
dt <- data[data$n.censor != 0, ] |
439 | 1x |
dt$censor_lbl <- factor("Censored") |
440 | ||
441 | 1x |
gg <- gg + ggplot2::geom_point( |
442 | 1x |
data = dt, |
443 | 1x |
ggplot2::aes( |
444 | 1x |
x = .data[["time"]], |
445 | 1x |
y = .data[["censor"]], |
446 | 1x |
shape = .data[["censor_lbl"]] |
447 |
),
|
|
448 | 1x |
size = size, |
449 | 1x |
show.legend = TRUE, |
450 | 1x |
inherit.aes = TRUE |
451 |
) + |
|
452 | 1x |
ggplot2::scale_shape_manual(name = NULL, values = pch) + |
453 | 1x |
ggplot2::guides( |
454 | 1x |
shape = ggplot2::guide_legend(override.aes = list(linetype = NA)), |
455 | 1x |
fill = ggplot2::guide_legend(override.aes = list(shape = NA)) |
456 |
)
|
|
457 |
}
|
|
458 | ||
459 | 1x |
if (!is.null(max_time) && !is.null(xticks)) { |
460 | ! |
gg <- gg + ggplot2::scale_x_continuous(breaks = xticks, limits = c(min(0, xticks), max(c(xticks, max_time)))) |
461 | 1x |
} else if (!is.null(xticks)) { |
462 | 1x |
if (max(data$time) <= max(xticks)) { |
463 | 1x |
gg <- gg + ggplot2::scale_x_continuous(breaks = xticks, limits = c(min(0, min(xticks)), max(xticks))) |
464 |
} else { |
|
465 | ! |
gg <- gg + ggplot2::scale_x_continuous(breaks = xticks) |
466 |
}
|
|
467 | ! |
} else if (!is.null(max_time)) { |
468 | ! |
gg <- gg + ggplot2::scale_x_continuous(limits = c(0, max_time)) |
469 |
}
|
|
470 | ||
471 | 1x |
if (!is.null(ggtheme)) { |
472 | 1x |
gg <- gg + ggtheme |
473 |
}
|
|
474 | ||
475 | 1x |
gg + ggplot2::theme( |
476 | 1x |
legend.position = "bottom", |
477 | 1x |
legend.title = ggplot2::element_blank(), |
478 | 1x |
legend.key.height = unit(0.02, "npc"), |
479 | 1x |
panel.grid.major.x = ggplot2::element_line(linewidth = 2) |
480 |
)
|
|
481 |
}
|
|
482 | ||
483 |
#' `ggplot` decomposition
|
|
484 |
#'
|
|
485 |
#' @description `r lifecycle::badge("deprecated")`
|
|
486 |
#'
|
|
487 |
#' The elements composing the `ggplot` are extracted and organized in a `list`.
|
|
488 |
#'
|
|
489 |
#' @param gg (`ggplot`)\cr a graphic to decompose.
|
|
490 |
#'
|
|
491 |
#' @return A named `list` with elements:
|
|
492 |
#' * `panel`: The panel.
|
|
493 |
#' * `yaxis`: The y-axis.
|
|
494 |
#' * `xaxis`: The x-axis.
|
|
495 |
#' * `xlab`: The x-axis label.
|
|
496 |
#' * `ylab`: The y-axis label.
|
|
497 |
#' * `guide`: The legend.
|
|
498 |
#'
|
|
499 |
#' @examples
|
|
500 |
#' \donttest{
|
|
501 |
#' library(dplyr)
|
|
502 |
#' library(survival)
|
|
503 |
#' library(grid)
|
|
504 |
#'
|
|
505 |
#' fit_km <- tern_ex_adtte %>%
|
|
506 |
#' filter(PARAMCD == "OS") %>%
|
|
507 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
|
|
508 |
#' data_plot <- h_data_plot(fit_km = fit_km)
|
|
509 |
#' xticks <- h_xticks(data = data_plot)
|
|
510 |
#' gg <- h_ggkm(
|
|
511 |
#' data = data_plot,
|
|
512 |
#' yval = "Survival",
|
|
513 |
#' censor_show = TRUE,
|
|
514 |
#' xticks = xticks, xlab = "Days", ylab = "Survival Probability",
|
|
515 |
#' title = "tt",
|
|
516 |
#' footnotes = "ff"
|
|
517 |
#' )
|
|
518 |
#'
|
|
519 |
#' g_el <- h_decompose_gg(gg)
|
|
520 |
#' grid::grid.newpage()
|
|
521 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "red", fill = "gray85", lwd = 5))
|
|
522 |
#' grid::grid.draw(g_el$panel)
|
|
523 |
#'
|
|
524 |
#' grid::grid.newpage()
|
|
525 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "royalblue", fill = "gray85", lwd = 5))
|
|
526 |
#' grid::grid.draw(with(g_el, cbind(ylab, yaxis)))
|
|
527 |
#' }
|
|
528 |
#'
|
|
529 |
#' @export
|
|
530 |
h_decompose_gg <- function(gg) { |
|
531 | 1x |
lifecycle::deprecate_warn( |
532 | 1x |
"0.9.4",
|
533 | 1x |
"h_decompose_gg()",
|
534 | 1x |
details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." |
535 |
)
|
|
536 | 1x |
g_el <- ggplot2::ggplotGrob(gg) |
537 | 1x |
y <- c( |
538 | 1x |
panel = "panel", |
539 | 1x |
yaxis = "axis-l", |
540 | 1x |
xaxis = "axis-b", |
541 | 1x |
xlab = "xlab-b", |
542 | 1x |
ylab = "ylab-l", |
543 | 1x |
guide = "guide" |
544 |
)
|
|
545 | 1x |
lapply(X = y, function(x) gtable::gtable_filter(g_el, x)) |
546 |
}
|
|
547 | ||
548 |
#' Helper function to prepare a KM layout
|
|
549 |
#'
|
|
550 |
#' @description `r lifecycle::badge("deprecated")`
|
|
551 |
#'
|
|
552 |
#' Prepares a (5 rows) x (2 cols) layout for the Kaplan-Meier curve.
|
|
553 |
#'
|
|
554 |
#' @inheritParams g_km
|
|
555 |
#' @inheritParams h_ggkm
|
|
556 |
#' @param g_el (`list` of `gtable`)\cr list as obtained by `h_decompose_gg()`.
|
|
557 |
#' @param annot_at_risk (`flag`)\cr compute and add the annotation table reporting the number of
|
|
558 |
#' patient at risk matching the main grid of the Kaplan-Meier curve.
|
|
559 |
#'
|
|
560 |
#' @return A grid layout.
|
|
561 |
#'
|
|
562 |
#' @details The layout corresponds to a grid of two columns and five rows of unequal dimensions. Most of the
|
|
563 |
#' dimension are fixed, only the curve is flexible and will accommodate with the remaining free space.
|
|
564 |
#' * The left column gets the annotation of the `ggplot` (y-axis) and the names of the strata for the patient
|
|
565 |
#' at risk tabulation. The main constraint is about the width of the columns which must allow the writing of
|
|
566 |
#' the strata name.
|
|
567 |
#' * The right column receive the `ggplot`, the legend, the x-axis and the patient at risk table.
|
|
568 |
#'
|
|
569 |
#' @examples
|
|
570 |
#' \donttest{
|
|
571 |
#' library(dplyr)
|
|
572 |
#' library(survival)
|
|
573 |
#' library(grid)
|
|
574 |
#'
|
|
575 |
#' fit_km <- tern_ex_adtte %>%
|
|
576 |
#' filter(PARAMCD == "OS") %>%
|
|
577 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
|
|
578 |
#' data_plot <- h_data_plot(fit_km = fit_km)
|
|
579 |
#' xticks <- h_xticks(data = data_plot)
|
|
580 |
#' gg <- h_ggkm(
|
|
581 |
#' data = data_plot,
|
|
582 |
#' censor_show = TRUE,
|
|
583 |
#' xticks = xticks, xlab = "Days", ylab = "Survival Probability",
|
|
584 |
#' title = "tt", footnotes = "ff", yval = "Survival"
|
|
585 |
#' )
|
|
586 |
#' g_el <- h_decompose_gg(gg)
|
|
587 |
#' lyt <- h_km_layout(data = data_plot, g_el = g_el, title = "t", footnotes = "f")
|
|
588 |
#' grid.show.layout(lyt)
|
|
589 |
#' }
|
|
590 |
#'
|
|
591 |
#' @export
|
|
592 |
h_km_layout <- function(data, g_el, title, footnotes, annot_at_risk = TRUE, annot_at_risk_title = TRUE) { |
|
593 | 1x |
lifecycle::deprecate_warn( |
594 | 1x |
"0.9.4",
|
595 | 1x |
"h_km_layout()",
|
596 | 1x |
details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." |
597 |
)
|
|
598 | 1x |
txtlines <- levels(as.factor(data$strata)) |
599 | 1x |
nlines <- nlevels(as.factor(data$strata)) |
600 | 1x |
col_annot_width <- max( |
601 | 1x |
c( |
602 | 1x |
as.numeric(grid::convertX(g_el$yaxis$widths + g_el$ylab$widths, "pt")), |
603 | 1x |
as.numeric( |
604 | 1x |
grid::convertX( |
605 | 1x |
grid::stringWidth(txtlines) + grid::unit(7, "pt"), "pt" |
606 |
)
|
|
607 |
)
|
|
608 |
)
|
|
609 |
)
|
|
610 | ||
611 | 1x |
ttl_row <- as.numeric(!is.null(title)) |
612 | 1x |
foot_row <- as.numeric(!is.null(footnotes)) |
613 | 1x |
no_tbl_ind <- c() |
614 | 1x |
ht_x <- c() |
615 | 1x |
ht_units <- c() |
616 | ||
617 | 1x |
if (ttl_row == 1) { |
618 | 1x |
no_tbl_ind <- c(no_tbl_ind, TRUE) |
619 | 1x |
ht_x <- c(ht_x, 2) |
620 | 1x |
ht_units <- c(ht_units, "lines") |
621 |
}
|
|
622 | ||
623 | 1x |
no_tbl_ind <- c(no_tbl_ind, rep(TRUE, 3), rep(FALSE, 2)) |
624 | 1x |
ht_x <- c( |
625 | 1x |
ht_x,
|
626 | 1x |
1,
|
627 | 1x |
grid::convertX(with(g_el, xaxis$heights + ylab$widths), "pt") + grid::unit(5, "pt"), |
628 | 1x |
grid::convertX(g_el$guide$heights, "pt") + grid::unit(2, "pt"), |
629 | 1x |
1,
|
630 | 1x |
nlines + 0.5, |
631 | 1x |
grid::convertX(with(g_el, xaxis$heights + ylab$widths), "pt") |
632 |
)
|
|
633 | 1x |
ht_units <- c( |
634 | 1x |
ht_units,
|
635 | 1x |
"null",
|
636 | 1x |
"pt",
|
637 | 1x |
"pt",
|
638 | 1x |
"lines",
|
639 | 1x |
"lines",
|
640 | 1x |
"pt"
|
641 |
)
|
|
642 | ||
643 | 1x |
if (foot_row == 1) { |
644 | 1x |
no_tbl_ind <- c(no_tbl_ind, TRUE) |
645 | 1x |
ht_x <- c(ht_x, 1) |
646 | 1x |
ht_units <- c(ht_units, "lines") |
647 |
}
|
|
648 | 1x |
if (annot_at_risk) { |
649 | 1x |
no_at_risk_tbl <- rep(TRUE, 6 + ttl_row + foot_row) |
650 | 1x |
if (!annot_at_risk_title) { |
651 | ! |
no_at_risk_tbl[length(no_at_risk_tbl) - 2 - foot_row] <- FALSE |
652 |
}
|
|
653 |
} else { |
|
654 | ! |
no_at_risk_tbl <- no_tbl_ind |
655 |
}
|
|
656 | ||
657 | 1x |
grid::grid.layout( |
658 | 1x |
nrow = sum(no_at_risk_tbl), ncol = 2, |
659 | 1x |
widths = grid::unit(c(col_annot_width, 1), c("pt", "null")), |
660 | 1x |
heights = grid::unit( |
661 | 1x |
x = ht_x[no_at_risk_tbl], |
662 | 1x |
units = ht_units[no_at_risk_tbl] |
663 |
)
|
|
664 |
)
|
|
665 |
}
|
|
666 | ||
667 |
#' Helper function to create patient-at-risk grobs
|
|
668 |
#'
|
|
669 |
#' @description `r lifecycle::badge("deprecated")`
|
|
670 |
#'
|
|
671 |
#' Two graphical objects are obtained, one corresponding to row labeling and the second to the table of
|
|
672 |
#' numbers of patients at risk. If `title = TRUE`, a third object corresponding to the table title is
|
|
673 |
#' also obtained.
|
|
674 |
#'
|
|
675 |
#' @inheritParams g_km
|
|
676 |
#' @inheritParams h_ggkm
|
|
677 |
#' @param annot_tbl (`data.frame`)\cr annotation as prepared by [survival::summary.survfit()] which
|
|
678 |
#' includes the number of patients at risk at given time points.
|
|
679 |
#' @param xlim (`numeric(1)`)\cr the maximum value on the x-axis (used to ensure the at risk table aligns with the KM
|
|
680 |
#' graph).
|
|
681 |
#' @param title (`flag`)\cr whether the "Patients at Risk" title should be added above the `annot_at_risk`
|
|
682 |
#' table. Has no effect if `annot_at_risk` is `FALSE`. Defaults to `TRUE`.
|
|
683 |
#'
|
|
684 |
#' @return A named `list` of two `gTree` objects if `title = FALSE`: `at_risk` and `label`, or three
|
|
685 |
#' `gTree` objects if `title = TRUE`: `at_risk`, `label`, and `title`.
|
|
686 |
#'
|
|
687 |
#' @examples
|
|
688 |
#' \donttest{
|
|
689 |
#' library(dplyr)
|
|
690 |
#' library(survival)
|
|
691 |
#' library(grid)
|
|
692 |
#'
|
|
693 |
#' fit_km <- tern_ex_adtte %>%
|
|
694 |
#' filter(PARAMCD == "OS") %>%
|
|
695 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
|
|
696 |
#'
|
|
697 |
#' data_plot <- h_data_plot(fit_km = fit_km)
|
|
698 |
#'
|
|
699 |
#' xticks <- h_xticks(data = data_plot)
|
|
700 |
#'
|
|
701 |
#' gg <- h_ggkm(
|
|
702 |
#' data = data_plot,
|
|
703 |
#' censor_show = TRUE,
|
|
704 |
#' xticks = xticks, xlab = "Days", ylab = "Survival Probability",
|
|
705 |
#' title = "tt", footnotes = "ff", yval = "Survival"
|
|
706 |
#' )
|
|
707 |
#'
|
|
708 |
#' # The annotation table reports the patient at risk for a given strata and
|
|
709 |
#' # times (`xticks`).
|
|
710 |
#' annot_tbl <- summary(fit_km, times = xticks)
|
|
711 |
#' if (is.null(fit_km$strata)) {
|
|
712 |
#' annot_tbl <- with(annot_tbl, data.frame(n.risk = n.risk, time = time, strata = "All"))
|
|
713 |
#' } else {
|
|
714 |
#' strata_lst <- strsplit(sub("=", "equals", levels(annot_tbl$strata)), "equals")
|
|
715 |
#' levels(annot_tbl$strata) <- matrix(unlist(strata_lst), ncol = 2, byrow = TRUE)[, 2]
|
|
716 |
#' annot_tbl <- data.frame(
|
|
717 |
#' n.risk = annot_tbl$n.risk,
|
|
718 |
#' time = annot_tbl$time,
|
|
719 |
#' strata = annot_tbl$strata
|
|
720 |
#' )
|
|
721 |
#' }
|
|
722 |
#'
|
|
723 |
#' # The annotation table is transformed into a grob.
|
|
724 |
#' tbl <- h_grob_tbl_at_risk(data = data_plot, annot_tbl = annot_tbl, xlim = max(xticks))
|
|
725 |
#'
|
|
726 |
#' # For the representation, the layout is estimated for which the decomposition
|
|
727 |
#' # of the graphic element is necessary.
|
|
728 |
#' g_el <- h_decompose_gg(gg)
|
|
729 |
#' lyt <- h_km_layout(data = data_plot, g_el = g_el, title = "t", footnotes = "f")
|
|
730 |
#'
|
|
731 |
#' grid::grid.newpage()
|
|
732 |
#' pushViewport(viewport(layout = lyt, height = .95, width = .95))
|
|
733 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "purple", fill = "gray85", lwd = 1))
|
|
734 |
#' pushViewport(viewport(layout.pos.row = 3:4, layout.pos.col = 2))
|
|
735 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "orange", fill = "gray85", lwd = 1))
|
|
736 |
#' grid::grid.draw(tbl$at_risk)
|
|
737 |
#' popViewport()
|
|
738 |
#' pushViewport(viewport(layout.pos.row = 3:4, layout.pos.col = 1))
|
|
739 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "green3", fill = "gray85", lwd = 1))
|
|
740 |
#' grid::grid.draw(tbl$label)
|
|
741 |
#' }
|
|
742 |
#'
|
|
743 |
#' @export
|
|
744 |
h_grob_tbl_at_risk <- function(data, annot_tbl, xlim, title = TRUE) { |
|
745 | 1x |
lifecycle::deprecate_warn( |
746 | 1x |
"0.9.4",
|
747 | 1x |
"h_grob_tbl_at_risk()",
|
748 | 1x |
details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." |
749 |
)
|
|
750 | 1x |
txtlines <- levels(as.factor(data$strata)) |
751 | 1x |
nlines <- nlevels(as.factor(data$strata)) |
752 | 1x |
y_int <- annot_tbl$time[2] - annot_tbl$time[1] |
753 | 1x |
annot_tbl <- expand.grid( |
754 | 1x |
time = seq(0, xlim, y_int), |
755 | 1x |
strata = unique(annot_tbl$strata) |
756 | 1x |
) %>% dplyr::left_join(annot_tbl, by = c("time", "strata")) |
757 | 1x |
annot_tbl[is.na(annot_tbl)] <- 0 |
758 | 1x |
y_str_unit <- as.numeric(annot_tbl$strata) |
759 | 1x |
vp_table <- grid::plotViewport(margins = grid::unit(c(0, 0, 0, 0), "lines")) |
760 | 1x |
if (title) { |
761 | 1x |
gb_table_title <- grid::gList( |
762 | 1x |
grid::textGrob( |
763 | 1x |
label = "Patients at Risk:", |
764 | 1x |
x = 1, |
765 | 1x |
y = grid::unit(0.2, "native"), |
766 | 1x |
gp = grid::gpar(fontface = "bold", fontsize = 10) |
767 |
)
|
|
768 |
)
|
|
769 |
}
|
|
770 | 1x |
gb_table_left_annot <- grid::gList( |
771 | 1x |
grid::rectGrob( |
772 | 1x |
x = 0, y = grid::unit(c(1:nlines) - 1, "lines"), |
773 | 1x |
gp = grid::gpar(fill = c("gray95", "gray90"), alpha = 1, col = "white"), |
774 | 1x |
height = grid::unit(1, "lines"), just = "bottom", hjust = 0 |
775 |
),
|
|
776 | 1x |
grid::textGrob( |
777 | 1x |
label = unique(annot_tbl$strata), |
778 | 1x |
x = 0.5, |
779 | 1x |
y = grid::unit( |
780 | 1x |
(max(unique(y_str_unit)) - unique(y_str_unit)) + 0.75, |
781 | 1x |
"native"
|
782 |
),
|
|
783 | 1x |
gp = grid::gpar(fontface = "italic", fontsize = 10) |
784 |
)
|
|
785 |
)
|
|
786 | 1x |
gb_patient_at_risk <- grid::gList( |
787 | 1x |
grid::rectGrob( |
788 | 1x |
x = 0, y = grid::unit(c(1:nlines) - 1, "lines"), |
789 | 1x |
gp = grid::gpar(fill = c("gray95", "gray90"), alpha = 1, col = "white"), |
790 | 1x |
height = grid::unit(1, "lines"), just = "bottom", hjust = 0 |
791 |
),
|
|
792 | 1x |
grid::textGrob( |
793 | 1x |
label = annot_tbl$n.risk, |
794 | 1x |
x = grid::unit(annot_tbl$time, "native"), |
795 | 1x |
y = grid::unit( |
796 | 1x |
(max(y_str_unit) - y_str_unit) + .5, |
797 | 1x |
"line"
|
798 | 1x |
) # maybe native |
799 |
)
|
|
800 |
)
|
|
801 | ||
802 | 1x |
ret <- list( |
803 | 1x |
at_risk = grid::gList( |
804 | 1x |
grid::gTree( |
805 | 1x |
vp = vp_table, |
806 | 1x |
children = grid::gList( |
807 | 1x |
grid::gTree( |
808 | 1x |
vp = grid::dataViewport( |
809 | 1x |
xscale = c(0, xlim) + c(-0.05, 0.05) * xlim, |
810 | 1x |
yscale = c(0, nlines + 1), |
811 | 1x |
extension = c(0.05, 0) |
812 |
),
|
|
813 | 1x |
children = grid::gList(gb_patient_at_risk) |
814 |
)
|
|
815 |
)
|
|
816 |
)
|
|
817 |
),
|
|
818 | 1x |
label = grid::gList( |
819 | 1x |
grid::gTree( |
820 | 1x |
vp = grid::viewport(width = max(grid::stringWidth(txtlines))), |
821 | 1x |
children = grid::gList( |
822 | 1x |
grid::gTree( |
823 | 1x |
vp = grid::dataViewport( |
824 | 1x |
xscale = 0:1, |
825 | 1x |
yscale = c(0, nlines + 1), |
826 | 1x |
extension = c(0.0, 0) |
827 |
),
|
|
828 | 1x |
children = grid::gList(gb_table_left_annot) |
829 |
)
|
|
830 |
)
|
|
831 |
)
|
|
832 |
)
|
|
833 |
)
|
|
834 | ||
835 | 1x |
if (title) { |
836 | 1x |
ret[["title"]] <- grid::gList( |
837 | 1x |
grid::gTree( |
838 | 1x |
vp = grid::viewport(width = max(grid::stringWidth(txtlines))), |
839 | 1x |
children = grid::gList( |
840 | 1x |
grid::gTree( |
841 | 1x |
vp = grid::dataViewport( |
842 | 1x |
xscale = 0:1, |
843 | 1x |
yscale = c(0, 1), |
844 | 1x |
extension = c(0, 0) |
845 |
),
|
|
846 | 1x |
children = grid::gList(gb_table_title) |
847 |
)
|
|
848 |
)
|
|
849 |
)
|
|
850 |
)
|
|
851 |
}
|
|
852 | ||
853 | 1x |
ret
|
854 |
}
|
|
855 | ||
856 |
#' Helper function to create survival estimation grobs
|
|
857 |
#'
|
|
858 |
#' @description `r lifecycle::badge("deprecated")`
|
|
859 |
#'
|
|
860 |
#' The survival fit is transformed in a grob containing a table with groups in
|
|
861 |
#' rows characterized by N, median and 95% confidence interval.
|
|
862 |
#'
|
|
863 |
#' @inheritParams g_km
|
|
864 |
#' @inheritParams h_data_plot
|
|
865 |
#' @param ttheme (`list`)\cr see [gridExtra::ttheme_default()].
|
|
866 |
#' @param x (`proportion`)\cr a value between 0 and 1 specifying x-location.
|
|
867 |
#' @param y (`proportion`)\cr a value between 0 and 1 specifying y-location.
|
|
868 |
#' @param width (`grid::unit`)\cr width (as a unit) to use when printing the grob.
|
|
869 |
#'
|
|
870 |
#' @return A `grob` of a table containing statistics `N`, `Median`, and `XX% CI` (`XX` taken from `fit_km`).
|
|
871 |
#'
|
|
872 |
#' @examples
|
|
873 |
#' \donttest{
|
|
874 |
#' library(dplyr)
|
|
875 |
#' library(survival)
|
|
876 |
#' library(grid)
|
|
877 |
#'
|
|
878 |
#' grid::grid.newpage()
|
|
879 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "pink", fill = "gray85", lwd = 1))
|
|
880 |
#' tern_ex_adtte %>%
|
|
881 |
#' filter(PARAMCD == "OS") %>%
|
|
882 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
|
|
883 |
#' h_grob_median_surv() %>%
|
|
884 |
#' grid::grid.draw()
|
|
885 |
#' }
|
|
886 |
#'
|
|
887 |
#' @export
|
|
888 |
h_grob_median_surv <- function(fit_km, |
|
889 |
armval = "All", |
|
890 |
x = 0.9, |
|
891 |
y = 0.9, |
|
892 |
width = grid::unit(0.3, "npc"), |
|
893 |
ttheme = gridExtra::ttheme_default()) { |
|
894 | 1x |
lifecycle::deprecate_warn( |
895 | 1x |
"0.9.4",
|
896 | 1x |
"h_grob_median_surv()",
|
897 | 1x |
details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." |
898 |
)
|
|
899 | 1x |
data <- h_tbl_median_surv(fit_km, armval = armval) |
900 | ||
901 | 1x |
width <- grid::convertUnit(grid::unit(as.numeric(width), grid::unitType(width)), "in") |
902 | 1x |
height <- width * (nrow(data) + 1) / 12 |
903 | ||
904 | 1x |
w <- paste(" ", c( |
905 | 1x |
rownames(data)[which.max(nchar(rownames(data)))], |
906 | 1x |
sapply(names(data), function(x) c(x, data[[x]])[which.max(nchar(c(x, data[[x]])))]) |
907 |
)) |
|
908 | 1x |
w_unit <- grid::convertWidth(grid::stringWidth(w), "in", valueOnly = TRUE) |
909 | ||
910 | 1x |
w_txt <- sapply(1:64, function(x) { |
911 | 64x |
graphics::par(ps = x) |
912 | 64x |
graphics::strwidth(w[4], units = "in") |
913 |
}) |
|
914 | 1x |
f_size_w <- which.max(w_txt[w_txt < as.numeric((w_unit / sum(w_unit)) * width)[4]]) |
915 | ||
916 | 1x |
h_txt <- sapply(1:64, function(x) { |
917 | 64x |
graphics::par(ps = x) |
918 | 64x |
graphics::strheight(grid::stringHeight("X"), units = "in") |
919 |
}) |
|
920 | 1x |
f_size_h <- which.max(h_txt[h_txt < as.numeric(grid::unit(as.numeric(height) / 4, grid::unitType(height)))]) |
921 | ||
922 | 1x |
if (ttheme$core$fg_params$fontsize == 12) { |
923 | 1x |
ttheme$core$fg_params$fontsize <- min(f_size_w, f_size_h) |
924 | 1x |
ttheme$colhead$fg_params$fontsize <- min(f_size_w, f_size_h) |
925 | 1x |
ttheme$rowhead$fg_params$fontsize <- min(f_size_w, f_size_h) |
926 |
}
|
|
927 | ||
928 | 1x |
gt <- gridExtra::tableGrob( |
929 | 1x |
d = data, |
930 | 1x |
theme = ttheme |
931 |
)
|
|
932 | 1x |
gt$widths <- ((w_unit / sum(w_unit)) * width) |
933 | 1x |
gt$heights <- rep(grid::unit(as.numeric(height) / 4, grid::unitType(height)), nrow(gt)) |
934 | ||
935 | 1x |
vp <- grid::viewport( |
936 | 1x |
x = grid::unit(x, "npc") + grid::unit(1, "lines"), |
937 | 1x |
y = grid::unit(y, "npc") + grid::unit(1.5, "lines"), |
938 | 1x |
height = height, |
939 | 1x |
width = width, |
940 | 1x |
just = c("right", "top") |
941 |
)
|
|
942 | ||
943 | 1x |
grid::gList( |
944 | 1x |
grid::gTree( |
945 | 1x |
vp = vp, |
946 | 1x |
children = grid::gList(gt) |
947 |
)
|
|
948 |
)
|
|
949 |
}
|
|
950 | ||
951 |
#' Helper function to create grid object with y-axis annotation
|
|
952 |
#'
|
|
953 |
#' @description `r lifecycle::badge("deprecated")`
|
|
954 |
#'
|
|
955 |
#' Build the y-axis annotation from a decomposed `ggplot`.
|
|
956 |
#'
|
|
957 |
#' @param ylab (`gtable`)\cr the y-lab as a graphical object derived from a `ggplot`.
|
|
958 |
#' @param yaxis (`gtable`)\cr the y-axis as a graphical object derived from a `ggplot`.
|
|
959 |
#'
|
|
960 |
#' @return A `gTree` object containing the y-axis annotation from a `ggplot`.
|
|
961 |
#'
|
|
962 |
#' @examples
|
|
963 |
#' \donttest{
|
|
964 |
#' library(dplyr)
|
|
965 |
#' library(survival)
|
|
966 |
#' library(grid)
|
|
967 |
#'
|
|
968 |
#' fit_km <- tern_ex_adtte %>%
|
|
969 |
#' filter(PARAMCD == "OS") %>%
|
|
970 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
|
|
971 |
#' data_plot <- h_data_plot(fit_km = fit_km)
|
|
972 |
#' xticks <- h_xticks(data = data_plot)
|
|
973 |
#' gg <- h_ggkm(
|
|
974 |
#' data = data_plot,
|
|
975 |
#' censor_show = TRUE,
|
|
976 |
#' xticks = xticks, xlab = "Days", ylab = "Survival Probability",
|
|
977 |
#' title = "title", footnotes = "footnotes", yval = "Survival"
|
|
978 |
#' )
|
|
979 |
#'
|
|
980 |
#' g_el <- h_decompose_gg(gg)
|
|
981 |
#'
|
|
982 |
#' grid::grid.newpage()
|
|
983 |
#' pvp <- grid::plotViewport(margins = c(5, 4, 2, 20))
|
|
984 |
#' pushViewport(pvp)
|
|
985 |
#' grid::grid.draw(h_grob_y_annot(ylab = g_el$ylab, yaxis = g_el$yaxis))
|
|
986 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "gray35", fill = NA))
|
|
987 |
#' }
|
|
988 |
#'
|
|
989 |
#' @export
|
|
990 |
h_grob_y_annot <- function(ylab, yaxis) { |
|
991 | 1x |
lifecycle::deprecate_warn( |
992 | 1x |
"0.9.4",
|
993 | 1x |
"h_grob_y_annot()",
|
994 | 1x |
details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." |
995 |
)
|
|
996 | 1x |
grid::gList( |
997 | 1x |
grid::gTree( |
998 | 1x |
vp = grid::viewport( |
999 | 1x |
width = grid::convertX(yaxis$widths + ylab$widths, "pt"), |
1000 | 1x |
x = grid::unit(1, "npc"), |
1001 | 1x |
just = "right" |
1002 |
),
|
|
1003 | 1x |
children = grid::gList(cbind(ylab, yaxis)) |
1004 |
)
|
|
1005 |
)
|
|
1006 |
}
|
|
1007 | ||
1008 |
#' Helper function to create Cox-PH grobs
|
|
1009 |
#'
|
|
1010 |
#' @description `r lifecycle::badge("deprecated")`
|
|
1011 |
#'
|
|
1012 |
#' Grob of `rtable` output from [h_tbl_coxph_pairwise()]
|
|
1013 |
#'
|
|
1014 |
#' @inheritParams h_grob_median_surv
|
|
1015 |
#' @param ... arguments to pass to [h_tbl_coxph_pairwise()].
|
|
1016 |
#' @param x (`proportion`)\cr a value between 0 and 1 specifying x-location.
|
|
1017 |
#' @param y (`proportion`)\cr a value between 0 and 1 specifying y-location.
|
|
1018 |
#' @param width (`grid::unit`)\cr width (as a unit) to use when printing the grob.
|
|
1019 |
#'
|
|
1020 |
#' @return A `grob` of a table containing statistics `HR`, `XX% CI` (`XX` taken from `control_coxph_pw`),
|
|
1021 |
#' and `p-value (log-rank)`.
|
|
1022 |
#'
|
|
1023 |
#' @examples
|
|
1024 |
#' \donttest{
|
|
1025 |
#' library(dplyr)
|
|
1026 |
#' library(survival)
|
|
1027 |
#' library(grid)
|
|
1028 |
#'
|
|
1029 |
#' grid::grid.newpage()
|
|
1030 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "pink", fill = "gray85", lwd = 1))
|
|
1031 |
#' data <- tern_ex_adtte %>%
|
|
1032 |
#' filter(PARAMCD == "OS") %>%
|
|
1033 |
#' mutate(is_event = CNSR == 0)
|
|
1034 |
#' tbl_grob <- h_grob_coxph(
|
|
1035 |
#' df = data,
|
|
1036 |
#' variables = list(tte = "AVAL", is_event = "is_event", arm = "ARMCD"),
|
|
1037 |
#' control_coxph_pw = control_coxph(conf_level = 0.9), x = 0.5, y = 0.5
|
|
1038 |
#' )
|
|
1039 |
#' grid::grid.draw(tbl_grob)
|
|
1040 |
#' }
|
|
1041 |
#'
|
|
1042 |
#' @export
|
|
1043 |
h_grob_coxph <- function(..., |
|
1044 |
x = 0, |
|
1045 |
y = 0, |
|
1046 |
width = grid::unit(0.4, "npc"), |
|
1047 |
ttheme = gridExtra::ttheme_default( |
|
1048 |
padding = grid::unit(c(1, .5), "lines"), |
|
1049 |
core = list(bg_params = list(fill = c("grey95", "grey90"), alpha = .5)) |
|
1050 |
)) { |
|
1051 | 1x |
lifecycle::deprecate_warn( |
1052 | 1x |
"0.9.4",
|
1053 | 1x |
"h_grob_coxph()",
|
1054 | 1x |
details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." |
1055 |
)
|
|
1056 | 1x |
data <- h_tbl_coxph_pairwise(...) |
1057 | ||
1058 | 1x |
width <- grid::convertUnit(grid::unit(as.numeric(width), grid::unitType(width)), "in") |
1059 | 1x |
height <- width * (nrow(data) + 1) / 12 |
1060 | ||
1061 | 1x |
w <- paste(" ", c( |
1062 | 1x |
rownames(data)[which.max(nchar(rownames(data)))], |
1063 | 1x |
sapply(names(data), function(x) c(x, data[[x]])[which.max(nchar(c(x, data[[x]])))]) |
1064 |
)) |
|
1065 | 1x |
w_unit <- grid::convertWidth(grid::stringWidth(w), "in", valueOnly = TRUE) |
1066 | ||
1067 | 1x |
w_txt <- sapply(1:64, function(x) { |
1068 | 64x |
graphics::par(ps = x) |
1069 | 64x |
graphics::strwidth(w[4], units = "in") |
1070 |
}) |
|
1071 | 1x |
f_size_w <- which.max(w_txt[w_txt < as.numeric((w_unit / sum(w_unit)) * width)[4]]) |
1072 | ||
1073 | 1x |
h_txt <- sapply(1:64, function(x) { |
1074 | 64x |
graphics::par(ps = x) |
1075 | 64x |
graphics::strheight(grid::stringHeight("X"), units = "in") |
1076 |
}) |
|
1077 | 1x |
f_size_h <- which.max(h_txt[h_txt < as.numeric(grid::unit(as.numeric(height) / 4, grid::unitType(height)))]) |
1078 | ||
1079 | 1x |
if (ttheme$core$fg_params$fontsize == 12) { |
1080 | 1x |
ttheme$core$fg_params$fontsize <- min(f_size_w, f_size_h) |
1081 | 1x |
ttheme$colhead$fg_params$fontsize <- min(f_size_w, f_size_h) |
1082 | 1x |
ttheme$rowhead$fg_params$fontsize <- min(f_size_w, f_size_h) |
1083 |
}
|
|
1084 | ||
1085 | 1x |
tryCatch( |
1086 | 1x |
expr = { |
1087 | 1x |
gt <- gridExtra::tableGrob( |
1088 | 1x |
d = data, |
1089 | 1x |
theme = ttheme |
1090 | 1x |
) # ERROR 'data' must be of a vector type, was 'NULL' |
1091 | 1x |
gt$widths <- ((w_unit / sum(w_unit)) * width) |
1092 | 1x |
gt$heights <- rep(grid::unit(as.numeric(height) / 4, grid::unitType(height)), nrow(gt)) |
1093 | 1x |
vp <- grid::viewport( |
1094 | 1x |
x = grid::unit(x, "npc") + grid::unit(1, "lines"), |
1095 | 1x |
y = grid::unit(y, "npc") + grid::unit(1.5, "lines"), |
1096 | 1x |
height = height, |
1097 | 1x |
width = width, |
1098 | 1x |
just = c("left", "bottom") |
1099 |
)
|
|
1100 | 1x |
grid::gList( |
1101 | 1x |
grid::gTree( |
1102 | 1x |
vp = vp, |
1103 | 1x |
children = grid::gList(gt) |
1104 |
)
|
|
1105 |
)
|
|
1106 |
},
|
|
1107 | 1x |
error = function(w) { |
1108 | ! |
message(paste( |
1109 | ! |
"Warning: Cox table will not be displayed as there is",
|
1110 | ! |
"not any level to be compared in the arm variable."
|
1111 |
)) |
|
1112 | ! |
return( |
1113 | ! |
grid::gList( |
1114 | ! |
grid::gTree( |
1115 | ! |
vp = NULL, |
1116 | ! |
children = NULL |
1117 |
)
|
|
1118 |
)
|
|
1119 |
)
|
|
1120 |
}
|
|
1121 |
)
|
|
1122 |
}
|
1 |
#' Count occurrences by grade
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [count_occurrences_by_grade()] creates a layout element to calculate occurrence counts by grade.
|
|
6 |
#'
|
|
7 |
#' This function analyzes primary analysis variable `var` which indicates toxicity grades. The `id` variable
|
|
8 |
#' is used to indicate unique subject identifiers (defaults to `USUBJID`). The user can also supply a list of
|
|
9 |
#' custom groups of grades to analyze via the `grade_groups` parameter. The `remove_single` argument will
|
|
10 |
#' remove single grades from the analysis so that *only* grade groups are analyzed.
|
|
11 |
#'
|
|
12 |
#' If there are multiple grades recorded for one patient only the highest grade level is counted.
|
|
13 |
#'
|
|
14 |
#' The summarize function [summarize_occurrences_by_grade()] performs the same function as
|
|
15 |
#' [count_occurrences_by_grade()] except it creates content rows, not data rows, to summarize the current table
|
|
16 |
#' row/column context and operates on the level of the latest row split or the root of the table if no row splits have
|
|
17 |
#' occurred.
|
|
18 |
#'
|
|
19 |
#' @inheritParams count_occurrences
|
|
20 |
#' @inheritParams argument_convention
|
|
21 |
#' @param grade_groups (named `list` of `character`)\cr list containing groupings of grades.
|
|
22 |
#' @param remove_single (`flag`)\cr `TRUE` to not include the elements of one-element grade groups
|
|
23 |
#' in the the output list; in this case only the grade groups names will be included in the output. If
|
|
24 |
#' `only_grade_groups` is set to `TRUE` this argument is ignored.
|
|
25 |
#' @param only_grade_groups (`flag`)\cr whether only the specified grade groups should be
|
|
26 |
#' included, with individual grade rows removed (`TRUE`), or all grades and grade groups
|
|
27 |
#' should be displayed (`FALSE`).
|
|
28 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
29 |
#'
|
|
30 |
#' Options are: ``r shQuote(get_stats("count_occurrences_by_grade"), type = "sh")``
|
|
31 |
#'
|
|
32 |
#' @seealso Relevant helper function [h_append_grade_groups()].
|
|
33 |
#'
|
|
34 |
#' @name count_occurrences_by_grade
|
|
35 |
#' @order 1
|
|
36 |
NULL
|
|
37 | ||
38 |
#' Helper function for `s_count_occurrences_by_grade()`
|
|
39 |
#'
|
|
40 |
#' @description `r lifecycle::badge("stable")`
|
|
41 |
#'
|
|
42 |
#' Helper function for [s_count_occurrences_by_grade()] to insert grade groupings into list with
|
|
43 |
#' individual grade frequencies. The order of the final result follows the order of `grade_groups`.
|
|
44 |
#' The elements under any-grade group (if any), i.e. the grade group equal to `refs` will be moved to
|
|
45 |
#' the end. Grade groups names must be unique.
|
|
46 |
#'
|
|
47 |
#' @inheritParams count_occurrences_by_grade
|
|
48 |
#' @param refs (named `list` of `numeric`)\cr named list where each name corresponds to a reference grade level
|
|
49 |
#' and each entry represents a count.
|
|
50 |
#'
|
|
51 |
#' @return Formatted list of grade groupings.
|
|
52 |
#'
|
|
53 |
#' @examples
|
|
54 |
#' h_append_grade_groups(
|
|
55 |
#' list(
|
|
56 |
#' "Any Grade" = as.character(1:5),
|
|
57 |
#' "Grade 1-2" = c("1", "2"),
|
|
58 |
#' "Grade 3-4" = c("3", "4")
|
|
59 |
#' ),
|
|
60 |
#' list("1" = 10, "2" = 20, "3" = 30, "4" = 40, "5" = 50)
|
|
61 |
#' )
|
|
62 |
#'
|
|
63 |
#' h_append_grade_groups(
|
|
64 |
#' list(
|
|
65 |
#' "Any Grade" = as.character(5:1),
|
|
66 |
#' "Grade A" = "5",
|
|
67 |
#' "Grade B" = c("4", "3")
|
|
68 |
#' ),
|
|
69 |
#' list("1" = 10, "2" = 20, "3" = 30, "4" = 40, "5" = 50)
|
|
70 |
#' )
|
|
71 |
#'
|
|
72 |
#' h_append_grade_groups(
|
|
73 |
#' list(
|
|
74 |
#' "Any Grade" = as.character(1:5),
|
|
75 |
#' "Grade 1-2" = c("1", "2"),
|
|
76 |
#' "Grade 3-4" = c("3", "4")
|
|
77 |
#' ),
|
|
78 |
#' list("1" = 10, "2" = 5, "3" = 0)
|
|
79 |
#' )
|
|
80 |
#'
|
|
81 |
#' @export
|
|
82 |
h_append_grade_groups <- function(grade_groups, refs, remove_single = TRUE, only_grade_groups = FALSE) { |
|
83 | 32x |
checkmate::assert_list(grade_groups) |
84 | 32x |
checkmate::assert_list(refs) |
85 | 32x |
refs_orig <- refs |
86 | 32x |
elements <- unique(unlist(grade_groups)) |
87 | ||
88 |
### compute sums in groups
|
|
89 | 32x |
grp_sum <- lapply(grade_groups, function(i) do.call(sum, refs[i])) |
90 | 32x |
if (!checkmate::test_subset(elements, names(refs))) { |
91 | 2x |
padding_el <- setdiff(elements, names(refs)) |
92 | 2x |
refs[padding_el] <- 0 |
93 |
}
|
|
94 | 32x |
result <- c(grp_sum, refs) |
95 | ||
96 |
### order result while keeping grade_groups's ordering
|
|
97 | 32x |
ordr <- grade_groups |
98 | ||
99 |
# elements of any-grade group (if any) will be moved to the end
|
|
100 | 32x |
is_any <- sapply(grade_groups, setequal, y = names(refs)) |
101 | 32x |
ordr[is_any] <- list(character(0)) # hide elements under any-grade group |
102 | ||
103 |
# groups-elements combined sequence
|
|
104 | 32x |
ordr <- c(lapply(names(ordr), function(g) c(g, ordr[[g]])), recursive = TRUE, use.names = FALSE) |
105 | 32x |
ordr <- ordr[!duplicated(ordr)] |
106 | ||
107 |
# append remaining elements (if any)
|
|
108 | 32x |
ordr <- union(ordr, unlist(grade_groups[is_any])) # from any-grade group |
109 | 32x |
ordr <- union(ordr, names(refs)) # from refs |
110 | ||
111 |
# remove elements of single-element groups, if any
|
|
112 | 32x |
if (only_grade_groups) { |
113 | 3x |
ordr <- intersect(ordr, names(grade_groups)) |
114 | 29x |
} else if (remove_single) { |
115 | 29x |
is_single <- sapply(grade_groups, length) == 1L |
116 | 29x |
ordr <- setdiff(ordr, unlist(grade_groups[is_single])) |
117 |
}
|
|
118 | ||
119 |
# apply the order
|
|
120 | 32x |
result <- result[ordr] |
121 | ||
122 |
# remove groups without any elements in the original refs
|
|
123 |
# note: it's OK if groups have 0 value
|
|
124 | 32x |
keep_grp <- vapply(grade_groups, function(x, rf) { |
125 | 64x |
any(x %in% rf) |
126 | 32x |
}, rf = names(refs_orig), logical(1)) |
127 | ||
128 | 32x |
keep_el <- names(result) %in% names(refs_orig) | names(result) %in% names(keep_grp)[keep_grp] |
129 | 32x |
result <- result[keep_el] |
130 | ||
131 | 32x |
result
|
132 |
}
|
|
133 | ||
134 |
#' @describeIn count_occurrences_by_grade Statistics function which counts the
|
|
135 |
#' number of patients by highest grade.
|
|
136 |
#'
|
|
137 |
#' @return
|
|
138 |
#' * `s_count_occurrences_by_grade()` returns a list of counts and fractions with one element per grade level or
|
|
139 |
#' grade level grouping.
|
|
140 |
#'
|
|
141 |
#' @examples
|
|
142 |
#' s_count_occurrences_by_grade(
|
|
143 |
#' df,
|
|
144 |
#' .N_col = 10L,
|
|
145 |
#' .var = "AETOXGR",
|
|
146 |
#' id = "USUBJID",
|
|
147 |
#' grade_groups = list("ANY" = levels(df$AETOXGR))
|
|
148 |
#' )
|
|
149 |
#'
|
|
150 |
#' @export
|
|
151 |
s_count_occurrences_by_grade <- function(df, |
|
152 |
labelstr = "", |
|
153 |
.var,
|
|
154 |
.N_row, # nolint |
|
155 |
.N_col, # nolint |
|
156 |
...,
|
|
157 |
id = "USUBJID", |
|
158 |
grade_groups = list(), |
|
159 |
remove_single = TRUE, |
|
160 |
only_grade_groups = FALSE, |
|
161 |
denom = c("N_col", "n", "N_row")) { |
|
162 | 75x |
assert_valid_factor(df[[.var]]) |
163 | 75x |
assert_df_with_variables(df, list(grade = .var, id = id)) |
164 | ||
165 | 75x |
denom <- match.arg(denom) %>% |
166 | 75x |
switch( |
167 | 75x |
n = nlevels(factor(df[[id]])), |
168 | 75x |
N_row = .N_row, |
169 | 75x |
N_col = .N_col |
170 |
)
|
|
171 | ||
172 | 75x |
if (nrow(df) < 1) { |
173 | 5x |
grade_levels <- levels(df[[.var]]) |
174 | 5x |
l_count <- as.list(rep(0, length(grade_levels))) |
175 | 5x |
names(l_count) <- grade_levels |
176 |
} else { |
|
177 | 70x |
if (isTRUE(is.factor(df[[id]]))) { |
178 | ! |
assert_valid_factor(df[[id]], any.missing = FALSE) |
179 |
} else { |
|
180 | 70x |
checkmate::assert_character(df[[id]], min.chars = 1, any.missing = FALSE) |
181 |
}
|
|
182 | 70x |
checkmate::assert_count(.N_col) |
183 | ||
184 | 70x |
id <- df[[id]] |
185 | 70x |
grade <- df[[.var]] |
186 | ||
187 | 70x |
if (!is.ordered(grade)) { |
188 | 70x |
grade_lbl <- obj_label(grade) |
189 | 70x |
lvls <- levels(grade) |
190 | 70x |
if (sum(grepl("^\\d+$", lvls)) %in% c(0, length(lvls))) { |
191 | 69x |
lvl_ord <- lvls |
192 |
} else { |
|
193 | 1x |
lvls[!grepl("^\\d+$", lvls)] <- min(as.numeric(lvls[grepl("^\\d+$", lvls)])) - 1 |
194 | 1x |
lvl_ord <- levels(grade)[order(as.numeric(lvls))] |
195 |
}
|
|
196 | 70x |
grade <- formatters::with_label(factor(grade, levels = lvl_ord, ordered = TRUE), grade_lbl) |
197 |
}
|
|
198 | ||
199 | 70x |
missing_lvl <- grepl("missing", tolower(levels(grade))) |
200 | 70x |
if (any(missing_lvl)) { |
201 | 1x |
grade <- factor( |
202 | 1x |
grade,
|
203 | 1x |
levels = c(levels(grade)[!missing_lvl], levels(grade)[missing_lvl]), |
204 | 1x |
ordered = is.ordered(grade) |
205 |
)
|
|
206 |
}
|
|
207 | 70x |
df_max <- stats::aggregate(grade ~ id, FUN = max, drop = FALSE) |
208 | 70x |
l_count <- as.list(table(df_max$grade)) |
209 |
}
|
|
210 | ||
211 | 75x |
if (length(grade_groups) > 0) { |
212 | 30x |
l_count <- h_append_grade_groups(grade_groups, l_count, remove_single, only_grade_groups) |
213 |
}
|
|
214 | ||
215 | 75x |
l_count_fraction <- lapply( |
216 | 75x |
l_count,
|
217 | 75x |
function(i, denom) { |
218 | 299x |
if (i == 0 && denom == 0) { |
219 | 9x |
c(0, 0) |
220 |
} else { |
|
221 | 290x |
c(i, i / denom) |
222 |
}
|
|
223 |
},
|
|
224 | 75x |
denom = denom |
225 |
)
|
|
226 | ||
227 | 75x |
list( |
228 | 75x |
count_fraction = l_count_fraction, |
229 | 75x |
count_fraction_fixed_dp = l_count_fraction |
230 |
)
|
|
231 |
}
|
|
232 | ||
233 |
#' @describeIn count_occurrences_by_grade Formatted analysis function which is used as `afun`
|
|
234 |
#' in `count_occurrences_by_grade()`.
|
|
235 |
#'
|
|
236 |
#' @return
|
|
237 |
#' * `a_count_occurrences_by_grade()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
238 |
#'
|
|
239 |
#' @examples
|
|
240 |
#' a_count_occurrences_by_grade(
|
|
241 |
#' df,
|
|
242 |
#' .N_col = 10L,
|
|
243 |
#' .N_row = 10L,
|
|
244 |
#' .var = "AETOXGR",
|
|
245 |
#' id = "USUBJID",
|
|
246 |
#' grade_groups = list("ANY" = levels(df$AETOXGR))
|
|
247 |
#' )
|
|
248 |
#'
|
|
249 |
#' @export
|
|
250 |
a_count_occurrences_by_grade <- function(df, |
|
251 |
labelstr = "", |
|
252 |
...,
|
|
253 |
.stats = NULL, |
|
254 |
.stat_names = NULL, |
|
255 |
.formats = NULL, |
|
256 |
.labels = NULL, |
|
257 |
.indent_mods = NULL) { |
|
258 |
# Check for additional parameters to the statistics function
|
|
259 | 56x |
dots_extra_args <- list(...) |
260 | 56x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
261 | 56x |
dots_extra_args$.additional_fun_parameters <- NULL |
262 | ||
263 |
# Check for user-defined functions
|
|
264 | 56x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
265 | 56x |
.stats <- default_and_custom_stats_list$all_stats |
266 | 56x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
267 | ||
268 |
# Apply statistics function
|
|
269 | 56x |
x_stats <- .apply_stat_functions( |
270 | 56x |
default_stat_fnc = s_count_occurrences_by_grade, |
271 | 56x |
custom_stat_fnc_list = custom_stat_functions, |
272 | 56x |
args_list = c( |
273 | 56x |
df = list(df), |
274 | 56x |
labelstr = list(labelstr), |
275 | 56x |
extra_afun_params,
|
276 | 56x |
dots_extra_args
|
277 |
)
|
|
278 |
)
|
|
279 | ||
280 |
# Fill in formatting defaults
|
|
281 | 56x |
.stats <- get_stats("count_occurrences_by_grade", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) |
282 | 56x |
x_stats <- x_stats[.stats] |
283 | 56x |
levels_per_stats <- lapply(x_stats, names) |
284 | 56x |
.formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) |
285 | 56x |
.labels <- get_labels_from_stats(.stats, .labels, levels_per_stats) |
286 | 56x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) |
287 | ||
288 | 56x |
x_stats <- x_stats[.stats] %>% |
289 | 56x |
.unlist_keep_nulls() %>% |
290 | 56x |
setNames(names(.formats)) |
291 | ||
292 |
# Auto format handling
|
|
293 | 56x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
294 | ||
295 |
# Get and check statistical names
|
|
296 | 56x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
297 | ||
298 | 56x |
in_rows( |
299 | 56x |
.list = x_stats, |
300 | 56x |
.formats = .formats, |
301 | 56x |
.names = .labels %>% .unlist_keep_nulls(), |
302 | 56x |
.stat_names = .stat_names, |
303 | 56x |
.labels = .labels %>% .unlist_keep_nulls(), |
304 | 56x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
305 |
)
|
|
306 |
}
|
|
307 | ||
308 |
#' @describeIn count_occurrences_by_grade Layout-creating function which can take statistics function
|
|
309 |
#' arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
310 |
#'
|
|
311 |
#' @return
|
|
312 |
#' * `count_occurrences_by_grade()` returns a layout object suitable for passing to further layouting functions,
|
|
313 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
314 |
#' the statistics from `s_count_occurrences_by_grade()` to the table layout.
|
|
315 |
#'
|
|
316 |
#' @examples
|
|
317 |
#' library(dplyr)
|
|
318 |
#'
|
|
319 |
#' df <- data.frame(
|
|
320 |
#' USUBJID = as.character(c(1:6, 1)),
|
|
321 |
#' ARM = factor(c("A", "A", "A", "B", "B", "B", "A"), levels = c("A", "B")),
|
|
322 |
#' AETOXGR = factor(c(1, 2, 3, 4, 1, 2, 3), levels = c(1:5)),
|
|
323 |
#' AESEV = factor(
|
|
324 |
#' x = c("MILD", "MODERATE", "SEVERE", "MILD", "MILD", "MODERATE", "SEVERE"),
|
|
325 |
#' levels = c("MILD", "MODERATE", "SEVERE")
|
|
326 |
#' ),
|
|
327 |
#' stringsAsFactors = FALSE
|
|
328 |
#' )
|
|
329 |
#'
|
|
330 |
#' df_adsl <- df %>%
|
|
331 |
#' select(USUBJID, ARM) %>%
|
|
332 |
#' unique()
|
|
333 |
#'
|
|
334 |
#' # Layout creating function with custom format.
|
|
335 |
#' basic_table() %>%
|
|
336 |
#' split_cols_by("ARM") %>%
|
|
337 |
#' add_colcounts() %>%
|
|
338 |
#' count_occurrences_by_grade(
|
|
339 |
#' var = "AESEV",
|
|
340 |
#' .formats = c("count_fraction" = "xx.xx (xx.xx%)")
|
|
341 |
#' ) %>%
|
|
342 |
#' build_table(df, alt_counts_df = df_adsl)
|
|
343 |
#'
|
|
344 |
#' # Define additional grade groupings.
|
|
345 |
#' grade_groups <- list(
|
|
346 |
#' "-Any-" = c("1", "2", "3", "4", "5"),
|
|
347 |
#' "Grade 1-2" = c("1", "2"),
|
|
348 |
#' "Grade 3-5" = c("3", "4", "5")
|
|
349 |
#' )
|
|
350 |
#'
|
|
351 |
#' basic_table() %>%
|
|
352 |
#' split_cols_by("ARM") %>%
|
|
353 |
#' add_colcounts() %>%
|
|
354 |
#' count_occurrences_by_grade(
|
|
355 |
#' var = "AETOXGR",
|
|
356 |
#' grade_groups = grade_groups,
|
|
357 |
#' only_grade_groups = TRUE
|
|
358 |
#' ) %>%
|
|
359 |
#' build_table(df, alt_counts_df = df_adsl)
|
|
360 |
#'
|
|
361 |
#' @export
|
|
362 |
#' @order 2
|
|
363 |
count_occurrences_by_grade <- function(lyt, |
|
364 |
var,
|
|
365 |
id = "USUBJID", |
|
366 |
grade_groups = list(), |
|
367 |
remove_single = TRUE, |
|
368 |
only_grade_groups = FALSE, |
|
369 |
var_labels = var, |
|
370 |
show_labels = "default", |
|
371 |
riskdiff = FALSE, |
|
372 |
na_str = default_na_str(), |
|
373 |
nested = TRUE, |
|
374 |
...,
|
|
375 |
table_names = var, |
|
376 |
.stats = "count_fraction", |
|
377 |
.stat_names = NULL, |
|
378 |
.formats = list(count_fraction = format_count_fraction_fixed_dp), |
|
379 |
.labels = NULL, |
|
380 |
.indent_mods = NULL) { |
|
381 | 12x |
checkmate::assert_flag(riskdiff) |
382 | 12x |
afun <- if (isFALSE(riskdiff)) a_count_occurrences_by_grade else afun_riskdiff |
383 | ||
384 |
# Process standard extra arguments
|
|
385 | 12x |
extra_args <- list(".stats" = .stats) |
386 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
387 | 12x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
388 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
389 | 1x |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
390 | ||
391 |
# Process additional arguments to the statistic function
|
|
392 | 12x |
extra_args <- c( |
393 | 12x |
extra_args,
|
394 | 12x |
id = id, grade_groups = list(grade_groups), remove_single = remove_single, only_grade_groups = only_grade_groups, |
395 | 12x |
if (!isFALSE(riskdiff)) list(afun = list("s_count_occurrences_by_grade" = a_count_occurrences_by_grade)), |
396 |
...
|
|
397 |
)
|
|
398 | ||
399 |
# Append additional info from layout to the analysis function
|
|
400 | 12x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
401 | 12x |
formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]]) |
402 | ||
403 | 12x |
analyze( |
404 | 12x |
lyt = lyt, |
405 | 12x |
vars = var, |
406 | 12x |
afun = afun, |
407 | 12x |
na_str = na_str, |
408 | 12x |
nested = nested, |
409 | 12x |
extra_args = extra_args, |
410 | 12x |
var_labels = var_labels, |
411 | 12x |
show_labels = show_labels, |
412 | 12x |
table_names = table_names |
413 |
)
|
|
414 |
}
|
|
415 | ||
416 |
#' @describeIn count_occurrences_by_grade Layout-creating function which can take content function arguments
|
|
417 |
#' and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
|
|
418 |
#'
|
|
419 |
#' @return
|
|
420 |
#' * `summarize_occurrences_by_grade()` returns a layout object suitable for passing to further layouting functions,
|
|
421 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted content rows
|
|
422 |
#' containing the statistics from `s_count_occurrences_by_grade()` to the table layout.
|
|
423 |
#'
|
|
424 |
#' @examples
|
|
425 |
#' # Layout creating function with custom format.
|
|
426 |
#' basic_table() %>%
|
|
427 |
#' add_colcounts() %>%
|
|
428 |
#' split_rows_by("ARM", child_labels = "visible", nested = TRUE) %>%
|
|
429 |
#' summarize_occurrences_by_grade(
|
|
430 |
#' var = "AESEV",
|
|
431 |
#' .formats = c("count_fraction" = "xx.xx (xx.xx%)")
|
|
432 |
#' ) %>%
|
|
433 |
#' build_table(df, alt_counts_df = df_adsl)
|
|
434 |
#'
|
|
435 |
#' basic_table() %>%
|
|
436 |
#' add_colcounts() %>%
|
|
437 |
#' split_rows_by("ARM", child_labels = "visible", nested = TRUE) %>%
|
|
438 |
#' summarize_occurrences_by_grade(
|
|
439 |
#' var = "AETOXGR",
|
|
440 |
#' grade_groups = grade_groups
|
|
441 |
#' ) %>%
|
|
442 |
#' build_table(df, alt_counts_df = df_adsl)
|
|
443 |
#'
|
|
444 |
#' @export
|
|
445 |
#' @order 3
|
|
446 |
summarize_occurrences_by_grade <- function(lyt, |
|
447 |
var,
|
|
448 |
id = "USUBJID", |
|
449 |
grade_groups = list(), |
|
450 |
remove_single = TRUE, |
|
451 |
only_grade_groups = FALSE, |
|
452 |
riskdiff = FALSE, |
|
453 |
na_str = default_na_str(), |
|
454 |
...,
|
|
455 |
.stats = "count_fraction", |
|
456 |
.stat_names = NULL, |
|
457 |
.formats = list(count_fraction = format_count_fraction_fixed_dp), |
|
458 |
.labels = NULL, |
|
459 |
.indent_mods = 0L) { |
|
460 | 6x |
checkmate::assert_flag(riskdiff) |
461 | 6x |
afun <- if (isFALSE(riskdiff)) a_count_occurrences_by_grade else afun_riskdiff |
462 | ||
463 |
# Process standard extra arguments
|
|
464 | 6x |
extra_args <- list(".stats" = .stats) |
465 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
466 | 6x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
467 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
468 | 6x |
if (is.null(.indent_mods)) { |
469 | ! |
indent_mod <- 0L |
470 | 6x |
} else if (length(.indent_mods) == 1) { |
471 | 6x |
indent_mod <- .indent_mods |
472 |
} else { |
|
473 | ! |
indent_mod <- 0L |
474 | ! |
extra_args[[".indent_mods"]] <- .indent_mods |
475 |
}
|
|
476 | ||
477 |
# Process additional arguments to the statistic function
|
|
478 | 6x |
extra_args <- c( |
479 | 6x |
extra_args,
|
480 | 6x |
id = id, grade_groups = list(grade_groups), remove_single = remove_single, only_grade_groups = only_grade_groups, |
481 | 6x |
if (!isFALSE(riskdiff)) list(afun = list("s_count_occurrences_by_grade" = a_count_occurrences_by_grade)), |
482 |
...
|
|
483 |
)
|
|
484 | ||
485 |
# Append additional info from layout to the analysis function
|
|
486 | 6x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
487 | 6x |
formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]]) |
488 | ||
489 | 6x |
summarize_row_groups( |
490 | 6x |
lyt = lyt, |
491 | 6x |
var = var, |
492 | 6x |
cfun = afun, |
493 | 6x |
na_str = na_str, |
494 | 6x |
extra_args = extra_args, |
495 | 6x |
indent_mod = indent_mod |
496 |
)
|
|
497 |
}
|
1 |
#' Confidence intervals for a difference of binomials
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("experimental")`
|
|
4 |
#'
|
|
5 |
#' Several confidence intervals for the difference between proportions.
|
|
6 |
#'
|
|
7 |
#' @name desctools_binom
|
|
8 |
NULL
|
|
9 | ||
10 |
#' Recycle list of parameters
|
|
11 |
#'
|
|
12 |
#' This function recycles all supplied elements to the maximal dimension.
|
|
13 |
#'
|
|
14 |
#' @param ... (`any`)\cr elements to recycle.
|
|
15 |
#'
|
|
16 |
#' @return A `list`.
|
|
17 |
#'
|
|
18 |
#' @keywords internal
|
|
19 |
#' @noRd
|
|
20 |
h_recycle <- function(...) { |
|
21 | 78x |
lst <- list(...) |
22 | 78x |
maxdim <- max(lengths(lst)) |
23 | 78x |
res <- lapply(lst, rep, length.out = maxdim) |
24 | 78x |
attr(res, "maxdim") <- maxdim |
25 | 78x |
return(res) |
26 |
}
|
|
27 | ||
28 |
#' @describeIn desctools_binom Several confidence intervals for the difference between proportions.
|
|
29 |
#'
|
|
30 |
#' @return A `matrix` of 3 values:
|
|
31 |
#' * `est`: estimate of proportion difference.
|
|
32 |
#' * `lwr.ci`: estimate of lower end of the confidence interval.
|
|
33 |
#' * `upr.ci`: estimate of upper end of the confidence interval.
|
|
34 |
#'
|
|
35 |
#' @keywords internal
|
|
36 |
desctools_binom <- function(x1, |
|
37 |
n1,
|
|
38 |
x2,
|
|
39 |
n2,
|
|
40 |
conf.level = 0.95, # nolint |
|
41 |
sides = c("two.sided", "left", "right"), |
|
42 |
method = c( |
|
43 |
"ac", "wald", "waldcc", "score", "scorecc", "mn", "mee", "blj", "ha", "hal", "jp" |
|
44 |
)) { |
|
45 | 26x |
if (missing(sides)) { |
46 | 26x |
sides <- match.arg(sides) |
47 |
}
|
|
48 | 26x |
if (missing(method)) { |
49 | 1x |
method <- match.arg(method) |
50 |
}
|
|
51 | 26x |
iBinomDiffCI <- function(x1, n1, x2, n2, conf.level, sides, method) { # nolint |
52 | 26x |
if (sides != "two.sided") { |
53 | ! |
conf.level <- 1 - 2 * (1 - conf.level) # nolint |
54 |
}
|
|
55 | 26x |
alpha <- 1 - conf.level |
56 | 26x |
kappa <- stats::qnorm(1 - alpha / 2) |
57 | 26x |
p1_hat <- x1 / n1 |
58 | 26x |
p2_hat <- x2 / n2 |
59 | 26x |
est <- p1_hat - p2_hat |
60 | 26x |
switch(method, |
61 | 26x |
wald = { |
62 | 4x |
vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2 |
63 | 4x |
term2 <- kappa * sqrt(vd) |
64 | 4x |
ci_lwr <- max(-1, est - term2) |
65 | 4x |
ci_upr <- min(1, est + term2) |
66 |
},
|
|
67 | 26x |
waldcc = { |
68 | 6x |
vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2 |
69 | 6x |
term2 <- kappa * sqrt(vd) |
70 | 6x |
term2 <- term2 + 0.5 * (1 / n1 + 1 / n2) |
71 | 6x |
ci_lwr <- max(-1, est - term2) |
72 | 6x |
ci_upr <- min(1, est + term2) |
73 |
},
|
|
74 | 26x |
ac = { |
75 | 2x |
n1 <- n1 + 2 |
76 | 2x |
n2 <- n2 + 2 |
77 | 2x |
x1 <- x1 + 1 |
78 | 2x |
x2 <- x2 + 1 |
79 | 2x |
p1_hat <- x1 / n1 |
80 | 2x |
p2_hat <- x2 / n2 |
81 | 2x |
est1 <- p1_hat - p2_hat |
82 | 2x |
vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2 |
83 | 2x |
term2 <- kappa * sqrt(vd) |
84 | 2x |
ci_lwr <- max(-1, est1 - term2) |
85 | 2x |
ci_upr <- min(1, est1 + term2) |
86 |
},
|
|
87 | 26x |
exact = { |
88 | ! |
ci_lwr <- NA |
89 | ! |
ci_upr <- NA |
90 |
},
|
|
91 | 26x |
score = { |
92 | 3x |
w1 <- desctools_binomci( |
93 | 3x |
x = x1, n = n1, conf.level = conf.level, |
94 | 3x |
method = "wilson" |
95 |
)
|
|
96 | 3x |
w2 <- desctools_binomci( |
97 | 3x |
x = x2, n = n2, conf.level = conf.level, |
98 | 3x |
method = "wilson" |
99 |
)
|
|
100 | 3x |
l1 <- w1[2] |
101 | 3x |
u1 <- w1[3] |
102 | 3x |
l2 <- w2[2] |
103 | 3x |
u2 <- w2[3] |
104 | 3x |
ci_lwr <- est - kappa * sqrt(l1 * (1 - l1) / n1 + u2 * (1 - u2) / n2) |
105 | 3x |
ci_upr <- est + kappa * sqrt(u1 * (1 - u1) / n1 + l2 * (1 - l2) / n2) |
106 |
},
|
|
107 | 26x |
scorecc = { |
108 | 1x |
w1 <- desctools_binomci( |
109 | 1x |
x = x1, n = n1, conf.level = conf.level, |
110 | 1x |
method = "wilsoncc" |
111 |
)
|
|
112 | 1x |
w2 <- desctools_binomci( |
113 | 1x |
x = x2, n = n2, conf.level = conf.level, |
114 | 1x |
method = "wilsoncc" |
115 |
)
|
|
116 | 1x |
l1 <- w1[2] |
117 | 1x |
u1 <- w1[3] |
118 | 1x |
l2 <- w2[2] |
119 | 1x |
u2 <- w2[3] |
120 | 1x |
ci_lwr <- max(-1, est - sqrt((p1_hat - l1)^2 + (u2 - p2_hat)^2)) |
121 | 1x |
ci_upr <- min(1, est + sqrt((u1 - p1_hat)^2 + (p2_hat - l2)^2)) |
122 |
},
|
|
123 | 26x |
mee = { |
124 | 1x |
.score <- function(p1, n1, p2, n2, dif) { |
125 | ! |
if (dif > 1) dif <- 1 |
126 | ! |
if (dif < -1) dif <- -1 |
127 | 24x |
diff <- p1 - p2 - dif |
128 | 24x |
if (abs(diff) == 0) { |
129 | ! |
res <- 0 |
130 |
} else { |
|
131 | 24x |
t <- n2 / n1 |
132 | 24x |
a <- 1 + t |
133 | 24x |
b <- -(1 + t + p1 + t * p2 + dif * (t + 2)) |
134 | 24x |
c <- dif * dif + dif * (2 * p1 + t + 1) + p1 + t * p2 |
135 | 24x |
d <- -p1 * dif * (1 + dif) |
136 | 24x |
v <- (b / a / 3)^3 - b * c / (6 * a * a) + d / a / 2 |
137 | 24x |
if (abs(v) < .Machine$double.eps) v <- 0 |
138 | 24x |
s <- sqrt((b / a / 3)^2 - c / a / 3) |
139 | 24x |
u <- ifelse(v > 0, 1, -1) * s |
140 | 24x |
w <- (3.141592654 + acos(v / u^3)) / 3 |
141 | 24x |
p1d <- 2 * u * cos(w) - b / a / 3 |
142 | 24x |
p2d <- p1d - dif |
143 | 24x |
n <- n1 + n2 |
144 | 24x |
res <- (p1d * (1 - p1d) / n1 + p2d * (1 - p2d) / n2) |
145 |
}
|
|
146 | 24x |
return(sqrt(res)) |
147 |
}
|
|
148 | 1x |
pval <- function(delta) { |
149 | 24x |
z <- (est - delta) / .score(p1_hat, n1, p2_hat, n2, delta) |
150 | 24x |
2 * min(stats::pnorm(z), 1 - stats::pnorm(z)) |
151 |
}
|
|
152 | 1x |
ci_lwr <- max(-1, stats::uniroot(function(delta) { |
153 | 12x |
pval(delta) - alpha |
154 | 1x |
}, interval = c(-1 + 1e-06, est - 1e-06))$root) |
155 | 1x |
ci_upr <- min(1, stats::uniroot(function(delta) { |
156 | 12x |
pval(delta) - alpha |
157 | 1x |
}, interval = c(est + 1e-06, 1 - 1e-06))$root) |
158 |
},
|
|
159 | 26x |
blj = { |
160 | 1x |
p1_dash <- (x1 + 0.5) / (n1 + 1) |
161 | 1x |
p2_dash <- (x2 + 0.5) / (n2 + 1) |
162 | 1x |
vd <- p1_dash * (1 - p1_dash) / n1 + p2_dash * (1 - p2_dash) / n2 |
163 | 1x |
term2 <- kappa * sqrt(vd) |
164 | 1x |
est_dash <- p1_dash - p2_dash |
165 | 1x |
ci_lwr <- max(-1, est_dash - term2) |
166 | 1x |
ci_upr <- min(1, est_dash + term2) |
167 |
},
|
|
168 | 26x |
ha = { |
169 | 5x |
term2 <- 1 / |
170 | 5x |
(2 * min(n1, n2)) + kappa * sqrt(p1_hat * (1 - p1_hat) / (n1 - 1) + p2_hat * (1 - p2_hat) / (n2 - 1)) |
171 | 5x |
ci_lwr <- max(-1, est - term2) |
172 | 5x |
ci_upr <- min(1, est + term2) |
173 |
},
|
|
174 | 26x |
mn = { |
175 | 1x |
.conf <- function(x1, n1, x2, n2, z, lower = FALSE) { |
176 | 2x |
p1 <- x1 / n1 |
177 | 2x |
p2 <- x2 / n2 |
178 | 2x |
p_hat <- p1 - p2 |
179 | 2x |
dp <- 1 + ifelse(lower, 1, -1) * p_hat |
180 | 2x |
i <- 1 |
181 | 2x |
while (i <= 50) { |
182 | 46x |
dp <- 0.5 * dp |
183 | 46x |
y <- p_hat + ifelse(lower, -1, 1) * dp |
184 | 46x |
score <- .score(p1, n1, p2, n2, y) |
185 | 46x |
if (score < z) { |
186 | 20x |
p_hat <- y |
187 |
}
|
|
188 | 46x |
if ((dp < 1e-07) || (abs(z - score) < 1e-06)) { |
189 | 2x |
(break)() |
190 |
} else { |
|
191 | 44x |
i <- i + 1 |
192 |
}
|
|
193 |
}
|
|
194 | 2x |
return(y) |
195 |
}
|
|
196 | 1x |
.score <- function(p1, n1, p2, n2, dif) { |
197 | 46x |
diff <- p1 - p2 - dif |
198 | 46x |
if (abs(diff) == 0) { |
199 | ! |
res <- 0 |
200 |
} else { |
|
201 | 46x |
t <- n2 / n1 |
202 | 46x |
a <- 1 + t |
203 | 46x |
b <- -(1 + t + p1 + t * p2 + dif * (t + 2)) |
204 | 46x |
c <- dif * dif + dif * (2 * p1 + t + 1) + p1 + t * p2 |
205 | 46x |
d <- -p1 * dif * (1 + dif) |
206 | 46x |
v <- (b / a / 3)^3 - b * c / (6 * a * a) + d / a / 2 |
207 | 46x |
s <- sqrt((b / a / 3)^2 - c / a / 3) |
208 | 46x |
u <- ifelse(v > 0, 1, -1) * s |
209 | 46x |
w <- (3.141592654 + acos(v / u^3)) / 3 |
210 | 46x |
p1d <- 2 * u * cos(w) - b / a / 3 |
211 | 46x |
p2d <- p1d - dif |
212 | 46x |
n <- n1 + n2 |
213 | 46x |
var <- (p1d * (1 - p1d) / n1 + p2d * (1 - p2d) / n2) * n / (n - 1) |
214 | 46x |
res <- diff^2 / var |
215 |
}
|
|
216 | 46x |
return(res) |
217 |
}
|
|
218 | 1x |
z <- stats::qchisq(conf.level, 1) |
219 | 1x |
ci_lwr <- max(-1, .conf(x1, n1, x2, n2, z, TRUE)) |
220 | 1x |
ci_upr <- min(1, .conf(x1, n1, x2, n2, z, FALSE)) |
221 |
},
|
|
222 | 26x |
beal = { |
223 | ! |
a <- p1_hat + p2_hat |
224 | ! |
b <- p1_hat - p2_hat |
225 | ! |
u <- ((1 / n1) + (1 / n2)) / 4 |
226 | ! |
v <- ((1 / n1) - (1 / n2)) / 4 |
227 | ! |
V <- u * ((2 - a) * a - b^2) + 2 * v * (1 - a) * b # nolint |
228 | ! |
z <- stats::qchisq(p = 1 - alpha / 2, df = 1) |
229 | ! |
A <- sqrt(z * (V + z * u^2 * (2 - a) * a + z * v^2 * (1 - a)^2)) # nolint |
230 | ! |
B <- (b + z * v * (1 - a)) / (1 + z * u) # nolint |
231 | ! |
ci_lwr <- max(-1, B - A / (1 + z * u)) |
232 | ! |
ci_upr <- min(1, B + A / (1 + z * u)) |
233 |
},
|
|
234 | 26x |
hal = { |
235 | 1x |
psi <- (p1_hat + p2_hat) / 2 |
236 | 1x |
u <- (1 / n1 + 1 / n2) / 4 |
237 | 1x |
v <- (1 / n1 - 1 / n2) / 4 |
238 | 1x |
z <- kappa |
239 | 1x |
theta <- ((p1_hat - p2_hat) + z^2 * v * (1 - 2 * psi)) / (1 + z^2 * u) |
240 | 1x |
w <- z / (1 + z^2 * u) * sqrt(u * (4 * psi * (1 - psi) - (p1_hat - p2_hat)^2) + 2 * v * (1 - 2 * psi) * |
241 | 1x |
(p1_hat - p2_hat) + 4 * z^2 * u^2 * (1 - psi) * psi + z^2 * v^2 * (1 - 2 * psi)^2) # nolint |
242 | 1x |
c(theta + w, theta - w) |
243 | 1x |
ci_lwr <- max(-1, theta - w) |
244 | 1x |
ci_upr <- min(1, theta + w) |
245 |
},
|
|
246 | 26x |
jp = { |
247 | 1x |
psi <- 0.5 * ((x1 + 0.5) / (n1 + 1) + (x2 + 0.5) / (n2 + 1)) |
248 | 1x |
u <- (1 / n1 + 1 / n2) / 4 |
249 | 1x |
v <- (1 / n1 - 1 / n2) / 4 |
250 | 1x |
z <- kappa |
251 | 1x |
theta <- ((p1_hat - p2_hat) + z^2 * v * (1 - 2 * psi)) / (1 + z^2 * u) |
252 | 1x |
w <- z / (1 + z^2 * u) * sqrt(u * (4 * psi * (1 - psi) - (p1_hat - p2_hat)^2) + 2 * v * (1 - 2 * psi) * |
253 | 1x |
(p1_hat - p2_hat) + 4 * z^2 * u^2 * (1 - psi) * psi + z^2 * v^2 * (1 - 2 * psi)^2) # nolint |
254 | 1x |
c(theta + w, theta - w) |
255 | 1x |
ci_lwr <- max(-1, theta - w) |
256 | 1x |
ci_upr <- min(1, theta + w) |
257 |
},
|
|
258 |
)
|
|
259 | 26x |
ci <- c( |
260 | 26x |
est = est, lwr.ci = min(ci_lwr, ci_upr), |
261 | 26x |
upr.ci = max(ci_lwr, ci_upr) |
262 |
)
|
|
263 | 26x |
if (sides == "left") { |
264 | ! |
ci[3] <- 1 |
265 | 26x |
} else if (sides == "right") { |
266 | ! |
ci[2] <- -1 |
267 |
}
|
|
268 | 26x |
return(ci) |
269 |
}
|
|
270 | 26x |
method <- match.arg(arg = method, several.ok = TRUE) |
271 | 26x |
sides <- match.arg(arg = sides, several.ok = TRUE) |
272 | 26x |
lst <- h_recycle( |
273 | 26x |
x1 = x1, n1 = n1, x2 = x2, n2 = n2, conf.level = conf.level, |
274 | 26x |
sides = sides, method = method |
275 |
)
|
|
276 | 26x |
res <- t(sapply(1:attr(lst, "maxdim"), function(i) { |
277 | 26x |
iBinomDiffCI( |
278 | 26x |
x1 = lst$x1[i], |
279 | 26x |
n1 = lst$n1[i], x2 = lst$x2[i], n2 = lst$n2[i], conf.level = lst$conf.level[i], |
280 | 26x |
sides = lst$sides[i], method = lst$method[i] |
281 |
)
|
|
282 |
})) |
|
283 | 26x |
lgn <- h_recycle(x1 = if (is.null(names(x1))) { |
284 | 26x |
paste("x1", seq_along(x1), sep = ".") |
285 |
} else { |
|
286 | ! |
names(x1) |
287 | 26x |
}, n1 = if (is.null(names(n1))) { |
288 | 26x |
paste("n1", seq_along(n1), sep = ".") |
289 |
} else { |
|
290 | ! |
names(n1) |
291 | 26x |
}, x2 = if (is.null(names(x2))) { |
292 | 26x |
paste("x2", seq_along(x2), sep = ".") |
293 |
} else { |
|
294 | ! |
names(x2) |
295 | 26x |
}, n2 = if (is.null(names(n2))) { |
296 | 26x |
paste("n2", seq_along(n2), sep = ".") |
297 |
} else { |
|
298 | ! |
names(n2) |
299 | 26x |
}, conf.level = conf.level, sides = sides, method = method) |
300 | 26x |
xn <- apply(as.data.frame(lgn[sapply(lgn, function(x) { |
301 | 182x |
length(unique(x)) != |
302 | 182x |
1
|
303 | 26x |
})]), 1, paste, collapse = ":") |
304 | 26x |
rownames(res) <- xn |
305 | 26x |
return(res) |
306 |
}
|
|
307 | ||
308 |
#' @describeIn desctools_binom Compute confidence intervals for binomial proportions.
|
|
309 |
#'
|
|
310 |
#' @param x (`integer(1)`)\cr number of successes.
|
|
311 |
#' @param n (`integer(1)`)\cr number of trials.
|
|
312 |
#' @param conf.level (`proportion`)\cr confidence level, defaults to 0.95.
|
|
313 |
#' @param sides (`string`)\cr side of the confidence interval to compute. Must be one of `"two-sided"` (default),
|
|
314 |
#' `"left"`, or `"right"`.
|
|
315 |
#' @param method (`string`)\cr method to use. Can be one out of: `"wald"`, `"wilson"`, `"wilsoncc"`,
|
|
316 |
#' `"agresti-coull"`, `"jeffreys"`, `"modified wilson"`, `"modified jeffreys"`, `"clopper-pearson"`, `"arcsine"`,
|
|
317 |
#' `"logit"`, `"witting"`, `"pratt"`, `"midp"`, `"lik"`, and `"blaker"`.
|
|
318 |
#'
|
|
319 |
#' @return A `matrix` with 3 columns containing:
|
|
320 |
#' * `est`: estimate of proportion difference.
|
|
321 |
#' * `lwr.ci`: lower end of the confidence interval.
|
|
322 |
#' * `upr.ci`: upper end of the confidence interval.
|
|
323 |
#'
|
|
324 |
#' @keywords internal
|
|
325 |
desctools_binomci <- function(x, |
|
326 |
n,
|
|
327 |
conf.level = 0.95, # nolint |
|
328 |
sides = c("two.sided", "left", "right"), |
|
329 |
method = c( |
|
330 |
"wilson", "wald", "waldcc", "agresti-coull", |
|
331 |
"jeffreys", "modified wilson", "wilsoncc", "modified jeffreys", |
|
332 |
"clopper-pearson", "arcsine", "logit", "witting", "pratt", |
|
333 |
"midp", "lik", "blaker" |
|
334 |
),
|
|
335 |
rand = 123, |
|
336 |
tol = 1e-05) { |
|
337 | 26x |
if (missing(method)) { |
338 | 1x |
method <- "wilson" |
339 |
}
|
|
340 | 26x |
if (missing(sides)) { |
341 | 25x |
sides <- "two.sided" |
342 |
}
|
|
343 | 26x |
iBinomCI <- function(x, n, conf.level = 0.95, sides = c("two.sided", "left", "right"), # nolint |
344 | 26x |
method = c( |
345 | 26x |
"wilson", "wilsoncc", "wald", |
346 | 26x |
"waldcc", "agresti-coull", "jeffreys", "modified wilson", |
347 | 26x |
"modified jeffreys", "clopper-pearson", "arcsine", "logit", |
348 | 26x |
"witting", "pratt", "midp", "lik", "blaker" |
349 |
),
|
|
350 | 26x |
rand = 123, |
351 | 26x |
tol = 1e-05) { |
352 | 26x |
if (length(x) != 1) { |
353 | ! |
stop("'x' has to be of length 1 (number of successes)") |
354 |
}
|
|
355 | 26x |
if (length(n) != 1) { |
356 | ! |
stop("'n' has to be of length 1 (number of trials)") |
357 |
}
|
|
358 | 26x |
if (length(conf.level) != 1) { |
359 | ! |
stop("'conf.level' has to be of length 1 (confidence level)") |
360 |
}
|
|
361 | 26x |
if (conf.level < 0.5 || conf.level > 1) { |
362 | ! |
stop("'conf.level' has to be in [0.5, 1]") |
363 |
}
|
|
364 | 26x |
sides <- match.arg(sides, choices = c( |
365 | 26x |
"two.sided", "left", |
366 | 26x |
"right"
|
367 | 26x |
), several.ok = FALSE) |
368 | 26x |
if (sides != "two.sided") { |
369 | 1x |
conf.level <- 1 - 2 * (1 - conf.level) # nolint |
370 |
}
|
|
371 | 26x |
alpha <- 1 - conf.level |
372 | 26x |
kappa <- stats::qnorm(1 - alpha / 2) |
373 | 26x |
p_hat <- x / n |
374 | 26x |
q_hat <- 1 - p_hat |
375 | 26x |
est <- p_hat |
376 | 26x |
switch(match.arg(arg = method, choices = c( |
377 | 26x |
"wilson",
|
378 | 26x |
"wald", "waldcc", "wilsoncc", "agresti-coull", "jeffreys", |
379 | 26x |
"modified wilson", "modified jeffreys", "clopper-pearson", |
380 | 26x |
"arcsine", "logit", "witting", "pratt", "midp", "lik", |
381 | 26x |
"blaker"
|
382 |
)), |
|
383 | 26x |
wald = { |
384 | 1x |
term2 <- kappa * sqrt(p_hat * q_hat) / sqrt(n) |
385 | 1x |
ci_lwr <- max(0, p_hat - term2) |
386 | 1x |
ci_upr <- min(1, p_hat + term2) |
387 |
},
|
|
388 | 26x |
waldcc = { |
389 | 1x |
term2 <- kappa * sqrt(p_hat * q_hat) / sqrt(n) |
390 | 1x |
term2 <- term2 + 1 / (2 * n) |
391 | 1x |
ci_lwr <- max(0, p_hat - term2) |
392 | 1x |
ci_upr <- min(1, p_hat + term2) |
393 |
},
|
|
394 | 26x |
wilson = { |
395 | 8x |
term1 <- (x + kappa^2 / 2) / (n + kappa^2) |
396 | 8x |
term2 <- kappa * sqrt(n) / (n + kappa^2) * sqrt(p_hat * q_hat + kappa^2 / (4 * n)) |
397 | 8x |
ci_lwr <- max(0, term1 - term2) |
398 | 8x |
ci_upr <- min(1, term1 + term2) |
399 |
},
|
|
400 | 26x |
wilsoncc = { |
401 | 3x |
lci <- ( |
402 | 3x |
2 * x + kappa^2 - 1 - kappa * sqrt(kappa^2 - 2 - 1 / n + 4 * p_hat * (n * q_hat + 1)) |
403 | 3x |
) / (2 * (n + kappa^2)) |
404 | 3x |
uci <- ( |
405 | 3x |
2 * x + kappa^2 + 1 + kappa * sqrt(kappa^2 + 2 - 1 / n + 4 * p_hat * (n * q_hat - 1)) |
406 | 3x |
) / (2 * (n + kappa^2)) |
407 | 3x |
ci_lwr <- max(0, ifelse(p_hat == 0, 0, lci)) |
408 | 3x |
ci_upr <- min(1, ifelse(p_hat == 1, 1, uci)) |
409 |
},
|
|
410 | 26x |
`agresti-coull` = { |
411 | 1x |
x_tilde <- x + kappa^2 / 2 |
412 | 1x |
n_tilde <- n + kappa^2 |
413 | 1x |
p_tilde <- x_tilde / n_tilde |
414 | 1x |
q_tilde <- 1 - p_tilde |
415 | 1x |
est <- p_tilde |
416 | 1x |
term2 <- kappa * sqrt(p_tilde * q_tilde) / sqrt(n_tilde) |
417 | 1x |
ci_lwr <- max(0, p_tilde - term2) |
418 | 1x |
ci_upr <- min(1, p_tilde + term2) |
419 |
},
|
|
420 | 26x |
jeffreys = { |
421 | 1x |
if (x == 0) { |
422 | ! |
ci_lwr <- 0 |
423 |
} else { |
|
424 | 1x |
ci_lwr <- stats::qbeta( |
425 | 1x |
alpha / 2, |
426 | 1x |
x + 0.5, n - x + 0.5 |
427 |
)
|
|
428 |
}
|
|
429 | 1x |
if (x == n) { |
430 | ! |
ci_upr <- 1 |
431 |
} else { |
|
432 | 1x |
ci_upr <- stats::qbeta(1 - alpha / 2, x + 0.5, n - x + 0.5) |
433 |
}
|
|
434 |
},
|
|
435 | 26x |
`modified wilson` = { |
436 | 1x |
term1 <- (x + kappa^2 / 2) / (n + kappa^2) |
437 | 1x |
term2 <- kappa * sqrt(n) / (n + kappa^2) * sqrt(p_hat * q_hat + kappa^2 / (4 * n)) |
438 | 1x |
if ((n <= 50 & x %in% c(1, 2)) | (n >= 51 & x %in% c(1:3))) { |
439 | ! |
ci_lwr <- 0.5 * stats::qchisq(alpha, 2 * x) / n |
440 |
} else { |
|
441 | 1x |
ci_lwr <- max(0, term1 - term2) |
442 |
}
|
|
443 | 1x |
if ((n <= 50 & x %in% c(n - 1, n - 2)) | (n >= 51 & x %in% c(n - (1:3)))) { |
444 | ! |
ci_upr <- 1 - 0.5 * stats::qchisq( |
445 | ! |
alpha,
|
446 | ! |
2 * (n - x) |
447 | ! |
) / n |
448 |
} else { |
|
449 | 1x |
ci_upr <- min(1, term1 + term2) |
450 |
}
|
|
451 |
},
|
|
452 | 26x |
`modified jeffreys` = { |
453 | 1x |
if (x == n) { |
454 | ! |
ci_lwr <- (alpha / 2)^(1 / n) |
455 |
} else { |
|
456 | 1x |
if (x <= 1) { |
457 | ! |
ci_lwr <- 0 |
458 |
} else { |
|
459 | 1x |
ci_lwr <- stats::qbeta( |
460 | 1x |
alpha / 2, |
461 | 1x |
x + 0.5, n - x + 0.5 |
462 |
)
|
|
463 |
}
|
|
464 |
}
|
|
465 | 1x |
if (x == 0) { |
466 | ! |
ci_upr <- 1 - (alpha / 2)^(1 / n) |
467 |
} else { |
|
468 | 1x |
if (x >= n - 1) { |
469 | ! |
ci_upr <- 1 |
470 |
} else { |
|
471 | 1x |
ci_upr <- stats::qbeta(1 - alpha / 2, x + 0.5, n - x + 0.5) |
472 |
}
|
|
473 |
}
|
|
474 |
},
|
|
475 | 26x |
`clopper-pearson` = { |
476 | 1x |
ci_lwr <- stats::qbeta(alpha / 2, x, n - x + 1) |
477 | 1x |
ci_upr <- stats::qbeta(1 - alpha / 2, x + 1, n - x) |
478 |
},
|
|
479 | 26x |
arcsine = { |
480 | 1x |
p_tilde <- (x + 0.375) / (n + 0.75) |
481 | 1x |
est <- p_tilde |
482 | 1x |
ci_lwr <- sin(asin(sqrt(p_tilde)) - 0.5 * kappa / sqrt(n))^2 |
483 | 1x |
ci_upr <- sin(asin(sqrt(p_tilde)) + 0.5 * kappa / sqrt(n))^2 |
484 |
},
|
|
485 | 26x |
logit = { |
486 | 1x |
lambda_hat <- log(x / (n - x)) |
487 | 1x |
V_hat <- n / (x * (n - x)) # nolint |
488 | 1x |
lambda_lower <- lambda_hat - kappa * sqrt(V_hat) |
489 | 1x |
lambda_upper <- lambda_hat + kappa * sqrt(V_hat) |
490 | 1x |
ci_lwr <- exp(lambda_lower) / (1 + exp(lambda_lower)) |
491 | 1x |
ci_upr <- exp(lambda_upper) / (1 + exp(lambda_upper)) |
492 |
},
|
|
493 | 26x |
witting = { |
494 | 1x |
set.seed(rand) |
495 | 1x |
x_tilde <- x + stats::runif(1, min = 0, max = 1) |
496 | 1x |
pbinom_abscont <- function(q, size, prob) { |
497 | 22x |
v <- trunc(q) |
498 | 22x |
term1 <- stats::pbinom(v - 1, size = size, prob = prob) |
499 | 22x |
term2 <- (q - v) * stats::dbinom(v, size = size, prob = prob) |
500 | 22x |
return(term1 + term2) |
501 |
}
|
|
502 | 1x |
qbinom_abscont <- function(p, size, x) { |
503 | 2x |
fun <- function(prob, size, x, p) { |
504 | 22x |
pbinom_abscont(x, size, prob) - p |
505 |
}
|
|
506 | 2x |
stats::uniroot(fun, |
507 | 2x |
interval = c(0, 1), size = size, |
508 | 2x |
x = x, p = p |
509 | 2x |
)$root |
510 |
}
|
|
511 | 1x |
ci_lwr <- qbinom_abscont(1 - alpha, size = n, x = x_tilde) |
512 | 1x |
ci_upr <- qbinom_abscont(alpha, size = n, x = x_tilde) |
513 |
},
|
|
514 | 26x |
pratt = { |
515 | 1x |
if (x == 0) { |
516 | ! |
ci_lwr <- 0 |
517 | ! |
ci_upr <- 1 - alpha^(1 / n) |
518 | 1x |
} else if (x == 1) { |
519 | ! |
ci_lwr <- 1 - (1 - alpha / 2)^(1 / n) |
520 | ! |
ci_upr <- 1 - (alpha / 2)^(1 / n) |
521 | 1x |
} else if (x == (n - 1)) { |
522 | ! |
ci_lwr <- (alpha / 2)^(1 / n) |
523 | ! |
ci_upr <- (1 - alpha / 2)^(1 / n) |
524 | 1x |
} else if (x == n) { |
525 | ! |
ci_lwr <- alpha^(1 / n) |
526 | ! |
ci_upr <- 1 |
527 |
} else { |
|
528 | 1x |
z <- stats::qnorm(1 - alpha / 2) |
529 | 1x |
A <- ((x + 1) / (n - x))^2 # nolint |
530 | 1x |
B <- 81 * (x + 1) * (n - x) - 9 * n - 8 # nolint |
531 | 1x |
C <- (0 - 3) * z * sqrt(9 * (x + 1) * (n - x) * (9 * n + 5 - z^2) + n + 1) # nolint |
532 | 1x |
D <- 81 * (x + 1)^2 - 9 * (x + 1) * (2 + z^2) + 1 # nolint |
533 | 1x |
E <- 1 + A * ((B + C) / D)^3 # nolint |
534 | 1x |
ci_upr <- 1 / E |
535 | 1x |
A <- (x / (n - x - 1))^2 # nolint |
536 | 1x |
B <- 81 * x * (n - x - 1) - 9 * n - 8 # nolint |
537 | 1x |
C <- 3 * z * sqrt(9 * x * (n - x - 1) * (9 * n + 5 - z^2) + n + 1) # nolint |
538 | 1x |
D <- 81 * x^2 - 9 * x * (2 + z^2) + 1 # nolint |
539 | 1x |
E <- 1 + A * ((B + C) / D)^3 # nolint |
540 | 1x |
ci_lwr <- 1 / E |
541 |
}
|
|
542 |
},
|
|
543 | 26x |
midp = { |
544 | 1x |
f_low <- function(pi, x, n) { |
545 | 12x |
1 / 2 * stats::dbinom(x, size = n, prob = pi) + stats::pbinom(x, |
546 | 12x |
size = n, prob = pi, lower.tail = FALSE |
547 |
) - |
|
548 | 12x |
(1 - conf.level) / 2 |
549 |
}
|
|
550 | 1x |
f_up <- function(pi, x, n) { |
551 | 12x |
1 / 2 * stats::dbinom(x, size = n, prob = pi) + stats::pbinom(x - 1, size = n, prob = pi) - (1 - conf.level) / 2 |
552 |
}
|
|
553 | 1x |
ci_lwr <- 0 |
554 | 1x |
ci_upr <- 1 |
555 | 1x |
if (x != 0) { |
556 | 1x |
ci_lwr <- stats::uniroot(f_low, |
557 | 1x |
interval = c(0, p_hat), |
558 | 1x |
x = x, n = n |
559 | 1x |
)$root |
560 |
}
|
|
561 | 1x |
if (x != n) { |
562 | 1x |
ci_upr <- stats::uniroot(f_up, interval = c( |
563 | 1x |
p_hat,
|
564 | 1x |
1
|
565 | 1x |
), x = x, n = n)$root |
566 |
}
|
|
567 |
},
|
|
568 | 26x |
lik = { |
569 | 2x |
ci_lwr <- 0 |
570 | 2x |
ci_upr <- 1 |
571 | 2x |
z <- stats::qnorm(1 - alpha * 0.5) |
572 | 2x |
tol <- .Machine$double.eps^0.5 |
573 | 2x |
BinDev <- function(y, x, mu, wt, bound = 0, tol = .Machine$double.eps^0.5, # nolint |
574 |
...) { |
|
575 | 40x |
ll_y <- ifelse(y %in% c(0, 1), 0, stats::dbinom(x, wt, |
576 | 40x |
y,
|
577 | 40x |
log = TRUE |
578 |
)) |
|
579 | 40x |
ll_mu <- ifelse(mu %in% c(0, 1), 0, stats::dbinom(x, |
580 | 40x |
wt, mu, |
581 | 40x |
log = TRUE |
582 |
)) |
|
583 | 40x |
res <- ifelse(abs(y - mu) < tol, 0, sign(y - mu) * sqrt(-2 * (ll_y - ll_mu))) |
584 | 40x |
return(res - bound) |
585 |
}
|
|
586 | 2x |
if (x != 0 && tol < p_hat) { |
587 | 2x |
ci_lwr <- if (BinDev( |
588 | 2x |
tol, x, p_hat, n, -z, |
589 | 2x |
tol
|
590 | 2x |
) <= 0) { |
591 | 2x |
stats::uniroot( |
592 | 2x |
f = BinDev, interval = c(tol, if (p_hat < tol || p_hat == 1) { |
593 | ! |
1 - tol |
594 |
} else { |
|
595 | 2x |
p_hat
|
596 | 2x |
}), bound = -z, |
597 | 2x |
x = x, mu = p_hat, wt = n |
598 | 2x |
)$root |
599 |
}
|
|
600 |
}
|
|
601 | 2x |
if (x != n && p_hat < (1 - tol)) { |
602 | 2x |
ci_upr <- if ( |
603 | 2x |
BinDev(y = 1 - tol, x = x, mu = ifelse(p_hat > 1 - tol, tol, p_hat), wt = n, bound = z, tol = tol) < 0) { # nolint |
604 | ! |
ci_lwr <- if (BinDev( |
605 | ! |
tol, x, if (p_hat < tol || p_hat == 1) { |
606 | ! |
1 - tol |
607 |
} else { |
|
608 | ! |
p_hat
|
609 | ! |
}, n, |
610 | ! |
-z, tol |
611 | ! |
) <= 0) { |
612 | ! |
stats::uniroot( |
613 | ! |
f = BinDev, interval = c(tol, p_hat), |
614 | ! |
bound = -z, x = x, mu = p_hat, wt = n |
615 | ! |
)$root |
616 |
}
|
|
617 |
} else { |
|
618 | 2x |
stats::uniroot( |
619 | 2x |
f = BinDev, interval = c(if (p_hat > 1 - tol) { |
620 | ! |
tol
|
621 |
} else { |
|
622 | 2x |
p_hat
|
623 | 2x |
}, 1 - tol), bound = z, |
624 | 2x |
x = x, mu = p_hat, wt = n |
625 | 2x |
)$root |
626 |
}
|
|
627 |
}
|
|
628 |
},
|
|
629 | 26x |
blaker = { |
630 | 1x |
acceptbin <- function(x, n, p) { |
631 | 3954x |
p1 <- 1 - stats::pbinom(x - 1, n, p) |
632 | 3954x |
p2 <- stats::pbinom(x, n, p) |
633 | 3954x |
a1 <- p1 + stats::pbinom(stats::qbinom(p1, n, p) - 1, n, p) |
634 | 3954x |
a2 <- p2 + 1 - stats::pbinom( |
635 | 3954x |
stats::qbinom(1 - p2, n, p), n, |
636 | 3954x |
p
|
637 |
)
|
|
638 | 3954x |
return(min(a1, a2)) |
639 |
}
|
|
640 | 1x |
ci_lwr <- 0 |
641 | 1x |
ci_upr <- 1 |
642 | 1x |
if (x != 0) { |
643 | 1x |
ci_lwr <- stats::qbeta((1 - conf.level) / 2, x, n - x + 1) |
644 | 1x |
while (acceptbin(x, n, ci_lwr + tol) < (1 - conf.level)) { |
645 | 1976x |
ci_lwr <- ci_lwr + tol |
646 |
}
|
|
647 |
}
|
|
648 | 1x |
if (x != n) { |
649 | 1x |
ci_upr <- stats::qbeta(1 - (1 - conf.level) / 2, x + 1, n - x) |
650 | 1x |
while (acceptbin(x, n, ci_upr - tol) < (1 - conf.level)) { |
651 | 1976x |
ci_upr <- ci_upr - tol |
652 |
}
|
|
653 |
}
|
|
654 |
}
|
|
655 |
)
|
|
656 | 26x |
ci <- c(est = est, lwr.ci = max(0, ci_lwr), upr.ci = min( |
657 | 26x |
1,
|
658 | 26x |
ci_upr
|
659 |
)) |
|
660 | 26x |
if (sides == "left") { |
661 | 1x |
ci[3] <- 1 |
662 | 25x |
} else if (sides == "right") { |
663 | ! |
ci[2] <- 0 |
664 |
}
|
|
665 | 26x |
return(ci) |
666 |
}
|
|
667 | 26x |
lst <- list( |
668 | 26x |
x = x, n = n, conf.level = conf.level, sides = sides, |
669 | 26x |
method = method, rand = rand |
670 |
)
|
|
671 | 26x |
maxdim <- max(unlist(lapply(lst, length))) |
672 | 26x |
lgp <- lapply(lst, rep, length.out = maxdim) |
673 | 26x |
lgn <- h_recycle(x = if (is.null(names(x))) { |
674 | 26x |
paste("x", seq_along(x), sep = ".") |
675 |
} else { |
|
676 | ! |
names(x) |
677 | 26x |
}, n = if (is.null(names(n))) { |
678 | 26x |
paste("n", seq_along(n), sep = ".") |
679 |
} else { |
|
680 | ! |
names(n) |
681 | 26x |
}, conf.level = conf.level, sides = sides, method = method) |
682 | 26x |
xn <- apply(as.data.frame(lgn[sapply(lgn, function(x) { |
683 | 130x |
length(unique(x)) != |
684 | 130x |
1
|
685 | 26x |
})]), 1, paste, collapse = ":") |
686 | 26x |
res <- t(sapply(1:maxdim, function(i) { |
687 | 26x |
iBinomCI( |
688 | 26x |
x = lgp$x[i], |
689 | 26x |
n = lgp$n[i], conf.level = lgp$conf.level[i], sides = lgp$sides[i], |
690 | 26x |
method = lgp$method[i], rand = lgp$rand[i] |
691 |
)
|
|
692 |
})) |
|
693 | 26x |
colnames(res)[1] <- c("est") |
694 | 26x |
rownames(res) <- xn |
695 | 26x |
return(res) |
696 |
}
|
1 |
#' Control function for Cox regression
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Sets a list of parameters for Cox regression fit. Used internally.
|
|
6 |
#'
|
|
7 |
#' @inheritParams argument_convention
|
|
8 |
#' @param pval_method (`string`)\cr the method used for estimation of p.values; `wald` (default) or `likelihood`.
|
|
9 |
#' @param interaction (`flag`)\cr if `TRUE`, the model includes the interaction between the studied
|
|
10 |
#' treatment and candidate covariate. Note that for univariate models without treatment arm, and
|
|
11 |
#' multivariate models, no interaction can be used so that this needs to be `FALSE`.
|
|
12 |
#' @param ties (`string`)\cr among `exact` (equivalent to `DISCRETE` in SAS), `efron` and `breslow`,
|
|
13 |
#' see [survival::coxph()]. Note: there is no equivalent of SAS `EXACT` method in R.
|
|
14 |
#'
|
|
15 |
#' @return A `list` of items with names corresponding to the arguments.
|
|
16 |
#'
|
|
17 |
#' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()].
|
|
18 |
#'
|
|
19 |
#' @examples
|
|
20 |
#' control_coxreg()
|
|
21 |
#'
|
|
22 |
#' @export
|
|
23 |
control_coxreg <- function(pval_method = c("wald", "likelihood"), |
|
24 |
ties = c("exact", "efron", "breslow"), |
|
25 |
conf_level = 0.95, |
|
26 |
interaction = FALSE) { |
|
27 | 55x |
pval_method <- match.arg(pval_method) |
28 | 55x |
ties <- match.arg(ties) |
29 | 55x |
checkmate::assert_flag(interaction) |
30 | 55x |
assert_proportion_value(conf_level) |
31 | 55x |
list( |
32 | 55x |
pval_method = pval_method, |
33 | 55x |
ties = ties, |
34 | 55x |
conf_level = conf_level, |
35 | 55x |
interaction = interaction |
36 |
)
|
|
37 |
}
|
|
38 | ||
39 |
#' Custom tidy methods for Cox regression
|
|
40 |
#'
|
|
41 |
#' @description `r lifecycle::badge("stable")`
|
|
42 |
#'
|
|
43 |
#' @inheritParams argument_convention
|
|
44 |
#' @param x (`list`)\cr result of the Cox regression model fitted by [fit_coxreg_univar()] (for univariate models)
|
|
45 |
#' or [fit_coxreg_multivar()] (for multivariate models).
|
|
46 |
#'
|
|
47 |
#' @return [broom::tidy()] returns:
|
|
48 |
#' * For `summary.coxph` objects, a `data.frame` with columns: `Pr(>|z|)`, `exp(coef)`, `exp(-coef)`, `lower .95`,
|
|
49 |
#' `upper .95`, `level`, and `n`.
|
|
50 |
#' * For `coxreg.univar` objects, a `data.frame` with columns: `effect`, `term`, `term_label`, `level`, `n`, `hr`,
|
|
51 |
#' `lcl`, `ucl`, `pval`, and `ci`.
|
|
52 |
#' * For `coxreg.multivar` objects, a `data.frame` with columns: `term`, `pval`, `term_label`, `hr`, `lcl`, `ucl`,
|
|
53 |
#' `level`, and `ci`.
|
|
54 |
#'
|
|
55 |
#' @seealso [cox_regression]
|
|
56 |
#'
|
|
57 |
#' @name tidy_coxreg
|
|
58 |
NULL
|
|
59 | ||
60 |
#' @describeIn tidy_coxreg Custom tidy method for [survival::coxph()] summary results.
|
|
61 |
#'
|
|
62 |
#' Tidy the [survival::coxph()] results into a `data.frame` to extract model results.
|
|
63 |
#'
|
|
64 |
#' @method tidy summary.coxph
|
|
65 |
#'
|
|
66 |
#' @examples
|
|
67 |
#' library(survival)
|
|
68 |
#' library(broom)
|
|
69 |
#'
|
|
70 |
#' set.seed(1, kind = "Mersenne-Twister")
|
|
71 |
#'
|
|
72 |
#' dta_bladder <- with(
|
|
73 |
#' data = bladder[bladder$enum < 5, ],
|
|
74 |
#' data.frame(
|
|
75 |
#' time = stop,
|
|
76 |
#' status = event,
|
|
77 |
#' armcd = as.factor(rx),
|
|
78 |
#' covar1 = as.factor(enum),
|
|
79 |
#' covar2 = factor(
|
|
80 |
#' sample(as.factor(enum)),
|
|
81 |
#' levels = 1:4, labels = c("F", "F", "M", "M")
|
|
82 |
#' )
|
|
83 |
#' )
|
|
84 |
#' )
|
|
85 |
#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
|
|
86 |
#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
|
|
87 |
#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
|
|
88 |
#'
|
|
89 |
#' formula <- "survival::Surv(time, status) ~ armcd + covar1"
|
|
90 |
#' msum <- summary(coxph(stats::as.formula(formula), data = dta_bladder))
|
|
91 |
#' tidy(msum)
|
|
92 |
#'
|
|
93 |
#' @export
|
|
94 |
tidy.summary.coxph <- function(x, # nolint |
|
95 |
...) { |
|
96 | 199x |
checkmate::assert_class(x, "summary.coxph") |
97 | 199x |
pval <- x$coefficients |
98 | 199x |
confint <- x$conf.int |
99 | 199x |
levels <- rownames(pval) |
100 | ||
101 | 199x |
pval <- tibble::as_tibble(pval) |
102 | 199x |
confint <- tibble::as_tibble(confint) |
103 | ||
104 | 199x |
ret <- cbind(pval[, grepl("Pr", names(pval))], confint) |
105 | 199x |
ret$level <- levels |
106 | 199x |
ret$n <- x[["n"]] |
107 | 199x |
ret
|
108 |
}
|
|
109 | ||
110 |
#' @describeIn tidy_coxreg Custom tidy method for a univariate Cox regression.
|
|
111 |
#'
|
|
112 |
#' Tidy up the result of a Cox regression model fitted by [fit_coxreg_univar()].
|
|
113 |
#'
|
|
114 |
#' @method tidy coxreg.univar
|
|
115 |
#'
|
|
116 |
#' @examples
|
|
117 |
#' ## Cox regression: arm + 1 covariate.
|
|
118 |
#' mod1 <- fit_coxreg_univar(
|
|
119 |
#' variables = list(
|
|
120 |
#' time = "time", event = "status", arm = "armcd",
|
|
121 |
#' covariates = "covar1"
|
|
122 |
#' ),
|
|
123 |
#' data = dta_bladder,
|
|
124 |
#' control = control_coxreg(conf_level = 0.91)
|
|
125 |
#' )
|
|
126 |
#'
|
|
127 |
#' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates.
|
|
128 |
#' mod2 <- fit_coxreg_univar(
|
|
129 |
#' variables = list(
|
|
130 |
#' time = "time", event = "status", arm = "armcd",
|
|
131 |
#' covariates = c("covar1", "covar2")
|
|
132 |
#' ),
|
|
133 |
#' data = dta_bladder,
|
|
134 |
#' control = control_coxreg(conf_level = 0.91, interaction = TRUE)
|
|
135 |
#' )
|
|
136 |
#'
|
|
137 |
#' tidy(mod1)
|
|
138 |
#' tidy(mod2)
|
|
139 |
#'
|
|
140 |
#' @export
|
|
141 |
tidy.coxreg.univar <- function(x, # nolint |
|
142 |
...) { |
|
143 | 38x |
checkmate::assert_class(x, "coxreg.univar") |
144 | 38x |
mod <- x$mod |
145 | 38x |
vars <- c(x$vars$arm, x$vars$covariates) |
146 | 38x |
has_arm <- "arm" %in% names(x$vars) |
147 | ||
148 | 38x |
result <- if (!has_arm) { |
149 | 5x |
Map( |
150 | 5x |
mod = mod, vars = vars, |
151 | 5x |
f = function(mod, vars) { |
152 | 6x |
h_coxreg_multivar_extract( |
153 | 6x |
var = vars, |
154 | 6x |
data = x$data, |
155 | 6x |
mod = mod, |
156 | 6x |
control = x$control |
157 |
)
|
|
158 |
}
|
|
159 |
)
|
|
160 | 38x |
} else if (x$control$interaction) { |
161 | 12x |
Map( |
162 | 12x |
mod = mod, covar = vars, |
163 | 12x |
f = function(mod, covar) { |
164 | 26x |
h_coxreg_extract_interaction( |
165 | 26x |
effect = x$vars$arm, covar = covar, mod = mod, data = x$data, |
166 | 26x |
at = x$at, control = x$control |
167 |
)
|
|
168 |
}
|
|
169 |
)
|
|
170 |
} else { |
|
171 | 21x |
Map( |
172 | 21x |
mod = mod, vars = vars, |
173 | 21x |
f = function(mod, vars) { |
174 | 53x |
h_coxreg_univar_extract( |
175 | 53x |
effect = x$vars$arm, covar = vars, data = x$data, mod = mod, |
176 | 53x |
control = x$control |
177 |
)
|
|
178 |
}
|
|
179 |
)
|
|
180 |
}
|
|
181 | 38x |
result <- do.call(rbind, result) |
182 | ||
183 | 38x |
result$ci <- Map(lcl = result$lcl, ucl = result$ucl, f = function(lcl, ucl) c(lcl, ucl)) |
184 | 38x |
result$n <- lapply(result$n, empty_vector_if_na) |
185 | 38x |
result$ci <- lapply(result$ci, empty_vector_if_na) |
186 | 38x |
result$hr <- lapply(result$hr, empty_vector_if_na) |
187 | 38x |
if (x$control$interaction) { |
188 | 12x |
result$pval_inter <- lapply(result$pval_inter, empty_vector_if_na) |
189 |
# Remove interaction p-values due to change in specifications.
|
|
190 | 12x |
result$pval[result$effect != "Treatment:"] <- NA |
191 |
}
|
|
192 | 38x |
result$pval <- lapply(result$pval, empty_vector_if_na) |
193 | 38x |
attr(result, "conf_level") <- x$control$conf_level |
194 | 38x |
result
|
195 |
}
|
|
196 | ||
197 |
#' @describeIn tidy_coxreg Custom tidy method for a multivariate Cox regression.
|
|
198 |
#'
|
|
199 |
#' Tidy up the result of a Cox regression model fitted by [fit_coxreg_multivar()].
|
|
200 |
#'
|
|
201 |
#' @method tidy coxreg.multivar
|
|
202 |
#'
|
|
203 |
#' @examples
|
|
204 |
#' multivar_model <- fit_coxreg_multivar(
|
|
205 |
#' variables = list(
|
|
206 |
#' time = "time", event = "status", arm = "armcd",
|
|
207 |
#' covariates = c("covar1", "covar2")
|
|
208 |
#' ),
|
|
209 |
#' data = dta_bladder
|
|
210 |
#' )
|
|
211 |
#' broom::tidy(multivar_model)
|
|
212 |
#'
|
|
213 |
#' @export
|
|
214 |
tidy.coxreg.multivar <- function(x, # nolint |
|
215 |
...) { |
|
216 | 16x |
checkmate::assert_class(x, "coxreg.multivar") |
217 | 16x |
vars <- c(x$vars$arm, x$vars$covariates) |
218 | ||
219 |
# Convert the model summaries to data.
|
|
220 | 16x |
result <- Map( |
221 | 16x |
vars = vars, |
222 | 16x |
f = function(vars) { |
223 | 60x |
h_coxreg_multivar_extract( |
224 | 60x |
var = vars, data = x$data, |
225 | 60x |
mod = x$mod, control = x$control |
226 |
)
|
|
227 |
}
|
|
228 |
)
|
|
229 | 16x |
result <- do.call(rbind, result) |
230 | ||
231 | 16x |
result$ci <- Map(lcl = result$lcl, ucl = result$ucl, f = function(lcl, ucl) c(lcl, ucl)) |
232 | 16x |
result$ci <- lapply(result$ci, empty_vector_if_na) |
233 | 16x |
result$hr <- lapply(result$hr, empty_vector_if_na) |
234 | 16x |
result$pval <- lapply(result$pval, empty_vector_if_na) |
235 | 16x |
result <- result[, names(result) != "n"] |
236 | 16x |
attr(result, "conf_level") <- x$control$conf_level |
237 | ||
238 | 16x |
result
|
239 |
}
|
|
240 | ||
241 |
#' Fitting functions for Cox proportional hazards regression
|
|
242 |
#'
|
|
243 |
#' @description `r lifecycle::badge("stable")`
|
|
244 |
#'
|
|
245 |
#' Fitting functions for univariate and multivariate Cox regression models.
|
|
246 |
#'
|
|
247 |
#' @param variables (named `list`)\cr the names of the variables found in `data`, passed as a named list and
|
|
248 |
#' corresponding to the `time`, `event`, `arm`, `strata`, and `covariates` terms. If `arm` is missing from
|
|
249 |
#' `variables`, then only Cox model(s) including the `covariates` will be fitted and the corresponding effect
|
|
250 |
#' estimates will be tabulated later.
|
|
251 |
#' @param data (`data.frame`)\cr the dataset containing the variables to fit the models.
|
|
252 |
#' @param at (`list` of `numeric`)\cr when the candidate covariate is a `numeric`, use `at` to specify
|
|
253 |
#' the value of the covariate at which the effect should be estimated.
|
|
254 |
#' @param control (`list`)\cr a list of parameters as returned by the helper function [control_coxreg()].
|
|
255 |
#'
|
|
256 |
#' @seealso [h_cox_regression] for relevant helper functions, [cox_regression].
|
|
257 |
#'
|
|
258 |
#' @examples
|
|
259 |
#' library(survival)
|
|
260 |
#'
|
|
261 |
#' set.seed(1, kind = "Mersenne-Twister")
|
|
262 |
#'
|
|
263 |
#' # Testing dataset [survival::bladder].
|
|
264 |
#' dta_bladder <- with(
|
|
265 |
#' data = bladder[bladder$enum < 5, ],
|
|
266 |
#' data.frame(
|
|
267 |
#' time = stop,
|
|
268 |
#' status = event,
|
|
269 |
#' armcd = as.factor(rx),
|
|
270 |
#' covar1 = as.factor(enum),
|
|
271 |
#' covar2 = factor(
|
|
272 |
#' sample(as.factor(enum)),
|
|
273 |
#' levels = 1:4, labels = c("F", "F", "M", "M")
|
|
274 |
#' )
|
|
275 |
#' )
|
|
276 |
#' )
|
|
277 |
#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
|
|
278 |
#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
|
|
279 |
#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
|
|
280 |
#'
|
|
281 |
#' plot(
|
|
282 |
#' survfit(Surv(time, status) ~ armcd + covar1, data = dta_bladder),
|
|
283 |
#' lty = 2:4,
|
|
284 |
#' xlab = "Months",
|
|
285 |
#' col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
|
|
286 |
#' )
|
|
287 |
#'
|
|
288 |
#' @name fit_coxreg
|
|
289 |
NULL
|
|
290 | ||
291 |
#' @describeIn fit_coxreg Fit a series of univariate Cox regression models given the inputs.
|
|
292 |
#'
|
|
293 |
#' @return
|
|
294 |
#' * `fit_coxreg_univar()` returns a `coxreg.univar` class object which is a named `list`
|
|
295 |
#' with 5 elements:
|
|
296 |
#' * `mod`: Cox regression models fitted by [survival::coxph()].
|
|
297 |
#' * `data`: The original data frame input.
|
|
298 |
#' * `control`: The original control input.
|
|
299 |
#' * `vars`: The variables used in the model.
|
|
300 |
#' * `at`: Value of the covariate at which the effect should be estimated.
|
|
301 |
#'
|
|
302 |
#' @note When using `fit_coxreg_univar` there should be two study arms.
|
|
303 |
#'
|
|
304 |
#' @examples
|
|
305 |
#' # fit_coxreg_univar
|
|
306 |
#'
|
|
307 |
#' ## Cox regression: arm + 1 covariate.
|
|
308 |
#' mod1 <- fit_coxreg_univar(
|
|
309 |
#' variables = list(
|
|
310 |
#' time = "time", event = "status", arm = "armcd",
|
|
311 |
#' covariates = "covar1"
|
|
312 |
#' ),
|
|
313 |
#' data = dta_bladder,
|
|
314 |
#' control = control_coxreg(conf_level = 0.91)
|
|
315 |
#' )
|
|
316 |
#'
|
|
317 |
#' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates.
|
|
318 |
#' mod2 <- fit_coxreg_univar(
|
|
319 |
#' variables = list(
|
|
320 |
#' time = "time", event = "status", arm = "armcd",
|
|
321 |
#' covariates = c("covar1", "covar2")
|
|
322 |
#' ),
|
|
323 |
#' data = dta_bladder,
|
|
324 |
#' control = control_coxreg(conf_level = 0.91, interaction = TRUE)
|
|
325 |
#' )
|
|
326 |
#'
|
|
327 |
#' ## Cox regression: arm + 1 covariate, stratified analysis.
|
|
328 |
#' mod3 <- fit_coxreg_univar(
|
|
329 |
#' variables = list(
|
|
330 |
#' time = "time", event = "status", arm = "armcd", strata = "covar2",
|
|
331 |
#' covariates = c("covar1")
|
|
332 |
#' ),
|
|
333 |
#' data = dta_bladder,
|
|
334 |
#' control = control_coxreg(conf_level = 0.91)
|
|
335 |
#' )
|
|
336 |
#'
|
|
337 |
#' ## Cox regression: no arm, only covariates.
|
|
338 |
#' mod4 <- fit_coxreg_univar(
|
|
339 |
#' variables = list(
|
|
340 |
#' time = "time", event = "status",
|
|
341 |
#' covariates = c("covar1", "covar2")
|
|
342 |
#' ),
|
|
343 |
#' data = dta_bladder
|
|
344 |
#' )
|
|
345 |
#'
|
|
346 |
#' @export
|
|
347 |
fit_coxreg_univar <- function(variables, |
|
348 |
data,
|
|
349 |
at = list(), |
|
350 |
control = control_coxreg()) { |
|
351 | 43x |
checkmate::assert_list(variables, names = "named") |
352 | 43x |
has_arm <- "arm" %in% names(variables) |
353 | 43x |
arm_name <- if (has_arm) "arm" else NULL |
354 | ||
355 | 43x |
checkmate::assert_character(variables$covariates, null.ok = TRUE) |
356 | ||
357 | 43x |
assert_df_with_variables(data, variables) |
358 | 43x |
assert_list_of_variables(variables[c(arm_name, "event", "time")]) |
359 | ||
360 | 43x |
if (!is.null(variables$strata)) { |
361 | 4x |
checkmate::assert_disjunct(control$pval_method, "likelihood") |
362 |
}
|
|
363 | 42x |
if (has_arm) { |
364 | 36x |
assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2) |
365 |
}
|
|
366 | 41x |
vars <- unlist(variables[c(arm_name, "covariates", "strata")], use.names = FALSE) |
367 | 41x |
for (i in vars) { |
368 | 94x |
if (is.factor(data[[i]])) { |
369 | 82x |
attr(data[[i]], "levels") <- levels(droplevels(data[[i]])) |
370 |
}
|
|
371 |
}
|
|
372 | 41x |
forms <- h_coxreg_univar_formulas(variables, interaction = control$interaction) |
373 | 41x |
mod <- lapply( |
374 | 41x |
forms, function(x) { |
375 | 90x |
survival::coxph(formula = stats::as.formula(x), data = data, ties = control$ties) |
376 |
}
|
|
377 |
)
|
|
378 | 41x |
structure( |
379 | 41x |
list( |
380 | 41x |
mod = mod, |
381 | 41x |
data = data, |
382 | 41x |
control = control, |
383 | 41x |
vars = variables, |
384 | 41x |
at = at |
385 |
),
|
|
386 | 41x |
class = "coxreg.univar" |
387 |
)
|
|
388 |
}
|
|
389 | ||
390 |
#' @describeIn fit_coxreg Fit a multivariate Cox regression model.
|
|
391 |
#'
|
|
392 |
#' @return
|
|
393 |
#' * `fit_coxreg_multivar()` returns a `coxreg.multivar` class object which is a named list
|
|
394 |
#' with 4 elements:
|
|
395 |
#' * `mod`: Cox regression model fitted by [survival::coxph()].
|
|
396 |
#' * `data`: The original data frame input.
|
|
397 |
#' * `control`: The original control input.
|
|
398 |
#' * `vars`: The variables used in the model.
|
|
399 |
#'
|
|
400 |
#' @examples
|
|
401 |
#' # fit_coxreg_multivar
|
|
402 |
#'
|
|
403 |
#' ## Cox regression: multivariate Cox regression.
|
|
404 |
#' multivar_model <- fit_coxreg_multivar(
|
|
405 |
#' variables = list(
|
|
406 |
#' time = "time", event = "status", arm = "armcd",
|
|
407 |
#' covariates = c("covar1", "covar2")
|
|
408 |
#' ),
|
|
409 |
#' data = dta_bladder
|
|
410 |
#' )
|
|
411 |
#'
|
|
412 |
#' # Example without treatment arm.
|
|
413 |
#' multivar_covs_model <- fit_coxreg_multivar(
|
|
414 |
#' variables = list(
|
|
415 |
#' time = "time", event = "status",
|
|
416 |
#' covariates = c("covar1", "covar2")
|
|
417 |
#' ),
|
|
418 |
#' data = dta_bladder
|
|
419 |
#' )
|
|
420 |
#'
|
|
421 |
#' @export
|
|
422 |
fit_coxreg_multivar <- function(variables, |
|
423 |
data,
|
|
424 |
control = control_coxreg()) { |
|
425 | 83x |
checkmate::assert_list(variables, names = "named") |
426 | 83x |
has_arm <- "arm" %in% names(variables) |
427 | 83x |
arm_name <- if (has_arm) "arm" else NULL |
428 | ||
429 | 83x |
if (!is.null(variables$covariates)) { |
430 | 21x |
checkmate::assert_character(variables$covariates) |
431 |
}
|
|
432 | ||
433 | 83x |
checkmate::assert_false(control$interaction) |
434 | 83x |
assert_df_with_variables(data, variables) |
435 | 83x |
assert_list_of_variables(variables[c(arm_name, "event", "time")]) |
436 | ||
437 | 83x |
if (!is.null(variables$strata)) { |
438 | 3x |
checkmate::assert_disjunct(control$pval_method, "likelihood") |
439 |
}
|
|
440 | ||
441 | 82x |
form <- h_coxreg_multivar_formula(variables) |
442 | 82x |
mod <- survival::coxph( |
443 | 82x |
formula = stats::as.formula(form), |
444 | 82x |
data = data, |
445 | 82x |
ties = control$ties |
446 |
)
|
|
447 | 82x |
structure( |
448 | 82x |
list( |
449 | 82x |
mod = mod, |
450 | 82x |
data = data, |
451 | 82x |
control = control, |
452 | 82x |
vars = variables |
453 |
),
|
|
454 | 82x |
class = "coxreg.multivar" |
455 |
)
|
|
456 |
}
|
|
457 | ||
458 |
#' Muffled `car::Anova`
|
|
459 |
#'
|
|
460 |
#' Applied on survival models, [car::Anova()] signal that the `strata` terms is dropped from the model formula when
|
|
461 |
#' present, this function deliberately muffles this message.
|
|
462 |
#'
|
|
463 |
#' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()].
|
|
464 |
#' @param test_statistic (`string`)\cr the method used for estimation of p.values; `wald` (default) or `likelihood`.
|
|
465 |
#'
|
|
466 |
#' @return The output of [car::Anova()], with convergence message muffled.
|
|
467 |
#'
|
|
468 |
#' @keywords internal
|
|
469 |
muffled_car_anova <- function(mod, test_statistic) { |
|
470 | 219x |
tryCatch( |
471 | 219x |
withCallingHandlers( |
472 | 219x |
expr = { |
473 | 219x |
car::Anova( |
474 | 219x |
mod,
|
475 | 219x |
test.statistic = test_statistic, |
476 | 219x |
type = "III" |
477 |
)
|
|
478 |
},
|
|
479 | 219x |
message = function(m) invokeRestart("muffleMessage"), |
480 | 219x |
error = function(e) { |
481 | 1x |
stop(paste( |
482 | 1x |
"the model seems to have convergence problems, please try to change",
|
483 | 1x |
"the configuration of covariates or strata variables, e.g.",
|
484 | 1x |
"- original error:", e |
485 |
)) |
|
486 |
}
|
|
487 |
)
|
|
488 |
)
|
|
489 |
}
|
1 |
#' Helper function to create a map data frame for `trim_levels_to_map()`
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Helper function to create a map data frame from the input dataset, which can be used as an argument in the
|
|
6 |
#' `trim_levels_to_map` split function. Based on different method, the map is constructed differently.
|
|
7 |
#'
|
|
8 |
#' @inheritParams argument_convention
|
|
9 |
#' @param abnormal (named `list`)\cr identifying the abnormal range level(s) in `df`. Based on the levels of
|
|
10 |
#' abnormality of the input dataset, it can be something like `list(Low = "LOW LOW", High = "HIGH HIGH")` or
|
|
11 |
#' `abnormal = list(Low = "LOW", High = "HIGH"))`
|
|
12 |
#' @param method (`string`)\cr indicates how the returned map will be constructed. Can be `"default"` or `"range"`.
|
|
13 |
#'
|
|
14 |
#' @return A map `data.frame`.
|
|
15 |
#'
|
|
16 |
#' @note If method is `"default"`, the returned map will only have the abnormal directions that are observed in the
|
|
17 |
#' `df`, and records with all normal values will be excluded to avoid error in creating layout. If method is
|
|
18 |
#' `"range"`, the returned map will be based on the rule that at least one observation with low range > 0
|
|
19 |
#' for low direction and at least one observation with high range is not missing for high direction.
|
|
20 |
#'
|
|
21 |
#' @examples
|
|
22 |
#' adlb <- df_explicit_na(tern_ex_adlb)
|
|
23 |
#'
|
|
24 |
#' h_map_for_count_abnormal(
|
|
25 |
#' df = adlb,
|
|
26 |
#' variables = list(anl = "ANRIND", split_rows = c("LBCAT", "PARAM")),
|
|
27 |
#' abnormal = list(low = c("LOW"), high = c("HIGH")),
|
|
28 |
#' method = "default",
|
|
29 |
#' na_str = "<Missing>"
|
|
30 |
#' )
|
|
31 |
#'
|
|
32 |
#' df <- data.frame(
|
|
33 |
#' USUBJID = c(rep("1", 4), rep("2", 4), rep("3", 4)),
|
|
34 |
#' AVISIT = c(
|
|
35 |
#' rep("WEEK 1", 2),
|
|
36 |
#' rep("WEEK 2", 2),
|
|
37 |
#' rep("WEEK 1", 2),
|
|
38 |
#' rep("WEEK 2", 2),
|
|
39 |
#' rep("WEEK 1", 2),
|
|
40 |
#' rep("WEEK 2", 2)
|
|
41 |
#' ),
|
|
42 |
#' PARAM = rep(c("ALT", "CPR"), 6),
|
|
43 |
#' ANRIND = c(
|
|
44 |
#' "NORMAL", "NORMAL", "LOW",
|
|
45 |
#' "HIGH", "LOW", "LOW", "HIGH", "HIGH", rep("NORMAL", 4)
|
|
46 |
#' ),
|
|
47 |
#' ANRLO = rep(5, 12),
|
|
48 |
#' ANRHI = rep(20, 12)
|
|
49 |
#' )
|
|
50 |
#' df$ANRIND <- factor(df$ANRIND, levels = c("LOW", "HIGH", "NORMAL"))
|
|
51 |
#' h_map_for_count_abnormal(
|
|
52 |
#' df = df,
|
|
53 |
#' variables = list(
|
|
54 |
#' anl = "ANRIND",
|
|
55 |
#' split_rows = c("PARAM"),
|
|
56 |
#' range_low = "ANRLO",
|
|
57 |
#' range_high = "ANRHI"
|
|
58 |
#' ),
|
|
59 |
#' abnormal = list(low = c("LOW"), high = c("HIGH")),
|
|
60 |
#' method = "range",
|
|
61 |
#' na_str = "<Missing>"
|
|
62 |
#' )
|
|
63 |
#'
|
|
64 |
#' @export
|
|
65 |
h_map_for_count_abnormal <- function(df, |
|
66 |
variables = list( |
|
67 |
anl = "ANRIND", |
|
68 |
split_rows = c("PARAM"), |
|
69 |
range_low = "ANRLO", |
|
70 |
range_high = "ANRHI" |
|
71 |
),
|
|
72 |
abnormal = list(low = c("LOW", "LOW LOW"), high = c("HIGH", "HIGH HIGH")), |
|
73 |
method = c("default", "range"), |
|
74 |
na_str = "<Missing>") { |
|
75 | 7x |
method <- match.arg(method) |
76 | 7x |
checkmate::assert_subset(c("anl", "split_rows"), names(variables)) |
77 | 7x |
checkmate::assert_false(anyNA(df[variables$split_rows])) |
78 | 7x |
assert_df_with_variables(df, |
79 | 7x |
variables = list(anl = variables$anl, split_rows = variables$split_rows), |
80 | 7x |
na_level = na_str |
81 |
)
|
|
82 | 7x |
assert_df_with_factors(df, list(val = variables$anl)) |
83 | 7x |
assert_valid_factor(df[[variables$anl]], any.missing = FALSE) |
84 | 7x |
assert_list_of_variables(variables) |
85 | 7x |
checkmate::assert_list(abnormal, types = "character", len = 2) |
86 | ||
87 |
# Drop usued levels from df as they are not supposed to be in the final map
|
|
88 | 7x |
df <- droplevels(df) |
89 | ||
90 | 7x |
normal_value <- setdiff(levels(df[[variables$anl]]), unlist(abnormal)) |
91 | ||
92 |
# Based on the understanding of clinical data, there should only be one level of normal which is "NORMAL"
|
|
93 | 7x |
checkmate::assert_vector(normal_value, len = 1) |
94 | ||
95 |
# Default method will only have what is observed in the df, and records with all normal values will be excluded to
|
|
96 |
# avoid error in layout building.
|
|
97 | 7x |
if (method == "default") { |
98 | 3x |
df_abnormal <- subset(df, df[[variables$anl]] %in% unlist(abnormal)) |
99 | 3x |
map <- unique(df_abnormal[c(variables$split_rows, variables$anl)]) |
100 | 3x |
map_normal <- unique(subset(map, select = variables$split_rows)) |
101 | 3x |
map_normal[[variables$anl]] <- normal_value |
102 | 3x |
map <- rbind(map, map_normal) |
103 | 4x |
} else if (method == "range") { |
104 |
# range method follows the rule that at least one observation with ANRLO > 0 for low
|
|
105 |
# direction and at least one observation with ANRHI is not missing for high direction.
|
|
106 | 4x |
checkmate::assert_subset(c("range_low", "range_high"), names(variables)) |
107 | 4x |
checkmate::assert_subset(c("LOW", "HIGH"), toupper(names(abnormal))) |
108 | ||
109 | 4x |
assert_df_with_variables(df, |
110 | 4x |
variables = list( |
111 | 4x |
range_low = variables$range_low, |
112 | 4x |
range_high = variables$range_high |
113 |
)
|
|
114 |
)
|
|
115 | ||
116 |
# Define low direction of map
|
|
117 | 4x |
df_low <- subset(df, df[[variables$range_low]] > 0) |
118 | 4x |
map_low <- unique(df_low[variables$split_rows]) |
119 | 4x |
low_levels <- unname(unlist(abnormal[toupper(names(abnormal)) == "LOW"])) |
120 | 4x |
low_levels_df <- as.data.frame(low_levels) |
121 | 4x |
colnames(low_levels_df) <- variables$anl |
122 | 4x |
low_levels_df <- do.call("rbind", replicate(nrow(map_low), low_levels_df, simplify = FALSE)) |
123 | 4x |
rownames(map_low) <- NULL # Just to avoid strange row index in case upstream functions changed |
124 | 4x |
map_low <- map_low[rep(seq_len(nrow(map_low)), each = length(low_levels)), , drop = FALSE] |
125 | 4x |
map_low <- cbind(map_low, low_levels_df) |
126 | ||
127 |
# Define high direction of map
|
|
128 | 4x |
df_high <- subset(df, df[[variables$range_high]] != na_str | !is.na(df[[variables$range_high]])) |
129 | 4x |
map_high <- unique(df_high[variables$split_rows]) |
130 | 4x |
high_levels <- unname(unlist(abnormal[toupper(names(abnormal)) == "HIGH"])) |
131 | 4x |
high_levels_df <- as.data.frame(high_levels) |
132 | 4x |
colnames(high_levels_df) <- variables$anl |
133 | 4x |
high_levels_df <- do.call("rbind", replicate(nrow(map_high), high_levels_df, simplify = FALSE)) |
134 | 4x |
rownames(map_high) <- NULL |
135 | 4x |
map_high <- map_high[rep(seq_len(nrow(map_high)), each = length(high_levels)), , drop = FALSE] |
136 | 4x |
map_high <- cbind(map_high, high_levels_df) |
137 | ||
138 |
# Define normal of map
|
|
139 | 4x |
map_normal <- unique(rbind(map_low, map_high)[variables$split_rows]) |
140 | 4x |
map_normal[variables$anl] <- normal_value |
141 | ||
142 | 4x |
map <- rbind(map_low, map_high, map_normal) |
143 |
}
|
|
144 | ||
145 |
# map should be all characters
|
|
146 | 7x |
map <- data.frame(lapply(map, as.character), stringsAsFactors = FALSE) |
147 | ||
148 |
# sort the map final output by split_rows variables
|
|
149 | 7x |
for (i in rev(seq_len(length(variables$split_rows)))) { |
150 | 7x |
map <- map[order(map[[i]]), ] |
151 |
}
|
|
152 | 7x |
map
|
153 |
}
|
1 |
#' Stack multiple grobs
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("deprecated")`
|
|
4 |
#'
|
|
5 |
#' Stack grobs as a new grob with 1 column and multiple rows layout.
|
|
6 |
#'
|
|
7 |
#' @param ... grobs.
|
|
8 |
#' @param grobs (`list` of `grob`)\cr a list of grobs.
|
|
9 |
#' @param padding (`grid::unit`)\cr unit of length 1, space between each grob.
|
|
10 |
#' @param vp (`viewport` or `NULL`)\cr a [viewport()] object (or `NULL`).
|
|
11 |
#' @param name (`string`)\cr a character identifier for the grob.
|
|
12 |
#' @param gp (`gpar`)\cr a [gpar()] object.
|
|
13 |
#'
|
|
14 |
#' @return A `grob`.
|
|
15 |
#'
|
|
16 |
#' @examples
|
|
17 |
#' library(grid)
|
|
18 |
#'
|
|
19 |
#' g1 <- circleGrob(gp = gpar(col = "blue"))
|
|
20 |
#' g2 <- circleGrob(gp = gpar(col = "red"))
|
|
21 |
#' g3 <- textGrob("TEST TEXT")
|
|
22 |
#' grid.newpage()
|
|
23 |
#' grid.draw(stack_grobs(g1, g2, g3))
|
|
24 |
#'
|
|
25 |
#' showViewport()
|
|
26 |
#'
|
|
27 |
#' grid.newpage()
|
|
28 |
#' pushViewport(viewport(layout = grid.layout(1, 2)))
|
|
29 |
#' vp1 <- viewport(layout.pos.row = 1, layout.pos.col = 2)
|
|
30 |
#' grid.draw(stack_grobs(g1, g2, g3, vp = vp1, name = "test"))
|
|
31 |
#'
|
|
32 |
#' showViewport()
|
|
33 |
#' grid.ls(grobs = TRUE, viewports = TRUE, print = FALSE)
|
|
34 |
#'
|
|
35 |
#' @export
|
|
36 |
stack_grobs <- function(..., |
|
37 |
grobs = list(...), |
|
38 |
padding = grid::unit(2, "line"), |
|
39 |
vp = NULL, |
|
40 |
gp = NULL, |
|
41 |
name = NULL) { |
|
42 | 4x |
lifecycle::deprecate_warn( |
43 | 4x |
"0.9.4",
|
44 | 4x |
"stack_grobs()",
|
45 | 4x |
details = "`tern` plotting functions no longer generate `grob` objects." |
46 |
)
|
|
47 | ||
48 | 4x |
checkmate::assert_true( |
49 | 4x |
all(vapply(grobs, grid::is.grob, logical(1))) |
50 |
)
|
|
51 | ||
52 | 4x |
if (length(grobs) == 1) { |
53 | 1x |
return(grobs[[1]]) |
54 |
}
|
|
55 | ||
56 | 3x |
n_layout <- 2 * length(grobs) - 1 |
57 | 3x |
hts <- lapply( |
58 | 3x |
seq(1, n_layout), |
59 | 3x |
function(i) { |
60 | 39x |
if (i %% 2 != 0) { |
61 | 21x |
grid::unit(1, "null") |
62 |
} else { |
|
63 | 18x |
padding
|
64 |
}
|
|
65 |
}
|
|
66 |
)
|
|
67 | 3x |
hts <- do.call(grid::unit.c, hts) |
68 | ||
69 | 3x |
main_vp <- grid::viewport( |
70 | 3x |
layout = grid::grid.layout(nrow = n_layout, ncol = 1, heights = hts) |
71 |
)
|
|
72 | ||
73 | 3x |
nested_grobs <- Map(function(g, i) { |
74 | 21x |
grid::gTree( |
75 | 21x |
children = grid::gList(g), |
76 | 21x |
vp = grid::viewport(layout.pos.row = i, layout.pos.col = 1) |
77 |
)
|
|
78 | 3x |
}, grobs, seq_along(grobs) * 2 - 1) |
79 | ||
80 | 3x |
grobs_mainvp <- grid::gTree( |
81 | 3x |
children = do.call(grid::gList, nested_grobs), |
82 | 3x |
vp = main_vp |
83 |
)
|
|
84 | ||
85 | 3x |
grid::gTree( |
86 | 3x |
children = grid::gList(grobs_mainvp), |
87 | 3x |
vp = vp, |
88 | 3x |
gp = gp, |
89 | 3x |
name = name |
90 |
)
|
|
91 |
}
|
|
92 | ||
93 |
#' Arrange multiple grobs
|
|
94 |
#'
|
|
95 |
#' @description `r lifecycle::badge("deprecated")`
|
|
96 |
#'
|
|
97 |
#' Arrange grobs as a new grob with `n * m (rows * cols)` layout.
|
|
98 |
#'
|
|
99 |
#' @inheritParams stack_grobs
|
|
100 |
#' @param ncol (`integer(1)`)\cr number of columns in layout.
|
|
101 |
#' @param nrow (`integer(1)`)\cr number of rows in layout.
|
|
102 |
#' @param padding_ht (`grid::unit`)\cr unit of length 1, vertical space between each grob.
|
|
103 |
#' @param padding_wt (`grid::unit`)\cr unit of length 1, horizontal space between each grob.
|
|
104 |
#'
|
|
105 |
#' @return A `grob`.
|
|
106 |
#'
|
|
107 |
#' @examples
|
|
108 |
#' library(grid)
|
|
109 |
#'
|
|
110 |
#' \donttest{
|
|
111 |
#' num <- lapply(1:9, textGrob)
|
|
112 |
#' grid::grid.newpage()
|
|
113 |
#' grid.draw(arrange_grobs(grobs = num, ncol = 2))
|
|
114 |
#'
|
|
115 |
#' showViewport()
|
|
116 |
#'
|
|
117 |
#' g1 <- circleGrob(gp = gpar(col = "blue"))
|
|
118 |
#' g2 <- circleGrob(gp = gpar(col = "red"))
|
|
119 |
#' g3 <- textGrob("TEST TEXT")
|
|
120 |
#' grid::grid.newpage()
|
|
121 |
#' grid.draw(arrange_grobs(g1, g2, g3, nrow = 2))
|
|
122 |
#'
|
|
123 |
#' showViewport()
|
|
124 |
#'
|
|
125 |
#' grid::grid.newpage()
|
|
126 |
#' grid.draw(arrange_grobs(g1, g2, g3, ncol = 3))
|
|
127 |
#'
|
|
128 |
#' grid::grid.newpage()
|
|
129 |
#' grid::pushViewport(grid::viewport(layout = grid::grid.layout(1, 2)))
|
|
130 |
#' vp1 <- grid::viewport(layout.pos.row = 1, layout.pos.col = 2)
|
|
131 |
#' grid.draw(arrange_grobs(g1, g2, g3, ncol = 2, vp = vp1))
|
|
132 |
#'
|
|
133 |
#' showViewport()
|
|
134 |
#' }
|
|
135 |
#' @export
|
|
136 |
arrange_grobs <- function(..., |
|
137 |
grobs = list(...), |
|
138 |
ncol = NULL, nrow = NULL, |
|
139 |
padding_ht = grid::unit(2, "line"), |
|
140 |
padding_wt = grid::unit(2, "line"), |
|
141 |
vp = NULL, |
|
142 |
gp = NULL, |
|
143 |
name = NULL) { |
|
144 | 5x |
lifecycle::deprecate_warn( |
145 | 5x |
"0.9.4",
|
146 | 5x |
"arrange_grobs()",
|
147 | 5x |
details = "`tern` plotting functions no longer generate `grob` objects." |
148 |
)
|
|
149 | ||
150 | 5x |
checkmate::assert_true( |
151 | 5x |
all(vapply(grobs, grid::is.grob, logical(1))) |
152 |
)
|
|
153 | ||
154 | 5x |
if (length(grobs) == 1) { |
155 | 1x |
return(grobs[[1]]) |
156 |
}
|
|
157 | ||
158 | 4x |
if (is.null(ncol) && is.null(nrow)) { |
159 | 1x |
ncol <- 1 |
160 | 1x |
nrow <- ceiling(length(grobs) / ncol) |
161 | 3x |
} else if (!is.null(ncol) && is.null(nrow)) { |
162 | 1x |
nrow <- ceiling(length(grobs) / ncol) |
163 | 2x |
} else if (is.null(ncol) && !is.null(nrow)) { |
164 | ! |
ncol <- ceiling(length(grobs) / nrow) |
165 |
}
|
|
166 | ||
167 | 4x |
if (ncol * nrow < length(grobs)) { |
168 | 1x |
stop("specififed ncol and nrow are not enough for arranging the grobs ") |
169 |
}
|
|
170 | ||
171 | 3x |
if (ncol == 1) { |
172 | 2x |
return(stack_grobs(grobs = grobs, padding = padding_ht, vp = vp, gp = gp, name = name)) |
173 |
}
|
|
174 | ||
175 | 1x |
n_col <- 2 * ncol - 1 |
176 | 1x |
n_row <- 2 * nrow - 1 |
177 | 1x |
hts <- lapply( |
178 | 1x |
seq(1, n_row), |
179 | 1x |
function(i) { |
180 | 5x |
if (i %% 2 != 0) { |
181 | 3x |
grid::unit(1, "null") |
182 |
} else { |
|
183 | 2x |
padding_ht
|
184 |
}
|
|
185 |
}
|
|
186 |
)
|
|
187 | 1x |
hts <- do.call(grid::unit.c, hts) |
188 | ||
189 | 1x |
wts <- lapply( |
190 | 1x |
seq(1, n_col), |
191 | 1x |
function(i) { |
192 | 5x |
if (i %% 2 != 0) { |
193 | 3x |
grid::unit(1, "null") |
194 |
} else { |
|
195 | 2x |
padding_wt
|
196 |
}
|
|
197 |
}
|
|
198 |
)
|
|
199 | 1x |
wts <- do.call(grid::unit.c, wts) |
200 | ||
201 | 1x |
main_vp <- grid::viewport( |
202 | 1x |
layout = grid::grid.layout(nrow = n_row, ncol = n_col, widths = wts, heights = hts) |
203 |
)
|
|
204 | ||
205 | 1x |
nested_grobs <- list() |
206 | 1x |
k <- 0 |
207 | 1x |
for (i in seq(nrow) * 2 - 1) { |
208 | 3x |
for (j in seq(ncol) * 2 - 1) { |
209 | 9x |
k <- k + 1 |
210 | 9x |
if (k <= length(grobs)) { |
211 | 9x |
nested_grobs <- c( |
212 | 9x |
nested_grobs,
|
213 | 9x |
list(grid::gTree( |
214 | 9x |
children = grid::gList(grobs[[k]]), |
215 | 9x |
vp = grid::viewport(layout.pos.row = i, layout.pos.col = j) |
216 |
)) |
|
217 |
)
|
|
218 |
}
|
|
219 |
}
|
|
220 |
}
|
|
221 | 1x |
grobs_mainvp <- grid::gTree( |
222 | 1x |
children = do.call(grid::gList, nested_grobs), |
223 | 1x |
vp = main_vp |
224 |
)
|
|
225 | ||
226 | 1x |
grid::gTree( |
227 | 1x |
children = grid::gList(grobs_mainvp), |
228 | 1x |
vp = vp, |
229 | 1x |
gp = gp, |
230 | 1x |
name = name |
231 |
)
|
|
232 |
}
|
|
233 | ||
234 |
#' Draw `grob`
|
|
235 |
#'
|
|
236 |
#' @description `r lifecycle::badge("deprecated")`
|
|
237 |
#'
|
|
238 |
#' Draw grob on device page.
|
|
239 |
#'
|
|
240 |
#' @param grob (`grob`)\cr grid object.
|
|
241 |
#' @param newpage (`flag`)\cr draw on a new page.
|
|
242 |
#' @param vp (`viewport` or `NULL`)\cr a [viewport()] object (or `NULL`).
|
|
243 |
#'
|
|
244 |
#' @return A `grob`.
|
|
245 |
#'
|
|
246 |
#' @examples
|
|
247 |
#' library(dplyr)
|
|
248 |
#' library(grid)
|
|
249 |
#'
|
|
250 |
#' \donttest{
|
|
251 |
#' rect <- rectGrob(width = grid::unit(0.5, "npc"), height = grid::unit(0.5, "npc"))
|
|
252 |
#' rect %>% draw_grob(vp = grid::viewport(angle = 45))
|
|
253 |
#'
|
|
254 |
#' num <- lapply(1:10, textGrob)
|
|
255 |
#' num %>%
|
|
256 |
#' arrange_grobs(grobs = .) %>%
|
|
257 |
#' draw_grob()
|
|
258 |
#' showViewport()
|
|
259 |
#' }
|
|
260 |
#'
|
|
261 |
#' @export
|
|
262 |
draw_grob <- function(grob, newpage = TRUE, vp = NULL) { |
|
263 | 3x |
lifecycle::deprecate_warn( |
264 | 3x |
"0.9.4",
|
265 | 3x |
"draw_grob()",
|
266 | 3x |
details = "`tern` plotting functions no longer generate `grob` objects." |
267 |
)
|
|
268 | ||
269 | 3x |
if (newpage) { |
270 | 3x |
grid::grid.newpage() |
271 |
}
|
|
272 | 3x |
if (!is.null(vp)) { |
273 | 1x |
grid::pushViewport(vp) |
274 |
}
|
|
275 | 3x |
grid::grid.draw(grob) |
276 |
}
|
|
277 | ||
278 |
tern_grob <- function(x) { |
|
279 | ! |
class(x) <- unique(c("ternGrob", class(x))) |
280 | ! |
x
|
281 |
}
|
|
282 | ||
283 |
#' @keywords internal
|
|
284 |
print.ternGrob <- function(x, ...) { |
|
285 | ! |
grid::grid.newpage() |
286 | ! |
grid::grid.draw(x) |
287 |
}
|
1 |
#' Control function for descriptive statistics
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Sets a list of parameters for summaries of descriptive statistics. Typically used internally to specify
|
|
6 |
#' details for [s_summary()]. This function family is mainly used by [analyze_vars()].
|
|
7 |
#'
|
|
8 |
#' @inheritParams argument_convention
|
|
9 |
#' @param quantiles (`numeric(2)`)\cr vector of length two to specify the quantiles to calculate.
|
|
10 |
#' @param quantile_type (`numeric(1)`)\cr number between 1 and 9 selecting quantile algorithms to be used.
|
|
11 |
#' Default is set to 2 as this matches the default quantile algorithm in SAS `proc univariate` set by `QNTLDEF=5`.
|
|
12 |
#' This differs from R's default. See more about `type` in [stats::quantile()].
|
|
13 |
#' @param test_mean (`numeric(1)`)\cr number to test against the mean under the null hypothesis when calculating
|
|
14 |
#' p-value.
|
|
15 |
#'
|
|
16 |
#' @return A list of components with the same names as the arguments.
|
|
17 |
#'
|
|
18 |
#' @export
|
|
19 |
control_analyze_vars <- function(conf_level = 0.95, |
|
20 |
quantiles = c(0.25, 0.75), |
|
21 |
quantile_type = 2, |
|
22 |
test_mean = 0) { |
|
23 | 1098x |
checkmate::assert_vector(quantiles, len = 2) |
24 | 1098x |
checkmate::assert_int(quantile_type, lower = 1, upper = 9) |
25 | 1098x |
checkmate::assert_numeric(test_mean) |
26 | 1098x |
lapply(quantiles, assert_proportion_value) |
27 | 1097x |
assert_proportion_value(conf_level) |
28 | 1096x |
list(conf_level = conf_level, quantiles = quantiles, quantile_type = quantile_type, test_mean = test_mean) |
29 |
}
|
|
30 | ||
31 |
# Helper function to fix numeric or counts pval if necessary
|
|
32 |
.correct_num_or_counts_pval <- function(type, .stats) { |
|
33 | 332x |
if (type == "numeric") { |
34 | 92x |
if (!is.null(.stats) && any(grepl("^pval", .stats))) { |
35 | 10x |
.stats[grepl("^pval", .stats)] <- "pval" # tmp fix xxx |
36 |
}
|
|
37 |
} else { |
|
38 | 240x |
if (!is.null(.stats) && any(grepl("^pval", .stats))) { |
39 | 9x |
.stats[grepl("^pval", .stats)] <- "pval_counts" # tmp fix xxx |
40 |
}
|
|
41 |
}
|
|
42 | 332x |
.stats
|
43 |
}
|
|
44 | ||
45 |
#' Analyze variables
|
|
46 |
#'
|
|
47 |
#' @description `r lifecycle::badge("stable")`
|
|
48 |
#'
|
|
49 |
#' The analyze function [analyze_vars()] creates a layout element to summarize one or more variables, using the S3
|
|
50 |
#' generic function [s_summary()] to calculate a list of summary statistics. A list of all available statistics for
|
|
51 |
#' numeric variables can be viewed by running `get_stats("analyze_vars_numeric")` and for non-numeric variables by
|
|
52 |
#' running `get_stats("analyze_vars_counts")`. Use the `.stats` parameter to specify the statistics to include in your
|
|
53 |
#' output summary table. Use `compare_with_ref_group = TRUE` to compare the variable with reference groups.
|
|
54 |
#'
|
|
55 |
#' @details
|
|
56 |
#' **Automatic digit formatting:** The number of digits to display can be automatically determined from the analyzed
|
|
57 |
#' variable(s) (`vars`) for certain statistics by setting the statistic format to `"auto"` in `.formats`.
|
|
58 |
#' This utilizes the [format_auto()] formatting function. Note that only data for the current row & variable (for all
|
|
59 |
#' columns) will be considered (`.df_row[[.var]]`, see [`rtables::additional_fun_params`]) and not the whole dataset.
|
|
60 |
#'
|
|
61 |
#' @inheritParams argument_convention
|
|
62 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
63 |
#'
|
|
64 |
#' Options for numeric variables are: ``r shQuote(get_stats("analyze_vars_numeric"), type = "sh")``
|
|
65 |
#'
|
|
66 |
#' Options for non-numeric variables are: ``r shQuote(get_stats("analyze_vars_counts"), type = "sh")``
|
|
67 |
#'
|
|
68 |
#' @name analyze_variables
|
|
69 |
#' @order 1
|
|
70 |
NULL
|
|
71 | ||
72 |
#' @describeIn analyze_variables S3 generic function to produces a variable summary.
|
|
73 |
#'
|
|
74 |
#' @return
|
|
75 |
#' * `s_summary()` returns different statistics depending on the class of `x`.
|
|
76 |
#'
|
|
77 |
#' @export
|
|
78 |
s_summary <- function(x, ...) { |
|
79 | 1661x |
UseMethod("s_summary", x) |
80 |
}
|
|
81 | ||
82 |
#' @describeIn analyze_variables Method for `numeric` class.
|
|
83 |
#'
|
|
84 |
#' @param control (`list`)\cr parameters for descriptive statistics details, specified by using
|
|
85 |
#' the helper function [control_analyze_vars()]. Some possible parameter options are:
|
|
86 |
#' * `conf_level` (`proportion`)\cr confidence level of the interval for mean and median.
|
|
87 |
#' * `quantiles` (`numeric(2)`)\cr vector of length two to specify the quantiles.
|
|
88 |
#' * `quantile_type` (`numeric(1)`)\cr between 1 and 9 selecting quantile algorithms to be used.
|
|
89 |
#' See more about `type` in [stats::quantile()].
|
|
90 |
#' * `test_mean` (`numeric(1)`)\cr value to test against the mean under the null hypothesis when calculating p-value.
|
|
91 |
#'
|
|
92 |
#' @return
|
|
93 |
#' * If `x` is of class `numeric`, returns a `list` with the following named `numeric` items:
|
|
94 |
#' * `n`: The [length()] of `x`.
|
|
95 |
#' * `sum`: The [sum()] of `x`.
|
|
96 |
#' * `mean`: The [mean()] of `x`.
|
|
97 |
#' * `sd`: The [stats::sd()] of `x`.
|
|
98 |
#' * `se`: The standard error of `x` mean, i.e.: (`sd(x) / sqrt(length(x))`).
|
|
99 |
#' * `mean_sd`: The [mean()] and [stats::sd()] of `x`.
|
|
100 |
#' * `mean_se`: The [mean()] of `x` and its standard error (see above).
|
|
101 |
#' * `mean_ci`: The CI for the mean of `x` (from [stat_mean_ci()]).
|
|
102 |
#' * `mean_sei`: The SE interval for the mean of `x`, i.e.: ([mean()] -/+ [stats::sd()] / [sqrt()]).
|
|
103 |
#' * `mean_sdi`: The SD interval for the mean of `x`, i.e.: ([mean()] -/+ [stats::sd()]).
|
|
104 |
#' * `mean_pval`: The two-sided p-value of the mean of `x` (from [stat_mean_pval()]).
|
|
105 |
#' * `median`: The [stats::median()] of `x`.
|
|
106 |
#' * `mad`: The median absolute deviation of `x`, i.e.: ([stats::median()] of `xc`,
|
|
107 |
#' where `xc` = `x` - [stats::median()]).
|
|
108 |
#' * `median_ci`: The CI for the median of `x` (from [stat_median_ci()]).
|
|
109 |
#' * `quantiles`: Two sample quantiles of `x` (from [stats::quantile()]).
|
|
110 |
#' * `iqr`: The [stats::IQR()] of `x`.
|
|
111 |
#' * `range`: The [range_noinf()] of `x`.
|
|
112 |
#' * `min`: The [max()] of `x`.
|
|
113 |
#' * `max`: The [min()] of `x`.
|
|
114 |
#' * `median_range`: The [median()] and [range_noinf()] of `x`.
|
|
115 |
#' * `cv`: The coefficient of variation of `x`, i.e.: ([stats::sd()] / [mean()] * 100).
|
|
116 |
#' * `geom_mean`: The geometric mean of `x`, i.e.: (`exp(mean(log(x)))`).
|
|
117 |
#' * `geom_cv`: The geometric coefficient of variation of `x`, i.e.: (`sqrt(exp(sd(log(x)) ^ 2) - 1) * 100`).
|
|
118 |
#'
|
|
119 |
#' @note
|
|
120 |
#' * If `x` is an empty vector, `NA` is returned. This is the expected feature so as to return `rcell` content in
|
|
121 |
#' `rtables` when the intersection of a column and a row delimits an empty data selection.
|
|
122 |
#' * When the `mean` function is applied to an empty vector, `NA` will be returned instead of `NaN`, the latter
|
|
123 |
#' being standard behavior in R.
|
|
124 |
#'
|
|
125 |
#' @method s_summary numeric
|
|
126 |
#'
|
|
127 |
#' @examples
|
|
128 |
#' # `s_summary.numeric`
|
|
129 |
#'
|
|
130 |
#' ## Basic usage: empty numeric returns NA-filled items.
|
|
131 |
#' s_summary(numeric())
|
|
132 |
#'
|
|
133 |
#' ## Management of NA values.
|
|
134 |
#' x <- c(NA_real_, 1)
|
|
135 |
#' s_summary(x, na_rm = TRUE)
|
|
136 |
#' s_summary(x, na_rm = FALSE)
|
|
137 |
#'
|
|
138 |
#' x <- c(NA_real_, 1, 2)
|
|
139 |
#' s_summary(x)
|
|
140 |
#'
|
|
141 |
#' ## Benefits in `rtables` contructions:
|
|
142 |
#' dta_test <- data.frame(
|
|
143 |
#' Group = rep(LETTERS[seq(3)], each = 2),
|
|
144 |
#' sub_group = rep(letters[seq(2)], each = 3),
|
|
145 |
#' x = seq(6)
|
|
146 |
#' )
|
|
147 |
#'
|
|
148 |
#' ## The summary obtained in with `rtables`:
|
|
149 |
#' basic_table() %>%
|
|
150 |
#' split_cols_by(var = "Group") %>%
|
|
151 |
#' split_rows_by(var = "sub_group") %>%
|
|
152 |
#' analyze(vars = "x", afun = s_summary) %>%
|
|
153 |
#' build_table(df = dta_test)
|
|
154 |
#'
|
|
155 |
#' ## By comparison with `lapply`:
|
|
156 |
#' X <- split(dta_test, f = with(dta_test, interaction(Group, sub_group)))
|
|
157 |
#' lapply(X, function(x) s_summary(x$x))
|
|
158 |
#'
|
|
159 |
#' @export
|
|
160 |
s_summary.numeric <- function(x, control = control_analyze_vars(), ...) { |
|
161 | 1143x |
checkmate::assert_numeric(x) |
162 | 1143x |
args_list <- list(...) |
163 | 1143x |
.N_row <- args_list[[".N_row"]] # nolint |
164 | 1143x |
.N_col <- args_list[[".N_col"]] # nolint |
165 | 1143x |
na_rm <- args_list[["na_rm"]] %||% TRUE |
166 | 1143x |
compare_with_ref_group <- args_list[["compare_with_ref_group"]] |
167 | ||
168 | 1143x |
if (na_rm) { |
169 | 1141x |
x <- x[!is.na(x)] |
170 |
}
|
|
171 | ||
172 | 1143x |
y <- list() |
173 | ||
174 | 1143x |
y$n <- c("n" = length(x)) |
175 | ||
176 | 1143x |
y$sum <- c("sum" = ifelse(length(x) == 0, NA_real_, sum(x, na.rm = FALSE))) |
177 | ||
178 | 1143x |
y$mean <- c("mean" = ifelse(length(x) == 0, NA_real_, mean(x, na.rm = FALSE))) |
179 | ||
180 | 1143x |
y$sd <- c("sd" = stats::sd(x, na.rm = FALSE)) |
181 | ||
182 | 1143x |
y$se <- c("se" = stats::sd(x, na.rm = FALSE) / sqrt(length(stats::na.omit(x)))) |
183 | ||
184 | 1143x |
y$mean_sd <- c(y$mean, "sd" = stats::sd(x, na.rm = FALSE)) |
185 | ||
186 | 1143x |
y$mean_se <- c(y$mean, y$se) |
187 | ||
188 | 1143x |
mean_ci <- stat_mean_ci(x, conf_level = control$conf_level, na.rm = FALSE, gg_helper = FALSE) |
189 | 1143x |
y$mean_ci <- formatters::with_label(mean_ci, paste("Mean", f_conf_level(control$conf_level))) |
190 | ||
191 | 1143x |
mean_sei <- y$mean[[1]] + c(-1, 1) * stats::sd(x, na.rm = FALSE) / sqrt(y$n) |
192 | 1143x |
names(mean_sei) <- c("mean_sei_lwr", "mean_sei_upr") |
193 | 1143x |
y$mean_sei <- formatters::with_label(mean_sei, "Mean -/+ 1xSE") |
194 | ||
195 | 1143x |
mean_sdi <- y$mean[[1]] + c(-1, 1) * stats::sd(x, na.rm = FALSE) |
196 | 1143x |
names(mean_sdi) <- c("mean_sdi_lwr", "mean_sdi_upr") |
197 | 1143x |
y$mean_sdi <- formatters::with_label(mean_sdi, "Mean -/+ 1xSD") |
198 | 1143x |
mean_ci_3d <- c(y$mean, y$mean_ci) |
199 | 1143x |
y$mean_ci_3d <- formatters::with_label(mean_ci_3d, paste0("Mean (", f_conf_level(control$conf_level), ")")) |
200 | ||
201 | 1143x |
mean_pval <- stat_mean_pval(x, test_mean = control$test_mean, na.rm = FALSE, n_min = 2) |
202 | 1143x |
y$mean_pval <- formatters::with_label(mean_pval, paste("Mean", f_pval(control$test_mean))) |
203 | ||
204 | 1143x |
y$median <- c("median" = stats::median(x, na.rm = FALSE)) |
205 | ||
206 | 1143x |
y$mad <- c("mad" = stats::median(x - y$median, na.rm = FALSE)) |
207 | ||
208 | 1143x |
median_ci <- stat_median_ci(x, conf_level = control$conf_level, na.rm = FALSE, gg_helper = FALSE) |
209 | 1143x |
y$median_ci <- formatters::with_label(median_ci, paste("Median", f_conf_level(control$conf_level))) |
210 | ||
211 | 1143x |
median_ci_3d <- c(y$median, median_ci) |
212 | 1143x |
y$median_ci_3d <- formatters::with_label(median_ci_3d, paste0("Median (", f_conf_level(control$conf_level), ")")) |
213 | ||
214 | 1143x |
q <- control$quantiles |
215 | 1143x |
if (any(is.na(x))) { |
216 | 2x |
qnts <- rep(NA_real_, length(q)) |
217 |
} else { |
|
218 | 1141x |
qnts <- stats::quantile(x, probs = q, type = control$quantile_type, na.rm = FALSE) |
219 |
}
|
|
220 | 1143x |
names(qnts) <- paste("quantile", q, sep = "_") |
221 | 1143x |
y$quantiles <- formatters::with_label(qnts, paste0(paste(paste0(q * 100, "%"), collapse = " and "), "-ile")) |
222 | ||
223 | 1143x |
y$iqr <- c("iqr" = ifelse( |
224 | 1143x |
any(is.na(x)), |
225 | 1143x |
NA_real_,
|
226 | 1143x |
stats::IQR(x, na.rm = FALSE, type = control$quantile_type) |
227 |
)) |
|
228 | ||
229 | 1143x |
y$range <- stats::setNames(range_noinf(x, na.rm = FALSE), c("min", "max")) |
230 | 1143x |
y$min <- y$range[1] |
231 | 1143x |
y$max <- y$range[2] |
232 | ||
233 | 1143x |
y$median_range <- formatters::with_label(c(y$median, y$range), "Median (Min - Max)") |
234 | ||
235 | 1143x |
y$cv <- c("cv" = unname(y$sd) / unname(y$mean) * 100) |
236 | ||
237 |
# Geometric Mean - Convert negative values to NA for log calculation.
|
|
238 | 1143x |
geom_verbose <- args_list[["geom_verbose"]] %||% FALSE # Additional info if requested |
239 | 1143x |
checkmate::assert_flag(geom_verbose) |
240 | 1143x |
x_no_negative_vals <- x |
241 | 1143x |
if (identical(x_no_negative_vals, numeric())) { |
242 | 76x |
x_no_negative_vals <- NA |
243 |
}
|
|
244 | 1143x |
x_no_negative_vals[x_no_negative_vals <= 0] <- NA |
245 | 1143x |
if (geom_verbose) { |
246 | 2x |
if (any(x <= 0)) { |
247 | 2x |
warning("Negative values were converted to NA for calculation of the geometric mean.") |
248 |
}
|
|
249 | 2x |
if (all(is.na(x_no_negative_vals))) { |
250 | 1x |
warning("Since all values are negative or NA, the geometric mean is NA.") |
251 |
}
|
|
252 |
}
|
|
253 | 1143x |
y$geom_mean <- c("geom_mean" = exp(mean(log(x_no_negative_vals), na.rm = FALSE))) |
254 | 1143x |
y$geom_sd <- c("geom_sd" = geom_sd <- exp(sd(log(x_no_negative_vals), na.rm = FALSE))) |
255 | 1143x |
y$geom_mean_sd <- c(y$geom_mean, y$geom_sd) |
256 | 1143x |
geom_mean_ci <- stat_mean_ci(x, conf_level = control$conf_level, na.rm = FALSE, gg_helper = FALSE, geom_mean = TRUE) |
257 | 1143x |
y$geom_mean_ci <- formatters::with_label(geom_mean_ci, paste("Geometric Mean", f_conf_level(control$conf_level))) |
258 | ||
259 | 1143x |
y$geom_cv <- c("geom_cv" = sqrt(exp(stats::sd(log(x_no_negative_vals), na.rm = FALSE) ^ 2) - 1) * 100) # styler: off |
260 | ||
261 | 1143x |
geom_mean_ci_3d <- c(y$geom_mean, y$geom_mean_ci) |
262 | 1143x |
y$geom_mean_ci_3d <- formatters::with_label( |
263 | 1143x |
geom_mean_ci_3d,
|
264 | 1143x |
paste0("Geometric Mean (", f_conf_level(control$conf_level), ")") |
265 |
)
|
|
266 | ||
267 |
# Compare with reference group
|
|
268 | 1143x |
if (isTRUE(compare_with_ref_group)) { |
269 | 13x |
.ref_group <- args_list[[".ref_group"]] |
270 | 13x |
.in_ref_col <- args_list[[".in_ref_col"]] |
271 | 13x |
checkmate::assert_numeric(.ref_group) |
272 | 13x |
checkmate::assert_flag(.in_ref_col) |
273 | ||
274 | 13x |
y$pval <- numeric() |
275 | 13x |
if (!.in_ref_col && n_available(x) > 1 && n_available(.ref_group) > 1) { |
276 | 9x |
y$pval <- stats::t.test(x, .ref_group)$p.value |
277 |
}
|
|
278 |
}
|
|
279 | ||
280 | 1143x |
y
|
281 |
}
|
|
282 | ||
283 |
#' @describeIn analyze_variables Method for `factor` class.
|
|
284 |
#'
|
|
285 |
#' @return
|
|
286 |
#' * If `x` is of class `factor` or converted from `character`, returns a `list` with named `numeric` items:
|
|
287 |
#' * `n`: The [length()] of `x`.
|
|
288 |
#' * `count`: A list with the number of cases for each level of the factor `x`.
|
|
289 |
#' * `count_fraction`: Similar to `count` but also includes the proportion of cases for each level of the
|
|
290 |
#' factor `x` relative to the denominator, or `NA` if the denominator is zero.
|
|
291 |
#'
|
|
292 |
#' @note
|
|
293 |
#' * If `x` is an empty `factor`, a list is still returned for `counts` with one element
|
|
294 |
#' per factor level. If there are no levels in `x`, the function fails.
|
|
295 |
#' * If factor variables contain `NA`, these `NA` values are excluded by default. To include `NA` values
|
|
296 |
#' set `na_rm = FALSE` and missing values will be displayed as an `NA` level. Alternatively, an explicit
|
|
297 |
#' factor level can be defined for `NA` values during pre-processing via [df_explicit_na()] - the
|
|
298 |
#' default `na_level` (`"<Missing>"`) will also be excluded when `na_rm` is set to `TRUE`.
|
|
299 |
#'
|
|
300 |
#' @method s_summary factor
|
|
301 |
#'
|
|
302 |
#' @examples
|
|
303 |
#' # `s_summary.factor`
|
|
304 |
#'
|
|
305 |
#' ## Basic usage:
|
|
306 |
#' s_summary(factor(c("a", "a", "b", "c", "a")))
|
|
307 |
#'
|
|
308 |
#' # Empty factor returns zero-filled items.
|
|
309 |
#' s_summary(factor(levels = c("a", "b", "c")))
|
|
310 |
#'
|
|
311 |
#' ## Management of NA values.
|
|
312 |
#' x <- factor(c(NA, "Female"))
|
|
313 |
#' x <- explicit_na(x)
|
|
314 |
#' s_summary(x, na_rm = TRUE)
|
|
315 |
#' s_summary(x, na_rm = FALSE)
|
|
316 |
#'
|
|
317 |
#' ## Different denominators.
|
|
318 |
#' x <- factor(c("a", "a", "b", "c", "a"))
|
|
319 |
#' s_summary(x, denom = "N_row", .N_row = 10L)
|
|
320 |
#' s_summary(x, denom = "N_col", .N_col = 20L)
|
|
321 |
#'
|
|
322 |
#' @export
|
|
323 |
s_summary.factor <- function(x, denom = c("n", "N_col", "N_row"), ...) { |
|
324 | 304x |
assert_valid_factor(x) |
325 | 301x |
args_list <- list(...) |
326 | 301x |
.N_row <- args_list[[".N_row"]] # nolint |
327 | 301x |
.N_col <- args_list[[".N_col"]] # nolint |
328 | 301x |
na_rm <- args_list[["na_rm"]] %||% TRUE |
329 | 301x |
verbose <- args_list[["verbose"]] %||% TRUE |
330 | 301x |
compare_with_ref_group <- args_list[["compare_with_ref_group"]] |
331 | ||
332 | 301x |
if (na_rm) { |
333 | 292x |
x <- x[!is.na(x)] %>% fct_discard("<Missing>") |
334 |
} else { |
|
335 | 9x |
x <- x %>% explicit_na(label = "NA") |
336 |
}
|
|
337 | ||
338 | 301x |
y <- list() |
339 | ||
340 | 301x |
y$n <- list("n" = c("n" = length(x))) # all list of a list |
341 | ||
342 | 301x |
y$count <- lapply(as.list(table(x, useNA = "ifany")), setNames, nm = "count") |
343 | ||
344 | 301x |
denom <- match.arg(denom) %>% |
345 | 301x |
switch( |
346 | 301x |
n = length(x), |
347 | 301x |
N_row = .N_row, |
348 | 301x |
N_col = .N_col |
349 |
)
|
|
350 | ||
351 | 301x |
y$count_fraction <- lapply( |
352 | 301x |
y$count, |
353 | 301x |
function(x) { |
354 | 2182x |
c(x, "p" = ifelse(denom > 0, x / denom, 0)) |
355 |
}
|
|
356 |
)
|
|
357 | ||
358 | 301x |
y$count_fraction_fixed_dp <- y$count_fraction |
359 | ||
360 | 301x |
y$fraction <- lapply( |
361 | 301x |
y$count, |
362 | 301x |
function(count) c("num" = unname(count), "denom" = denom) |
363 |
)
|
|
364 | ||
365 | 301x |
y$n_blq <- list("n_blq" = c("n_blq" = sum(grepl("BLQ|LTR|<[1-9]|<PCLLOQ", x)))) |
366 | ||
367 | ||
368 | 301x |
if (isTRUE(compare_with_ref_group)) { |
369 | 16x |
.ref_group <- as_factor_keep_attributes(args_list[[".ref_group"]], verbose = verbose) |
370 | 16x |
.in_ref_col <- args_list[[".in_ref_col"]] |
371 | 16x |
checkmate::assert_flag(.in_ref_col) |
372 | 16x |
assert_valid_factor(x) |
373 | 16x |
assert_valid_factor(.ref_group) |
374 | ||
375 | 16x |
if (na_rm) { |
376 | 14x |
x <- x[!is.na(x)] %>% fct_discard("<Missing>") |
377 | 14x |
.ref_group <- .ref_group[!is.na(.ref_group)] %>% fct_discard("<Missing>") |
378 |
} else { |
|
379 | 2x |
x <- x %>% explicit_na(label = "NA") |
380 | 2x |
.ref_group <- .ref_group %>% explicit_na(label = "NA") |
381 |
}
|
|
382 | ||
383 | 1x |
if ("NA" %in% levels(x)) levels(.ref_group) <- c(levels(.ref_group), "NA") |
384 | 16x |
checkmate::assert_factor(x, levels = levels(.ref_group), min.levels = 2) |
385 | ||
386 | 16x |
y$pval_counts <- numeric() |
387 | 16x |
if (!.in_ref_col && length(x) > 0 && length(.ref_group) > 0) { |
388 | 13x |
tab <- rbind(table(x), table(.ref_group)) |
389 | 13x |
res <- suppressWarnings(stats::chisq.test(tab)) |
390 | 13x |
y$pval_counts <- res$p.value |
391 |
}
|
|
392 |
}
|
|
393 | ||
394 | 301x |
y
|
395 |
}
|
|
396 | ||
397 |
#' @describeIn analyze_variables Method for `character` class. This makes an automatic
|
|
398 |
#' conversion to factor (with a warning) and then forwards to the method for factors.
|
|
399 |
#'
|
|
400 |
#' @note
|
|
401 |
#' * Automatic conversion of character to factor does not guarantee that the table
|
|
402 |
#' can be generated correctly. In particular for sparse tables this very likely can fail.
|
|
403 |
#' It is therefore better to always pre-process the dataset such that factors are manually
|
|
404 |
#' created from character variables before passing the dataset to [rtables::build_table()].
|
|
405 |
#'
|
|
406 |
#' @method s_summary character
|
|
407 |
#'
|
|
408 |
#' @examples
|
|
409 |
#' # `s_summary.character`
|
|
410 |
#'
|
|
411 |
#' ## Basic usage:
|
|
412 |
#' s_summary(c("a", "a", "b", "c", "a"), verbose = FALSE)
|
|
413 |
#' s_summary(c("a", "a", "b", "c", "a", ""), .var = "x", na_rm = FALSE, verbose = FALSE)
|
|
414 |
#'
|
|
415 |
#' @export
|
|
416 |
s_summary.character <- function(x, denom = c("n", "N_col", "N_row"), ...) { |
|
417 | 12x |
args_list <- list(...) |
418 | 12x |
na_rm <- args_list[["na_rm"]] %||% TRUE |
419 | 12x |
verbose <- args_list[["verbose"]] %||% TRUE |
420 | ||
421 | 12x |
if (na_rm) { |
422 | 11x |
y <- as_factor_keep_attributes(x, verbose = verbose) |
423 |
} else { |
|
424 | 1x |
y <- as_factor_keep_attributes(x, verbose = verbose, na_level = "NA") |
425 |
}
|
|
426 | ||
427 | 12x |
s_summary(x = y, denom = denom, ...) |
428 |
}
|
|
429 | ||
430 |
#' @describeIn analyze_variables Method for `logical` class.
|
|
431 |
#'
|
|
432 |
#' @return
|
|
433 |
#' * If `x` is of class `logical`, returns a `list` with named `numeric` items:
|
|
434 |
#' * `n`: The [length()] of `x` (possibly after removing `NA`s).
|
|
435 |
#' * `count`: Count of `TRUE` in `x`.
|
|
436 |
#' * `count_fraction`: Count and proportion of `TRUE` in `x` relative to the denominator, or `NA` if the
|
|
437 |
#' denominator is zero. Note that `NA`s in `x` are never counted or leading to `NA` here.
|
|
438 |
#'
|
|
439 |
#' @method s_summary logical
|
|
440 |
#'
|
|
441 |
#' @examples
|
|
442 |
#' # `s_summary.logical`
|
|
443 |
#'
|
|
444 |
#' ## Basic usage:
|
|
445 |
#' s_summary(c(TRUE, FALSE, TRUE, TRUE))
|
|
446 |
#'
|
|
447 |
#' # Empty factor returns zero-filled items.
|
|
448 |
#' s_summary(as.logical(c()))
|
|
449 |
#'
|
|
450 |
#' ## Management of NA values.
|
|
451 |
#' x <- c(NA, TRUE, FALSE)
|
|
452 |
#' s_summary(x, na_rm = TRUE)
|
|
453 |
#' s_summary(x, na_rm = FALSE)
|
|
454 |
#'
|
|
455 |
#' ## Different denominators.
|
|
456 |
#' x <- c(TRUE, FALSE, TRUE, TRUE)
|
|
457 |
#' s_summary(x, denom = "N_row", .N_row = 10L)
|
|
458 |
#' s_summary(x, denom = "N_col", .N_col = 20L)
|
|
459 |
#'
|
|
460 |
#' @export
|
|
461 |
s_summary.logical <- function(x, denom = c("n", "N_col", "N_row"), ...) { |
|
462 | 211x |
checkmate::assert_logical(x) |
463 | 211x |
args_list <- list(...) |
464 | 211x |
.N_row <- args_list[[".N_row"]] # nolint |
465 | 211x |
.N_col <- args_list[[".N_col"]] # nolint |
466 | 211x |
na_rm <- args_list[["na_rm"]] %||% TRUE |
467 | 211x |
compare_with_ref_group <- args_list[["compare_with_ref_group"]] |
468 | ||
469 | 211x |
if (na_rm) { |
470 | 208x |
x <- x[!is.na(x)] |
471 |
}
|
|
472 | ||
473 | 211x |
y <- list() |
474 | 211x |
y$n <- c("n" = length(x)) |
475 | 211x |
denom <- match.arg(denom) %>% |
476 | 211x |
switch( |
477 | 211x |
n = length(x), |
478 | 211x |
N_row = .N_row, |
479 | 211x |
N_col = .N_col |
480 |
)
|
|
481 | 211x |
y$count <- c("count" = sum(x, na.rm = TRUE)) |
482 | 211x |
y$count_fraction <- c(y$count, "fraction" = ifelse(denom > 0, y$count / denom, 0)) |
483 | 211x |
y$count_fraction_fixed_dp <- y$count_fraction |
484 | 211x |
y$fraction <- c("num" = unname(y$count), "denom" = denom) |
485 | 211x |
y$n_blq <- c("n_blq" = 0L) |
486 | ||
487 | ||
488 | 211x |
if (isTRUE(compare_with_ref_group)) { |
489 | 4x |
.ref_group <- args_list[[".ref_group"]] |
490 | 4x |
.in_ref_col <- args_list[[".in_ref_col"]] |
491 | 4x |
checkmate::assert_flag(.in_ref_col) |
492 | ||
493 | 4x |
if (na_rm) { |
494 | 3x |
x <- stats::na.omit(x) |
495 | 3x |
.ref_group <- stats::na.omit(.ref_group) |
496 |
} else { |
|
497 | 1x |
x[is.na(x)] <- FALSE |
498 | 1x |
.ref_group[is.na(.ref_group)] <- FALSE |
499 |
}
|
|
500 | ||
501 | 4x |
y$pval_counts <- numeric() |
502 | 4x |
if (!.in_ref_col && length(x) > 0 && length(.ref_group) > 0) { |
503 | 4x |
x <- factor(x, levels = c(TRUE, FALSE)) |
504 | 4x |
.ref_group <- factor(.ref_group, levels = c(TRUE, FALSE)) |
505 | 4x |
tbl <- rbind(table(x), table(.ref_group)) |
506 | 4x |
y$pval_counts <- suppressWarnings(prop_chisq(tbl)) |
507 |
}
|
|
508 |
}
|
|
509 | ||
510 | 211x |
y
|
511 |
}
|
|
512 | ||
513 |
#' @describeIn analyze_variables Formatted analysis function which is used as `afun` in `analyze_vars()` and
|
|
514 |
#' `compare_vars()` and as `cfun` in `summarize_colvars()`.
|
|
515 |
#'
|
|
516 |
#' @param compare_with_ref_group (`flag`)\cr whether comparison statistics should be analyzed instead of summary
|
|
517 |
#' statistics (`compare_with_ref_group = TRUE` adds `pval` statistic comparing
|
|
518 |
#' against reference group).
|
|
519 |
#'
|
|
520 |
#' @return
|
|
521 |
#' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
522 |
#'
|
|
523 |
#' @note
|
|
524 |
#' * To use for comparison (with additional p-value statistic), parameter
|
|
525 |
#' `compare_with_ref_group` must be set to `TRUE`.
|
|
526 |
#' * Ensure that either all `NA` values are converted to an explicit `NA` level or all `NA` values are left as is.
|
|
527 |
#'
|
|
528 |
#' @examples
|
|
529 |
#' a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
|
|
530 |
#' a_summary(
|
|
531 |
#' factor(c("a", "a", "b", "c", "a")),
|
|
532 |
#' .ref_group = factor(c("a", "a", "b", "c")), compare_with_ref_group = TRUE, .in_ref_col = TRUE
|
|
533 |
#' )
|
|
534 |
#'
|
|
535 |
#' a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
|
|
536 |
#' a_summary(
|
|
537 |
#' c("A", "B", "A", "C"),
|
|
538 |
#' .ref_group = c("B", "A", "C"), .var = "x", compare_with_ref_group = TRUE, verbose = FALSE,
|
|
539 |
#' .in_ref_col = FALSE
|
|
540 |
#' )
|
|
541 |
#'
|
|
542 |
#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
|
|
543 |
#' a_summary(
|
|
544 |
#' c(TRUE, FALSE, FALSE, TRUE, TRUE),
|
|
545 |
#' .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare_with_ref_group = TRUE,
|
|
546 |
#' .in_ref_col = FALSE
|
|
547 |
#' )
|
|
548 |
#'
|
|
549 |
#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla")
|
|
550 |
#' a_summary(rnorm(10, 5, 1),
|
|
551 |
#' .ref_group = rnorm(20, -5, 1), .var = "bla", compare_with_ref_group = TRUE,
|
|
552 |
#' .in_ref_col = FALSE
|
|
553 |
#' )
|
|
554 |
#'
|
|
555 |
#' @export
|
|
556 |
a_summary <- function(x, |
|
557 |
...,
|
|
558 |
.stats = NULL, |
|
559 |
.stat_names = NULL, |
|
560 |
.formats = NULL, |
|
561 |
.labels = NULL, |
|
562 |
.indent_mods = NULL) { |
|
563 | 332x |
dots_extra_args <- list(...) |
564 | ||
565 |
# Check if there are user-defined functions
|
|
566 | 332x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
567 | 332x |
.stats <- default_and_custom_stats_list$all_stats # just the labels of stats |
568 | 332x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
569 | ||
570 |
# Correction of the pval indication if it is numeric or counts
|
|
571 | 332x |
type <- ifelse(is.numeric(x), "numeric", "counts") # counts is "categorical" |
572 | 332x |
.stats <- .correct_num_or_counts_pval(type, .stats) |
573 | ||
574 |
# Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params)
|
|
575 | 332x |
extra_afun_params <- retrieve_extra_afun_params( |
576 | 332x |
names(dots_extra_args$.additional_fun_parameters) |
577 |
)
|
|
578 | 332x |
dots_extra_args$.additional_fun_parameters <- NULL # After extraction we do not need them anymore |
579 | ||
580 |
# Check if compare_with_ref_group is TRUE but no ref col is set
|
|
581 | 332x |
if (isTRUE(dots_extra_args$compare_with_ref_group) && |
582 | 332x |
all( |
583 | 332x |
length(dots_extra_args[[".ref_group"]]) == 0, # only used for testing |
584 | 332x |
length(extra_afun_params[[".ref_group"]]) == 0 |
585 |
)
|
|
586 |
) { |
|
587 | ! |
stop( |
588 | ! |
"For comparison (compare_with_ref_group = TRUE), the reference group must be specified.",
|
589 | ! |
"\nSee ref_group in split_cols_by()."
|
590 |
)
|
|
591 |
}
|
|
592 | ||
593 |
# Main statistical functions application
|
|
594 | 332x |
x_stats <- .apply_stat_functions( |
595 | 332x |
default_stat_fnc = s_summary, |
596 | 332x |
custom_stat_fnc_list = custom_stat_functions, |
597 | 332x |
args_list = c( |
598 | 332x |
x = list(x), |
599 | 332x |
extra_afun_params,
|
600 | 332x |
dots_extra_args
|
601 |
)
|
|
602 |
)
|
|
603 | ||
604 |
# Fill in with stats defaults if needed
|
|
605 | 332x |
met_grp <- paste0(c("analyze_vars", type), collapse = "_") |
606 | 332x |
.stats <- get_stats( |
607 | 332x |
met_grp,
|
608 | 332x |
stats_in = .stats, |
609 | 332x |
custom_stats_in = names(custom_stat_functions), |
610 | 332x |
add_pval = dots_extra_args$compare_with_ref_group %||% FALSE |
611 |
)
|
|
612 | ||
613 | 332x |
x_stats <- x_stats[.stats] |
614 | ||
615 | 332x |
is_char <- is.character(x) || is.factor(x) |
616 | 332x |
if (is_char) { |
617 | 236x |
levels_per_stats <- lapply(x_stats, names) |
618 |
} else { |
|
619 | 96x |
levels_per_stats <- names(x_stats) %>% |
620 | 96x |
as.list() %>% |
621 | 96x |
setNames(names(x_stats)) |
622 |
}
|
|
623 | ||
624 |
# Fill in formats/indents/labels with custom input and defaults
|
|
625 | 332x |
.formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) |
626 | 332x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) |
627 | 332x |
lbls <- get_labels_from_stats(.stats, .labels, levels_per_stats) |
628 | ||
629 | 332x |
if (is_char) { |
630 |
# Keep pval_counts stat if present from comparisons and empty
|
|
631 | 236x |
if ("pval_counts" %in% names(x_stats) && length(x_stats[["pval_counts"]]) == 0) { |
632 | 3x |
x_stats[["pval_counts"]] <- list(NULL) %>% setNames("pval_counts") |
633 |
}
|
|
634 | ||
635 |
# Unlist stats
|
|
636 | 236x |
x_stats <- x_stats %>% |
637 | 236x |
.unlist_keep_nulls() %>% |
638 | 236x |
setNames(names(.formats)) |
639 |
}
|
|
640 | ||
641 |
# Check for custom labels from control_analyze_vars
|
|
642 | 332x |
.labels <- if ("control" %in% names(dots_extra_args)) { |
643 | 2x |
labels_use_control(lbls, dots_extra_args[["control"]], .labels) |
644 |
} else { |
|
645 | 330x |
lbls
|
646 |
}
|
|
647 | ||
648 |
# Auto format handling
|
|
649 | 332x |
.formats <- apply_auto_formatting( |
650 | 332x |
.formats,
|
651 | 332x |
x_stats,
|
652 | 332x |
extra_afun_params$.df_row, |
653 | 332x |
extra_afun_params$.var |
654 |
)
|
|
655 | ||
656 |
# Get and check statistical names from defaults
|
|
657 | 332x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
658 | ||
659 | 332x |
in_rows( |
660 | 332x |
.list = x_stats, |
661 | 332x |
.formats = .formats, |
662 | 332x |
.names = names(.labels), |
663 | 332x |
.stat_names = .stat_names, |
664 | 332x |
.labels = .labels %>% .unlist_keep_nulls(), |
665 | 332x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
666 |
)
|
|
667 |
}
|
|
668 | ||
669 |
#' @describeIn analyze_variables Layout-creating function which can take statistics function arguments
|
|
670 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
671 |
#'
|
|
672 |
#' @param ... additional arguments passed to `s_summary()`, including:
|
|
673 |
#' * `denom`: (`string`) See parameter description below.
|
|
674 |
#' * `.N_row`: (`numeric(1)`) Row-wise N (row group count) for the group of observations being analyzed (i.e. with no
|
|
675 |
#' column-based subsetting).
|
|
676 |
#' * `.N_col`: (`numeric(1)`) Column-wise N (column count) for the full column being tabulated within.
|
|
677 |
#' * `verbose`: (`flag`) Whether additional warnings and messages should be printed. Mainly used to print out
|
|
678 |
#' information about factor casting. Defaults to `TRUE`. Used for `character`/`factor` variables only.
|
|
679 |
#' @param compare_with_ref_group (logical)\cr whether to compare the variable with a reference group.
|
|
680 |
#' @param .indent_mods (named `integer`)\cr indent modifiers for the labels. Each element of the vector
|
|
681 |
#' should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
|
|
682 |
#' for that statistic's row label.
|
|
683 |
#'
|
|
684 |
#' @return
|
|
685 |
#' * `analyze_vars()` returns a layout object suitable for passing to further layouting functions,
|
|
686 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
687 |
#' the statistics from `s_summary()` to the table layout.
|
|
688 |
#'
|
|
689 |
#' @examples
|
|
690 |
#' ## Fabricated dataset.
|
|
691 |
#' dta_test <- data.frame(
|
|
692 |
#' USUBJID = rep(1:6, each = 3),
|
|
693 |
#' PARAMCD = rep("lab", 6 * 3),
|
|
694 |
#' AVISIT = rep(paste0("V", 1:3), 6),
|
|
695 |
#' ARM = rep(LETTERS[1:3], rep(6, 3)),
|
|
696 |
#' AVAL = c(9:1, rep(NA, 9))
|
|
697 |
#' )
|
|
698 |
#'
|
|
699 |
#' # `analyze_vars()` in `rtables` pipelines
|
|
700 |
#' ## Default output within a `rtables` pipeline.
|
|
701 |
#' l <- basic_table() %>%
|
|
702 |
#' split_cols_by(var = "ARM") %>%
|
|
703 |
#' split_rows_by(var = "AVISIT") %>%
|
|
704 |
#' analyze_vars(vars = "AVAL")
|
|
705 |
#'
|
|
706 |
#' build_table(l, df = dta_test)
|
|
707 |
#'
|
|
708 |
#' ## Select and format statistics output.
|
|
709 |
#' l <- basic_table() %>%
|
|
710 |
#' split_cols_by(var = "ARM") %>%
|
|
711 |
#' split_rows_by(var = "AVISIT") %>%
|
|
712 |
#' analyze_vars(
|
|
713 |
#' vars = "AVAL",
|
|
714 |
#' .stats = c("n", "mean_sd", "quantiles"),
|
|
715 |
#' .formats = c("mean_sd" = "xx.x, xx.x"),
|
|
716 |
#' .labels = c(n = "n", mean_sd = "Mean, SD", quantiles = c("Q1 - Q3"))
|
|
717 |
#' )
|
|
718 |
#'
|
|
719 |
#' build_table(l, df = dta_test)
|
|
720 |
#'
|
|
721 |
#' ## Use arguments interpreted by `s_summary`.
|
|
722 |
#' l <- basic_table() %>%
|
|
723 |
#' split_cols_by(var = "ARM") %>%
|
|
724 |
#' split_rows_by(var = "AVISIT") %>%
|
|
725 |
#' analyze_vars(vars = "AVAL", na_rm = FALSE)
|
|
726 |
#'
|
|
727 |
#' build_table(l, df = dta_test)
|
|
728 |
#'
|
|
729 |
#' ## Handle `NA` levels first when summarizing factors.
|
|
730 |
#' dta_test$AVISIT <- NA_character_
|
|
731 |
#' dta_test <- df_explicit_na(dta_test)
|
|
732 |
#' l <- basic_table() %>%
|
|
733 |
#' split_cols_by(var = "ARM") %>%
|
|
734 |
#' analyze_vars(vars = "AVISIT", na_rm = FALSE)
|
|
735 |
#'
|
|
736 |
#' build_table(l, df = dta_test)
|
|
737 |
#'
|
|
738 |
#' # auto format
|
|
739 |
#' dt <- data.frame("VAR" = c(0.001, 0.2, 0.0011000, 3, 4))
|
|
740 |
#' basic_table() %>%
|
|
741 |
#' analyze_vars(
|
|
742 |
#' vars = "VAR",
|
|
743 |
#' .stats = c("n", "mean", "mean_sd", "range"),
|
|
744 |
#' .formats = c("mean_sd" = "auto", "range" = "auto")
|
|
745 |
#' ) %>%
|
|
746 |
#' build_table(dt)
|
|
747 |
#'
|
|
748 |
#' @export
|
|
749 |
#' @order 2
|
|
750 |
analyze_vars <- function(lyt, |
|
751 |
vars,
|
|
752 |
var_labels = vars, |
|
753 |
na_str = default_na_str(), |
|
754 |
nested = TRUE, |
|
755 |
show_labels = "default", |
|
756 |
table_names = vars, |
|
757 |
section_div = NA_character_, |
|
758 |
...,
|
|
759 |
na_rm = TRUE, |
|
760 |
compare_with_ref_group = FALSE, |
|
761 |
.stats = c("n", "mean_sd", "median", "range", "count_fraction"), |
|
762 |
.stat_names = NULL, |
|
763 |
.formats = NULL, |
|
764 |
.labels = NULL, |
|
765 |
.indent_mods = NULL) { |
|
766 |
# Depending on main functions
|
|
767 | 40x |
extra_args <- list( |
768 | 40x |
"na_rm" = na_rm, |
769 | 40x |
"compare_with_ref_group" = compare_with_ref_group, |
770 |
...
|
|
771 |
)
|
|
772 | ||
773 |
# Needed defaults
|
|
774 | 40x |
if (!is.null(.stats)) extra_args[[".stats"]] <- .stats |
775 | 3x |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
776 | 9x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
777 | 4x |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
778 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
779 | ||
780 |
# Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params)
|
|
781 | 40x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
782 | 40x |
formals(a_summary) <- c( |
783 | 40x |
formals(a_summary), |
784 | 40x |
extra_args[[".additional_fun_parameters"]] |
785 |
)
|
|
786 | ||
787 |
# Main {rtables} structural call
|
|
788 | 40x |
analyze( |
789 | 40x |
lyt = lyt, |
790 | 40x |
vars = vars, |
791 | 40x |
var_labels = var_labels, |
792 | 40x |
afun = a_summary, |
793 | 40x |
na_str = na_str, |
794 | 40x |
inclNAs = !na_rm, |
795 | 40x |
nested = nested, |
796 | 40x |
extra_args = extra_args, |
797 | 40x |
show_labels = show_labels, |
798 | 40x |
table_names = table_names, |
799 | 40x |
section_div = section_div |
800 |
)
|
|
801 |
}
|
1 |
#' Formatting functions
|
|
2 |
#'
|
|
3 |
#' See below for the list of formatting functions created in `tern` to work with `rtables`.
|
|
4 |
#'
|
|
5 |
#' Other available formats can be listed via [`formatters::list_valid_format_labels()`]. Additional
|
|
6 |
#' custom formats can be created via the [`formatters::sprintf_format()`] function.
|
|
7 |
#'
|
|
8 |
#' @family formatting functions
|
|
9 |
#' @name formatting_functions
|
|
10 |
NULL
|
|
11 | ||
12 |
#' Format fraction and percentage
|
|
13 |
#'
|
|
14 |
#' @description `r lifecycle::badge("stable")`
|
|
15 |
#'
|
|
16 |
#' Formats a fraction together with ratio in percent.
|
|
17 |
#'
|
|
18 |
#' @param x (named `integer`)\cr vector with elements `num` and `denom`.
|
|
19 |
#' @param ... not used. Required for `rtables` interface.
|
|
20 |
#'
|
|
21 |
#' @return A string in the format `num / denom (ratio %)`. If `num` is 0, the format is `num / denom`.
|
|
22 |
#'
|
|
23 |
#' @examples
|
|
24 |
#' format_fraction(x = c(num = 2L, denom = 3L))
|
|
25 |
#' format_fraction(x = c(num = 0L, denom = 3L))
|
|
26 |
#'
|
|
27 |
#' @family formatting functions
|
|
28 |
#' @export
|
|
29 |
format_fraction <- function(x, ...) { |
|
30 | 220x |
attr(x, "label") <- NULL |
31 | ||
32 | 220x |
checkmate::assert_vector(x) |
33 | 220x |
checkmate::assert_count(x["num"]) |
34 | 218x |
checkmate::assert_count(x["denom"]) |
35 | ||
36 | 218x |
result <- if (x["num"] == 0) { |
37 | 10x |
paste0(x["num"], "/", x["denom"]) |
38 |
} else { |
|
39 | 208x |
paste0( |
40 | 208x |
x["num"], "/", x["denom"], |
41 | 208x |
" (", round(x["num"] / x["denom"] * 100, 1), "%)" |
42 |
)
|
|
43 |
}
|
|
44 | ||
45 | 218x |
return(result) |
46 |
}
|
|
47 | ||
48 |
#' Format fraction and percentage with fixed single decimal place
|
|
49 |
#'
|
|
50 |
#' @description `r lifecycle::badge("stable")`
|
|
51 |
#'
|
|
52 |
#' Formats a fraction together with ratio in percent with fixed single decimal place.
|
|
53 |
#' Includes trailing zero in case of whole number percentages to always keep one decimal place.
|
|
54 |
#'
|
|
55 |
#' @inheritParams format_fraction
|
|
56 |
#'
|
|
57 |
#' @return A string in the format `num / denom (ratio %)`. If `num` is 0, the format is `num / denom`.
|
|
58 |
#'
|
|
59 |
#' @examples
|
|
60 |
#' format_fraction_fixed_dp(x = c(num = 1L, denom = 2L))
|
|
61 |
#' format_fraction_fixed_dp(x = c(num = 1L, denom = 4L))
|
|
62 |
#' format_fraction_fixed_dp(x = c(num = 0L, denom = 3L))
|
|
63 |
#'
|
|
64 |
#' @family formatting functions
|
|
65 |
#' @export
|
|
66 |
format_fraction_fixed_dp <- function(x, ...) { |
|
67 | 3x |
attr(x, "label") <- NULL |
68 | 3x |
checkmate::assert_vector(x) |
69 | 3x |
checkmate::assert_count(x["num"]) |
70 | 3x |
checkmate::assert_count(x["denom"]) |
71 | ||
72 | 3x |
result <- if (x["num"] == 0) { |
73 | 1x |
paste0(x["num"], "/", x["denom"]) |
74 |
} else { |
|
75 | 2x |
paste0( |
76 | 2x |
x["num"], "/", x["denom"], |
77 | 2x |
" (", sprintf("%.1f", round(x["num"] / x["denom"] * 100, 1)), "%)" |
78 |
)
|
|
79 |
}
|
|
80 | 3x |
return(result) |
81 |
}
|
|
82 | ||
83 |
#' Format count and fraction
|
|
84 |
#'
|
|
85 |
#' @description `r lifecycle::badge("stable")`
|
|
86 |
#'
|
|
87 |
#' Formats a count together with fraction with special consideration when count is `0`.
|
|
88 |
#'
|
|
89 |
#' @param x (`numeric(2)`)\cr vector of length 2 with count and fraction, respectively.
|
|
90 |
#' @param ... not used. Required for `rtables` interface.
|
|
91 |
#'
|
|
92 |
#' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`.
|
|
93 |
#'
|
|
94 |
#' @examples
|
|
95 |
#' format_count_fraction(x = c(2, 0.6667))
|
|
96 |
#' format_count_fraction(x = c(0, 0))
|
|
97 |
#'
|
|
98 |
#' @family formatting functions
|
|
99 |
#' @export
|
|
100 |
format_count_fraction <- function(x, ...) { |
|
101 | 102x |
attr(x, "label") <- NULL |
102 | ||
103 | 102x |
if (any(is.na(x))) { |
104 | 1x |
return("NA") |
105 |
}
|
|
106 | ||
107 | 101x |
checkmate::assert_vector(x) |
108 | 101x |
checkmate::assert_integerish(x[1]) |
109 | 101x |
assert_proportion_value(x[2], include_boundaries = TRUE) |
110 | ||
111 | 101x |
result <- if (x[1] == 0) { |
112 | 13x |
"0"
|
113 |
} else { |
|
114 | 88x |
paste0(x[1], " (", round(x[2] * 100, 1), "%)") |
115 |
}
|
|
116 | ||
117 | 101x |
return(result) |
118 |
}
|
|
119 | ||
120 |
#' Format count and percentage with fixed single decimal place
|
|
121 |
#'
|
|
122 |
#' @description `r lifecycle::badge("experimental")`
|
|
123 |
#'
|
|
124 |
#' Formats a count together with fraction with special consideration when count is `0`.
|
|
125 |
#'
|
|
126 |
#' @inheritParams format_count_fraction
|
|
127 |
#'
|
|
128 |
#' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`.
|
|
129 |
#'
|
|
130 |
#' @examples
|
|
131 |
#' format_count_fraction_fixed_dp(x = c(2, 0.6667))
|
|
132 |
#' format_count_fraction_fixed_dp(x = c(2, 0.5))
|
|
133 |
#' format_count_fraction_fixed_dp(x = c(0, 0))
|
|
134 |
#'
|
|
135 |
#' @family formatting functions
|
|
136 |
#' @export
|
|
137 |
format_count_fraction_fixed_dp <- function(x, ...) { |
|
138 | 1408x |
attr(x, "label") <- NULL |
139 | ||
140 | 1408x |
if (any(is.na(x))) { |
141 | ! |
return("NA") |
142 |
}
|
|
143 | ||
144 | 1408x |
checkmate::assert_vector(x) |
145 | 1408x |
checkmate::assert_integerish(x[1]) |
146 | 1408x |
assert_proportion_value(x[2], include_boundaries = TRUE) |
147 | ||
148 | 1408x |
result <- if (x[1] == 0) { |
149 | 195x |
"0"
|
150 | 1408x |
} else if (.is_equal_float(x[2], 1)) { |
151 | 549x |
sprintf("%d (100%%)", x[1]) |
152 |
} else { |
|
153 | 664x |
sprintf("%d (%.1f%%)", x[1], x[2] * 100) |
154 |
}
|
|
155 | ||
156 | 1408x |
return(result) |
157 |
}
|
|
158 | ||
159 |
#' Format count and fraction with special case for count < 10
|
|
160 |
#'
|
|
161 |
#' @description `r lifecycle::badge("stable")`
|
|
162 |
#'
|
|
163 |
#' Formats a count together with fraction with special consideration when count is less than 10.
|
|
164 |
#'
|
|
165 |
#' @inheritParams format_count_fraction
|
|
166 |
#'
|
|
167 |
#' @return A string in the format `count (fraction %)`. If `count` is less than 10, only `count` is printed.
|
|
168 |
#'
|
|
169 |
#' @examples
|
|
170 |
#' format_count_fraction_lt10(x = c(275, 0.9673))
|
|
171 |
#' format_count_fraction_lt10(x = c(2, 0.6667))
|
|
172 |
#' format_count_fraction_lt10(x = c(9, 1))
|
|
173 |
#'
|
|
174 |
#' @family formatting functions
|
|
175 |
#' @export
|
|
176 |
format_count_fraction_lt10 <- function(x, ...) { |
|
177 | 7x |
attr(x, "label") <- NULL |
178 | ||
179 | 7x |
if (any(is.na(x))) { |
180 | 1x |
return("NA") |
181 |
}
|
|
182 | ||
183 | 6x |
checkmate::assert_vector(x) |
184 | 6x |
checkmate::assert_integerish(x[1]) |
185 | 6x |
assert_proportion_value(x[2], include_boundaries = TRUE) |
186 | ||
187 | 6x |
result <- if (x[1] < 10) { |
188 | 3x |
paste0(x[1]) |
189 |
} else { |
|
190 | 3x |
paste0(x[1], " (", round(x[2] * 100, 1), "%)") |
191 |
}
|
|
192 | ||
193 | 6x |
return(result) |
194 |
}
|
|
195 | ||
196 |
#' Format XX as a formatting function
|
|
197 |
#'
|
|
198 |
#' Translate a string where x and dots are interpreted as number place
|
|
199 |
#' holders, and others as formatting elements.
|
|
200 |
#'
|
|
201 |
#' @param str (`string`)\cr template.
|
|
202 |
#'
|
|
203 |
#' @return An `rtables` formatting function.
|
|
204 |
#'
|
|
205 |
#' @examples
|
|
206 |
#' test <- list(c(1.658, 0.5761), c(1e1, 785.6))
|
|
207 |
#'
|
|
208 |
#' z <- format_xx("xx (xx.x)")
|
|
209 |
#' sapply(test, z)
|
|
210 |
#'
|
|
211 |
#' z <- format_xx("xx.x - xx.x")
|
|
212 |
#' sapply(test, z)
|
|
213 |
#'
|
|
214 |
#' z <- format_xx("xx.x, incl. xx.x% NE")
|
|
215 |
#' sapply(test, z)
|
|
216 |
#'
|
|
217 |
#' @family formatting functions
|
|
218 |
#' @export
|
|
219 |
format_xx <- function(str) { |
|
220 |
# Find position in the string.
|
|
221 | 1x |
positions <- gregexpr(pattern = "x+\\.x+|x+", text = str, perl = TRUE) |
222 | 1x |
x_positions <- regmatches(x = str, m = positions)[[1]] |
223 | ||
224 |
# Roundings depends on the number of x behind [.].
|
|
225 | 1x |
roundings <- lapply( |
226 | 1x |
X = x_positions, |
227 | 1x |
function(x) { |
228 | 2x |
y <- strsplit(split = "\\.", x = x)[[1]] |
229 | 2x |
rounding <- function(x) { |
230 | 4x |
round(x, digits = ifelse(length(y) > 1, nchar(y[2]), 0)) |
231 |
}
|
|
232 | 2x |
return(rounding) |
233 |
}
|
|
234 |
)
|
|
235 | ||
236 | 1x |
rtable_format <- function(x, output) { |
237 | 2x |
values <- Map(y = x, fun = roundings, function(y, fun) fun(y)) |
238 | 2x |
regmatches(x = str, m = positions)[[1]] <- values |
239 | 2x |
return(str) |
240 |
}
|
|
241 | ||
242 | 1x |
return(rtable_format) |
243 |
}
|
|
244 | ||
245 |
#' Format numeric values by significant figures
|
|
246 |
#'
|
|
247 |
#' Format numeric values to print with a specified number of significant figures.
|
|
248 |
#'
|
|
249 |
#' @param sigfig (`integer(1)`)\cr number of significant figures to display.
|
|
250 |
#' @param format (`string`)\cr the format label (string) to apply when printing the value. Decimal
|
|
251 |
#' places in string are ignored in favor of formatting by significant figures. Formats options are:
|
|
252 |
#' `"xx"`, `"xx / xx"`, `"(xx, xx)"`, `"xx - xx"`, and `"xx (xx)"`.
|
|
253 |
#' @param num_fmt (`string`)\cr numeric format modifiers to apply to the value. Defaults to `"fg"` for
|
|
254 |
#' standard significant figures formatting - fixed (non-scientific notation) format (`"f"`)
|
|
255 |
#' and `sigfig` equal to number of significant figures instead of decimal places (`"g"`). See the
|
|
256 |
#' [formatC()] `format` argument for more options.
|
|
257 |
#'
|
|
258 |
#' @return An `rtables` formatting function.
|
|
259 |
#'
|
|
260 |
#' @examples
|
|
261 |
#' fmt_3sf <- format_sigfig(3)
|
|
262 |
#' fmt_3sf(1.658)
|
|
263 |
#' fmt_3sf(1e1)
|
|
264 |
#'
|
|
265 |
#' fmt_5sf <- format_sigfig(5)
|
|
266 |
#' fmt_5sf(0.57)
|
|
267 |
#' fmt_5sf(0.000025645)
|
|
268 |
#'
|
|
269 |
#' @family formatting functions
|
|
270 |
#' @export
|
|
271 |
format_sigfig <- function(sigfig, format = "xx", num_fmt = "fg") { |
|
272 | 3x |
checkmate::assert_integerish(sigfig) |
273 | 3x |
format <- gsub("xx\\.|xx\\.x+", "xx", format) |
274 | 3x |
checkmate::assert_choice(format, c("xx", "xx / xx", "(xx, xx)", "xx - xx", "xx (xx)")) |
275 | 3x |
function(x, ...) { |
276 | ! |
if (!is.numeric(x)) stop("`format_sigfig` cannot be used for non-numeric values. Please choose another format.") |
277 | 12x |
num <- formatC(signif(x, digits = sigfig), digits = sigfig, format = num_fmt, flag = "#") |
278 | 12x |
num <- gsub("\\.$", "", num) # remove trailing "." |
279 | ||
280 | 12x |
format_value(num, format) |
281 |
}
|
|
282 |
}
|
|
283 | ||
284 |
#' Format fraction with lower threshold
|
|
285 |
#'
|
|
286 |
#' @description `r lifecycle::badge("stable")`
|
|
287 |
#'
|
|
288 |
#' Formats a fraction when the second element of the input `x` is the fraction. It applies
|
|
289 |
#' a lower threshold, below which it is just stated that the fraction is smaller than that.
|
|
290 |
#'
|
|
291 |
#' @param threshold (`proportion`)\cr lower threshold.
|
|
292 |
#'
|
|
293 |
#' @return An `rtables` formatting function that takes numeric input `x` where the second
|
|
294 |
#' element is the fraction that is formatted. If the fraction is above or equal to the threshold,
|
|
295 |
#' then it is displayed in percentage. If it is positive but below the threshold, it returns,
|
|
296 |
#' e.g. "<1" if the threshold is `0.01`. If it is zero, then just "0" is returned.
|
|
297 |
#'
|
|
298 |
#' @examples
|
|
299 |
#' format_fun <- format_fraction_threshold(0.05)
|
|
300 |
#' format_fun(x = c(20, 0.1))
|
|
301 |
#' format_fun(x = c(2, 0.01))
|
|
302 |
#' format_fun(x = c(0, 0))
|
|
303 |
#'
|
|
304 |
#' @family formatting functions
|
|
305 |
#' @export
|
|
306 |
format_fraction_threshold <- function(threshold) { |
|
307 | 1x |
assert_proportion_value(threshold) |
308 | 1x |
string_below_threshold <- paste0("<", round(threshold * 100)) |
309 | 1x |
function(x, ...) { |
310 | 3x |
assert_proportion_value(x[2], include_boundaries = TRUE) |
311 | 3x |
ifelse( |
312 | 3x |
x[2] > 0.01, |
313 | 3x |
round(x[2] * 100), |
314 | 3x |
ifelse( |
315 | 3x |
x[2] == 0, |
316 | 3x |
"0",
|
317 | 3x |
string_below_threshold
|
318 |
)
|
|
319 |
)
|
|
320 |
}
|
|
321 |
}
|
|
322 | ||
323 |
#' Format extreme values
|
|
324 |
#'
|
|
325 |
#' @description `r lifecycle::badge("stable")`
|
|
326 |
#'
|
|
327 |
#' `rtables` formatting functions that handle extreme values.
|
|
328 |
#'
|
|
329 |
#' @param digits (`integer(1)`)\cr number of decimal places to display.
|
|
330 |
#'
|
|
331 |
#' @details For each input, apply a format to the specified number of `digits`. If the value is
|
|
332 |
#' below a threshold, it returns "<0.01" e.g. if the number of `digits` is 2. If the value is
|
|
333 |
#' above a threshold, it returns ">999.99" e.g. if the number of `digits` is 2.
|
|
334 |
#' If it is zero, then returns "0.00".
|
|
335 |
#'
|
|
336 |
#' @family formatting functions
|
|
337 |
#' @name extreme_format
|
|
338 |
NULL
|
|
339 | ||
340 |
#' @describeIn extreme_format Internal helper function to calculate the threshold and create formatted strings
|
|
341 |
#' used in Formatting Functions. Returns a list with elements `threshold` and `format_string`.
|
|
342 |
#'
|
|
343 |
#' @return
|
|
344 |
#' * `h_get_format_threshold()` returns a `list` of 2 elements: `threshold`, with `low` and `high` thresholds,
|
|
345 |
#' and `format_string`, with thresholds formatted as strings.
|
|
346 |
#'
|
|
347 |
#' @examples
|
|
348 |
#' h_get_format_threshold(2L)
|
|
349 |
#'
|
|
350 |
#' @export
|
|
351 |
h_get_format_threshold <- function(digits = 2L) { |
|
352 | 2013x |
checkmate::assert_integerish(digits) |
353 | ||
354 | 2013x |
low_threshold <- 1 / (10 ^ digits) # styler: off |
355 | 2013x |
high_threshold <- 1000 - (1 / (10 ^ digits)) # styler: off |
356 | ||
357 | 2013x |
string_below_threshold <- paste0("<", low_threshold) |
358 | 2013x |
string_above_threshold <- paste0(">", high_threshold) |
359 | ||
360 | 2013x |
list( |
361 | 2013x |
"threshold" = c(low = low_threshold, high = high_threshold), |
362 | 2013x |
"format_string" = c(low = string_below_threshold, high = string_above_threshold) |
363 |
)
|
|
364 |
}
|
|
365 | ||
366 |
#' @describeIn extreme_format Internal helper function to apply a threshold format to a value.
|
|
367 |
#' Creates a formatted string to be used in Formatting Functions.
|
|
368 |
#'
|
|
369 |
#' @param x (`numeric(1)`)\cr value to format.
|
|
370 |
#'
|
|
371 |
#' @return
|
|
372 |
#' * `h_format_threshold()` returns the given value, or if the value is not within the digit threshold the relation
|
|
373 |
#' of the given value to the digit threshold, as a formatted string.
|
|
374 |
#'
|
|
375 |
#' @examples
|
|
376 |
#' h_format_threshold(0.001)
|
|
377 |
#' h_format_threshold(1000)
|
|
378 |
#'
|
|
379 |
#' @export
|
|
380 |
h_format_threshold <- function(x, digits = 2L) { |
|
381 | 2015x |
if (is.na(x)) { |
382 | 4x |
return(x) |
383 |
}
|
|
384 | ||
385 | 2011x |
checkmate::assert_numeric(x, lower = 0) |
386 | ||
387 | 2011x |
l_fmt <- h_get_format_threshold(digits) |
388 | ||
389 | 2011x |
result <- if (x < l_fmt$threshold["low"] && 0 < x) { |
390 | 44x |
l_fmt$format_string["low"] |
391 | 2011x |
} else if (x > l_fmt$threshold["high"]) { |
392 | 99x |
l_fmt$format_string["high"] |
393 |
} else { |
|
394 | 1868x |
sprintf(fmt = paste0("%.", digits, "f"), x) |
395 |
}
|
|
396 | ||
397 | 2011x |
unname(result) |
398 |
}
|
|
399 | ||
400 |
#' Format a single extreme value
|
|
401 |
#'
|
|
402 |
#' @description `r lifecycle::badge("stable")`
|
|
403 |
#'
|
|
404 |
#' Create a formatting function for a single extreme value.
|
|
405 |
#'
|
|
406 |
#' @inheritParams extreme_format
|
|
407 |
#'
|
|
408 |
#' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme value.
|
|
409 |
#'
|
|
410 |
#' @examples
|
|
411 |
#' format_fun <- format_extreme_values(2L)
|
|
412 |
#' format_fun(x = 0.127)
|
|
413 |
#' format_fun(x = Inf)
|
|
414 |
#' format_fun(x = 0)
|
|
415 |
#' format_fun(x = 0.009)
|
|
416 |
#'
|
|
417 |
#' @family formatting functions
|
|
418 |
#' @export
|
|
419 |
format_extreme_values <- function(digits = 2L) { |
|
420 | 1x |
function(x, ...) { |
421 | 5x |
checkmate::assert_scalar(x, na.ok = TRUE) |
422 | ||
423 | 5x |
h_format_threshold(x = x, digits = digits) |
424 |
}
|
|
425 |
}
|
|
426 | ||
427 |
#' Format extreme values part of a confidence interval
|
|
428 |
#'
|
|
429 |
#' @description `r lifecycle::badge("stable")`
|
|
430 |
#'
|
|
431 |
#' Formatting Function for extreme values part of a confidence interval. Values
|
|
432 |
#' are formatted as e.g. "(xx.xx, xx.xx)" if the number of `digits` is 2.
|
|
433 |
#'
|
|
434 |
#' @inheritParams extreme_format
|
|
435 |
#'
|
|
436 |
#' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme
|
|
437 |
#' values confidence interval.
|
|
438 |
#'
|
|
439 |
#' @examples
|
|
440 |
#' format_fun <- format_extreme_values_ci(2L)
|
|
441 |
#' format_fun(x = c(0.127, Inf))
|
|
442 |
#' format_fun(x = c(0, 0.009))
|
|
443 |
#'
|
|
444 |
#' @family formatting functions
|
|
445 |
#' @export
|
|
446 |
format_extreme_values_ci <- function(digits = 2L) { |
|
447 | 9x |
function(x, ...) { |
448 | 54x |
checkmate::assert_vector(x, len = 2) |
449 | 54x |
l_result <- h_format_threshold(x = x[1], digits = digits) |
450 | 54x |
h_result <- h_format_threshold(x = x[2], digits = digits) |
451 | ||
452 | 54x |
paste0("(", l_result, ", ", h_result, ")") |
453 |
}
|
|
454 |
}
|
|
455 | ||
456 |
#' Format automatically using data significant digits
|
|
457 |
#'
|
|
458 |
#' @description `r lifecycle::badge("stable")`
|
|
459 |
#'
|
|
460 |
#' Formatting function for the majority of default methods used in [analyze_vars()].
|
|
461 |
#' For non-derived values, the significant digits of data is used (e.g. range), while derived
|
|
462 |
#' values have one more digits (measure of location and dispersion like mean, standard deviation).
|
|
463 |
#' This function can be called internally with "auto" like, for example,
|
|
464 |
#' `.formats = c("mean" = "auto")`. See details to see how this works with the inner function.
|
|
465 |
#'
|
|
466 |
#' @param dt_var (`numeric`)\cr variable data the statistics were calculated from. Used only to
|
|
467 |
#' find significant digits. In [analyze_vars] this comes from `.df_row` (see
|
|
468 |
#' [rtables::additional_fun_params]), and it is the row data after the above row splits. No
|
|
469 |
#' column split is considered.
|
|
470 |
#' @param x_stat (`string`)\cr string indicating the current statistical method used.
|
|
471 |
#'
|
|
472 |
#' @return A string that `rtables` prints in a table cell.
|
|
473 |
#'
|
|
474 |
#' @details
|
|
475 |
#' The internal function is needed to work with `rtables` default structure for
|
|
476 |
#' format functions, i.e. `function(x, ...)`, where is x are results from statistical evaluation.
|
|
477 |
#' It can be more than one element (e.g. for `.stats = "mean_sd"`).
|
|
478 |
#'
|
|
479 |
#' @examples
|
|
480 |
#' x_todo <- c(0.001, 0.2, 0.0011000, 3, 4)
|
|
481 |
#' res <- c(mean(x_todo[1:3]), sd(x_todo[1:3]))
|
|
482 |
#'
|
|
483 |
#' # x is the result coming into the formatting function -> res!!
|
|
484 |
#' format_auto(dt_var = x_todo, x_stat = "mean_sd")(x = res)
|
|
485 |
#' format_auto(x_todo, "range")(x = range(x_todo))
|
|
486 |
#' no_sc_x <- c(0.0000001, 1)
|
|
487 |
#' format_auto(no_sc_x, "range")(x = no_sc_x)
|
|
488 |
#'
|
|
489 |
#' @family formatting functions
|
|
490 |
#' @export
|
|
491 |
format_auto <- function(dt_var, x_stat) { |
|
492 | 16x |
function(x = "", ...) { |
493 | 39x |
checkmate::assert_numeric(x, min.len = 1) |
494 | 39x |
checkmate::assert_numeric(dt_var, min.len = 1) |
495 |
# Defaults - they may be a param in the future
|
|
496 | 39x |
der_stats <- c( |
497 | 39x |
"mean", "sd", "se", "median", "geom_mean", "quantiles", "iqr", |
498 | 39x |
"mean_sd", "mean_se", "mean_se", "mean_ci", "mean_sei", "mean_sdi", |
499 | 39x |
"median_ci"
|
500 |
)
|
|
501 | 39x |
nonder_stats <- c("n", "range", "min", "max") |
502 | ||
503 |
# Safenet for miss-modifications
|
|
504 | 39x |
stopifnot(length(intersect(der_stats, nonder_stats)) == 0) # nolint |
505 | 39x |
checkmate::assert_choice(x_stat, c(der_stats, nonder_stats)) |
506 | ||
507 |
# Finds the max number of digits in data
|
|
508 | 39x |
detect_dig <- vapply(dt_var, count_decimalplaces, FUN.VALUE = numeric(1)) %>% |
509 | 39x |
max() |
510 | ||
511 | 39x |
if (x_stat %in% der_stats) { |
512 | 27x |
detect_dig <- detect_dig + 1 |
513 |
}
|
|
514 | ||
515 |
# Render input
|
|
516 | 39x |
str_vals <- formatC(x, digits = detect_dig, format = "f") |
517 | 39x |
def_fmt <- get_formats_from_stats(x_stat)[[x_stat]] |
518 | 39x |
str_fmt <- str_extract(def_fmt, invert = FALSE)[[1]] |
519 | 39x |
if (length(str_fmt) != length(str_vals)) { |
520 | 2x |
stop( |
521 | 2x |
"Number of inserted values as result (", length(str_vals), |
522 | 2x |
") is not the same as there should be in the default tern formats for ",
|
523 | 2x |
x_stat, " (-> ", def_fmt, " needs ", length(str_fmt), " values). ", |
524 | 2x |
"See tern_default_formats to check all of them."
|
525 |
)
|
|
526 |
}
|
|
527 | ||
528 |
# Squashing them together
|
|
529 | 37x |
inv_str_fmt <- str_extract(def_fmt, invert = TRUE)[[1]] |
530 | 37x |
stopifnot(length(inv_str_fmt) == length(str_vals) + 1) # nolint |
531 | ||
532 | 37x |
out <- vector("character", length = length(inv_str_fmt) + length(str_vals)) |
533 | 37x |
is_even <- seq_along(out) %% 2 == 0 |
534 | 37x |
out[is_even] <- str_vals |
535 | 37x |
out[!is_even] <- inv_str_fmt |
536 | ||
537 | 37x |
return(paste0(out, collapse = "")) |
538 |
}
|
|
539 |
}
|
|
540 | ||
541 |
# Utility function that could be useful in general
|
|
542 |
str_extract <- function(string, pattern = "xx|xx\\.|xx\\.x+", invert = FALSE) { |
|
543 | 76x |
regmatches(string, gregexpr(pattern, string), invert = invert) |
544 |
}
|
|
545 | ||
546 |
# Helper function
|
|
547 |
count_decimalplaces <- function(dec) { |
|
548 | 1953x |
if (is.na(dec)) { |
549 | 6x |
return(0) |
550 | 1947x |
} else if (abs(dec - round(dec)) > .Machine$double.eps^0.5) { # For precision |
551 | 1888x |
nchar(strsplit(format(dec, scientific = FALSE, trim = FALSE), ".", fixed = TRUE)[[1]][[2]]) |
552 |
} else { |
|
553 | 59x |
return(0) |
554 |
}
|
|
555 |
}
|
|
556 | ||
557 |
#' Apply automatic formatting
|
|
558 |
#'
|
|
559 |
#' Checks if any of the listed formats in `.formats` are `"auto"`, and replaces `"auto"` with
|
|
560 |
#' the correct implementation of `format_auto` for the given statistics, data, and variable.
|
|
561 |
#'
|
|
562 |
#' @inheritParams argument_convention
|
|
563 |
#' @param x_stats (named `list`)\cr a named list of statistics where each element corresponds
|
|
564 |
#' to an element in `.formats`, with matching names.
|
|
565 |
#'
|
|
566 |
#' @keywords internal
|
|
567 |
apply_auto_formatting <- function(.formats, x_stats, .df_row, .var) { |
|
568 | 1574x |
is_auto_fmt <- vapply(.formats, function(ii) is.character(ii) && ii == "auto", logical(1)) |
569 | 1574x |
if (any(is_auto_fmt)) { |
570 | 8x |
auto_stats <- x_stats[is_auto_fmt] |
571 | 8x |
var_df <- .df_row[[.var]] # xxx this can be extended for the WHOLE data or single facets |
572 | 8x |
.formats[is_auto_fmt] <- lapply(names(auto_stats), format_auto, dt_var = var_df) |
573 |
}
|
|
574 | 1574x |
.formats
|
575 |
}
|
1 |
#' Create a forest plot from an `rtable`
|
|
2 |
#'
|
|
3 |
#' Given a [rtables::rtable()] object with at least one column with a single value and one column with 2
|
|
4 |
#' values, converts table to a [ggplot2::ggplot()] object and generates an accompanying forest plot. The
|
|
5 |
#' table and forest plot are printed side-by-side.
|
|
6 |
#'
|
|
7 |
#' @description `r lifecycle::badge("stable")`
|
|
8 |
#'
|
|
9 |
#' @inheritParams rtable2gg
|
|
10 |
#' @inheritParams argument_convention
|
|
11 |
#' @param tbl (`VTableTree`)\cr `rtables` table with at least one column with a single value and one column with 2
|
|
12 |
#' values.
|
|
13 |
#' @param col_x (`integer(1)` or `NULL`)\cr column index with estimator. By default tries to get this from
|
|
14 |
#' `tbl` attribute `col_x`, otherwise needs to be manually specified. If `NULL`, points will be excluded
|
|
15 |
#' from forest plot.
|
|
16 |
#' @param col_ci (`integer(1)` or `NULL`)\cr column index with confidence intervals. By default tries to get this from
|
|
17 |
#' `tbl` attribute `col_ci`, otherwise needs to be manually specified. If `NULL`, lines will be excluded
|
|
18 |
#' from forest plot.
|
|
19 |
#' @param vline (`numeric(1)` or `NULL`)\cr x coordinate for vertical line, if `NULL` then the line is omitted.
|
|
20 |
#' @param forest_header (`character(2)`)\cr text displayed to the left and right of `vline`, respectively.
|
|
21 |
#' If `vline = NULL` then `forest_header` is not printed. By default tries to get this from `tbl` attribute
|
|
22 |
#' `forest_header`. If `NULL`, defaults will be extracted from the table if possible, and set to
|
|
23 |
#' `"Comparison\nBetter"` and `"Treatment\nBetter"` if not.
|
|
24 |
#' @param xlim (`numeric(2)`)\cr limits for x axis.
|
|
25 |
#' @param logx (`flag`)\cr show the x-values on logarithm scale.
|
|
26 |
#' @param x_at (`numeric`)\cr x-tick locations, if `NULL`, `x_at` is set to `vline` and both `xlim` values.
|
|
27 |
#' @param width_row_names `r lifecycle::badge("deprecated")` Please use the `lbl_col_padding` argument instead.
|
|
28 |
#' @param width_columns (`numeric`)\cr a vector of column widths. Each element's position in
|
|
29 |
#' `colwidths` corresponds to the column of `tbl` in the same position. If `NULL`, column widths are calculated
|
|
30 |
#' according to maximum number of characters per column.
|
|
31 |
#' @param width_forest `r lifecycle::badge("deprecated")` Please use the `rel_width_forest` argument instead.
|
|
32 |
#' @param rel_width_forest (`proportion`)\cr proportion of total width to allocate to the forest plot. Relative
|
|
33 |
#' width of table is then `1 - rel_width_forest`. If `as_list = TRUE`, this parameter is ignored.
|
|
34 |
#' @param font_size (`numeric(1)`)\cr font size.
|
|
35 |
#' @param col_symbol_size (`numeric` or `NULL`)\cr column index from `tbl` containing data to be used
|
|
36 |
#' to determine relative size for estimator plot symbol. Typically, the symbol size is proportional
|
|
37 |
#' to the sample size used to calculate the estimator. If `NULL`, the same symbol size is used for all subgroups.
|
|
38 |
#' By default tries to get this from `tbl` attribute `col_symbol_size`, otherwise needs to be manually specified.
|
|
39 |
#' @param col (`character`)\cr color(s).
|
|
40 |
#' @param ggtheme (`theme`)\cr a graphical theme as provided by `ggplot2` to control styling of the plot.
|
|
41 |
#' @param as_list (`flag`)\cr whether the two `ggplot` objects should be returned as a list. If `TRUE`, a named list
|
|
42 |
#' with two elements, `table` and `plot`, will be returned. If `FALSE` (default) the table and forest plot are
|
|
43 |
#' printed side-by-side via [cowplot::plot_grid()].
|
|
44 |
#' @param gp `r lifecycle::badge("deprecated")` `g_forest` is now generated as a `ggplot` object. This argument
|
|
45 |
#' is no longer used.
|
|
46 |
#' @param draw `r lifecycle::badge("deprecated")` `g_forest` is now generated as a `ggplot` object. This argument
|
|
47 |
#' is no longer used.
|
|
48 |
#' @param newpage `r lifecycle::badge("deprecated")` `g_forest` is now generated as a `ggplot` object. This argument
|
|
49 |
#' is no longer used.
|
|
50 |
#'
|
|
51 |
#' @return `ggplot` forest plot and table.
|
|
52 |
#'
|
|
53 |
#' @examples
|
|
54 |
#' library(dplyr)
|
|
55 |
#' library(forcats)
|
|
56 |
#'
|
|
57 |
#' adrs <- tern_ex_adrs
|
|
58 |
#' n_records <- 20
|
|
59 |
#' adrs_labels <- formatters::var_labels(adrs, fill = TRUE)
|
|
60 |
#' adrs <- adrs %>%
|
|
61 |
#' filter(PARAMCD == "BESRSPI") %>%
|
|
62 |
#' filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
|
|
63 |
#' slice(seq_len(n_records)) %>%
|
|
64 |
#' droplevels() %>%
|
|
65 |
#' mutate(
|
|
66 |
#' # Reorder levels of factor to make the placebo group the reference arm.
|
|
67 |
#' ARM = fct_relevel(ARM, "B: Placebo"),
|
|
68 |
#' rsp = AVALC == "CR"
|
|
69 |
#' )
|
|
70 |
#' formatters::var_labels(adrs) <- c(adrs_labels, "Response")
|
|
71 |
#' df <- extract_rsp_subgroups(
|
|
72 |
#' variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "STRATA2")),
|
|
73 |
#' data = adrs
|
|
74 |
#' )
|
|
75 |
#' # Full commonly used response table.
|
|
76 |
#'
|
|
77 |
#' tbl <- basic_table() %>%
|
|
78 |
#' tabulate_rsp_subgroups(df)
|
|
79 |
#' g_forest(tbl)
|
|
80 |
#'
|
|
81 |
#' # Odds ratio only table.
|
|
82 |
#'
|
|
83 |
#' tbl_or <- basic_table() %>%
|
|
84 |
#' tabulate_rsp_subgroups(df, vars = c("n_tot", "or", "ci"))
|
|
85 |
#' g_forest(
|
|
86 |
#' tbl_or,
|
|
87 |
#' forest_header = c("Comparison\nBetter", "Treatment\nBetter")
|
|
88 |
#' )
|
|
89 |
#'
|
|
90 |
#' # Survival forest plot example.
|
|
91 |
#' adtte <- tern_ex_adtte
|
|
92 |
#' # Save variable labels before data processing steps.
|
|
93 |
#' adtte_labels <- formatters::var_labels(adtte, fill = TRUE)
|
|
94 |
#' adtte_f <- adtte %>%
|
|
95 |
#' filter(
|
|
96 |
#' PARAMCD == "OS",
|
|
97 |
#' ARM %in% c("B: Placebo", "A: Drug X"),
|
|
98 |
#' SEX %in% c("M", "F")
|
|
99 |
#' ) %>%
|
|
100 |
#' mutate(
|
|
101 |
#' # Reorder levels of ARM to display reference arm before treatment arm.
|
|
102 |
#' ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
|
|
103 |
#' SEX = droplevels(SEX),
|
|
104 |
#' AVALU = as.character(AVALU),
|
|
105 |
#' is_event = CNSR == 0
|
|
106 |
#' )
|
|
107 |
#' labels <- list(
|
|
108 |
#' "ARM" = adtte_labels["ARM"],
|
|
109 |
#' "SEX" = adtte_labels["SEX"],
|
|
110 |
#' "AVALU" = adtte_labels["AVALU"],
|
|
111 |
#' "is_event" = "Event Flag"
|
|
112 |
#' )
|
|
113 |
#' formatters::var_labels(adtte_f)[names(labels)] <- as.character(labels)
|
|
114 |
#' df <- extract_survival_subgroups(
|
|
115 |
#' variables = list(
|
|
116 |
#' tte = "AVAL",
|
|
117 |
#' is_event = "is_event",
|
|
118 |
#' arm = "ARM", subgroups = c("SEX", "BMRKR2")
|
|
119 |
#' ),
|
|
120 |
#' data = adtte_f
|
|
121 |
#' )
|
|
122 |
#' table_hr <- basic_table() %>%
|
|
123 |
#' tabulate_survival_subgroups(df, time_unit = adtte_f$AVALU[1])
|
|
124 |
#' g_forest(table_hr)
|
|
125 |
#'
|
|
126 |
#' # Works with any `rtable`.
|
|
127 |
#' tbl <- rtable(
|
|
128 |
#' header = c("E", "CI", "N"),
|
|
129 |
#' rrow("", 1, c(.8, 1.2), 200),
|
|
130 |
#' rrow("", 1.2, c(1.1, 1.4), 50)
|
|
131 |
#' )
|
|
132 |
#' g_forest(
|
|
133 |
#' tbl = tbl,
|
|
134 |
#' col_x = 1,
|
|
135 |
#' col_ci = 2,
|
|
136 |
#' xlim = c(0.5, 2),
|
|
137 |
#' x_at = c(0.5, 1, 2),
|
|
138 |
#' col_symbol_size = 3
|
|
139 |
#' )
|
|
140 |
#'
|
|
141 |
#' tbl <- rtable(
|
|
142 |
#' header = rheader(
|
|
143 |
#' rrow("", rcell("A", colspan = 2)),
|
|
144 |
#' rrow("", "c1", "c2")
|
|
145 |
#' ),
|
|
146 |
#' rrow("row 1", 1, c(.8, 1.2)),
|
|
147 |
#' rrow("row 2", 1.2, c(1.1, 1.4))
|
|
148 |
#' )
|
|
149 |
#' g_forest(
|
|
150 |
#' tbl = tbl,
|
|
151 |
#' col_x = 1,
|
|
152 |
#' col_ci = 2,
|
|
153 |
#' xlim = c(0.5, 2),
|
|
154 |
#' x_at = c(0.5, 1, 2),
|
|
155 |
#' vline = 1,
|
|
156 |
#' forest_header = c("Hello", "World")
|
|
157 |
#' )
|
|
158 |
#'
|
|
159 |
#' @export
|
|
160 |
g_forest <- function(tbl, |
|
161 |
col_x = attr(tbl, "col_x"), |
|
162 |
col_ci = attr(tbl, "col_ci"), |
|
163 |
vline = 1, |
|
164 |
forest_header = attr(tbl, "forest_header"), |
|
165 |
xlim = c(0.1, 10), |
|
166 |
logx = TRUE, |
|
167 |
x_at = c(0.1, 1, 10), |
|
168 |
width_row_names = lifecycle::deprecated(), |
|
169 |
width_columns = NULL, |
|
170 |
width_forest = lifecycle::deprecated(), |
|
171 |
lbl_col_padding = 0, |
|
172 |
rel_width_forest = 0.25, |
|
173 |
font_size = 12, |
|
174 |
col_symbol_size = attr(tbl, "col_symbol_size"), |
|
175 |
col = getOption("ggplot2.discrete.colour")[1], |
|
176 |
ggtheme = NULL, |
|
177 |
as_list = FALSE, |
|
178 |
gp = lifecycle::deprecated(), |
|
179 |
draw = lifecycle::deprecated(), |
|
180 |
newpage = lifecycle::deprecated()) { |
|
181 |
# Deprecated argument warnings
|
|
182 | 4x |
if (lifecycle::is_present(width_row_names)) { |
183 | 1x |
lifecycle::deprecate_warn( |
184 | 1x |
"0.9.4", "g_forest(width_row_names)", "g_forest(lbl_col_padding)", |
185 | 1x |
details = "The width of the row label column can be adjusted via the `lbl_col_padding` parameter." |
186 |
)
|
|
187 |
}
|
|
188 | 4x |
if (lifecycle::is_present(width_forest)) { |
189 | 1x |
lifecycle::deprecate_warn( |
190 | 1x |
"0.9.4", "g_forest(width_forest)", "g_forest(rel_width_forest)", |
191 | 1x |
details = "Relative width of the forest plot (as a proportion) can be set via the `rel_width_forest` parameter." |
192 |
)
|
|
193 |
}
|
|
194 | 4x |
if (lifecycle::is_present(gp)) { |
195 | 1x |
lifecycle::deprecate_warn( |
196 | 1x |
"0.9.4", "g_forest(gp)", "g_forest(ggtheme)", |
197 | 1x |
details = paste( |
198 | 1x |
"`g_forest` is now generated as a `ggplot` object.",
|
199 | 1x |
"Additional display settings should be supplied via the `ggtheme` parameter."
|
200 |
)
|
|
201 |
)
|
|
202 |
}
|
|
203 | 4x |
if (lifecycle::is_present(draw)) { |
204 | 1x |
lifecycle::deprecate_warn( |
205 | 1x |
"0.9.4", "g_forest(draw)", |
206 | 1x |
details = "`g_forest` now generates `ggplot` objects. This parameter has no effect." |
207 |
)
|
|
208 |
}
|
|
209 | 4x |
if (lifecycle::is_present(newpage)) { |
210 | 1x |
lifecycle::deprecate_warn( |
211 | 1x |
"0.9.4", "g_forest(newpage)", |
212 | 1x |
details = "`g_forest` now generates `ggplot` objects. This parameter has no effect." |
213 |
)
|
|
214 |
}
|
|
215 | ||
216 | 4x |
checkmate::assert_class(tbl, "VTableTree") |
217 | 4x |
checkmate::assert_number(col_x, lower = 0, upper = ncol(tbl), null.ok = TRUE) |
218 | 4x |
checkmate::assert_number(col_ci, lower = 0, upper = ncol(tbl), null.ok = TRUE) |
219 | 4x |
checkmate::assert_number(col_symbol_size, lower = 0, upper = ncol(tbl), null.ok = TRUE) |
220 | 4x |
checkmate::assert_number(font_size, lower = 0) |
221 | 4x |
checkmate::assert_character(col, null.ok = TRUE) |
222 | 4x |
checkmate::assert_true(is.null(col) | length(col) == 1 | length(col) == nrow(tbl)) |
223 | ||
224 |
# Extract info from table
|
|
225 | 4x |
mat <- matrix_form(tbl, indent_rownames = TRUE) |
226 | 4x |
mat_strings <- formatters::mf_strings(mat) |
227 | 4x |
nlines_hdr <- formatters::mf_nlheader(mat) |
228 | 4x |
nrows_body <- nrow(mat_strings) - nlines_hdr |
229 | 4x |
tbl_stats <- mat_strings[nlines_hdr, -1] |
230 | ||
231 |
# Generate and modify table as ggplot object
|
|
232 | 4x |
gg_table <- rtable2gg(tbl, fontsize = font_size, colwidths = width_columns, lbl_col_padding = lbl_col_padding) + |
233 | 4x |
theme(plot.margin = margin(0, 0, 0, 0.025, "npc")) |
234 | 4x |
gg_table$scales$scales[[1]]$expand <- c(0.01, 0.01) |
235 | 4x |
gg_table$scales$scales[[2]]$limits[2] <- nrow(mat_strings) + 1 |
236 | 4x |
if (nlines_hdr == 2) { |
237 | 4x |
gg_table$scales$scales[[2]]$expand <- c(0, 0) |
238 | 4x |
arms <- unique(mat_strings[1, ][nzchar(trimws(mat_strings[1, ]))]) |
239 |
} else { |
|
240 | ! |
arms <- NULL |
241 |
}
|
|
242 | ||
243 | 4x |
tbl_df <- as_result_df(tbl) |
244 | 4x |
dat_cols <- seq(which(names(tbl_df) == "node_class") + 1, ncol(tbl_df)) |
245 | 4x |
tbl_df <- tbl_df[, c(which(names(tbl_df) == "row_num"), dat_cols)] |
246 | 4x |
names(tbl_df) <- c("row_num", tbl_stats) |
247 | ||
248 |
# Check table data columns
|
|
249 | 4x |
if (!is.null(col_ci)) { |
250 | 4x |
ci_col <- col_ci + 1 |
251 |
} else { |
|
252 | ! |
tbl_df[["empty_ci"]] <- rep(list(c(NA_real_, NA_real_)), nrow(tbl_df)) |
253 | ! |
ci_col <- which(names(tbl_df) == "empty_ci") |
254 |
}
|
|
255 | ! |
if (length(tbl_df[, ci_col][[1]]) != 2) stop("CI column must have two elements (lower and upper limits).") |
256 | ||
257 | 4x |
if (!is.null(col_x)) { |
258 | 4x |
x_col <- col_x + 1 |
259 |
} else { |
|
260 | ! |
tbl_df[["empty_x"]] <- NA_real_ |
261 | ! |
x_col <- which(names(tbl_df) == "empty_x") |
262 |
}
|
|
263 | 4x |
if (!is.null(col_symbol_size)) { |
264 | 3x |
sym_size <- unlist(tbl_df[, col_symbol_size + 1]) |
265 |
} else { |
|
266 | 1x |
sym_size <- rep(1, nrow(tbl_df)) |
267 |
}
|
|
268 | ||
269 | 4x |
tbl_df[, c("ci_lwr", "ci_upr")] <- t(sapply(tbl_df[, ci_col], unlist)) |
270 | 4x |
x <- unlist(tbl_df[, x_col]) |
271 | 4x |
lwr <- unlist(tbl_df[["ci_lwr"]]) |
272 | 4x |
upr <- unlist(tbl_df[["ci_upr"]]) |
273 | 4x |
row_num <- nrow(mat_strings) - tbl_df[["row_num"]] - as.numeric(nlines_hdr == 2) |
274 | ||
275 | ! |
if (is.null(col)) col <- "#343cff" |
276 | 4x |
if (length(col) == 1) col <- rep(col, nrow(tbl_df)) |
277 | ! |
if (is.null(x_at)) x_at <- union(xlim, vline) |
278 | 4x |
x_labels <- x_at |
279 | ||
280 |
# Apply log transformation
|
|
281 | 4x |
if (logx) { |
282 | 4x |
x_t <- log(x) |
283 | 4x |
lwr_t <- log(lwr) |
284 | 4x |
upr_t <- log(upr) |
285 | 4x |
xlim_t <- log(xlim) |
286 |
} else { |
|
287 | ! |
x_t <- x |
288 | ! |
lwr_t <- lwr |
289 | ! |
upr_t <- upr |
290 | ! |
xlim_t <- xlim |
291 |
}
|
|
292 | ||
293 |
# Set up plot area
|
|
294 | 4x |
gg_plt <- ggplot(data = tbl_df) + |
295 | 4x |
theme( |
296 | 4x |
panel.background = element_rect(fill = "transparent", color = NA_character_), |
297 | 4x |
plot.background = element_rect(fill = "transparent", color = NA_character_), |
298 | 4x |
panel.grid.major = element_blank(), |
299 | 4x |
panel.grid.minor = element_blank(), |
300 | 4x |
axis.title.x = element_blank(), |
301 | 4x |
axis.title.y = element_blank(), |
302 | 4x |
axis.line.x = element_line(), |
303 | 4x |
axis.text = element_text(size = font_size), |
304 | 4x |
legend.position = "none", |
305 | 4x |
plot.margin = margin(0, 0.1, 0.05, 0, "npc") |
306 |
) + |
|
307 | 4x |
scale_x_continuous( |
308 | 4x |
transform = ifelse(logx, "log", "identity"), |
309 | 4x |
limits = xlim, |
310 | 4x |
breaks = x_at, |
311 | 4x |
labels = x_labels, |
312 | 4x |
expand = c(0.01, 0) |
313 |
) + |
|
314 | 4x |
scale_y_continuous( |
315 | 4x |
limits = c(0, nrow(mat_strings) + 1), |
316 | 4x |
breaks = NULL, |
317 | 4x |
expand = c(0, 0) |
318 |
) + |
|
319 | 4x |
coord_cartesian(clip = "off") |
320 | ||
321 | 4x |
if (is.null(ggtheme)) { |
322 | 4x |
gg_plt <- gg_plt + annotate( |
323 | 4x |
"rect",
|
324 | 4x |
xmin = xlim[1], |
325 | 4x |
xmax = xlim[2], |
326 | 4x |
ymin = 0, |
327 | 4x |
ymax = nrows_body + 0.5, |
328 | 4x |
fill = "grey92" |
329 |
)
|
|
330 |
}
|
|
331 | ||
332 | 4x |
if (!is.null(vline)) { |
333 |
# Set default forest header
|
|
334 | 4x |
if (is.null(forest_header)) { |
335 | ! |
forest_header <- c( |
336 | ! |
paste(if (length(arms) == 2) arms[1] else "Comparison", "Better", sep = "\n"), |
337 | ! |
paste(if (length(arms) == 2) arms[2] else "Treatment", "Better", sep = "\n") |
338 |
)
|
|
339 |
}
|
|
340 | ||
341 |
# Add vline and forest header labels
|
|
342 | 4x |
mid_pts <- if (logx) { |
343 | 4x |
c(exp(mean(log(c(xlim[1], vline)))), exp(mean(log(c(vline, xlim[2]))))) |
344 |
} else { |
|
345 | ! |
c(mean(c(xlim[1], vline)), mean(c(vline, xlim[2]))) |
346 |
}
|
|
347 | 4x |
gg_plt <- gg_plt + |
348 | 4x |
annotate( |
349 | 4x |
"segment",
|
350 | 4x |
x = vline, xend = vline, y = 0, yend = nrows_body + 0.5 |
351 |
) + |
|
352 | 4x |
annotate( |
353 | 4x |
"text",
|
354 | 4x |
x = mid_pts[1], y = nrows_body + 1.25, |
355 | 4x |
label = forest_header[1], |
356 | 4x |
size = font_size / .pt, |
357 | 4x |
lineheight = 0.9 |
358 |
) + |
|
359 | 4x |
annotate( |
360 | 4x |
"text",
|
361 | 4x |
x = mid_pts[2], y = nrows_body + 1.25, |
362 | 4x |
label = forest_header[2], |
363 | 4x |
size = font_size / .pt, |
364 | 4x |
lineheight = 0.9 |
365 |
)
|
|
366 |
}
|
|
367 | ||
368 |
# Add points to plot
|
|
369 | 4x |
if (any(!is.na(x_t))) { |
370 | 4x |
x_t[x < xlim[1] | x > xlim[2]] <- NA |
371 | 4x |
gg_plt <- gg_plt + geom_point( |
372 | 4x |
x = x_t, |
373 | 4x |
y = row_num, |
374 | 4x |
color = col, |
375 | 4x |
aes(size = sym_size), |
376 | 4x |
na.rm = TRUE |
377 |
)
|
|
378 |
}
|
|
379 | ||
380 | 4x |
for (i in seq_len(nrow(tbl_df))) { |
381 |
# Determine which arrow(s) to add to CI lines
|
|
382 | 17x |
which_arrow <- c(lwr_t[i] < xlim_t[1], upr_t[i] > xlim_t[2]) |
383 | 17x |
which_arrow <- dplyr::case_when( |
384 | 17x |
all(which_arrow) ~ "both", |
385 | 17x |
which_arrow[1] ~ "first", |
386 | 17x |
which_arrow[2] ~ "last", |
387 | 17x |
TRUE ~ NA_character_ |
388 |
)
|
|
389 | ||
390 |
# Add CI lines
|
|
391 | 17x |
gg_plt <- gg_plt + |
392 | 17x |
if (!is.na(which_arrow)) { |
393 | 15x |
annotate( |
394 | 15x |
"segment",
|
395 | 15x |
x = if (!which_arrow %in% c("first", "both")) lwr[i] else xlim[1], |
396 | 15x |
xend = if (!which_arrow %in% c("last", "both")) upr[i] else xlim[2], |
397 | 15x |
y = row_num[i], yend = row_num[i], |
398 | 15x |
color = if (length(col) == 1) col else col[i], |
399 | 15x |
arrow = arrow(length = unit(0.05, "npc"), ends = which_arrow), |
400 | 15x |
na.rm = TRUE |
401 |
)
|
|
402 |
} else { |
|
403 | 2x |
annotate( |
404 | 2x |
"segment",
|
405 | 2x |
x = lwr[i], xend = upr[i], |
406 | 2x |
y = row_num[i], yend = row_num[i], |
407 | 2x |
color = if (length(col) == 1) col else col[i], |
408 | 2x |
na.rm = TRUE |
409 |
)
|
|
410 |
}
|
|
411 |
}
|
|
412 | ||
413 |
# Apply custom ggtheme to plot
|
|
414 | ! |
if (!is.null(ggtheme)) gg_plt <- gg_plt + ggtheme |
415 | ||
416 | 4x |
if (as_list) { |
417 | 1x |
list( |
418 | 1x |
table = gg_table, |
419 | 1x |
plot = gg_plt |
420 |
)
|
|
421 |
} else { |
|
422 | 3x |
cowplot::plot_grid( |
423 | 3x |
gg_table,
|
424 | 3x |
gg_plt,
|
425 | 3x |
align = "h", |
426 | 3x |
axis = "tblr", |
427 | 3x |
rel_widths = c(1 - rel_width_forest, rel_width_forest) |
428 |
)
|
|
429 |
}
|
|
430 |
}
|
|
431 | ||
432 |
#' Forest plot grob
|
|
433 |
#'
|
|
434 |
#' @description `r lifecycle::badge("deprecated")`
|
|
435 |
#'
|
|
436 |
#' @inheritParams g_forest
|
|
437 |
#' @param tbl (`VTableTree`)\cr `rtables` table object.
|
|
438 |
#' @param x (`numeric`)\cr coordinate of point.
|
|
439 |
#' @param lower,upper (`numeric`)\cr lower/upper bound of the confidence interval.
|
|
440 |
#' @param symbol_size (`numeric`)\cr vector with relative size for plot symbol.
|
|
441 |
#' If `NULL`, the same symbol size is used.
|
|
442 |
#'
|
|
443 |
#' @details
|
|
444 |
#' The heights get automatically determined.
|
|
445 |
#'
|
|
446 |
#' @examples
|
|
447 |
#' tbl <- rtable(
|
|
448 |
#' header = rheader(
|
|
449 |
#' rrow("", "E", rcell("CI", colspan = 2), "N"),
|
|
450 |
#' rrow("", "A", "B", "C", "D")
|
|
451 |
#' ),
|
|
452 |
#' rrow("row 1", 1, 0.8, 1.1, 16),
|
|
453 |
#' rrow("row 2", 1.4, 0.8, 1.6, 25),
|
|
454 |
#' rrow("row 3", 1.2, 0.8, 1.6, 36)
|
|
455 |
#' )
|
|
456 |
#'
|
|
457 |
#' x <- c(1, 1.4, 1.2)
|
|
458 |
#' lower <- c(0.8, 0.8, 0.8)
|
|
459 |
#' upper <- c(1.1, 1.6, 1.6)
|
|
460 |
#' # numeric vector with multiplication factor to scale each circle radius
|
|
461 |
#' # default radius is 1/3.5 lines
|
|
462 |
#' symbol_scale <- c(1, 1.25, 1.5)
|
|
463 |
#'
|
|
464 |
#' # Internal function - forest_grob
|
|
465 |
#' \donttest{
|
|
466 |
#' p <- forest_grob(tbl, x, lower, upper,
|
|
467 |
#' vline = 1, forest_header = c("A", "B"),
|
|
468 |
#' x_at = c(.1, 1, 10), xlim = c(0.1, 10), logx = TRUE, symbol_size = symbol_scale,
|
|
469 |
#' vp = grid::plotViewport(margins = c(1, 1, 1, 1))
|
|
470 |
#' )
|
|
471 |
#'
|
|
472 |
#' draw_grob(p)
|
|
473 |
#' }
|
|
474 |
#'
|
|
475 |
#' @noRd
|
|
476 |
#' @keywords internal
|
|
477 |
forest_grob <- function(tbl, |
|
478 |
x,
|
|
479 |
lower,
|
|
480 |
upper,
|
|
481 |
vline,
|
|
482 |
forest_header,
|
|
483 |
xlim = NULL, |
|
484 |
logx = FALSE, |
|
485 |
x_at = NULL, |
|
486 |
width_row_names = NULL, |
|
487 |
width_columns = NULL, |
|
488 |
width_forest = grid::unit(1, "null"), |
|
489 |
symbol_size = NULL, |
|
490 |
col = "blue", |
|
491 |
name = NULL, |
|
492 |
gp = NULL, |
|
493 |
vp = NULL) { |
|
494 | 1x |
lifecycle::deprecate_warn( |
495 | 1x |
"0.9.4", "forest_grob()", |
496 | 1x |
details = "`g_forest` now generates `ggplot` objects. This function is no longer used within `tern`." |
497 |
)
|
|
498 | ||
499 | 1x |
nr <- nrow(tbl) |
500 | 1x |
if (is.null(vline)) { |
501 | ! |
checkmate::assert_true(is.null(forest_header)) |
502 |
} else { |
|
503 | 1x |
checkmate::assert_number(vline) |
504 | 1x |
checkmate::assert_character(forest_header, len = 2, null.ok = TRUE) |
505 |
}
|
|
506 | ||
507 | 1x |
checkmate::assert_numeric(x, len = nr) |
508 | 1x |
checkmate::assert_numeric(lower, len = nr) |
509 | 1x |
checkmate::assert_numeric(upper, len = nr) |
510 | 1x |
checkmate::assert_numeric(symbol_size, len = nr, null.ok = TRUE) |
511 | 1x |
checkmate::assert_character(col) |
512 | ||
513 | 1x |
if (is.null(symbol_size)) { |
514 | ! |
symbol_size <- rep(1, nr) |
515 |
}
|
|
516 | ||
517 | 1x |
if (is.null(xlim)) { |
518 | ! |
r <- range(c(x, lower, upper), na.rm = TRUE) |
519 | ! |
xlim <- r + c(-0.05, 0.05) * diff(r) |
520 |
}
|
|
521 | ||
522 | 1x |
if (logx) { |
523 | 1x |
if (is.null(x_at)) { |
524 | ! |
x_at <- pretty(log(stats::na.omit(c(x, lower, upper)))) |
525 | ! |
x_labels <- exp(x_at) |
526 |
} else { |
|
527 | 1x |
x_labels <- x_at |
528 | 1x |
x_at <- log(x_at) |
529 |
}
|
|
530 | 1x |
xlim <- log(xlim) |
531 | 1x |
x <- log(x) |
532 | 1x |
lower <- log(lower) |
533 | 1x |
upper <- log(upper) |
534 | 1x |
if (!is.null(vline)) { |
535 | 1x |
vline <- log(vline) |
536 |
}
|
|
537 |
} else { |
|
538 | ! |
x_labels <- TRUE |
539 |
}
|
|
540 | ||
541 | 1x |
data_forest_vp <- grid::dataViewport(xlim, c(0, 1)) |
542 | ||
543 |
# Get table content as matrix form.
|
|
544 | 1x |
mf <- matrix_form(tbl) |
545 | ||
546 |
# Use `rtables` indent_string eventually.
|
|
547 | 1x |
mf$strings[, 1] <- paste0( |
548 | 1x |
strrep(" ", c(rep(0, attr(mf, "nrow_header")), mf$row_info$indent)), |
549 | 1x |
mf$strings[, 1] |
550 |
)
|
|
551 | ||
552 | 1x |
n_header <- attr(mf, "nrow_header") |
553 | ||
554 | ! |
if (any(mf$display[, 1] == FALSE)) stop("row names need to be always displayed") |
555 | ||
556 |
# Pre-process the data to be used in lapply and cell_in_rows.
|
|
557 | 1x |
to_args_for_cell_in_rows_fun <- function(part = c("body", "header"), |
558 | 1x |
underline_colspan = FALSE) { |
559 | 2x |
part <- match.arg(part) |
560 | 2x |
if (part == "body") { |
561 | 1x |
mat_row_indices <- seq_len(nrow(tbl)) + n_header |
562 | 1x |
row_ind_offset <- -n_header |
563 |
} else { |
|
564 | 1x |
mat_row_indices <- seq_len(n_header) |
565 | 1x |
row_ind_offset <- 0 |
566 |
}
|
|
567 | ||
568 | 2x |
lapply(mat_row_indices, function(i) { |
569 | 5x |
disp <- mf$display[i, -1] |
570 | 5x |
list( |
571 | 5x |
row_name = mf$strings[i, 1], |
572 | 5x |
cells = mf$strings[i, -1][disp], |
573 | 5x |
cell_spans = mf$spans[i, -1][disp], |
574 | 5x |
row_index = i + row_ind_offset, |
575 | 5x |
underline_colspan = underline_colspan |
576 |
)
|
|
577 |
}) |
|
578 |
}
|
|
579 | ||
580 | 1x |
args_header <- to_args_for_cell_in_rows_fun("header", underline_colspan = TRUE) |
581 | 1x |
args_body <- to_args_for_cell_in_rows_fun("body", underline_colspan = FALSE) |
582 | ||
583 | 1x |
grid::gTree( |
584 | 1x |
name = name, |
585 | 1x |
children = grid::gList( |
586 | 1x |
grid::gTree( |
587 | 1x |
children = do.call(grid::gList, lapply(args_header, do.call, what = cell_in_rows)), |
588 | 1x |
vp = grid::vpPath("vp_table_layout", "vp_header") |
589 |
),
|
|
590 | 1x |
grid::gTree( |
591 | 1x |
children = do.call(grid::gList, lapply(args_body, do.call, what = cell_in_rows)), |
592 | 1x |
vp = grid::vpPath("vp_table_layout", "vp_body") |
593 |
),
|
|
594 | 1x |
grid::linesGrob( |
595 | 1x |
grid::unit(c(0, 1), "npc"), |
596 | 1x |
y = grid::unit(c(.5, .5), "npc"), |
597 | 1x |
vp = grid::vpPath("vp_table_layout", "vp_spacer") |
598 |
),
|
|
599 |
# forest part
|
|
600 | 1x |
if (is.null(vline)) { |
601 | ! |
NULL
|
602 |
} else { |
|
603 | 1x |
grid::gTree( |
604 | 1x |
children = grid::gList( |
605 | 1x |
grid::gTree( |
606 | 1x |
children = grid::gList( |
607 | 1x |
grid::textGrob( |
608 | 1x |
forest_header[1], |
609 | 1x |
x = grid::unit(vline, "native") - grid::unit(1, "lines"), |
610 | 1x |
just = c("right", "center") |
611 |
),
|
|
612 | 1x |
grid::textGrob( |
613 | 1x |
forest_header[2], |
614 | 1x |
x = grid::unit(vline, "native") + grid::unit(1, "lines"), |
615 | 1x |
just = c("left", "center") |
616 |
)
|
|
617 |
),
|
|
618 | 1x |
vp = grid::vpStack(grid::viewport(layout.pos.col = ncol(tbl) + 2), data_forest_vp) |
619 |
)
|
|
620 |
),
|
|
621 | 1x |
vp = grid::vpPath("vp_table_layout", "vp_header") |
622 |
)
|
|
623 |
},
|
|
624 | 1x |
grid::gTree( |
625 | 1x |
children = grid::gList( |
626 | 1x |
grid::gTree( |
627 | 1x |
children = grid::gList( |
628 | 1x |
grid::rectGrob(gp = grid::gpar(col = "gray90", fill = "gray90")), |
629 | 1x |
if (is.null(vline)) { |
630 | ! |
NULL
|
631 |
} else { |
|
632 | 1x |
grid::linesGrob( |
633 | 1x |
x = grid::unit(rep(vline, 2), "native"), |
634 | 1x |
y = grid::unit(c(0, 1), "npc"), |
635 | 1x |
gp = grid::gpar(lwd = 2), |
636 | 1x |
vp = data_forest_vp |
637 |
)
|
|
638 |
},
|
|
639 | 1x |
grid::xaxisGrob(at = x_at, label = x_labels, vp = data_forest_vp) |
640 |
),
|
|
641 | 1x |
vp = grid::viewport(layout.pos.col = ncol(tbl) + 2) |
642 |
)
|
|
643 |
),
|
|
644 | 1x |
vp = grid::vpPath("vp_table_layout", "vp_body") |
645 |
),
|
|
646 | 1x |
grid::gTree( |
647 | 1x |
children = do.call( |
648 | 1x |
grid::gList, |
649 | 1x |
Map( |
650 | 1x |
function(xi, li, ui, row_index, size_i, col) { |
651 | 3x |
forest_dot_line( |
652 | 3x |
xi,
|
653 | 3x |
li,
|
654 | 3x |
ui,
|
655 | 3x |
row_index,
|
656 | 3x |
xlim,
|
657 | 3x |
symbol_size = size_i, |
658 | 3x |
col = col, |
659 | 3x |
datavp = data_forest_vp |
660 |
)
|
|
661 |
},
|
|
662 | 1x |
x,
|
663 | 1x |
lower,
|
664 | 1x |
upper,
|
665 | 1x |
seq_along(x), |
666 | 1x |
symbol_size,
|
667 | 1x |
col,
|
668 | 1x |
USE.NAMES = FALSE |
669 |
)
|
|
670 |
),
|
|
671 | 1x |
vp = grid::vpPath("vp_table_layout", "vp_body") |
672 |
)
|
|
673 |
),
|
|
674 | 1x |
childrenvp = forest_viewport(tbl, width_row_names, width_columns, width_forest), |
675 | 1x |
vp = vp, |
676 | 1x |
gp = gp |
677 |
)
|
|
678 |
}
|
|
679 | ||
680 |
cell_in_rows <- function(row_name, |
|
681 |
cells,
|
|
682 |
cell_spans,
|
|
683 |
row_index,
|
|
684 |
underline_colspan = FALSE) { |
|
685 | 5x |
checkmate::assert_string(row_name) |
686 | 5x |
checkmate::assert_character(cells, min.len = 1, any.missing = FALSE) |
687 | 5x |
checkmate::assert_numeric(cell_spans, len = length(cells), any.missing = FALSE) |
688 | 5x |
checkmate::assert_number(row_index) |
689 | 5x |
checkmate::assert_flag(underline_colspan) |
690 | ||
691 | 5x |
vp_name_rn <- paste0("rowname-", row_index) |
692 | 5x |
g_rowname <- if (!is.null(row_name) && row_name != "") { |
693 | 3x |
grid::textGrob( |
694 | 3x |
name = vp_name_rn, |
695 | 3x |
label = row_name, |
696 | 3x |
x = grid::unit(0, "npc"), |
697 | 3x |
just = c("left", "center"), |
698 | 3x |
vp = grid::vpPath(paste0("rowname-", row_index)) |
699 |
)
|
|
700 |
} else { |
|
701 | 2x |
NULL
|
702 |
}
|
|
703 | ||
704 | 5x |
gl_cols <- if (!(length(cells) > 0)) { |
705 | ! |
list(NULL) |
706 |
} else { |
|
707 | 5x |
j <- 1 # column index of cell |
708 | ||
709 | 5x |
lapply(seq_along(cells), function(k) { |
710 | 19x |
cell_ascii <- cells[[k]] |
711 | 19x |
cs <- cell_spans[[k]] |
712 | ||
713 | 19x |
if (is.na(cell_ascii) || is.null(cell_ascii)) { |
714 | ! |
cell_ascii <- "NA" |
715 |
}
|
|
716 | ||
717 | 19x |
cell_name <- paste0("g-cell-", row_index, "-", j) |
718 | ||
719 | 19x |
cell_grobs <- if (identical(cell_ascii, "")) { |
720 | ! |
NULL
|
721 |
} else { |
|
722 | 19x |
if (cs == 1) { |
723 | 18x |
grid::textGrob( |
724 | 18x |
label = cell_ascii, |
725 | 18x |
name = cell_name, |
726 | 18x |
vp = grid::vpPath(paste0("cell-", row_index, "-", j)) |
727 |
)
|
|
728 |
} else { |
|
729 |
# +1 because of rowname
|
|
730 | 1x |
vp_joined_cols <- grid::viewport(layout.pos.row = row_index, layout.pos.col = seq(j + 1, j + cs)) |
731 | ||
732 | 1x |
lab <- grid::textGrob( |
733 | 1x |
label = cell_ascii, |
734 | 1x |
name = cell_name, |
735 | 1x |
vp = vp_joined_cols |
736 |
)
|
|
737 | ||
738 | 1x |
if (!underline_colspan || grepl("^[[:space:]]*$", cell_ascii)) { |
739 | ! |
lab
|
740 |
} else { |
|
741 | 1x |
grid::gList( |
742 | 1x |
lab,
|
743 | 1x |
grid::linesGrob( |
744 | 1x |
x = grid::unit.c(grid::unit(.2, "lines"), grid::unit(1, "npc") - grid::unit(.2, "lines")), |
745 | 1x |
y = grid::unit(c(0, 0), "npc"), |
746 | 1x |
vp = vp_joined_cols |
747 |
)
|
|
748 |
)
|
|
749 |
}
|
|
750 |
}
|
|
751 |
}
|
|
752 | 19x |
j <<- j + cs |
753 | ||
754 | 19x |
cell_grobs
|
755 |
}) |
|
756 |
}
|
|
757 | ||
758 | 5x |
grid::gList( |
759 | 5x |
g_rowname,
|
760 | 5x |
do.call(grid::gList, gl_cols) |
761 |
)
|
|
762 |
}
|
|
763 | ||
764 |
#' Graphic object: forest dot line
|
|
765 |
#'
|
|
766 |
#' @description `r lifecycle::badge("deprecated")`
|
|
767 |
#'
|
|
768 |
#' Calculate the `grob` corresponding to the dot line within the forest plot.
|
|
769 |
#'
|
|
770 |
#' @noRd
|
|
771 |
#' @keywords internal
|
|
772 |
forest_dot_line <- function(x, |
|
773 |
lower,
|
|
774 |
upper,
|
|
775 |
row_index,
|
|
776 |
xlim,
|
|
777 |
symbol_size = 1, |
|
778 |
col = "blue", |
|
779 |
datavp) { |
|
780 | 3x |
lifecycle::deprecate_warn( |
781 | 3x |
"0.9.4", "forest_dot_line()", |
782 | 3x |
details = "`g_forest` now generates `ggplot` objects. This function is no longer used within `tern`." |
783 |
)
|
|
784 | ||
785 | 3x |
ci <- c(lower, upper) |
786 | 3x |
if (any(!is.na(c(x, ci)))) { |
787 |
# line
|
|
788 | 3x |
y <- grid::unit(c(0.5, 0.5), "npc") |
789 | ||
790 | 3x |
g_line <- if (all(!is.na(ci)) && ci[2] > xlim[1] && ci[1] < xlim[2]) { |
791 |
# -
|
|
792 | 3x |
if (ci[1] >= xlim[1] && ci[2] <= xlim[2]) { |
793 | 3x |
grid::linesGrob(x = grid::unit(c(ci[1], ci[2]), "native"), y = y) |
794 | ! |
} else if (ci[1] < xlim[1] && ci[2] > xlim[2]) { |
795 |
# <->
|
|
796 | ! |
grid::linesGrob( |
797 | ! |
x = grid::unit(xlim, "native"), |
798 | ! |
y = y, |
799 | ! |
arrow = grid::arrow(angle = 30, length = grid::unit(0.5, "lines"), ends = "both") |
800 |
)
|
|
801 | ! |
} else if (ci[1] < xlim[1] && ci[2] <= xlim[2]) { |
802 |
# <-
|
|
803 | ! |
grid::linesGrob( |
804 | ! |
x = grid::unit(c(xlim[1], ci[2]), "native"), |
805 | ! |
y = y, |
806 | ! |
arrow = grid::arrow(angle = 30, length = grid::unit(0.5, "lines"), ends = "first") |
807 |
)
|
|
808 | ! |
} else if (ci[1] >= xlim[1] && ci[2] > xlim[2]) { |
809 |
# ->
|
|
810 | ! |
grid::linesGrob( |
811 | ! |
x = grid::unit(c(ci[1], xlim[2]), "native"), |
812 | ! |
y = y, |
813 | ! |
arrow = grid::arrow(angle = 30, length = grid::unit(0.5, "lines"), ends = "last") |
814 |
)
|
|
815 |
}
|
|
816 |
} else { |
|
817 | ! |
NULL
|
818 |
}
|
|
819 | ||
820 | 3x |
g_circle <- if (!is.na(x) && x >= xlim[1] && x <= xlim[2]) { |
821 | 3x |
grid::circleGrob( |
822 | 3x |
x = grid::unit(x, "native"), |
823 | 3x |
y = y, |
824 | 3x |
r = grid::unit(1 / 3.5 * symbol_size, "lines"), |
825 | 3x |
name = "point" |
826 |
)
|
|
827 |
} else { |
|
828 | ! |
NULL
|
829 |
}
|
|
830 | ||
831 | 3x |
grid::gTree( |
832 | 3x |
children = grid::gList( |
833 | 3x |
grid::gTree( |
834 | 3x |
children = grid::gList( |
835 | 3x |
grid::gList( |
836 | 3x |
g_line,
|
837 | 3x |
g_circle
|
838 |
)
|
|
839 |
),
|
|
840 | 3x |
vp = datavp, |
841 | 3x |
gp = grid::gpar(col = col, fill = col) |
842 |
)
|
|
843 |
),
|
|
844 | 3x |
vp = grid::vpPath(paste0("forest-", row_index)) |
845 |
)
|
|
846 |
} else { |
|
847 | ! |
NULL
|
848 |
}
|
|
849 |
}
|
|
850 | ||
851 |
#' Create a viewport tree for the forest plot
|
|
852 |
#'
|
|
853 |
#' @description `r lifecycle::badge("deprecated")`
|
|
854 |
#'
|
|
855 |
#' @param tbl (`VTableTree`)\cr `rtables` table object.
|
|
856 |
#' @param width_row_names (`grid::unit`)\cr width of row names.
|
|
857 |
#' @param width_columns (`grid::unit`)\cr width of column spans.
|
|
858 |
#' @param width_forest (`grid::unit`)\cr width of the forest plot.
|
|
859 |
#' @param gap_column (`grid::unit`)\cr gap width between the columns.
|
|
860 |
#' @param gap_header (`grid::unit`)\cr gap width between the header.
|
|
861 |
#' @param mat_form (`MatrixPrintForm`)\cr matrix print form of the table.
|
|
862 |
#'
|
|
863 |
#' @return A viewport tree.
|
|
864 |
#'
|
|
865 |
#' @examples
|
|
866 |
#' library(grid)
|
|
867 |
#'
|
|
868 |
#' tbl <- rtable(
|
|
869 |
#' header = rheader(
|
|
870 |
#' rrow("", "E", rcell("CI", colspan = 2)),
|
|
871 |
#' rrow("", "A", "B", "C")
|
|
872 |
#' ),
|
|
873 |
#' rrow("row 1", 1, 0.8, 1.1),
|
|
874 |
#' rrow("row 2", 1.4, 0.8, 1.6),
|
|
875 |
#' rrow("row 3", 1.2, 0.8, 1.2)
|
|
876 |
#' )
|
|
877 |
#'
|
|
878 |
#' \donttest{
|
|
879 |
#' v <- forest_viewport(tbl)
|
|
880 |
#'
|
|
881 |
#' grid::grid.newpage()
|
|
882 |
#' showViewport(v)
|
|
883 |
#' }
|
|
884 |
#'
|
|
885 |
#' @export
|
|
886 |
forest_viewport <- function(tbl, |
|
887 |
width_row_names = NULL, |
|
888 |
width_columns = NULL, |
|
889 |
width_forest = grid::unit(1, "null"), |
|
890 |
gap_column = grid::unit(1, "lines"), |
|
891 |
gap_header = grid::unit(1, "lines"), |
|
892 |
mat_form = NULL) { |
|
893 | 2x |
lifecycle::deprecate_warn( |
894 | 2x |
"0.9.4",
|
895 | 2x |
"forest_viewport()",
|
896 | 2x |
details = "`g_forest` now generates `ggplot` objects. This function is no longer used within `tern`." |
897 |
)
|
|
898 | ||
899 | 2x |
checkmate::assert_class(tbl, "VTableTree") |
900 | 2x |
checkmate::assert_true(grid::is.unit(width_forest)) |
901 | 2x |
if (!is.null(width_row_names)) { |
902 | ! |
checkmate::assert_true(grid::is.unit(width_row_names)) |
903 |
}
|
|
904 | 2x |
if (!is.null(width_columns)) { |
905 | ! |
checkmate::assert_true(grid::is.unit(width_columns)) |
906 |
}
|
|
907 | ||
908 | 2x |
if (is.null(mat_form)) mat_form <- matrix_form(tbl) |
909 | ||
910 | 2x |
mat_form$strings[!mat_form$display] <- "" |
911 | ||
912 | 2x |
nr <- nrow(tbl) |
913 | 2x |
nc <- ncol(tbl) |
914 | 2x |
nr_h <- attr(mat_form, "nrow_header") |
915 | ||
916 | 2x |
if (is.null(width_row_names) || is.null(width_columns)) { |
917 | 2x |
tbl_widths <- formatters::propose_column_widths(mat_form) |
918 | 2x |
strs_with_width <- strrep("x", tbl_widths) # that works for mono spaced fonts |
919 | 2x |
if (is.null(width_row_names)) width_row_names <- grid::stringWidth(strs_with_width[1]) |
920 | 2x |
if (is.null(width_columns)) width_columns <- grid::stringWidth(strs_with_width[-1]) |
921 |
}
|
|
922 | ||
923 |
# Widths for row name, cols, forest.
|
|
924 | 2x |
widths <- grid::unit.c( |
925 | 2x |
width_row_names + gap_column, |
926 | 2x |
width_columns + gap_column, |
927 | 2x |
width_forest
|
928 |
)
|
|
929 | ||
930 | 2x |
n_lines_per_row <- apply( |
931 | 2x |
X = mat_form$strings, |
932 | 2x |
MARGIN = 1, |
933 | 2x |
FUN = function(row) { |
934 | 10x |
tmp <- vapply( |
935 | 10x |
gregexpr("\n", row, fixed = TRUE), |
936 | 10x |
attr, numeric(1), |
937 | 10x |
"match.length"
|
938 | 10x |
) + 1 |
939 | 10x |
max(c(tmp, 1)) |
940 |
}
|
|
941 |
)
|
|
942 | ||
943 | 2x |
i_header <- seq_len(nr_h) |
944 | ||
945 | 2x |
height_body_rows <- grid::unit(n_lines_per_row[-i_header] * 1.2, "lines") |
946 | 2x |
height_header_rows <- grid::unit(n_lines_per_row[i_header] * 1.2, "lines") |
947 | ||
948 | 2x |
height_body <- grid::unit(sum(n_lines_per_row[-i_header]) * 1.2, "lines") |
949 | 2x |
height_header <- grid::unit(sum(n_lines_per_row[i_header]) * 1.2, "lines") |
950 | ||
951 | 2x |
nc_g <- nc + 2 # number of columns incl. row names and forest |
952 | ||
953 | 2x |
vp_tbl <- grid::vpTree( |
954 | 2x |
parent = grid::viewport( |
955 | 2x |
name = "vp_table_layout", |
956 | 2x |
layout = grid::grid.layout( |
957 | 2x |
nrow = 3, ncol = 1, |
958 | 2x |
heights = grid::unit.c(height_header, gap_header, height_body) |
959 |
)
|
|
960 |
),
|
|
961 | 2x |
children = grid::vpList( |
962 | 2x |
vp_forest_table_part(nr_h, nc_g, 1, 1, widths, height_header_rows, "vp_header"), |
963 | 2x |
vp_forest_table_part(nr, nc_g, 3, 1, widths, height_body_rows, "vp_body"), |
964 | 2x |
grid::viewport(name = "vp_spacer", layout.pos.row = 2, layout.pos.col = 1) |
965 |
)
|
|
966 |
)
|
|
967 | 2x |
vp_tbl
|
968 |
}
|
|
969 | ||
970 |
#' Viewport forest plot: table part
|
|
971 |
#'
|
|
972 |
#' @description `r lifecycle::badge("deprecated")`
|
|
973 |
#'
|
|
974 |
#' Prepares a viewport for the table included in the forest plot.
|
|
975 |
#'
|
|
976 |
#' @noRd
|
|
977 |
#' @keywords internal
|
|
978 |
vp_forest_table_part <- function(nrow, |
|
979 |
ncol,
|
|
980 |
l_row,
|
|
981 |
l_col,
|
|
982 |
widths,
|
|
983 |
heights,
|
|
984 |
name) { |
|
985 | 4x |
lifecycle::deprecate_warn( |
986 | 4x |
"0.9.4", "vp_forest_table_part()", |
987 | 4x |
details = "`g_forest` now generates `ggplot` objects. This function is no longer used within `tern`." |
988 |
)
|
|
989 | ||
990 | 4x |
grid::vpTree( |
991 | 4x |
grid::viewport( |
992 | 4x |
name = name, |
993 | 4x |
layout.pos.row = l_row, |
994 | 4x |
layout.pos.col = l_col, |
995 | 4x |
layout = grid::grid.layout(nrow = nrow, ncol = ncol, widths = widths, heights = heights) |
996 |
),
|
|
997 | 4x |
children = grid::vpList( |
998 | 4x |
do.call( |
999 | 4x |
grid::vpList, |
1000 | 4x |
lapply( |
1001 | 4x |
seq_len(nrow), function(i) { |
1002 | 10x |
grid::viewport(layout.pos.row = i, layout.pos.col = 1, name = paste0("rowname-", i)) |
1003 |
}
|
|
1004 |
)
|
|
1005 |
),
|
|
1006 | 4x |
do.call( |
1007 | 4x |
grid::vpList, |
1008 | 4x |
apply( |
1009 | 4x |
expand.grid(seq_len(nrow), seq_len(ncol - 2)), |
1010 | 4x |
1,
|
1011 | 4x |
function(x) { |
1012 | 35x |
i <- x[1] |
1013 | 35x |
j <- x[2] |
1014 | 35x |
grid::viewport(layout.pos.row = i, layout.pos.col = j + 1, name = paste0("cell-", i, "-", j)) |
1015 |
}
|
|
1016 |
)
|
|
1017 |
),
|
|
1018 | 4x |
do.call( |
1019 | 4x |
grid::vpList, |
1020 | 4x |
lapply( |
1021 | 4x |
seq_len(nrow), |
1022 | 4x |
function(i) { |
1023 | 10x |
grid::viewport(layout.pos.row = i, layout.pos.col = ncol, name = paste0("forest-", i)) |
1024 |
}
|
|
1025 |
)
|
|
1026 |
)
|
|
1027 |
)
|
|
1028 |
)
|
|
1029 |
}
|
|
1030 | ||
1031 |
#' Forest rendering
|
|
1032 |
#'
|
|
1033 |
#' @description `r lifecycle::badge("deprecated")`
|
|
1034 |
#'
|
|
1035 |
#' Renders the forest grob.
|
|
1036 |
#'
|
|
1037 |
#' @noRd
|
|
1038 |
#' @keywords internal
|
|
1039 |
grid.forest <- function(...) { # nolint |
|
1040 | ! |
lifecycle::deprecate_warn( |
1041 | ! |
"0.9.4", "grid.forest()", |
1042 | ! |
details = "`g_forest` now generates `ggplot` objects. This function is no longer used within `tern`." |
1043 |
)
|
|
1044 | ||
1045 | ! |
grid::grid.draw(forest_grob(...)) |
1046 |
}
|
1 |
# summarize_glm_count ----------------------------------------------------------
|
|
2 |
#' Summarize Poisson negative binomial regression
|
|
3 |
#'
|
|
4 |
#' @description `r lifecycle::badge("experimental")`
|
|
5 |
#'
|
|
6 |
#' Summarize results of a Poisson negative binomial regression.
|
|
7 |
#' This can be used to analyze count and/or frequency data using a linear model.
|
|
8 |
#' It is specifically useful for analyzing count data (using the Poisson or Negative
|
|
9 |
#' Binomial distribution) that is result of a generalized linear model of one (e.g. arm) or more
|
|
10 |
#' covariates.
|
|
11 |
#'
|
|
12 |
#' @inheritParams h_glm_count
|
|
13 |
#' @inheritParams argument_convention
|
|
14 |
#' @param rate_mean_method (`character(1)`)\cr method used to estimate the mean odds ratio. Defaults to `emmeans`.
|
|
15 |
#' see details for more information.
|
|
16 |
#' @param scale (`numeric(1)`)\cr linear scaling factor for rate and confidence intervals. Defaults to `1`.
|
|
17 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
18 |
#'
|
|
19 |
#' Options are: ``r shQuote(get_stats("summarize_glm_count"), type = "sh")``
|
|
20 |
#'
|
|
21 |
#' @details
|
|
22 |
#' `summarize_glm_count()` uses `s_glm_count()` to calculate the statistics for the table. This
|
|
23 |
#' analysis function uses [h_glm_count()] to estimate the GLM with [stats::glm()] for Poisson and Quasi-Poisson
|
|
24 |
#' distributions or [MASS::glm.nb()] for Negative Binomial distribution. All methods assume a
|
|
25 |
#' logarithmic link function.
|
|
26 |
#'
|
|
27 |
#' At this point, rates and confidence intervals are estimated from the model using
|
|
28 |
#' either [emmeans::emmeans()] when `rate_mean_method = "emmeans"` or [h_ppmeans()]
|
|
29 |
#' when `rate_mean_method = "ppmeans"`.
|
|
30 |
#'
|
|
31 |
#' If a reference group is specified while building the table with `split_cols_by(ref_group)`,
|
|
32 |
#' no rate ratio or `p-value` are calculated. Otherwise, we use [emmeans::contrast()] to
|
|
33 |
#' calculate the rate ratio and `p-value` for the reference group. Values are always estimated
|
|
34 |
#' with `method = "trt.vs.ctrl"` and `ref` equal to the first `arm` value.
|
|
35 |
#'
|
|
36 |
#' @name summarize_glm_count
|
|
37 |
NULL
|
|
38 | ||
39 |
#' @describeIn summarize_glm_count Layout-creating function which can take statistics function arguments
|
|
40 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
41 |
#'
|
|
42 |
#' @return
|
|
43 |
#' * `summarize_glm_count()` returns a layout object suitable for passing to further layouting functions,
|
|
44 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
45 |
#' the statistics from `s_glm_count()` to the table layout.
|
|
46 |
#'
|
|
47 |
#' @examples
|
|
48 |
#' library(dplyr)
|
|
49 |
#'
|
|
50 |
#' anl <- tern_ex_adtte %>% filter(PARAMCD == "TNE")
|
|
51 |
#' anl$AVAL_f <- as.factor(anl$AVAL)
|
|
52 |
#'
|
|
53 |
#' lyt <- basic_table() %>%
|
|
54 |
#' split_cols_by("ARM", ref_group = "B: Placebo") %>%
|
|
55 |
#' add_colcounts() %>%
|
|
56 |
#' analyze_vars(
|
|
57 |
#' "AVAL_f",
|
|
58 |
#' var_labels = "Number of exacerbations per patient",
|
|
59 |
#' .stats = c("count_fraction"),
|
|
60 |
#' .formats = c("count_fraction" = "xx (xx.xx%)"),
|
|
61 |
#' .labels = c("Number of exacerbations per patient")
|
|
62 |
#' ) %>%
|
|
63 |
#' summarize_glm_count(
|
|
64 |
#' vars = "AVAL",
|
|
65 |
#' variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = NULL),
|
|
66 |
#' conf_level = 0.95,
|
|
67 |
#' distribution = "poisson",
|
|
68 |
#' rate_mean_method = "emmeans",
|
|
69 |
#' var_labels = "Adjusted (P) exacerbation rate (per year)",
|
|
70 |
#' table_names = "adjP",
|
|
71 |
#' .stats = c("rate"),
|
|
72 |
#' .labels = c(rate = "Rate")
|
|
73 |
#' ) %>%
|
|
74 |
#' summarize_glm_count(
|
|
75 |
#' vars = "AVAL",
|
|
76 |
#' variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = c("REGION1")),
|
|
77 |
#' conf_level = 0.95,
|
|
78 |
#' distribution = "quasipoisson",
|
|
79 |
#' rate_mean_method = "ppmeans",
|
|
80 |
#' var_labels = "Adjusted (QP) exacerbation rate (per year)",
|
|
81 |
#' table_names = "adjQP",
|
|
82 |
#' .stats = c("rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"),
|
|
83 |
#' .labels = c(
|
|
84 |
#' rate = "Rate", rate_ci = "Rate CI", rate_ratio = "Rate Ratio",
|
|
85 |
#' rate_ratio_ci = "Rate Ratio CI", pval = "p value"
|
|
86 |
#' )
|
|
87 |
#' ) %>%
|
|
88 |
#' summarize_glm_count(
|
|
89 |
#' vars = "AVAL",
|
|
90 |
#' variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = c("REGION1")),
|
|
91 |
#' conf_level = 0.95,
|
|
92 |
#' distribution = "negbin",
|
|
93 |
#' rate_mean_method = "emmeans",
|
|
94 |
#' var_labels = "Adjusted (NB) exacerbation rate (per year)",
|
|
95 |
#' table_names = "adjNB",
|
|
96 |
#' .stats = c("rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"),
|
|
97 |
#' .labels = c(
|
|
98 |
#' rate = "Rate", rate_ci = "Rate CI", rate_ratio = "Rate Ratio",
|
|
99 |
#' rate_ratio_ci = "Rate Ratio CI", pval = "p value"
|
|
100 |
#' )
|
|
101 |
#' )
|
|
102 |
#'
|
|
103 |
#' build_table(lyt = lyt, df = anl)
|
|
104 |
#'
|
|
105 |
#' @export
|
|
106 |
summarize_glm_count <- function(lyt, |
|
107 |
vars,
|
|
108 |
variables,
|
|
109 |
distribution,
|
|
110 |
conf_level,
|
|
111 |
rate_mean_method = c("emmeans", "ppmeans")[1], |
|
112 |
weights = stats::weights, |
|
113 |
scale = 1, |
|
114 |
var_labels,
|
|
115 |
na_str = default_na_str(), |
|
116 |
nested = TRUE, |
|
117 |
...,
|
|
118 |
show_labels = "visible", |
|
119 |
table_names = vars, |
|
120 |
.stats = c("n", "rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"), |
|
121 |
.stat_names = NULL, |
|
122 |
.formats = NULL, |
|
123 |
.labels = NULL, |
|
124 |
.indent_mods = list("rate_ci" = 1L, "rate_ratio_ci" = 1L, "pval" = 1L)) { |
|
125 | 3x |
checkmate::assert_choice(rate_mean_method, c("emmeans", "ppmeans")) |
126 | ||
127 |
# Process standard extra arguments
|
|
128 | 3x |
extra_args <- list(".stats" = .stats) |
129 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
130 | ! |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
131 | 3x |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
132 | 3x |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
133 | ||
134 |
# Process additional arguments to the statistic function
|
|
135 | 3x |
extra_args <- c( |
136 | 3x |
extra_args,
|
137 | 3x |
variables = list(variables), distribution = list(distribution), conf_level = list(conf_level), |
138 | 3x |
rate_mean_method = list(rate_mean_method), weights = list(weights), scale = list(scale), |
139 |
...
|
|
140 |
)
|
|
141 | ||
142 |
# Append additional info from layout to the analysis function
|
|
143 | 3x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
144 | 3x |
formals(a_glm_count) <- c(formals(a_glm_count), extra_args[[".additional_fun_parameters"]]) |
145 | ||
146 | 3x |
analyze( |
147 | 3x |
lyt = lyt, |
148 | 3x |
vars = vars, |
149 | 3x |
afun = a_glm_count, |
150 | 3x |
na_str = na_str, |
151 | 3x |
nested = nested, |
152 | 3x |
extra_args = extra_args, |
153 | 3x |
var_labels = var_labels, |
154 | 3x |
show_labels = show_labels, |
155 | 3x |
table_names = table_names |
156 |
)
|
|
157 |
}
|
|
158 | ||
159 |
#' @describeIn summarize_glm_count Statistics function that produces a named list of results
|
|
160 |
#' of the investigated Poisson model.
|
|
161 |
#'
|
|
162 |
#' @return
|
|
163 |
#' * `s_glm_count()` returns a named `list` of 5 statistics:
|
|
164 |
#' * `n`: Count of complete sample size for the group.
|
|
165 |
#' * `rate`: Estimated event rate per follow-up time.
|
|
166 |
#' * `rate_ci`: Confidence level for estimated rate per follow-up time.
|
|
167 |
#' * `rate_ratio`: Ratio of event rates in each treatment arm to the reference arm.
|
|
168 |
#' * `rate_ratio_ci`: Confidence level for the rate ratio.
|
|
169 |
#' * `pval`: p-value.
|
|
170 |
#'
|
|
171 |
#' @keywords internal
|
|
172 |
s_glm_count <- function(df, |
|
173 |
.var,
|
|
174 |
.df_row,
|
|
175 |
.ref_group,
|
|
176 |
.in_ref_col,
|
|
177 |
variables,
|
|
178 |
distribution,
|
|
179 |
conf_level,
|
|
180 |
rate_mean_method,
|
|
181 |
weights,
|
|
182 |
scale = 1, |
|
183 |
...) { |
|
184 | 14x |
arm <- variables$arm |
185 | ||
186 | 14x |
y <- df[[.var]] |
187 | 13x |
smry_level <- as.character(unique(df[[arm]])) |
188 | ||
189 |
# ensure there is only 1 value
|
|
190 | 13x |
checkmate::assert_scalar(smry_level) |
191 | ||
192 | 13x |
results <- h_glm_count( |
193 | 13x |
.var = .var, |
194 | 13x |
.df_row = .df_row, |
195 | 13x |
variables = variables, |
196 | 13x |
distribution = distribution, |
197 | 13x |
weights
|
198 |
)
|
|
199 | ||
200 | 13x |
if (rate_mean_method == "emmeans") { |
201 | 13x |
emmeans_smry <- summary(results$emmeans_fit, level = conf_level) |
202 | ! |
} else if (rate_mean_method == "ppmeans") { |
203 | ! |
emmeans_smry <- h_ppmeans(results$glm_fit, .df_row, arm, conf_level) |
204 |
}
|
|
205 | ||
206 | 13x |
emmeans_smry_level <- emmeans_smry[emmeans_smry[[arm]] == smry_level, ] |
207 | ||
208 |
# This happens if there is a reference col. No Ratio is calculated?
|
|
209 | 13x |
if (.in_ref_col) { |
210 | 5x |
list( |
211 | 5x |
n = length(y[!is.na(y)]), |
212 | 5x |
rate = formatters::with_label( |
213 | 5x |
ifelse(distribution == "negbin", emmeans_smry_level$response * scale, emmeans_smry_level$rate * scale), |
214 | 5x |
"Adjusted Rate"
|
215 |
),
|
|
216 | 5x |
rate_ci = formatters::with_label( |
217 | 5x |
c(emmeans_smry_level$asymp.LCL * scale, emmeans_smry_level$asymp.UCL * scale), |
218 | 5x |
f_conf_level(conf_level) |
219 |
),
|
|
220 | 5x |
rate_ratio = formatters::with_label(numeric(), "Adjusted Rate Ratio"), |
221 | 5x |
rate_ratio_ci = formatters::with_label(numeric(), f_conf_level(conf_level)), |
222 | 5x |
pval = formatters::with_label(numeric(), "p-value") |
223 |
)
|
|
224 |
} else { |
|
225 | 8x |
emmeans_contrasts <- emmeans::contrast( |
226 | 8x |
results$emmeans_fit, |
227 | 8x |
method = "trt.vs.ctrl", |
228 | 8x |
ref = grep( |
229 | 8x |
as.character(unique(.ref_group[[arm]])), |
230 | 8x |
as.data.frame(results$emmeans_fit)[[arm]] |
231 |
)
|
|
232 |
)
|
|
233 | ||
234 | 8x |
contrasts_smry <- summary( |
235 | 8x |
emmeans_contrasts,
|
236 | 8x |
infer = TRUE, |
237 | 8x |
adjust = "none" |
238 |
)
|
|
239 | ||
240 | 8x |
smry_contrasts_level <- contrasts_smry[grepl(smry_level, contrasts_smry$contrast), ] |
241 | ||
242 | 8x |
list( |
243 | 8x |
n = length(y[!is.na(y)]), |
244 | 8x |
rate = formatters::with_label( |
245 | 8x |
ifelse(distribution == "negbin", |
246 | 8x |
emmeans_smry_level$response * scale, |
247 | 8x |
emmeans_smry_level$rate * scale |
248 |
),
|
|
249 | 8x |
"Adjusted Rate"
|
250 |
),
|
|
251 | 8x |
rate_ci = formatters::with_label( |
252 | 8x |
c(emmeans_smry_level$asymp.LCL * scale, emmeans_smry_level$asymp.UCL * scale), |
253 | 8x |
f_conf_level(conf_level) |
254 |
),
|
|
255 | 8x |
rate_ratio = formatters::with_label( |
256 | 8x |
smry_contrasts_level$ratio, |
257 | 8x |
"Adjusted Rate Ratio"
|
258 |
),
|
|
259 | 8x |
rate_ratio_ci = formatters::with_label( |
260 | 8x |
c(smry_contrasts_level$asymp.LCL, smry_contrasts_level$asymp.UCL), |
261 | 8x |
f_conf_level(conf_level) |
262 |
),
|
|
263 | 8x |
pval = formatters::with_label( |
264 | 8x |
smry_contrasts_level$p.value, |
265 | 8x |
"p-value"
|
266 |
)
|
|
267 |
)
|
|
268 |
}
|
|
269 |
}
|
|
270 | ||
271 |
#' @describeIn summarize_glm_count Formatted analysis function which is used as `afun` in `summarize_glm_count()`.
|
|
272 |
#'
|
|
273 |
#' @return
|
|
274 |
#' * `a_glm_count()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
275 |
#'
|
|
276 |
#' @keywords internal
|
|
277 |
a_glm_count <- function(df, |
|
278 |
...,
|
|
279 |
.stats = NULL, |
|
280 |
.stat_names = NULL, |
|
281 |
.formats = NULL, |
|
282 |
.labels = NULL, |
|
283 |
.indent_mods = NULL) { |
|
284 |
# Check for additional parameters to the statistics function
|
|
285 | 9x |
dots_extra_args <- list(...) |
286 | 9x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
287 | 9x |
dots_extra_args$.additional_fun_parameters <- NULL |
288 | ||
289 |
# Check for user-defined functions
|
|
290 | 9x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
291 | 9x |
.stats <- default_and_custom_stats_list$all_stats |
292 | 9x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
293 | ||
294 |
# Apply statistics function
|
|
295 | 9x |
x_stats <- .apply_stat_functions( |
296 | 9x |
default_stat_fnc = s_glm_count, |
297 | 9x |
custom_stat_fnc_list = custom_stat_functions, |
298 | 9x |
args_list = c( |
299 | 9x |
df = list(df), |
300 | 9x |
extra_afun_params,
|
301 | 9x |
dots_extra_args
|
302 |
)
|
|
303 |
)
|
|
304 | ||
305 |
# Fill in formatting defaults
|
|
306 | 9x |
.stats <- get_stats("summarize_glm_count", |
307 | 9x |
stats_in = .stats, |
308 | 9x |
custom_stats_in = names(custom_stat_functions) |
309 |
)
|
|
310 | 9x |
.formats <- get_formats_from_stats(.stats, .formats) |
311 | 9x |
.labels <- get_labels_from_stats(.stats, .labels) |
312 | 9x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods) |
313 | ||
314 | 9x |
x_stats <- x_stats[.stats] |
315 | ||
316 |
# Auto format handling
|
|
317 | 9x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
318 | ||
319 |
# Get and check statistical names
|
|
320 | 9x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
321 | ||
322 | 9x |
in_rows( |
323 | 9x |
.list = x_stats, |
324 | 9x |
.formats = .formats, |
325 | 9x |
.names = .labels %>% .unlist_keep_nulls(), |
326 | 9x |
.stat_names = .stat_names, |
327 | 9x |
.labels = .labels %>% .unlist_keep_nulls(), |
328 | 9x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
329 |
)
|
|
330 |
}
|
|
331 | ||
332 |
# h_glm_count ------------------------------------------------------------------
|
|
333 | ||
334 |
#' Helper functions for Poisson models
|
|
335 |
#'
|
|
336 |
#' @description `r lifecycle::badge("experimental")`
|
|
337 |
#'
|
|
338 |
#' Helper functions that returns the results of [stats::glm()] when Poisson or Quasi-Poisson
|
|
339 |
#' distributions are needed (see `family` parameter), or [MASS::glm.nb()] for Negative Binomial
|
|
340 |
#' distributions. Link function for the GLM is `log`.
|
|
341 |
#'
|
|
342 |
#' @inheritParams argument_convention
|
|
343 |
#'
|
|
344 |
#' @seealso [summarize_glm_count]
|
|
345 |
#'
|
|
346 |
#' @name h_glm_count
|
|
347 |
NULL
|
|
348 | ||
349 |
#' @describeIn h_glm_count Helper function to return the results of the
|
|
350 |
#' selected model (Poisson, Quasi-Poisson, negative binomial).
|
|
351 |
#'
|
|
352 |
#' @param .df_row (`data.frame`)\cr dataset that includes all the variables that are called
|
|
353 |
#' in `.var` and `variables`.
|
|
354 |
#' @param variables (named `list` of `string`)\cr list of additional analysis variables, with
|
|
355 |
#' expected elements:
|
|
356 |
#' * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple
|
|
357 |
#' groups will be summarized. Specifically, the first level of `arm` variable is taken as the
|
|
358 |
#' reference group.
|
|
359 |
#' * `covariates` (`character`)\cr a vector that can contain single variable names (such as
|
|
360 |
#' `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
|
|
361 |
#' * `offset` (`numeric`)\cr a numeric vector or scalar adding an offset.
|
|
362 |
#' @param distribution (`character`)\cr a character value specifying the distribution
|
|
363 |
#' used in the regression (Poisson, Quasi-Poisson, negative binomial).
|
|
364 |
#' @param weights (`character`)\cr a character vector specifying weights used
|
|
365 |
#' in averaging predictions. Number of weights must equal the number of levels included in the covariates.
|
|
366 |
#' Weights option passed to [emmeans::emmeans()].
|
|
367 |
#'
|
|
368 |
#' @return
|
|
369 |
#' * `h_glm_count()` returns the results of the selected model.
|
|
370 |
#'
|
|
371 |
#' @keywords internal
|
|
372 |
h_glm_count <- function(.var, |
|
373 |
.df_row,
|
|
374 |
variables,
|
|
375 |
distribution,
|
|
376 |
weights) { |
|
377 | 21x |
checkmate::assert_subset(distribution, c("poisson", "quasipoisson", "negbin"), empty.ok = FALSE) |
378 | 19x |
switch(distribution, |
379 | 13x |
poisson = h_glm_poisson(.var, .df_row, variables, weights), |
380 | 1x |
quasipoisson = h_glm_quasipoisson(.var, .df_row, variables, weights), |
381 | 5x |
negbin = h_glm_negbin(.var, .df_row, variables, weights) |
382 |
)
|
|
383 |
}
|
|
384 | ||
385 |
#' @describeIn h_glm_count Helper function to return results of a Poisson model.
|
|
386 |
#'
|
|
387 |
#' @return
|
|
388 |
#' * `h_glm_poisson()` returns the results of a Poisson model.
|
|
389 |
#'
|
|
390 |
#' @keywords internal
|
|
391 |
h_glm_poisson <- function(.var, |
|
392 |
.df_row,
|
|
393 |
variables,
|
|
394 |
weights) { |
|
395 | 17x |
arm <- variables$arm |
396 | 17x |
covariates <- variables$covariates |
397 | ||
398 | 17x |
formula <- stats::as.formula(paste0( |
399 | 17x |
.var, " ~ ", |
400 |
" + ",
|
|
401 | 17x |
paste(covariates, collapse = " + "), |
402 |
" + ",
|
|
403 | 17x |
arm
|
404 |
)) |
|
405 | ||
406 | 17x |
if (is.null(variables$offset)) { |
407 | 1x |
glm_fit <- stats::glm( |
408 | 1x |
formula = formula, |
409 | 1x |
data = .df_row, |
410 | 1x |
family = stats::poisson(link = "log") |
411 |
)
|
|
412 |
} else { |
|
413 | 16x |
offset <- .df_row[[variables$offset]] |
414 | 14x |
glm_fit <- stats::glm( |
415 | 14x |
formula = formula, |
416 | 14x |
offset = offset, |
417 | 14x |
data = .df_row, |
418 | 14x |
family = stats::poisson(link = "log") |
419 |
)
|
|
420 |
}
|
|
421 | ||
422 | 15x |
emmeans_fit <- emmeans::emmeans( |
423 | 15x |
glm_fit,
|
424 | 15x |
specs = arm, |
425 | 15x |
data = .df_row, |
426 | 15x |
type = "response", |
427 | 15x |
offset = 0, |
428 | 15x |
weights = weights |
429 |
)
|
|
430 | ||
431 | 15x |
list( |
432 | 15x |
glm_fit = glm_fit, |
433 | 15x |
emmeans_fit = emmeans_fit |
434 |
)
|
|
435 |
}
|
|
436 | ||
437 |
#' @describeIn h_glm_count Helper function to return results of a Quasi-Poisson model.
|
|
438 |
#'
|
|
439 |
#' @return
|
|
440 |
#' * `h_glm_quasipoisson()` returns the results of a Quasi-Poisson model.
|
|
441 |
#'
|
|
442 |
#' @keywords internal
|
|
443 |
h_glm_quasipoisson <- function(.var, |
|
444 |
.df_row,
|
|
445 |
variables,
|
|
446 |
weights) { |
|
447 | 5x |
arm <- variables$arm |
448 | 5x |
covariates <- variables$covariates |
449 | ||
450 | 5x |
formula <- stats::as.formula(paste0( |
451 | 5x |
.var, " ~ ", |
452 |
" + ",
|
|
453 | 5x |
paste(covariates, collapse = " + "), |
454 |
" + ",
|
|
455 | 5x |
arm
|
456 |
)) |
|
457 | ||
458 | 5x |
if (is.null(variables$offset)) { |
459 | ! |
glm_fit <- stats::glm( |
460 | ! |
formula = formula, |
461 | ! |
data = .df_row, |
462 | ! |
family = stats::quasipoisson(link = "log") |
463 |
)
|
|
464 |
} else { |
|
465 | 5x |
offset <- .df_row[[variables$offset]] |
466 | 3x |
glm_fit <- stats::glm( |
467 | 3x |
formula = formula, |
468 | 3x |
offset = offset, |
469 | 3x |
data = .df_row, |
470 | 3x |
family = stats::quasipoisson(link = "log") |
471 |
)
|
|
472 |
}
|
|
473 | 3x |
emmeans_fit <- emmeans::emmeans( |
474 | 3x |
glm_fit,
|
475 | 3x |
specs = arm, |
476 | 3x |
data = .df_row, |
477 | 3x |
type = "response", |
478 | 3x |
offset = 0, |
479 | 3x |
weights = weights |
480 |
)
|
|
481 | ||
482 | 3x |
list( |
483 | 3x |
glm_fit = glm_fit, |
484 | 3x |
emmeans_fit = emmeans_fit |
485 |
)
|
|
486 |
}
|
|
487 | ||
488 |
#' @describeIn h_glm_count Helper function to return results of a negative binomial model.
|
|
489 |
#'
|
|
490 |
#' @return
|
|
491 |
#' * `h_glm_negbin()` returns the results of a negative binomial model.
|
|
492 |
#'
|
|
493 |
#' @keywords internal
|
|
494 |
h_glm_negbin <- function(.var, |
|
495 |
.df_row,
|
|
496 |
variables,
|
|
497 |
weights) { |
|
498 | 9x |
arm <- variables$arm |
499 | 9x |
covariates <- variables$covariates |
500 | 9x |
formula <- stats::as.formula(paste0( |
501 | 9x |
.var, " ~ ", |
502 |
" + ",
|
|
503 | 9x |
paste(covariates, collapse = " + "), |
504 |
" + ",
|
|
505 | 9x |
arm
|
506 |
)) |
|
507 | ||
508 | 9x |
if (is.null(variables$offset)) { |
509 | 1x |
formula <- stats::as.formula(paste0( |
510 | 1x |
.var, " ~ ", |
511 |
" + ",
|
|
512 | 1x |
paste(covariates, collapse = " + "), |
513 |
" + ",
|
|
514 | 1x |
arm
|
515 |
)) |
|
516 |
} else { |
|
517 | 8x |
offset <- variables$offset |
518 | 8x |
formula_txt <- sprintf( |
519 | 8x |
"%s ~ %s + %s + offset(%s)",
|
520 | 8x |
.var,
|
521 | 8x |
arm, paste0(covariates, collapse = " + "), offset |
522 |
)
|
|
523 | 8x |
formula <- stats::as.formula( |
524 | 8x |
formula_txt
|
525 |
)
|
|
526 |
}
|
|
527 | ||
528 | 9x |
glm_fit <- MASS::glm.nb( |
529 | 9x |
formula = formula, |
530 | 9x |
data = .df_row, |
531 | 9x |
link = "log" |
532 |
)
|
|
533 | ||
534 | 7x |
emmeans_fit <- emmeans::emmeans( |
535 | 7x |
glm_fit,
|
536 | 7x |
specs = arm, |
537 | 7x |
data = .df_row, |
538 | 7x |
type = "response", |
539 | 7x |
offset = 0, |
540 | 7x |
weights = weights |
541 |
)
|
|
542 | ||
543 | 7x |
list( |
544 | 7x |
glm_fit = glm_fit, |
545 | 7x |
emmeans_fit = emmeans_fit |
546 |
)
|
|
547 |
}
|
|
548 | ||
549 |
# h_ppmeans --------------------------------------------------------------------
|
|
550 |
#' Function to return the estimated means using predicted probabilities
|
|
551 |
#'
|
|
552 |
#' @description
|
|
553 |
#' For each arm level, the predicted mean rate is calculated using the fitted model object, with `newdata`
|
|
554 |
#' set to the result of `stats::model.frame`, a reconstructed data or the original data, depending on the
|
|
555 |
#' object formula (coming from the fit). The confidence interval is derived using the `conf_level` parameter.
|
|
556 |
#'
|
|
557 |
#' @param obj (`glm.fit`)\cr fitted model object used to derive the mean rate estimates in each treatment arm.
|
|
558 |
#' @param .df_row (`data.frame`)\cr dataset that includes all the variables that are called in `.var` and `variables`.
|
|
559 |
#' @param arm (`string`)\cr group variable, for which the covariate adjusted means of multiple groups will be
|
|
560 |
#' summarized. Specifically, the first level of `arm` variable is taken as the reference group.
|
|
561 |
#' @param conf_level (`proportion`)\cr value used to derive the confidence interval for the rate.
|
|
562 |
#'
|
|
563 |
#' @return
|
|
564 |
#' * `h_ppmeans()` returns the estimated means.
|
|
565 |
#'
|
|
566 |
#' @seealso [summarize_glm_count()].
|
|
567 |
#'
|
|
568 |
#' @export
|
|
569 |
h_ppmeans <- function(obj, .df_row, arm, conf_level) { |
|
570 | 1x |
alpha <- 1 - conf_level |
571 | 1x |
p <- 1 - alpha / 2 |
572 | ||
573 | 1x |
arm_levels <- levels(.df_row[[arm]]) |
574 | ||
575 | 1x |
out <- lapply(arm_levels, function(lev) { |
576 | 3x |
temp <- .df_row |
577 | 3x |
temp[[arm]] <- factor(lev, levels = arm_levels) |
578 | ||
579 | 3x |
mf <- stats::model.frame(obj$formula, data = temp) |
580 | 3x |
X <- stats::model.matrix(obj$formula, data = mf) # nolint |
581 | ||
582 | 3x |
rate <- stats::predict(obj, newdata = mf, type = "response") |
583 | 3x |
rate_hat <- mean(rate) |
584 | ||
585 | 3x |
zz <- colMeans(rate * X) |
586 | 3x |
se <- sqrt(as.numeric(t(zz) %*% stats::vcov(obj) %*% zz)) |
587 | 3x |
rate_lwr <- rate_hat * exp(-stats::qnorm(p) * se / rate_hat) |
588 | 3x |
rate_upr <- rate_hat * exp(stats::qnorm(p) * se / rate_hat) |
589 | ||
590 | 3x |
c(rate_hat, rate_lwr, rate_upr) |
591 |
}) |
|
592 | ||
593 | 1x |
names(out) <- arm_levels |
594 | 1x |
out <- do.call(rbind, out) |
595 | 1x |
if ("negbin" %in% class(obj)) { |
596 | ! |
colnames(out) <- c("response", "asymp.LCL", "asymp.UCL") |
597 |
} else { |
|
598 | 1x |
colnames(out) <- c("rate", "asymp.LCL", "asymp.UCL") |
599 |
}
|
|
600 | 1x |
out <- as.data.frame(out) |
601 | 1x |
out[[arm]] <- rownames(out) |
602 | 1x |
out
|
603 |
}
|
1 |
#' Create a STEP graph
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Based on the STEP results, creates a `ggplot` graph showing the estimated HR or OR
|
|
6 |
#' along the continuous biomarker value subgroups.
|
|
7 |
#'
|
|
8 |
#' @param df (`tibble`)\cr result of [tidy.step()].
|
|
9 |
#' @param use_percentile (`flag`)\cr whether to use percentiles for the x axis or actual
|
|
10 |
#' biomarker values.
|
|
11 |
#' @param est (named `list`)\cr `col` and `lty` settings for estimate line.
|
|
12 |
#' @param ci_ribbon (named `list` or `NULL`)\cr `fill` and `alpha` settings for the confidence interval
|
|
13 |
#' ribbon area, or `NULL` to not plot a CI ribbon.
|
|
14 |
#' @param col (`character`)\cr color(s).
|
|
15 |
#'
|
|
16 |
#' @return A `ggplot` STEP graph.
|
|
17 |
#'
|
|
18 |
#' @seealso Custom tidy method [tidy.step()].
|
|
19 |
#'
|
|
20 |
#' @examples
|
|
21 |
#' library(survival)
|
|
22 |
#' lung$sex <- factor(lung$sex)
|
|
23 |
#'
|
|
24 |
#' # Survival example.
|
|
25 |
#' vars <- list(
|
|
26 |
#' time = "time",
|
|
27 |
#' event = "status",
|
|
28 |
#' arm = "sex",
|
|
29 |
#' biomarker = "age"
|
|
30 |
#' )
|
|
31 |
#'
|
|
32 |
#' step_matrix <- fit_survival_step(
|
|
33 |
#' variables = vars,
|
|
34 |
#' data = lung,
|
|
35 |
#' control = c(control_coxph(), control_step(num_points = 10, degree = 2))
|
|
36 |
#' )
|
|
37 |
#' step_data <- broom::tidy(step_matrix)
|
|
38 |
#'
|
|
39 |
#' # Default plot.
|
|
40 |
#' g_step(step_data)
|
|
41 |
#'
|
|
42 |
#' # Add the reference 1 horizontal line.
|
|
43 |
#' library(ggplot2)
|
|
44 |
#' g_step(step_data) +
|
|
45 |
#' ggplot2::geom_hline(ggplot2::aes(yintercept = 1), linetype = 2)
|
|
46 |
#'
|
|
47 |
#' # Use actual values instead of percentiles, different color for estimate and no CI,
|
|
48 |
#' # use log scale for y axis.
|
|
49 |
#' g_step(
|
|
50 |
#' step_data,
|
|
51 |
#' use_percentile = FALSE,
|
|
52 |
#' est = list(col = "blue", lty = 1),
|
|
53 |
#' ci_ribbon = NULL
|
|
54 |
#' ) + scale_y_log10()
|
|
55 |
#'
|
|
56 |
#' # Adding another curve based on additional column.
|
|
57 |
#' step_data$extra <- exp(step_data$`Percentile Center`)
|
|
58 |
#' g_step(step_data) +
|
|
59 |
#' ggplot2::geom_line(ggplot2::aes(y = extra), linetype = 2, color = "green")
|
|
60 |
#'
|
|
61 |
#' # Response example.
|
|
62 |
#' vars <- list(
|
|
63 |
#' response = "status",
|
|
64 |
#' arm = "sex",
|
|
65 |
#' biomarker = "age"
|
|
66 |
#' )
|
|
67 |
#'
|
|
68 |
#' step_matrix <- fit_rsp_step(
|
|
69 |
#' variables = vars,
|
|
70 |
#' data = lung,
|
|
71 |
#' control = c(
|
|
72 |
#' control_logistic(response_definition = "I(response == 2)"),
|
|
73 |
#' control_step()
|
|
74 |
#' )
|
|
75 |
#' )
|
|
76 |
#' step_data <- broom::tidy(step_matrix)
|
|
77 |
#' g_step(step_data)
|
|
78 |
#'
|
|
79 |
#' @export
|
|
80 |
g_step <- function(df, |
|
81 |
use_percentile = "Percentile Center" %in% names(df), |
|
82 |
est = list(col = "blue", lty = 1), |
|
83 |
ci_ribbon = list(fill = getOption("ggplot2.discrete.colour")[1], alpha = 0.5), |
|
84 |
col = getOption("ggplot2.discrete.colour")) { |
|
85 | 2x |
checkmate::assert_tibble(df) |
86 | 2x |
checkmate::assert_flag(use_percentile) |
87 | 2x |
checkmate::assert_character(col, null.ok = TRUE) |
88 | 2x |
checkmate::assert_list(est, names = "named") |
89 | 2x |
checkmate::assert_list(ci_ribbon, names = "named", null.ok = TRUE) |
90 | ||
91 | 2x |
x_var <- ifelse(use_percentile, "Percentile Center", "Interval Center") |
92 | 2x |
df$x <- df[[x_var]] |
93 | 2x |
attrs <- attributes(df) |
94 | 2x |
df$y <- df[[attrs$estimate]] |
95 | ||
96 |
# Set legend names. To be modified also at call level
|
|
97 | 2x |
legend_names <- c("Estimate", "CI 95%") |
98 | ||
99 | 2x |
p <- ggplot2::ggplot(df, ggplot2::aes(x = .data[["x"]], y = .data[["y"]])) |
100 | ||
101 | 2x |
if (!is.null(col)) { |
102 | 2x |
p <- p + |
103 | 2x |
ggplot2::scale_color_manual(values = col) |
104 |
}
|
|
105 | ||
106 | 2x |
if (!is.null(ci_ribbon)) { |
107 | 1x |
if (is.null(ci_ribbon$fill)) { |
108 | ! |
ci_ribbon$fill <- "lightblue" |
109 |
}
|
|
110 | 1x |
p <- p + ggplot2::geom_ribbon( |
111 | 1x |
ggplot2::aes( |
112 | 1x |
ymin = .data[["ci_lower"]], ymax = .data[["ci_upper"]], |
113 | 1x |
fill = legend_names[2] |
114 |
),
|
|
115 | 1x |
alpha = ci_ribbon$alpha |
116 |
) + |
|
117 | 1x |
scale_fill_manual( |
118 | 1x |
name = "", values = c("CI 95%" = ci_ribbon$fill) |
119 |
)
|
|
120 |
}
|
|
121 | 2x |
suppressMessages(p <- p + |
122 | 2x |
ggplot2::geom_line( |
123 | 2x |
ggplot2::aes(y = .data[["y"]], color = legend_names[1]), |
124 | 2x |
linetype = est$lty |
125 |
) + |
|
126 | 2x |
scale_colour_manual( |
127 | 2x |
name = "", values = c("Estimate" = "blue") |
128 |
)) |
|
129 | ||
130 | 2x |
p <- p + ggplot2::labs(x = attrs$biomarker, y = attrs$estimate) |
131 | 2x |
if (use_percentile) { |
132 | 1x |
p <- p + ggplot2::scale_x_continuous(labels = scales::percent) |
133 |
}
|
|
134 | 2x |
p
|
135 |
}
|
|
136 | ||
137 |
#' Custom tidy method for STEP results
|
|
138 |
#'
|
|
139 |
#' @description `r lifecycle::badge("stable")`
|
|
140 |
#'
|
|
141 |
#' Tidy the STEP results into a `tibble` format ready for plotting.
|
|
142 |
#'
|
|
143 |
#' @param x (`matrix`)\cr results from [fit_survival_step()].
|
|
144 |
#' @param ... not used.
|
|
145 |
#'
|
|
146 |
#' @return A `tibble` with one row per STEP subgroup. The estimates and CIs are on the HR or OR scale,
|
|
147 |
#' respectively. Additional attributes carry metadata also used for plotting.
|
|
148 |
#'
|
|
149 |
#' @seealso [g_step()] which consumes the result from this function.
|
|
150 |
#'
|
|
151 |
#' @method tidy step
|
|
152 |
#'
|
|
153 |
#' @examples
|
|
154 |
#' library(survival)
|
|
155 |
#' lung$sex <- factor(lung$sex)
|
|
156 |
#' vars <- list(
|
|
157 |
#' time = "time",
|
|
158 |
#' event = "status",
|
|
159 |
#' arm = "sex",
|
|
160 |
#' biomarker = "age"
|
|
161 |
#' )
|
|
162 |
#' step_matrix <- fit_survival_step(
|
|
163 |
#' variables = vars,
|
|
164 |
#' data = lung,
|
|
165 |
#' control = c(control_coxph(), control_step(num_points = 10, degree = 2))
|
|
166 |
#' )
|
|
167 |
#' broom::tidy(step_matrix)
|
|
168 |
#'
|
|
169 |
#' @export
|
|
170 |
tidy.step <- function(x, ...) { # nolint |
|
171 | 7x |
checkmate::assert_class(x, "step") |
172 | 7x |
dat <- as.data.frame(x) |
173 | 7x |
nams <- names(dat) |
174 | 7x |
is_surv <- "loghr" %in% names(dat) |
175 | 7x |
est_var <- ifelse(is_surv, "loghr", "logor") |
176 | 7x |
new_est_var <- ifelse(is_surv, "Hazard Ratio", "Odds Ratio") |
177 | 7x |
new_y_vars <- c(new_est_var, c("ci_lower", "ci_upper")) |
178 | 7x |
names(dat)[match(est_var, nams)] <- new_est_var |
179 | 7x |
dat[, new_y_vars] <- exp(dat[, new_y_vars]) |
180 | 7x |
any_is_na <- any(is.na(dat[, new_y_vars])) |
181 | 7x |
any_is_very_large <- any(abs(dat[, new_y_vars]) > 1e10, na.rm = TRUE) |
182 | 7x |
if (any_is_na) { |
183 | 2x |
warning(paste( |
184 | 2x |
"Missing values in the point estimate or CI columns,",
|
185 | 2x |
"this will lead to holes in the `g_step()` plot"
|
186 |
)) |
|
187 |
}
|
|
188 | 7x |
if (any_is_very_large) { |
189 | 2x |
warning(paste( |
190 | 2x |
"Very large absolute values in the point estimate or CI columns,",
|
191 | 2x |
"consider adding `scale_y_log10()` to the `g_step()` result for plotting"
|
192 |
)) |
|
193 |
}
|
|
194 | 7x |
if (any_is_na || any_is_very_large) { |
195 | 4x |
warning("Consider using larger `bandwidth`, less `num_points` in `control_step()` settings for fitting") |
196 |
}
|
|
197 | 7x |
structure( |
198 | 7x |
tibble::as_tibble(dat), |
199 | 7x |
estimate = new_est_var, |
200 | 7x |
biomarker = attr(x, "variables")$biomarker, |
201 | 7x |
ci = f_conf_level(attr(x, "control")$conf_level) |
202 |
)
|
|
203 |
}
|
1 |
#' Proportion estimation
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [estimate_proportion()] creates a layout element to estimate the proportion of responders
|
|
6 |
#' within a studied population. The primary analysis variable, `vars`, indicates whether a response has occurred for
|
|
7 |
#' each record. See the `method` parameter for options of methods to use when constructing the confidence interval of
|
|
8 |
#' the proportion. Additionally, a stratification variable can be supplied via the `strata` element of the `variables`
|
|
9 |
#' argument.
|
|
10 |
#'
|
|
11 |
#' @inheritParams prop_strat_wilson
|
|
12 |
#' @inheritParams argument_convention
|
|
13 |
#' @param method (`string`)\cr the method used to construct the confidence interval
|
|
14 |
#' for proportion of successful outcomes; one of `waldcc`, `wald`, `clopper-pearson`,
|
|
15 |
#' `wilson`, `wilsonc`, `strat_wilson`, `strat_wilsonc`, `agresti-coull` or `jeffreys`.
|
|
16 |
#' @param long (`flag`)\cr whether a long description is required.
|
|
17 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
18 |
#'
|
|
19 |
#' Options are: ``r shQuote(get_stats("estimate_proportion"), type = "sh")``
|
|
20 |
#'
|
|
21 |
#' @seealso [h_proportions]
|
|
22 |
#'
|
|
23 |
#' @name estimate_proportion
|
|
24 |
#' @order 1
|
|
25 |
NULL
|
|
26 | ||
27 |
#' @describeIn estimate_proportion Statistics function estimating a
|
|
28 |
#' proportion along with its confidence interval.
|
|
29 |
#'
|
|
30 |
#' @param df (`logical` or `data.frame`)\cr if only a logical vector is used,
|
|
31 |
#' it indicates whether each subject is a responder or not. `TRUE` represents
|
|
32 |
#' a successful outcome. If a `data.frame` is provided, also the `strata` variable
|
|
33 |
#' names must be provided in `variables` as a list element with the strata strings.
|
|
34 |
#' In the case of `data.frame`, the logical vector of responses must be indicated as a
|
|
35 |
#' variable name in `.var`.
|
|
36 |
#'
|
|
37 |
#' @return
|
|
38 |
#' * `s_proportion()` returns statistics `n_prop` (`n` and proportion) and `prop_ci` (proportion CI) for a
|
|
39 |
#' given variable.
|
|
40 |
#'
|
|
41 |
#' @examples
|
|
42 |
#' # Case with only logical vector.
|
|
43 |
#' rsp_v <- c(1, 0, 1, 0, 1, 1, 0, 0)
|
|
44 |
#' s_proportion(rsp_v)
|
|
45 |
#'
|
|
46 |
#' # Example for Stratified Wilson CI
|
|
47 |
#' nex <- 100 # Number of example rows
|
|
48 |
#' dta <- data.frame(
|
|
49 |
#' "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
|
|
50 |
#' "grp" = sample(c("A", "B"), nex, TRUE),
|
|
51 |
#' "f1" = sample(c("a1", "a2"), nex, TRUE),
|
|
52 |
#' "f2" = sample(c("x", "y", "z"), nex, TRUE),
|
|
53 |
#' stringsAsFactors = TRUE
|
|
54 |
#' )
|
|
55 |
#'
|
|
56 |
#' s_proportion(
|
|
57 |
#' df = dta,
|
|
58 |
#' .var = "rsp",
|
|
59 |
#' variables = list(strata = c("f1", "f2")),
|
|
60 |
#' conf_level = 0.90,
|
|
61 |
#' method = "strat_wilson"
|
|
62 |
#' )
|
|
63 |
#'
|
|
64 |
#' @export
|
|
65 |
s_proportion <- function(df, |
|
66 |
.var,
|
|
67 |
conf_level = 0.95, |
|
68 |
method = c( |
|
69 |
"waldcc", "wald", "clopper-pearson", |
|
70 |
"wilson", "wilsonc", "strat_wilson", "strat_wilsonc", |
|
71 |
"agresti-coull", "jeffreys" |
|
72 |
),
|
|
73 |
weights = NULL, |
|
74 |
max_iterations = 50, |
|
75 |
variables = list(strata = NULL), |
|
76 |
long = FALSE, |
|
77 |
denom = c("n", "N_col", "N_row"), |
|
78 |
...) { |
|
79 | 182x |
method <- match.arg(method) |
80 | 182x |
checkmate::assert_flag(long) |
81 | 182x |
assert_proportion_value(conf_level) |
82 | 182x |
args_list <- list(...) |
83 | 182x |
.N_row <- args_list[[".N_row"]] # nolint |
84 | 182x |
.N_col <- args_list[[".N_col"]] # nolint |
85 | ||
86 | 182x |
if (!is.null(variables$strata)) { |
87 |
# Checks for strata
|
|
88 | ! |
if (missing(df)) stop("When doing stratified analysis a data.frame with specific columns is needed.") |
89 | 9x |
strata_colnames <- variables$strata |
90 | 9x |
checkmate::assert_character(strata_colnames, null.ok = FALSE) |
91 | 9x |
strata_vars <- stats::setNames(as.list(strata_colnames), strata_colnames) |
92 | 9x |
assert_df_with_variables(df, strata_vars) |
93 | ||
94 | 9x |
strata <- interaction(df[strata_colnames]) |
95 | 9x |
strata <- as.factor(strata) |
96 | ||
97 |
# Pushing down checks to prop_strat_wilson
|
|
98 | 173x |
} else if (checkmate::test_subset(method, c("strat_wilson", "strat_wilsonc"))) { |
99 | ! |
stop("To use stratified methods you need to specify the strata variables.") |
100 |
}
|
|
101 | ||
102 |
# Finding the Responders
|
|
103 | 182x |
if (checkmate::test_atomic_vector(df)) { |
104 | 167x |
rsp <- as.logical(df) |
105 |
} else { |
|
106 | 15x |
rsp <- as.logical(df[[.var]]) |
107 |
}
|
|
108 | ||
109 |
# Stop for stratified analysis
|
|
110 | 182x |
if (method %in% c("strat_wilson", "strat_wilsonc") && denom[1] != "n") { |
111 | 1x |
stop( |
112 | 1x |
"Stratified methods only support 'n' as the denominator (denom). ",
|
113 | 1x |
"Consider adding negative responders directly to the dataset."
|
114 |
)
|
|
115 |
}
|
|
116 | ||
117 | 181x |
denom <- match.arg(denom) %>% |
118 | 181x |
switch( |
119 | 181x |
n = length(rsp), |
120 | 181x |
N_row = .N_row, |
121 | 181x |
N_col = .N_col |
122 |
)
|
|
123 | 181x |
n_rsp <- sum(rsp) |
124 | 181x |
p_hat <- ifelse(denom > 0, n_rsp / denom, 0) |
125 | ||
126 | 181x |
prop_ci <- switch(method, |
127 | 181x |
"clopper-pearson" = prop_clopper_pearson(rsp, n = denom, conf_level), |
128 | 181x |
"wilson" = prop_wilson(rsp, n = denom, conf_level), |
129 | 181x |
"wilsonc" = prop_wilson(rsp, n = denom, conf_level, correct = TRUE), |
130 | 181x |
"strat_wilson" = prop_strat_wilson(rsp, strata, weights, conf_level, max_iterations, correct = FALSE)$conf_int, |
131 | 181x |
"strat_wilsonc" = prop_strat_wilson(rsp, strata, weights, conf_level, max_iterations, correct = TRUE)$conf_int, |
132 | 181x |
"wald" = prop_wald(rsp, n = denom, conf_level), |
133 | 181x |
"waldcc" = prop_wald(rsp, n = denom, conf_level, correct = TRUE), |
134 | 181x |
"agresti-coull" = prop_agresti_coull(rsp, n = denom, conf_level), |
135 | 181x |
"jeffreys" = prop_jeffreys(rsp, n = denom, conf_level) |
136 |
)
|
|
137 | ||
138 | 181x |
list( |
139 | 181x |
"n_prop" = formatters::with_label(c(n_rsp, p_hat), "Responders"), |
140 | 181x |
"prop_ci" = formatters::with_label(x = 100 * prop_ci, label = d_proportion(conf_level, method, long = long)) |
141 |
)
|
|
142 |
}
|
|
143 | ||
144 |
#' @describeIn estimate_proportion Formatted analysis function which is used as `afun`
|
|
145 |
#' in `estimate_proportion()`.
|
|
146 |
#'
|
|
147 |
#' @return
|
|
148 |
#' * `a_proportion()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
149 |
#'
|
|
150 |
#' @export
|
|
151 |
a_proportion <- function(df, |
|
152 |
...,
|
|
153 |
.stats = NULL, |
|
154 |
.stat_names = NULL, |
|
155 |
.formats = NULL, |
|
156 |
.labels = NULL, |
|
157 |
.indent_mods = NULL) { |
|
158 |
# Check for additional parameters to the statistics function
|
|
159 | 15x |
dots_extra_args <- list(...) |
160 | 15x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
161 | 15x |
dots_extra_args$.additional_fun_parameters <- NULL |
162 | ||
163 |
# Check for user-defined functions
|
|
164 | 15x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
165 | 15x |
.stats <- default_and_custom_stats_list$all_stats |
166 | 15x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
167 | ||
168 |
# Apply statistics function
|
|
169 | 15x |
x_stats <- .apply_stat_functions( |
170 | 15x |
default_stat_fnc = s_proportion, |
171 | 15x |
custom_stat_fnc_list = custom_stat_functions, |
172 | 15x |
args_list = c( |
173 | 15x |
df = list(df), |
174 | 15x |
extra_afun_params,
|
175 | 15x |
dots_extra_args
|
176 |
)
|
|
177 |
)
|
|
178 | ||
179 |
# Fill in formatting defaults
|
|
180 | 14x |
.stats <- get_stats("estimate_proportion", |
181 | 14x |
stats_in = .stats, |
182 | 14x |
custom_stats_in = names(custom_stat_functions) |
183 |
)
|
|
184 | 14x |
x_stats <- x_stats[.stats] |
185 | 14x |
.formats <- get_formats_from_stats(.stats, .formats) |
186 | 14x |
.labels <- get_labels_from_stats( |
187 | 14x |
.stats, .labels, |
188 | 14x |
tern_defaults = c(lapply(x_stats, attr, "label"), tern_default_labels) |
189 |
)
|
|
190 | 14x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods) |
191 | ||
192 |
# Auto format handling
|
|
193 | 14x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
194 | ||
195 |
# Get and check statistical names
|
|
196 | 14x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
197 | ||
198 | 14x |
in_rows( |
199 | 14x |
.list = x_stats, |
200 | 14x |
.formats = .formats, |
201 | 14x |
.names = .labels %>% .unlist_keep_nulls(), |
202 | 14x |
.stat_names = .stat_names, |
203 | 14x |
.labels = .labels %>% .unlist_keep_nulls(), |
204 | 14x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
205 |
)
|
|
206 |
}
|
|
207 | ||
208 |
#' @describeIn estimate_proportion Layout-creating function which can take statistics function arguments
|
|
209 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
210 |
#'
|
|
211 |
#' @return
|
|
212 |
#' * `estimate_proportion()` returns a layout object suitable for passing to further layouting functions,
|
|
213 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
214 |
#' the statistics from `s_proportion()` to the table layout.
|
|
215 |
#'
|
|
216 |
#' @examples
|
|
217 |
#' dta_test <- data.frame(
|
|
218 |
#' USUBJID = paste0("S", 1:12),
|
|
219 |
#' ARM = rep(LETTERS[1:3], each = 4),
|
|
220 |
#' AVAL = rep(LETTERS[1:3], each = 4)
|
|
221 |
#' ) %>%
|
|
222 |
#' dplyr::mutate(is_rsp = AVAL == "A")
|
|
223 |
#'
|
|
224 |
#' basic_table() %>%
|
|
225 |
#' split_cols_by("ARM") %>%
|
|
226 |
#' estimate_proportion(vars = "is_rsp") %>%
|
|
227 |
#' build_table(df = dta_test)
|
|
228 |
#'
|
|
229 |
#' @export
|
|
230 |
#' @order 2
|
|
231 |
estimate_proportion <- function(lyt, |
|
232 |
vars,
|
|
233 |
conf_level = 0.95, |
|
234 |
method = c( |
|
235 |
"waldcc", "wald", "clopper-pearson", |
|
236 |
"wilson", "wilsonc", "strat_wilson", "strat_wilsonc", |
|
237 |
"agresti-coull", "jeffreys" |
|
238 |
),
|
|
239 |
weights = NULL, |
|
240 |
max_iterations = 50, |
|
241 |
variables = list(strata = NULL), |
|
242 |
long = FALSE, |
|
243 |
na_str = default_na_str(), |
|
244 |
nested = TRUE, |
|
245 |
...,
|
|
246 |
show_labels = "hidden", |
|
247 |
table_names = vars, |
|
248 |
.stats = c("n_prop", "prop_ci"), |
|
249 |
.stat_names = NULL, |
|
250 |
.formats = NULL, |
|
251 |
.labels = NULL, |
|
252 |
.indent_mods = NULL) { |
|
253 |
# Process standard extra arguments
|
|
254 | 6x |
extra_args <- list(".stats" = .stats) |
255 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
256 | 3x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
257 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
258 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
259 | ||
260 |
# Process additional arguments to the statistic function
|
|
261 | 6x |
extra_args <- c( |
262 | 6x |
extra_args,
|
263 | 6x |
conf_level = list(conf_level), method = list(method), weights = list(weights), |
264 | 6x |
max_iterations = list(max_iterations), variables = list(variables), long = list(long), |
265 |
...
|
|
266 |
)
|
|
267 | ||
268 |
# Append additional info from layout to the analysis function
|
|
269 | 6x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
270 | 6x |
formals(a_proportion) <- c(formals(a_proportion), extra_args[[".additional_fun_parameters"]]) |
271 | ||
272 | 6x |
analyze( |
273 | 6x |
lyt = lyt, |
274 | 6x |
vars = vars, |
275 | 6x |
afun = a_proportion, |
276 | 6x |
na_str = na_str, |
277 | 6x |
nested = nested, |
278 | 6x |
extra_args = extra_args, |
279 | 6x |
show_labels = show_labels, |
280 | 6x |
table_names = table_names |
281 |
)
|
|
282 |
}
|
|
283 | ||
284 |
#' Helper functions for calculating proportion confidence intervals
|
|
285 |
#'
|
|
286 |
#' @description `r lifecycle::badge("stable")`
|
|
287 |
#'
|
|
288 |
#' Functions to calculate different proportion confidence intervals for use in [estimate_proportion()].
|
|
289 |
#'
|
|
290 |
#' @inheritParams argument_convention
|
|
291 |
#' @inheritParams estimate_proportion
|
|
292 |
#'
|
|
293 |
#' @return Confidence interval of a proportion.
|
|
294 |
#'
|
|
295 |
#' @seealso [estimate_proportion], descriptive function [d_proportion()],
|
|
296 |
#' and helper functions [strata_normal_quantile()] and [update_weights_strat_wilson()].
|
|
297 |
#'
|
|
298 |
#' @name h_proportions
|
|
299 |
NULL
|
|
300 | ||
301 |
#' @describeIn h_proportions Calculates the Wilson interval by calling [stats::prop.test()].
|
|
302 |
#' Also referred to as Wilson score interval.
|
|
303 |
#'
|
|
304 |
#' @examples
|
|
305 |
#' rsp <- c(
|
|
306 |
#' TRUE, TRUE, TRUE, TRUE, TRUE,
|
|
307 |
#' FALSE, FALSE, FALSE, FALSE, FALSE
|
|
308 |
#' )
|
|
309 |
#' prop_wilson(rsp, conf_level = 0.9)
|
|
310 |
#'
|
|
311 |
#' @export
|
|
312 |
prop_wilson <- function(rsp, n = length(rsp), conf_level, correct = FALSE) { |
|
313 | 5x |
y <- stats::prop.test( |
314 | 5x |
sum(rsp), |
315 | 5x |
n,
|
316 | 5x |
correct = correct, |
317 | 5x |
conf.level = conf_level |
318 |
)
|
|
319 | ||
320 | 5x |
as.numeric(y$conf.int) |
321 |
}
|
|
322 | ||
323 |
#' @describeIn h_proportions Calculates the stratified Wilson confidence
|
|
324 |
#' interval for unequal proportions as described in \insertCite{Yan2010-jt;textual}{tern}
|
|
325 |
#'
|
|
326 |
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
|
|
327 |
#' @param weights (`numeric` or `NULL`)\cr weights for each level of the strata. If `NULL`, they are
|
|
328 |
#' estimated using the iterative algorithm proposed in \insertCite{Yan2010-jt;textual}{tern} that
|
|
329 |
#' minimizes the weighted squared length of the confidence interval.
|
|
330 |
#' @param max_iterations (`count`)\cr maximum number of iterations for the iterative procedure used
|
|
331 |
#' to find estimates of optimal weights.
|
|
332 |
#' @param correct (`flag`)\cr whether to include the continuity correction. For further information, see for example
|
|
333 |
#' for [stats::prop.test()].
|
|
334 |
#'
|
|
335 |
#' @references
|
|
336 |
#' \insertRef{Yan2010-jt}{tern}
|
|
337 |
#'
|
|
338 |
#' @examples
|
|
339 |
#' # Stratified Wilson confidence interval with unequal probabilities
|
|
340 |
#'
|
|
341 |
#' set.seed(1)
|
|
342 |
#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
|
|
343 |
#' strata_data <- data.frame(
|
|
344 |
#' "f1" = sample(c("a", "b"), 100, TRUE),
|
|
345 |
#' "f2" = sample(c("x", "y", "z"), 100, TRUE),
|
|
346 |
#' stringsAsFactors = TRUE
|
|
347 |
#' )
|
|
348 |
#' strata <- interaction(strata_data)
|
|
349 |
#' n_strata <- ncol(table(rsp, strata)) # Number of strata
|
|
350 |
#'
|
|
351 |
#' prop_strat_wilson(
|
|
352 |
#' rsp = rsp, strata = strata,
|
|
353 |
#' conf_level = 0.90
|
|
354 |
#' )
|
|
355 |
#'
|
|
356 |
#' # Not automatic setting of weights
|
|
357 |
#' prop_strat_wilson(
|
|
358 |
#' rsp = rsp, strata = strata,
|
|
359 |
#' weights = rep(1 / n_strata, n_strata),
|
|
360 |
#' conf_level = 0.90
|
|
361 |
#' )
|
|
362 |
#'
|
|
363 |
#' @export
|
|
364 |
prop_strat_wilson <- function(rsp, |
|
365 |
strata,
|
|
366 |
weights = NULL, |
|
367 |
conf_level = 0.95, |
|
368 |
max_iterations = NULL, |
|
369 |
correct = FALSE) { |
|
370 | 20x |
checkmate::assert_logical(rsp, any.missing = FALSE) |
371 | 20x |
checkmate::assert_factor(strata, len = length(rsp)) |
372 | 20x |
assert_proportion_value(conf_level) |
373 | ||
374 | 20x |
tbl <- table(rsp, strata) |
375 | 20x |
n_strata <- length(unique(strata)) |
376 | ||
377 |
# Checking the weights and maximum number of iterations.
|
|
378 | 20x |
do_iter <- FALSE |
379 | 20x |
if (is.null(weights)) { |
380 | 6x |
weights <- rep(1 / n_strata, n_strata) # Initialization for iterative procedure |
381 | 6x |
do_iter <- TRUE |
382 | ||
383 |
# Iteration parameters
|
|
384 | 2x |
if (is.null(max_iterations)) max_iterations <- 10 |
385 | 6x |
checkmate::assert_int(max_iterations, na.ok = FALSE, null.ok = FALSE, lower = 1) |
386 |
}
|
|
387 | 20x |
checkmate::assert_numeric(weights, lower = 0, upper = 1, any.missing = FALSE, len = n_strata) |
388 | 20x |
sum_weights <- checkmate::assert_int(sum(weights)) |
389 | ! |
if (as.integer(sum_weights + 0.5) != 1L) stop("Sum of weights must be 1L.") |
390 | ||
391 | 20x |
xs <- tbl["TRUE", ] |
392 | 20x |
ns <- colSums(tbl) |
393 | 20x |
use_stratum <- (ns > 0) |
394 | 20x |
ns <- ns[use_stratum] |
395 | 20x |
xs <- xs[use_stratum] |
396 | 20x |
ests <- xs / ns |
397 | 20x |
vars <- ests * (1 - ests) / ns |
398 | ||
399 | 20x |
strata_qnorm <- strata_normal_quantile(vars, weights, conf_level) |
400 | ||
401 |
# Iterative setting of weights if they were not set externally
|
|
402 | 20x |
weights_new <- if (do_iter) { |
403 | 6x |
update_weights_strat_wilson(vars, strata_qnorm, weights, ns, max_iterations, conf_level)$weights |
404 |
} else { |
|
405 | 14x |
weights
|
406 |
}
|
|
407 | ||
408 | 20x |
strata_conf_level <- 2 * stats::pnorm(strata_qnorm) - 1 |
409 | ||
410 | 20x |
ci_by_strata <- Map( |
411 | 20x |
function(x, n) { |
412 |
# Classic Wilson's confidence interval
|
|
413 | 139x |
suppressWarnings(stats::prop.test(x, n, correct = correct, conf.level = strata_conf_level)$conf.int) |
414 |
},
|
|
415 | 20x |
x = xs, |
416 | 20x |
n = ns |
417 |
)
|
|
418 | 20x |
lower_by_strata <- sapply(ci_by_strata, "[", 1L) |
419 | 20x |
upper_by_strata <- sapply(ci_by_strata, "[", 2L) |
420 | ||
421 | 20x |
lower <- sum(weights_new * lower_by_strata) |
422 | 20x |
upper <- sum(weights_new * upper_by_strata) |
423 | ||
424 |
# Return values
|
|
425 | 20x |
if (do_iter) { |
426 | 6x |
list( |
427 | 6x |
conf_int = c( |
428 | 6x |
lower = lower, |
429 | 6x |
upper = upper |
430 |
),
|
|
431 | 6x |
weights = weights_new |
432 |
)
|
|
433 |
} else { |
|
434 | 14x |
list( |
435 | 14x |
conf_int = c( |
436 | 14x |
lower = lower, |
437 | 14x |
upper = upper |
438 |
)
|
|
439 |
)
|
|
440 |
}
|
|
441 |
}
|
|
442 | ||
443 |
#' @describeIn h_proportions Calculates the Clopper-Pearson interval by calling [stats::binom.test()].
|
|
444 |
#' Also referred to as the `exact` method.
|
|
445 |
#'
|
|
446 |
#' @param n (`count`)\cr number of participants (if `denom = "N_col"`) or the number of responders
|
|
447 |
#' (if `denom = "n"`, the default).
|
|
448 |
#'
|
|
449 |
#' @examples
|
|
450 |
#' prop_clopper_pearson(rsp, conf_level = .95)
|
|
451 |
#'
|
|
452 |
#' @export
|
|
453 |
prop_clopper_pearson <- function(rsp, n = length(rsp), conf_level) { |
|
454 | 1x |
y <- stats::binom.test( |
455 | 1x |
x = sum(rsp), |
456 | 1x |
n = n, |
457 | 1x |
conf.level = conf_level |
458 |
)
|
|
459 | 1x |
as.numeric(y$conf.int) |
460 |
}
|
|
461 | ||
462 |
#' @describeIn h_proportions Calculates the Wald interval by following the usual textbook definition
|
|
463 |
#' for a single proportion confidence interval using the normal approximation.
|
|
464 |
#'
|
|
465 |
#' @param correct (`flag`)\cr whether to apply continuity correction.
|
|
466 |
#'
|
|
467 |
#' @examples
|
|
468 |
#' prop_wald(rsp, conf_level = 0.95)
|
|
469 |
#' prop_wald(rsp, conf_level = 0.95, correct = TRUE)
|
|
470 |
#'
|
|
471 |
#' @export
|
|
472 |
prop_wald <- function(rsp, n = length(rsp), conf_level, correct = FALSE) { |
|
473 | 165x |
p_hat <- ifelse(n > 0, sum(rsp) / n, 0) |
474 | 165x |
z <- stats::qnorm((1 + conf_level) / 2) |
475 | 165x |
q_hat <- 1 - p_hat |
476 | 165x |
correct <- if (correct) 1 / (2 * n) else 0 |
477 | ||
478 | 165x |
err <- z * sqrt(p_hat * q_hat) / sqrt(n) + correct |
479 | 165x |
l_ci <- max(0, p_hat - err) |
480 | 165x |
u_ci <- min(1, p_hat + err) |
481 | ||
482 | 165x |
c(l_ci, u_ci) |
483 |
}
|
|
484 | ||
485 |
#' @describeIn h_proportions Calculates the Agresti-Coull interval. Constructed (for 95% CI) by adding two successes
|
|
486 |
#' and two failures to the data and then using the Wald formula to construct a CI.
|
|
487 |
#'
|
|
488 |
#' @examples
|
|
489 |
#' prop_agresti_coull(rsp, conf_level = 0.95)
|
|
490 |
#'
|
|
491 |
#' @export
|
|
492 |
prop_agresti_coull <- function(rsp, n = length(rsp), conf_level) { |
|
493 | 3x |
x_sum <- sum(rsp) |
494 | 3x |
z <- stats::qnorm((1 + conf_level) / 2) |
495 | ||
496 |
# Add here both z^2 / 2 successes and failures.
|
|
497 | 3x |
x_sum_tilde <- x_sum + z^2 / 2 |
498 | 3x |
n_tilde <- n + z^2 |
499 | ||
500 |
# Then proceed as with the Wald interval.
|
|
501 | 3x |
p_tilde <- x_sum_tilde / n_tilde |
502 | 3x |
q_tilde <- 1 - p_tilde |
503 | 3x |
err <- z * sqrt(p_tilde * q_tilde) / sqrt(n_tilde) |
504 | 3x |
l_ci <- max(0, p_tilde - err) |
505 | 3x |
u_ci <- min(1, p_tilde + err) |
506 | ||
507 | 3x |
c(l_ci, u_ci) |
508 |
}
|
|
509 | ||
510 |
#' @describeIn h_proportions Calculates the Jeffreys interval, an equal-tailed interval based on the
|
|
511 |
#' non-informative Jeffreys prior for a binomial proportion.
|
|
512 |
#'
|
|
513 |
#' @examples
|
|
514 |
#' prop_jeffreys(rsp, conf_level = 0.95)
|
|
515 |
#'
|
|
516 |
#' @export
|
|
517 |
prop_jeffreys <- function(rsp, n = length(rsp), conf_level) { |
|
518 | 5x |
x_sum <- sum(rsp) |
519 | ||
520 | 5x |
alpha <- 1 - conf_level |
521 | 5x |
l_ci <- ifelse( |
522 | 5x |
x_sum == 0, |
523 | 5x |
0,
|
524 | 5x |
stats::qbeta(alpha / 2, x_sum + 0.5, n - x_sum + 0.5) |
525 |
)
|
|
526 | ||
527 | 5x |
u_ci <- ifelse( |
528 | 5x |
x_sum == n, |
529 | 5x |
1,
|
530 | 5x |
stats::qbeta(1 - alpha / 2, x_sum + 0.5, n - x_sum + 0.5) |
531 |
)
|
|
532 | ||
533 | 5x |
c(l_ci, u_ci) |
534 |
}
|
|
535 | ||
536 |
#' Description of the proportion summary
|
|
537 |
#'
|
|
538 |
#' @description `r lifecycle::badge("stable")`
|
|
539 |
#'
|
|
540 |
#' This is a helper function that describes the analysis in [s_proportion()].
|
|
541 |
#'
|
|
542 |
#' @inheritParams s_proportion
|
|
543 |
#' @param long (`flag`)\cr whether a long or a short (default) description is required.
|
|
544 |
#'
|
|
545 |
#' @return String describing the analysis.
|
|
546 |
#'
|
|
547 |
#' @export
|
|
548 |
d_proportion <- function(conf_level, |
|
549 |
method,
|
|
550 |
long = FALSE) { |
|
551 | 181x |
label <- paste0(conf_level * 100, "% CI") |
552 | ||
553 | ! |
if (long) label <- paste(label, "for Response Rates") |
554 | ||
555 | 181x |
method_part <- switch(method, |
556 | 181x |
"clopper-pearson" = "Clopper-Pearson", |
557 | 181x |
"waldcc" = "Wald, with correction", |
558 | 181x |
"wald" = "Wald, without correction", |
559 | 181x |
"wilson" = "Wilson, without correction", |
560 | 181x |
"strat_wilson" = "Stratified Wilson, without correction", |
561 | 181x |
"wilsonc" = "Wilson, with correction", |
562 | 181x |
"strat_wilsonc" = "Stratified Wilson, with correction", |
563 | 181x |
"agresti-coull" = "Agresti-Coull", |
564 | 181x |
"jeffreys" = "Jeffreys", |
565 | 181x |
stop(paste(method, "does not have a description")) |
566 |
)
|
|
567 | ||
568 | 181x |
paste0(label, " (", method_part, ")") |
569 |
}
|
|
570 | ||
571 |
#' Helper function for the estimation of stratified quantiles
|
|
572 |
#'
|
|
573 |
#' @description `r lifecycle::badge("stable")`
|
|
574 |
#'
|
|
575 |
#' This function wraps the estimation of stratified percentiles when we assume
|
|
576 |
#' the approximation for large numbers. This is necessary only in the case
|
|
577 |
#' proportions for each strata are unequal.
|
|
578 |
#'
|
|
579 |
#' @inheritParams argument_convention
|
|
580 |
#' @inheritParams prop_strat_wilson
|
|
581 |
#'
|
|
582 |
#' @return Stratified quantile.
|
|
583 |
#'
|
|
584 |
#' @seealso [prop_strat_wilson()]
|
|
585 |
#'
|
|
586 |
#' @examples
|
|
587 |
#' strata_data <- table(data.frame(
|
|
588 |
#' "f1" = sample(c(TRUE, FALSE), 100, TRUE),
|
|
589 |
#' "f2" = sample(c("x", "y", "z"), 100, TRUE),
|
|
590 |
#' stringsAsFactors = TRUE
|
|
591 |
#' ))
|
|
592 |
#' ns <- colSums(strata_data)
|
|
593 |
#' ests <- strata_data["TRUE", ] / ns
|
|
594 |
#' vars <- ests * (1 - ests) / ns
|
|
595 |
#' weights <- rep(1 / length(ns), length(ns))
|
|
596 |
#'
|
|
597 |
#' strata_normal_quantile(vars, weights, 0.95)
|
|
598 |
#'
|
|
599 |
#' @export
|
|
600 |
strata_normal_quantile <- function(vars, weights, conf_level) { |
|
601 | 43x |
summands <- weights^2 * vars |
602 |
# Stratified quantile
|
|
603 | 43x |
sqrt(sum(summands)) / sum(sqrt(summands)) * stats::qnorm((1 + conf_level) / 2) |
604 |
}
|
|
605 | ||
606 |
#' Helper function for the estimation of weights for `prop_strat_wilson()`
|
|
607 |
#'
|
|
608 |
#' @description `r lifecycle::badge("stable")`
|
|
609 |
#'
|
|
610 |
#' This function wraps the iteration procedure that allows you to estimate
|
|
611 |
#' the weights for each proportional strata. This assumes to minimize the
|
|
612 |
#' weighted squared length of the confidence interval.
|
|
613 |
#'
|
|
614 |
#' @inheritParams prop_strat_wilson
|
|
615 |
#' @param vars (`numeric`)\cr normalized proportions for each strata.
|
|
616 |
#' @param strata_qnorm (`numeric(1)`)\cr initial estimation with identical weights of the quantiles.
|
|
617 |
#' @param initial_weights (`numeric`)\cr initial weights used to calculate `strata_qnorm`. This can
|
|
618 |
#' be optimized in the future if we need to estimate better initial weights.
|
|
619 |
#' @param n_per_strata (`numeric`)\cr number of elements in each strata.
|
|
620 |
#' @param max_iterations (`integer(1)`)\cr maximum number of iterations to be tried. Convergence is always checked.
|
|
621 |
#' @param tol (`numeric(1)`)\cr tolerance threshold for convergence.
|
|
622 |
#'
|
|
623 |
#' @return A `list` of 3 elements: `n_it`, `weights`, and `diff_v`.
|
|
624 |
#'
|
|
625 |
#' @seealso For references and details see [prop_strat_wilson()].
|
|
626 |
#'
|
|
627 |
#' @examples
|
|
628 |
#' vs <- c(0.011, 0.013, 0.012, 0.014, 0.017, 0.018)
|
|
629 |
#' sq <- 0.674
|
|
630 |
#' ws <- rep(1 / length(vs), length(vs))
|
|
631 |
#' ns <- c(22, 18, 17, 17, 14, 12)
|
|
632 |
#'
|
|
633 |
#' update_weights_strat_wilson(vs, sq, ws, ns, 100, 0.95, 0.001)
|
|
634 |
#'
|
|
635 |
#' @export
|
|
636 |
update_weights_strat_wilson <- function(vars, |
|
637 |
strata_qnorm,
|
|
638 |
initial_weights,
|
|
639 |
n_per_strata,
|
|
640 |
max_iterations = 50, |
|
641 |
conf_level = 0.95, |
|
642 |
tol = 0.001) { |
|
643 | 9x |
it <- 0 |
644 | 9x |
diff_v <- NULL |
645 | ||
646 | 9x |
while (it < max_iterations) { |
647 | 21x |
it <- it + 1 |
648 | 21x |
weights_new_t <- (1 + strata_qnorm^2 / n_per_strata)^2 |
649 | 21x |
weights_new_b <- (vars + strata_qnorm^2 / (4 * n_per_strata^2)) |
650 | 21x |
weights_new <- weights_new_t / weights_new_b |
651 | 21x |
weights_new <- weights_new / sum(weights_new) |
652 | 21x |
strata_qnorm <- strata_normal_quantile(vars, weights_new, conf_level) |
653 | 21x |
diff_v <- c(diff_v, sum(abs(weights_new - initial_weights))) |
654 | 8x |
if (diff_v[length(diff_v)] < tol) break |
655 | 13x |
initial_weights <- weights_new |
656 |
}
|
|
657 | ||
658 | 9x |
if (it == max_iterations) { |
659 | 1x |
warning("The heuristic to find weights did not converge with max_iterations = ", max_iterations) |
660 |
}
|
|
661 | ||
662 | 9x |
list( |
663 | 9x |
"n_it" = it, |
664 | 9x |
"weights" = weights_new, |
665 | 9x |
"diff_v" = diff_v |
666 |
)
|
|
667 |
}
|
1 |
#' Bland-Altman analysis
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("experimental")`
|
|
4 |
#'
|
|
5 |
#' Statistics function that uses the Bland-Altman method to assess the agreement between two numerical vectors
|
|
6 |
#' and calculates a variety of statistics.
|
|
7 |
#'
|
|
8 |
#' @inheritParams argument_convention
|
|
9 |
#' @param y (`numeric`)\cr vector of numbers we want to analyze, to be compared with `x`.
|
|
10 |
#'
|
|
11 |
#' @return
|
|
12 |
#' A named list of the following elements:
|
|
13 |
#' * `df`
|
|
14 |
#' * `difference_mean`
|
|
15 |
#' * `ci_mean`
|
|
16 |
#' * `difference_sd`
|
|
17 |
#' * `difference_se`
|
|
18 |
#' * `upper_agreement_limit`
|
|
19 |
#' * `lower_agreement_limit`
|
|
20 |
#' * `agreement_limit_se`
|
|
21 |
#' * `upper_agreement_limit_ci`
|
|
22 |
#' * `lower_agreement_limit_ci`
|
|
23 |
#' * `t_value`
|
|
24 |
#' * `n`
|
|
25 |
#'
|
|
26 |
#' @examples
|
|
27 |
#' x <- seq(1, 60, 5)
|
|
28 |
#' y <- seq(5, 50, 4)
|
|
29 |
#'
|
|
30 |
#' s_bland_altman(x, y, conf_level = 0.9)
|
|
31 |
#'
|
|
32 |
#' @export
|
|
33 |
s_bland_altman <- function(x, y, conf_level = 0.95) { |
|
34 | 7x |
checkmate::assert_numeric(x, min.len = 1, any.missing = TRUE) |
35 | 6x |
checkmate::assert_numeric(y, len = length(x), any.missing = TRUE) |
36 | 5x |
checkmate::assert_numeric(conf_level, lower = 0, upper = 1, any.missing = TRUE) |
37 | ||
38 | 4x |
alpha <- 1 - conf_level |
39 | ||
40 | 4x |
ind <- complete.cases(x, y) # use only pairwise complete observations, and check if x and y have the same length |
41 | 4x |
x <- x[ind] |
42 | 4x |
y <- y[ind] |
43 | 4x |
n <- sum(ind) # number of 'observations' |
44 | ||
45 | 4x |
if (n == 0) { |
46 | ! |
stop("there is no valid paired data") |
47 |
}
|
|
48 | ||
49 | 4x |
difference <- x - y # vector of differences |
50 | 4x |
average <- (x + y) / 2 # vector of means |
51 | 4x |
difference_mean <- mean(difference) # mean difference |
52 | 4x |
difference_sd <- sd(difference) # SD of differences |
53 | 4x |
al <- qnorm(1 - alpha / 2) * difference_sd |
54 | 4x |
upper_agreement_limit <- difference_mean + al # agreement limits |
55 | 4x |
lower_agreement_limit <- difference_mean - al |
56 | ||
57 | 4x |
difference_se <- difference_sd / sqrt(n) # standard error of the mean |
58 | 4x |
al_se <- difference_sd * sqrt(3) / sqrt(n) # standard error of the agreement limit |
59 | 4x |
tvalue <- qt(1 - alpha / 2, n - 1) # t value for 95% CI calculation |
60 | 4x |
difference_mean_ci <- difference_se * tvalue |
61 | 4x |
al_ci <- al_se * tvalue |
62 | 4x |
upper_agreement_limit_ci <- c(upper_agreement_limit - al_ci, upper_agreement_limit + al_ci) |
63 | 4x |
lower_agreement_limit_ci <- c(lower_agreement_limit - al_ci, lower_agreement_limit + al_ci) |
64 | ||
65 | 4x |
list( |
66 | 4x |
df = data.frame(average, difference), |
67 | 4x |
difference_mean = difference_mean, |
68 | 4x |
ci_mean = difference_mean + c(-1, 1) * difference_mean_ci, |
69 | 4x |
difference_sd = difference_sd, |
70 | 4x |
difference_se = difference_se, |
71 | 4x |
upper_agreement_limit = upper_agreement_limit, |
72 | 4x |
lower_agreement_limit = lower_agreement_limit, |
73 | 4x |
agreement_limit_se = al_se, |
74 | 4x |
upper_agreement_limit_ci = upper_agreement_limit_ci, |
75 | 4x |
lower_agreement_limit_ci = lower_agreement_limit_ci, |
76 | 4x |
t_value = tvalue, |
77 | 4x |
n = n |
78 |
)
|
|
79 |
}
|
|
80 | ||
81 |
#' Bland-Altman plot
|
|
82 |
#'
|
|
83 |
#' @description `r lifecycle::badge("experimental")`
|
|
84 |
#'
|
|
85 |
#' Graphing function that produces a Bland-Altman plot.
|
|
86 |
#'
|
|
87 |
#' @inheritParams s_bland_altman
|
|
88 |
#'
|
|
89 |
#' @return A `ggplot` Bland-Altman plot.
|
|
90 |
#'
|
|
91 |
#' @examples
|
|
92 |
#' x <- seq(1, 60, 5)
|
|
93 |
#' y <- seq(5, 50, 4)
|
|
94 |
#'
|
|
95 |
#' g_bland_altman(x = x, y = y, conf_level = 0.9)
|
|
96 |
#'
|
|
97 |
#' @export
|
|
98 |
#' @aliases bland_altman
|
|
99 |
g_bland_altman <- function(x, y, conf_level = 0.95) { |
|
100 | 1x |
result_tem <- s_bland_altman(x, y, conf_level = conf_level) |
101 | 1x |
xpos <- max(result_tem$df$average) * 0.9 + min(result_tem$df$average) * 0.1 |
102 | 1x |
yrange <- diff(range(result_tem$df$difference)) |
103 | ||
104 | 1x |
p <- ggplot(result_tem$df) + |
105 | 1x |
geom_point(aes(x = average, y = difference), color = "blue") + |
106 | 1x |
geom_hline(yintercept = result_tem$difference_mean, color = "blue", linetype = 1) + |
107 | 1x |
geom_hline(yintercept = 0, color = "blue", linetype = 2) + |
108 | 1x |
geom_hline(yintercept = result_tem$lower_agreement_limit, color = "red", linetype = 2) + |
109 | 1x |
geom_hline(yintercept = result_tem$upper_agreement_limit, color = "red", linetype = 2) + |
110 | 1x |
annotate( |
111 | 1x |
"text",
|
112 | 1x |
x = xpos, |
113 | 1x |
y = result_tem$lower_agreement_limit + 0.03 * yrange, |
114 | 1x |
label = "lower limits of agreement", |
115 | 1x |
color = "red" |
116 |
) + |
|
117 | 1x |
annotate( |
118 | 1x |
"text",
|
119 | 1x |
x = xpos, |
120 | 1x |
y = result_tem$upper_agreement_limit + 0.03 * yrange, |
121 | 1x |
label = "upper limits of agreement", |
122 | 1x |
color = "red" |
123 |
) + |
|
124 | 1x |
annotate( |
125 | 1x |
"text",
|
126 | 1x |
x = xpos, |
127 | 1x |
y = result_tem$difference_mean + 0.03 * yrange, |
128 | 1x |
label = "mean of difference between two measures", |
129 | 1x |
color = "blue" |
130 |
) + |
|
131 | 1x |
annotate( |
132 | 1x |
"text",
|
133 | 1x |
x = xpos, |
134 | 1x |
y = result_tem$lower_agreement_limit - 0.03 * yrange, |
135 | 1x |
label = sprintf("%.2f", result_tem$lower_agreement_limit), |
136 | 1x |
color = "red" |
137 |
) + |
|
138 | 1x |
annotate( |
139 | 1x |
"text",
|
140 | 1x |
x = xpos, |
141 | 1x |
y = result_tem$upper_agreement_limit - 0.03 * yrange, |
142 | 1x |
label = sprintf("%.2f", result_tem$upper_agreement_limit), |
143 | 1x |
color = "red" |
144 |
) + |
|
145 | 1x |
annotate( |
146 | 1x |
"text",
|
147 | 1x |
x = xpos, |
148 | 1x |
y = result_tem$difference_mean - 0.03 * yrange, |
149 | 1x |
label = sprintf("%.2f", result_tem$difference_meanm), |
150 | 1x |
color = "blue" |
151 |
) + |
|
152 | 1x |
xlab("Average of two measures") + |
153 | 1x |
ylab("Difference between two measures") |
154 | ||
155 | 1x |
return(p) |
156 |
}
|
1 |
#' Proportion difference estimation
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analysis function [estimate_proportion_diff()] creates a layout element to estimate the difference in proportion
|
|
6 |
#' of responders within a studied population. The primary analysis variable, `vars`, is a logical variable indicating
|
|
7 |
#' whether a response has occurred for each record. See the `method` parameter for options of methods to use when
|
|
8 |
#' constructing the confidence interval of the proportion difference. A stratification variable can be supplied via the
|
|
9 |
#' `strata` element of the `variables` argument.
|
|
10 |
#'
|
|
11 |
#'
|
|
12 |
#' @inheritParams prop_diff_strat_nc
|
|
13 |
#' @inheritParams argument_convention
|
|
14 |
#' @param method (`string`)\cr the method used for the confidence interval estimation.
|
|
15 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
16 |
#'
|
|
17 |
#' Options are: ``r shQuote(get_stats("estimate_proportion_diff"), type = "sh")``
|
|
18 |
#'
|
|
19 |
#' @seealso [d_proportion_diff()]
|
|
20 |
#'
|
|
21 |
#' @name prop_diff
|
|
22 |
#' @order 1
|
|
23 |
NULL
|
|
24 | ||
25 |
#' @describeIn prop_diff Statistics function estimating the difference
|
|
26 |
#' in terms of responder proportion.
|
|
27 |
#'
|
|
28 |
#' @return
|
|
29 |
#' * `s_proportion_diff()` returns a named list of elements `diff` and `diff_ci`.
|
|
30 |
#'
|
|
31 |
#' @note When performing an unstratified analysis, methods `"cmh"`, `"strat_newcombe"`, and `"strat_newcombecc"` are
|
|
32 |
#' not permitted.
|
|
33 |
#'
|
|
34 |
#' @examples
|
|
35 |
#' s_proportion_diff(
|
|
36 |
#' df = subset(dta, grp == "A"),
|
|
37 |
#' .var = "rsp",
|
|
38 |
#' .ref_group = subset(dta, grp == "B"),
|
|
39 |
#' .in_ref_col = FALSE,
|
|
40 |
#' conf_level = 0.90,
|
|
41 |
#' method = "ha"
|
|
42 |
#' )
|
|
43 |
#'
|
|
44 |
#' # CMH example with strata
|
|
45 |
#' s_proportion_diff(
|
|
46 |
#' df = subset(dta, grp == "A"),
|
|
47 |
#' .var = "rsp",
|
|
48 |
#' .ref_group = subset(dta, grp == "B"),
|
|
49 |
#' .in_ref_col = FALSE,
|
|
50 |
#' variables = list(strata = c("f1", "f2")),
|
|
51 |
#' conf_level = 0.90,
|
|
52 |
#' method = "cmh"
|
|
53 |
#' )
|
|
54 |
#'
|
|
55 |
#' @export
|
|
56 |
s_proportion_diff <- function(df, |
|
57 |
.var,
|
|
58 |
.ref_group,
|
|
59 |
.in_ref_col,
|
|
60 |
variables = list(strata = NULL), |
|
61 |
conf_level = 0.95, |
|
62 |
method = c( |
|
63 |
"waldcc", "wald", "cmh", |
|
64 |
"ha", "newcombe", "newcombecc", |
|
65 |
"strat_newcombe", "strat_newcombecc" |
|
66 |
),
|
|
67 |
weights_method = "cmh", |
|
68 |
...) { |
|
69 | 11x |
method <- match.arg(method) |
70 | 11x |
if (is.null(variables$strata) && checkmate::test_subset(method, c("cmh", "strat_newcombe", "strat_newcombecc"))) { |
71 | ! |
stop(paste( |
72 | ! |
"When performing an unstratified analysis, methods 'cmh', 'strat_newcombe', and 'strat_newcombecc' are not",
|
73 | ! |
"permitted. Please choose a different method."
|
74 |
)) |
|
75 |
}
|
|
76 | 11x |
y <- list(diff = numeric(), diff_ci = numeric()) |
77 | ||
78 | 11x |
if (!.in_ref_col) { |
79 | 7x |
rsp <- c(.ref_group[[.var]], df[[.var]]) |
80 | 7x |
grp <- factor( |
81 | 7x |
rep( |
82 | 7x |
c("ref", "Not-ref"), |
83 | 7x |
c(nrow(.ref_group), nrow(df)) |
84 |
),
|
|
85 | 7x |
levels = c("ref", "Not-ref") |
86 |
)
|
|
87 | ||
88 | 7x |
if (!is.null(variables$strata)) { |
89 | 3x |
strata_colnames <- variables$strata |
90 | 3x |
checkmate::assert_character(strata_colnames, null.ok = FALSE) |
91 | 3x |
strata_vars <- stats::setNames(as.list(strata_colnames), strata_colnames) |
92 | ||
93 | 3x |
assert_df_with_variables(df, strata_vars) |
94 | 3x |
assert_df_with_variables(.ref_group, strata_vars) |
95 | ||
96 |
# Merging interaction strata for reference group rows data and remaining
|
|
97 | 3x |
strata <- c( |
98 | 3x |
interaction(.ref_group[strata_colnames]), |
99 | 3x |
interaction(df[strata_colnames]) |
100 |
)
|
|
101 | 3x |
strata <- as.factor(strata) |
102 |
}
|
|
103 | ||
104 |
# Defining the std way to calculate weights for strat_newcombe
|
|
105 | 7x |
if (!is.null(variables$weights_method)) { |
106 | ! |
weights_method <- variables$weights_method |
107 |
} else { |
|
108 | 7x |
weights_method <- "cmh" |
109 |
}
|
|
110 | ||
111 | 7x |
y <- switch(method, |
112 | 7x |
"wald" = prop_diff_wald(rsp, grp, conf_level, correct = FALSE), |
113 | 7x |
"waldcc" = prop_diff_wald(rsp, grp, conf_level, correct = TRUE), |
114 | 7x |
"ha" = prop_diff_ha(rsp, grp, conf_level), |
115 | 7x |
"newcombe" = prop_diff_nc(rsp, grp, conf_level, correct = FALSE), |
116 | 7x |
"newcombecc" = prop_diff_nc(rsp, grp, conf_level, correct = TRUE), |
117 | 7x |
"strat_newcombe" = prop_diff_strat_nc(rsp, |
118 | 7x |
grp,
|
119 | 7x |
strata,
|
120 | 7x |
weights_method,
|
121 | 7x |
conf_level,
|
122 | 7x |
correct = FALSE |
123 |
),
|
|
124 | 7x |
"strat_newcombecc" = prop_diff_strat_nc(rsp, |
125 | 7x |
grp,
|
126 | 7x |
strata,
|
127 | 7x |
weights_method,
|
128 | 7x |
conf_level,
|
129 | 7x |
correct = TRUE |
130 |
),
|
|
131 | 7x |
"cmh" = prop_diff_cmh(rsp, grp, strata, conf_level)[c("diff", "diff_ci")] |
132 |
)
|
|
133 | ||
134 | 7x |
y$diff <- setNames(y$diff * 100, paste0("diff_", method)) |
135 | 7x |
y$diff_ci <- setNames(y$diff_ci * 100, paste0("diff_ci_", method, c("_l", "_u"))) |
136 |
}
|
|
137 | ||
138 | 11x |
attr(y$diff, "label") <- "Difference in Response rate (%)" |
139 | 11x |
attr(y$diff_ci, "label") <- d_proportion_diff( |
140 | 11x |
conf_level, method, |
141 | 11x |
long = FALSE |
142 |
)
|
|
143 | ||
144 | 11x |
y
|
145 |
}
|
|
146 | ||
147 |
#' @describeIn prop_diff Formatted analysis function which is used as `afun` in `estimate_proportion_diff()`.
|
|
148 |
#'
|
|
149 |
#' @return
|
|
150 |
#' * `a_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
151 |
#'
|
|
152 |
#' @examples
|
|
153 |
#' a_proportion_diff(
|
|
154 |
#' df = subset(dta, grp == "A"),
|
|
155 |
#' .stats = c("diff"),
|
|
156 |
#' .var = "rsp",
|
|
157 |
#' .ref_group = subset(dta, grp == "B"),
|
|
158 |
#' .in_ref_col = FALSE,
|
|
159 |
#' conf_level = 0.90,
|
|
160 |
#' method = "ha"
|
|
161 |
#' )
|
|
162 |
#'
|
|
163 |
#' @export
|
|
164 |
a_proportion_diff <- function(df, |
|
165 |
...,
|
|
166 |
.stats = NULL, |
|
167 |
.stat_names = NULL, |
|
168 |
.formats = NULL, |
|
169 |
.labels = NULL, |
|
170 |
.indent_mods = NULL) { |
|
171 | 9x |
dots_extra_args <- list(...) |
172 | ||
173 |
# Check if there are user-defined functions
|
|
174 | 9x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
175 | 9x |
.stats <- default_and_custom_stats_list$all_stats |
176 | 9x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
177 | ||
178 |
# Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params)
|
|
179 | 9x |
extra_afun_params <- retrieve_extra_afun_params( |
180 | 9x |
names(dots_extra_args$.additional_fun_parameters) |
181 |
)
|
|
182 | 9x |
dots_extra_args$.additional_fun_parameters <- NULL # After extraction we do not need them anymore |
183 | ||
184 |
# Main statistical functions application
|
|
185 | 9x |
x_stats <- .apply_stat_functions( |
186 | 9x |
default_stat_fnc = s_proportion_diff, |
187 | 9x |
custom_stat_fnc_list = custom_stat_functions, |
188 | 9x |
args_list = c( |
189 | 9x |
df = list(df), |
190 | 9x |
extra_afun_params,
|
191 | 9x |
dots_extra_args
|
192 |
)
|
|
193 |
)
|
|
194 | ||
195 |
# Fill in with stats defaults if needed
|
|
196 | 9x |
.stats <- get_stats("estimate_proportion_diff", |
197 | 9x |
stats_in = .stats, |
198 | 9x |
custom_stats_in = names(custom_stat_functions) |
199 |
)
|
|
200 | ||
201 | 9x |
x_stats <- x_stats[.stats] |
202 | ||
203 |
# Fill in formats/indents/labels with custom input and defaults
|
|
204 | 9x |
.formats <- get_formats_from_stats(.stats, .formats) |
205 | 9x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods) |
206 | 9x |
if (is.null(.labels)) { |
207 | 9x |
.labels <- sapply(x_stats, attr, "label") |
208 | 9x |
.labels <- .labels[nzchar(.labels) & !sapply(.labels, is.null) & !is.na(.labels)] |
209 |
}
|
|
210 | 9x |
.labels <- get_labels_from_stats(.stats, .labels) |
211 | ||
212 |
# Auto format handling
|
|
213 | 9x |
.formats <- apply_auto_formatting( |
214 | 9x |
.formats,
|
215 | 9x |
x_stats,
|
216 | 9x |
extra_afun_params$.df_row, |
217 | 9x |
extra_afun_params$.var |
218 |
)
|
|
219 | ||
220 |
# Get and check statistical names from defaults
|
|
221 | 9x |
.stat_names <- get_stat_names(x_stats, .stat_names) # note is x_stats |
222 | ||
223 | 9x |
in_rows( |
224 | 9x |
.list = x_stats, |
225 | 9x |
.formats = .formats, |
226 | 9x |
.names = names(.labels), |
227 | 9x |
.stat_names = .stat_names, |
228 | 9x |
.labels = .labels %>% .unlist_keep_nulls(), |
229 | 9x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
230 |
)
|
|
231 |
}
|
|
232 | ||
233 |
#' @describeIn prop_diff Layout-creating function which can take statistics function arguments
|
|
234 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
235 |
#'
|
|
236 |
#' @return
|
|
237 |
#' * `estimate_proportion_diff()` returns a layout object suitable for passing to further layouting functions,
|
|
238 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
239 |
#' the statistics from `s_proportion_diff()` to the table layout.
|
|
240 |
#'
|
|
241 |
#' @examples
|
|
242 |
#' ## "Mid" case: 4/4 respond in group A, 1/2 respond in group B.
|
|
243 |
#' nex <- 100 # Number of example rows
|
|
244 |
#' dta <- data.frame(
|
|
245 |
#' "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
|
|
246 |
#' "grp" = sample(c("A", "B"), nex, TRUE),
|
|
247 |
#' "f1" = sample(c("a1", "a2"), nex, TRUE),
|
|
248 |
#' "f2" = sample(c("x", "y", "z"), nex, TRUE),
|
|
249 |
#' stringsAsFactors = TRUE
|
|
250 |
#' )
|
|
251 |
#'
|
|
252 |
#' l <- basic_table() %>%
|
|
253 |
#' split_cols_by(var = "grp", ref_group = "B") %>%
|
|
254 |
#' estimate_proportion_diff(
|
|
255 |
#' vars = "rsp",
|
|
256 |
#' conf_level = 0.90,
|
|
257 |
#' method = "ha"
|
|
258 |
#' )
|
|
259 |
#'
|
|
260 |
#' build_table(l, df = dta)
|
|
261 |
#'
|
|
262 |
#' @export
|
|
263 |
#' @order 2
|
|
264 |
estimate_proportion_diff <- function(lyt, |
|
265 |
vars,
|
|
266 |
variables = list(strata = NULL), |
|
267 |
conf_level = 0.95, |
|
268 |
method = c( |
|
269 |
"waldcc", "wald", "cmh", |
|
270 |
"ha", "newcombe", "newcombecc", |
|
271 |
"strat_newcombe", "strat_newcombecc" |
|
272 |
),
|
|
273 |
weights_method = "cmh", |
|
274 |
var_labels = vars, |
|
275 |
na_str = default_na_str(), |
|
276 |
nested = TRUE, |
|
277 |
show_labels = "hidden", |
|
278 |
table_names = vars, |
|
279 |
section_div = NA_character_, |
|
280 |
...,
|
|
281 |
na_rm = TRUE, |
|
282 |
.stats = c("diff", "diff_ci"), |
|
283 |
.stat_names = NULL, |
|
284 |
.formats = c(diff = "xx.x", diff_ci = "(xx.x, xx.x)"), |
|
285 |
.labels = NULL, |
|
286 |
.indent_mods = c(diff = 0L, diff_ci = 1L)) { |
|
287 |
# Depending on main functions
|
|
288 | 4x |
extra_args <- list( |
289 | 4x |
"na_rm" = na_rm, |
290 | 4x |
"variables" = variables, |
291 | 4x |
"conf_level" = conf_level, |
292 | 4x |
"method" = method, |
293 | 4x |
"weights_method" = weights_method, |
294 |
...
|
|
295 |
)
|
|
296 | ||
297 |
# Needed defaults
|
|
298 | 4x |
if (!is.null(.stats)) extra_args[[".stats"]] <- .stats |
299 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
300 | 4x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
301 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
302 | 4x |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
303 | ||
304 |
# Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params)
|
|
305 | 4x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
306 | 4x |
formals(a_proportion_diff) <- c( |
307 | 4x |
formals(a_proportion_diff), |
308 | 4x |
extra_args[[".additional_fun_parameters"]] |
309 |
)
|
|
310 | ||
311 |
# Main {rtables} structural call
|
|
312 | 4x |
analyze( |
313 | 4x |
lyt = lyt, |
314 | 4x |
vars = vars, |
315 | 4x |
var_labels = var_labels, |
316 | 4x |
afun = a_proportion_diff, |
317 | 4x |
na_str = na_str, |
318 | 4x |
inclNAs = !na_rm, |
319 | 4x |
nested = nested, |
320 | 4x |
extra_args = extra_args, |
321 | 4x |
show_labels = show_labels, |
322 | 4x |
table_names = table_names, |
323 | 4x |
section_div = section_div |
324 |
)
|
|
325 |
}
|
|
326 | ||
327 |
#' Check proportion difference arguments
|
|
328 |
#'
|
|
329 |
#' Verifies that and/or convert arguments into valid values to be used in the
|
|
330 |
#' estimation of difference in responder proportions.
|
|
331 |
#'
|
|
332 |
#' @inheritParams prop_diff
|
|
333 |
#' @inheritParams prop_diff_wald
|
|
334 |
#'
|
|
335 |
#' @keywords internal
|
|
336 |
check_diff_prop_ci <- function(rsp, |
|
337 |
grp,
|
|
338 |
strata = NULL, |
|
339 |
conf_level,
|
|
340 |
correct = NULL) { |
|
341 | 26x |
checkmate::assert_logical(rsp, any.missing = FALSE) |
342 | 26x |
checkmate::assert_factor(grp, len = length(rsp), any.missing = FALSE, n.levels = 2) |
343 | 26x |
checkmate::assert_number(conf_level, lower = 0, upper = 1) |
344 | 26x |
checkmate::assert_flag(correct, null.ok = TRUE) |
345 | ||
346 | 26x |
if (!is.null(strata)) { |
347 | 12x |
checkmate::assert_factor(strata, len = length(rsp)) |
348 |
}
|
|
349 | ||
350 | 26x |
invisible() |
351 |
}
|
|
352 | ||
353 |
#' Description of method used for proportion comparison
|
|
354 |
#'
|
|
355 |
#' @description `r lifecycle::badge("stable")`
|
|
356 |
#'
|
|
357 |
#' This is an auxiliary function that describes the analysis in
|
|
358 |
#' [s_proportion_diff()].
|
|
359 |
#'
|
|
360 |
#' @inheritParams s_proportion_diff
|
|
361 |
#' @param long (`flag`)\cr whether a long (`TRUE`) or a short (`FALSE`, default) description is required.
|
|
362 |
#'
|
|
363 |
#' @return A `string` describing the analysis.
|
|
364 |
#'
|
|
365 |
#' @seealso [prop_diff]
|
|
366 |
#'
|
|
367 |
#' @export
|
|
368 |
d_proportion_diff <- function(conf_level, |
|
369 |
method,
|
|
370 |
long = FALSE) { |
|
371 | 11x |
label <- paste0(conf_level * 100, "% CI") |
372 | 11x |
if (long) { |
373 | ! |
label <- paste( |
374 | ! |
label,
|
375 | ! |
ifelse( |
376 | ! |
method == "cmh", |
377 | ! |
"for adjusted difference",
|
378 | ! |
"for difference"
|
379 |
)
|
|
380 |
)
|
|
381 |
}
|
|
382 | ||
383 | 11x |
method_part <- switch(method, |
384 | 11x |
"cmh" = "CMH, without correction", |
385 | 11x |
"waldcc" = "Wald, with correction", |
386 | 11x |
"wald" = "Wald, without correction", |
387 | 11x |
"ha" = "Anderson-Hauck", |
388 | 11x |
"newcombe" = "Newcombe, without correction", |
389 | 11x |
"newcombecc" = "Newcombe, with correction", |
390 | 11x |
"strat_newcombe" = "Stratified Newcombe, without correction", |
391 | 11x |
"strat_newcombecc" = "Stratified Newcombe, with correction", |
392 | 11x |
stop(paste(method, "does not have a description")) |
393 |
)
|
|
394 | 11x |
paste0(label, " (", method_part, ")") |
395 |
}
|
|
396 | ||
397 |
#' Helper functions to calculate proportion difference
|
|
398 |
#'
|
|
399 |
#' @description `r lifecycle::badge("stable")`
|
|
400 |
#'
|
|
401 |
#' @inheritParams argument_convention
|
|
402 |
#' @inheritParams prop_diff
|
|
403 |
#' @param grp (`factor`)\cr vector assigning observations to one out of two groups
|
|
404 |
#' (e.g. reference and treatment group).
|
|
405 |
#'
|
|
406 |
#' @return A named `list` of elements `diff` (proportion difference) and `diff_ci`
|
|
407 |
#' (proportion difference confidence interval).
|
|
408 |
#'
|
|
409 |
#' @seealso [prop_diff()] for implementation of these helper functions.
|
|
410 |
#'
|
|
411 |
#' @name h_prop_diff
|
|
412 |
NULL
|
|
413 | ||
414 |
#' @describeIn h_prop_diff The Wald interval follows the usual textbook
|
|
415 |
#' definition for a single proportion confidence interval using the normal
|
|
416 |
#' approximation. It is possible to include a continuity correction for Wald's
|
|
417 |
#' interval.
|
|
418 |
#'
|
|
419 |
#' @param correct (`flag`)\cr whether to include the continuity correction. For further
|
|
420 |
#' information, see [stats::prop.test()].
|
|
421 |
#'
|
|
422 |
#' @examples
|
|
423 |
#' # Wald confidence interval
|
|
424 |
#' set.seed(2)
|
|
425 |
#' rsp <- sample(c(TRUE, FALSE), replace = TRUE, size = 20)
|
|
426 |
#' grp <- factor(c(rep("A", 10), rep("B", 10)))
|
|
427 |
#'
|
|
428 |
#' prop_diff_wald(rsp = rsp, grp = grp, conf_level = 0.95, correct = FALSE)
|
|
429 |
#'
|
|
430 |
#' @export
|
|
431 |
prop_diff_wald <- function(rsp, |
|
432 |
grp,
|
|
433 |
conf_level = 0.95, |
|
434 |
correct = FALSE) { |
|
435 | 8x |
if (isTRUE(correct)) { |
436 | 5x |
mthd <- "waldcc" |
437 |
} else { |
|
438 | 3x |
mthd <- "wald" |
439 |
}
|
|
440 | 8x |
grp <- as_factor_keep_attributes(grp) |
441 | 8x |
check_diff_prop_ci( |
442 | 8x |
rsp = rsp, grp = grp, conf_level = conf_level, correct = correct |
443 |
)
|
|
444 | ||
445 |
# check if binary response is coded as logical
|
|
446 | 8x |
checkmate::assert_logical(rsp, any.missing = FALSE) |
447 | 8x |
checkmate::assert_factor(grp, len = length(rsp), any.missing = FALSE, n.levels = 2) |
448 | ||
449 | 8x |
tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE))) |
450 |
# x1 and n1 are non-reference groups.
|
|
451 | 8x |
diff_ci <- desctools_binom( |
452 | 8x |
x1 = tbl[2], n1 = sum(tbl[2], tbl[4]), |
453 | 8x |
x2 = tbl[1], n2 = sum(tbl[1], tbl[3]), |
454 | 8x |
conf.level = conf_level, |
455 | 8x |
method = mthd |
456 |
)
|
|
457 | ||
458 | 8x |
list( |
459 | 8x |
"diff" = unname(diff_ci[, "est"]), |
460 | 8x |
"diff_ci" = unname(diff_ci[, c("lwr.ci", "upr.ci")]) |
461 |
)
|
|
462 |
}
|
|
463 | ||
464 |
#' @describeIn h_prop_diff Anderson-Hauck confidence interval.
|
|
465 |
#'
|
|
466 |
#' @examples
|
|
467 |
#' # Anderson-Hauck confidence interval
|
|
468 |
#' ## "Mid" case: 3/4 respond in group A, 1/2 respond in group B.
|
|
469 |
#' rsp <- c(TRUE, FALSE, FALSE, TRUE, TRUE, TRUE)
|
|
470 |
#' grp <- factor(c("A", "B", "A", "B", "A", "A"), levels = c("B", "A"))
|
|
471 |
#'
|
|
472 |
#' prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.90)
|
|
473 |
#'
|
|
474 |
#' ## Edge case: Same proportion of response in A and B.
|
|
475 |
#' rsp <- c(TRUE, FALSE, TRUE, FALSE)
|
|
476 |
#' grp <- factor(c("A", "A", "B", "B"), levels = c("A", "B"))
|
|
477 |
#'
|
|
478 |
#' prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.6)
|
|
479 |
#'
|
|
480 |
#' @export
|
|
481 |
prop_diff_ha <- function(rsp, |
|
482 |
grp,
|
|
483 |
conf_level) { |
|
484 | 4x |
grp <- as_factor_keep_attributes(grp) |
485 | 4x |
check_diff_prop_ci(rsp = rsp, grp = grp, conf_level = conf_level) |
486 | ||
487 | 4x |
tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE))) |
488 |
# x1 and n1 are non-reference groups.
|
|
489 | 4x |
ci <- desctools_binom( |
490 | 4x |
x1 = tbl[2], n1 = sum(tbl[2], tbl[4]), |
491 | 4x |
x2 = tbl[1], n2 = sum(tbl[1], tbl[3]), |
492 | 4x |
conf.level = conf_level, |
493 | 4x |
method = "ha" |
494 |
)
|
|
495 | 4x |
list( |
496 | 4x |
"diff" = unname(ci[, "est"]), |
497 | 4x |
"diff_ci" = unname(ci[, c("lwr.ci", "upr.ci")]) |
498 |
)
|
|
499 |
}
|
|
500 | ||
501 |
#' @describeIn h_prop_diff Newcombe confidence interval. It is based on
|
|
502 |
#' the Wilson score confidence interval for a single binomial proportion.
|
|
503 |
#'
|
|
504 |
#' @examples
|
|
505 |
#' # Newcombe confidence interval
|
|
506 |
#'
|
|
507 |
#' set.seed(1)
|
|
508 |
#' rsp <- c(
|
|
509 |
#' sample(c(TRUE, FALSE), size = 40, prob = c(3 / 4, 1 / 4), replace = TRUE),
|
|
510 |
#' sample(c(TRUE, FALSE), size = 40, prob = c(1 / 2, 1 / 2), replace = TRUE)
|
|
511 |
#' )
|
|
512 |
#' grp <- factor(rep(c("A", "B"), each = 40), levels = c("B", "A"))
|
|
513 |
#' table(rsp, grp)
|
|
514 |
#'
|
|
515 |
#' prop_diff_nc(rsp = rsp, grp = grp, conf_level = 0.9)
|
|
516 |
#'
|
|
517 |
#' @export
|
|
518 |
prop_diff_nc <- function(rsp, |
|
519 |
grp,
|
|
520 |
conf_level,
|
|
521 |
correct = FALSE) { |
|
522 | 2x |
if (isTRUE(correct)) { |
523 | ! |
mthd <- "scorecc" |
524 |
} else { |
|
525 | 2x |
mthd <- "score" |
526 |
}
|
|
527 | 2x |
grp <- as_factor_keep_attributes(grp) |
528 | 2x |
check_diff_prop_ci(rsp = rsp, grp = grp, conf_level = conf_level) |
529 | ||
530 | 2x |
p_grp <- tapply(rsp, grp, mean) |
531 | 2x |
diff_p <- unname(diff(p_grp)) |
532 | 2x |
tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE))) |
533 | 2x |
ci <- desctools_binom( |
534 |
# x1 and n1 are non-reference groups.
|
|
535 | 2x |
x1 = tbl[2], n1 = sum(tbl[2], tbl[4]), |
536 | 2x |
x2 = tbl[1], n2 = sum(tbl[1], tbl[3]), |
537 | 2x |
conf.level = conf_level, |
538 | 2x |
method = mthd |
539 |
)
|
|
540 | 2x |
list( |
541 | 2x |
"diff" = unname(ci[, "est"]), |
542 | 2x |
"diff_ci" = unname(ci[, c("lwr.ci", "upr.ci")]) |
543 |
)
|
|
544 |
}
|
|
545 | ||
546 |
#' @describeIn h_prop_diff Calculates the weighted difference. This is defined as the difference in
|
|
547 |
#' response rates between the experimental treatment group and the control treatment group, adjusted
|
|
548 |
#' for stratification factors by applying Cochran-Mantel-Haenszel (CMH) weights. For the CMH chi-squared
|
|
549 |
#' test, use [stats::mantelhaen.test()].
|
|
550 |
#'
|
|
551 |
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
|
|
552 |
#'
|
|
553 |
#' @examples
|
|
554 |
#' # Cochran-Mantel-Haenszel confidence interval
|
|
555 |
#'
|
|
556 |
#' set.seed(2)
|
|
557 |
#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
|
|
558 |
#' grp <- sample(c("Placebo", "Treatment"), 100, TRUE)
|
|
559 |
#' grp <- factor(grp, levels = c("Placebo", "Treatment"))
|
|
560 |
#' strata_data <- data.frame(
|
|
561 |
#' "f1" = sample(c("a", "b"), 100, TRUE),
|
|
562 |
#' "f2" = sample(c("x", "y", "z"), 100, TRUE),
|
|
563 |
#' stringsAsFactors = TRUE
|
|
564 |
#' )
|
|
565 |
#'
|
|
566 |
#' prop_diff_cmh(
|
|
567 |
#' rsp = rsp, grp = grp, strata = interaction(strata_data),
|
|
568 |
#' conf_level = 0.90
|
|
569 |
#' )
|
|
570 |
#'
|
|
571 |
#' @export
|
|
572 |
prop_diff_cmh <- function(rsp, |
|
573 |
grp,
|
|
574 |
strata,
|
|
575 |
conf_level = 0.95) { |
|
576 | 8x |
grp <- as_factor_keep_attributes(grp) |
577 | 8x |
strata <- as_factor_keep_attributes(strata) |
578 | 8x |
check_diff_prop_ci( |
579 | 8x |
rsp = rsp, grp = grp, conf_level = conf_level, strata = strata |
580 |
)
|
|
581 | ||
582 | 8x |
if (any(tapply(rsp, strata, length) < 5)) { |
583 | 1x |
warning("Less than 5 observations in some strata.") |
584 |
}
|
|
585 | ||
586 |
# first dimension: FALSE, TRUE
|
|
587 |
# 2nd dimension: CONTROL, TX
|
|
588 |
# 3rd dimension: levels of strata
|
|
589 |
# rsp as factor rsp to handle edge case of no FALSE (or TRUE) rsp records
|
|
590 | 8x |
t_tbl <- table( |
591 | 8x |
factor(rsp, levels = c("FALSE", "TRUE")), |
592 | 8x |
grp,
|
593 | 8x |
strata
|
594 |
)
|
|
595 | 8x |
n1 <- colSums(t_tbl[1:2, 1, ]) |
596 | 8x |
n2 <- colSums(t_tbl[1:2, 2, ]) |
597 | 8x |
p1 <- t_tbl[2, 1, ] / n1 |
598 | 8x |
p2 <- t_tbl[2, 2, ] / n2 |
599 |
# CMH weights
|
|
600 | 8x |
use_stratum <- (n1 > 0) & (n2 > 0) |
601 | 8x |
n1 <- n1[use_stratum] |
602 | 8x |
n2 <- n2[use_stratum] |
603 | 8x |
p1 <- p1[use_stratum] |
604 | 8x |
p2 <- p2[use_stratum] |
605 | 8x |
wt <- (n1 * n2 / (n1 + n2)) |
606 | 8x |
wt_normalized <- wt / sum(wt) |
607 | 8x |
est1 <- sum(wt_normalized * p1) |
608 | 8x |
est2 <- sum(wt_normalized * p2) |
609 | 8x |
estimate <- c(est1, est2) |
610 | 8x |
names(estimate) <- levels(grp) |
611 | 8x |
se1 <- sqrt(sum(wt_normalized^2 * p1 * (1 - p1) / n1)) |
612 | 8x |
se2 <- sqrt(sum(wt_normalized^2 * p2 * (1 - p2) / n2)) |
613 | 8x |
z <- stats::qnorm((1 + conf_level) / 2) |
614 | 8x |
err1 <- z * se1 |
615 | 8x |
err2 <- z * se2 |
616 | 8x |
ci1 <- c((est1 - err1), (est1 + err1)) |
617 | 8x |
ci2 <- c((est2 - err2), (est2 + err2)) |
618 | 8x |
estimate_ci <- list(ci1, ci2) |
619 | 8x |
names(estimate_ci) <- levels(grp) |
620 | 8x |
diff_est <- est2 - est1 |
621 | 8x |
se_diff <- sqrt(sum(((p1 * (1 - p1) / n1) + (p2 * (1 - p2) / n2)) * wt_normalized^2)) |
622 | 8x |
diff_ci <- c(diff_est - z * se_diff, diff_est + z * se_diff) |
623 | ||
624 | 8x |
list( |
625 | 8x |
prop = estimate, |
626 | 8x |
prop_ci = estimate_ci, |
627 | 8x |
diff = diff_est, |
628 | 8x |
diff_ci = diff_ci, |
629 | 8x |
weights = wt_normalized, |
630 | 8x |
n1 = n1, |
631 | 8x |
n2 = n2 |
632 |
)
|
|
633 |
}
|
|
634 | ||
635 |
#' @describeIn h_prop_diff Calculates the stratified Newcombe confidence interval and difference in response
|
|
636 |
#' rates between the experimental treatment group and the control treatment group, adjusted for stratification
|
|
637 |
#' factors. This implementation follows closely the one proposed by \insertCite{Yan2010-jt;textual}{tern}.
|
|
638 |
#' Weights can be estimated from the heuristic proposed in [prop_strat_wilson()] or from CMH-derived weights
|
|
639 |
#' (see [prop_diff_cmh()]).
|
|
640 |
#'
|
|
641 |
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
|
|
642 |
#' @param weights_method (`string`)\cr weights method. Can be either `"cmh"` or `"heuristic"`
|
|
643 |
#' and directs the way weights are estimated.
|
|
644 |
#'
|
|
645 |
#' @references
|
|
646 |
#' \insertRef{Yan2010-jt}{tern}
|
|
647 |
#'
|
|
648 |
#' @examples
|
|
649 |
#' # Stratified Newcombe confidence interval
|
|
650 |
#'
|
|
651 |
#' set.seed(2)
|
|
652 |
#' data_set <- data.frame(
|
|
653 |
#' "rsp" = sample(c(TRUE, FALSE), 100, TRUE),
|
|
654 |
#' "f1" = sample(c("a", "b"), 100, TRUE),
|
|
655 |
#' "f2" = sample(c("x", "y", "z"), 100, TRUE),
|
|
656 |
#' "grp" = sample(c("Placebo", "Treatment"), 100, TRUE),
|
|
657 |
#' stringsAsFactors = TRUE
|
|
658 |
#' )
|
|
659 |
#'
|
|
660 |
#' prop_diff_strat_nc(
|
|
661 |
#' rsp = data_set$rsp, grp = data_set$grp, strata = interaction(data_set[2:3]),
|
|
662 |
#' weights_method = "cmh",
|
|
663 |
#' conf_level = 0.90
|
|
664 |
#' )
|
|
665 |
#'
|
|
666 |
#' prop_diff_strat_nc(
|
|
667 |
#' rsp = data_set$rsp, grp = data_set$grp, strata = interaction(data_set[2:3]),
|
|
668 |
#' weights_method = "wilson_h",
|
|
669 |
#' conf_level = 0.90
|
|
670 |
#' )
|
|
671 |
#'
|
|
672 |
#' @export
|
|
673 |
prop_diff_strat_nc <- function(rsp, |
|
674 |
grp,
|
|
675 |
strata,
|
|
676 |
weights_method = c("cmh", "wilson_h"), |
|
677 |
conf_level = 0.95, |
|
678 |
correct = FALSE) { |
|
679 | 4x |
weights_method <- match.arg(weights_method) |
680 | 4x |
grp <- as_factor_keep_attributes(grp) |
681 | 4x |
strata <- as_factor_keep_attributes(strata) |
682 | 4x |
check_diff_prop_ci( |
683 | 4x |
rsp = rsp, grp = grp, conf_level = conf_level, strata = strata |
684 |
)
|
|
685 | 4x |
checkmate::assert_number(conf_level, lower = 0, upper = 1) |
686 | 4x |
checkmate::assert_flag(correct) |
687 | 4x |
if (any(tapply(rsp, strata, length) < 5)) { |
688 | ! |
warning("Less than 5 observations in some strata.") |
689 |
}
|
|
690 | ||
691 | 4x |
rsp_by_grp <- split(rsp, f = grp) |
692 | 4x |
strata_by_grp <- split(strata, f = grp) |
693 | ||
694 |
# Finding the weights
|
|
695 | 4x |
weights <- if (identical(weights_method, "cmh")) { |
696 | 3x |
prop_diff_cmh(rsp = rsp, grp = grp, strata = strata)$weights |
697 | 4x |
} else if (identical(weights_method, "wilson_h")) { |
698 | 1x |
prop_strat_wilson(rsp, strata, conf_level = conf_level, correct = correct)$weights |
699 |
}
|
|
700 | 4x |
weights[levels(strata)[!levels(strata) %in% names(weights)]] <- 0 |
701 | ||
702 |
# Calculating lower (`l`) and upper (`u`) confidence bounds per group.
|
|
703 | 4x |
strat_wilson_by_grp <- Map( |
704 | 4x |
prop_strat_wilson,
|
705 | 4x |
rsp = rsp_by_grp, |
706 | 4x |
strata = strata_by_grp, |
707 | 4x |
weights = list(weights, weights), |
708 | 4x |
conf_level = conf_level, |
709 | 4x |
correct = correct |
710 |
)
|
|
711 | ||
712 | 4x |
ci_ref <- strat_wilson_by_grp[[1]] |
713 | 4x |
ci_trt <- strat_wilson_by_grp[[2]] |
714 | 4x |
l_ref <- as.numeric(ci_ref$conf_int[1]) |
715 | 4x |
u_ref <- as.numeric(ci_ref$conf_int[2]) |
716 | 4x |
l_trt <- as.numeric(ci_trt$conf_int[1]) |
717 | 4x |
u_trt <- as.numeric(ci_trt$conf_int[2]) |
718 | ||
719 |
# Estimating the diff and n_ref, n_trt (it allows different weights to be used)
|
|
720 | 4x |
t_tbl <- table( |
721 | 4x |
factor(rsp, levels = c("FALSE", "TRUE")), |
722 | 4x |
grp,
|
723 | 4x |
strata
|
724 |
)
|
|
725 | 4x |
n_ref <- colSums(t_tbl[1:2, 1, ]) |
726 | 4x |
n_trt <- colSums(t_tbl[1:2, 2, ]) |
727 | 4x |
use_stratum <- (n_ref > 0) & (n_trt > 0) |
728 | 4x |
n_ref <- n_ref[use_stratum] |
729 | 4x |
n_trt <- n_trt[use_stratum] |
730 | 4x |
p_ref <- t_tbl[2, 1, use_stratum] / n_ref |
731 | 4x |
p_trt <- t_tbl[2, 2, use_stratum] / n_trt |
732 | 4x |
est1 <- sum(weights * p_ref) |
733 | 4x |
est2 <- sum(weights * p_trt) |
734 | 4x |
diff_est <- est2 - est1 |
735 | ||
736 | 4x |
lambda1 <- sum(weights^2 / n_ref) |
737 | 4x |
lambda2 <- sum(weights^2 / n_trt) |
738 | 4x |
z <- stats::qnorm((1 + conf_level) / 2) |
739 | ||
740 | 4x |
lower <- diff_est - z * sqrt(lambda2 * l_trt * (1 - l_trt) + lambda1 * u_ref * (1 - u_ref)) |
741 | 4x |
upper <- diff_est + z * sqrt(lambda1 * l_ref * (1 - l_ref) + lambda2 * u_trt * (1 - u_trt)) |
742 | ||
743 | 4x |
list( |
744 | 4x |
"diff" = diff_est, |
745 | 4x |
"diff_ci" = c("lower" = lower, "upper" = upper) |
746 |
)
|
|
747 |
}
|
1 |
# Utility functions to cooperate with {rtables} package
|
|
2 | ||
3 |
#' Convert table into matrix of strings
|
|
4 |
#'
|
|
5 |
#' @description `r lifecycle::badge("stable")`
|
|
6 |
#'
|
|
7 |
#' Helper function to use mostly within tests. `with_spaces`parameter allows
|
|
8 |
#' to test not only for content but also indentation and table structure.
|
|
9 |
#' `print_txt_to_copy` instead facilitate the testing development by returning a well
|
|
10 |
#' formatted text that needs only to be copied and pasted in the expected output.
|
|
11 |
#'
|
|
12 |
#' @inheritParams formatters::toString
|
|
13 |
#' @param x (`VTableTree`)\cr `rtables` table object.
|
|
14 |
#' @param with_spaces (`flag`)\cr whether the tested table should keep the indentation and other relevant spaces.
|
|
15 |
#' @param print_txt_to_copy (`flag`)\cr utility to have a way to copy the input table directly
|
|
16 |
#' into the expected variable instead of copying it too manually.
|
|
17 |
#'
|
|
18 |
#' @return A `matrix` of `string`s. If `print_txt_to_copy = TRUE` the well formatted printout of the
|
|
19 |
#' table will be printed to console, ready to be copied as a expected value.
|
|
20 |
#'
|
|
21 |
#' @examples
|
|
22 |
#' tbl <- basic_table() %>%
|
|
23 |
#' split_rows_by("SEX") %>%
|
|
24 |
#' split_cols_by("ARM") %>%
|
|
25 |
#' analyze("AGE") %>%
|
|
26 |
#' build_table(tern_ex_adsl)
|
|
27 |
#'
|
|
28 |
#' to_string_matrix(tbl, widths = ceiling(propose_column_widths(tbl) / 2))
|
|
29 |
#'
|
|
30 |
#' @export
|
|
31 |
to_string_matrix <- function(x, widths = NULL, max_width = NULL, |
|
32 |
hsep = formatters::default_hsep(), |
|
33 |
with_spaces = TRUE, print_txt_to_copy = FALSE) { |
|
34 | 11x |
checkmate::assert_flag(with_spaces) |
35 | 11x |
checkmate::assert_flag(print_txt_to_copy) |
36 | 11x |
checkmate::assert_int(max_width, null.ok = TRUE) |
37 | ||
38 | 11x |
if (inherits(x, "MatrixPrintForm")) { |
39 | ! |
tx <- x |
40 |
} else { |
|
41 | 11x |
tx <- matrix_form(x, TRUE) |
42 |
}
|
|
43 | ||
44 | 11x |
tf_wrap <- FALSE |
45 | 11x |
if (!is.null(max_width)) { |
46 | ! |
tf_wrap <- TRUE |
47 |
}
|
|
48 | ||
49 |
# Producing the matrix to test
|
|
50 | 11x |
if (with_spaces) { |
51 | 2x |
out <- strsplit(toString(tx, widths = widths, tf_wrap = tf_wrap, max_width = max_width, hsep = hsep), "\n")[[1]] |
52 |
} else { |
|
53 | 9x |
out <- tx$strings |
54 |
}
|
|
55 | ||
56 |
# Printing to console formatted output that needs to be copied in "expected"
|
|
57 | 11x |
if (print_txt_to_copy) { |
58 | 2x |
out_tmp <- out |
59 | 2x |
if (!with_spaces) { |
60 | 1x |
out_tmp <- apply(out, 1, paste0, collapse = '", "') |
61 |
}
|
|
62 | 2x |
cat(paste0('c(\n "', paste0(out_tmp, collapse = '",\n "'), '"\n)')) |
63 |
}
|
|
64 | ||
65 |
# Return values
|
|
66 | 11x |
out
|
67 |
}
|
|
68 | ||
69 |
#' Blank for missing input
|
|
70 |
#'
|
|
71 |
#' Helper function to use in tabulating model results.
|
|
72 |
#'
|
|
73 |
#' @param x (`vector`)\cr input for a cell.
|
|
74 |
#'
|
|
75 |
#' @return An empty `character` vector if all entries in `x` are missing (`NA`), otherwise
|
|
76 |
#' the unlisted version of `x`.
|
|
77 |
#'
|
|
78 |
#' @keywords internal
|
|
79 |
unlist_and_blank_na <- function(x) { |
|
80 | 267x |
unl <- unlist(x) |
81 | 267x |
if (all(is.na(unl))) { |
82 | 161x |
character() |
83 |
} else { |
|
84 | 106x |
unl
|
85 |
}
|
|
86 |
}
|
|
87 | ||
88 |
#' Constructor for content functions given a data frame with flag input
|
|
89 |
#'
|
|
90 |
#' This can be useful for tabulating model results.
|
|
91 |
#'
|
|
92 |
#' @param analysis_var (`string`)\cr variable name for the column containing values to be returned by the
|
|
93 |
#' content function.
|
|
94 |
#' @param flag_var (`string`)\cr variable name for the logical column identifying which row should be returned.
|
|
95 |
#' @param format (`string`)\cr `rtables` format to use.
|
|
96 |
#'
|
|
97 |
#' @return A content function which gives `df$analysis_var` at the row identified by
|
|
98 |
#' `.df_row$flag` in the given format.
|
|
99 |
#'
|
|
100 |
#' @keywords internal
|
|
101 |
cfun_by_flag <- function(analysis_var, |
|
102 |
flag_var,
|
|
103 |
format = "xx", |
|
104 |
.indent_mods = NULL) { |
|
105 | 61x |
checkmate::assert_string(analysis_var) |
106 | 61x |
checkmate::assert_string(flag_var) |
107 | 61x |
function(df, labelstr) { |
108 | 265x |
row_index <- which(df[[flag_var]]) |
109 | 265x |
x <- unlist_and_blank_na(df[[analysis_var]][row_index]) |
110 | 265x |
formatters::with_label( |
111 | 265x |
rcell(x, format = format, indent_mod = .indent_mods), |
112 | 265x |
labelstr
|
113 |
)
|
|
114 |
}
|
|
115 |
}
|
|
116 | ||
117 |
#' Content row function to add row total to labels
|
|
118 |
#'
|
|
119 |
#' This takes the label of the latest row split level and adds the row total from `df` in parentheses.
|
|
120 |
#' This function differs from [c_label_n_alt()] by taking row counts from `df` rather than
|
|
121 |
#' `alt_counts_df`, and is used by [add_rowcounts()] when `alt_counts` is set to `FALSE`.
|
|
122 |
#'
|
|
123 |
#' @inheritParams argument_convention
|
|
124 |
#'
|
|
125 |
#' @return A list with formatted [rtables::CellValue()] with the row count value and the correct label.
|
|
126 |
#'
|
|
127 |
#' @note It is important here to not use `df` but rather `.N_row` in the implementation, because
|
|
128 |
#' the former is already split by columns and will refer to the first column of the data only.
|
|
129 |
#'
|
|
130 |
#' @seealso [c_label_n_alt()] which performs the same function but retrieves row counts from
|
|
131 |
#' `alt_counts_df` instead of `df`.
|
|
132 |
#'
|
|
133 |
#' @keywords internal
|
|
134 |
c_label_n <- function(df, |
|
135 |
labelstr,
|
|
136 |
.N_row) { # nolint |
|
137 | 273x |
label <- paste0(labelstr, " (N=", .N_row, ")") |
138 | 273x |
in_rows( |
139 | 273x |
.list = list(row_count = formatters::with_label(c(.N_row, .N_row), label)), |
140 | 273x |
.formats = c(row_count = function(x, ...) "") |
141 |
)
|
|
142 |
}
|
|
143 | ||
144 |
#' Content row function to add `alt_counts_df` row total to labels
|
|
145 |
#'
|
|
146 |
#' This takes the label of the latest row split level and adds the row total from `alt_counts_df`
|
|
147 |
#' in parentheses. This function differs from [c_label_n()] by taking row counts from `alt_counts_df`
|
|
148 |
#' rather than `df`, and is used by [add_rowcounts()] when `alt_counts` is set to `TRUE`.
|
|
149 |
#'
|
|
150 |
#' @inheritParams argument_convention
|
|
151 |
#'
|
|
152 |
#' @return A list with formatted [rtables::CellValue()] with the row count value and the correct label.
|
|
153 |
#'
|
|
154 |
#' @seealso [c_label_n()] which performs the same function but retrieves row counts from `df` instead
|
|
155 |
#' of `alt_counts_df`.
|
|
156 |
#'
|
|
157 |
#' @keywords internal
|
|
158 |
c_label_n_alt <- function(df, |
|
159 |
labelstr,
|
|
160 |
.alt_df_row) { |
|
161 | 7x |
N_row_alt <- nrow(.alt_df_row) # nolint |
162 | 7x |
label <- paste0(labelstr, " (N=", N_row_alt, ")") |
163 | 7x |
in_rows( |
164 | 7x |
.list = list(row_count = formatters::with_label(c(N_row_alt, N_row_alt), label)), |
165 | 7x |
.formats = c(row_count = function(x, ...) "") |
166 |
)
|
|
167 |
}
|
|
168 | ||
169 |
#' Layout-creating function to add row total counts
|
|
170 |
#'
|
|
171 |
#' @description `r lifecycle::badge("stable")`
|
|
172 |
#'
|
|
173 |
#' This works analogously to [rtables::add_colcounts()] but on the rows. This function
|
|
174 |
#' is a wrapper for [rtables::summarize_row_groups()].
|
|
175 |
#'
|
|
176 |
#' @inheritParams argument_convention
|
|
177 |
#' @param alt_counts (`flag`)\cr whether row counts should be taken from `alt_counts_df` (`TRUE`)
|
|
178 |
#' or from `df` (`FALSE`). Defaults to `FALSE`.
|
|
179 |
#'
|
|
180 |
#' @return A modified layout where the latest row split labels now have the row-wise
|
|
181 |
#' total counts (i.e. without column-based subsetting) attached in parentheses.
|
|
182 |
#'
|
|
183 |
#' @note Row count values are contained in these row count rows but are not displayed
|
|
184 |
#' so that they are not considered zero rows by default when pruning.
|
|
185 |
#'
|
|
186 |
#' @examples
|
|
187 |
#' basic_table() %>%
|
|
188 |
#' split_cols_by("ARM") %>%
|
|
189 |
#' add_colcounts() %>%
|
|
190 |
#' split_rows_by("RACE", split_fun = drop_split_levels) %>%
|
|
191 |
#' add_rowcounts() %>%
|
|
192 |
#' analyze("AGE", afun = list_wrap_x(summary), format = "xx.xx") %>%
|
|
193 |
#' build_table(DM)
|
|
194 |
#'
|
|
195 |
#' @export
|
|
196 |
add_rowcounts <- function(lyt, alt_counts = FALSE) { |
|
197 | 7x |
summarize_row_groups( |
198 | 7x |
lyt,
|
199 | 7x |
cfun = if (alt_counts) c_label_n_alt else c_label_n |
200 |
)
|
|
201 |
}
|
|
202 | ||
203 |
#' Obtain column indices
|
|
204 |
#'
|
|
205 |
#' @description `r lifecycle::badge("stable")`
|
|
206 |
#'
|
|
207 |
#' Helper function to extract column indices from a `VTableTree` for a given
|
|
208 |
#' vector of column names.
|
|
209 |
#'
|
|
210 |
#' @param table_tree (`VTableTree`)\cr `rtables` table object to extract the indices from.
|
|
211 |
#' @param col_names (`character`)\cr vector of column names.
|
|
212 |
#'
|
|
213 |
#' @return A vector of column indices.
|
|
214 |
#'
|
|
215 |
#' @export
|
|
216 |
h_col_indices <- function(table_tree, col_names) { |
|
217 | 1256x |
checkmate::assert_class(table_tree, "VTableNodeInfo") |
218 | 1256x |
checkmate::assert_subset(col_names, names(attr(col_info(table_tree), "cextra_args")), empty.ok = FALSE) |
219 | 1256x |
match(col_names, names(attr(col_info(table_tree), "cextra_args"))) |
220 |
}
|
|
221 | ||
222 |
#' Labels or names of list elements
|
|
223 |
#'
|
|
224 |
#' Internal helper function for working with nested statistic function results which typically
|
|
225 |
#' don't have labels but names that we can use.
|
|
226 |
#'
|
|
227 |
#' @param x (`list`)\cr a list.
|
|
228 |
#'
|
|
229 |
#' @return A `character` vector with the labels or names for the list elements.
|
|
230 |
#'
|
|
231 |
#' @keywords internal
|
|
232 |
labels_or_names <- function(x) { |
|
233 | 190x |
checkmate::assert_multi_class(x, c("data.frame", "list")) |
234 | 190x |
labs <- sapply(x, obj_label) |
235 | 190x |
nams <- rlang::names2(x) |
236 | 190x |
label_is_null <- sapply(labs, is.null) |
237 | 190x |
result <- unlist(ifelse(label_is_null, nams, labs)) |
238 | 190x |
result
|
239 |
}
|
|
240 | ||
241 |
#' Convert to `rtable`
|
|
242 |
#'
|
|
243 |
#' @description `r lifecycle::badge("stable")`
|
|
244 |
#'
|
|
245 |
#' This is a new generic function to convert objects to `rtable` tables.
|
|
246 |
#'
|
|
247 |
#' @param x (`data.frame`)\cr the object which should be converted to an `rtable`.
|
|
248 |
#' @param ... additional arguments for methods.
|
|
249 |
#'
|
|
250 |
#' @return An `rtables` table object. Note that the concrete class will depend on the method used.
|
|
251 |
#'
|
|
252 |
#' @export
|
|
253 |
as.rtable <- function(x, ...) { # nolint |
|
254 | 3x |
UseMethod("as.rtable", x) |
255 |
}
|
|
256 | ||
257 |
#' @describeIn as.rtable Method for converting a `data.frame` that contains numeric columns to `rtable`.
|
|
258 |
#'
|
|
259 |
#' @param format (`string` or `function`)\cr the format which should be used for the columns.
|
|
260 |
#'
|
|
261 |
#' @method as.rtable data.frame
|
|
262 |
#'
|
|
263 |
#' @examples
|
|
264 |
#' x <- data.frame(
|
|
265 |
#' a = 1:10,
|
|
266 |
#' b = rnorm(10)
|
|
267 |
#' )
|
|
268 |
#' as.rtable(x)
|
|
269 |
#'
|
|
270 |
#' @export
|
|
271 |
as.rtable.data.frame <- function(x, format = "xx.xx", ...) { |
|
272 | 3x |
checkmate::assert_numeric(unlist(x)) |
273 | 2x |
do.call( |
274 | 2x |
rtable,
|
275 | 2x |
c( |
276 | 2x |
list( |
277 | 2x |
header = labels_or_names(x), |
278 | 2x |
format = format |
279 |
),
|
|
280 | 2x |
Map( |
281 | 2x |
function(row, row_name) { |
282 | 20x |
do.call( |
283 | 20x |
rrow,
|
284 | 20x |
c(as.list(unname(row)), |
285 | 20x |
row.name = row_name |
286 |
)
|
|
287 |
)
|
|
288 |
},
|
|
289 | 2x |
row = as.data.frame(t(x)), |
290 | 2x |
row_name = rownames(x) |
291 |
)
|
|
292 |
)
|
|
293 |
)
|
|
294 |
}
|
|
295 | ||
296 |
#' Split parameters
|
|
297 |
#'
|
|
298 |
#' @description `r lifecycle::badge("deprecated")`
|
|
299 |
#'
|
|
300 |
#' It divides the data in the vector `param` into the groups defined by `f` based on specified `values`. It is relevant
|
|
301 |
#' in `rtables` layers so as to distribute parameters `.stats` or' `.formats` into lists with items corresponding to
|
|
302 |
#' specific analysis function.
|
|
303 |
#'
|
|
304 |
#' @param param (`vector`)\cr the parameter to be split.
|
|
305 |
#' @param value (`vector`)\cr the value used to split.
|
|
306 |
#' @param f (`list`)\cr the reference to make the split.
|
|
307 |
#'
|
|
308 |
#' @return A named `list` with the same element names as `f`, each containing the elements specified in `.stats`.
|
|
309 |
#'
|
|
310 |
#' @examples
|
|
311 |
#' f <- list(
|
|
312 |
#' surv = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci"),
|
|
313 |
#' surv_diff = c("rate_diff", "rate_diff_ci", "ztest_pval")
|
|
314 |
#' )
|
|
315 |
#'
|
|
316 |
#' .stats <- c("pt_at_risk", "rate_diff")
|
|
317 |
#' h_split_param(.stats, .stats, f = f)
|
|
318 |
#'
|
|
319 |
#' # $surv
|
|
320 |
#' # [1] "pt_at_risk"
|
|
321 |
#' #
|
|
322 |
#' # $surv_diff
|
|
323 |
#' # [1] "rate_diff"
|
|
324 |
#'
|
|
325 |
#' .formats <- c("pt_at_risk" = "xx", "event_free_rate" = "xxx")
|
|
326 |
#' h_split_param(.formats, names(.formats), f = f)
|
|
327 |
#'
|
|
328 |
#' # $surv
|
|
329 |
#' # pt_at_risk event_free_rate
|
|
330 |
#' # "xx" "xxx"
|
|
331 |
#' #
|
|
332 |
#' # $surv_diff
|
|
333 |
#' # NULL
|
|
334 |
#'
|
|
335 |
#' @export
|
|
336 |
h_split_param <- function(param, |
|
337 |
value,
|
|
338 |
f) { |
|
339 | 2x |
lifecycle::deprecate_warn("0.9.8", "h_split_param()") |
340 | ||
341 | 2x |
y <- lapply(f, function(x) param[value %in% x]) |
342 | 2x |
lapply(y, function(x) if (length(x) == 0) NULL else x) |
343 |
}
|
|
344 | ||
345 |
#' Get selected statistics names
|
|
346 |
#'
|
|
347 |
#' Helper function to be used for creating `afun`.
|
|
348 |
#'
|
|
349 |
#' @param .stats (`vector` or `NULL`)\cr input to the layout creating function. Note that `NULL` means
|
|
350 |
#' in this context that all default statistics should be used.
|
|
351 |
#' @param all_stats (`character`)\cr all statistics which can be selected here potentially.
|
|
352 |
#'
|
|
353 |
#' @return A `character` vector with the selected statistics.
|
|
354 |
#'
|
|
355 |
#' @keywords internal
|
|
356 |
afun_selected_stats <- function(.stats, all_stats) { |
|
357 | 2x |
checkmate::assert_character(.stats, null.ok = TRUE) |
358 | 2x |
checkmate::assert_character(all_stats) |
359 | 2x |
if (is.null(.stats)) { |
360 | 1x |
all_stats
|
361 |
} else { |
|
362 | 1x |
intersect(.stats, all_stats) |
363 |
}
|
|
364 |
}
|
|
365 | ||
366 |
#' Add variable labels to top left corner in table
|
|
367 |
#'
|
|
368 |
#' @description `r lifecycle::badge("stable")`
|
|
369 |
#'
|
|
370 |
#' Helper layout-creating function to append the variable labels of a given variables vector
|
|
371 |
#' from a given dataset in the top left corner. If a variable label is not found then the
|
|
372 |
#' variable name itself is used instead. Multiple variable labels are concatenated with slashes.
|
|
373 |
#'
|
|
374 |
#' @inheritParams argument_convention
|
|
375 |
#' @param vars (`character`)\cr variable names of which the labels are to be looked up in `df`.
|
|
376 |
#' @param indent (`integer(1)`)\cr non-negative number of nested indent space, default to 0L which means no indent.
|
|
377 |
#' 1L means two spaces indent, 2L means four spaces indent and so on.
|
|
378 |
#'
|
|
379 |
#' @return A modified layout with the new variable label(s) added to the top-left material.
|
|
380 |
#'
|
|
381 |
#' @note This is not an optimal implementation of course, since we are using here the data set
|
|
382 |
#' itself during the layout creation. When we have a more mature `rtables` implementation then
|
|
383 |
#' this will also be improved or not necessary anymore.
|
|
384 |
#'
|
|
385 |
#' @examples
|
|
386 |
#' lyt <- basic_table() %>%
|
|
387 |
#' split_cols_by("ARM") %>%
|
|
388 |
#' add_colcounts() %>%
|
|
389 |
#' split_rows_by("SEX") %>%
|
|
390 |
#' append_varlabels(DM, "SEX") %>%
|
|
391 |
#' analyze("AGE", afun = mean) %>%
|
|
392 |
#' append_varlabels(DM, "AGE", indent = 1)
|
|
393 |
#' build_table(lyt, DM)
|
|
394 |
#'
|
|
395 |
#' lyt <- basic_table() %>%
|
|
396 |
#' split_cols_by("ARM") %>%
|
|
397 |
#' split_rows_by("SEX") %>%
|
|
398 |
#' analyze("AGE", afun = mean) %>%
|
|
399 |
#' append_varlabels(DM, c("SEX", "AGE"))
|
|
400 |
#' build_table(lyt, DM)
|
|
401 |
#'
|
|
402 |
#' @export
|
|
403 |
append_varlabels <- function(lyt, df, vars, indent = 0L) { |
|
404 | 3x |
if (checkmate::test_flag(indent)) { |
405 | ! |
warning("indent argument is now accepting integers. Boolean indent will be converted to integers.") |
406 | ! |
indent <- as.integer(indent) |
407 |
}
|
|
408 | ||
409 | 3x |
checkmate::assert_data_frame(df) |
410 | 3x |
checkmate::assert_character(vars) |
411 | 3x |
checkmate::assert_count(indent) |
412 | ||
413 | 3x |
lab <- formatters::var_labels(df[vars], fill = TRUE) |
414 | 3x |
lab <- paste(lab, collapse = " / ") |
415 | 3x |
space <- paste(rep(" ", indent * 2), collapse = "") |
416 | 3x |
lab <- paste0(space, lab) |
417 | ||
418 | 3x |
append_topleft(lyt, lab) |
419 |
}
|
|
420 | ||
421 |
#' Default string replacement for `NA` values
|
|
422 |
#'
|
|
423 |
#' @description `r lifecycle::badge("stable")`
|
|
424 |
#'
|
|
425 |
#' The default string used to represent `NA` values. This value is used as the default
|
|
426 |
#' value for the `na_str` argument throughout the `tern` package, and printed in place
|
|
427 |
#' of `NA` values in output tables. If not specified for each `tern` function by the user
|
|
428 |
#' via the `na_str` argument, or in the R environment options via [set_default_na_str()],
|
|
429 |
#' then `NA` is used.
|
|
430 |
#'
|
|
431 |
#' @param na_str (`string`)\cr single string value to set in the R environment options as
|
|
432 |
#' the default value to replace `NA`s. Use `getOption("tern_default_na_str")` to check the
|
|
433 |
#' current value set in the R environment (defaults to `NULL` if not set).
|
|
434 |
#'
|
|
435 |
#' @name default_na_str
|
|
436 |
NULL
|
|
437 | ||
438 |
#' @describeIn default_na_str Accessor for default `NA` value replacement string.
|
|
439 |
#'
|
|
440 |
#' @return
|
|
441 |
#' * `default_na_str` returns the current value if an R environment option has been set
|
|
442 |
#' for `"tern_default_na_str"`, or `NA_character_` otherwise.
|
|
443 |
#'
|
|
444 |
#' @examples
|
|
445 |
#' # Default settings
|
|
446 |
#' default_na_str()
|
|
447 |
#' getOption("tern_default_na_str")
|
|
448 |
#'
|
|
449 |
#' # Set custom value
|
|
450 |
#' set_default_na_str("<Missing>")
|
|
451 |
#'
|
|
452 |
#' # Settings after value has been set
|
|
453 |
#' default_na_str()
|
|
454 |
#' getOption("tern_default_na_str")
|
|
455 |
#'
|
|
456 |
#' @export
|
|
457 |
default_na_str <- function() { |
|
458 | 274x |
getOption("tern_default_na_str", default = NA_character_) |
459 |
}
|
|
460 | ||
461 |
#' @describeIn default_na_str Setter for default `NA` value replacement string. Sets the
|
|
462 |
#' option `"tern_default_na_str"` within the R environment.
|
|
463 |
#'
|
|
464 |
#' @return
|
|
465 |
#' * `set_default_na_str` has no return value.
|
|
466 |
#'
|
|
467 |
#' @export
|
|
468 |
set_default_na_str <- function(na_str) { |
|
469 | 4x |
checkmate::assert_character(na_str, len = 1, null.ok = TRUE) |
470 | 4x |
options("tern_default_na_str" = na_str) |
471 |
}
|
|
472 | ||
473 | ||
474 |
#' Utilities to handle extra arguments in analysis functions
|
|
475 |
#'
|
|
476 |
#' @description `r lifecycle::badge("stable")`
|
|
477 |
#' Important additional parameters, useful to modify behavior of analysis and summary
|
|
478 |
#' functions are listed in [rtables::additional_fun_params]. With these utility functions
|
|
479 |
#' we can retrieve a curated list of these parameters from the environment, and pass them
|
|
480 |
#' to the analysis functions with dedicated `...`; notice that the final `s_*` function
|
|
481 |
#' will get them through argument matching.
|
|
482 |
#'
|
|
483 |
#' @param extra_afun_params (`list`)\cr list of additional parameters (`character`) to be
|
|
484 |
#' retrieved from the environment. Curated list is present in [rtables::additional_fun_params].
|
|
485 |
#' @param add_alt_df (`logical`)\cr if `TRUE`, the function will also add `.alt_df` and `.alt_df_row`
|
|
486 |
#' parameters.
|
|
487 |
#'
|
|
488 |
#' @name util_handling_additional_fun_params
|
|
489 |
NULL
|
|
490 | ||
491 |
#' @describeIn util_handling_additional_fun_params Retrieve additional parameters from the environment.
|
|
492 |
#'
|
|
493 |
#' @return
|
|
494 |
#' * `retrieve_extra_afun_params` returns a list of the values of the parameters in the environment.
|
|
495 |
#'
|
|
496 |
#' @keywords internal
|
|
497 |
retrieve_extra_afun_params <- function(extra_afun_params) { |
|
498 | 1583x |
out <- list() |
499 | 1583x |
for (extra_param in extra_afun_params) { |
500 | 15851x |
out <- c(out, list(get(extra_param, envir = parent.frame()))) |
501 |
}
|
|
502 | 1583x |
setNames(out, extra_afun_params) |
503 |
}
|
|
504 | ||
505 |
#' @describeIn util_handling_additional_fun_params Curated list of additional parameters for
|
|
506 |
#' analysis functions. Please check [rtables::additional_fun_params] for precise descriptions.
|
|
507 |
#'
|
|
508 |
#' @return
|
|
509 |
#' * `get_additional_afun_params` returns a list of additional parameters.
|
|
510 |
#'
|
|
511 |
#' @keywords internal
|
|
512 |
get_additional_afun_params <- function(add_alt_df = FALSE) { |
|
513 | 240x |
out_list <- list( |
514 | 240x |
.N_col = integer(), |
515 | 240x |
.N_total = integer(), |
516 | 240x |
.N_row = integer(), |
517 | 240x |
.df_row = data.frame(), |
518 | 240x |
.var = character(), |
519 | 240x |
.ref_group = character(), |
520 | 240x |
.ref_full = vector(mode = "numeric"), |
521 | 240x |
.in_ref_col = logical(), |
522 | 240x |
.spl_context = data.frame(), |
523 | 240x |
.all_col_exprs = vector(mode = "expression"), |
524 | 240x |
.all_col_counts = vector(mode = "integer") |
525 |
)
|
|
526 | ||
527 | 240x |
if (isTRUE(add_alt_df)) { |
528 | ! |
out_list <- c( |
529 | ! |
out_list,
|
530 | ! |
.alt_df_row = data.frame(), |
531 | ! |
.alt_df = data.frame() |
532 |
)
|
|
533 |
}
|
|
534 | ||
535 | 240x |
out_list
|
536 |
}
|
1 |
#' Survival time point analysis
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [surv_timepoint()] creates a layout element to analyze patient survival rates and difference
|
|
6 |
#' of survival rates between groups at a given time point. The primary analysis variable `vars` is the time variable.
|
|
7 |
#' Other required inputs are `time_point`, the numeric time point of interest, and `is_event`, a variable that
|
|
8 |
#' indicates whether or not an event has occurred. The `method` argument is used to specify whether you want to analyze
|
|
9 |
#' survival estimations (`"surv"`), difference in survival with the control (`"surv_diff"`), or both of these
|
|
10 |
#' (`"both"`).
|
|
11 |
#'
|
|
12 |
#' @inheritParams argument_convention
|
|
13 |
#' @inheritParams s_surv_time
|
|
14 |
#' @param time_point (`numeric(1)`)\cr survival time point of interest.
|
|
15 |
#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function
|
|
16 |
#' [control_surv_timepoint()]. Some possible parameter options are:
|
|
17 |
#' * `conf_level` (`proportion`)\cr confidence level of the interval for survival rate.
|
|
18 |
#' * `conf_type` (`string`)\cr confidence interval type. Options are "plain" (default), "log", "log-log",
|
|
19 |
#' see more in [survival::survfit()]. Note option "none" is no longer supported.
|
|
20 |
#' @param method (`string`)\cr `"surv"` (survival estimations), `"surv_diff"` (difference in survival with the
|
|
21 |
#' control), or `"both"`.
|
|
22 |
#' @param table_names_suffix (`string`)\cr optional suffix for the `table_names` used for the `rtables` to
|
|
23 |
#' avoid warnings from duplicate table names.
|
|
24 |
#' @param .indent_mods (named `integer`)\cr indent modifiers for the labels. Each element of the vector
|
|
25 |
#' should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
|
|
26 |
#' for that statistic's row label.
|
|
27 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
28 |
#'
|
|
29 |
#' Options are: ``r shQuote(get_stats("surv_timepoint"), type = "sh")``
|
|
30 |
#'
|
|
31 |
#' @name survival_timepoint
|
|
32 |
#' @order 1
|
|
33 |
NULL
|
|
34 | ||
35 |
#' @describeIn survival_timepoint Statistics function which analyzes survival rate.
|
|
36 |
#'
|
|
37 |
#' @return
|
|
38 |
#' * `s_surv_timepoint()` returns the statistics:
|
|
39 |
#' * `pt_at_risk`: Patients remaining at risk.
|
|
40 |
#' * `event_free_rate`: Event-free rate (%).
|
|
41 |
#' * `rate_se`: Standard error of event free rate.
|
|
42 |
#' * `rate_ci`: Confidence interval for event free rate.
|
|
43 |
#' * `event_free_rate_3d`: Event-free rate (%) with Confidence interval.
|
|
44 |
#'
|
|
45 |
#' @keywords internal
|
|
46 |
s_surv_timepoint <- function(df, |
|
47 |
.var,
|
|
48 |
time_point,
|
|
49 |
is_event,
|
|
50 |
control = control_surv_timepoint(), |
|
51 |
...) { |
|
52 | 35x |
checkmate::assert_string(.var) |
53 | 35x |
assert_df_with_variables(df, list(tte = .var, is_event = is_event)) |
54 | 35x |
checkmate::assert_numeric(df[[.var]], min.len = 1, any.missing = FALSE) |
55 | 35x |
checkmate::assert_number(time_point) |
56 | 35x |
checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE) |
57 | ||
58 | 35x |
conf_type <- control$conf_type |
59 | 35x |
conf_level <- control$conf_level |
60 | ||
61 | 35x |
formula <- stats::as.formula(paste0("survival::Surv(", .var, ", ", is_event, ") ~ 1")) |
62 | 35x |
srv_fit <- survival::survfit( |
63 | 35x |
formula = formula, |
64 | 35x |
data = df, |
65 | 35x |
conf.int = conf_level, |
66 | 35x |
conf.type = conf_type |
67 |
)
|
|
68 | 35x |
s_srv_fit <- summary(srv_fit, times = time_point, extend = TRUE) |
69 | 35x |
df_srv_fit <- as.data.frame(s_srv_fit[c("time", "n.risk", "surv", "lower", "upper", "std.err")]) |
70 | 35x |
if (df_srv_fit[["n.risk"]] == 0) { |
71 | 1x |
pt_at_risk <- event_free_rate <- rate_se <- NA_real_ |
72 | 1x |
rate_ci <- c(NA_real_, NA_real_) |
73 |
} else { |
|
74 | 34x |
pt_at_risk <- df_srv_fit$n.risk |
75 | 34x |
event_free_rate <- df_srv_fit$surv |
76 | 34x |
rate_se <- df_srv_fit$std.err |
77 | 34x |
rate_ci <- c(df_srv_fit$lower, df_srv_fit$upper) |
78 |
}
|
|
79 | 35x |
event_free_rate_3d <- c(event_free_rate, rate_ci) |
80 | 35x |
list( |
81 | 35x |
pt_at_risk = formatters::with_label(pt_at_risk, "Patients remaining at risk"), |
82 | 35x |
event_free_rate = formatters::with_label(event_free_rate * 100, "Event Free Rate (%)"), |
83 | 35x |
rate_se = formatters::with_label(rate_se * 100, "Standard Error of Event Free Rate"), |
84 | 35x |
rate_ci = formatters::with_label(rate_ci * 100, f_conf_level(conf_level)), |
85 | 35x |
event_free_rate_3d = formatters::with_label( |
86 | 35x |
event_free_rate_3d * 100, paste0("Event Free Rate (", f_conf_level(conf_level), ")") |
87 |
)
|
|
88 |
)
|
|
89 |
}
|
|
90 | ||
91 |
#' @describeIn survival_timepoint Statistics function which analyzes difference between two survival rates.
|
|
92 |
#'
|
|
93 |
#' @return
|
|
94 |
#' * `s_surv_timepoint_diff()` returns the statistics:
|
|
95 |
#' * `rate_diff`: Event-free rate difference between two groups.
|
|
96 |
#' * `rate_diff_ci`: Confidence interval for the difference.
|
|
97 |
#' * `rate_diff_ci_3d`: Event-free rate difference and confidence interval between two groups.
|
|
98 |
#' * `ztest_pval`: p-value to test the difference is 0.
|
|
99 |
#'
|
|
100 |
#' @keywords internal
|
|
101 |
s_surv_timepoint_diff <- function(df, |
|
102 |
.var,
|
|
103 |
.ref_group,
|
|
104 |
.in_ref_col,
|
|
105 |
time_point,
|
|
106 |
control = control_surv_timepoint(), |
|
107 |
...) { |
|
108 | 14x |
if (.in_ref_col) { |
109 | 4x |
return( |
110 | 4x |
list( |
111 | 4x |
rate_diff = formatters::with_label(numeric(), "Difference in Event Free Rate"), |
112 | 4x |
rate_diff_ci = formatters::with_label(numeric(), f_conf_level(control$conf_level)), |
113 | 4x |
rate_diff_ci_3d = formatters::with_label( |
114 | 4x |
numeric(), paste0("Difference in Event Free Rate", f_conf_level(control$conf_level)) |
115 |
),
|
|
116 | 4x |
ztest_pval = formatters::with_label(numeric(), "p-value (Z-test)") |
117 |
)
|
|
118 |
)
|
|
119 |
}
|
|
120 | 10x |
data <- rbind(.ref_group, df) |
121 | 10x |
group <- factor(rep(c("ref", "x"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "x")) |
122 | 10x |
res_per_group <- lapply(split(data, group), function(x) { |
123 | 20x |
s_surv_timepoint(df = x, .var = .var, time_point = time_point, control = control, ...) |
124 |
}) |
|
125 | ||
126 | 10x |
res_x <- res_per_group[[2]] |
127 | 10x |
res_ref <- res_per_group[[1]] |
128 | 10x |
rate_diff <- res_x$event_free_rate - res_ref$event_free_rate |
129 | 10x |
se_diff <- sqrt(res_x$rate_se^2 + res_ref$rate_se^2) |
130 | ||
131 | 10x |
qs <- c(-1, 1) * stats::qnorm(1 - (1 - control$conf_level) / 2) |
132 | 10x |
rate_diff_ci <- rate_diff + qs * se_diff |
133 | 10x |
rate_diff_ci_3d <- c(rate_diff, rate_diff_ci) |
134 | 10x |
ztest_pval <- if (is.na(rate_diff)) { |
135 | 10x |
NA
|
136 |
} else { |
|
137 | 10x |
2 * (1 - stats::pnorm(abs(rate_diff) / se_diff)) |
138 |
}
|
|
139 | 10x |
list( |
140 | 10x |
rate_diff = formatters::with_label(rate_diff, "Difference in Event Free Rate"), |
141 | 10x |
rate_diff_ci = formatters::with_label(rate_diff_ci, f_conf_level(control$conf_level)), |
142 | 10x |
rate_diff_ci_3d = formatters::with_label( |
143 | 10x |
rate_diff_ci_3d, paste0("Difference in Event Free Rate", f_conf_level(control$conf_level)) |
144 |
),
|
|
145 | 10x |
ztest_pval = formatters::with_label(ztest_pval, "p-value (Z-test)") |
146 |
)
|
|
147 |
}
|
|
148 | ||
149 |
#' @describeIn survival_timepoint Formatted analysis function which is used as `afun` in `surv_timepoint()`.
|
|
150 |
#'
|
|
151 |
#' @return
|
|
152 |
#' * `a_surv_timepoint()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
153 |
#'
|
|
154 |
#' @keywords internal
|
|
155 |
a_surv_timepoint <- function(df, |
|
156 |
...,
|
|
157 |
.stats = NULL, |
|
158 |
.stat_names = NULL, |
|
159 |
.formats = NULL, |
|
160 |
.labels = NULL, |
|
161 |
.indent_mods = NULL) { |
|
162 |
# Check for additional parameters to the statistics function
|
|
163 | 24x |
dots_extra_args <- list(...) |
164 | 24x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
165 | 24x |
dots_extra_args$.additional_fun_parameters <- NULL |
166 | 24x |
method <- dots_extra_args$method |
167 | ||
168 |
# Check for user-defined functions
|
|
169 | 24x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
170 | 24x |
.stats <- default_and_custom_stats_list$all_stats |
171 | 24x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
172 | ||
173 |
# Apply statistics function
|
|
174 | 24x |
x_stats <- .apply_stat_functions( |
175 | 24x |
default_stat_fnc = if (method == "surv") s_surv_timepoint else s_surv_timepoint_diff, |
176 | 24x |
custom_stat_fnc_list = custom_stat_functions, |
177 | 24x |
args_list = c( |
178 | 24x |
df = list(df), |
179 | 24x |
extra_afun_params,
|
180 | 24x |
dots_extra_args
|
181 |
)
|
|
182 |
)
|
|
183 | ||
184 |
# Fill in formatting defaults
|
|
185 | 24x |
.stats <- get_stats(if (method == "surv") "surv_timepoint" else "surv_timepoint_diff", |
186 | 24x |
stats_in = .stats, |
187 | 24x |
custom_stats_in = names(custom_stat_functions) |
188 |
)
|
|
189 | 24x |
x_stats <- x_stats[.stats] |
190 | 24x |
.formats <- get_formats_from_stats(.stats, .formats) |
191 | 24x |
.labels <- get_labels_from_stats( |
192 | 24x |
.stats, .labels, |
193 | 24x |
tern_defaults = c(lapply(x_stats, attr, "label"), tern_default_labels) |
194 |
)
|
|
195 | 24x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods) |
196 | ||
197 |
# Auto format handling
|
|
198 | 24x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
199 | ||
200 |
# Get and check statistical names
|
|
201 | 24x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
202 | ||
203 | 24x |
in_rows( |
204 | 24x |
.list = x_stats, |
205 | 24x |
.formats = .formats, |
206 | 24x |
.names = .labels %>% .unlist_keep_nulls(), |
207 | 24x |
.stat_names = .stat_names, |
208 | 24x |
.labels = .labels %>% .unlist_keep_nulls(), |
209 | 24x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
210 |
)
|
|
211 |
}
|
|
212 | ||
213 |
#' @describeIn survival_timepoint Layout-creating function which can take statistics function arguments
|
|
214 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
215 |
#'
|
|
216 |
#' @return
|
|
217 |
#' * `surv_timepoint()` returns a layout object suitable for passing to further layouting functions,
|
|
218 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
219 |
#' the statistics from `s_surv_timepoint()` and/or `s_surv_timepoint_diff()` to the table layout depending on
|
|
220 |
#' the value of `method`.
|
|
221 |
#'
|
|
222 |
#' @examples
|
|
223 |
#' library(dplyr)
|
|
224 |
#'
|
|
225 |
#' adtte_f <- tern_ex_adtte %>%
|
|
226 |
#' filter(PARAMCD == "OS") %>%
|
|
227 |
#' mutate(
|
|
228 |
#' AVAL = day2month(AVAL),
|
|
229 |
#' is_event = CNSR == 0
|
|
230 |
#' )
|
|
231 |
#'
|
|
232 |
#' # Survival at given time points.
|
|
233 |
#' basic_table() %>%
|
|
234 |
#' split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
|
|
235 |
#' add_colcounts() %>%
|
|
236 |
#' surv_timepoint(
|
|
237 |
#' vars = "AVAL",
|
|
238 |
#' var_labels = "Months",
|
|
239 |
#' is_event = "is_event",
|
|
240 |
#' time_point = 7
|
|
241 |
#' ) %>%
|
|
242 |
#' build_table(df = adtte_f)
|
|
243 |
#'
|
|
244 |
#' # Difference in survival at given time points.
|
|
245 |
#' basic_table() %>%
|
|
246 |
#' split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
|
|
247 |
#' add_colcounts() %>%
|
|
248 |
#' surv_timepoint(
|
|
249 |
#' vars = "AVAL",
|
|
250 |
#' var_labels = "Months",
|
|
251 |
#' is_event = "is_event",
|
|
252 |
#' time_point = 9,
|
|
253 |
#' method = "surv_diff",
|
|
254 |
#' .indent_mods = c("rate_diff" = 0L, "rate_diff_ci" = 2L, "ztest_pval" = 2L)
|
|
255 |
#' ) %>%
|
|
256 |
#' build_table(df = adtte_f)
|
|
257 |
#'
|
|
258 |
#' # Survival and difference in survival at given time points.
|
|
259 |
#' basic_table() %>%
|
|
260 |
#' split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
|
|
261 |
#' add_colcounts() %>%
|
|
262 |
#' surv_timepoint(
|
|
263 |
#' vars = "AVAL",
|
|
264 |
#' var_labels = "Months",
|
|
265 |
#' is_event = "is_event",
|
|
266 |
#' time_point = 9,
|
|
267 |
#' method = "both"
|
|
268 |
#' ) %>%
|
|
269 |
#' build_table(df = adtte_f)
|
|
270 |
#'
|
|
271 |
#' @export
|
|
272 |
#' @order 2
|
|
273 |
surv_timepoint <- function(lyt, |
|
274 |
vars,
|
|
275 |
time_point,
|
|
276 |
is_event,
|
|
277 |
control = control_surv_timepoint(), |
|
278 |
method = c("surv", "surv_diff", "both"), |
|
279 |
na_str = default_na_str(), |
|
280 |
nested = TRUE, |
|
281 |
...,
|
|
282 |
table_names_suffix = "", |
|
283 |
var_labels = "Time", |
|
284 |
show_labels = "visible", |
|
285 |
.stats = c( |
|
286 |
"pt_at_risk", "event_free_rate", "rate_ci", |
|
287 |
"rate_diff", "rate_diff_ci", "ztest_pval" |
|
288 |
),
|
|
289 |
.stat_names = NULL, |
|
290 |
.formats = list(rate_ci = "(xx.xx, xx.xx)"), |
|
291 |
.labels = NULL, |
|
292 |
.indent_mods = if (method == "both") { |
|
293 | 2x |
c(rate_diff = 1L, rate_diff_ci = 2L, ztest_pval = 2L) |
294 |
} else { |
|
295 | 4x |
c(rate_diff_ci = 1L, ztest_pval = 1L) |
296 |
}) { |
|
297 | 6x |
method <- match.arg(method) |
298 | 6x |
checkmate::assert_string(table_names_suffix) |
299 | ||
300 |
# Process standard extra arguments
|
|
301 | 6x |
extra_args <- list(".stats" = .stats) |
302 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
303 | 6x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
304 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
305 | 6x |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
306 | ||
307 |
# Process additional arguments to the statistic function
|
|
308 | 6x |
extra_args <- c( |
309 | 6x |
extra_args,
|
310 | 6x |
time_point = list(time_point), is_event = is_event, control = list(control), |
311 |
...
|
|
312 |
)
|
|
313 | ||
314 |
# Append additional info from layout to the analysis function
|
|
315 | 6x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
316 | 6x |
formals(a_surv_timepoint) <- c(formals(a_surv_timepoint), extra_args[[".additional_fun_parameters"]]) |
317 | ||
318 | 6x |
for (i in seq_along(time_point)) { |
319 | 6x |
extra_args[["time_point"]] <- time_point[i] |
320 | ||
321 | 6x |
if (method %in% c("surv", "both")) { |
322 | 4x |
extra_args_i <- extra_args |
323 | 4x |
extra_args_i[["method"]] <- "surv" |
324 | ||
325 | 4x |
lyt <- analyze( |
326 | 4x |
lyt = lyt, |
327 | 4x |
vars = vars, |
328 | 4x |
afun = a_surv_timepoint, |
329 | 4x |
na_str = na_str, |
330 | 4x |
nested = nested, |
331 | 4x |
extra_args = extra_args_i, |
332 | 4x |
var_labels = paste(time_point[i], var_labels), |
333 | 4x |
show_labels = show_labels, |
334 | 4x |
table_names = paste0("surv_", time_point[i], table_names_suffix) |
335 |
)
|
|
336 |
}
|
|
337 | ||
338 | 6x |
if (method %in% c("surv_diff", "both")) { |
339 | 4x |
extra_args_i <- extra_args |
340 | 4x |
extra_args_i[["method"]] <- "surv_diff" |
341 | ||
342 | 4x |
lyt <- analyze( |
343 | 4x |
lyt = lyt, |
344 | 4x |
vars = vars, |
345 | 4x |
afun = a_surv_timepoint, |
346 | 4x |
na_str = na_str, |
347 | 4x |
nested = nested, |
348 | 4x |
extra_args = extra_args_i, |
349 | 4x |
var_labels = paste(time_point[i], var_labels), |
350 | 4x |
show_labels = ifelse(method == "both", "hidden", show_labels), |
351 | 4x |
table_names = paste0("surv_diff_", time_point[i], table_names_suffix) |
352 |
)
|
|
353 |
}
|
|
354 |
}
|
|
355 | ||
356 | 6x |
lyt
|
357 |
}
|
1 |
#' Kaplan-Meier plot
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' From a survival model, a graphic is rendered along with tabulated annotation
|
|
6 |
#' including the number of patient at risk at given time and the median survival
|
|
7 |
#' per group.
|
|
8 |
#'
|
|
9 |
#' @inheritParams argument_convention
|
|
10 |
#' @param variables (named `list`)\cr variable names. Details are:
|
|
11 |
#' * `tte` (`numeric`)\cr variable indicating time-to-event duration values.
|
|
12 |
#' * `is_event` (`logical`)\cr event variable. `TRUE` if event, `FALSE` if time to event is censored.
|
|
13 |
#' * `arm` (`factor`)\cr the treatment group variable.
|
|
14 |
#' * `strata` (`character` or `NULL`)\cr variable names indicating stratification factors.
|
|
15 |
#' @param control_surv (`list`)\cr parameters for comparison details, specified by using
|
|
16 |
#' the helper function [control_surv_timepoint()]. Some possible parameter options are:
|
|
17 |
#' * `conf_level` (`proportion`)\cr confidence level of the interval for survival rate.
|
|
18 |
#' * `conf_type` (`string`)\cr `"plain"` (default), `"log"`, `"log-log"` for confidence interval type,
|
|
19 |
#' see more in [survival::survfit()]. Note that the option "none" is no longer supported.
|
|
20 |
#' @param col (`character`)\cr lines colors. Length of a vector should be equal
|
|
21 |
#' to number of strata from [survival::survfit()].
|
|
22 |
#' @param lty (`numeric`)\cr line type. If a vector is given, its length should be equal to the number of strata from
|
|
23 |
#' [survival::survfit()].
|
|
24 |
#' @param lwd (`numeric`)\cr line width. If a vector is given, its length should be equal to the number of strata from
|
|
25 |
#' [survival::survfit()].
|
|
26 |
#' @param censor_show (`flag`)\cr whether to show censored observations.
|
|
27 |
#' @param pch (`string`)\cr name of symbol or character to use as point symbol to indicate censored cases.
|
|
28 |
#' @param size (`numeric(1)`)\cr size of censored point symbols.
|
|
29 |
#' @param max_time (`numeric(1)`)\cr maximum value to show on x-axis. Only data values less than or up to
|
|
30 |
#' this threshold value will be plotted (defaults to `NULL`).
|
|
31 |
#' @param xticks (`numeric` or `NULL`)\cr numeric vector of tick positions or a single number with spacing
|
|
32 |
#' between ticks on the x-axis. If `NULL` (default), [labeling::extended()] is used to determine
|
|
33 |
#' optimal tick positions on the x-axis.
|
|
34 |
#' @param xlab (`string`)\cr x-axis label.
|
|
35 |
#' @param yval (`string`)\cr type of plot, to be plotted on the y-axis. Options are `Survival` (default) and `Failure`
|
|
36 |
#' probability.
|
|
37 |
#' @param ylab (`string`)\cr y-axis label.
|
|
38 |
#' @param title (`string`)\cr plot title.
|
|
39 |
#' @param footnotes (`string`)\cr plot footnotes.
|
|
40 |
#' @param font_size (`numeric(1)`)\cr font size to use for all text.
|
|
41 |
#' @param ci_ribbon (`flag`)\cr whether the confidence interval should be drawn around the Kaplan-Meier curve.
|
|
42 |
#' @param annot_at_risk (`flag`)\cr compute and add the annotation table reporting the number of patient at risk
|
|
43 |
#' matching the main grid of the Kaplan-Meier curve.
|
|
44 |
#' @param annot_at_risk_title (`flag`)\cr whether the "Patients at Risk" title should be added above the `annot_at_risk`
|
|
45 |
#' table. Has no effect if `annot_at_risk` is `FALSE`. Defaults to `TRUE`.
|
|
46 |
#' @param annot_surv_med (`flag`)\cr compute and add the annotation table on the Kaplan-Meier curve estimating the
|
|
47 |
#' median survival time per group.
|
|
48 |
#' @param annot_coxph (`flag`)\cr whether to add the annotation table from a [survival::coxph()] model.
|
|
49 |
#' @param annot_stats (`string` or `NULL`)\cr statistics annotations to add to the plot. Options are
|
|
50 |
#' `median` (median survival follow-up time) and `min` (minimum survival follow-up time).
|
|
51 |
#' @param annot_stats_vlines (`flag`)\cr add vertical lines corresponding to each of the statistics
|
|
52 |
#' specified by `annot_stats`. If `annot_stats` is `NULL` no lines will be added.
|
|
53 |
#' @param control_coxph_pw (`list`)\cr parameters for comparison details, specified using the helper function
|
|
54 |
#' [control_coxph()]. Some possible parameter options are:
|
|
55 |
#' * `pval_method` (`string`)\cr p-value method for testing hazard ratio = 1.
|
|
56 |
#' Default method is `"log-rank"`, can also be set to `"wald"` or `"likelihood"`.
|
|
57 |
#' * `ties` (`string`)\cr method for tie handling. Default is `"efron"`,
|
|
58 |
#' can also be set to `"breslow"` or `"exact"`. See more in [survival::coxph()]
|
|
59 |
#' * `conf_level` (`proportion`)\cr confidence level of the interval for HR.
|
|
60 |
#' @param ref_group_coxph (`string` or `NULL`)\cr level of arm variable to use as reference group in calculations for
|
|
61 |
#' `annot_coxph` table. If `NULL` (default), uses the first level of the arm variable.
|
|
62 |
#' @param control_annot_surv_med (`list`)\cr parameters to control the position and size of the annotation table added
|
|
63 |
#' to the plot when `annot_surv_med = TRUE`, specified using the [control_surv_med_annot()] function. Parameter
|
|
64 |
#' options are: `x`, `y`, `w`, `h`, and `fill`. See [control_surv_med_annot()] for details.
|
|
65 |
#' @param control_annot_coxph (`list`)\cr parameters to control the position and size of the annotation table added
|
|
66 |
#' to the plot when `annot_coxph = TRUE`, specified using the [control_coxph_annot()] function. Parameter
|
|
67 |
#' options are: `x`, `y`, `w`, `h`, `fill`, and `ref_lbls`. See [control_coxph_annot()] for details.
|
|
68 |
#' @param legend_pos (`numeric(2)` or `NULL`)\cr vector containing x- and y-coordinates, respectively, for the legend
|
|
69 |
#' position relative to the KM plot area. If `NULL` (default), the legend is positioned in the bottom right corner of
|
|
70 |
#' the plot, or the middle right of the plot if needed to prevent overlapping.
|
|
71 |
#' @param rel_height_plot (`proportion`)\cr proportion of total figure height to allocate to the Kaplan-Meier plot.
|
|
72 |
#' Relative height of patients at risk table is then `1 - rel_height_plot`. If `annot_at_risk = FALSE` or
|
|
73 |
#' `as_list = TRUE`, this parameter is ignored.
|
|
74 |
#' @param ggtheme (`theme`)\cr a graphical theme as provided by `ggplot2` to format the Kaplan-Meier plot.
|
|
75 |
#' @param as_list (`flag`)\cr whether the two `ggplot` objects should be returned as a list when `annot_at_risk = TRUE`.
|
|
76 |
#' If `TRUE`, a named list with two elements, `plot` and `table`, will be returned. If `FALSE` (default) the patients
|
|
77 |
#' at risk table is printed below the plot via [cowplot::plot_grid()].
|
|
78 |
#' @param draw `r lifecycle::badge("deprecated")` This function no longer generates `grob` objects.
|
|
79 |
#' @param newpage `r lifecycle::badge("deprecated")` This function no longer generates `grob` objects.
|
|
80 |
#' @param gp `r lifecycle::badge("deprecated")` This function no longer generates `grob` objects.
|
|
81 |
#' @param vp `r lifecycle::badge("deprecated")` This function no longer generates `grob` objects.
|
|
82 |
#' @param name `r lifecycle::badge("deprecated")` This function no longer generates `grob` objects.
|
|
83 |
#' @param annot_coxph_ref_lbls `r lifecycle::badge("deprecated")` Please use the `ref_lbls` element of
|
|
84 |
#' `control_annot_coxph` instead.
|
|
85 |
#' @param position_coxph `r lifecycle::badge("deprecated")` Please use the `x` and `y` elements of
|
|
86 |
#' `control_annot_coxph` instead.
|
|
87 |
#' @param position_surv_med `r lifecycle::badge("deprecated")` Please use the `x` and `y` elements of
|
|
88 |
#' `control_annot_surv_med` instead.
|
|
89 |
#' @param width_annots `r lifecycle::badge("deprecated")` Please use the `w` element of `control_annot_surv_med`
|
|
90 |
#' (for `surv_med`) and `control_annot_coxph` (for `coxph`)."
|
|
91 |
#'
|
|
92 |
#' @return A `ggplot` Kaplan-Meier plot and (optionally) summary table.
|
|
93 |
#'
|
|
94 |
#' @examples
|
|
95 |
#' library(dplyr)
|
|
96 |
#'
|
|
97 |
#' df <- tern_ex_adtte %>%
|
|
98 |
#' filter(PARAMCD == "OS") %>%
|
|
99 |
#' mutate(is_event = CNSR == 0)
|
|
100 |
#' variables <- list(tte = "AVAL", is_event = "is_event", arm = "ARMCD")
|
|
101 |
#'
|
|
102 |
#' # Basic examples
|
|
103 |
#' g_km(df = df, variables = variables)
|
|
104 |
#' g_km(df = df, variables = variables, yval = "Failure")
|
|
105 |
#'
|
|
106 |
#' # Examples with customization parameters applied
|
|
107 |
#' g_km(
|
|
108 |
#' df = df,
|
|
109 |
#' variables = variables,
|
|
110 |
#' control_surv = control_surv_timepoint(conf_level = 0.9),
|
|
111 |
#' col = c("grey25", "grey50", "grey75"),
|
|
112 |
#' annot_at_risk_title = FALSE,
|
|
113 |
#' lty = 1:3,
|
|
114 |
#' font_size = 8
|
|
115 |
#' )
|
|
116 |
#' g_km(
|
|
117 |
#' df = df,
|
|
118 |
#' variables = variables,
|
|
119 |
#' annot_stats = c("min", "median"),
|
|
120 |
#' annot_stats_vlines = TRUE,
|
|
121 |
#' max_time = 3000,
|
|
122 |
#' ggtheme = ggplot2::theme_minimal()
|
|
123 |
#' )
|
|
124 |
#'
|
|
125 |
#' # Example with pairwise Cox-PH analysis annotation table, adjusted annotation tables
|
|
126 |
#' g_km(
|
|
127 |
#' df = df, variables = variables,
|
|
128 |
#' annot_coxph = TRUE,
|
|
129 |
#' control_coxph = control_coxph(pval_method = "wald", ties = "exact", conf_level = 0.99),
|
|
130 |
#' control_annot_coxph = control_coxph_annot(x = 0.26, w = 0.35),
|
|
131 |
#' control_annot_surv_med = control_surv_med_annot(x = 0.8, y = 0.9, w = 0.35)
|
|
132 |
#' )
|
|
133 |
#'
|
|
134 |
#' @aliases kaplan_meier
|
|
135 |
#' @export
|
|
136 |
g_km <- function(df, |
|
137 |
variables,
|
|
138 |
control_surv = control_surv_timepoint(), |
|
139 |
col = NULL, |
|
140 |
lty = NULL, |
|
141 |
lwd = 0.5, |
|
142 |
censor_show = TRUE, |
|
143 |
pch = 3, |
|
144 |
size = 2, |
|
145 |
max_time = NULL, |
|
146 |
xticks = NULL, |
|
147 |
xlab = "Days", |
|
148 |
yval = c("Survival", "Failure"), |
|
149 |
ylab = paste(yval, "Probability"), |
|
150 |
ylim = NULL, |
|
151 |
title = NULL, |
|
152 |
footnotes = NULL, |
|
153 |
font_size = 10, |
|
154 |
ci_ribbon = FALSE, |
|
155 |
annot_at_risk = TRUE, |
|
156 |
annot_at_risk_title = TRUE, |
|
157 |
annot_surv_med = TRUE, |
|
158 |
annot_coxph = FALSE, |
|
159 |
annot_stats = NULL, |
|
160 |
annot_stats_vlines = FALSE, |
|
161 |
control_coxph_pw = control_coxph(), |
|
162 |
ref_group_coxph = NULL, |
|
163 |
control_annot_surv_med = control_surv_med_annot(), |
|
164 |
control_annot_coxph = control_coxph_annot(), |
|
165 |
legend_pos = NULL, |
|
166 |
rel_height_plot = 0.75, |
|
167 |
ggtheme = NULL, |
|
168 |
as_list = FALSE, |
|
169 |
draw = lifecycle::deprecated(), |
|
170 |
newpage = lifecycle::deprecated(), |
|
171 |
gp = lifecycle::deprecated(), |
|
172 |
vp = lifecycle::deprecated(), |
|
173 |
name = lifecycle::deprecated(), |
|
174 |
annot_coxph_ref_lbls = lifecycle::deprecated(), |
|
175 |
position_coxph = lifecycle::deprecated(), |
|
176 |
position_surv_med = lifecycle::deprecated(), |
|
177 |
width_annots = lifecycle::deprecated()) { |
|
178 |
# Deprecated argument warnings
|
|
179 | 10x |
if (lifecycle::is_present(draw)) { |
180 | 1x |
lifecycle::deprecate_warn( |
181 | 1x |
"0.9.4", "g_km(draw)", |
182 | 1x |
details = "This argument is no longer used since the plot is now generated as a `ggplot2` object." |
183 |
)
|
|
184 |
}
|
|
185 | 10x |
if (lifecycle::is_present(newpage)) { |
186 | 1x |
lifecycle::deprecate_warn( |
187 | 1x |
"0.9.4", "g_km(newpage)", |
188 | 1x |
details = "This argument is no longer used since the plot is now generated as a `ggplot2` object." |
189 |
)
|
|
190 |
}
|
|
191 | 10x |
if (lifecycle::is_present(gp)) { |
192 | 1x |
lifecycle::deprecate_warn( |
193 | 1x |
"0.9.4", "g_km(gp)", |
194 | 1x |
details = "This argument is no longer used since the plot is now generated as a `ggplot2` object." |
195 |
)
|
|
196 |
}
|
|
197 | 10x |
if (lifecycle::is_present(vp)) { |
198 | 1x |
lifecycle::deprecate_warn( |
199 | 1x |
"0.9.4", "g_km(vp)", |
200 | 1x |
details = "This argument is no longer used since the plot is now generated as a `ggplot2` object." |
201 |
)
|
|
202 |
}
|
|
203 | 10x |
if (lifecycle::is_present(name)) { |
204 | 1x |
lifecycle::deprecate_warn( |
205 | 1x |
"0.9.4", "g_km(name)", |
206 | 1x |
details = "This argument is no longer used since the plot is now generated as a `ggplot2` object." |
207 |
)
|
|
208 |
}
|
|
209 | 10x |
if (lifecycle::is_present(annot_coxph_ref_lbls)) { |
210 | 1x |
lifecycle::deprecate_warn( |
211 | 1x |
"0.9.4", "g_km(annot_coxph_ref_lbls)", |
212 | 1x |
details = "Please specify this setting using the 'ref_lbls' element of control_annot_coxph." |
213 |
)
|
|
214 | 1x |
control_annot_coxph[["ref_lbls"]] <- annot_coxph_ref_lbls |
215 |
}
|
|
216 | 10x |
if (lifecycle::is_present(position_coxph)) { |
217 | 1x |
lifecycle::deprecate_warn( |
218 | 1x |
"0.9.4", "g_km(position_coxph)", |
219 | 1x |
details = "Please specify this setting using the 'x' and 'y' elements of control_annot_coxph." |
220 |
)
|
|
221 | 1x |
control_annot_coxph[["x"]] <- position_coxph[1] |
222 | 1x |
control_annot_coxph[["y"]] <- position_coxph[2] |
223 |
}
|
|
224 | 10x |
if (lifecycle::is_present(position_surv_med)) { |
225 | 1x |
lifecycle::deprecate_warn( |
226 | 1x |
"0.9.4", "g_km(position_surv_med)", |
227 | 1x |
details = "Please specify this setting using the 'x' and 'y' elements of control_annot_surv_med." |
228 |
)
|
|
229 | 1x |
control_annot_surv_med[["x"]] <- position_surv_med[1] |
230 | 1x |
control_annot_surv_med[["y"]] <- position_surv_med[2] |
231 |
}
|
|
232 | 10x |
if (lifecycle::is_present(width_annots)) { |
233 | 1x |
lifecycle::deprecate_warn( |
234 | 1x |
"0.9.4", "g_km(width_annots)", |
235 | 1x |
details = paste( |
236 | 1x |
"Please specify widths of annotation tables relative to the plot area using the 'w' element of",
|
237 | 1x |
"control_annot_surv_med (for surv_med) and control_annot_coxph (for coxph)."
|
238 |
)
|
|
239 |
)
|
|
240 | 1x |
control_annot_surv_med[["w"]] <- as.numeric(width_annots[["surv_med"]]) |
241 | 1x |
control_annot_coxph[["w"]] <- as.numeric(width_annots[["coxph"]]) |
242 |
}
|
|
243 | ||
244 | 10x |
checkmate::assert_list(variables) |
245 | 10x |
checkmate::assert_subset(c("tte", "arm", "is_event"), names(variables)) |
246 | 10x |
checkmate::assert_logical(censor_show, len = 1) |
247 | 10x |
checkmate::assert_numeric(size, len = 1) |
248 | 10x |
checkmate::assert_numeric(max_time, len = 1, null.ok = TRUE) |
249 | 10x |
checkmate::assert_numeric(xticks, null.ok = TRUE) |
250 | 10x |
checkmate::assert_character(xlab, len = 1, null.ok = TRUE) |
251 | 10x |
checkmate::assert_character(yval) |
252 | 10x |
checkmate::assert_character(ylab, null.ok = TRUE) |
253 | 10x |
checkmate::assert_numeric(ylim, finite = TRUE, any.missing = FALSE, len = 2, sorted = TRUE, null.ok = TRUE) |
254 | 10x |
checkmate::assert_character(title, len = 1, null.ok = TRUE) |
255 | 10x |
checkmate::assert_character(footnotes, len = 1, null.ok = TRUE) |
256 | 10x |
checkmate::assert_numeric(font_size, len = 1) |
257 | 10x |
checkmate::assert_logical(ci_ribbon, len = 1) |
258 | 10x |
checkmate::assert_logical(annot_at_risk, len = 1) |
259 | 10x |
checkmate::assert_logical(annot_at_risk_title, len = 1) |
260 | 10x |
checkmate::assert_logical(annot_surv_med, len = 1) |
261 | 10x |
checkmate::assert_logical(annot_coxph, len = 1) |
262 | 10x |
checkmate::assert_subset(annot_stats, c("median", "min")) |
263 | 10x |
checkmate::assert_logical(annot_stats_vlines) |
264 | 10x |
checkmate::assert_list(control_coxph_pw) |
265 | 10x |
checkmate::assert_character(ref_group_coxph, len = 1, null.ok = TRUE) |
266 | 10x |
checkmate::assert_list(control_annot_surv_med) |
267 | 10x |
checkmate::assert_list(control_annot_coxph) |
268 | 10x |
checkmate::assert_numeric(legend_pos, finite = TRUE, any.missing = FALSE, len = 2, null.ok = TRUE) |
269 | 10x |
assert_proportion_value(rel_height_plot) |
270 | 10x |
checkmate::assert_logical(as_list) |
271 | ||
272 | 10x |
tte <- variables$tte |
273 | 10x |
is_event <- variables$is_event |
274 | 10x |
arm <- variables$arm |
275 | 10x |
assert_valid_factor(df[[arm]]) |
276 | 10x |
armval <- as.character(unique(df[[arm]])) |
277 | 10x |
assert_df_with_variables(df, list(tte = tte, is_event = is_event, arm = arm)) |
278 | 10x |
checkmate::assert_logical(df[[is_event]], min.len = 1) |
279 | 10x |
checkmate::assert_numeric(df[[tte]], min.len = 1) |
280 | 10x |
checkmate::assert_vector(col, len = length(armval), null.ok = TRUE) |
281 | 10x |
checkmate::assert_vector(lty, null.ok = TRUE) |
282 | 10x |
checkmate::assert_numeric(lwd, len = 1, null.ok = TRUE) |
283 | ||
284 | 10x |
if (annot_coxph && length(armval) < 2) { |
285 | ! |
stop(paste( |
286 | ! |
"When `annot_coxph` = TRUE, `df` must contain at least 2 levels of `variables$arm`",
|
287 | ! |
"in order to calculate the hazard ratio."
|
288 |
)) |
|
289 |
}
|
|
290 | ||
291 |
# process model
|
|
292 | 10x |
yval <- match.arg(yval) |
293 | 10x |
formula <- stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", arm)) |
294 | 10x |
fit_km <- survival::survfit( |
295 | 10x |
formula = formula, |
296 | 10x |
data = df, |
297 | 10x |
conf.int = control_surv$conf_level, |
298 | 10x |
conf.type = control_surv$conf_type |
299 |
)
|
|
300 | 10x |
data <- h_data_plot(fit_km, armval = armval, max_time = max_time) |
301 | ||
302 |
# calculate x-ticks
|
|
303 | 10x |
xticks <- h_xticks(data = data, xticks = xticks, max_time = max_time) |
304 | ||
305 |
# change estimates of survival to estimates of failure (1 - survival)
|
|
306 | 10x |
if (yval == "Failure") { |
307 | ! |
data[c("estimate", "conf.low", "conf.high", "censor")] <- list( |
308 | ! |
1 - data$estimate, 1 - data$conf.low, 1 - data$conf.high, 1 - data$censor |
309 |
)
|
|
310 |
}
|
|
311 | ||
312 |
# derive y-axis limits
|
|
313 | 10x |
if (is.null(ylim)) { |
314 | 10x |
if (!is.null(max_time)) { |
315 | 1x |
y_lwr <- min(data[data$time < max_time, ][["estimate"]]) |
316 | 1x |
y_upr <- max(data[data$time < max_time, ][["estimate"]]) |
317 |
} else { |
|
318 | 9x |
y_lwr <- min(data[["estimate"]]) |
319 | 9x |
y_upr <- max(data[["estimate"]]) |
320 |
}
|
|
321 | 10x |
ylim <- c(y_lwr, y_upr) |
322 |
}
|
|
323 | ||
324 |
# initialize ggplot
|
|
325 | 10x |
gg_plt <- ggplot( |
326 | 10x |
data = data, |
327 | 10x |
mapping = aes( |
328 | 10x |
x = .data[["time"]], |
329 | 10x |
y = .data[["estimate"]], |
330 | 10x |
ymin = .data[["conf.low"]], |
331 | 10x |
ymax = .data[["conf.high"]], |
332 | 10x |
color = .data[["strata"]], |
333 | 10x |
fill = .data[["strata"]] |
334 |
)
|
|
335 |
) + |
|
336 | 10x |
theme_bw(base_size = font_size) + |
337 | 10x |
scale_y_continuous(limits = ylim, expand = c(0.025, 0)) + |
338 | 10x |
labs(title = title, x = xlab, y = ylab, caption = footnotes) + |
339 | 10x |
theme( |
340 | 10x |
axis.text = element_text(size = font_size), |
341 | 10x |
axis.title = element_text(size = font_size), |
342 | 10x |
legend.title = element_blank(), |
343 | 10x |
legend.text = element_text(size = font_size), |
344 | 10x |
legend.box.background = element_rect(fill = "white", linewidth = 0.5), |
345 | 10x |
legend.background = element_blank(), |
346 | 10x |
legend.position = "inside", |
347 | 10x |
legend.spacing.y = unit(-0.02, "npc"), |
348 | 10x |
panel.grid.major = element_blank(), |
349 | 10x |
panel.grid.minor = element_blank() |
350 |
)
|
|
351 | ||
352 |
# derive x-axis limits
|
|
353 | 10x |
if (!is.null(max_time) && !is.null(xticks)) { |
354 | 1x |
gg_plt <- gg_plt + scale_x_continuous( |
355 | 1x |
breaks = xticks, limits = c(min(0, xticks), max(c(xticks, max_time))), expand = c(0.025, 0) |
356 |
)
|
|
357 | 9x |
} else if (!is.null(xticks)) { |
358 | 9x |
if (max(data$time) <= max(xticks)) { |
359 | 9x |
gg_plt <- gg_plt + scale_x_continuous( |
360 | 9x |
breaks = xticks, limits = c(min(0, min(xticks)), max(xticks)), expand = c(0.025, 0) |
361 |
)
|
|
362 |
} else { |
|
363 | ! |
gg_plt <- gg_plt + scale_x_continuous(breaks = xticks, expand = c(0.025, 0)) |
364 |
}
|
|
365 | ! |
} else if (!is.null(max_time)) { |
366 | ! |
gg_plt <- gg_plt + scale_x_continuous(limits = c(0, max_time), expand = c(0.025, 0)) |
367 |
}
|
|
368 | ||
369 |
# set legend position
|
|
370 | 10x |
if (!is.null(legend_pos)) { |
371 | 2x |
gg_plt <- gg_plt + theme(legend.position.inside = legend_pos) |
372 |
} else { |
|
373 | 8x |
max_time2 <- sort( |
374 | 8x |
data$time, |
375 | 8x |
partial = nrow(data) - length(armval) - 1 |
376 | 8x |
)[nrow(data) - length(armval) - 1] |
377 | ||
378 | 8x |
y_rng <- ylim[2] - ylim[1] |
379 | ||
380 | 8x |
if (yval == "Survival" && all(data$estimate[data$time == max_time2] > ylim[1] + 0.09 * y_rng) && |
381 | 8x |
all(data$estimate[data$time == max_time2] < ylim[1] + 0.5 * y_rng)) { # nolint |
382 | 1x |
gg_plt <- gg_plt + |
383 | 1x |
theme( |
384 | 1x |
legend.position.inside = c(1, 0.5), |
385 | 1x |
legend.justification = c(1.1, 0.6) |
386 |
)
|
|
387 |
} else { |
|
388 | 7x |
gg_plt <- gg_plt + |
389 | 7x |
theme( |
390 | 7x |
legend.position.inside = c(1, 0), |
391 | 7x |
legend.justification = c(1.1, -0.4) |
392 |
)
|
|
393 |
}
|
|
394 |
}
|
|
395 | ||
396 |
# add lines
|
|
397 | 10x |
gg_plt <- if (is.null(lty)) { |
398 | 9x |
gg_plt + geom_step(linewidth = lwd, na.rm = TRUE) |
399 | 10x |
} else if (length(lty) == 1) { |
400 | ! |
gg_plt + geom_step(linewidth = lwd, lty = lty, na.rm = TRUE) |
401 |
} else { |
|
402 | 1x |
gg_plt + |
403 | 1x |
geom_step(aes(lty = .data[["strata"]]), linewidth = lwd, na.rm = TRUE) + |
404 | 1x |
scale_linetype_manual(values = lty) |
405 |
}
|
|
406 | ||
407 |
# add censor marks
|
|
408 | 10x |
if (censor_show) { |
409 | 10x |
gg_plt <- gg_plt + geom_point( |
410 | 10x |
data = data[data$n.censor != 0, ], |
411 | 10x |
aes(x = .data[["time"]], y = .data[["censor"]], shape = "Censored"), |
412 | 10x |
size = size, |
413 | 10x |
na.rm = TRUE |
414 |
) + |
|
415 | 10x |
scale_shape_manual(name = NULL, values = pch) + |
416 | 10x |
guides(fill = guide_legend(override.aes = list(shape = NA))) |
417 |
}
|
|
418 | ||
419 |
# add ci ribbon
|
|
420 | 1x |
if (ci_ribbon) gg_plt <- gg_plt + geom_ribbon(alpha = 0.3, lty = 0, na.rm = TRUE) |
421 | ||
422 |
# control aesthetics
|
|
423 | 10x |
if (!is.null(col)) { |
424 | 1x |
gg_plt <- gg_plt + |
425 | 1x |
scale_color_manual(values = col) + |
426 | 1x |
scale_fill_manual(values = col) |
427 |
}
|
|
428 | ! |
if (!is.null(ggtheme)) gg_plt <- gg_plt + ggtheme |
429 | ||
430 |
# annotate with stats (text/vlines)
|
|
431 | 10x |
if (!is.null(annot_stats)) { |
432 | ! |
if ("median" %in% annot_stats) { |
433 | ! |
fit_km_all <- survival::survfit( |
434 | ! |
formula = stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", 1)), |
435 | ! |
data = df, |
436 | ! |
conf.int = control_surv$conf_level, |
437 | ! |
conf.type = control_surv$conf_type |
438 |
)
|
|
439 | ! |
gg_plt <- gg_plt + |
440 | ! |
annotate( |
441 | ! |
"text",
|
442 | ! |
size = font_size / .pt, col = 1, lineheight = 0.95, |
443 | ! |
x = stats::median(fit_km_all) + 0.07 * max(data$time), |
444 | ! |
y = ifelse(yval == "Survival", 0.65, 0.35), |
445 | ! |
label = paste("Median F/U:\n", round(stats::median(fit_km_all), 1), tolower(df$AVALU[1])) |
446 |
)
|
|
447 | ! |
if (annot_stats_vlines) { |
448 | ! |
gg_plt <- gg_plt + |
449 | ! |
annotate( |
450 | ! |
"segment",
|
451 | ! |
x = stats::median(fit_km_all), xend = stats::median(fit_km_all), y = -Inf, yend = Inf, |
452 | ! |
linetype = 2, col = "darkgray" |
453 |
)
|
|
454 |
}
|
|
455 |
}
|
|
456 | ! |
if ("min" %in% annot_stats) { |
457 | ! |
min_fu <- min(df[[tte]]) |
458 | ! |
gg_plt <- gg_plt + |
459 | ! |
annotate( |
460 | ! |
"text",
|
461 | ! |
size = font_size / .pt, col = 1, lineheight = 0.95, |
462 | ! |
x = min_fu + max(data$time) * 0.07, |
463 | ! |
y = ifelse(yval == "Survival", 0.96, 0.05), |
464 | ! |
label = paste("Min. F/U:\n", round(min_fu, 1), tolower(df$AVALU[1])) |
465 |
)
|
|
466 | ! |
if (annot_stats_vlines) { |
467 | ! |
gg_plt <- gg_plt + |
468 | ! |
annotate( |
469 | ! |
"segment",
|
470 | ! |
linetype = 2, col = "darkgray", |
471 | ! |
x = min_fu, xend = min_fu, y = Inf, yend = -Inf |
472 |
)
|
|
473 |
}
|
|
474 |
}
|
|
475 | ! |
gg_plt <- gg_plt + guides(fill = guide_legend(override.aes = list(shape = NA, label = ""))) |
476 |
}
|
|
477 | ||
478 |
# add at risk annotation table
|
|
479 | 10x |
if (annot_at_risk) { |
480 | 9x |
annot_tbl <- summary(fit_km, times = xticks, extend = TRUE) |
481 | 9x |
annot_tbl <- if (is.null(fit_km$strata)) { |
482 | ! |
data.frame( |
483 | ! |
n.risk = annot_tbl$n.risk, |
484 | ! |
time = annot_tbl$time, |
485 | ! |
strata = armval |
486 |
)
|
|
487 |
} else { |
|
488 | 9x |
strata_lst <- strsplit(sub("=", "equals", levels(annot_tbl$strata)), "equals") |
489 | 9x |
levels(annot_tbl$strata) <- matrix(unlist(strata_lst), ncol = 2, byrow = TRUE)[, 2] |
490 | 9x |
data.frame( |
491 | 9x |
n.risk = annot_tbl$n.risk, |
492 | 9x |
time = annot_tbl$time, |
493 | 9x |
strata = annot_tbl$strata |
494 |
)
|
|
495 |
}
|
|
496 | ||
497 | 9x |
at_risk_tbl <- as.data.frame(tidyr::pivot_wider(annot_tbl, names_from = "time", values_from = "n.risk")[, -1]) |
498 | 9x |
at_risk_tbl[is.na(at_risk_tbl)] <- 0 |
499 | 9x |
rownames(at_risk_tbl) <- levels(annot_tbl$strata) |
500 | ||
501 | 9x |
gg_at_risk <- df2gg( |
502 | 9x |
at_risk_tbl,
|
503 | 9x |
font_size = font_size, col_labels = FALSE, hline = FALSE, |
504 | 9x |
colwidths = rep(1, ncol(at_risk_tbl)) |
505 |
) + |
|
506 | 9x |
labs(title = if (annot_at_risk_title) "Patients at Risk:" else NULL, x = xlab) + |
507 | 9x |
theme_bw(base_size = font_size) + |
508 | 9x |
theme( |
509 | 9x |
plot.title = element_text(size = font_size, vjust = 3, face = "bold"), |
510 | 9x |
panel.border = element_blank(), |
511 | 9x |
panel.grid = element_blank(), |
512 | 9x |
axis.title.y = element_blank(), |
513 | 9x |
axis.ticks.y = element_blank(), |
514 | 9x |
axis.text.y = element_text(size = font_size, face = "italic", hjust = 1), |
515 | 9x |
axis.text.x = element_text(size = font_size), |
516 | 9x |
axis.line.x = element_line() |
517 |
) + |
|
518 | 9x |
coord_cartesian(clip = "off", ylim = c(0.5, nrow(at_risk_tbl))) |
519 | 9x |
gg_at_risk <- suppressMessages( |
520 | 9x |
gg_at_risk + |
521 | 9x |
scale_x_continuous(expand = c(0.025, 0), breaks = seq_along(at_risk_tbl) - 0.5, labels = xticks) + |
522 | 9x |
scale_y_continuous(labels = rev(levels(annot_tbl$strata)), breaks = seq_len(nrow(at_risk_tbl))) |
523 |
)
|
|
524 | ||
525 | 9x |
if (!as_list) { |
526 | 8x |
gg_plt <- cowplot::plot_grid( |
527 | 8x |
gg_plt,
|
528 | 8x |
gg_at_risk,
|
529 | 8x |
align = "v", |
530 | 8x |
axis = "tblr", |
531 | 8x |
ncol = 1, |
532 | 8x |
rel_heights = c(rel_height_plot, 1 - rel_height_plot) |
533 |
)
|
|
534 |
}
|
|
535 |
}
|
|
536 | ||
537 |
# add median survival time annotation table
|
|
538 | 10x |
if (annot_surv_med) { |
539 | 8x |
surv_med_tbl <- h_tbl_median_surv(fit_km = fit_km, armval = armval) |
540 | 8x |
bg_fill <- if (isTRUE(control_annot_surv_med[["fill"]])) "#00000020" else control_annot_surv_med[["fill"]] |
541 | ||
542 | 8x |
gg_surv_med <- df2gg(surv_med_tbl, font_size = font_size, colwidths = c(1, 1, 2), bg_fill = bg_fill) + |
543 | 8x |
theme( |
544 | 8x |
axis.text.y = element_text(size = font_size, face = "italic", hjust = 1), |
545 | 8x |
plot.margin = margin(0, 2, 0, 5) |
546 |
) + |
|
547 | 8x |
coord_cartesian(clip = "off", ylim = c(0.5, nrow(surv_med_tbl) + 1.5)) |
548 | 8x |
gg_surv_med <- suppressMessages( |
549 | 8x |
gg_surv_med + |
550 | 8x |
scale_x_continuous(expand = c(0.025, 0)) + |
551 | 8x |
scale_y_continuous(labels = rev(rownames(surv_med_tbl)), breaks = seq_len(nrow(surv_med_tbl))) |
552 |
)
|
|
553 | ||
554 | 8x |
gg_plt <- cowplot::ggdraw(gg_plt) + |
555 | 8x |
cowplot::draw_plot( |
556 | 8x |
gg_surv_med,
|
557 | 8x |
control_annot_surv_med[["x"]], |
558 | 8x |
control_annot_surv_med[["y"]], |
559 | 8x |
width = control_annot_surv_med[["w"]], |
560 | 8x |
height = control_annot_surv_med[["h"]], |
561 | 8x |
vjust = 0.5, |
562 | 8x |
hjust = 0.5 |
563 |
)
|
|
564 |
}
|
|
565 | ||
566 |
# add coxph annotation table
|
|
567 | 10x |
if (annot_coxph) { |
568 | 1x |
coxph_tbl <- h_tbl_coxph_pairwise( |
569 | 1x |
df = df, |
570 | 1x |
variables = variables, |
571 | 1x |
ref_group_coxph = ref_group_coxph, |
572 | 1x |
control_coxph_pw = control_coxph_pw, |
573 | 1x |
annot_coxph_ref_lbls = control_annot_coxph[["ref_lbls"]] |
574 |
)
|
|
575 | 1x |
bg_fill <- if (isTRUE(control_annot_coxph[["fill"]])) "#00000020" else control_annot_coxph[["fill"]] |
576 | ||
577 | 1x |
gg_coxph <- df2gg(coxph_tbl, font_size = font_size, colwidths = c(1.1, 1, 3), bg_fill = bg_fill) + |
578 | 1x |
theme( |
579 | 1x |
axis.text.y = element_text(size = font_size, face = "italic", hjust = 1), |
580 | 1x |
plot.margin = margin(0, 2, 0, 5) |
581 |
) + |
|
582 | 1x |
coord_cartesian(clip = "off", ylim = c(0.5, nrow(coxph_tbl) + 1.5)) |
583 | 1x |
gg_coxph <- suppressMessages( |
584 | 1x |
gg_coxph + |
585 | 1x |
scale_x_continuous(expand = c(0.025, 0)) + |
586 | 1x |
scale_y_continuous(labels = rev(rownames(coxph_tbl)), breaks = seq_len(nrow(coxph_tbl))) |
587 |
)
|
|
588 | ||
589 | 1x |
gg_plt <- cowplot::ggdraw(gg_plt) + |
590 | 1x |
cowplot::draw_plot( |
591 | 1x |
gg_coxph,
|
592 | 1x |
control_annot_coxph[["x"]], |
593 | 1x |
control_annot_coxph[["y"]], |
594 | 1x |
width = control_annot_coxph[["w"]], |
595 | 1x |
height = control_annot_coxph[["h"]], |
596 | 1x |
vjust = 0.5, |
597 | 1x |
hjust = 0.5 |
598 |
)
|
|
599 |
}
|
|
600 | ||
601 | 10x |
if (as_list) { |
602 | 1x |
list(plot = gg_plt, table = gg_at_risk) |
603 |
} else { |
|
604 | 9x |
gg_plt
|
605 |
}
|
|
606 |
}
|
1 |
#' Helper functions for subgroup treatment effect pattern (STEP) calculations
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Helper functions that are used internally for the STEP calculations.
|
|
6 |
#'
|
|
7 |
#' @inheritParams argument_convention
|
|
8 |
#'
|
|
9 |
#' @name h_step
|
|
10 |
#' @include control_step.R
|
|
11 |
NULL
|
|
12 | ||
13 |
#' @describeIn h_step Creates the windows for STEP, based on the control settings
|
|
14 |
#' provided.
|
|
15 |
#'
|
|
16 |
#' @param x (`numeric`)\cr biomarker value(s) to use (without `NA`).
|
|
17 |
#' @param control (named `list`)\cr output from `control_step()`.
|
|
18 |
#'
|
|
19 |
#' @return
|
|
20 |
#' * `h_step_window()` returns a list containing the window-selection matrix `sel`
|
|
21 |
#' and the interval information matrix `interval`.
|
|
22 |
#'
|
|
23 |
#' @export
|
|
24 |
h_step_window <- function(x, |
|
25 |
control = control_step()) { |
|
26 | 12x |
checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE) |
27 | 12x |
checkmate::assert_list(control, names = "named") |
28 | ||
29 | 12x |
sel <- matrix(FALSE, length(x), control$num_points) |
30 | 12x |
out <- matrix(0, control$num_points, 3) |
31 | 12x |
colnames(out) <- paste("Interval", c("Center", "Lower", "Upper")) |
32 | 12x |
if (control$use_percentile) { |
33 |
# Create windows according to percentile cutoffs.
|
|
34 | 9x |
out <- cbind(out, out) |
35 | 9x |
colnames(out)[1:3] <- paste("Percentile", c("Center", "Lower", "Upper")) |
36 | 9x |
xs <- seq(0, 1, length.out = control$num_points + 2)[-1] |
37 | 9x |
for (i in seq_len(control$num_points)) { |
38 | 185x |
out[i, 2:3] <- c( |
39 | 185x |
max(xs[i] - control$bandwidth, 0), |
40 | 185x |
min(xs[i] + control$bandwidth, 1) |
41 |
)
|
|
42 | 185x |
out[i, 5:6] <- stats::quantile(x, out[i, 2:3]) |
43 | 185x |
sel[, i] <- x >= out[i, 5] & x <= out[i, 6] |
44 |
}
|
|
45 |
# Center is the middle point of the percentile window.
|
|
46 | 9x |
out[, 1] <- xs[-control$num_points - 1] |
47 | 9x |
out[, 4] <- stats::quantile(x, out[, 1]) |
48 |
} else { |
|
49 |
# Create windows according to cutoffs.
|
|
50 | 3x |
m <- c(min(x), max(x)) |
51 | 3x |
xs <- seq(m[1], m[2], length.out = control$num_points + 2)[-1] |
52 | 3x |
for (i in seq_len(control$num_points)) { |
53 | 11x |
out[i, 2:3] <- c( |
54 | 11x |
max(xs[i] - control$bandwidth, m[1]), |
55 | 11x |
min(xs[i] + control$bandwidth, m[2]) |
56 |
)
|
|
57 | 11x |
sel[, i] <- x >= out[i, 2] & x <= out[i, 3] |
58 |
}
|
|
59 |
# Center is the same as the point for predicting.
|
|
60 | 3x |
out[, 1] <- xs[-control$num_points - 1] |
61 |
}
|
|
62 | 12x |
list(sel = sel, interval = out) |
63 |
}
|
|
64 | ||
65 |
#' @describeIn h_step Calculates the estimated treatment effect estimate
|
|
66 |
#' on the linear predictor scale and corresponding standard error from a STEP `model` fitted
|
|
67 |
#' on `data` given `variables` specification, for a single biomarker value `x`.
|
|
68 |
#' This works for both `coxph` and `glm` models, i.e. for calculating log hazard ratio or log odds
|
|
69 |
#' ratio estimates.
|
|
70 |
#'
|
|
71 |
#' @param model (`coxph` or `glm`)\cr the regression model object.
|
|
72 |
#'
|
|
73 |
#' @return
|
|
74 |
#' * `h_step_trt_effect()` returns a vector with elements `est` and `se`.
|
|
75 |
#'
|
|
76 |
#' @export
|
|
77 |
h_step_trt_effect <- function(data, |
|
78 |
model,
|
|
79 |
variables,
|
|
80 |
x) { |
|
81 | 208x |
checkmate::assert_multi_class(model, c("coxph", "glm")) |
82 | 208x |
checkmate::assert_number(x) |
83 | 208x |
assert_df_with_variables(data, variables) |
84 | 208x |
checkmate::assert_factor(data[[variables$arm]], n.levels = 2) |
85 | ||
86 | 208x |
newdata <- data[c(1, 1), ] |
87 | 208x |
newdata[, variables$biomarker] <- x |
88 | 208x |
newdata[, variables$arm] <- levels(data[[variables$arm]]) |
89 | 208x |
model_terms <- stats::delete.response(stats::terms(model)) |
90 | 208x |
model_frame <- stats::model.frame(model_terms, data = newdata, xlev = model$xlevels) |
91 | 208x |
mat <- stats::model.matrix(model_terms, data = model_frame, contrasts.arg = model$contrasts) |
92 | 208x |
coefs <- stats::coef(model) |
93 |
# Note: It is important to use the coef subset from matrix, otherwise intercept and
|
|
94 |
# strata are included for coxph() models.
|
|
95 | 208x |
mat <- mat[, names(coefs)] |
96 | 208x |
mat_diff <- diff(mat) |
97 | 208x |
est <- mat_diff %*% coefs |
98 | 208x |
var <- mat_diff %*% stats::vcov(model) %*% t(mat_diff) |
99 | 208x |
se <- sqrt(var) |
100 | 208x |
c( |
101 | 208x |
est = est, |
102 | 208x |
se = se |
103 |
)
|
|
104 |
}
|
|
105 | ||
106 |
#' @describeIn h_step Builds the model formula used in survival STEP calculations.
|
|
107 |
#'
|
|
108 |
#' @return
|
|
109 |
#' * `h_step_survival_formula()` returns a model formula.
|
|
110 |
#'
|
|
111 |
#' @export
|
|
112 |
h_step_survival_formula <- function(variables, |
|
113 |
control = control_step()) { |
|
114 | 10x |
checkmate::assert_character(variables$covariates, null.ok = TRUE) |
115 | ||
116 | 10x |
assert_list_of_variables(variables[c("arm", "biomarker", "event", "time")]) |
117 | 10x |
form <- paste0("Surv(", variables$time, ", ", variables$event, ") ~ ", variables$arm) |
118 | 10x |
if (control$degree > 0) { |
119 | 5x |
form <- paste0(form, " * stats::poly(", variables$biomarker, ", degree = ", control$degree, ", raw = TRUE)") |
120 |
}
|
|
121 | 10x |
if (!is.null(variables$covariates)) { |
122 | 6x |
form <- paste(form, "+", paste(variables$covariates, collapse = "+")) |
123 |
}
|
|
124 | 10x |
if (!is.null(variables$strata)) { |
125 | 2x |
form <- paste0(form, " + strata(", paste0(variables$strata, collapse = ", "), ")") |
126 |
}
|
|
127 | 10x |
stats::as.formula(form) |
128 |
}
|
|
129 | ||
130 |
#' @describeIn h_step Estimates the model with `formula` built based on
|
|
131 |
#' `variables` in `data` for a given `subset` and `control` parameters for the
|
|
132 |
#' Cox regression.
|
|
133 |
#'
|
|
134 |
#' @param formula (`formula`)\cr the regression model formula.
|
|
135 |
#' @param subset (`logical`)\cr subset vector.
|
|
136 |
#'
|
|
137 |
#' @return
|
|
138 |
#' * `h_step_survival_est()` returns a matrix of number of observations `n`,
|
|
139 |
#' `events`, log hazard ratio estimates `loghr`, standard error `se`,
|
|
140 |
#' and Wald confidence interval bounds `ci_lower` and `ci_upper`. One row is
|
|
141 |
#' included for each biomarker value in `x`.
|
|
142 |
#'
|
|
143 |
#' @export
|
|
144 |
h_step_survival_est <- function(formula, |
|
145 |
data,
|
|
146 |
variables,
|
|
147 |
x,
|
|
148 |
subset = rep(TRUE, nrow(data)), |
|
149 |
control = control_coxph()) { |
|
150 | 55x |
checkmate::assert_formula(formula) |
151 | 55x |
assert_df_with_variables(data, variables) |
152 | 55x |
checkmate::assert_logical(subset, min.len = 1, any.missing = FALSE) |
153 | 55x |
checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE) |
154 | 55x |
checkmate::assert_list(control, names = "named") |
155 | ||
156 |
# Note: `subset` in `coxph` needs to be an expression referring to `data` variables.
|
|
157 | 55x |
data$.subset <- subset |
158 | 55x |
coxph_warnings <- NULL |
159 | 55x |
tryCatch( |
160 | 55x |
withCallingHandlers( |
161 | 55x |
expr = { |
162 | 55x |
fit <- survival::coxph( |
163 | 55x |
formula = formula, |
164 | 55x |
data = data, |
165 | 55x |
subset = .subset, |
166 | 55x |
ties = control$ties |
167 |
)
|
|
168 |
},
|
|
169 | 55x |
warning = function(w) { |
170 | 1x |
coxph_warnings <<- c(coxph_warnings, w) |
171 | 1x |
invokeRestart("muffleWarning") |
172 |
}
|
|
173 |
),
|
|
174 | 55x |
finally = { |
175 |
}
|
|
176 |
)
|
|
177 | 55x |
if (!is.null(coxph_warnings)) { |
178 | 1x |
warning(paste( |
179 | 1x |
"Fit warnings occurred, please consider using a simpler model, or",
|
180 | 1x |
"larger `bandwidth`, less `num_points` in `control_step()` settings"
|
181 |
)) |
|
182 |
}
|
|
183 |
# Produce a matrix with one row per `x` and columns `est` and `se`.
|
|
184 | 55x |
estimates <- t(vapply( |
185 | 55x |
X = x, |
186 | 55x |
FUN = h_step_trt_effect, |
187 | 55x |
FUN.VALUE = c(1, 2), |
188 | 55x |
data = data, |
189 | 55x |
model = fit, |
190 | 55x |
variables = variables |
191 |
)) |
|
192 | 55x |
q_norm <- stats::qnorm((1 + control$conf_level) / 2) |
193 | 55x |
cbind( |
194 | 55x |
n = fit$n, |
195 | 55x |
events = fit$nevent, |
196 | 55x |
loghr = estimates[, "est"], |
197 | 55x |
se = estimates[, "se"], |
198 | 55x |
ci_lower = estimates[, "est"] - q_norm * estimates[, "se"], |
199 | 55x |
ci_upper = estimates[, "est"] + q_norm * estimates[, "se"] |
200 |
)
|
|
201 |
}
|
|
202 | ||
203 |
#' @describeIn h_step Builds the model formula used in response STEP calculations.
|
|
204 |
#'
|
|
205 |
#' @return
|
|
206 |
#' * `h_step_rsp_formula()` returns a model formula.
|
|
207 |
#'
|
|
208 |
#' @export
|
|
209 |
h_step_rsp_formula <- function(variables, |
|
210 |
control = c(control_step(), control_logistic())) { |
|
211 | 14x |
checkmate::assert_character(variables$covariates, null.ok = TRUE) |
212 | 14x |
assert_list_of_variables(variables[c("arm", "biomarker", "response")]) |
213 | 14x |
response_definition <- sub( |
214 | 14x |
pattern = "response", |
215 | 14x |
replacement = variables$response, |
216 | 14x |
x = control$response_definition, |
217 | 14x |
fixed = TRUE |
218 |
)
|
|
219 | 14x |
form <- paste0(response_definition, " ~ ", variables$arm) |
220 | 14x |
if (control$degree > 0) { |
221 | 8x |
form <- paste0(form, " * stats::poly(", variables$biomarker, ", degree = ", control$degree, ", raw = TRUE)") |
222 |
}
|
|
223 | 14x |
if (!is.null(variables$covariates)) { |
224 | 8x |
form <- paste(form, "+", paste(variables$covariates, collapse = "+")) |
225 |
}
|
|
226 | 14x |
if (!is.null(variables$strata)) { |
227 | 5x |
strata_arg <- if (length(variables$strata) > 1) { |
228 | 2x |
paste0("I(interaction(", paste0(variables$strata, collapse = ", "), "))") |
229 |
} else { |
|
230 | 3x |
variables$strata |
231 |
}
|
|
232 | 5x |
form <- paste0(form, "+ strata(", strata_arg, ")") |
233 |
}
|
|
234 | 14x |
stats::as.formula(form) |
235 |
}
|
|
236 | ||
237 |
#' @describeIn h_step Estimates the model with `formula` built based on
|
|
238 |
#' `variables` in `data` for a given `subset` and `control` parameters for the
|
|
239 |
#' logistic regression.
|
|
240 |
#'
|
|
241 |
#' @param formula (`formula`)\cr the regression model formula.
|
|
242 |
#' @param subset (`logical`)\cr subset vector.
|
|
243 |
#'
|
|
244 |
#' @return
|
|
245 |
#' * `h_step_rsp_est()` returns a matrix of number of observations `n`, log odds
|
|
246 |
#' ratio estimates `logor`, standard error `se`, and Wald confidence interval bounds
|
|
247 |
#' `ci_lower` and `ci_upper`. One row is included for each biomarker value in `x`.
|
|
248 |
#'
|
|
249 |
#' @export
|
|
250 |
h_step_rsp_est <- function(formula, |
|
251 |
data,
|
|
252 |
variables,
|
|
253 |
x,
|
|
254 |
subset = rep(TRUE, nrow(data)), |
|
255 |
control = control_logistic()) { |
|
256 | 58x |
checkmate::assert_formula(formula) |
257 | 58x |
assert_df_with_variables(data, variables) |
258 | 58x |
checkmate::assert_logical(subset, min.len = 1, any.missing = FALSE) |
259 | 58x |
checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE) |
260 | 58x |
checkmate::assert_list(control, names = "named") |
261 |
# Note: `subset` in `glm` needs to be an expression referring to `data` variables.
|
|
262 | 58x |
data$.subset <- subset |
263 | 58x |
fit_warnings <- NULL |
264 | 58x |
tryCatch( |
265 | 58x |
withCallingHandlers( |
266 | 58x |
expr = { |
267 | 58x |
fit <- if (is.null(variables$strata)) { |
268 | 54x |
stats::glm( |
269 | 54x |
formula = formula, |
270 | 54x |
data = data, |
271 | 54x |
subset = .subset, |
272 | 54x |
family = stats::binomial("logit") |
273 |
)
|
|
274 |
} else { |
|
275 |
# clogit needs coxph and strata imported
|
|
276 | 4x |
survival::clogit( |
277 | 4x |
formula = formula, |
278 | 4x |
data = data, |
279 | 4x |
subset = .subset |
280 |
)
|
|
281 |
}
|
|
282 |
},
|
|
283 | 58x |
warning = function(w) { |
284 | 19x |
fit_warnings <<- c(fit_warnings, w) |
285 | 19x |
invokeRestart("muffleWarning") |
286 |
}
|
|
287 |
),
|
|
288 | 58x |
finally = { |
289 |
}
|
|
290 |
)
|
|
291 | 58x |
if (!is.null(fit_warnings)) { |
292 | 13x |
warning(paste( |
293 | 13x |
"Fit warnings occurred, please consider using a simpler model, or",
|
294 | 13x |
"larger `bandwidth`, less `num_points` in `control_step()` settings"
|
295 |
)) |
|
296 |
}
|
|
297 |
# Produce a matrix with one row per `x` and columns `est` and `se`.
|
|
298 | 58x |
estimates <- t(vapply( |
299 | 58x |
X = x, |
300 | 58x |
FUN = h_step_trt_effect, |
301 | 58x |
FUN.VALUE = c(1, 2), |
302 | 58x |
data = data, |
303 | 58x |
model = fit, |
304 | 58x |
variables = variables |
305 |
)) |
|
306 | 58x |
q_norm <- stats::qnorm((1 + control$conf_level) / 2) |
307 | 58x |
cbind( |
308 | 58x |
n = length(fit$y), |
309 | 58x |
logor = estimates[, "est"], |
310 | 58x |
se = estimates[, "se"], |
311 | 58x |
ci_lower = estimates[, "est"] - q_norm * estimates[, "se"], |
312 | 58x |
ci_upper = estimates[, "est"] + q_norm * estimates[, "se"] |
313 |
)
|
|
314 |
}
|
1 |
#' Horizontal waterfall plot
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' This basic waterfall plot visualizes a quantity `height` ordered by value with some markup.
|
|
6 |
#'
|
|
7 |
#' @param height (`numeric`)\cr vector containing values to be plotted as the waterfall bars.
|
|
8 |
#' @param id (`character`)\cr vector containing identifiers to use as the x-axis label for the waterfall bars.
|
|
9 |
#' @param col (`character`)\cr color(s).
|
|
10 |
#' @param col_var (`factor`, `character`, or `NULL`)\cr categorical variable for bar coloring. `NULL` by default.
|
|
11 |
#' @param xlab (`string`)\cr x label. Default is `"ID"`.
|
|
12 |
#' @param ylab (`string`)\cr y label. Default is `"Value"`.
|
|
13 |
#' @param title (`string`)\cr text to be displayed as plot title.
|
|
14 |
#' @param col_legend_title (`string`)\cr text to be displayed as legend title.
|
|
15 |
#'
|
|
16 |
#' @return A `ggplot` waterfall plot.
|
|
17 |
#'
|
|
18 |
#' @examples
|
|
19 |
#' library(dplyr)
|
|
20 |
#'
|
|
21 |
#' g_waterfall(height = c(3, 5, -1), id = letters[1:3])
|
|
22 |
#'
|
|
23 |
#' g_waterfall(
|
|
24 |
#' height = c(3, 5, -1),
|
|
25 |
#' id = letters[1:3],
|
|
26 |
#' col_var = letters[1:3]
|
|
27 |
#' )
|
|
28 |
#'
|
|
29 |
#' adsl_f <- tern_ex_adsl %>%
|
|
30 |
#' select(USUBJID, STUDYID, ARM, ARMCD, SEX)
|
|
31 |
#'
|
|
32 |
#' adrs_f <- tern_ex_adrs %>%
|
|
33 |
#' filter(PARAMCD == "OVRINV") %>%
|
|
34 |
#' mutate(pchg = rnorm(n(), 10, 50))
|
|
35 |
#'
|
|
36 |
#' adrs_f <- head(adrs_f, 30)
|
|
37 |
#' adrs_f <- adrs_f[!duplicated(adrs_f$USUBJID), ]
|
|
38 |
#' head(adrs_f)
|
|
39 |
#'
|
|
40 |
#' g_waterfall(
|
|
41 |
#' height = adrs_f$pchg,
|
|
42 |
#' id = adrs_f$USUBJID,
|
|
43 |
#' col_var = adrs_f$AVALC
|
|
44 |
#' )
|
|
45 |
#'
|
|
46 |
#' g_waterfall(
|
|
47 |
#' height = adrs_f$pchg,
|
|
48 |
#' id = paste("asdfdsfdsfsd", adrs_f$USUBJID),
|
|
49 |
#' col_var = adrs_f$SEX
|
|
50 |
#' )
|
|
51 |
#'
|
|
52 |
#' g_waterfall(
|
|
53 |
#' height = adrs_f$pchg,
|
|
54 |
#' id = paste("asdfdsfdsfsd", adrs_f$USUBJID),
|
|
55 |
#' xlab = "ID",
|
|
56 |
#' ylab = "Percentage Change",
|
|
57 |
#' title = "Waterfall plot"
|
|
58 |
#' )
|
|
59 |
#'
|
|
60 |
#' @export
|
|
61 |
g_waterfall <- function(height, |
|
62 |
id,
|
|
63 |
col_var = NULL, |
|
64 |
col = getOption("ggplot2.discrete.colour"), |
|
65 |
xlab = NULL, |
|
66 |
ylab = NULL, |
|
67 |
col_legend_title = NULL, |
|
68 |
title = NULL) { |
|
69 | 2x |
if (!is.null(col_var)) { |
70 | 1x |
check_same_n(height = height, id = id, col_var = col_var) |
71 |
} else { |
|
72 | 1x |
check_same_n(height = height, id = id) |
73 |
}
|
|
74 | ||
75 | 2x |
checkmate::assert_multi_class(col_var, c("character", "factor"), null.ok = TRUE) |
76 | 2x |
checkmate::assert_character(col, null.ok = TRUE) |
77 | ||
78 | 2x |
xlabel <- deparse(substitute(id)) |
79 | 2x |
ylabel <- deparse(substitute(height)) |
80 | ||
81 | 2x |
col_label <- if (!missing(col_var)) { |
82 | 1x |
deparse(substitute(col_var)) |
83 |
}
|
|
84 | ||
85 | 2x |
xlab <- if (is.null(xlab)) xlabel else xlab |
86 | 2x |
ylab <- if (is.null(ylab)) ylabel else ylab |
87 | 2x |
col_legend_title <- if (is.null(col_legend_title)) col_label else col_legend_title |
88 | ||
89 | 2x |
plot_data <- data.frame( |
90 | 2x |
height = height, |
91 | 2x |
id = as.character(id), |
92 | 2x |
col_var = if (is.null(col_var)) "x" else to_n(col_var, length(height)), |
93 | 2x |
stringsAsFactors = FALSE |
94 |
)
|
|
95 | ||
96 | 2x |
plot_data_ord <- plot_data[order(plot_data$height, decreasing = TRUE), ] |
97 | ||
98 | 2x |
p <- ggplot2::ggplot(plot_data_ord, ggplot2::aes(x = factor(id, levels = id), y = height)) + |
99 | 2x |
ggplot2::geom_col() + |
100 | 2x |
ggplot2::geom_text( |
101 | 2x |
label = format(plot_data_ord$height, digits = 2), |
102 | 2x |
vjust = ifelse(plot_data_ord$height >= 0, -0.5, 1.5) |
103 |
) + |
|
104 | 2x |
ggplot2::xlab(xlab) + |
105 | 2x |
ggplot2::ylab(ylab) + |
106 | 2x |
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, hjust = 0, vjust = .5)) |
107 | ||
108 | 2x |
if (!is.null(col_var)) { |
109 | 1x |
p <- p + |
110 | 1x |
ggplot2::aes(fill = col_var) + |
111 | 1x |
ggplot2::labs(fill = col_legend_title) + |
112 | 1x |
ggplot2::theme( |
113 | 1x |
legend.position = "bottom", |
114 | 1x |
legend.background = ggplot2::element_blank(), |
115 | 1x |
legend.title = ggplot2::element_text(face = "bold"), |
116 | 1x |
legend.box.background = ggplot2::element_rect(colour = "black") |
117 |
)
|
|
118 |
}
|
|
119 | ||
120 | 2x |
if (!is.null(col)) { |
121 | 1x |
p <- p + |
122 | 1x |
ggplot2::scale_fill_manual(values = col) |
123 |
}
|
|
124 | ||
125 | 2x |
if (!is.null(title)) { |
126 | 1x |
p <- p + |
127 | 1x |
ggplot2::labs(title = title) + |
128 | 1x |
ggplot2::theme(plot.title = ggplot2::element_text(face = "bold")) |
129 |
}
|
|
130 | ||
131 | 2x |
p
|
132 |
}
|
1 |
#' Tabulate biomarker effects on binary response by subgroup
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The [tabulate_rsp_biomarkers()] function creates a layout element to tabulate the estimated biomarker effects on a
|
|
6 |
#' binary response endpoint across subgroups, returning statistics including response rate and odds ratio for each
|
|
7 |
#' population subgroup. The table is created from `df`, a list of data frames returned by [extract_rsp_biomarkers()],
|
|
8 |
#' with the statistics to include specified via the `vars` parameter.
|
|
9 |
#'
|
|
10 |
#' A forest plot can be created from the resulting table using the [g_forest()] function.
|
|
11 |
#'
|
|
12 |
#' @inheritParams argument_convention
|
|
13 |
#' @param df (`data.frame`)\cr containing all analysis variables, as returned by
|
|
14 |
#' [extract_rsp_biomarkers()].
|
|
15 |
#' @param vars (`character`)\cr the names of statistics to be reported among:
|
|
16 |
#' * `n_tot`: Total number of patients per group.
|
|
17 |
#' * `n_rsp`: Total number of responses per group.
|
|
18 |
#' * `prop`: Total response proportion per group.
|
|
19 |
#' * `or`: Odds ratio.
|
|
20 |
#' * `ci`: Confidence interval of odds ratio.
|
|
21 |
#' * `pval`: p-value of the effect.
|
|
22 |
#' Note, the statistics `n_tot`, `or` and `ci` are required.
|
|
23 |
#'
|
|
24 |
#' @return An `rtables` table summarizing biomarker effects on binary response by subgroup.
|
|
25 |
#'
|
|
26 |
#' @details These functions create a layout starting from a data frame which contains
|
|
27 |
#' the required statistics. The tables are then typically used as input for forest plots.
|
|
28 |
#'
|
|
29 |
#' @note In contrast to [tabulate_rsp_subgroups()] this tabulation function does
|
|
30 |
#' not start from an input layout `lyt`. This is because internally the table is
|
|
31 |
#' created by combining multiple subtables.
|
|
32 |
#'
|
|
33 |
#' @seealso [extract_rsp_biomarkers()]
|
|
34 |
#'
|
|
35 |
#' @examples
|
|
36 |
#' library(dplyr)
|
|
37 |
#' library(forcats)
|
|
38 |
#'
|
|
39 |
#' adrs <- tern_ex_adrs
|
|
40 |
#' adrs_labels <- formatters::var_labels(adrs)
|
|
41 |
#'
|
|
42 |
#' adrs_f <- adrs %>%
|
|
43 |
#' filter(PARAMCD == "BESRSPI") %>%
|
|
44 |
#' mutate(rsp = AVALC == "CR")
|
|
45 |
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
|
|
46 |
#'
|
|
47 |
#' df <- extract_rsp_biomarkers(
|
|
48 |
#' variables = list(
|
|
49 |
#' rsp = "rsp",
|
|
50 |
#' biomarkers = c("BMRKR1", "AGE"),
|
|
51 |
#' covariates = "SEX",
|
|
52 |
#' subgroups = "BMRKR2"
|
|
53 |
#' ),
|
|
54 |
#' data = adrs_f
|
|
55 |
#' )
|
|
56 |
#'
|
|
57 |
#' \donttest{
|
|
58 |
#' ## Table with default columns.
|
|
59 |
#' tabulate_rsp_biomarkers(df)
|
|
60 |
#'
|
|
61 |
#' ## Table with a manually chosen set of columns: leave out "pval", reorder.
|
|
62 |
#' tab <- tabulate_rsp_biomarkers(
|
|
63 |
#' df = df,
|
|
64 |
#' vars = c("n_rsp", "ci", "n_tot", "prop", "or")
|
|
65 |
#' )
|
|
66 |
#'
|
|
67 |
#' ## Finally produce the forest plot.
|
|
68 |
#' g_forest(tab, xlim = c(0.7, 1.4))
|
|
69 |
#' }
|
|
70 |
#'
|
|
71 |
#' @export
|
|
72 |
#' @name response_biomarkers_subgroups
|
|
73 |
tabulate_rsp_biomarkers <- function(df, |
|
74 |
vars = c("n_tot", "n_rsp", "prop", "or", "ci", "pval"), |
|
75 |
na_str = default_na_str(), |
|
76 |
...,
|
|
77 |
.stat_names = NULL, |
|
78 |
.formats = NULL, |
|
79 |
.labels = NULL, |
|
80 |
.indent_mods = NULL) { |
|
81 | 4x |
checkmate::assert_data_frame(df) |
82 | 4x |
checkmate::assert_character(df$biomarker) |
83 | 4x |
checkmate::assert_character(df$biomarker_label) |
84 | 4x |
checkmate::assert_subset(vars, get_stats("tabulate_rsp_biomarkers")) |
85 | ||
86 |
# Process standard extra arguments
|
|
87 | 4x |
extra_args <- list(".stats" = vars) |
88 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
89 | ! |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
90 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
91 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
92 | ||
93 | 4x |
colvars <- d_rsp_subgroups_colvars( |
94 | 4x |
vars,
|
95 | 4x |
conf_level = df$conf_level[1], |
96 | 4x |
method = df$pval_label[1] |
97 |
)
|
|
98 | ||
99 |
# Process additional arguments to the statistic function
|
|
100 | 4x |
extra_args <- c(extra_args, biomarker = TRUE, ...) |
101 | ||
102 |
# Adding additional info from layout to analysis function
|
|
103 | 4x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
104 | 4x |
formals(a_response_subgroups) <- c(formals(a_response_subgroups), extra_args[[".additional_fun_parameters"]]) |
105 | ||
106 |
# Create "ci" column from "lcl" and "ucl"
|
|
107 | 4x |
df$ci <- combine_vectors(df$lcl, df$ucl) |
108 | ||
109 | 4x |
df_subs <- split(df, f = df$biomarker) |
110 | 4x |
tbls <- lapply( |
111 | 4x |
df_subs,
|
112 | 4x |
function(df) { |
113 | 7x |
lyt <- basic_table() |
114 | ||
115 |
# Split cols by the multiple variables to populate into columns.
|
|
116 | 7x |
lyt <- split_cols_by_multivar( |
117 | 7x |
lyt = lyt, |
118 | 7x |
vars = colvars$vars, |
119 | 7x |
varlabels = colvars$labels |
120 |
)
|
|
121 | ||
122 |
# Row split by biomarker
|
|
123 | 7x |
lyt <- split_rows_by( |
124 | 7x |
lyt = lyt, |
125 | 7x |
var = "biomarker_label", |
126 | 7x |
nested = FALSE |
127 |
)
|
|
128 | ||
129 |
# Add "All Patients" row
|
|
130 | 7x |
lyt <- split_rows_by( |
131 | 7x |
lyt = lyt, |
132 | 7x |
var = "row_type", |
133 | 7x |
split_fun = keep_split_levels("content"), |
134 | 7x |
nested = TRUE, |
135 | 7x |
child_labels = "hidden" |
136 |
)
|
|
137 | 7x |
lyt <- analyze_colvars( |
138 | 7x |
lyt = lyt, |
139 | 7x |
afun = a_response_subgroups, |
140 | 7x |
na_str = na_str, |
141 | 7x |
extra_args = c(extra_args, overall = TRUE) |
142 |
)
|
|
143 | ||
144 |
# Add analysis rows
|
|
145 | 7x |
if ("analysis" %in% df$row_type) { |
146 | 4x |
lyt <- split_rows_by( |
147 | 4x |
lyt = lyt, |
148 | 4x |
var = "row_type", |
149 | 4x |
split_fun = keep_split_levels("analysis"), |
150 | 4x |
nested = TRUE, |
151 | 4x |
child_labels = "hidden" |
152 |
)
|
|
153 | 4x |
lyt <- split_rows_by( |
154 | 4x |
lyt = lyt, |
155 | 4x |
var = "var_label", |
156 | 4x |
nested = TRUE, |
157 | 4x |
indent_mod = 1L |
158 |
)
|
|
159 | 4x |
lyt <- analyze_colvars( |
160 | 4x |
lyt = lyt, |
161 | 4x |
afun = a_response_subgroups, |
162 | 4x |
na_str = na_str, |
163 | 4x |
inclNAs = TRUE, |
164 | 4x |
extra_args = extra_args |
165 |
)
|
|
166 |
}
|
|
167 | 7x |
build_table(lyt, df = df) |
168 |
}
|
|
169 |
)
|
|
170 | ||
171 | 4x |
result <- do.call(rbind, tbls) |
172 | ||
173 | 4x |
n_id <- grep("n_tot", vars) |
174 | 4x |
or_id <- match("or", vars) |
175 | 4x |
ci_id <- match("ci", vars) |
176 | 4x |
structure( |
177 | 4x |
result,
|
178 | 4x |
forest_header = paste0(c("Lower", "Higher"), "\nBetter"), |
179 | 4x |
col_x = or_id, |
180 | 4x |
col_ci = ci_id, |
181 | 4x |
col_symbol_size = n_id |
182 |
)
|
|
183 |
}
|
|
184 | ||
185 |
#' Prepare response data estimates for multiple biomarkers in a single data frame
|
|
186 |
#'
|
|
187 |
#' @description `r lifecycle::badge("stable")`
|
|
188 |
#'
|
|
189 |
#' Prepares estimates for number of responses, patients and overall response rate,
|
|
190 |
#' as well as odds ratio estimates, confidence intervals and p-values,
|
|
191 |
#' for multiple biomarkers across population subgroups in a single data frame.
|
|
192 |
#' `variables` corresponds to the names of variables found in `data`, passed as a
|
|
193 |
#' named list and requires elements `rsp` and `biomarkers` (vector of continuous
|
|
194 |
#' biomarker variables) and optionally `covariates`, `subgroups` and `strata`.
|
|
195 |
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
|
|
196 |
#'
|
|
197 |
#' @inheritParams argument_convention
|
|
198 |
#' @inheritParams response_subgroups
|
|
199 |
#' @param control (named `list`)\cr controls for the response definition and the
|
|
200 |
#' confidence level produced by [control_logistic()].
|
|
201 |
#'
|
|
202 |
#' @return A `data.frame` with columns `biomarker`, `biomarker_label`, `n_tot`, `n_rsp`,
|
|
203 |
#' `prop`, `or`, `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`,
|
|
204 |
#' `var_label`, and `row_type`.
|
|
205 |
#'
|
|
206 |
#' @note You can also specify a continuous variable in `rsp` and then use the
|
|
207 |
#' `response_definition` control to convert that internally to a logical
|
|
208 |
#' variable reflecting binary response.
|
|
209 |
#'
|
|
210 |
#' @seealso [h_logistic_mult_cont_df()] which is used internally.
|
|
211 |
#'
|
|
212 |
#' @examples
|
|
213 |
#' library(dplyr)
|
|
214 |
#' library(forcats)
|
|
215 |
#'
|
|
216 |
#' adrs <- tern_ex_adrs
|
|
217 |
#' adrs_labels <- formatters::var_labels(adrs)
|
|
218 |
#'
|
|
219 |
#' adrs_f <- adrs %>%
|
|
220 |
#' filter(PARAMCD == "BESRSPI") %>%
|
|
221 |
#' mutate(rsp = AVALC == "CR")
|
|
222 |
#'
|
|
223 |
#' # Typical analysis of two continuous biomarkers `BMRKR1` and `AGE`,
|
|
224 |
#' # in logistic regression models with one covariate `RACE`. The subgroups
|
|
225 |
#' # are defined by the levels of `BMRKR2`.
|
|
226 |
#' df <- extract_rsp_biomarkers(
|
|
227 |
#' variables = list(
|
|
228 |
#' rsp = "rsp",
|
|
229 |
#' biomarkers = c("BMRKR1", "AGE"),
|
|
230 |
#' covariates = "SEX",
|
|
231 |
#' subgroups = "BMRKR2"
|
|
232 |
#' ),
|
|
233 |
#' data = adrs_f
|
|
234 |
#' )
|
|
235 |
#' df
|
|
236 |
#'
|
|
237 |
#' # Here we group the levels of `BMRKR2` manually, and we add a stratification
|
|
238 |
#' # variable `STRATA1`. We also here use a continuous variable `EOSDY`
|
|
239 |
#' # which is then binarized internally (response is defined as this variable
|
|
240 |
#' # being larger than 750).
|
|
241 |
#' df_grouped <- extract_rsp_biomarkers(
|
|
242 |
#' variables = list(
|
|
243 |
#' rsp = "EOSDY",
|
|
244 |
#' biomarkers = c("BMRKR1", "AGE"),
|
|
245 |
#' covariates = "SEX",
|
|
246 |
#' subgroups = "BMRKR2",
|
|
247 |
#' strata = "STRATA1"
|
|
248 |
#' ),
|
|
249 |
#' data = adrs_f,
|
|
250 |
#' groups_lists = list(
|
|
251 |
#' BMRKR2 = list(
|
|
252 |
#' "low" = "LOW",
|
|
253 |
#' "low/medium" = c("LOW", "MEDIUM"),
|
|
254 |
#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
|
|
255 |
#' )
|
|
256 |
#' ),
|
|
257 |
#' control = control_logistic(
|
|
258 |
#' response_definition = "I(response > 750)"
|
|
259 |
#' )
|
|
260 |
#' )
|
|
261 |
#' df_grouped
|
|
262 |
#'
|
|
263 |
#' @export
|
|
264 |
extract_rsp_biomarkers <- function(variables, |
|
265 |
data,
|
|
266 |
groups_lists = list(), |
|
267 |
control = control_logistic(), |
|
268 |
label_all = "All Patients") { |
|
269 | 5x |
if ("strat" %in% names(variables)) { |
270 | ! |
warning( |
271 | ! |
"Warning: the `strat` element name of the `variables` list argument to `extract_rsp_biomarkers() ",
|
272 | ! |
"was deprecated in tern 0.9.4.\n ",
|
273 | ! |
"Please use the name `strata` instead of `strat` in the `variables` argument."
|
274 |
)
|
|
275 | ! |
variables[["strata"]] <- variables[["strat"]] |
276 |
}
|
|
277 | ||
278 | 5x |
assert_list_of_variables(variables) |
279 | 5x |
checkmate::assert_string(variables$rsp) |
280 | 5x |
checkmate::assert_character(variables$subgroups, null.ok = TRUE) |
281 | 5x |
checkmate::assert_string(label_all) |
282 | ||
283 |
# Start with all patients.
|
|
284 | 5x |
result_all <- h_logistic_mult_cont_df( |
285 | 5x |
variables = variables, |
286 | 5x |
data = data, |
287 | 5x |
control = control |
288 |
)
|
|
289 | 5x |
result_all$subgroup <- label_all |
290 | 5x |
result_all$var <- "ALL" |
291 | 5x |
result_all$var_label <- label_all |
292 | 5x |
result_all$row_type <- "content" |
293 | 5x |
if (is.null(variables$subgroups)) { |
294 |
# Only return result for all patients.
|
|
295 | 1x |
result_all
|
296 |
} else { |
|
297 |
# Add subgroups results.
|
|
298 | 4x |
l_data <- h_split_by_subgroups( |
299 | 4x |
data,
|
300 | 4x |
variables$subgroups, |
301 | 4x |
groups_lists = groups_lists |
302 |
)
|
|
303 | 4x |
l_result <- lapply(l_data, function(grp) { |
304 | 20x |
result <- h_logistic_mult_cont_df( |
305 | 20x |
variables = variables, |
306 | 20x |
data = grp$df, |
307 | 20x |
control = control |
308 |
)
|
|
309 | 20x |
result_labels <- grp$df_labels[rep(1, times = nrow(result)), ] |
310 | 20x |
cbind(result, result_labels) |
311 |
}) |
|
312 | 4x |
result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE)) |
313 | 4x |
result_subgroups$row_type <- "analysis" |
314 | 4x |
rbind( |
315 | 4x |
result_all,
|
316 | 4x |
result_subgroups
|
317 |
)
|
|
318 |
}
|
|
319 |
}
|
1 |
#' Count patients with marked laboratory abnormalities
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [count_abnormal_by_marked()] creates a layout element to count patients with marked laboratory
|
|
6 |
#' abnormalities for each direction of abnormality, categorized by parameter value.
|
|
7 |
#'
|
|
8 |
#' This function analyzes primary analysis variable `var` which indicates whether a single, replicated,
|
|
9 |
#' or last marked laboratory abnormality was observed. Levels of `var` to include for each marked lab
|
|
10 |
#' abnormality (`single` and `last_replicated`) can be supplied via the `category` parameter. Additional
|
|
11 |
#' analysis variables that can be supplied as a list via the `variables` parameter are `id` (defaults
|
|
12 |
#' to `USUBJID`), a variable to indicate unique subject identifiers, `param` (defaults to `PARAM`), a
|
|
13 |
#' variable to indicate parameter values, and `direction` (defaults to `abn_dir`), a variable to indicate
|
|
14 |
#' abnormality directions.
|
|
15 |
#'
|
|
16 |
#' For each combination of `param` and `direction` levels, marked lab abnormality counts are calculated
|
|
17 |
#' as follows:
|
|
18 |
#' * `Single, not last` & `Last or replicated`: The number of patients with `Single, not last`
|
|
19 |
#' and `Last or replicated` values, respectively.
|
|
20 |
#' * `Any`: The number of patients with either single or replicated marked abnormalities.
|
|
21 |
#'
|
|
22 |
#' Fractions are calculated by dividing the above counts by the number of patients with at least one
|
|
23 |
#' valid measurement recorded during the analysis.
|
|
24 |
#'
|
|
25 |
#' Prior to using this function in your table layout you must use [rtables::split_rows_by()] to create two
|
|
26 |
#' row splits, one on variable `param` and one on variable `direction`.
|
|
27 |
#'
|
|
28 |
#' @inheritParams argument_convention
|
|
29 |
#' @param category (`list`)\cr a list with different marked category names for single
|
|
30 |
#' and last or replicated.
|
|
31 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
32 |
#'
|
|
33 |
#' Options are: ``r shQuote(get_stats("abnormal_by_marked"), type = "sh")``
|
|
34 |
#'
|
|
35 |
#' @note `Single, not last` and `Last or replicated` levels are mutually exclusive. If a patient has
|
|
36 |
#' abnormalities that meet both the `Single, not last` and `Last or replicated` criteria, then the
|
|
37 |
#' patient will be counted only under the `Last or replicated` category.
|
|
38 |
#'
|
|
39 |
#' @name abnormal_by_marked
|
|
40 |
#' @order 1
|
|
41 |
NULL
|
|
42 | ||
43 |
#' @describeIn abnormal_by_marked Statistics function for patients with marked lab abnormalities.
|
|
44 |
#'
|
|
45 |
#' @return
|
|
46 |
#' * `s_count_abnormal_by_marked()` returns statistic `count_fraction` with `Single, not last`,
|
|
47 |
#' `Last or replicated`, and `Any` results.
|
|
48 |
#'
|
|
49 |
#' @keywords internal
|
|
50 |
s_count_abnormal_by_marked <- function(df, |
|
51 |
.var = "AVALCAT1", |
|
52 |
.spl_context,
|
|
53 |
category = list(single = "SINGLE", last_replicated = c("LAST", "REPLICATED")), |
|
54 |
variables = list(id = "USUBJID", param = "PARAM", direction = "abn_dir"), |
|
55 |
...) { |
|
56 | 9x |
checkmate::assert_string(.var) |
57 | 9x |
checkmate::assert_list(variables) |
58 | 9x |
checkmate::assert_list(category) |
59 | 9x |
checkmate::assert_subset(names(category), c("single", "last_replicated")) |
60 | 9x |
checkmate::assert_subset(names(variables), c("id", "param", "direction")) |
61 | 9x |
checkmate::assert_vector(unique(df[[variables$direction]]), max.len = 1) |
62 | ||
63 | 8x |
assert_df_with_variables(df, c(aval = .var, variables)) |
64 | 8x |
checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character")) |
65 | 8x |
checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character")) |
66 | ||
67 | ||
68 | 8x |
first_row <- .spl_context[.spl_context$split == variables[["param"]], ] |
69 |
# Patients in the denominator have at least one post-baseline visit.
|
|
70 | 8x |
subj <- first_row$full_parent_df[[1]][[variables[["id"]]]] |
71 | 8x |
subj_cur_col <- subj[first_row$cur_col_subset[[1]]] |
72 |
# Some subjects may have a record for high and low directions but
|
|
73 |
# should be counted only once.
|
|
74 | 8x |
denom <- length(unique(subj_cur_col)) |
75 | ||
76 | 8x |
if (denom != 0) { |
77 | 8x |
subjects_last_replicated <- unique( |
78 | 8x |
df[df[[.var]] %in% category[["last_replicated"]], variables$id, drop = TRUE] |
79 |
)
|
|
80 | 8x |
subjects_single <- unique( |
81 | 8x |
df[df[[.var]] %in% category[["single"]], variables$id, drop = TRUE] |
82 |
)
|
|
83 |
# Subjects who have both single and last/replicated abnormalities are counted in only the last/replicated group.
|
|
84 | 8x |
subjects_single <- setdiff(subjects_single, subjects_last_replicated) |
85 | 8x |
n_single <- length(subjects_single) |
86 | 8x |
n_last_replicated <- length(subjects_last_replicated) |
87 | 8x |
n_any <- n_single + n_last_replicated |
88 | 8x |
result <- list(count_fraction = list( |
89 | 8x |
"Single, not last" = c(n_single, n_single / denom), |
90 | 8x |
"Last or replicated" = c(n_last_replicated, n_last_replicated / denom), |
91 | 8x |
"Any Abnormality" = c(n_any, n_any / denom) |
92 |
)) |
|
93 |
} else { |
|
94 | ! |
result <- list(count_fraction = list( |
95 | ! |
"Single, not last" = c(0, 0), |
96 | ! |
"Last or replicated" = c(0, 0), |
97 | ! |
"Any Abnormality" = c(0, 0) |
98 |
)) |
|
99 |
}
|
|
100 | ||
101 | 8x |
result
|
102 |
}
|
|
103 | ||
104 |
#' @describeIn abnormal_by_marked Formatted analysis function which is used as `afun`
|
|
105 |
#' in `count_abnormal_by_marked()`.
|
|
106 |
#'
|
|
107 |
#' @return
|
|
108 |
#' * `a_count_abnormal_by_marked()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
109 |
#'
|
|
110 |
#' @keywords internal
|
|
111 |
a_count_abnormal_by_marked <- function(df, |
|
112 |
...,
|
|
113 |
.stats = NULL, |
|
114 |
.stat_names = NULL, |
|
115 |
.formats = NULL, |
|
116 |
.labels = NULL, |
|
117 |
.indent_mods = NULL) { |
|
118 |
# Check for additional parameters to the statistics function
|
|
119 | 6x |
dots_extra_args <- list(...) |
120 | 6x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
121 | 6x |
dots_extra_args$.additional_fun_parameters <- NULL |
122 | ||
123 |
# Check for user-defined functions
|
|
124 | 6x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
125 | 6x |
.stats <- default_and_custom_stats_list$all_stats |
126 | 6x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
127 | ||
128 |
# Apply statistics function
|
|
129 | 6x |
x_stats <- .apply_stat_functions( |
130 | 6x |
default_stat_fnc = s_count_abnormal_by_marked, |
131 | 6x |
custom_stat_fnc_list = custom_stat_functions, |
132 | 6x |
args_list = c( |
133 | 6x |
df = list(df), |
134 | 6x |
extra_afun_params,
|
135 | 6x |
dots_extra_args
|
136 |
)
|
|
137 |
)
|
|
138 | ||
139 |
# Fill in formatting defaults
|
|
140 | 6x |
.stats <- get_stats("abnormal_by_marked", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) |
141 | 6x |
levels_per_stats <- lapply(x_stats, names) |
142 | 6x |
.formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) |
143 | 6x |
.labels <- get_labels_from_stats(.stats, .labels, levels_per_stats) |
144 | 6x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) |
145 | ||
146 | 6x |
x_stats <- x_stats[.stats] %>% |
147 | 6x |
.unlist_keep_nulls() %>% |
148 | 6x |
setNames(names(.formats)) |
149 | ||
150 |
# Auto format handling
|
|
151 | 6x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
152 | ||
153 |
# Get and check statistical names
|
|
154 | 6x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
155 | ||
156 | 6x |
in_rows( |
157 | 6x |
.list = x_stats, |
158 | 6x |
.formats = .formats, |
159 | 6x |
.names = .labels %>% .unlist_keep_nulls(), |
160 | 6x |
.stat_names = .stat_names, |
161 | 6x |
.labels = .labels %>% .unlist_keep_nulls(), |
162 | 6x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
163 |
)
|
|
164 |
}
|
|
165 | ||
166 |
#' @describeIn abnormal_by_marked Layout-creating function which can take statistics function arguments
|
|
167 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
168 |
#'
|
|
169 |
#' @return
|
|
170 |
#' * `count_abnormal_by_marked()` returns a layout object suitable for passing to further layouting functions,
|
|
171 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
172 |
#' the statistics from `s_count_abnormal_by_marked()` to the table layout.
|
|
173 |
#'
|
|
174 |
#' @examples
|
|
175 |
#' library(dplyr)
|
|
176 |
#'
|
|
177 |
#' df <- data.frame(
|
|
178 |
#' USUBJID = as.character(c(rep(1, 5), rep(2, 5), rep(1, 5), rep(2, 5))),
|
|
179 |
#' ARMCD = factor(c(rep("ARM A", 5), rep("ARM B", 5), rep("ARM A", 5), rep("ARM B", 5))),
|
|
180 |
#' ANRIND = factor(c(
|
|
181 |
#' "NORMAL", "HIGH", "HIGH", "HIGH HIGH", "HIGH",
|
|
182 |
#' "HIGH", "HIGH", "HIGH HIGH", "NORMAL", "HIGH HIGH", "NORMAL", "LOW", "LOW", "LOW LOW", "LOW",
|
|
183 |
#' "LOW", "LOW", "LOW LOW", "NORMAL", "LOW LOW"
|
|
184 |
#' )),
|
|
185 |
#' ONTRTFL = rep(c("", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"), 2),
|
|
186 |
#' PARAMCD = factor(c(rep("CRP", 10), rep("ALT", 10))),
|
|
187 |
#' AVALCAT1 = factor(rep(c("", "", "", "SINGLE", "REPLICATED", "", "", "LAST", "", "SINGLE"), 2)),
|
|
188 |
#' stringsAsFactors = FALSE
|
|
189 |
#' )
|
|
190 |
#'
|
|
191 |
#' df <- df %>%
|
|
192 |
#' mutate(abn_dir = factor(
|
|
193 |
#' case_when(
|
|
194 |
#' ANRIND == "LOW LOW" ~ "Low",
|
|
195 |
#' ANRIND == "HIGH HIGH" ~ "High",
|
|
196 |
#' TRUE ~ ""
|
|
197 |
#' ),
|
|
198 |
#' levels = c("Low", "High")
|
|
199 |
#' ))
|
|
200 |
#'
|
|
201 |
#' # Select only post-baseline records.
|
|
202 |
#' df <- df %>% filter(ONTRTFL == "Y")
|
|
203 |
#' df_crp <- df %>%
|
|
204 |
#' filter(PARAMCD == "CRP") %>%
|
|
205 |
#' droplevels()
|
|
206 |
#' full_parent_df <- list(df_crp, "not_needed")
|
|
207 |
#' cur_col_subset <- list(rep(TRUE, nrow(df_crp)), "not_needed")
|
|
208 |
#' spl_context <- data.frame(
|
|
209 |
#' split = c("PARAMCD", "GRADE_DIR"),
|
|
210 |
#' full_parent_df = I(full_parent_df),
|
|
211 |
#' cur_col_subset = I(cur_col_subset)
|
|
212 |
#' )
|
|
213 |
#'
|
|
214 |
#' map <- unique(
|
|
215 |
#' df[df$abn_dir %in% c("Low", "High") & df$AVALCAT1 != "", c("PARAMCD", "abn_dir")]
|
|
216 |
#' ) %>%
|
|
217 |
#' lapply(as.character) %>%
|
|
218 |
#' as.data.frame() %>%
|
|
219 |
#' arrange(PARAMCD, abn_dir)
|
|
220 |
#'
|
|
221 |
#' basic_table() %>%
|
|
222 |
#' split_cols_by("ARMCD") %>%
|
|
223 |
#' split_rows_by("PARAMCD") %>%
|
|
224 |
#' summarize_num_patients(
|
|
225 |
#' var = "USUBJID",
|
|
226 |
#' .stats = "unique_count"
|
|
227 |
#' ) %>%
|
|
228 |
#' split_rows_by(
|
|
229 |
#' "abn_dir",
|
|
230 |
#' split_fun = trim_levels_to_map(map)
|
|
231 |
#' ) %>%
|
|
232 |
#' count_abnormal_by_marked(
|
|
233 |
#' var = "AVALCAT1",
|
|
234 |
#' variables = list(
|
|
235 |
#' id = "USUBJID",
|
|
236 |
#' param = "PARAMCD",
|
|
237 |
#' direction = "abn_dir"
|
|
238 |
#' )
|
|
239 |
#' ) %>%
|
|
240 |
#' build_table(df = df)
|
|
241 |
#'
|
|
242 |
#' basic_table() %>%
|
|
243 |
#' split_cols_by("ARMCD") %>%
|
|
244 |
#' split_rows_by("PARAMCD") %>%
|
|
245 |
#' summarize_num_patients(
|
|
246 |
#' var = "USUBJID",
|
|
247 |
#' .stats = "unique_count"
|
|
248 |
#' ) %>%
|
|
249 |
#' split_rows_by(
|
|
250 |
#' "abn_dir",
|
|
251 |
#' split_fun = trim_levels_in_group("abn_dir")
|
|
252 |
#' ) %>%
|
|
253 |
#' count_abnormal_by_marked(
|
|
254 |
#' var = "AVALCAT1",
|
|
255 |
#' variables = list(
|
|
256 |
#' id = "USUBJID",
|
|
257 |
#' param = "PARAMCD",
|
|
258 |
#' direction = "abn_dir"
|
|
259 |
#' )
|
|
260 |
#' ) %>%
|
|
261 |
#' build_table(df = df)
|
|
262 |
#'
|
|
263 |
#' @export
|
|
264 |
#' @order 2
|
|
265 |
count_abnormal_by_marked <- function(lyt, |
|
266 |
var,
|
|
267 |
category = list(single = "SINGLE", last_replicated = c("LAST", "REPLICATED")), |
|
268 |
variables = list(id = "USUBJID", param = "PARAM", direction = "abn_dir"), |
|
269 |
na_str = default_na_str(), |
|
270 |
nested = TRUE, |
|
271 |
...,
|
|
272 |
.stats = "count_fraction", |
|
273 |
.stat_names = NULL, |
|
274 |
.formats = list(count_fraction = format_count_fraction), |
|
275 |
.labels = NULL, |
|
276 |
.indent_mods = NULL) { |
|
277 | 1x |
checkmate::assert_string(var) |
278 | ||
279 |
# Process standard extra arguments
|
|
280 | 1x |
extra_args <- list(".stats" = .stats) |
281 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
282 | 1x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
283 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
284 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
285 | ||
286 |
# Process additional arguments to the statistic function
|
|
287 | 1x |
extra_args <- c(extra_args, "category" = list(category), "variables" = list(variables), ...) |
288 | ||
289 |
# Append additional info from layout to the analysis function
|
|
290 | 1x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
291 | 1x |
formals(a_count_abnormal_by_marked) <- c( |
292 | 1x |
formals(a_count_abnormal_by_marked), extra_args[[".additional_fun_parameters"]] |
293 |
)
|
|
294 | ||
295 | 1x |
analyze( |
296 | 1x |
lyt = lyt, |
297 | 1x |
vars = var, |
298 | 1x |
afun = a_count_abnormal_by_marked, |
299 | 1x |
na_str = na_str, |
300 | 1x |
nested = nested, |
301 | 1x |
extra_args = extra_args, |
302 | 1x |
show_labels = "hidden" |
303 |
)
|
|
304 |
}
|
1 |
#' Individual patient plots
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Line plot(s) displaying trend in patients' parameter values over time is rendered.
|
|
6 |
#' Patients' individual baseline values can be added to the plot(s) as reference.
|
|
7 |
#'
|
|
8 |
#' @inheritParams argument_convention
|
|
9 |
#' @param xvar (`string`)\cr time point variable to be plotted on x-axis.
|
|
10 |
#' @param yvar (`string`)\cr continuous analysis variable to be plotted on y-axis.
|
|
11 |
#' @param xlab (`string`)\cr plot label for x-axis.
|
|
12 |
#' @param ylab (`string`)\cr plot label for y-axis.
|
|
13 |
#' @param id_var (`string`)\cr variable used as patient identifier.
|
|
14 |
#' @param title (`string`)\cr title for plot.
|
|
15 |
#' @param subtitle (`string`)\cr subtitle for plot.
|
|
16 |
#' @param add_baseline_hline (`flag`)\cr adds horizontal line at baseline y-value on
|
|
17 |
#' plot when `TRUE`.
|
|
18 |
#' @param yvar_baseline (`string`)\cr variable with baseline values only.
|
|
19 |
#' Ignored when `add_baseline_hline` is `FALSE`.
|
|
20 |
#' @param ggtheme (`theme`)\cr optional graphical theme function as provided
|
|
21 |
#' by `ggplot2` to control outlook of plot. Use `ggplot2::theme()` to tweak the display.
|
|
22 |
#' @param plotting_choices (`string`)\cr specifies options for displaying
|
|
23 |
#' plots. Must be one of `"all_in_one"`, `"split_by_max_obs"`, or `"separate_by_obs"`.
|
|
24 |
#' @param max_obs_per_plot (`integer(1)`)\cr number of observations to be plotted on one
|
|
25 |
#' plot. Ignored if `plotting_choices` is not `"separate_by_obs"`.
|
|
26 |
#' @param caption (`string`)\cr optional caption below the plot.
|
|
27 |
#' @param col (`character`)\cr line colors.
|
|
28 |
#'
|
|
29 |
#' @seealso Relevant helper function [h_g_ipp()].
|
|
30 |
#'
|
|
31 |
#' @name g_ipp
|
|
32 |
#' @aliases individual_patient_plot
|
|
33 |
NULL
|
|
34 | ||
35 |
#' Helper function to create simple line plot over time
|
|
36 |
#'
|
|
37 |
#' @description `r lifecycle::badge("stable")`
|
|
38 |
#'
|
|
39 |
#' Function that generates a simple line plot displaying parameter trends over time.
|
|
40 |
#'
|
|
41 |
#' @inheritParams argument_convention
|
|
42 |
#' @inheritParams g_ipp
|
|
43 |
#'
|
|
44 |
#' @return A `ggplot` line plot.
|
|
45 |
#'
|
|
46 |
#' @seealso [g_ipp()] which uses this function.
|
|
47 |
#'
|
|
48 |
#' @examples
|
|
49 |
#' library(dplyr)
|
|
50 |
#'
|
|
51 |
#' # Select a small sample of data to plot.
|
|
52 |
#' adlb <- tern_ex_adlb %>%
|
|
53 |
#' filter(PARAMCD == "ALT", !(AVISIT %in% c("SCREENING", "BASELINE"))) %>%
|
|
54 |
#' slice(1:36)
|
|
55 |
#'
|
|
56 |
#' p <- h_g_ipp(
|
|
57 |
#' df = adlb,
|
|
58 |
#' xvar = "AVISIT",
|
|
59 |
#' yvar = "AVAL",
|
|
60 |
#' xlab = "Visit",
|
|
61 |
#' id_var = "USUBJID",
|
|
62 |
#' ylab = "SGOT/ALT (U/L)",
|
|
63 |
#' add_baseline_hline = TRUE
|
|
64 |
#' )
|
|
65 |
#' p
|
|
66 |
#'
|
|
67 |
#' @export
|
|
68 |
h_g_ipp <- function(df, |
|
69 |
xvar,
|
|
70 |
yvar,
|
|
71 |
xlab,
|
|
72 |
ylab,
|
|
73 |
id_var,
|
|
74 |
title = "Individual Patient Plots", |
|
75 |
subtitle = "", |
|
76 |
caption = NULL, |
|
77 |
add_baseline_hline = FALSE, |
|
78 |
yvar_baseline = "BASE", |
|
79 |
ggtheme = nestcolor::theme_nest(), |
|
80 |
col = NULL) { |
|
81 | 13x |
checkmate::assert_string(xvar) |
82 | 13x |
checkmate::assert_string(yvar) |
83 | 13x |
checkmate::assert_string(yvar_baseline) |
84 | 13x |
checkmate::assert_string(id_var) |
85 | 13x |
checkmate::assert_string(xlab) |
86 | 13x |
checkmate::assert_string(ylab) |
87 | 13x |
checkmate::assert_string(title) |
88 | 13x |
checkmate::assert_string(subtitle) |
89 | 13x |
checkmate::assert_subset(c(xvar, yvar, yvar_baseline, id_var), colnames(df)) |
90 | 13x |
checkmate::assert_data_frame(df) |
91 | 13x |
checkmate::assert_flag(add_baseline_hline) |
92 | 13x |
checkmate::assert_character(col, null.ok = TRUE) |
93 | ||
94 | 13x |
p <- ggplot2::ggplot( |
95 | 13x |
data = df, |
96 | 13x |
mapping = ggplot2::aes( |
97 | 13x |
x = .data[[xvar]], |
98 | 13x |
y = .data[[yvar]], |
99 | 13x |
group = .data[[id_var]], |
100 | 13x |
colour = .data[[id_var]] |
101 |
)
|
|
102 |
) + |
|
103 | 13x |
ggplot2::geom_line(linewidth = 0.4) + |
104 | 13x |
ggplot2::geom_point(size = 2) + |
105 | 13x |
ggplot2::labs( |
106 | 13x |
x = xlab, |
107 | 13x |
y = ylab, |
108 | 13x |
title = title, |
109 | 13x |
subtitle = subtitle, |
110 | 13x |
caption = caption |
111 |
) + |
|
112 | 13x |
ggtheme
|
113 | ||
114 | 13x |
if (add_baseline_hline) { |
115 | 12x |
baseline_df <- df[, c(id_var, yvar_baseline)] |
116 | 12x |
baseline_df <- unique(baseline_df) |
117 | ||
118 | 12x |
p <- p + |
119 | 12x |
ggplot2::geom_hline( |
120 | 12x |
data = baseline_df, |
121 | 12x |
mapping = ggplot2::aes( |
122 | 12x |
yintercept = .data[[yvar_baseline]], |
123 | 12x |
colour = .data[[id_var]] |
124 |
),
|
|
125 | 12x |
linetype = "dotdash", |
126 | 12x |
linewidth = 0.4 |
127 |
) + |
|
128 | 12x |
ggplot2::geom_text( |
129 | 12x |
data = baseline_df, |
130 | 12x |
mapping = ggplot2::aes( |
131 | 12x |
x = 1, |
132 | 12x |
y = .data[[yvar_baseline]], |
133 | 12x |
label = .data[[id_var]], |
134 | 12x |
colour = .data[[id_var]] |
135 |
),
|
|
136 | 12x |
nudge_y = 0.025 * (max(df[, yvar], na.rm = TRUE) - min(df[, yvar], na.rm = TRUE)), |
137 | 12x |
vjust = "right", |
138 | 12x |
size = 2 |
139 |
)
|
|
140 | ||
141 | 12x |
if (!is.null(col)) { |
142 | 1x |
p <- p + |
143 | 1x |
ggplot2::scale_color_manual(values = col) |
144 |
}
|
|
145 |
}
|
|
146 | 13x |
p
|
147 |
}
|
|
148 | ||
149 |
#' @describeIn g_ipp Plotting function for individual patient plots which, depending on user
|
|
150 |
#' preference, renders a single graphic or compiles a list of graphics that show trends in individual's parameter
|
|
151 |
#' values over time.
|
|
152 |
#'
|
|
153 |
#' @return A `ggplot` object or a list of `ggplot` objects.
|
|
154 |
#'
|
|
155 |
#' @examples
|
|
156 |
#' library(dplyr)
|
|
157 |
#'
|
|
158 |
#' # Select a small sample of data to plot.
|
|
159 |
#' adlb <- tern_ex_adlb %>%
|
|
160 |
#' filter(PARAMCD == "ALT", !(AVISIT %in% c("SCREENING", "BASELINE"))) %>%
|
|
161 |
#' slice(1:36)
|
|
162 |
#'
|
|
163 |
#' plot_list <- g_ipp(
|
|
164 |
#' df = adlb,
|
|
165 |
#' xvar = "AVISIT",
|
|
166 |
#' yvar = "AVAL",
|
|
167 |
#' xlab = "Visit",
|
|
168 |
#' ylab = "SGOT/ALT (U/L)",
|
|
169 |
#' title = "Individual Patient Plots",
|
|
170 |
#' add_baseline_hline = TRUE,
|
|
171 |
#' plotting_choices = "split_by_max_obs",
|
|
172 |
#' max_obs_per_plot = 5
|
|
173 |
#' )
|
|
174 |
#' plot_list
|
|
175 |
#'
|
|
176 |
#' @export
|
|
177 |
g_ipp <- function(df, |
|
178 |
xvar,
|
|
179 |
yvar,
|
|
180 |
xlab,
|
|
181 |
ylab,
|
|
182 |
id_var = "USUBJID", |
|
183 |
title = "Individual Patient Plots", |
|
184 |
subtitle = "", |
|
185 |
caption = NULL, |
|
186 |
add_baseline_hline = FALSE, |
|
187 |
yvar_baseline = "BASE", |
|
188 |
ggtheme = nestcolor::theme_nest(), |
|
189 |
plotting_choices = c("all_in_one", "split_by_max_obs", "separate_by_obs"), |
|
190 |
max_obs_per_plot = 4, |
|
191 |
col = NULL) { |
|
192 | 3x |
checkmate::assert_count(max_obs_per_plot) |
193 | 3x |
checkmate::assert_subset(plotting_choices, c("all_in_one", "split_by_max_obs", "separate_by_obs")) |
194 | 3x |
checkmate::assert_character(col, null.ok = TRUE) |
195 | ||
196 | 3x |
plotting_choices <- match.arg(plotting_choices) |
197 | ||
198 | 3x |
if (plotting_choices == "all_in_one") { |
199 | 1x |
p <- h_g_ipp( |
200 | 1x |
df = df, |
201 | 1x |
xvar = xvar, |
202 | 1x |
yvar = yvar, |
203 | 1x |
xlab = xlab, |
204 | 1x |
ylab = ylab, |
205 | 1x |
id_var = id_var, |
206 | 1x |
title = title, |
207 | 1x |
subtitle = subtitle, |
208 | 1x |
caption = caption, |
209 | 1x |
add_baseline_hline = add_baseline_hline, |
210 | 1x |
yvar_baseline = yvar_baseline, |
211 | 1x |
ggtheme = ggtheme, |
212 | 1x |
col = col |
213 |
)
|
|
214 | ||
215 | 1x |
return(p) |
216 | 2x |
} else if (plotting_choices == "split_by_max_obs") { |
217 | 1x |
id_vec <- unique(df[[id_var]]) |
218 | 1x |
id_list <- split( |
219 | 1x |
id_vec,
|
220 | 1x |
rep(1:ceiling(length(id_vec) / max_obs_per_plot), |
221 | 1x |
each = max_obs_per_plot, |
222 | 1x |
length.out = length(id_vec) |
223 |
)
|
|
224 |
)
|
|
225 | ||
226 | 1x |
df_list <- list() |
227 | 1x |
plot_list <- list() |
228 | ||
229 | 1x |
for (i in seq_along(id_list)) { |
230 | 2x |
df_list[[i]] <- df[df[[id_var]] %in% id_list[[i]], ] |
231 | ||
232 | 2x |
plots <- h_g_ipp( |
233 | 2x |
df = df_list[[i]], |
234 | 2x |
xvar = xvar, |
235 | 2x |
yvar = yvar, |
236 | 2x |
xlab = xlab, |
237 | 2x |
ylab = ylab, |
238 | 2x |
id_var = id_var, |
239 | 2x |
title = title, |
240 | 2x |
subtitle = subtitle, |
241 | 2x |
caption = caption, |
242 | 2x |
add_baseline_hline = add_baseline_hline, |
243 | 2x |
yvar_baseline = yvar_baseline, |
244 | 2x |
ggtheme = ggtheme, |
245 | 2x |
col = col |
246 |
)
|
|
247 | ||
248 | 2x |
plot_list[[i]] <- plots |
249 |
}
|
|
250 | 1x |
return(plot_list) |
251 |
} else { |
|
252 | 1x |
ind_df <- split(df, df[[id_var]]) |
253 | 1x |
plot_list <- lapply( |
254 | 1x |
ind_df,
|
255 | 1x |
function(x) { |
256 | 8x |
h_g_ipp( |
257 | 8x |
df = x, |
258 | 8x |
xvar = xvar, |
259 | 8x |
yvar = yvar, |
260 | 8x |
xlab = xlab, |
261 | 8x |
ylab = ylab, |
262 | 8x |
id_var = id_var, |
263 | 8x |
title = title, |
264 | 8x |
subtitle = subtitle, |
265 | 8x |
caption = caption, |
266 | 8x |
add_baseline_hline = add_baseline_hline, |
267 | 8x |
yvar_baseline = yvar_baseline, |
268 | 8x |
ggtheme = ggtheme, |
269 | 8x |
col = col |
270 |
)
|
|
271 |
}
|
|
272 |
)
|
|
273 | ||
274 | 1x |
return(plot_list) |
275 |
}
|
|
276 |
}
|
1 |
#' Helper functions for tabulating binary response by subgroup
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Helper functions that tabulate in a data frame statistics such as response rate
|
|
6 |
#' and odds ratio for population subgroups.
|
|
7 |
#'
|
|
8 |
#' @inheritParams argument_convention
|
|
9 |
#' @inheritParams response_subgroups
|
|
10 |
#' @param arm (`factor`)\cr the treatment group variable.
|
|
11 |
#'
|
|
12 |
#' @details Main functionality is to prepare data for use in a layout-creating function.
|
|
13 |
#'
|
|
14 |
#' @examples
|
|
15 |
#' library(dplyr)
|
|
16 |
#' library(forcats)
|
|
17 |
#'
|
|
18 |
#' adrs <- tern_ex_adrs
|
|
19 |
#' adrs_labels <- formatters::var_labels(adrs)
|
|
20 |
#'
|
|
21 |
#' adrs_f <- adrs %>%
|
|
22 |
#' filter(PARAMCD == "BESRSPI") %>%
|
|
23 |
#' filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
|
|
24 |
#' droplevels() %>%
|
|
25 |
#' mutate(
|
|
26 |
#' # Reorder levels of factor to make the placebo group the reference arm.
|
|
27 |
#' ARM = fct_relevel(ARM, "B: Placebo"),
|
|
28 |
#' rsp = AVALC == "CR"
|
|
29 |
#' )
|
|
30 |
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
|
|
31 |
#'
|
|
32 |
#' @name h_response_subgroups
|
|
33 |
NULL
|
|
34 | ||
35 |
#' @describeIn h_response_subgroups Helper to prepare a data frame of binary responses by arm.
|
|
36 |
#'
|
|
37 |
#' @return
|
|
38 |
#' * `h_proportion_df()` returns a `data.frame` with columns `arm`, `n`, `n_rsp`, and `prop`.
|
|
39 |
#'
|
|
40 |
#' @examples
|
|
41 |
#' h_proportion_df(
|
|
42 |
#' c(TRUE, FALSE, FALSE),
|
|
43 |
#' arm = factor(c("A", "A", "B"), levels = c("A", "B"))
|
|
44 |
#' )
|
|
45 |
#'
|
|
46 |
#' @export
|
|
47 |
h_proportion_df <- function(rsp, arm) { |
|
48 | 79x |
checkmate::assert_logical(rsp) |
49 | 78x |
assert_valid_factor(arm, len = length(rsp)) |
50 | 78x |
non_missing_rsp <- !is.na(rsp) |
51 | 78x |
rsp <- rsp[non_missing_rsp] |
52 | 78x |
arm <- arm[non_missing_rsp] |
53 | ||
54 | 78x |
lst_rsp <- split(rsp, arm) |
55 | 78x |
lst_results <- Map(function(x, arm) { |
56 | 156x |
if (length(x) > 0) { |
57 | 154x |
s_prop <- s_proportion(df = x) |
58 | 154x |
data.frame( |
59 | 154x |
arm = arm, |
60 | 154x |
n = length(x), |
61 | 154x |
n_rsp = unname(s_prop$n_prop[1]), |
62 | 154x |
prop = unname(s_prop$n_prop[2]), |
63 | 154x |
stringsAsFactors = FALSE |
64 |
)
|
|
65 |
} else { |
|
66 | 2x |
data.frame( |
67 | 2x |
arm = arm, |
68 | 2x |
n = 0L, |
69 | 2x |
n_rsp = NA, |
70 | 2x |
prop = NA, |
71 | 2x |
stringsAsFactors = FALSE |
72 |
)
|
|
73 |
}
|
|
74 | 78x |
}, lst_rsp, names(lst_rsp)) |
75 | ||
76 | 78x |
df <- do.call(rbind, args = c(lst_results, make.row.names = FALSE)) |
77 | 78x |
df$arm <- factor(df$arm, levels = levels(arm)) |
78 | 78x |
df
|
79 |
}
|
|
80 | ||
81 |
#' @describeIn h_response_subgroups Summarizes proportion of binary responses by arm and across subgroups
|
|
82 |
#' in a data frame. `variables` corresponds to the names of variables found in `data`, passed as a named list and
|
|
83 |
#' requires elements `rsp`, `arm` and optionally `subgroups`. `groups_lists` optionally specifies
|
|
84 |
#' groupings for `subgroups` variables.
|
|
85 |
#'
|
|
86 |
#' @return
|
|
87 |
#' * `h_proportion_subgroups_df()` returns a `data.frame` with columns `arm`, `n`, `n_rsp`, `prop`, `subgroup`,
|
|
88 |
#' `var`, `var_label`, and `row_type`.
|
|
89 |
#'
|
|
90 |
#' @examples
|
|
91 |
#' h_proportion_subgroups_df(
|
|
92 |
#' variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
|
|
93 |
#' data = adrs_f
|
|
94 |
#' )
|
|
95 |
#'
|
|
96 |
#' # Define groupings for BMRKR2 levels.
|
|
97 |
#' h_proportion_subgroups_df(
|
|
98 |
#' variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
|
|
99 |
#' data = adrs_f,
|
|
100 |
#' groups_lists = list(
|
|
101 |
#' BMRKR2 = list(
|
|
102 |
#' "low" = "LOW",
|
|
103 |
#' "low/medium" = c("LOW", "MEDIUM"),
|
|
104 |
#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
|
|
105 |
#' )
|
|
106 |
#' )
|
|
107 |
#' )
|
|
108 |
#'
|
|
109 |
#' @export
|
|
110 |
h_proportion_subgroups_df <- function(variables, |
|
111 |
data,
|
|
112 |
groups_lists = list(), |
|
113 |
label_all = "All Patients") { |
|
114 | 17x |
checkmate::assert_character(variables$rsp) |
115 | 17x |
checkmate::assert_character(variables$arm) |
116 | 17x |
checkmate::assert_character(variables$subgroups, null.ok = TRUE) |
117 | 17x |
assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2) |
118 | 17x |
assert_df_with_variables(data, variables) |
119 | 17x |
checkmate::assert_string(label_all) |
120 | ||
121 |
# Add All Patients.
|
|
122 | 17x |
result_all <- h_proportion_df(data[[variables$rsp]], data[[variables$arm]]) |
123 | 17x |
result_all$subgroup <- label_all |
124 | 17x |
result_all$var <- "ALL" |
125 | 17x |
result_all$var_label <- label_all |
126 | 17x |
result_all$row_type <- "content" |
127 | ||
128 |
# Add Subgroups.
|
|
129 | 17x |
if (is.null(variables$subgroups)) { |
130 | 3x |
result_all
|
131 |
} else { |
|
132 | 14x |
l_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists) |
133 | ||
134 | 14x |
l_result <- lapply(l_data, function(grp) { |
135 | 58x |
result <- h_proportion_df(grp$df[[variables$rsp]], grp$df[[variables$arm]]) |
136 | 58x |
result_labels <- grp$df_labels[rep(1, times = nrow(result)), ] |
137 | 58x |
cbind(result, result_labels) |
138 |
}) |
|
139 | 14x |
result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE)) |
140 | 14x |
result_subgroups$row_type <- "analysis" |
141 | ||
142 | 14x |
rbind( |
143 | 14x |
result_all,
|
144 | 14x |
result_subgroups
|
145 |
)
|
|
146 |
}
|
|
147 |
}
|
|
148 | ||
149 |
#' @describeIn h_response_subgroups Helper to prepare a data frame with estimates of
|
|
150 |
#' the odds ratio between a treatment and a control arm.
|
|
151 |
#'
|
|
152 |
#' @inheritParams response_subgroups
|
|
153 |
#' @param strata_data (`factor`, `data.frame`, or `NULL`)\cr required if stratified analysis is performed.
|
|
154 |
#'
|
|
155 |
#' @return
|
|
156 |
#' * `h_odds_ratio_df()` returns a `data.frame` with columns `arm`, `n_tot`, `or`, `lcl`, `ucl`, `conf_level`, and
|
|
157 |
#' optionally `pval` and `pval_label`.
|
|
158 |
#'
|
|
159 |
#' @examples
|
|
160 |
#' # Unstratatified analysis.
|
|
161 |
#' h_odds_ratio_df(
|
|
162 |
#' c(TRUE, FALSE, FALSE, TRUE),
|
|
163 |
#' arm = factor(c("A", "A", "B", "B"), levels = c("A", "B"))
|
|
164 |
#' )
|
|
165 |
#'
|
|
166 |
#' # Include p-value.
|
|
167 |
#' h_odds_ratio_df(adrs_f$rsp, adrs_f$ARM, method = "chisq")
|
|
168 |
#'
|
|
169 |
#' # Stratatified analysis.
|
|
170 |
#' h_odds_ratio_df(
|
|
171 |
#' rsp = adrs_f$rsp,
|
|
172 |
#' arm = adrs_f$ARM,
|
|
173 |
#' strata_data = adrs_f[, c("STRATA1", "STRATA2")],
|
|
174 |
#' method = "cmh"
|
|
175 |
#' )
|
|
176 |
#'
|
|
177 |
#' @export
|
|
178 |
h_odds_ratio_df <- function(rsp, arm, strata_data = NULL, conf_level = 0.95, method = NULL) { |
|
179 | 84x |
assert_valid_factor(arm, n.levels = 2, len = length(rsp)) |
180 | ||
181 | 84x |
df_rsp <- data.frame( |
182 | 84x |
rsp = rsp, |
183 | 84x |
arm = arm |
184 |
)
|
|
185 | ||
186 | 84x |
if (!is.null(strata_data)) { |
187 | 11x |
strata_var <- interaction(strata_data, drop = TRUE) |
188 | 11x |
strata_name <- "strata" |
189 | ||
190 | 11x |
assert_valid_factor(strata_var, len = nrow(df_rsp)) |
191 | ||
192 | 11x |
df_rsp[[strata_name]] <- strata_var |
193 |
} else { |
|
194 | 73x |
strata_name <- NULL |
195 |
}
|
|
196 | ||
197 | 84x |
l_df <- split(df_rsp, arm) |
198 | ||
199 | 84x |
if (nrow(l_df[[1]]) > 0 && nrow(l_df[[2]]) > 0) { |
200 |
# Odds ratio and CI.
|
|
201 | 82x |
result_odds_ratio <- s_odds_ratio( |
202 | 82x |
df = l_df[[2]], |
203 | 82x |
.var = "rsp", |
204 | 82x |
.ref_group = l_df[[1]], |
205 | 82x |
.in_ref_col = FALSE, |
206 | 82x |
.df_row = df_rsp, |
207 | 82x |
variables = list(arm = "arm", strata = strata_name), |
208 | 82x |
conf_level = conf_level |
209 |
)
|
|
210 | ||
211 | 82x |
df <- data.frame( |
212 |
# Dummy column needed downstream to create a nested header.
|
|
213 | 82x |
arm = " ", |
214 | 82x |
n_tot = unname(result_odds_ratio$n_tot["n_tot"]), |
215 | 82x |
or = unname(result_odds_ratio$or_ci["est"]), |
216 | 82x |
lcl = unname(result_odds_ratio$or_ci["lcl"]), |
217 | 82x |
ucl = unname(result_odds_ratio$or_ci["ucl"]), |
218 | 82x |
conf_level = conf_level, |
219 | 82x |
stringsAsFactors = FALSE |
220 |
)
|
|
221 | ||
222 | 82x |
if (!is.null(method)) { |
223 |
# Test for difference.
|
|
224 | 44x |
result_test <- s_test_proportion_diff( |
225 | 44x |
df = l_df[[2]], |
226 | 44x |
.var = "rsp", |
227 | 44x |
.ref_group = l_df[[1]], |
228 | 44x |
.in_ref_col = FALSE, |
229 | 44x |
variables = list(strata = strata_name), |
230 | 44x |
method = method |
231 |
)
|
|
232 | ||
233 | 44x |
df$pval <- as.numeric(result_test$pval) |
234 | 44x |
df$pval_label <- obj_label(result_test$pval) |
235 |
}
|
|
236 | ||
237 |
# In those cases cannot go through the model so will obtain n_tot from data.
|
|
238 |
} else if ( |
|
239 | 2x |
(nrow(l_df[[1]]) == 0 && nrow(l_df[[2]]) > 0) || |
240 | 2x |
(nrow(l_df[[1]]) > 0 && nrow(l_df[[2]]) == 0) |
241 |
) { |
|
242 | 2x |
df <- data.frame( |
243 |
# Dummy column needed downstream to create a nested header.
|
|
244 | 2x |
arm = " ", |
245 | 2x |
n_tot = sum(stats::complete.cases(df_rsp)), |
246 | 2x |
or = NA, |
247 | 2x |
lcl = NA, |
248 | 2x |
ucl = NA, |
249 | 2x |
conf_level = conf_level, |
250 | 2x |
stringsAsFactors = FALSE |
251 |
)
|
|
252 | 2x |
if (!is.null(method)) { |
253 | 2x |
df$pval <- NA |
254 | 2x |
df$pval_label <- NA |
255 |
}
|
|
256 |
} else { |
|
257 | ! |
df <- data.frame( |
258 |
# Dummy column needed downstream to create a nested header.
|
|
259 | ! |
arm = " ", |
260 | ! |
n_tot = 0L, |
261 | ! |
or = NA, |
262 | ! |
lcl = NA, |
263 | ! |
ucl = NA, |
264 | ! |
conf_level = conf_level, |
265 | ! |
stringsAsFactors = FALSE |
266 |
)
|
|
267 | ||
268 | ! |
if (!is.null(method)) { |
269 | ! |
df$pval <- NA |
270 | ! |
df$pval_label <- NA |
271 |
}
|
|
272 |
}
|
|
273 | ||
274 | 84x |
df
|
275 |
}
|
|
276 | ||
277 |
#' @describeIn h_response_subgroups Summarizes estimates of the odds ratio between a treatment and a control
|
|
278 |
#' arm across subgroups in a data frame. `variables` corresponds to the names of variables found in
|
|
279 |
#' `data`, passed as a named list and requires elements `rsp`, `arm` and optionally `subgroups`
|
|
280 |
#' and `strata`. `groups_lists` optionally specifies groupings for `subgroups` variables.
|
|
281 |
#'
|
|
282 |
#' @return
|
|
283 |
#' * `h_odds_ratio_subgroups_df()` returns a `data.frame` with columns `arm`, `n_tot`, `or`, `lcl`, `ucl`,
|
|
284 |
#' `conf_level`, `subgroup`, `var`, `var_label`, and `row_type`.
|
|
285 |
#'
|
|
286 |
#' @examples
|
|
287 |
#' # Unstratified analysis.
|
|
288 |
#' h_odds_ratio_subgroups_df(
|
|
289 |
#' variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
|
|
290 |
#' data = adrs_f
|
|
291 |
#' )
|
|
292 |
#'
|
|
293 |
#' # Stratified analysis.
|
|
294 |
#' h_odds_ratio_subgroups_df(
|
|
295 |
#' variables = list(
|
|
296 |
#' rsp = "rsp",
|
|
297 |
#' arm = "ARM",
|
|
298 |
#' subgroups = c("SEX", "BMRKR2"),
|
|
299 |
#' strata = c("STRATA1", "STRATA2")
|
|
300 |
#' ),
|
|
301 |
#' data = adrs_f
|
|
302 |
#' )
|
|
303 |
#'
|
|
304 |
#' # Define groupings of BMRKR2 levels.
|
|
305 |
#' h_odds_ratio_subgroups_df(
|
|
306 |
#' variables = list(
|
|
307 |
#' rsp = "rsp",
|
|
308 |
#' arm = "ARM",
|
|
309 |
#' subgroups = c("SEX", "BMRKR2")
|
|
310 |
#' ),
|
|
311 |
#' data = adrs_f,
|
|
312 |
#' groups_lists = list(
|
|
313 |
#' BMRKR2 = list(
|
|
314 |
#' "low" = "LOW",
|
|
315 |
#' "low/medium" = c("LOW", "MEDIUM"),
|
|
316 |
#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
|
|
317 |
#' )
|
|
318 |
#' )
|
|
319 |
#' )
|
|
320 |
#'
|
|
321 |
#' @export
|
|
322 |
h_odds_ratio_subgroups_df <- function(variables, |
|
323 |
data,
|
|
324 |
groups_lists = list(), |
|
325 |
conf_level = 0.95, |
|
326 |
method = NULL, |
|
327 |
label_all = "All Patients") { |
|
328 | 18x |
if ("strat" %in% names(variables)) { |
329 | ! |
warning( |
330 | ! |
"Warning: the `strat` element name of the `variables` list argument to `h_odds_ratio_subgroups_df() ",
|
331 | ! |
"was deprecated in tern 0.9.4.\n ",
|
332 | ! |
"Please use the name `strata` instead of `strat` in the `variables` argument."
|
333 |
)
|
|
334 | ! |
variables[["strata"]] <- variables[["strat"]] |
335 |
}
|
|
336 | ||
337 | 18x |
checkmate::assert_character(variables$rsp) |
338 | 18x |
checkmate::assert_character(variables$arm) |
339 | 18x |
checkmate::assert_character(variables$subgroups, null.ok = TRUE) |
340 | 18x |
checkmate::assert_character(variables$strata, null.ok = TRUE) |
341 | 18x |
assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2) |
342 | 18x |
assert_df_with_variables(data, variables) |
343 | 18x |
checkmate::assert_string(label_all) |
344 | ||
345 | 18x |
strata_data <- if (is.null(variables$strata)) { |
346 | 16x |
NULL
|
347 |
} else { |
|
348 | 2x |
data[, variables$strata, drop = FALSE] |
349 |
}
|
|
350 | ||
351 |
# Add All Patients.
|
|
352 | 18x |
result_all <- h_odds_ratio_df( |
353 | 18x |
rsp = data[[variables$rsp]], |
354 | 18x |
arm = data[[variables$arm]], |
355 | 18x |
strata_data = strata_data, |
356 | 18x |
conf_level = conf_level, |
357 | 18x |
method = method |
358 |
)
|
|
359 | 18x |
result_all$subgroup <- label_all |
360 | 18x |
result_all$var <- "ALL" |
361 | 18x |
result_all$var_label <- label_all |
362 | 18x |
result_all$row_type <- "content" |
363 | ||
364 | 18x |
if (is.null(variables$subgroups)) { |
365 | 3x |
result_all
|
366 |
} else { |
|
367 | 15x |
l_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists) |
368 | ||
369 | 15x |
l_result <- lapply(l_data, function(grp) { |
370 | 62x |
grp_strata_data <- if (is.null(variables$strata)) { |
371 | 54x |
NULL
|
372 |
} else { |
|
373 | 8x |
grp$df[, variables$strata, drop = FALSE] |
374 |
}
|
|
375 | ||
376 | 62x |
result <- h_odds_ratio_df( |
377 | 62x |
rsp = grp$df[[variables$rsp]], |
378 | 62x |
arm = grp$df[[variables$arm]], |
379 | 62x |
strata_data = grp_strata_data, |
380 | 62x |
conf_level = conf_level, |
381 | 62x |
method = method |
382 |
)
|
|
383 | 62x |
result_labels <- grp$df_labels[rep(1, times = nrow(result)), ] |
384 | 62x |
cbind(result, result_labels) |
385 |
}) |
|
386 | ||
387 | 15x |
result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE)) |
388 | 15x |
result_subgroups$row_type <- "analysis" |
389 | ||
390 | 15x |
rbind( |
391 | 15x |
result_all,
|
392 | 15x |
result_subgroups
|
393 |
)
|
|
394 |
}
|
|
395 |
}
|
1 |
#' Get default statistical methods and their associated formats, labels, and indent modifiers
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("experimental")`
|
|
4 |
#'
|
|
5 |
#' Utility functions to get valid statistic methods for different method groups
|
|
6 |
#' (`.stats`) and their associated formats (`.formats`), labels (`.labels`), and indent modifiers
|
|
7 |
#' (`.indent_mods`). This utility is used across `tern`, but some of its working principles can be
|
|
8 |
#' seen in [analyze_vars()]. See notes to understand why this is experimental.
|
|
9 |
#'
|
|
10 |
#' @param stats (`character`)\cr statistical methods to return defaults for.
|
|
11 |
#' @param levels_per_stats (named `list` of `character` or `NULL`)\cr named list where the name of each element is a
|
|
12 |
#' statistic from `stats` and each element is the levels of a `factor` or `character` variable (or variable name),
|
|
13 |
#' each corresponding to a single row, for which the named statistic should be calculated for. If a statistic is only
|
|
14 |
#' calculated once (one row), the element can be either `NULL` or the name of the statistic. Each list element will be
|
|
15 |
#' flattened such that the names of the list elements returned by the function have the format `statistic.level` (or
|
|
16 |
#' just `statistic` for statistics calculated for a single row). Defaults to `NULL`.
|
|
17 |
#' @param tern_defaults (`list` or `vector`)\cr defaults to use to fill in missing values if no user input is given.
|
|
18 |
#' Must be of the same type as the values that are being filled in (e.g. indentation must be integers).
|
|
19 |
#'
|
|
20 |
#' @details
|
|
21 |
#' Current choices for `type` are `counts` and `numeric` for [analyze_vars()] and affect `get_stats()`.
|
|
22 |
#'
|
|
23 |
#' @note
|
|
24 |
#' These defaults are experimental because we use the names of functions to retrieve the default
|
|
25 |
#' statistics. This should be generalized in groups of methods according to more reasonable groupings.
|
|
26 |
#'
|
|
27 |
#' @name default_stats_formats_labels
|
|
28 |
NULL
|
|
29 | ||
30 |
#' @describeIn default_stats_formats_labels Get statistics available for a given method
|
|
31 |
#' group (analyze function). To check available defaults see `tern::tern_default_stats` list.
|
|
32 |
#'
|
|
33 |
#' @param method_groups (`character`)\cr indicates the statistical method group (`tern` analyze function)
|
|
34 |
#' to retrieve default statistics for. A character vector can be used to specify more than one statistical
|
|
35 |
#' method group.
|
|
36 |
#' @param stats_in (`character`)\cr statistics to retrieve for the selected method group. If custom statistical
|
|
37 |
#' functions are used, `stats_in` needs to have them in too.
|
|
38 |
#' @param custom_stats_in (`character`)\cr custom statistics to add to the default statistics.
|
|
39 |
#' @param add_pval (`flag`)\cr should `"pval"` (or `"pval_counts"` if `method_groups` contains
|
|
40 |
#' `"analyze_vars_counts"`) be added to the statistical methods?
|
|
41 |
#'
|
|
42 |
#' @return
|
|
43 |
#' * `get_stats()` returns a `character` vector of statistical methods.
|
|
44 |
#'
|
|
45 |
#' @examples
|
|
46 |
#' # analyze_vars is numeric
|
|
47 |
#' num_stats <- get_stats("analyze_vars_numeric") # also the default
|
|
48 |
#'
|
|
49 |
#' # Other type
|
|
50 |
#' cnt_stats <- get_stats("analyze_vars_counts")
|
|
51 |
#'
|
|
52 |
#' # Weirdly taking the pval from count_occurrences
|
|
53 |
#' only_pval <- get_stats("count_occurrences", add_pval = TRUE, stats_in = "pval")
|
|
54 |
#'
|
|
55 |
#' # All count_occurrences
|
|
56 |
#' all_cnt_occ <- get_stats("count_occurrences")
|
|
57 |
#'
|
|
58 |
#' # Multiple
|
|
59 |
#' get_stats(c("count_occurrences", "analyze_vars_counts"))
|
|
60 |
#'
|
|
61 |
#' @export
|
|
62 |
get_stats <- function(method_groups = "analyze_vars_numeric", |
|
63 |
stats_in = NULL, custom_stats_in = NULL, add_pval = FALSE) { |
|
64 | 1646x |
checkmate::assert_character(method_groups) |
65 | 1646x |
checkmate::assert_character(stats_in, null.ok = TRUE) |
66 | 1646x |
checkmate::assert_character(custom_stats_in, null.ok = TRUE) |
67 | 1646x |
checkmate::assert_flag(add_pval) |
68 | ||
69 |
# Default is still numeric
|
|
70 | 1646x |
if (any(method_groups == "analyze_vars")) { |
71 | 3x |
method_groups[method_groups == "analyze_vars"] <- "analyze_vars_numeric" |
72 |
}
|
|
73 | ||
74 | 1646x |
type_tmp <- ifelse(any(grepl("counts$", method_groups)), "counts", "numeric") # for pval checks |
75 | ||
76 |
# Defaults for loop
|
|
77 | 1646x |
out <- NULL |
78 | ||
79 |
# Loop for multiple method groups
|
|
80 | 1646x |
for (mgi in method_groups) { |
81 | 1673x |
if (mgi %in% names(tern_default_stats)) { |
82 | 1672x |
out_tmp <- tern_default_stats[[mgi]] |
83 |
} else { |
|
84 | 1x |
stop("The selected method group (", mgi, ") has no default statistical method.") |
85 |
}
|
|
86 | 1672x |
out <- unique(c(out, out_tmp)) |
87 |
}
|
|
88 | ||
89 |
# Add custom stats
|
|
90 | 1645x |
out <- c(out, custom_stats_in) |
91 | ||
92 |
# If you added pval to the stats_in you certainly want it
|
|
93 | 1645x |
if (!is.null(stats_in) && any(grepl("^pval", stats_in))) { |
94 | 136x |
stats_in_pval_value <- stats_in[grepl("^pval", stats_in)] |
95 | ||
96 |
# Must be only one value between choices
|
|
97 | 136x |
checkmate::assert_choice(stats_in_pval_value, c("pval", "pval_counts", "pvalue")) |
98 | ||
99 |
# Mismatch with counts and numeric
|
|
100 | 135x |
if (any(grepl("counts", method_groups)) && stats_in_pval_value != "pval_counts" || |
101 | 135x |
any(grepl("numeric", method_groups)) && stats_in_pval_value != "pval") { # nolint |
102 | 2x |
stop( |
103 | 2x |
"Inserted p-value (", stats_in_pval_value, ") is not valid for type ", |
104 | 2x |
type_tmp, ". Use ", paste(ifelse(stats_in_pval_value == "pval", "pval_counts", "pval")), |
105 | 2x |
" instead."
|
106 |
)
|
|
107 |
}
|
|
108 | ||
109 |
# Lets add it even if present (thanks to unique)
|
|
110 | 133x |
add_pval <- TRUE |
111 |
}
|
|
112 | ||
113 |
# Mainly used in "analyze_vars" but it could be necessary elsewhere
|
|
114 | 1642x |
if (isTRUE(add_pval)) { |
115 | 143x |
if (any(grepl("counts", method_groups))) { |
116 | 16x |
out <- unique(c(out, "pval_counts")) |
117 |
} else { |
|
118 | 127x |
out <- unique(c(out, "pval")) |
119 |
}
|
|
120 |
}
|
|
121 | ||
122 |
# Filtering for stats_in (character vector)
|
|
123 | 1642x |
if (!is.null(stats_in)) { |
124 | 1590x |
out <- intersect(stats_in, out) # It orders them too |
125 |
}
|
|
126 | ||
127 |
# If intersect did not find matches (and no pval?) -> error
|
|
128 | 1642x |
if (length(out) == 0) { |
129 | 2x |
stop( |
130 | 2x |
"The selected method group(s) (", paste0(method_groups, collapse = ", "), ")", |
131 | 2x |
" do not have the required default statistical methods:\n",
|
132 | 2x |
paste0(stats_in, collapse = " ") |
133 |
)
|
|
134 |
}
|
|
135 | ||
136 | 1640x |
out
|
137 |
}
|
|
138 | ||
139 |
#' @describeIn default_stats_formats_labels Get statistical *names* available for a given method
|
|
140 |
#' group (analyze function). Please use the `s_*` functions to get the statistical names.
|
|
141 |
#' @param stat_results (`list`)\cr list of statistical results. It should be used close to the end of
|
|
142 |
#' a statistical function. See examples for a structure with two statistical results and two groups.
|
|
143 |
#' @param stat_names_in (`character`)\cr custom modification of statistical values.
|
|
144 |
#'
|
|
145 |
#' @return
|
|
146 |
#' * `get_stat_names()` returns a named list of`character` vectors, indicating the names of
|
|
147 |
#' statistical outputs.
|
|
148 |
#'
|
|
149 |
#' @examples
|
|
150 |
#' stat_results <- list(
|
|
151 |
#' "n" = list("M" = 1, "F" = 2),
|
|
152 |
#' "count_fraction" = list("M" = c(1, 0.2), "F" = c(2, 0.1))
|
|
153 |
#' )
|
|
154 |
#' get_stat_names(stat_results)
|
|
155 |
#' get_stat_names(stat_results, list("n" = "argh"))
|
|
156 |
#'
|
|
157 |
#' @export
|
|
158 |
get_stat_names <- function(stat_results, stat_names_in = NULL) { |
|
159 | 1576x |
checkmate::assert_character(names(stat_results), min.len = 1) |
160 | 1576x |
checkmate::assert_list(stat_names_in, null.ok = TRUE) |
161 | ||
162 | 1576x |
stat_nms_from_stats <- lapply(stat_results, function(si) { |
163 | 5699x |
nm <- names(si) |
164 | 5699x |
if (is.null(nm)) { |
165 | 2718x |
nm <- rep(NA_character_, length(si)) # no statistical names |
166 |
}
|
|
167 | 5699x |
nm
|
168 |
}) |
|
169 | ||
170 |
# Modify some with custom stat names
|
|
171 | 1576x |
if (!is.null(stat_names_in)) { |
172 |
# Stats is the main
|
|
173 | 6x |
common_names <- intersect(names(stat_nms_from_stats), names(stat_names_in)) |
174 | 6x |
stat_nms_from_stats[common_names] <- stat_names_in[common_names] |
175 |
}
|
|
176 | ||
177 | 1576x |
stat_nms_from_stats
|
178 |
}
|
|
179 | ||
180 |
# Utility function used to separate custom stats (user-defined functions) from defaults
|
|
181 |
.split_std_from_custom_stats <- function(stats_in) { |
|
182 | 873x |
out <- list(default_stats = NULL, custom_stats = NULL, all_stats = NULL) |
183 | 873x |
if (is.list(stats_in)) { |
184 | 12x |
is_custom_fnc <- sapply(stats_in, is.function) |
185 | 12x |
checkmate::assert_list(stats_in[is_custom_fnc], types = "function", names = "named") |
186 | 12x |
out[["custom_stats"]] <- stats_in[is_custom_fnc] |
187 | 12x |
out[["default_stats"]] <- unlist(stats_in[!is_custom_fnc]) |
188 | 12x |
all_stats <- names(stats_in) # to keep the order |
189 | 12x |
all_stats[!is_custom_fnc] <- out[["default_stats"]] |
190 | 12x |
out[["all_stats"]] <- all_stats |
191 |
} else { |
|
192 | 861x |
out[["default_stats"]] <- out[["all_stats"]] <- stats_in |
193 |
}
|
|
194 | 873x |
out
|
195 |
}
|
|
196 | ||
197 |
# Utility function to apply statistical functions
|
|
198 |
.apply_stat_functions <- function(default_stat_fnc, custom_stat_fnc_list, args_list) { |
|
199 |
# Default checks
|
|
200 | 896x |
checkmate::assert_function(default_stat_fnc) |
201 | 896x |
checkmate::assert_list(custom_stat_fnc_list, types = "function", null.ok = TRUE, names = "named") |
202 | 896x |
checkmate::assert_list(args_list) |
203 | ||
204 |
# Checking custom stats have same formals
|
|
205 | 896x |
if (!is.null(custom_stat_fnc_list)) { |
206 | 12x |
fundamental_call_to_data <- names(formals(default_stat_fnc))[[1]] |
207 | 12x |
for (fnc in custom_stat_fnc_list) { |
208 | 17x |
if (!identical(names(formals(fnc))[[1]], fundamental_call_to_data)) { |
209 | 1x |
stop( |
210 | 1x |
"The first parameter of a custom statistical function needs to be the same (it can be `df` or `x`) ",
|
211 | 1x |
"as the default statistical function. In this case your custom function has ", names(formals(fnc))[[1]], |
212 | 1x |
" as first parameter, while the default function has ", fundamental_call_to_data, "." |
213 |
)
|
|
214 |
}
|
|
215 | 16x |
if (!any(names(formals(fnc)) == "...")) { |
216 | 1x |
stop( |
217 | 1x |
"The custom statistical function needs to have `...` as a parameter to accept additional arguments. ",
|
218 | 1x |
"In this case your custom function does not have `...`."
|
219 |
)
|
|
220 |
}
|
|
221 |
}
|
|
222 |
}
|
|
223 | ||
224 |
# Applying
|
|
225 | 894x |
out_default <- do.call(default_stat_fnc, args = args_list) |
226 | 892x |
out_custom <- lapply(custom_stat_fnc_list, function(fnc) do.call(fnc, args = args_list)) |
227 | ||
228 |
# Merging
|
|
229 | 892x |
c(out_default, out_custom) |
230 |
}
|
|
231 | ||
232 |
#' @describeIn default_stats_formats_labels Get formats corresponding to a list of statistics.
|
|
233 |
#' To check available defaults see list `tern::tern_default_formats`.
|
|
234 |
#'
|
|
235 |
#' @param formats_in (named `vector`)\cr custom formats to use instead of defaults. Can be a character vector with
|
|
236 |
#' values from [formatters::list_valid_format_labels()] or custom format functions. Defaults to `NULL` for any rows
|
|
237 |
#' with no value is provided.
|
|
238 |
#'
|
|
239 |
#' @return
|
|
240 |
#' * `get_formats_from_stats()` returns a named list of formats as strings or functions.
|
|
241 |
#'
|
|
242 |
#' @note Formats in `tern` and `rtables` can be functions that take in the table cell value and
|
|
243 |
#' return a string. This is well documented in `vignette("custom_appearance", package = "rtables")`.
|
|
244 |
#'
|
|
245 |
#' @examples
|
|
246 |
#' # Defaults formats
|
|
247 |
#' get_formats_from_stats(num_stats)
|
|
248 |
#' get_formats_from_stats(cnt_stats)
|
|
249 |
#' get_formats_from_stats(only_pval)
|
|
250 |
#' get_formats_from_stats(all_cnt_occ)
|
|
251 |
#'
|
|
252 |
#' # Addition of customs
|
|
253 |
#' get_formats_from_stats(all_cnt_occ, formats_in = c("fraction" = c("xx")))
|
|
254 |
#' get_formats_from_stats(all_cnt_occ, formats_in = list("fraction" = c("xx.xx", "xx")))
|
|
255 |
#'
|
|
256 |
#' @seealso [formatting_functions]
|
|
257 |
#'
|
|
258 |
#' @export
|
|
259 |
get_formats_from_stats <- function(stats, |
|
260 |
formats_in = NULL, |
|
261 |
levels_per_stats = NULL, |
|
262 |
tern_defaults = tern_default_formats) { |
|
263 | 1651x |
checkmate::assert_character(stats, min.len = 1) |
264 |
# It may be a list if there is a function in the formats
|
|
265 | 1651x |
if (checkmate::test_list(formats_in, null.ok = TRUE)) { |
266 | 1532x |
checkmate::assert_list(formats_in, null.ok = TRUE) |
267 |
# Or it may be a vector of characters
|
|
268 |
} else { |
|
269 | 119x |
checkmate::assert_character(formats_in, null.ok = TRUE) |
270 |
}
|
|
271 | 1651x |
checkmate::assert_list(levels_per_stats, null.ok = TRUE) |
272 | ||
273 |
# If unnamed formats given as formats_in and same number of stats, use one format per stat
|
|
274 |
if ( |
|
275 | 1651x |
!is.null(formats_in) && length(formats_in) == length(stats) && |
276 | 1651x |
is.null(names(formats_in)) && is.null(levels_per_stats) |
277 |
) { |
|
278 | 2x |
out <- as.list(formats_in) %>% setNames(stats) |
279 | 2x |
return(out) |
280 |
}
|
|
281 | ||
282 |
# If levels_per_stats not given, assume one row per statistic
|
|
283 | 359x |
if (is.null(levels_per_stats)) levels_per_stats <- as.list(stats) %>% setNames(stats) |
284 | ||
285 |
# Apply custom formats
|
|
286 | 1649x |
out <- .fill_in_vals_by_stats(levels_per_stats, formats_in, tern_defaults) |
287 | ||
288 |
# Default to NULL if no format
|
|
289 | 1649x |
case_input_is_not_stat <- unlist(out, use.names = FALSE) == unlist(levels_per_stats, use.names = FALSE) |
290 | 1649x |
out[names(out) == out | case_input_is_not_stat] <- list(NULL) |
291 | ||
292 | 1649x |
out
|
293 |
}
|
|
294 | ||
295 |
#' @describeIn default_stats_formats_labels Get labels corresponding to a list of statistics.
|
|
296 |
#' To check for available defaults see list `tern::tern_default_labels`.
|
|
297 |
#'
|
|
298 |
#' @param labels_in (named `character`)\cr custom labels to use instead of defaults. If no value is provided, the
|
|
299 |
#' variable level (if rows correspond to levels of a variable) or statistic name will be used as label.
|
|
300 |
#' @param label_attr_from_stats (named `list`)\cr if `labels_in = NULL`, then this will be used instead. It is a list
|
|
301 |
#' of values defined in statistical functions as default labels. Values are ignored if `labels_in` is provided or `""`
|
|
302 |
#' values are provided.
|
|
303 |
#'
|
|
304 |
#' @return
|
|
305 |
#' * `get_labels_from_stats()` returns a named list of labels as strings.
|
|
306 |
#'
|
|
307 |
#' @examples
|
|
308 |
#' # Defaults labels
|
|
309 |
#' get_labels_from_stats(num_stats)
|
|
310 |
#' get_labels_from_stats(cnt_stats)
|
|
311 |
#' get_labels_from_stats(only_pval)
|
|
312 |
#' get_labels_from_stats(all_cnt_occ)
|
|
313 |
#'
|
|
314 |
#' # Addition of customs
|
|
315 |
#' get_labels_from_stats(all_cnt_occ, labels_in = c("fraction" = "Fraction"))
|
|
316 |
#' get_labels_from_stats(all_cnt_occ, labels_in = list("fraction" = c("Some more fractions")))
|
|
317 |
#'
|
|
318 |
#' @export
|
|
319 |
get_labels_from_stats <- function(stats, |
|
320 |
labels_in = NULL, |
|
321 |
levels_per_stats = NULL, |
|
322 |
label_attr_from_stats = NULL, |
|
323 |
tern_defaults = tern_default_labels) { |
|
324 | 1621x |
checkmate::assert_character(stats, min.len = 1) |
325 | ||
326 |
# If labels_in is NULL, use label_attr_from_stats
|
|
327 | 1621x |
if (is.null(labels_in)) { |
328 | 1349x |
labels_in <- label_attr_from_stats |
329 | 1349x |
labels_in <- label_attr_from_stats[ |
330 | 1349x |
nzchar(label_attr_from_stats) & |
331 | 1349x |
!sapply(label_attr_from_stats, is.null) & |
332 | 1349x |
!is.na(label_attr_from_stats) |
333 |
]
|
|
334 |
}
|
|
335 | ||
336 |
# It may be a list
|
|
337 | 1621x |
if (checkmate::test_list(labels_in, null.ok = TRUE)) { |
338 | 1419x |
checkmate::assert_list(labels_in, null.ok = TRUE) |
339 |
# Or it may be a vector of characters
|
|
340 |
} else { |
|
341 | 202x |
checkmate::assert_character(labels_in, null.ok = TRUE) |
342 |
}
|
|
343 | 1621x |
checkmate::assert_list(levels_per_stats, null.ok = TRUE) |
344 | ||
345 |
# If unnamed labels given as labels_in and same number of stats, use one label per stat
|
|
346 |
if ( |
|
347 | 1621x |
!is.null(labels_in) && length(labels_in) == length(stats) && |
348 | 1621x |
is.null(names(labels_in)) && is.null(levels_per_stats) |
349 |
) { |
|
350 | 2x |
out <- as.list(labels_in) %>% setNames(stats) |
351 | 2x |
return(out) |
352 |
}
|
|
353 | ||
354 |
# If levels_per_stats not given, assume one row per statistic
|
|
355 | 327x |
if (is.null(levels_per_stats)) levels_per_stats <- as.list(stats) %>% setNames(stats) |
356 | ||
357 |
# Apply custom labels
|
|
358 | 1619x |
out <- .fill_in_vals_by_stats(levels_per_stats, labels_in, tern_defaults) |
359 | 1619x |
out
|
360 |
}
|
|
361 | ||
362 |
#' @describeIn default_stats_formats_labels Get row indent modifiers corresponding to a list of statistics/rows.
|
|
363 |
#'
|
|
364 |
#' @param indents_in (named `integer`)\cr custom row indent modifiers to use instead of defaults. Defaults to `0L` for
|
|
365 |
#' all values.
|
|
366 |
#' @param row_nms `r lifecycle::badge("deprecated")` Deprecation cycle started. See the `levels_per_stats` parameter
|
|
367 |
#' for details.
|
|
368 |
#'
|
|
369 |
#' @return
|
|
370 |
#' * `get_indents_from_stats()` returns a named list of indentation modifiers as integers.
|
|
371 |
#'
|
|
372 |
#' @examples
|
|
373 |
#' get_indents_from_stats(all_cnt_occ, indents_in = 3L)
|
|
374 |
#' get_indents_from_stats(all_cnt_occ, indents_in = list(count = 2L, count_fraction = 5L))
|
|
375 |
#' get_indents_from_stats(
|
|
376 |
#' all_cnt_occ,
|
|
377 |
#' indents_in = list(a = 2L, count.a = 1L, count.b = 5L)
|
|
378 |
#' )
|
|
379 |
#'
|
|
380 |
#' @export
|
|
381 |
get_indents_from_stats <- function(stats, |
|
382 |
indents_in = NULL, |
|
383 |
levels_per_stats = NULL, |
|
384 |
tern_defaults = as.list(rep(0L, length(stats))) %>% setNames(stats), |
|
385 |
row_nms = lifecycle::deprecated()) { |
|
386 | 1578x |
checkmate::assert_character(stats, min.len = 1) |
387 |
# It may be a list
|
|
388 | 1578x |
if (checkmate::test_list(indents_in, null.ok = TRUE)) { |
389 | 1490x |
checkmate::assert_list(indents_in, null.ok = TRUE) |
390 |
# Or it may be a vector of integers
|
|
391 |
} else { |
|
392 | 88x |
checkmate::assert_integerish(indents_in, null.ok = TRUE) |
393 |
}
|
|
394 | 1578x |
checkmate::assert_list(levels_per_stats, null.ok = TRUE) |
395 | ||
396 |
# If levels_per_stats not given, assume one row per statistic
|
|
397 | 288x |
if (is.null(levels_per_stats)) levels_per_stats <- as.list(stats) %>% setNames(stats) |
398 | ||
399 |
# Single indentation level for all rows
|
|
400 | 1578x |
if (is.null(names(indents_in)) && length(indents_in) == 1) { |
401 | 20x |
out <- rep(indents_in, length(levels_per_stats %>% unlist())) |
402 | 20x |
return(out) |
403 |
}
|
|
404 | ||
405 |
# Apply custom indentation
|
|
406 | 1558x |
out <- .fill_in_vals_by_stats(levels_per_stats, indents_in, tern_defaults) |
407 | 1558x |
out
|
408 |
}
|
|
409 | ||
410 |
# Function to loop over each stat and levels to set correct values
|
|
411 |
.fill_in_vals_by_stats <- function(levels_per_stats, user_in, tern_defaults) { |
|
412 | 4826x |
out <- list() |
413 | ||
414 | 4826x |
for (stat_i in names(levels_per_stats)) { |
415 |
# Get all levels of the statistic
|
|
416 | 7726x |
all_lvls <- levels_per_stats[[stat_i]] |
417 | ||
418 | 7726x |
if ((length(all_lvls) == 1 && all_lvls == stat_i) || is.null(all_lvls)) { # One row per statistic |
419 | 3972x |
out[[stat_i]] <- if (stat_i %in% names(user_in)) { # 1. Check for stat_i in user input |
420 | 776x |
user_in[[stat_i]] |
421 | 3972x |
} else if (stat_i %in% names(tern_defaults)) { # 2. Check for stat_i in tern defaults |
422 | 3148x |
tern_defaults[[stat_i]] |
423 | 3972x |
} else { # 3. Otherwise stat_i |
424 | 48x |
stat_i
|
425 |
}
|
|
426 |
} else { # One row per combination of variable level and statistic |
|
427 |
# Loop over levels for each statistic
|
|
428 | 3754x |
for (lev_i in all_lvls) { |
429 |
# Construct row name (stat_i.lev_i)
|
|
430 | 13522x |
row_nm <- paste(stat_i, lev_i, sep = ".") |
431 | ||
432 | 13522x |
out[[row_nm]] <- if (row_nm %in% names(user_in)) { # 1. Check for stat_i.lev_i in user input |
433 | 43x |
user_in[[row_nm]] |
434 | 13522x |
} else if (lev_i %in% names(user_in)) { # 2. Check for lev_i in user input |
435 | 52x |
user_in[[lev_i]] |
436 | 13522x |
} else if (stat_i %in% names(user_in)) { # 3. Check for stat_i in user input |
437 | 503x |
user_in[[stat_i]] |
438 | 13522x |
} else if (lev_i %in% names(tern_defaults)) { # 4. Check for lev_i in tern defaults (only used for labels) |
439 | 1549x |
tern_defaults[[lev_i]] |
440 | 13522x |
} else if (stat_i %in% names(tern_defaults)) { # 5. Check for stat_i in tern defaults |
441 | 8465x |
tern_defaults[[stat_i]] |
442 | 13522x |
} else { # 6. Otherwise lev_i |
443 | 2910x |
lev_i
|
444 |
}
|
|
445 |
}
|
|
446 |
}
|
|
447 |
}
|
|
448 | ||
449 | 4826x |
out
|
450 |
}
|
|
451 | ||
452 |
# Custom unlist function to retain NULL as "NULL" or NA
|
|
453 |
.unlist_keep_nulls <- function(lst, null_placeholder = "NULL", recursive = FALSE) { |
|
454 | 4785x |
lapply(lst, function(x) if (is.null(x)) null_placeholder else x) %>% |
455 | 4785x |
unlist(recursive = recursive) |
456 |
}
|
|
457 | ||
458 |
#' Update labels according to control specifications
|
|
459 |
#'
|
|
460 |
#' @description `r lifecycle::badge("stable")`
|
|
461 |
#'
|
|
462 |
#' Given a list of statistic labels and and a list of control parameters, updates labels with a relevant
|
|
463 |
#' control specification. For example, if control has element `conf_level` set to `0.9`, the default
|
|
464 |
#' label for statistic `mean_ci` will be updated to `"Mean 90% CI"`. Any labels that are supplied
|
|
465 |
#' via `labels_custom` will not be updated regardless of `control`.
|
|
466 |
#'
|
|
467 |
#' @param labels_default (named `character`)\cr a named vector of statistic labels to modify
|
|
468 |
#' according to the control specifications. Labels that are explicitly defined in `labels_custom` will
|
|
469 |
#' not be affected.
|
|
470 |
#' @param labels_custom (named `character`)\cr named vector of labels that are customized by
|
|
471 |
#' the user and should not be affected by `control`.
|
|
472 |
#' @param control (named `list`)\cr list of control parameters to apply to adjust default labels.
|
|
473 |
#'
|
|
474 |
#' @return A named character vector of labels with control specifications applied to relevant labels.
|
|
475 |
#'
|
|
476 |
#' @examples
|
|
477 |
#' control <- list(conf_level = 0.80, quantiles = c(0.1, 0.83), test_mean = 0.57)
|
|
478 |
#' get_labels_from_stats(c("mean_ci", "quantiles", "mean_pval")) %>%
|
|
479 |
#' labels_use_control(control = control)
|
|
480 |
#'
|
|
481 |
#' @export
|
|
482 |
labels_use_control <- function(labels_default, control, labels_custom = NULL) { |
|
483 | 21x |
if ("conf_level" %in% names(control)) { |
484 | 21x |
labels_default <- sapply( |
485 | 21x |
names(labels_default), |
486 | 21x |
function(x) { |
487 | 111x |
if (!x %in% names(labels_custom)) { |
488 | 108x |
gsub(labels_default[[x]], pattern = "[0-9]+% CI", replacement = f_conf_level(control[["conf_level"]])) |
489 |
} else { |
|
490 | 3x |
labels_default[[x]] |
491 |
}
|
|
492 |
}
|
|
493 |
)
|
|
494 |
}
|
|
495 | 21x |
if ("quantiles" %in% names(control) && "quantiles" %in% names(labels_default) && |
496 | 21x |
!"quantiles" %in% names(labels_custom)) { # nolint |
497 | 16x |
labels_default["quantiles"] <- gsub( |
498 | 16x |
"[0-9]+% and [0-9]+", paste0(control[["quantiles"]][1] * 100, "% and ", control[["quantiles"]][2] * 100, ""), |
499 | 16x |
labels_default["quantiles"] |
500 |
)
|
|
501 |
}
|
|
502 | 21x |
if ("quantiles" %in% names(control) && "quantiles_lower" %in% names(labels_default) && |
503 | 21x |
!"quantiles_lower" %in% names(labels_custom)) { # nolint |
504 | 6x |
labels_default["quantiles_lower"] <- gsub( |
505 | 6x |
"[0-9]+%-ile", paste0(control[["quantiles"]][1] * 100, "%-ile", ""), |
506 | 6x |
labels_default["quantiles_lower"] |
507 |
)
|
|
508 |
}
|
|
509 | 21x |
if ("quantiles" %in% names(control) && "quantiles_upper" %in% names(labels_default) && |
510 | 21x |
!"quantiles_upper" %in% names(labels_custom)) { # nolint |
511 | 6x |
labels_default["quantiles_upper"] <- gsub( |
512 | 6x |
"[0-9]+%-ile", paste0(control[["quantiles"]][2] * 100, "%-ile", ""), |
513 | 6x |
labels_default["quantiles_upper"] |
514 |
)
|
|
515 |
}
|
|
516 | 21x |
if ("test_mean" %in% names(control) && "mean_pval" %in% names(labels_default) && |
517 | 21x |
!"mean_pval" %in% names(labels_custom)) { # nolint |
518 | 2x |
labels_default["mean_pval"] <- gsub( |
519 | 2x |
"p-value \\(H0: mean = [0-9\\.]+\\)", f_pval(control[["test_mean"]]), labels_default["mean_pval"] |
520 |
)
|
|
521 |
}
|
|
522 | ||
523 | 21x |
labels_default
|
524 |
}
|
|
525 | ||
526 |
# tern_default_stats -----------------------------------------------------------
|
|
527 |
#' @describeIn default_stats_formats_labels Named list of available statistics by method group for `tern`.
|
|
528 |
#'
|
|
529 |
#' @format
|
|
530 |
#' * `tern_default_stats` is a named list of available statistics, with each element
|
|
531 |
#' named for their corresponding statistical method group.
|
|
532 |
#'
|
|
533 |
#' @export
|
|
534 |
tern_default_stats <- list( |
|
535 |
abnormal = c("fraction"), |
|
536 |
abnormal_by_baseline = c("fraction"), |
|
537 |
abnormal_by_marked = c("count_fraction", "count_fraction_fixed_dp"), |
|
538 |
abnormal_by_worst_grade = c("count_fraction", "count_fraction_fixed_dp"), |
|
539 |
abnormal_lab_worsen_by_baseline = c("fraction"), |
|
540 |
analyze_patients_exposure_in_cols = c("n_patients", "sum_exposure"), |
|
541 |
analyze_vars_counts = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "fraction", "n_blq"), |
|
542 |
analyze_vars_numeric = c( |
|
543 |
"n", "sum", "mean", "sd", "se", "mean_sd", "mean_se", "mean_ci", "mean_sei", "mean_sdi", "mean_pval", |
|
544 |
"median", "mad", "median_ci", "quantiles", "iqr", "range", "min", "max", "median_range", "cv", |
|
545 |
"geom_mean", "geom_sd", "geom_mean_sd", "geom_mean_ci", "geom_cv", |
|
546 |
"median_ci_3d",
|
|
547 |
"mean_ci_3d", "geom_mean_ci_3d" |
|
548 |
),
|
|
549 |
count_cumulative = c("count_fraction"), |
|
550 |
count_missed_doses = c("n", "count_fraction"), |
|
551 |
count_occurrences = c("count", "count_fraction", "count_fraction_fixed_dp", "fraction"), |
|
552 |
count_occurrences_by_grade = c("count_fraction", "count_fraction_fixed_dp"), |
|
553 |
count_patients_with_event = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "n_blq"), |
|
554 |
count_patients_with_flags = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "n_blq"), |
|
555 |
count_values = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "n_blq"), |
|
556 |
coxph_pairwise = c("pvalue", "hr", "hr_ci", "n_tot", "n_tot_events"), |
|
557 |
estimate_incidence_rate = c("person_years", "n_events", "rate", "rate_ci", "n_unique", "n_rate"), |
|
558 |
estimate_multinomial_response = c("n_prop", "prop_ci"), |
|
559 |
estimate_odds_ratio = c("or_ci", "n_tot"), |
|
560 |
estimate_proportion = c("n_prop", "prop_ci"), |
|
561 |
estimate_proportion_diff = c("diff", "diff_ci"), |
|
562 |
summarize_ancova = c("n", "lsmean", "lsmean_diff", "lsmean_diff_ci", "pval"), |
|
563 |
summarize_coxreg = c("n", "hr", "ci", "pval", "pval_inter"), |
|
564 |
summarize_glm_count = c("n", "rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"), |
|
565 |
summarize_num_patients = c("unique", "nonunique", "unique_count"), |
|
566 |
summarize_patients_events_in_cols = c("unique", "all"), |
|
567 |
surv_time = c( |
|
568 |
"median", "median_ci", "median_ci_3d", "quantiles", |
|
569 |
"quantiles_lower", "quantiles_upper", "range_censor", "range_event", "range" |
|
570 |
),
|
|
571 |
surv_timepoint = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci", "event_free_rate_3d"), |
|
572 |
surv_timepoint_diff = c("rate_diff", "rate_diff_ci", "ztest_pval", "rate_diff_ci_3d"), |
|
573 |
tabulate_rsp_biomarkers = c("n_tot", "n_rsp", "prop", "or", "ci", "pval"), |
|
574 |
tabulate_rsp_subgroups = c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval", "riskdiff"), |
|
575 |
tabulate_survival_biomarkers = c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"), |
|
576 |
tabulate_survival_subgroups = c("n_tot_events", "n_events", "n_tot", "n", "median", "hr", "ci", "pval", "riskdiff"), |
|
577 |
test_proportion_diff = c("pval") |
|
578 |
)
|
|
579 | ||
580 |
# tern_default_formats ---------------------------------------------------------
|
|
581 |
#' @describeIn default_stats_formats_labels Named vector of default formats for `tern`.
|
|
582 |
#'
|
|
583 |
#' @format
|
|
584 |
#' * `tern_default_formats` is a named vector of available default formats, with each element
|
|
585 |
#' named for their corresponding statistic.
|
|
586 |
#'
|
|
587 |
#' @export
|
|
588 |
tern_default_formats <- c( |
|
589 |
ci = list(format_extreme_values_ci(2L)), |
|
590 |
count = "xx.", |
|
591 |
count_fraction = format_count_fraction, |
|
592 |
count_fraction_fixed_dp = format_count_fraction_fixed_dp, |
|
593 |
cv = "xx.x", |
|
594 |
event_free_rate = "xx.xx", |
|
595 |
fraction = format_fraction_fixed_dp, |
|
596 |
geom_cv = "xx.x", |
|
597 |
geom_mean = "xx.x", |
|
598 |
geom_mean_ci = "(xx.xx, xx.xx)", |
|
599 |
geom_mean_ci_3d = "xx.xx (xx.xx - xx.xx)", |
|
600 |
geom_mean_sd = "xx.x (xx.x)", |
|
601 |
geom_sd = "xx.x", |
|
602 |
hr = list(format_extreme_values(2L)), |
|
603 |
hr_ci = "(xx.xx, xx.xx)", |
|
604 |
hr_ci_3d = "xx.xx (xx.xx - xx.xx)", |
|
605 |
iqr = "xx.x", |
|
606 |
lsmean = "xx.xx", |
|
607 |
lsmean_diff = "xx.xx", |
|
608 |
lsmean_diff_ci = "(xx.xx, xx.xx)", |
|
609 |
mad = "xx.x", |
|
610 |
max = "xx.x", |
|
611 |
mean = "xx.x", |
|
612 |
mean_ci = "(xx.xx, xx.xx)", |
|
613 |
mean_ci_3d = "xx.xx (xx.xx - xx.xx)", |
|
614 |
mean_pval = "x.xxxx | (<0.0001)", |
|
615 |
mean_sd = "xx.x (xx.x)", |
|
616 |
mean_sdi = "(xx.xx, xx.xx)", |
|
617 |
mean_se = "xx.x (xx.x)", |
|
618 |
mean_sei = "(xx.xx, xx.xx)", |
|
619 |
median = "xx.x", |
|
620 |
median_ci = "(xx.xx, xx.xx)", |
|
621 |
median_ci_3d = "xx.xx (xx.xx - xx.xx)", |
|
622 |
median_range = "xx.x (xx.x - xx.x)", |
|
623 |
min = "xx.x", |
|
624 |
n = "xx.", |
|
625 |
n_blq = "xx.", |
|
626 |
n_events = "xx", |
|
627 |
n_patients = "xx (xx.x%)", |
|
628 |
n_prop = "xx (xx.x%)", |
|
629 |
n_rate = "xx (xx.x)", |
|
630 |
n_rsp = "xx", |
|
631 |
n_tot = "xx", |
|
632 |
n_tot_events = "xx", |
|
633 |
n_unique = "xx", |
|
634 |
nonunique = "xx", |
|
635 |
or = list(format_extreme_values(2L)), |
|
636 |
or_ci = "xx.xx (xx.xx - xx.xx)", |
|
637 |
person_years = "xx.x", |
|
638 |
prop = "xx.x%", |
|
639 |
prop_ci = "(xx.x, xx.x)", |
|
640 |
pt_at_risk = "xx", |
|
641 |
pval = "x.xxxx | (<0.0001)", |
|
642 |
pvalue = "x.xxxx | (<0.0001)", |
|
643 |
pval_counts = "x.xxxx | (<0.0001)", |
|
644 |
quantiles = "xx.x - xx.x", |
|
645 |
quantiles_lower = "xx.xx (xx.xx - xx.xx)", |
|
646 |
quantiles_upper = "xx.xx (xx.xx - xx.xx)", |
|
647 |
range = "xx.x - xx.x", |
|
648 |
range_censor = "xx.x to xx.x", |
|
649 |
range_event = "xx.x to xx.x", |
|
650 |
rate = "xx.xxxx", |
|
651 |
rate_ci = "(xx.xxxx, xx.xxxx)", |
|
652 |
rate_diff = "xx.xx", |
|
653 |
rate_diff_ci = "(xx.xx, xx.xx)", |
|
654 |
rate_diff_ci_3d = format_xx("xx.xx (xx.xx, xx.xx)"), |
|
655 |
rate_ratio = "xx.xxxx", |
|
656 |
rate_ratio_ci = "(xx.xxxx, xx.xxxx)", |
|
657 |
rate_se = "xx.xx", |
|
658 |
riskdiff = "xx.x (xx.x - xx.x)", |
|
659 |
sd = "xx.x", |
|
660 |
se = "xx.x", |
|
661 |
sum = "xx.x", |
|
662 |
sum_exposure = "xx", |
|
663 |
unique = format_count_fraction_fixed_dp, |
|
664 |
unique_count = "xx", |
|
665 |
ztest_pval = "x.xxxx | (<0.0001)" |
|
666 |
)
|
|
667 | ||
668 |
# tern_default_labels ----------------------------------------------------------
|
|
669 |
#' @describeIn default_stats_formats_labels Named `character` vector of default labels for `tern`.
|
|
670 |
#'
|
|
671 |
#' @format
|
|
672 |
#' * `tern_default_labels` is a named `character` vector of available default labels, with each element
|
|
673 |
#' named for their corresponding statistic.
|
|
674 |
#'
|
|
675 |
#' @export
|
|
676 |
tern_default_labels <- c( |
|
677 |
cv = "CV (%)", |
|
678 |
iqr = "IQR", |
|
679 |
geom_cv = "CV % Geometric Mean", |
|
680 |
geom_mean = "Geometric Mean", |
|
681 |
geom_mean_sd = "Geometric Mean (SD)", |
|
682 |
geom_mean_ci = "Geometric Mean 95% CI", |
|
683 |
geom_mean_ci_3d = "Geometric Mean (95% CI)", |
|
684 |
geom_sd = "Geometric SD", |
|
685 |
mad = "Median Absolute Deviation", |
|
686 |
max = "Maximum", |
|
687 |
mean = "Mean", |
|
688 |
mean_ci = "Mean 95% CI", |
|
689 |
mean_ci_3d = "Mean (95% CI)", |
|
690 |
mean_pval = "Mean p-value (H0: mean = 0)", |
|
691 |
mean_sd = "Mean (SD)", |
|
692 |
mean_sdi = "Mean -/+ 1xSD", |
|
693 |
mean_se = "Mean (SE)", |
|
694 |
mean_sei = "Mean -/+ 1xSE", |
|
695 |
median = "Median", |
|
696 |
median_ci = "Median 95% CI", |
|
697 |
median_ci_3d = "Median (95% CI)", |
|
698 |
median_range = "Median (Min - Max)", |
|
699 |
min = "Minimum", |
|
700 |
n = "n", |
|
701 |
n_blq = "n_blq", |
|
702 |
nonunique = "Number of events", |
|
703 |
pval = "p-value (t-test)", # Default for numeric |
|
704 |
pval_counts = "p-value (chi-squared test)", # Default for counts |
|
705 |
quantiles = "25% and 75%-ile", |
|
706 |
quantiles_lower = "25%-ile (95% CI)", |
|
707 |
quantiles_upper = "75%-ile (95% CI)", |
|
708 |
range = "Min - Max", |
|
709 |
range_censor = "Range (censored)", |
|
710 |
range_event = "Range (event)", |
|
711 |
rate = "Adjusted Rate", |
|
712 |
rate_ratio = "Adjusted Rate Ratio", |
|
713 |
sd = "SD", |
|
714 |
se = "SE", |
|
715 |
sum = "Sum", |
|
716 |
unique = "Number of patients with at least one event" |
|
717 |
)
|
|
718 | ||
719 |
#' @describeIn default_stats_formats_labels Quick function to retrieve default formats for summary statistics:
|
|
720 |
#' [analyze_vars()] and [analyze_vars_in_cols()] principally.
|
|
721 |
#'
|
|
722 |
#' @param type (`string`)\cr `"numeric"` or `"counts"`.
|
|
723 |
#'
|
|
724 |
#' @return
|
|
725 |
#' * `summary_formats()` returns a named `vector` of default statistic formats for the given data type.
|
|
726 |
#'
|
|
727 |
#' @examples
|
|
728 |
#' summary_formats()
|
|
729 |
#' summary_formats(type = "counts", include_pval = TRUE)
|
|
730 |
#'
|
|
731 |
#' @export
|
|
732 |
summary_formats <- function(type = "numeric", include_pval = FALSE) { |
|
733 | 2x |
met_grp <- paste0(c("analyze_vars", type), collapse = "_") |
734 | 2x |
get_formats_from_stats(get_stats(met_grp, add_pval = include_pval)) |
735 |
}
|
|
736 | ||
737 |
#' @describeIn default_stats_formats_labels Quick function to retrieve default labels for summary statistics.
|
|
738 |
#' Returns labels of descriptive statistics which are understood by `rtables`. Similar to `summary_formats`.
|
|
739 |
#'
|
|
740 |
#' @param include_pval (`flag`)\cr same as the `add_pval` argument in [get_stats()].
|
|
741 |
#'
|
|
742 |
#' @details
|
|
743 |
#' `summary_*` quick get functions for labels or formats uses `get_stats` and `get_labels_from_stats` or
|
|
744 |
#' `get_formats_from_stats` respectively to retrieve relevant information.
|
|
745 |
#'
|
|
746 |
#' @return
|
|
747 |
#' * `summary_labels` returns a named `vector` of default statistic labels for the given data type.
|
|
748 |
#'
|
|
749 |
#' @examples
|
|
750 |
#' summary_labels()
|
|
751 |
#' summary_labels(type = "counts", include_pval = TRUE)
|
|
752 |
#'
|
|
753 |
#' @export
|
|
754 |
summary_labels <- function(type = "numeric", include_pval = FALSE) { |
|
755 | 2x |
met_grp <- paste0(c("analyze_vars", type), collapse = "_") |
756 | 2x |
get_labels_from_stats(get_stats(met_grp, add_pval = include_pval)) |
757 |
}
|
1 |
#' Line plot with optional table
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Line plot with optional table.
|
|
6 |
#'
|
|
7 |
#' @inheritParams argument_convention
|
|
8 |
#' @param alt_counts_df (`data.frame` or `NULL`)\cr data set that will be used (only)
|
|
9 |
#' to counts objects in groups for stratification.
|
|
10 |
#' @param variables (named `character`) vector of variable names in `df` which should include:
|
|
11 |
#' * `x` (`string`)\cr name of x-axis variable.
|
|
12 |
#' * `y` (`string`)\cr name of y-axis variable.
|
|
13 |
#' * `group_var` (`string` or `NULL`)\cr name of grouping variable (or strata), i.e. treatment arm.
|
|
14 |
#' Can be `NA` to indicate lack of groups.
|
|
15 |
#' * `subject_var` (`string` or `NULL`)\cr name of subject variable. Only applies if `group_var` is
|
|
16 |
#' not NULL.
|
|
17 |
#' * `paramcd` (`string` or `NA`)\cr name of the variable for parameter's code. Used for y-axis label and plot's
|
|
18 |
#' subtitle. Can be `NA` if `paramcd` is not to be added to the y-axis label or subtitle.
|
|
19 |
#' * `y_unit` (`string` or `NA`)\cr name of variable with units of `y`. Used for y-axis label and plot's subtitle.
|
|
20 |
#' Can be `NA` if y unit is not to be added to the y-axis label or subtitle.
|
|
21 |
#' * `facet_var` (`string` or `NA`)\cr name of the secondary grouping variable used for plot faceting, i.e. treatment
|
|
22 |
#' arm. Can be `NA` to indicate lack of groups.
|
|
23 |
#' @param mid (`character` or `NULL`)\cr names of the statistics that will be plotted as midpoints.
|
|
24 |
#' All the statistics indicated in `mid` variable must be present in the object returned by `sfun`,
|
|
25 |
#' and be of a `double` or `numeric` type vector of length one.
|
|
26 |
#' @param interval (`character` or `NULL`)\cr names of the statistics that will be plotted as intervals.
|
|
27 |
#' All the statistics indicated in `interval` variable must be present in the object returned by `sfun`,
|
|
28 |
#' and be of a `double` or `numeric` type vector of length two. Set `interval = NULL` if intervals should not be
|
|
29 |
#' added to the plot.
|
|
30 |
#' @param whiskers (`character`)\cr names of the interval whiskers that will be plotted. Names must match names
|
|
31 |
#' of the list element `interval` that will be returned by `sfun` (e.g. `mean_ci_lwr` element of
|
|
32 |
#' `sfun(x)[["mean_ci"]]`). It is possible to specify one whisker only, or to suppress all whiskers by setting
|
|
33 |
#' `interval = NULL`.
|
|
34 |
#' @param table (`character` or `NULL`)\cr names of the statistics that will be displayed in the table below the plot.
|
|
35 |
#' All the statistics indicated in `table` variable must be present in the object returned by `sfun`.
|
|
36 |
#' @param sfun (`function`)\cr the function to compute the values of required statistics. It must return a named `list`
|
|
37 |
#' with atomic vectors. The names of the `list` elements refer to the names of the statistics and are used by `mid`,
|
|
38 |
#' `interval`, `table`. It must be able to accept as input a vector with data for which statistics are computed.
|
|
39 |
#' @param ... optional arguments to `sfun`.
|
|
40 |
#' @param mid_type (`string`)\cr controls the type of the `mid` plot, it can be point (`"p"`), line (`"l"`),
|
|
41 |
#' or point and line (`"pl"`).
|
|
42 |
#' @param mid_point_size (`numeric(1)`)\cr font size of the `mid` plot points.
|
|
43 |
#' @param position (`character` or `call`)\cr geom element position adjustment, either as a string, or the result of
|
|
44 |
#' a call to a position adjustment function.
|
|
45 |
#' @param legend_title (`string`)\cr legend title.
|
|
46 |
#' @param legend_position (`string`)\cr the position of the plot legend (`"none"`, `"left"`, `"right"`, `"bottom"`,
|
|
47 |
#' `"top"`, or a two-element numeric vector).
|
|
48 |
#' @param ggtheme (`theme`)\cr a graphical theme as provided by `ggplot2` to control styling of the plot.
|
|
49 |
#' @param xticks (`numeric` or `NULL`)\cr numeric vector of tick positions or a single number with spacing
|
|
50 |
#' between ticks on the x-axis, for use when `variables$x` is numeric. If `NULL` (default), [labeling::extended()] is
|
|
51 |
#' used to determine optimal tick positions on the x-axis. If `variables$x` is not numeric, this argument is ignored.
|
|
52 |
#' @param x_lab (`string` or `NULL`)\cr x-axis label. If `NULL` then no label will be added.
|
|
53 |
#' @param y_lab (`string` or `NULL`)\cr y-axis label. If `NULL` then no label will be added.
|
|
54 |
#' @param y_lab_add_paramcd (`flag`)\cr whether `paramcd`, i.e. `unique(df[[variables["paramcd"]]])` should be added
|
|
55 |
#' to the y-axis label (`y_lab`).
|
|
56 |
#' @param y_lab_add_unit (`flag`)\cr whether y-axis unit, i.e. `unique(df[[variables["y_unit"]]])` should be added
|
|
57 |
#' to the y-axis label (`y_lab`).
|
|
58 |
#' @param title (`string`)\cr plot title.
|
|
59 |
#' @param subtitle (`string`)\cr plot subtitle.
|
|
60 |
#' @param subtitle_add_paramcd (`flag`)\cr whether `paramcd`, i.e. `unique(df[[variables["paramcd"]]])` should be
|
|
61 |
#' added to the plot's subtitle (`subtitle`).
|
|
62 |
#' @param subtitle_add_unit (`flag`)\cr whether the y-axis unit, i.e. `unique(df[[variables["y_unit"]]])` should be
|
|
63 |
#' added to the plot's subtitle (`subtitle`).
|
|
64 |
#' @param caption (`string`)\cr optional caption below the plot.
|
|
65 |
#' @param table_format (named `character` or `NULL`)\cr format patterns for descriptive statistics used in the
|
|
66 |
#' (optional) table appended to the plot. It is passed directly to the `h_format_row` function through the `format`
|
|
67 |
#' parameter. Names of `table_format` must match the names of statistics returned by `sfun` function.
|
|
68 |
#' @param table_labels (named `character` or `NULL`)\cr labels for descriptive statistics used in the (optional) table
|
|
69 |
#' appended to the plot. Names of `table_labels` must match the names of statistics returned by `sfun` function.
|
|
70 |
#' @param table_font_size (`numeric(1)`)\cr font size of the text in the table.
|
|
71 |
#' @param newpage `r lifecycle::badge("deprecated")` not used.
|
|
72 |
#' @param col (`character`)\cr color(s). See `?ggplot2::aes_colour_fill_alpha` for example values.
|
|
73 |
#' @param linetype (`character`)\cr line type(s). See `?ggplot2::aes_linetype_size_shape` for example values.
|
|
74 |
#' @param errorbar_width (`numeric(1)`)\cr width of the error bars.
|
|
75 |
#' @param rel_height_plot (`proportion`)\cr proportion of total figure height to allocate to the line plot.
|
|
76 |
#' Relative height of annotation table is then `1 - rel_height_plot`. If `table = NULL`, this parameter is ignored.
|
|
77 |
#' @param as_list (`flag`)\cr whether the two `ggplot` objects should be returned as a list when `table` is not `NULL`.
|
|
78 |
#' If `TRUE`, a named list with two elements, `plot` and `table`, will be returned. If `FALSE` (default) the
|
|
79 |
#' annotation table is printed below the plot via [cowplot::plot_grid()].
|
|
80 |
#'
|
|
81 |
#' @return A `ggplot` line plot (and statistics table if applicable).
|
|
82 |
#'
|
|
83 |
#' @examples
|
|
84 |
#'
|
|
85 |
#' adsl <- tern_ex_adsl
|
|
86 |
#' adlb <- tern_ex_adlb %>% dplyr::filter(ANL01FL == "Y", PARAMCD == "ALT", AVISIT != "SCREENING")
|
|
87 |
#' adlb$AVISIT <- droplevels(adlb$AVISIT)
|
|
88 |
#' adlb <- dplyr::mutate(adlb, AVISIT = forcats::fct_reorder(AVISIT, AVISITN, min))
|
|
89 |
#'
|
|
90 |
#' # Mean with CI
|
|
91 |
#' g_lineplot(adlb, adsl, subtitle = "Laboratory Test:")
|
|
92 |
#'
|
|
93 |
#' # Mean with CI, no stratification with group_var
|
|
94 |
#' g_lineplot(adlb, variables = control_lineplot_vars(group_var = NA))
|
|
95 |
#'
|
|
96 |
#' # Mean, upper whisker of CI, no group_var(strata) counts N
|
|
97 |
#' g_lineplot(
|
|
98 |
#' adlb,
|
|
99 |
#' whiskers = "mean_ci_upr",
|
|
100 |
#' title = "Plot of Mean and Upper 95% Confidence Limit by Visit"
|
|
101 |
#' )
|
|
102 |
#'
|
|
103 |
#' # Median with CI
|
|
104 |
#' g_lineplot(
|
|
105 |
#' adlb,
|
|
106 |
#' adsl,
|
|
107 |
#' mid = "median",
|
|
108 |
#' interval = "median_ci",
|
|
109 |
#' whiskers = c("median_ci_lwr", "median_ci_upr"),
|
|
110 |
#' title = "Plot of Median and 95% Confidence Limits by Visit"
|
|
111 |
#' )
|
|
112 |
#'
|
|
113 |
#' # Mean, +/- SD
|
|
114 |
#' g_lineplot(adlb, adsl,
|
|
115 |
#' interval = "mean_sdi",
|
|
116 |
#' whiskers = c("mean_sdi_lwr", "mean_sdi_upr"),
|
|
117 |
#' title = "Plot of Median +/- SD by Visit"
|
|
118 |
#' )
|
|
119 |
#'
|
|
120 |
#' # Mean with CI plot with stats table
|
|
121 |
#' g_lineplot(adlb, adsl, table = c("n", "mean", "mean_ci"))
|
|
122 |
#'
|
|
123 |
#' # Mean with CI, table and customized confidence level
|
|
124 |
#' g_lineplot(
|
|
125 |
#' adlb,
|
|
126 |
#' adsl,
|
|
127 |
#' table = c("n", "mean", "mean_ci"),
|
|
128 |
#' control = control_analyze_vars(conf_level = 0.80),
|
|
129 |
#' title = "Plot of Mean and 80% Confidence Limits by Visit"
|
|
130 |
#' )
|
|
131 |
#'
|
|
132 |
#' # Mean with CI, table, filtered data
|
|
133 |
#' adlb_f <- dplyr::filter(adlb, ARMCD != "ARM A" | AVISIT == "BASELINE")
|
|
134 |
#' g_lineplot(adlb_f, table = c("n", "mean"))
|
|
135 |
#'
|
|
136 |
#' @export
|
|
137 |
g_lineplot <- function(df, |
|
138 |
alt_counts_df = NULL, |
|
139 |
variables = control_lineplot_vars(), |
|
140 |
mid = "mean", |
|
141 |
interval = "mean_ci", |
|
142 |
whiskers = c("mean_ci_lwr", "mean_ci_upr"), |
|
143 |
table = NULL, |
|
144 |
sfun = s_summary, |
|
145 |
...,
|
|
146 |
mid_type = "pl", |
|
147 |
mid_point_size = 2, |
|
148 |
position = ggplot2::position_dodge(width = 0.4), |
|
149 |
legend_title = NULL, |
|
150 |
legend_position = "bottom", |
|
151 |
ggtheme = nestcolor::theme_nest(), |
|
152 |
xticks = NULL, |
|
153 |
xlim = NULL, |
|
154 |
ylim = NULL, |
|
155 |
x_lab = obj_label(df[[variables[["x"]]]]), |
|
156 |
y_lab = NULL, |
|
157 |
y_lab_add_paramcd = TRUE, |
|
158 |
y_lab_add_unit = TRUE, |
|
159 |
title = "Plot of Mean and 95% Confidence Limits by Visit", |
|
160 |
subtitle = "", |
|
161 |
subtitle_add_paramcd = TRUE, |
|
162 |
subtitle_add_unit = TRUE, |
|
163 |
caption = NULL, |
|
164 |
table_format = NULL, |
|
165 |
table_labels = NULL, |
|
166 |
table_font_size = 3, |
|
167 |
errorbar_width = 0.45, |
|
168 |
newpage = lifecycle::deprecated(), |
|
169 |
col = NULL, |
|
170 |
linetype = NULL, |
|
171 |
rel_height_plot = 0.5, |
|
172 |
as_list = FALSE) { |
|
173 | 13x |
checkmate::assert_character(variables, any.missing = TRUE) |
174 | 13x |
checkmate::assert_character(mid, null.ok = TRUE) |
175 | 13x |
checkmate::assert_character(interval, null.ok = TRUE) |
176 | 13x |
checkmate::assert_character(col, null.ok = TRUE) |
177 | 13x |
checkmate::assert_character(linetype, null.ok = TRUE) |
178 | 13x |
checkmate::assert_numeric(xticks, null.ok = TRUE) |
179 | 13x |
checkmate::assert_numeric(xlim, finite = TRUE, any.missing = FALSE, len = 2, sorted = TRUE, null.ok = TRUE) |
180 | 13x |
checkmate::assert_numeric(ylim, finite = TRUE, any.missing = FALSE, len = 2, sorted = TRUE, null.ok = TRUE) |
181 | 13x |
checkmate::assert_number(errorbar_width, lower = 0) |
182 | 13x |
checkmate::assert_string(title, null.ok = TRUE) |
183 | 13x |
checkmate::assert_string(subtitle, null.ok = TRUE) |
184 | 13x |
assert_proportion_value(rel_height_plot) |
185 | 13x |
checkmate::assert_logical(as_list) |
186 | ||
187 | 13x |
if (!is.null(table)) { |
188 | 5x |
table_format <- get_formats_from_stats(table) |
189 | 5x |
table_labels <- get_labels_from_stats(table) %>% .unlist_keep_nulls() |
190 |
}
|
|
191 | ||
192 | 13x |
extra_args <- list(...) |
193 | 13x |
if ("control" %in% names(extra_args)) { |
194 | 4x |
if (!is.null(table) && all(table_labels == .unlist_keep_nulls(get_labels_from_stats(table)))) { |
195 | 3x |
table_labels <- table_labels %>% labels_use_control(extra_args[["control"]]) |
196 |
}
|
|
197 |
}
|
|
198 | ||
199 | 13x |
if (is.character(interval)) { |
200 | 13x |
checkmate::assert_vector(whiskers, min.len = 0, max.len = 2) |
201 |
}
|
|
202 | ||
203 | 13x |
if (length(whiskers) == 1) { |
204 | ! |
checkmate::assert_character(mid) |
205 |
}
|
|
206 | ||
207 | 13x |
if (is.character(mid)) { |
208 | 13x |
checkmate::assert_scalar(mid_type) |
209 | 13x |
checkmate::assert_subset(mid_type, c("pl", "p", "l")) |
210 |
}
|
|
211 | ||
212 | 13x |
x <- variables[["x"]] |
213 | 13x |
y <- variables[["y"]] |
214 | 13x |
paramcd <- variables["paramcd"] # NA if paramcd == NA or it is not in variables |
215 | 13x |
y_unit <- variables["y_unit"] # NA if y_unit == NA or it is not in variables |
216 | 13x |
if (is.na(variables["group_var"])) { |
217 | 1x |
group_var <- NULL # NULL if group_var == NA or it is not in variables |
218 |
} else { |
|
219 | 12x |
group_var <- variables[["group_var"]] |
220 | 12x |
subject_var <- variables[["subject_var"]] |
221 |
}
|
|
222 | 13x |
if (is.na(variables["facet_var"])) { |
223 | 12x |
facet_var <- NULL # NULL if facet_var == NA or it is not in variables |
224 |
} else { |
|
225 | 1x |
facet_var <- variables[["facet_var"]] |
226 |
}
|
|
227 | 13x |
checkmate::assert_flag(y_lab_add_paramcd, null.ok = TRUE) |
228 | 13x |
checkmate::assert_flag(subtitle_add_paramcd, null.ok = TRUE) |
229 | 13x |
if ((!is.null(y_lab) && y_lab_add_paramcd) || (!is.null(subtitle) && subtitle_add_paramcd)) { |
230 | 13x |
checkmate::assert_false(is.na(paramcd)) |
231 | 13x |
checkmate::assert_scalar(unique(df[[paramcd]])) |
232 |
}
|
|
233 | ||
234 | 13x |
checkmate::assert_flag(y_lab_add_unit, null.ok = TRUE) |
235 | 13x |
checkmate::assert_flag(subtitle_add_unit, null.ok = TRUE) |
236 | 13x |
if ((!is.null(y_lab) && y_lab_add_unit) || (!is.null(subtitle) && subtitle_add_unit)) { |
237 | 13x |
checkmate::assert_false(is.na(y_unit)) |
238 | 13x |
checkmate::assert_scalar(unique(df[[y_unit]])) |
239 |
}
|
|
240 | ||
241 | 13x |
if (!is.null(group_var) && !is.null(alt_counts_df)) { |
242 | 8x |
checkmate::assert_set_equal(unique(alt_counts_df[[group_var]]), unique(df[[group_var]])) |
243 |
}
|
|
244 | ||
245 |
####################################### |
|
|
246 |
# ---- Compute required statistics ----
|
|
247 |
####################################### |
|
|
248 |
# Remove unused levels for x-axis
|
|
249 | 13x |
if (is.factor(df[[x]])) { |
250 | 12x |
df[[x]] <- droplevels(df[[x]]) |
251 |
}
|
|
252 | ||
253 | 13x |
if (!is.null(facet_var) && !is.null(group_var)) { |
254 | 1x |
df_grp <- tidyr::expand(df, .data[[facet_var]], .data[[group_var]], .data[[x]]) # expand based on levels of factors |
255 | 12x |
} else if (!is.null(group_var)) { |
256 | 11x |
df_grp <- tidyr::expand(df, .data[[group_var]], .data[[x]]) # expand based on levels of factors |
257 |
} else { |
|
258 | 1x |
df_grp <- tidyr::expand(df, NULL, .data[[x]]) |
259 |
}
|
|
260 | ||
261 | 13x |
df_grp <- df_grp %>% |
262 | 13x |
dplyr::full_join(y = df[, c(facet_var, group_var, x, y)], by = c(facet_var, group_var, x), multiple = "all") %>% |
263 | 13x |
dplyr::group_by_at(c(facet_var, group_var, x)) |
264 | ||
265 | 13x |
df_stats <- df_grp %>% |
266 | 13x |
dplyr::summarise( |
267 | 13x |
data.frame(t(do.call(c, unname(sfun(.data[[y]])[c(mid, interval)])))), |
268 | 13x |
.groups = "drop" |
269 |
)
|
|
270 | ||
271 | 13x |
df_stats <- df_stats[!is.na(df_stats[[mid]]), ] |
272 | ||
273 |
# add number of objects N in group_var (strata)
|
|
274 | 13x |
if (!is.null(group_var) && !is.null(alt_counts_df)) { |
275 | 8x |
strata_N <- paste0(group_var, "_N") # nolint |
276 | ||
277 | 8x |
df_N <- stats::aggregate(eval(parse(text = subject_var)) ~ eval(parse(text = group_var)), data = alt_counts_df, FUN = function(x) length(unique(x))) # nolint |
278 | 8x |
colnames(df_N) <- c(group_var, "N") # nolint |
279 | 8x |
df_N[[strata_N]] <- paste0(df_N[[group_var]], " (N = ", df_N$N, ")") # nolint |
280 | ||
281 |
# keep strata factor levels
|
|
282 | 8x |
matches <- sapply(unique(df_N[[group_var]]), function(x) { |
283 | 22x |
regex_pattern <- gsub("([][(){}^$.|*+?\\\\])", "\\\\\\1", x) |
284 | 22x |
unique(df_N[[paste0(group_var, "_N")]])[grepl( |
285 | 22x |
paste0("^", regex_pattern), |
286 | 22x |
unique(df_N[[paste0(group_var, "_N")]]) |
287 |
)] |
|
288 |
}) |
|
289 | 8x |
df_N[[paste0(group_var, "_N")]] <- factor(df_N[[group_var]]) # nolint |
290 | 8x |
levels(df_N[[paste0(group_var, "_N")]]) <- unlist(matches) # nolint |
291 | ||
292 |
# strata_N should not be in colnames(df_stats)
|
|
293 | 8x |
checkmate::assert_disjunct(strata_N, colnames(df_stats)) |
294 | ||
295 | 8x |
df_stats <- merge(x = df_stats, y = df_N[, c(group_var, strata_N)], by = group_var) |
296 | 5x |
} else if (!is.null(group_var)) { |
297 | 4x |
strata_N <- group_var # nolint |
298 |
} else { |
|
299 | 1x |
strata_N <- NULL # nolint |
300 |
}
|
|
301 | ||
302 |
############################################### |
|
|
303 |
# ---- Prepare certain plot's properties. ----
|
|
304 |
############################################### |
|
|
305 |
# legend title
|
|
306 | 13x |
if (is.null(legend_title) && !is.null(group_var) && legend_position != "none") { |
307 | 12x |
legend_title <- attr(df[[group_var]], "label") |
308 |
}
|
|
309 | ||
310 |
# y label
|
|
311 | 13x |
if (!is.null(y_lab)) { |
312 | 4x |
if (y_lab_add_paramcd) { |
313 | 4x |
y_lab <- paste(y_lab, unique(df[[paramcd]])) |
314 |
}
|
|
315 | ||
316 | 4x |
if (y_lab_add_unit) { |
317 | 4x |
y_lab <- paste0(y_lab, " (", unique(df[[y_unit]]), ")") |
318 |
}
|
|
319 | ||
320 | 4x |
y_lab <- trimws(y_lab) |
321 |
}
|
|
322 | ||
323 |
# subtitle
|
|
324 | 13x |
if (!is.null(subtitle)) { |
325 | 13x |
if (subtitle_add_paramcd) { |
326 | 13x |
subtitle <- paste(subtitle, unique(df[[paramcd]])) |
327 |
}
|
|
328 | ||
329 | 13x |
if (subtitle_add_unit) { |
330 | 13x |
subtitle <- paste0(subtitle, " (", unique(df[[y_unit]]), ")") |
331 |
}
|
|
332 | ||
333 | 13x |
subtitle <- trimws(subtitle) |
334 |
}
|
|
335 | ||
336 |
############################### |
|
|
337 |
# ---- Build plot object. ----
|
|
338 |
############################### |
|
|
339 | 13x |
p <- ggplot2::ggplot( |
340 | 13x |
data = df_stats, |
341 | 13x |
mapping = ggplot2::aes( |
342 | 13x |
x = .data[[x]], y = .data[[mid]], |
343 | 13x |
color = if (is.null(strata_N)) NULL else .data[[strata_N]], |
344 | 13x |
shape = if (is.null(strata_N)) NULL else .data[[strata_N]], |
345 | 13x |
lty = if (is.null(strata_N)) NULL else .data[[strata_N]], |
346 | 13x |
group = if (is.null(strata_N)) NULL else .data[[strata_N]] |
347 |
)
|
|
348 |
)
|
|
349 | ||
350 | 13x |
if (!is.null(group_var) && nlevels(df_stats[[strata_N]]) > 6) { |
351 | 1x |
p <- p + |
352 | 1x |
scale_shape_manual(values = seq(15, 15 + nlevels(df_stats[[strata_N]]))) |
353 |
}
|
|
354 | ||
355 | 13x |
if (!is.null(mid)) { |
356 |
# points
|
|
357 | 13x |
if (grepl("p", mid_type, fixed = TRUE)) { |
358 | 13x |
p <- p + ggplot2::geom_point(position = position, size = mid_point_size, na.rm = TRUE) |
359 |
}
|
|
360 | ||
361 |
# lines - plotted only if there is a strata grouping (group_var)
|
|
362 | 13x |
if (grepl("l", mid_type, fixed = TRUE) && !is.null(strata_N)) { |
363 | 12x |
p <- p + ggplot2::geom_line(position = position, na.rm = TRUE) |
364 |
}
|
|
365 |
}
|
|
366 | ||
367 |
# interval
|
|
368 | 13x |
if (!is.null(interval)) { |
369 | 13x |
p <- p + |
370 | 13x |
ggplot2::geom_errorbar( |
371 | 13x |
ggplot2::aes(ymin = .data[[whiskers[1]]], ymax = .data[[whiskers[max(1, length(whiskers))]]]), |
372 | 13x |
width = errorbar_width, |
373 | 13x |
position = position |
374 |
)
|
|
375 | ||
376 | 13x |
if (length(whiskers) == 1) { # lwr or upr only; mid is then required |
377 |
# workaround as geom_errorbar does not provide single-direction whiskers
|
|
378 | ! |
p <- p + |
379 | ! |
ggplot2::geom_linerange( |
380 | ! |
data = df_stats[!is.na(df_stats[[whiskers]]), ], # as na.rm =TRUE does not suppress warnings |
381 | ! |
ggplot2::aes(ymin = .data[[mid]], ymax = .data[[whiskers]]), |
382 | ! |
position = position, |
383 | ! |
na.rm = TRUE, |
384 | ! |
show.legend = FALSE |
385 |
)
|
|
386 |
}
|
|
387 |
}
|
|
388 | ||
389 | 13x |
if (is.numeric(df_stats[[x]])) { |
390 | 1x |
if (length(xticks) == 1) xticks <- seq(from = min(df_stats[[x]]), to = max(df_stats[[x]]), by = xticks) |
391 | 1x |
p <- p + ggplot2::scale_x_continuous(breaks = if (!is.null(xticks)) xticks else waiver(), limits = xlim) |
392 |
}
|
|
393 | ||
394 | 13x |
p <- p + |
395 | 13x |
ggplot2::scale_y_continuous(labels = scales::comma, limits = ylim) + |
396 | 13x |
ggplot2::labs( |
397 | 13x |
title = title, |
398 | 13x |
subtitle = subtitle, |
399 | 13x |
caption = caption, |
400 | 13x |
color = legend_title, |
401 | 13x |
lty = legend_title, |
402 | 13x |
shape = legend_title, |
403 | 13x |
x = x_lab, |
404 | 13x |
y = y_lab |
405 |
)
|
|
406 | ||
407 | 13x |
if (!is.null(col)) { |
408 | 1x |
p <- p + |
409 | 1x |
ggplot2::scale_color_manual(values = col) |
410 |
}
|
|
411 | 13x |
if (!is.null(linetype)) { |
412 | 1x |
p <- p + |
413 | 1x |
ggplot2::scale_linetype_manual(values = linetype) |
414 |
}
|
|
415 | ||
416 | 13x |
if (!is.null(facet_var)) { |
417 | 1x |
p <- p + |
418 | 1x |
facet_grid(cols = vars(df_stats[[facet_var]])) |
419 |
}
|
|
420 | ||
421 | 13x |
if (!is.null(ggtheme)) { |
422 | 13x |
p <- p + ggtheme |
423 |
} else { |
|
424 | ! |
p <- p + |
425 | ! |
ggplot2::theme_bw() + |
426 | ! |
ggplot2::theme( |
427 | ! |
legend.key.width = grid::unit(1, "cm"), |
428 | ! |
legend.position = legend_position, |
429 | ! |
legend.direction = ifelse( |
430 | ! |
legend_position %in% c("top", "bottom"), |
431 | ! |
"horizontal",
|
432 | ! |
"vertical"
|
433 |
)
|
|
434 |
)
|
|
435 |
}
|
|
436 | ||
437 |
############################################################# |
|
|
438 |
# ---- Optionally, add table to the bottom of the plot. ----
|
|
439 |
############################################################# |
|
|
440 | 13x |
if (!is.null(table)) { |
441 | 5x |
df_stats_table <- df_grp %>% |
442 | 5x |
dplyr::summarise( |
443 | 5x |
h_format_row( |
444 | 5x |
x = sfun(.data[[y]], ...)[table], |
445 | 5x |
format = table_format, |
446 | 5x |
labels = table_labels |
447 |
),
|
|
448 | 5x |
.groups = "drop" |
449 |
)
|
|
450 | ||
451 | 5x |
stats_lev <- rev(setdiff(colnames(df_stats_table), c(group_var, x))) |
452 | ||
453 | 5x |
df_stats_table <- df_stats_table %>% |
454 | 5x |
tidyr::pivot_longer( |
455 | 5x |
cols = -dplyr::all_of(c(group_var, x)), |
456 | 5x |
names_to = "stat", |
457 | 5x |
values_to = "value", |
458 | 5x |
names_ptypes = list(stat = factor(levels = stats_lev)) |
459 |
)
|
|
460 | ||
461 | 5x |
tbl <- ggplot2::ggplot( |
462 | 5x |
df_stats_table,
|
463 | 5x |
ggplot2::aes(x = .data[[x]], y = .data[["stat"]], label = .data[["value"]]) |
464 |
) + |
|
465 | 5x |
ggplot2::geom_text(size = table_font_size) + |
466 | 5x |
ggplot2::theme_bw() + |
467 | 5x |
ggplot2::theme( |
468 | 5x |
panel.border = ggplot2::element_blank(), |
469 | 5x |
panel.grid.major = ggplot2::element_blank(), |
470 | 5x |
panel.grid.minor = ggplot2::element_blank(), |
471 | 5x |
axis.ticks = ggplot2::element_blank(), |
472 | 5x |
axis.title = ggplot2::element_blank(), |
473 | 5x |
axis.text.x = ggplot2::element_blank(), |
474 | 5x |
axis.text.y = ggplot2::element_text( |
475 | 5x |
size = table_font_size * ggplot2::.pt, |
476 | 5x |
margin = ggplot2::margin(t = 0, r = 0, b = 0, l = 5) |
477 |
),
|
|
478 | 5x |
strip.text = ggplot2::element_text(hjust = 0), |
479 | 5x |
strip.text.x = ggplot2::element_text( |
480 | 5x |
size = table_font_size * ggplot2::.pt, |
481 | 5x |
margin = ggplot2::margin(1.5, 0, 1.5, 0, "pt") |
482 |
),
|
|
483 | 5x |
strip.background = ggplot2::element_rect(fill = "grey95", color = NA), |
484 | 5x |
legend.position = "none" |
485 |
)
|
|
486 | ||
487 | 5x |
if (!is.null(group_var)) { |
488 | 5x |
tbl <- tbl + ggplot2::facet_wrap(facets = group_var, ncol = 1) |
489 |
}
|
|
490 | ||
491 | 5x |
if (!as_list) { |
492 |
# align plot and table
|
|
493 | 4x |
cowplot::plot_grid( |
494 | 4x |
p,
|
495 | 4x |
tbl,
|
496 | 4x |
ncol = 1, |
497 | 4x |
align = "v", |
498 | 4x |
axis = "tblr", |
499 | 4x |
rel_heights = c(rel_height_plot, 1 - rel_height_plot) |
500 |
)
|
|
501 |
} else { |
|
502 | 1x |
list(plot = p, table = tbl) |
503 |
}
|
|
504 |
} else { |
|
505 | 8x |
p
|
506 |
}
|
|
507 |
}
|
|
508 | ||
509 |
#' Helper function to format the optional `g_lineplot` table
|
|
510 |
#'
|
|
511 |
#' @description `r lifecycle::badge("stable")`
|
|
512 |
#'
|
|
513 |
#' @param x (named `list`)\cr list of numerical values to be formatted and optionally labeled.
|
|
514 |
#' Elements of `x` must be `numeric` vectors.
|
|
515 |
#' @param format (named `character` or `NULL`)\cr format patterns for `x`. Names of the `format` must
|
|
516 |
#' match the names of `x`. This parameter is passed directly to the `rtables::format_rcell`
|
|
517 |
#' function through the `format` parameter.
|
|
518 |
#' @param labels (named `character` or `NULL`)\cr optional labels for `x`. Names of the `labels` must
|
|
519 |
#' match the names of `x`. When a label is not specified for an element of `x`,
|
|
520 |
#' then this function tries to use `label` or `names` (in this order) attribute of that element
|
|
521 |
#' (depending on which one exists and it is not `NULL` or `NA` or `NaN`). If none of these attributes
|
|
522 |
#' are attached to a given element of `x`, then the label is automatically generated.
|
|
523 |
#'
|
|
524 |
#' @return A single row `data.frame` object.
|
|
525 |
#'
|
|
526 |
#' @examples
|
|
527 |
#' mean_ci <- c(48, 51)
|
|
528 |
#' x <- list(mean = 50, mean_ci = mean_ci)
|
|
529 |
#' format <- c(mean = "xx.x", mean_ci = "(xx.xx, xx.xx)")
|
|
530 |
#' labels <- c(mean = "My Mean")
|
|
531 |
#' h_format_row(x, format, labels)
|
|
532 |
#'
|
|
533 |
#' attr(mean_ci, "label") <- "Mean 95% CI"
|
|
534 |
#' x <- list(mean = 50, mean_ci = mean_ci)
|
|
535 |
#' h_format_row(x, format, labels)
|
|
536 |
#'
|
|
537 |
#' @export
|
|
538 |
h_format_row <- function(x, format, labels = NULL) { |
|
539 |
# cell: one row, one column data.frame
|
|
540 | 92x |
format_cell <- function(x, format, label = NULL) { |
541 | 238x |
fc <- format_rcell(x = x, format = unlist(format)) |
542 | 238x |
if (is.na(fc)) { |
543 | ! |
fc <- "NA" |
544 |
}
|
|
545 | 238x |
x_label <- attr(x, "label") |
546 | 238x |
if (!is.null(label) && !is.na(label)) { |
547 | 236x |
names(fc) <- label |
548 | 2x |
} else if (!is.null(x_label) && !is.na(x_label)) { |
549 | 1x |
names(fc) <- x_label |
550 | 1x |
} else if (length(x) == length(fc)) { |
551 | ! |
names(fc) <- names(x) |
552 |
}
|
|
553 | 238x |
as.data.frame(t(fc)) |
554 |
}
|
|
555 | ||
556 | 92x |
row <- do.call( |
557 | 92x |
cbind,
|
558 | 92x |
lapply( |
559 | 92x |
names(x), function(xn) format_cell(x[[xn]], format = format[xn], label = labels[xn]) |
560 |
)
|
|
561 |
)
|
|
562 | ||
563 | 92x |
row
|
564 |
}
|
|
565 | ||
566 |
#' Control function for `g_lineplot()`
|
|
567 |
#'
|
|
568 |
#' @description `r lifecycle::badge("stable")`
|
|
569 |
#'
|
|
570 |
#' Default values for `variables` parameter in `g_lineplot` function.
|
|
571 |
#' A variable's default value can be overwritten for any variable.
|
|
572 |
#'
|
|
573 |
#' @param x (`string`)\cr x-variable name.
|
|
574 |
#' @param y (`string`)\cr y-variable name.
|
|
575 |
#' @param group_var (`string` or `NA`)\cr group variable name.
|
|
576 |
#' @param subject_var (`string` or `NA`)\cr subject variable name.
|
|
577 |
#' @param facet_var (`string` or `NA`)\cr faceting variable name.
|
|
578 |
#' @param paramcd (`string` or `NA`)\cr parameter code variable name.
|
|
579 |
#' @param y_unit (`string` or `NA`)\cr y-axis unit variable name.
|
|
580 |
#'
|
|
581 |
#' @return A named character vector of variable names.
|
|
582 |
#'
|
|
583 |
#' @examples
|
|
584 |
#' control_lineplot_vars()
|
|
585 |
#' control_lineplot_vars(group_var = NA)
|
|
586 |
#'
|
|
587 |
#' @export
|
|
588 |
control_lineplot_vars <- function(x = "AVISIT", |
|
589 |
y = "AVAL", |
|
590 |
group_var = "ARM", |
|
591 |
facet_var = NA, |
|
592 |
paramcd = "PARAMCD", |
|
593 |
y_unit = "AVALU", |
|
594 |
subject_var = "USUBJID") { |
|
595 | 16x |
checkmate::assert_string(x) |
596 | 16x |
checkmate::assert_string(y) |
597 | 16x |
checkmate::assert_string(group_var, na.ok = TRUE, null.ok = TRUE) |
598 | 16x |
checkmate::assert_string(facet_var, na.ok = TRUE, null.ok = TRUE) |
599 | 16x |
checkmate::assert_string(subject_var, na.ok = TRUE, null.ok = TRUE) |
600 | 16x |
checkmate::assert_string(paramcd, na.ok = TRUE, null.ok = TRUE) |
601 | 16x |
checkmate::assert_string(y_unit, na.ok = TRUE, null.ok = TRUE) |
602 | ||
603 | 16x |
variables <- c( |
604 | 16x |
x = x, y = y, group_var = group_var, paramcd = paramcd, |
605 | 16x |
y_unit = y_unit, subject_var = subject_var, facet_var = facet_var |
606 |
)
|
|
607 | 16x |
return(variables) |
608 |
}
|
1 |
#' Helper functions for tabulating survival duration by subgroup
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Helper functions that tabulate in a data frame statistics such as median survival
|
|
6 |
#' time and hazard ratio for population subgroups.
|
|
7 |
#'
|
|
8 |
#' @inheritParams argument_convention
|
|
9 |
#' @inheritParams survival_coxph_pairwise
|
|
10 |
#' @inheritParams survival_duration_subgroups
|
|
11 |
#' @param arm (`factor`)\cr the treatment group variable.
|
|
12 |
#'
|
|
13 |
#' @details Main functionality is to prepare data for use in a layout-creating function.
|
|
14 |
#'
|
|
15 |
#' @examples
|
|
16 |
#' library(dplyr)
|
|
17 |
#' library(forcats)
|
|
18 |
#'
|
|
19 |
#' adtte <- tern_ex_adtte
|
|
20 |
#'
|
|
21 |
#' # Save variable labels before data processing steps.
|
|
22 |
#' adtte_labels <- formatters::var_labels(adtte)
|
|
23 |
#'
|
|
24 |
#' adtte_f <- adtte %>%
|
|
25 |
#' filter(
|
|
26 |
#' PARAMCD == "OS",
|
|
27 |
#' ARM %in% c("B: Placebo", "A: Drug X"),
|
|
28 |
#' SEX %in% c("M", "F")
|
|
29 |
#' ) %>%
|
|
30 |
#' mutate(
|
|
31 |
#' # Reorder levels of ARM to display reference arm before treatment arm.
|
|
32 |
#' ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
|
|
33 |
#' SEX = droplevels(SEX),
|
|
34 |
#' is_event = CNSR == 0
|
|
35 |
#' )
|
|
36 |
#' labels <- c("ARM" = adtte_labels[["ARM"]], "SEX" = adtte_labels[["SEX"]], "is_event" = "Event Flag")
|
|
37 |
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
|
|
38 |
#'
|
|
39 |
#' @name h_survival_duration_subgroups
|
|
40 |
NULL
|
|
41 | ||
42 |
#' @describeIn h_survival_duration_subgroups Helper to prepare a data frame of median survival times by arm.
|
|
43 |
#'
|
|
44 |
#' @return
|
|
45 |
#' * `h_survtime_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, and `median`.
|
|
46 |
#'
|
|
47 |
#' @examples
|
|
48 |
#' # Extract median survival time for one group.
|
|
49 |
#' h_survtime_df(
|
|
50 |
#' tte = adtte_f$AVAL,
|
|
51 |
#' is_event = adtte_f$is_event,
|
|
52 |
#' arm = adtte_f$ARM
|
|
53 |
#' )
|
|
54 |
#'
|
|
55 |
#' @export
|
|
56 |
h_survtime_df <- function(tte, is_event, arm) { |
|
57 | 79x |
checkmate::assert_numeric(tte) |
58 | 78x |
checkmate::assert_logical(is_event, len = length(tte)) |
59 | 78x |
assert_valid_factor(arm, len = length(tte)) |
60 | ||
61 | 78x |
df_tte <- data.frame( |
62 | 78x |
tte = tte, |
63 | 78x |
is_event = is_event, |
64 | 78x |
stringsAsFactors = FALSE |
65 |
)
|
|
66 | ||
67 |
# Delete NAs
|
|
68 | 78x |
non_missing_rows <- stats::complete.cases(df_tte) |
69 | 78x |
df_tte <- df_tte[non_missing_rows, ] |
70 | 78x |
arm <- arm[non_missing_rows] |
71 | ||
72 | 78x |
lst_tte <- split(df_tte, arm) |
73 | 78x |
lst_results <- Map(function(x, arm) { |
74 | 156x |
if (nrow(x) > 0) { |
75 | 152x |
s_surv <- s_surv_time(x, .var = "tte", is_event = "is_event") |
76 | 152x |
median_est <- unname(as.numeric(s_surv$median)) |
77 | 152x |
n_events <- sum(x$is_event) |
78 |
} else { |
|
79 | 4x |
median_est <- NA |
80 | 4x |
n_events <- NA |
81 |
}
|
|
82 | ||
83 | 156x |
data.frame( |
84 | 156x |
arm = arm, |
85 | 156x |
n = nrow(x), |
86 | 156x |
n_events = n_events, |
87 | 156x |
median = median_est, |
88 | 156x |
stringsAsFactors = FALSE |
89 |
)
|
|
90 | 78x |
}, lst_tte, names(lst_tte)) |
91 | ||
92 | 78x |
df <- do.call(rbind, args = c(lst_results, make.row.names = FALSE)) |
93 | 78x |
df$arm <- factor(df$arm, levels = levels(arm)) |
94 | 78x |
df
|
95 |
}
|
|
96 | ||
97 |
#' @describeIn h_survival_duration_subgroups Summarizes median survival times by arm and across subgroups
|
|
98 |
#' in a data frame. `variables` corresponds to the names of variables found in `data`, passed as a named list and
|
|
99 |
#' requires elements `tte`, `is_event`, `arm` and optionally `subgroups`. `groups_lists` optionally specifies
|
|
100 |
#' groupings for `subgroups` variables.
|
|
101 |
#'
|
|
102 |
#' @return
|
|
103 |
#' * `h_survtime_subgroups_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, `median`, `subgroup`,
|
|
104 |
#' `var`, `var_label`, and `row_type`.
|
|
105 |
#'
|
|
106 |
#' @examples
|
|
107 |
#' # Extract median survival time for multiple groups.
|
|
108 |
#' h_survtime_subgroups_df(
|
|
109 |
#' variables = list(
|
|
110 |
#' tte = "AVAL",
|
|
111 |
#' is_event = "is_event",
|
|
112 |
#' arm = "ARM",
|
|
113 |
#' subgroups = c("SEX", "BMRKR2")
|
|
114 |
#' ),
|
|
115 |
#' data = adtte_f
|
|
116 |
#' )
|
|
117 |
#'
|
|
118 |
#' # Define groupings for BMRKR2 levels.
|
|
119 |
#' h_survtime_subgroups_df(
|
|
120 |
#' variables = list(
|
|
121 |
#' tte = "AVAL",
|
|
122 |
#' is_event = "is_event",
|
|
123 |
#' arm = "ARM",
|
|
124 |
#' subgroups = c("SEX", "BMRKR2")
|
|
125 |
#' ),
|
|
126 |
#' data = adtte_f,
|
|
127 |
#' groups_lists = list(
|
|
128 |
#' BMRKR2 = list(
|
|
129 |
#' "low" = "LOW",
|
|
130 |
#' "low/medium" = c("LOW", "MEDIUM"),
|
|
131 |
#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
|
|
132 |
#' )
|
|
133 |
#' )
|
|
134 |
#' )
|
|
135 |
#'
|
|
136 |
#' @export
|
|
137 |
h_survtime_subgroups_df <- function(variables, |
|
138 |
data,
|
|
139 |
groups_lists = list(), |
|
140 |
label_all = "All Patients") { |
|
141 | 15x |
checkmate::assert_character(variables$tte) |
142 | 15x |
checkmate::assert_character(variables$is_event) |
143 | 15x |
checkmate::assert_character(variables$arm) |
144 | 15x |
checkmate::assert_character(variables$subgroups, null.ok = TRUE) |
145 | ||
146 | 15x |
assert_df_with_variables(data, variables) |
147 | ||
148 | 15x |
checkmate::assert_string(label_all) |
149 | ||
150 |
# Add All Patients.
|
|
151 | 15x |
result_all <- h_survtime_df(data[[variables$tte]], data[[variables$is_event]], data[[variables$arm]]) |
152 | 15x |
result_all$subgroup <- label_all |
153 | 15x |
result_all$var <- "ALL" |
154 | 15x |
result_all$var_label <- label_all |
155 | 15x |
result_all$row_type <- "content" |
156 | ||
157 |
# Add Subgroups.
|
|
158 | 15x |
if (is.null(variables$subgroups)) { |
159 | 3x |
result_all
|
160 |
} else { |
|
161 | 12x |
l_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists) |
162 | 12x |
l_result <- lapply(l_data, function(grp) { |
163 | 60x |
result <- h_survtime_df(grp$df[[variables$tte]], grp$df[[variables$is_event]], grp$df[[variables$arm]]) |
164 | 60x |
result_labels <- grp$df_labels[rep(1, times = nrow(result)), ] |
165 | 60x |
cbind(result, result_labels) |
166 |
}) |
|
167 | 12x |
result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE)) |
168 | 12x |
result_subgroups$row_type <- "analysis" |
169 | 12x |
rbind( |
170 | 12x |
result_all,
|
171 | 12x |
result_subgroups
|
172 |
)
|
|
173 |
}
|
|
174 |
}
|
|
175 | ||
176 |
#' @describeIn h_survival_duration_subgroups Helper to prepare a data frame with estimates of
|
|
177 |
#' treatment hazard ratio.
|
|
178 |
#'
|
|
179 |
#' @param strata_data (`factor`, `data.frame`, or `NULL`)\cr required if stratified analysis is performed.
|
|
180 |
#'
|
|
181 |
#' @return
|
|
182 |
#' * `h_coxph_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`, `lcl`, `ucl`,
|
|
183 |
#' `conf_level`, `pval` and `pval_label`.
|
|
184 |
#'
|
|
185 |
#' @examples
|
|
186 |
#' # Extract hazard ratio for one group.
|
|
187 |
#' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM)
|
|
188 |
#'
|
|
189 |
#' # Extract hazard ratio for one group with stratification factor.
|
|
190 |
#' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM, strata_data = adtte_f$STRATA1)
|
|
191 |
#'
|
|
192 |
#' @export
|
|
193 |
h_coxph_df <- function(tte, is_event, arm, strata_data = NULL, control = control_coxph()) { |
|
194 | 85x |
checkmate::assert_numeric(tte) |
195 | 85x |
checkmate::assert_logical(is_event, len = length(tte)) |
196 | 85x |
assert_valid_factor(arm, n.levels = 2, len = length(tte)) |
197 | ||
198 | 85x |
df_tte <- data.frame(tte = tte, is_event = is_event) |
199 | 85x |
strata_vars <- NULL |
200 | ||
201 | 85x |
if (!is.null(strata_data)) { |
202 | 5x |
if (is.data.frame(strata_data)) { |
203 | 4x |
strata_vars <- names(strata_data) |
204 | 4x |
checkmate::assert_data_frame(strata_data, nrows = nrow(df_tte)) |
205 | 4x |
assert_df_with_factors(strata_data, as.list(stats::setNames(strata_vars, strata_vars))) |
206 |
} else { |
|
207 | 1x |
assert_valid_factor(strata_data, len = nrow(df_tte)) |
208 | 1x |
strata_vars <- "strata_data" |
209 |
}
|
|
210 | 5x |
df_tte[strata_vars] <- strata_data |
211 |
}
|
|
212 | ||
213 | 85x |
l_df <- split(df_tte, arm) |
214 | ||
215 | 85x |
if (nrow(l_df[[1]]) > 0 && nrow(l_df[[2]]) > 0) { |
216 |
# Hazard ratio and CI.
|
|
217 | 79x |
result <- s_coxph_pairwise( |
218 | 79x |
df = l_df[[2]], |
219 | 79x |
.ref_group = l_df[[1]], |
220 | 79x |
.in_ref_col = FALSE, |
221 | 79x |
.var = "tte", |
222 | 79x |
is_event = "is_event", |
223 | 79x |
strata = strata_vars, |
224 | 79x |
control = control |
225 |
)
|
|
226 | ||
227 | 79x |
df <- data.frame( |
228 |
# Dummy column needed downstream to create a nested header.
|
|
229 | 79x |
arm = " ", |
230 | 79x |
n_tot = unname(as.numeric(result$n_tot)), |
231 | 79x |
n_tot_events = unname(as.numeric(result$n_tot_events)), |
232 | 79x |
hr = unname(as.numeric(result$hr)), |
233 | 79x |
lcl = unname(result$hr_ci[1]), |
234 | 79x |
ucl = unname(result$hr_ci[2]), |
235 | 79x |
conf_level = control[["conf_level"]], |
236 | 79x |
pval = as.numeric(result$pvalue), |
237 | 79x |
pval_label = obj_label(result$pvalue), |
238 | 79x |
stringsAsFactors = FALSE |
239 |
)
|
|
240 |
} else if ( |
|
241 | 6x |
(nrow(l_df[[1]]) == 0 && nrow(l_df[[2]]) > 0) || |
242 | 6x |
(nrow(l_df[[1]]) > 0 && nrow(l_df[[2]]) == 0) |
243 |
) { |
|
244 | 6x |
df_tte_complete <- df_tte[stats::complete.cases(df_tte), ] |
245 | 6x |
df <- data.frame( |
246 |
# Dummy column needed downstream to create a nested header.
|
|
247 | 6x |
arm = " ", |
248 | 6x |
n_tot = nrow(df_tte_complete), |
249 | 6x |
n_tot_events = sum(df_tte_complete$is_event), |
250 | 6x |
hr = NA, |
251 | 6x |
lcl = NA, |
252 | 6x |
ucl = NA, |
253 | 6x |
conf_level = control[["conf_level"]], |
254 | 6x |
pval = NA, |
255 | 6x |
pval_label = NA, |
256 | 6x |
stringsAsFactors = FALSE |
257 |
)
|
|
258 |
} else { |
|
259 | ! |
df <- data.frame( |
260 |
# Dummy column needed downstream to create a nested header.
|
|
261 | ! |
arm = " ", |
262 | ! |
n_tot = 0L, |
263 | ! |
n_tot_events = 0L, |
264 | ! |
hr = NA, |
265 | ! |
lcl = NA, |
266 | ! |
ucl = NA, |
267 | ! |
conf_level = control[["conf_level"]], |
268 | ! |
pval = NA, |
269 | ! |
pval_label = NA, |
270 | ! |
stringsAsFactors = FALSE |
271 |
)
|
|
272 |
}
|
|
273 | ||
274 | 85x |
df
|
275 |
}
|
|
276 | ||
277 |
#' @describeIn h_survival_duration_subgroups Summarizes estimates of the treatment hazard ratio
|
|
278 |
#' across subgroups in a data frame. `variables` corresponds to the names of variables found in
|
|
279 |
#' `data`, passed as a named list and requires elements `tte`, `is_event`, `arm` and
|
|
280 |
#' optionally `subgroups` and `strata`. `groups_lists` optionally specifies
|
|
281 |
#' groupings for `subgroups` variables.
|
|
282 |
#'
|
|
283 |
#' @return
|
|
284 |
#' * `h_coxph_subgroups_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`,
|
|
285 |
#' `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`, `var_label`, and `row_type`.
|
|
286 |
#'
|
|
287 |
#' @examples
|
|
288 |
#' # Extract hazard ratio for multiple groups.
|
|
289 |
#' h_coxph_subgroups_df(
|
|
290 |
#' variables = list(
|
|
291 |
#' tte = "AVAL",
|
|
292 |
#' is_event = "is_event",
|
|
293 |
#' arm = "ARM",
|
|
294 |
#' subgroups = c("SEX", "BMRKR2")
|
|
295 |
#' ),
|
|
296 |
#' data = adtte_f
|
|
297 |
#' )
|
|
298 |
#'
|
|
299 |
#' # Define groupings of BMRKR2 levels.
|
|
300 |
#' h_coxph_subgroups_df(
|
|
301 |
#' variables = list(
|
|
302 |
#' tte = "AVAL",
|
|
303 |
#' is_event = "is_event",
|
|
304 |
#' arm = "ARM",
|
|
305 |
#' subgroups = c("SEX", "BMRKR2")
|
|
306 |
#' ),
|
|
307 |
#' data = adtte_f,
|
|
308 |
#' groups_lists = list(
|
|
309 |
#' BMRKR2 = list(
|
|
310 |
#' "low" = "LOW",
|
|
311 |
#' "low/medium" = c("LOW", "MEDIUM"),
|
|
312 |
#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
|
|
313 |
#' )
|
|
314 |
#' )
|
|
315 |
#' )
|
|
316 |
#'
|
|
317 |
#' # Extract hazard ratio for multiple groups with stratification factors.
|
|
318 |
#' h_coxph_subgroups_df(
|
|
319 |
#' variables = list(
|
|
320 |
#' tte = "AVAL",
|
|
321 |
#' is_event = "is_event",
|
|
322 |
#' arm = "ARM",
|
|
323 |
#' subgroups = c("SEX", "BMRKR2"),
|
|
324 |
#' strata = c("STRATA1", "STRATA2")
|
|
325 |
#' ),
|
|
326 |
#' data = adtte_f
|
|
327 |
#' )
|
|
328 |
#'
|
|
329 |
#' @export
|
|
330 |
h_coxph_subgroups_df <- function(variables, |
|
331 |
data,
|
|
332 |
groups_lists = list(), |
|
333 |
control = control_coxph(), |
|
334 |
label_all = "All Patients") { |
|
335 | 17x |
if ("strat" %in% names(variables)) { |
336 | ! |
warning( |
337 | ! |
"Warning: the `strat` element name of the `variables` list argument to `h_coxph_subgroups_df() ",
|
338 | ! |
"was deprecated in tern 0.9.4.\n ",
|
339 | ! |
"Please use the name `strata` instead of `strat` in the `variables` argument."
|
340 |
)
|
|
341 | ! |
variables[["strata"]] <- variables[["strat"]] |
342 |
}
|
|
343 | ||
344 | 17x |
checkmate::assert_character(variables$tte) |
345 | 17x |
checkmate::assert_character(variables$is_event) |
346 | 17x |
checkmate::assert_character(variables$arm) |
347 | 17x |
checkmate::assert_character(variables$subgroups, null.ok = TRUE) |
348 | 17x |
checkmate::assert_character(variables$strata, null.ok = TRUE) |
349 | 17x |
assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2) |
350 | 17x |
assert_df_with_variables(data, variables) |
351 | 17x |
checkmate::assert_string(label_all) |
352 | ||
353 |
# Add All Patients.
|
|
354 | 17x |
result_all <- h_coxph_df( |
355 | 17x |
tte = data[[variables$tte]], |
356 | 17x |
is_event = data[[variables$is_event]], |
357 | 17x |
arm = data[[variables$arm]], |
358 | 17x |
strata_data = if (is.null(variables$strata)) NULL else data[variables$strata], |
359 | 17x |
control = control |
360 |
)
|
|
361 | 17x |
result_all$subgroup <- label_all |
362 | 17x |
result_all$var <- "ALL" |
363 | 17x |
result_all$var_label <- label_all |
364 | 17x |
result_all$row_type <- "content" |
365 | ||
366 |
# Add Subgroups.
|
|
367 | 17x |
if (is.null(variables$subgroups)) { |
368 | 3x |
result_all
|
369 |
} else { |
|
370 | 14x |
l_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists) |
371 | ||
372 | 14x |
l_result <- lapply(l_data, function(grp) { |
373 | 64x |
result <- h_coxph_df( |
374 | 64x |
tte = grp$df[[variables$tte]], |
375 | 64x |
is_event = grp$df[[variables$is_event]], |
376 | 64x |
arm = grp$df[[variables$arm]], |
377 | 64x |
strata_data = if (is.null(variables$strata)) NULL else grp$df[variables$strata], |
378 | 64x |
control = control |
379 |
)
|
|
380 | 64x |
result_labels <- grp$df_labels[rep(1, times = nrow(result)), ] |
381 | 64x |
cbind(result, result_labels) |
382 |
}) |
|
383 | ||
384 | 14x |
result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE)) |
385 | 14x |
result_subgroups$row_type <- "analysis" |
386 | ||
387 | 14x |
rbind( |
388 | 14x |
result_all,
|
389 | 14x |
result_subgroups
|
390 |
)
|
|
391 |
}
|
|
392 |
}
|
|
393 | ||
394 |
#' Split data frame by subgroups
|
|
395 |
#'
|
|
396 |
#' @description `r lifecycle::badge("stable")`
|
|
397 |
#'
|
|
398 |
#' Split a data frame into a non-nested list of subsets.
|
|
399 |
#'
|
|
400 |
#' @inheritParams argument_convention
|
|
401 |
#' @inheritParams survival_duration_subgroups
|
|
402 |
#' @param data (`data.frame`)\cr dataset to split.
|
|
403 |
#' @param subgroups (`character`)\cr names of factor variables from `data` used to create subsets.
|
|
404 |
#' Unused levels not present in `data` are dropped. Note that the order in this vector
|
|
405 |
#' determines the order in the downstream table.
|
|
406 |
#'
|
|
407 |
#' @return A list with subset data (`df`) and metadata about the subset (`df_labels`).
|
|
408 |
#'
|
|
409 |
#' @details Main functionality is to prepare data for use in forest plot layouts.
|
|
410 |
#'
|
|
411 |
#' @examples
|
|
412 |
#' df <- data.frame(
|
|
413 |
#' x = c(1:5),
|
|
414 |
#' y = factor(c("A", "B", "A", "B", "A"), levels = c("A", "B", "C")),
|
|
415 |
#' z = factor(c("C", "C", "D", "D", "D"), levels = c("D", "C"))
|
|
416 |
#' )
|
|
417 |
#' formatters::var_labels(df) <- paste("label for", names(df))
|
|
418 |
#'
|
|
419 |
#' h_split_by_subgroups(
|
|
420 |
#' data = df,
|
|
421 |
#' subgroups = c("y", "z")
|
|
422 |
#' )
|
|
423 |
#'
|
|
424 |
#' h_split_by_subgroups(
|
|
425 |
#' data = df,
|
|
426 |
#' subgroups = c("y", "z"),
|
|
427 |
#' groups_lists = list(
|
|
428 |
#' y = list("AB" = c("A", "B"), "C" = "C")
|
|
429 |
#' )
|
|
430 |
#' )
|
|
431 |
#'
|
|
432 |
#' @export
|
|
433 |
h_split_by_subgroups <- function(data, |
|
434 |
subgroups,
|
|
435 |
groups_lists = list()) { |
|
436 | 66x |
checkmate::assert_character(subgroups, min.len = 1, any.missing = FALSE) |
437 | 66x |
checkmate::assert_list(groups_lists, names = "named") |
438 | 66x |
checkmate::assert_subset(names(groups_lists), subgroups) |
439 | 66x |
assert_df_with_factors(data, as.list(stats::setNames(subgroups, subgroups))) |
440 | ||
441 | 66x |
data_labels <- unname(formatters::var_labels(data)) |
442 | 66x |
df_subgroups <- data[, subgroups, drop = FALSE] |
443 | 66x |
subgroup_labels <- formatters::var_labels(df_subgroups, fill = TRUE) |
444 | ||
445 | 66x |
l_labels <- Map(function(grp_i, name_i) { |
446 | 120x |
existing_levels <- levels(droplevels(grp_i)) |
447 | 120x |
grp_levels <- if (name_i %in% names(groups_lists)) { |
448 |
# For this variable groupings are defined. We check which groups are contained in the data.
|
|
449 | 11x |
group_list_i <- groups_lists[[name_i]] |
450 | 11x |
group_has_levels <- vapply(group_list_i, function(lvls) any(lvls %in% existing_levels), TRUE) |
451 | 11x |
names(which(group_has_levels)) |
452 |
} else { |
|
453 | 109x |
existing_levels
|
454 |
}
|
|
455 | 120x |
df_labels <- data.frame( |
456 | 120x |
subgroup = grp_levels, |
457 | 120x |
var = name_i, |
458 | 120x |
var_label = unname(subgroup_labels[name_i]), |
459 | 120x |
stringsAsFactors = FALSE # Rationale is that subgroups may not be unique. |
460 |
)
|
|
461 | 66x |
}, df_subgroups, names(df_subgroups)) |
462 | ||
463 |
# Create a data frame with one row per subgroup.
|
|
464 | 66x |
df_labels <- do.call(rbind, args = c(l_labels, make.row.names = FALSE)) |
465 | 66x |
row_label <- paste0(df_labels$var, ".", df_labels$subgroup) |
466 | 66x |
row_split_var <- factor(row_label, levels = row_label) |
467 | ||
468 |
# Create a list of data subsets.
|
|
469 | 66x |
lapply(split(df_labels, row_split_var), function(row_i) { |
470 | 294x |
which_row <- if (row_i$var %in% names(groups_lists)) { |
471 | 31x |
data[[row_i$var]] %in% groups_lists[[row_i$var]][[row_i$subgroup]] |
472 |
} else { |
|
473 | 263x |
data[[row_i$var]] == row_i$subgroup |
474 |
}
|
|
475 | 294x |
df <- data[which_row, ] |
476 | 294x |
rownames(df) <- NULL |
477 | 294x |
formatters::var_labels(df) <- data_labels |
478 | ||
479 | 294x |
list( |
480 | 294x |
df = df, |
481 | 294x |
df_labels = data.frame(row_i, row.names = NULL) |
482 |
)
|
|
483 |
}) |
|
484 |
}
|
1 |
#' Count the number of patients with a particular event
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [count_patients_with_event()] creates a layout element to calculate patient counts for a
|
|
6 |
#' user-specified set of events.
|
|
7 |
#'
|
|
8 |
#' This function analyzes primary analysis variable `vars` which indicates unique subject identifiers. Events
|
|
9 |
#' are defined by the user as a named vector via the `filters` argument, where each name corresponds to a
|
|
10 |
#' variable and each value is the value(s) that that variable takes for the event.
|
|
11 |
#'
|
|
12 |
#' If there are multiple records with the same event recorded for a patient, only one occurrence is counted.
|
|
13 |
#'
|
|
14 |
#' @inheritParams argument_convention
|
|
15 |
#' @param filters (`character`)\cr a character vector specifying the column names and flag variables
|
|
16 |
#' to be used for counting the number of unique identifiers satisfying such conditions.
|
|
17 |
#' Multiple column names and flags are accepted in this format
|
|
18 |
#' `c("column_name1" = "flag1", "column_name2" = "flag2")`.
|
|
19 |
#' Note that only equality is being accepted as condition.
|
|
20 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
21 |
#'
|
|
22 |
#' Options are: ``r shQuote(get_stats("count_patients_with_event"), type = "sh")``
|
|
23 |
#'
|
|
24 |
#' @seealso [count_patients_with_flags()]
|
|
25 |
#'
|
|
26 |
#' @name count_patients_with_event
|
|
27 |
#' @order 1
|
|
28 |
NULL
|
|
29 | ||
30 |
#' @describeIn count_patients_with_event Statistics function which counts the number of patients for which
|
|
31 |
#' the defined event has occurred.
|
|
32 |
#'
|
|
33 |
#' @inheritParams analyze_variables
|
|
34 |
#' @param .var (`string`)\cr name of the column that contains the unique identifier.
|
|
35 |
#'
|
|
36 |
#' @return
|
|
37 |
#' * `s_count_patients_with_event()` returns the count and fraction of unique identifiers with the defined event.
|
|
38 |
#'
|
|
39 |
#' @examples
|
|
40 |
#' s_count_patients_with_event(
|
|
41 |
#' tern_ex_adae,
|
|
42 |
#' .var = "SUBJID",
|
|
43 |
#' filters = c("TRTEMFL" = "Y"),
|
|
44 |
#' )
|
|
45 |
#'
|
|
46 |
#' s_count_patients_with_event(
|
|
47 |
#' tern_ex_adae,
|
|
48 |
#' .var = "SUBJID",
|
|
49 |
#' filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL")
|
|
50 |
#' )
|
|
51 |
#'
|
|
52 |
#' s_count_patients_with_event(
|
|
53 |
#' tern_ex_adae,
|
|
54 |
#' .var = "SUBJID",
|
|
55 |
#' filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL"),
|
|
56 |
#' denom = "N_col",
|
|
57 |
#' .N_col = 456
|
|
58 |
#' )
|
|
59 |
#'
|
|
60 |
#' @export
|
|
61 |
s_count_patients_with_event <- function(df, |
|
62 |
.var,
|
|
63 |
.N_col = ncol(df), # nolint |
|
64 |
.N_row = nrow(df), # nolint |
|
65 |
...,
|
|
66 |
filters,
|
|
67 |
denom = c("n", "N_col", "N_row")) { |
|
68 | 51x |
col_names <- names(filters) |
69 | 51x |
filter_values <- filters |
70 | ||
71 | 51x |
checkmate::assert_subset(col_names, colnames(df)) |
72 | ||
73 | 51x |
temp <- Map( |
74 | 51x |
function(x, y) which(df[[x]] == y), |
75 | 51x |
col_names,
|
76 | 51x |
filter_values
|
77 |
)
|
|
78 | 51x |
position_satisfy_filters <- Reduce(intersect, temp) |
79 | 51x |
id_satisfy_filters <- as.character(unique(df[position_satisfy_filters, ][[.var]])) |
80 | 51x |
result <- s_count_values( |
81 | 51x |
as.character(unique(df[[.var]])), |
82 | 51x |
id_satisfy_filters,
|
83 | 51x |
denom = denom, |
84 | 51x |
.N_col = .N_col, |
85 | 51x |
.N_row = .N_row |
86 |
)
|
|
87 | 51x |
result
|
88 |
}
|
|
89 | ||
90 |
#' @describeIn count_patients_with_event Formatted analysis function which is used as `afun`
|
|
91 |
#' in `count_patients_with_event()`.
|
|
92 |
#'
|
|
93 |
#' @return
|
|
94 |
#' * `a_count_patients_with_event()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
95 |
#'
|
|
96 |
#' @examples
|
|
97 |
#' a_count_patients_with_event(
|
|
98 |
#' tern_ex_adae,
|
|
99 |
#' .var = "SUBJID",
|
|
100 |
#' filters = c("TRTEMFL" = "Y"),
|
|
101 |
#' .N_col = 100,
|
|
102 |
#' .N_row = 100
|
|
103 |
#' )
|
|
104 |
#'
|
|
105 |
#' @export
|
|
106 |
a_count_patients_with_event <- function(df, |
|
107 |
labelstr = "", |
|
108 |
...,
|
|
109 |
.stats = NULL, |
|
110 |
.stat_names = NULL, |
|
111 |
.formats = NULL, |
|
112 |
.labels = NULL, |
|
113 |
.indent_mods = NULL) { |
|
114 |
# Check for additional parameters to the statistics function
|
|
115 | 19x |
dots_extra_args <- list(...) |
116 | 19x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
117 | 19x |
dots_extra_args$.additional_fun_parameters <- NULL |
118 | ||
119 |
# Check for user-defined functions
|
|
120 | 19x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
121 | 19x |
.stats <- default_and_custom_stats_list$all_stats |
122 | 19x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
123 | ||
124 |
# Apply statistics function
|
|
125 | 19x |
x_stats <- .apply_stat_functions( |
126 | 19x |
default_stat_fnc = s_count_patients_with_event, |
127 | 19x |
custom_stat_fnc_list = custom_stat_functions, |
128 | 19x |
args_list = c( |
129 | 19x |
df = list(df), |
130 | 19x |
extra_afun_params,
|
131 | 19x |
dots_extra_args
|
132 |
)
|
|
133 |
)
|
|
134 | ||
135 |
# Fill in formatting defaults
|
|
136 | 19x |
.stats <- get_stats("count_patients_with_event", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) |
137 | 19x |
.formats <- get_formats_from_stats(.stats, .formats) |
138 | 19x |
.labels <- get_labels_from_stats(.stats, .labels) |
139 | 19x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods) |
140 | ||
141 | 19x |
x_stats <- x_stats[.stats] |
142 | ||
143 |
# Auto format handling
|
|
144 | 19x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
145 | ||
146 |
# Get and check statistical names
|
|
147 | 19x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
148 | ||
149 | 19x |
in_rows( |
150 | 19x |
.list = x_stats, |
151 | 19x |
.formats = .formats, |
152 | 19x |
.names = names(.labels), |
153 | 19x |
.stat_names = .stat_names, |
154 | 19x |
.labels = .labels %>% .unlist_keep_nulls(), |
155 | 19x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
156 |
)
|
|
157 |
}
|
|
158 | ||
159 |
#' @describeIn count_patients_with_event Layout-creating function which can take statistics function
|
|
160 |
#' arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
161 |
#'
|
|
162 |
#' @return
|
|
163 |
#' * `count_patients_with_event()` returns a layout object suitable for passing to further layouting functions,
|
|
164 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
165 |
#' the statistics from `s_count_patients_with_event()` to the table layout.
|
|
166 |
#'
|
|
167 |
#' @examples
|
|
168 |
#' lyt <- basic_table() %>%
|
|
169 |
#' split_cols_by("ARM") %>%
|
|
170 |
#' add_colcounts() %>%
|
|
171 |
#' count_values(
|
|
172 |
#' "STUDYID",
|
|
173 |
#' values = "AB12345",
|
|
174 |
#' .stats = "count",
|
|
175 |
#' .labels = c(count = "Total AEs")
|
|
176 |
#' ) %>%
|
|
177 |
#' count_patients_with_event(
|
|
178 |
#' "SUBJID",
|
|
179 |
#' filters = c("TRTEMFL" = "Y"),
|
|
180 |
#' .labels = c(count_fraction = "Total number of patients with at least one adverse event"),
|
|
181 |
#' table_names = "tbl_all"
|
|
182 |
#' ) %>%
|
|
183 |
#' count_patients_with_event(
|
|
184 |
#' "SUBJID",
|
|
185 |
#' filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL"),
|
|
186 |
#' .labels = c(count_fraction = "Total number of patients with fatal AEs"),
|
|
187 |
#' table_names = "tbl_fatal"
|
|
188 |
#' ) %>%
|
|
189 |
#' count_patients_with_event(
|
|
190 |
#' "SUBJID",
|
|
191 |
#' filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL", "AEREL" = "Y"),
|
|
192 |
#' .labels = c(count_fraction = "Total number of patients with related fatal AEs"),
|
|
193 |
#' .indent_mods = c(count_fraction = 2L),
|
|
194 |
#' table_names = "tbl_rel_fatal"
|
|
195 |
#' )
|
|
196 |
#'
|
|
197 |
#' build_table(lyt, tern_ex_adae, alt_counts_df = tern_ex_adsl)
|
|
198 |
#'
|
|
199 |
#' @export
|
|
200 |
#' @order 2
|
|
201 |
count_patients_with_event <- function(lyt, |
|
202 |
vars,
|
|
203 |
filters,
|
|
204 |
riskdiff = FALSE, |
|
205 |
na_str = default_na_str(), |
|
206 |
nested = TRUE, |
|
207 |
show_labels = ifelse(length(vars) > 1, "visible", "hidden"), |
|
208 |
...,
|
|
209 |
table_names = vars, |
|
210 |
.stats = "count_fraction", |
|
211 |
.stat_names = NULL, |
|
212 |
.formats = list(count_fraction = format_count_fraction_fixed_dp), |
|
213 |
.labels = NULL, |
|
214 |
.indent_mods = NULL) { |
|
215 | 7x |
checkmate::assert_flag(riskdiff) |
216 | 7x |
afun <- if (isFALSE(riskdiff)) a_count_patients_with_event else afun_riskdiff |
217 | ||
218 |
# Process standard extra arguments
|
|
219 | 7x |
extra_args <- list(".stats" = .stats) |
220 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
221 | 7x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
222 | 6x |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
223 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
224 | ||
225 |
# Process additional arguments to the statistic function
|
|
226 | 7x |
extra_args <- c( |
227 | 7x |
extra_args,
|
228 | 7x |
filters = list(filters), |
229 | 7x |
if (!isFALSE(riskdiff)) list(afun = list("s_count_patients_with_event" = a_count_patients_with_event)), |
230 |
...
|
|
231 |
)
|
|
232 | ||
233 |
# Append additional info from layout to the analysis function
|
|
234 | 7x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
235 | 7x |
formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]]) |
236 | ||
237 | 7x |
analyze( |
238 | 7x |
lyt = lyt, |
239 | 7x |
vars = vars, |
240 | 7x |
afun = afun, |
241 | 7x |
na_str = na_str, |
242 | 7x |
nested = nested, |
243 | 7x |
extra_args = extra_args, |
244 | 7x |
show_labels = show_labels, |
245 | 7x |
table_names = table_names |
246 |
)
|
|
247 |
}
|
1 |
#' Analyze numeric variables in columns
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("experimental")`
|
|
4 |
#'
|
|
5 |
#' The layout-creating function [analyze_vars_in_cols()] creates a layout element to generate a column-wise
|
|
6 |
#' analysis table.
|
|
7 |
#'
|
|
8 |
#' This function sets the analysis methods as column labels and is a wrapper for [rtables::analyze_colvars()].
|
|
9 |
#' It was designed principally for PK tables.
|
|
10 |
#'
|
|
11 |
#' @inheritParams argument_convention
|
|
12 |
#' @inheritParams rtables::analyze_colvars
|
|
13 |
#' @param imp_rule (`string` or `NULL`)\cr imputation rule setting. Defaults to `NULL` for no imputation rule. Can
|
|
14 |
#' also be `"1/3"` to implement 1/3 imputation rule or `"1/2"` to implement 1/2 imputation rule. In order
|
|
15 |
#' to use an imputation rule, the `avalcat_var` argument must be specified. See [imputation_rule()]
|
|
16 |
#' for more details on imputation.
|
|
17 |
#' @param avalcat_var (`string`)\cr if `imp_rule` is not `NULL`, name of variable that indicates whether a
|
|
18 |
#' row in the data corresponds to an analysis value in category `"BLQ"`, `"LTR"`, `"<PCLLOQ"`, or none of
|
|
19 |
#' the above (defaults to `"AVALCAT1"`). Variable must be present in the data and should match the variable
|
|
20 |
#' used to calculate the `n_blq` statistic (if included in `.stats`).
|
|
21 |
#' @param cache (`flag`)\cr whether to store computed values in a temporary caching environment. This will
|
|
22 |
#' speed up calculations in large tables, but should be set to `FALSE` if the same `rtable` layout is
|
|
23 |
#' used for multiple tables with different data. Defaults to `FALSE`.
|
|
24 |
#' @param row_labels (`character`)\cr as this function works in columns space, usually `.labels`
|
|
25 |
#' character vector applies on the column space. You can change the row labels by defining this
|
|
26 |
#' parameter to a named character vector with names corresponding to the split values. It defaults
|
|
27 |
#' to `NULL` and if it contains only one `string`, it will duplicate that as a row label.
|
|
28 |
#' @param do_summarize_row_groups (`flag`)\cr defaults to `FALSE` and applies the analysis to the current
|
|
29 |
#' label rows. This is a wrapper of [rtables::summarize_row_groups()] and it can accept `labelstr`
|
|
30 |
#' to define row labels. This behavior is not supported as we never need to overload row labels.
|
|
31 |
#' @param split_col_vars (`flag`)\cr defaults to `TRUE` and puts the analysis results onto the columns.
|
|
32 |
#' This option allows you to add multiple instances of this functions, also in a nested fashion,
|
|
33 |
#' without adding more splits. This split must happen only one time on a single layout.
|
|
34 |
#'
|
|
35 |
#' @return
|
|
36 |
#' A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
|
|
37 |
#' Adding this function to an `rtable` layout will summarize the given variables, arrange the output
|
|
38 |
#' in columns, and add it to the table layout.
|
|
39 |
#'
|
|
40 |
#' @note
|
|
41 |
#' * This is an experimental implementation of [rtables::summarize_row_groups()] and [rtables::analyze_colvars()]
|
|
42 |
#' that may be subjected to changes as `rtables` extends its support to more complex analysis pipelines in the
|
|
43 |
#' column space. We encourage users to read the examples carefully and file issues for different use cases.
|
|
44 |
#' * In this function, `labelstr` behaves atypically. If `labelstr = NULL` (the default), row labels are assigned
|
|
45 |
#' automatically as the split values if `do_summarize_row_groups = FALSE` (the default), and as the group label
|
|
46 |
#' if `do_summarize_row_groups = TRUE`.
|
|
47 |
#'
|
|
48 |
#' @seealso [analyze_vars()], [rtables::analyze_colvars()].
|
|
49 |
#'
|
|
50 |
#' @examples
|
|
51 |
#' library(dplyr)
|
|
52 |
#'
|
|
53 |
#' # Data preparation
|
|
54 |
#' adpp <- tern_ex_adpp %>% h_pkparam_sort()
|
|
55 |
#'
|
|
56 |
#' lyt <- basic_table() %>%
|
|
57 |
#' split_rows_by(var = "STRATA1", label_pos = "topleft") %>%
|
|
58 |
#' split_rows_by(
|
|
59 |
#' var = "SEX",
|
|
60 |
#' label_pos = "topleft",
|
|
61 |
#' child_labels = "hidden"
|
|
62 |
#' ) %>% # Removes duplicated labels
|
|
63 |
#' analyze_vars_in_cols(vars = "AGE")
|
|
64 |
#' result <- build_table(lyt = lyt, df = adpp)
|
|
65 |
#' result
|
|
66 |
#'
|
|
67 |
#' # By selecting just some statistics and ad-hoc labels
|
|
68 |
#' lyt <- basic_table() %>%
|
|
69 |
#' split_rows_by(var = "ARM", label_pos = "topleft") %>%
|
|
70 |
#' split_rows_by(
|
|
71 |
#' var = "SEX",
|
|
72 |
#' label_pos = "topleft",
|
|
73 |
#' child_labels = "hidden",
|
|
74 |
#' split_fun = drop_split_levels
|
|
75 |
#' ) %>%
|
|
76 |
#' analyze_vars_in_cols(
|
|
77 |
#' vars = "AGE",
|
|
78 |
#' .stats = c("n", "cv", "geom_mean"),
|
|
79 |
#' .labels = c(
|
|
80 |
#' n = "aN",
|
|
81 |
#' cv = "aCV",
|
|
82 |
#' geom_mean = "aGeomMean"
|
|
83 |
#' )
|
|
84 |
#' )
|
|
85 |
#' result <- build_table(lyt = lyt, df = adpp)
|
|
86 |
#' result
|
|
87 |
#'
|
|
88 |
#' # Changing row labels
|
|
89 |
#' lyt <- basic_table() %>%
|
|
90 |
#' analyze_vars_in_cols(
|
|
91 |
#' vars = "AGE",
|
|
92 |
#' row_labels = "some custom label"
|
|
93 |
#' )
|
|
94 |
#' result <- build_table(lyt, df = adpp)
|
|
95 |
#' result
|
|
96 |
#'
|
|
97 |
#' # Pharmacokinetic parameters
|
|
98 |
#' lyt <- basic_table() %>%
|
|
99 |
#' split_rows_by(
|
|
100 |
#' var = "TLG_DISPLAY",
|
|
101 |
#' split_label = "PK Parameter",
|
|
102 |
#' label_pos = "topleft",
|
|
103 |
#' child_labels = "hidden"
|
|
104 |
#' ) %>%
|
|
105 |
#' analyze_vars_in_cols(
|
|
106 |
#' vars = "AVAL"
|
|
107 |
#' )
|
|
108 |
#' result <- build_table(lyt, df = adpp)
|
|
109 |
#' result
|
|
110 |
#'
|
|
111 |
#' # Multiple calls (summarize label and analyze underneath)
|
|
112 |
#' lyt <- basic_table() %>%
|
|
113 |
#' split_rows_by(
|
|
114 |
#' var = "TLG_DISPLAY",
|
|
115 |
#' split_label = "PK Parameter",
|
|
116 |
#' label_pos = "topleft"
|
|
117 |
#' ) %>%
|
|
118 |
#' analyze_vars_in_cols(
|
|
119 |
#' vars = "AVAL",
|
|
120 |
#' do_summarize_row_groups = TRUE # does a summarize level
|
|
121 |
#' ) %>%
|
|
122 |
#' split_rows_by("SEX",
|
|
123 |
#' child_labels = "hidden",
|
|
124 |
#' label_pos = "topleft"
|
|
125 |
#' ) %>%
|
|
126 |
#' analyze_vars_in_cols(
|
|
127 |
#' vars = "AVAL",
|
|
128 |
#' split_col_vars = FALSE # avoids re-splitting the columns
|
|
129 |
#' )
|
|
130 |
#' result <- build_table(lyt, df = adpp)
|
|
131 |
#' result
|
|
132 |
#'
|
|
133 |
#' @export
|
|
134 |
analyze_vars_in_cols <- function(lyt, |
|
135 |
vars,
|
|
136 |
...,
|
|
137 |
.stats = c( |
|
138 |
"n",
|
|
139 |
"mean",
|
|
140 |
"sd",
|
|
141 |
"se",
|
|
142 |
"cv",
|
|
143 |
"geom_cv"
|
|
144 |
),
|
|
145 |
.labels = c( |
|
146 |
n = "n", |
|
147 |
mean = "Mean", |
|
148 |
sd = "SD", |
|
149 |
se = "SE", |
|
150 |
cv = "CV (%)", |
|
151 |
geom_cv = "CV % Geometric Mean" |
|
152 |
),
|
|
153 |
row_labels = NULL, |
|
154 |
do_summarize_row_groups = FALSE, |
|
155 |
split_col_vars = TRUE, |
|
156 |
imp_rule = NULL, |
|
157 |
avalcat_var = "AVALCAT1", |
|
158 |
cache = FALSE, |
|
159 |
.indent_mods = NULL, |
|
160 |
na_str = default_na_str(), |
|
161 |
nested = TRUE, |
|
162 |
.formats = NULL, |
|
163 |
.aligns = NULL) { |
|
164 | 26x |
extra_args <- list(...) |
165 | ||
166 | 26x |
checkmate::assert_string(na_str, na.ok = TRUE, null.ok = TRUE) |
167 | 26x |
checkmate::assert_character(row_labels, null.ok = TRUE) |
168 | 26x |
checkmate::assert_int(.indent_mods, null.ok = TRUE) |
169 | 26x |
checkmate::assert_flag(nested) |
170 | 26x |
checkmate::assert_flag(split_col_vars) |
171 | 26x |
checkmate::assert_flag(do_summarize_row_groups) |
172 | ||
173 |
# Filtering
|
|
174 | 26x |
met_grps <- paste0("analyze_vars", c("_numeric", "_counts")) |
175 | 26x |
.stats <- get_stats(met_grps, stats_in = .stats) |
176 | 26x |
formats_v <- get_formats_from_stats(stats = .stats, formats_in = .formats) |
177 | 26x |
labels_v <- get_labels_from_stats(stats = .stats, labels_in = .labels) %>% .unlist_keep_nulls() |
178 | ! |
if ("control" %in% names(extra_args)) labels_v <- labels_v %>% labels_use_control(extra_args[["control"]], .labels) |
179 | ||
180 |
# Check for vars in the case that one or more are used
|
|
181 | 26x |
if (length(vars) == 1) { |
182 | 21x |
vars <- rep(vars, length(.stats)) |
183 | 5x |
} else if (length(vars) != length(.stats)) { |
184 | 1x |
stop( |
185 | 1x |
"Analyzed variables (vars) does not have the same ",
|
186 | 1x |
"number of elements of specified statistics (.stats)."
|
187 |
)
|
|
188 |
}
|
|
189 | ||
190 | 25x |
if (split_col_vars) { |
191 |
# Checking there is not a previous identical column split
|
|
192 | 21x |
clyt <- tail(clayout(lyt), 1)[[1]] |
193 | ||
194 | 21x |
dummy_lyt <- split_cols_by_multivar( |
195 | 21x |
lyt = basic_table(), |
196 | 21x |
vars = vars, |
197 | 21x |
varlabels = labels_v |
198 |
)
|
|
199 | ||
200 | 21x |
if (any(sapply(clyt, identical, y = get_last_col_split(dummy_lyt)))) { |
201 | 2x |
stop( |
202 | 2x |
"Column split called again with the same values. ",
|
203 | 2x |
"This can create many unwanted columns. Please consider adding ",
|
204 | 2x |
"split_col_vars = FALSE to the last call of ",
|
205 | 2x |
deparse(sys.calls()[[sys.nframe() - 1]]), "." |
206 |
)
|
|
207 |
}
|
|
208 | ||
209 |
# Main col split
|
|
210 | 19x |
lyt <- split_cols_by_multivar( |
211 | 19x |
lyt = lyt, |
212 | 19x |
vars = vars, |
213 | 19x |
varlabels = labels_v |
214 |
)
|
|
215 |
}
|
|
216 | ||
217 | 23x |
env <- new.env() # create caching environment |
218 | ||
219 | 23x |
if (do_summarize_row_groups) { |
220 | 8x |
if (length(unique(vars)) > 1) { |
221 | ! |
stop("When using do_summarize_row_groups only one label level var should be inserted.") |
222 |
}
|
|
223 | ||
224 |
# Function list for do_summarize_row_groups. Slightly different handling of labels
|
|
225 | 8x |
cfun_list <- Map( |
226 | 8x |
function(stat, use_cache, cache_env) { |
227 | 48x |
function(u, .spl_context, labelstr, .df_row, ...) { |
228 |
# Statistic
|
|
229 | 152x |
var_row_val <- paste( |
230 | 152x |
gsub("\\._\\[\\[[0-9]+\\]\\]_\\.", "", paste(tail(.spl_context$cur_col_split_val, 1)[[1]], collapse = "_")), |
231 | 152x |
paste(.spl_context$value, collapse = "_"), |
232 | 152x |
sep = "_" |
233 |
)
|
|
234 | 152x |
if (use_cache) { |
235 | ! |
if (is.null(cache_env[[var_row_val]])) { |
236 | ! |
cache_env[[var_row_val]] <- s_summary(u, ...) |
237 |
}
|
|
238 | ! |
x_stats <- cache_env[[var_row_val]] |
239 |
} else { |
|
240 | 152x |
x_stats <- s_summary(u, ...) |
241 |
}
|
|
242 | ||
243 | 152x |
if (is.null(imp_rule) || !stat %in% c("mean", "sd", "cv", "geom_mean", "geom_cv", "median", "min", "max")) { |
244 | 152x |
res <- x_stats[[stat]] |
245 |
} else { |
|
246 | ! |
timept <- as.numeric(gsub(".*?([0-9\\.]+).*", "\\1", tail(.spl_context$value, 1))) |
247 | ! |
res_imp <- imputation_rule( |
248 | ! |
.df_row, x_stats, stat, |
249 | ! |
imp_rule = imp_rule, |
250 | ! |
post = grepl("Predose", tail(.spl_context$value, 1)) || timept > 0, |
251 | ! |
avalcat_var = avalcat_var |
252 |
)
|
|
253 | ! |
res <- res_imp[["val"]] |
254 | ! |
na_str <- res_imp[["na_str"]] |
255 |
}
|
|
256 | ||
257 |
# Label check and replacement
|
|
258 | 152x |
if (length(row_labels) > 1) { |
259 | 32x |
if (!(labelstr %in% names(row_labels))) { |
260 | 2x |
stop( |
261 | 2x |
"Replacing the labels in do_summarize_row_groups needs a named vector",
|
262 | 2x |
"that contains the split values. In the current split variable ",
|
263 | 2x |
.spl_context$split[nrow(.spl_context)], |
264 | 2x |
" the labelstr value (split value by default) ", labelstr, " is not in", |
265 | 2x |
" row_labels names: ", names(row_labels) |
266 |
)
|
|
267 |
}
|
|
268 | 30x |
lbl <- unlist(row_labels[labelstr]) |
269 |
} else { |
|
270 | 120x |
lbl <- labelstr |
271 |
}
|
|
272 | ||
273 |
# Cell creation
|
|
274 | 150x |
rcell(res, |
275 | 150x |
label = lbl, |
276 | 150x |
format = formats_v[names(formats_v) == stat][[1]], |
277 | 150x |
format_na_str = na_str, |
278 | 150x |
indent_mod = ifelse(is.null(.indent_mods), 0L, .indent_mods), |
279 | 150x |
align = .aligns |
280 |
)
|
|
281 |
}
|
|
282 |
},
|
|
283 | 8x |
stat = .stats, |
284 | 8x |
use_cache = cache, |
285 | 8x |
cache_env = replicate(length(.stats), env) |
286 |
)
|
|
287 | ||
288 |
# Main call to rtables
|
|
289 | 8x |
summarize_row_groups( |
290 | 8x |
lyt = lyt, |
291 | 8x |
var = unique(vars), |
292 | 8x |
cfun = cfun_list, |
293 | 8x |
na_str = na_str, |
294 | 8x |
extra_args = extra_args |
295 |
)
|
|
296 |
} else { |
|
297 |
# Function list for analyze_colvars
|
|
298 | 15x |
afun_list <- Map( |
299 | 15x |
function(stat, use_cache, cache_env) { |
300 | 76x |
function(u, .spl_context, .df_row, ...) { |
301 |
# Main statistics
|
|
302 | 468x |
var_row_val <- paste( |
303 | 468x |
gsub("\\._\\[\\[[0-9]+\\]\\]_\\.", "", paste(tail(.spl_context$cur_col_split_val, 1)[[1]], collapse = "_")), |
304 | 468x |
paste(.spl_context$value, collapse = "_"), |
305 | 468x |
sep = "_" |
306 |
)
|
|
307 | 468x |
if (use_cache) { |
308 | 16x |
if (is.null(cache_env[[var_row_val]])) cache_env[[var_row_val]] <- s_summary(u, ...) |
309 | 56x |
x_stats <- cache_env[[var_row_val]] |
310 |
} else { |
|
311 | 412x |
x_stats <- s_summary(u, ...) |
312 |
}
|
|
313 | ||
314 | 468x |
if (is.null(imp_rule) || !stat %in% c("mean", "sd", "cv", "geom_mean", "geom_cv", "median", "min", "max")) { |
315 | 348x |
res <- x_stats[[stat]] |
316 |
} else { |
|
317 | 120x |
timept <- as.numeric(gsub(".*?([0-9\\.]+).*", "\\1", tail(.spl_context$value, 1))) |
318 | 120x |
res_imp <- imputation_rule( |
319 | 120x |
.df_row, x_stats, stat, |
320 | 120x |
imp_rule = imp_rule, |
321 | 120x |
post = grepl("Predose", tail(.spl_context$value, 1)) || timept > 0, |
322 | 120x |
avalcat_var = avalcat_var |
323 |
)
|
|
324 | 120x |
res <- res_imp[["val"]] |
325 | 120x |
na_str <- res_imp[["na_str"]] |
326 |
}
|
|
327 | ||
328 | 468x |
if (is.list(res)) { |
329 | 52x |
if (length(res) > 1) { |
330 | 1x |
stop("The analyzed column produced more than one category of results.") |
331 |
} else { |
|
332 | 51x |
res <- unlist(res) |
333 |
}
|
|
334 |
}
|
|
335 | ||
336 |
# Label from context
|
|
337 | 467x |
label_from_context <- .spl_context$value[nrow(.spl_context)] |
338 | ||
339 |
# Label switcher
|
|
340 | 467x |
if (is.null(row_labels)) { |
341 | 387x |
lbl <- label_from_context |
342 |
} else { |
|
343 | 80x |
if (length(row_labels) > 1) { |
344 | 68x |
if (!(label_from_context %in% names(row_labels))) { |
345 | 2x |
stop( |
346 | 2x |
"Replacing the labels in do_summarize_row_groups needs a named vector",
|
347 | 2x |
"that contains the split values. In the current split variable ",
|
348 | 2x |
.spl_context$split[nrow(.spl_context)], |
349 | 2x |
" the split value ", label_from_context, " is not in", |
350 | 2x |
" row_labels names: ", names(row_labels) |
351 |
)
|
|
352 |
}
|
|
353 | 66x |
lbl <- unlist(row_labels[label_from_context]) |
354 |
} else { |
|
355 | 12x |
lbl <- row_labels |
356 |
}
|
|
357 |
}
|
|
358 | ||
359 |
# Cell creation
|
|
360 | 465x |
rcell(res, |
361 | 465x |
label = lbl, |
362 | 465x |
format = formats_v[names(formats_v) == stat][[1]], |
363 | 465x |
format_na_str = na_str, |
364 | 465x |
indent_mod = ifelse(is.null(.indent_mods), 0L, .indent_mods), |
365 | 465x |
align = .aligns |
366 |
)
|
|
367 |
}
|
|
368 |
},
|
|
369 | 15x |
stat = .stats, |
370 | 15x |
use_cache = cache, |
371 | 15x |
cache_env = replicate(length(.stats), env) |
372 |
)
|
|
373 | ||
374 |
# Main call to rtables
|
|
375 | 15x |
analyze_colvars(lyt, |
376 | 15x |
afun = afun_list, |
377 | 15x |
na_str = na_str, |
378 | 15x |
nested = nested, |
379 | 15x |
extra_args = extra_args |
380 |
)
|
|
381 |
}
|
|
382 |
}
|
|
383 | ||
384 |
# Helper function
|
|
385 |
get_last_col_split <- function(lyt) { |
|
386 | 3x |
tail(tail(clayout(lyt), 1)[[1]], 1)[[1]] |
387 |
}
|
1 |
#' Helper function to create a new SMQ variable in ADAE by stacking SMQ and/or CQ records.
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Helper function to create a new SMQ variable in ADAE that consists of all adverse events belonging to
|
|
6 |
#' selected Standardized/Customized queries. The new dataset will only contain records of the adverse events
|
|
7 |
#' belonging to any of the selected baskets. Remember that `na_str` must match the needed pre-processing
|
|
8 |
#' done with [df_explicit_na()] to have the desired output.
|
|
9 |
#'
|
|
10 |
#' @inheritParams argument_convention
|
|
11 |
#' @param baskets (`character`)\cr variable names of the selected Standardized/Customized queries.
|
|
12 |
#' @param smq_varlabel (`string`)\cr a label for the new variable created.
|
|
13 |
#' @param keys (`character`)\cr names of the key variables to be returned along with the new variable created.
|
|
14 |
#' @param aag_summary (`data.frame`)\cr containing the SMQ baskets and the levels of interest for the final SMQ
|
|
15 |
#' variable. This is useful when there are some levels of interest that are not observed in the `df` dataset.
|
|
16 |
#' The two columns of this dataset should be named `basket` and `basket_name`.
|
|
17 |
#'
|
|
18 |
#' @return A `data.frame` with variables in `keys` taken from `df` and new variable SMQ containing
|
|
19 |
#' records belonging to the baskets selected via the `baskets` argument.
|
|
20 |
#'
|
|
21 |
#' @examples
|
|
22 |
#' adae <- tern_ex_adae[1:20, ] %>% df_explicit_na()
|
|
23 |
#' h_stack_by_baskets(df = adae)
|
|
24 |
#'
|
|
25 |
#' aag <- data.frame(
|
|
26 |
#' NAMVAR = c("CQ01NAM", "CQ02NAM", "SMQ01NAM", "SMQ02NAM"),
|
|
27 |
#' REFNAME = c(
|
|
28 |
#' "D.2.1.5.3/A.1.1.1.1 aesi", "X.9.9.9.9/Y.8.8.8.8 aesi",
|
|
29 |
#' "C.1.1.1.3/B.2.2.3.1 aesi", "C.1.1.1.3/B.3.3.3.3 aesi"
|
|
30 |
#' ),
|
|
31 |
#' SCOPE = c("", "", "BROAD", "BROAD"),
|
|
32 |
#' stringsAsFactors = FALSE
|
|
33 |
#' )
|
|
34 |
#'
|
|
35 |
#' basket_name <- character(nrow(aag))
|
|
36 |
#' cq_pos <- grep("^(CQ).+NAM$", aag$NAMVAR)
|
|
37 |
#' smq_pos <- grep("^(SMQ).+NAM$", aag$NAMVAR)
|
|
38 |
#' basket_name[cq_pos] <- aag$REFNAME[cq_pos]
|
|
39 |
#' basket_name[smq_pos] <- paste0(
|
|
40 |
#' aag$REFNAME[smq_pos], "(", aag$SCOPE[smq_pos], ")"
|
|
41 |
#' )
|
|
42 |
#'
|
|
43 |
#' aag_summary <- data.frame(
|
|
44 |
#' basket = aag$NAMVAR,
|
|
45 |
#' basket_name = basket_name,
|
|
46 |
#' stringsAsFactors = TRUE
|
|
47 |
#' )
|
|
48 |
#'
|
|
49 |
#' result <- h_stack_by_baskets(df = adae, aag_summary = aag_summary)
|
|
50 |
#' all(levels(aag_summary$basket_name) %in% levels(result$SMQ))
|
|
51 |
#'
|
|
52 |
#' h_stack_by_baskets(
|
|
53 |
#' df = adae,
|
|
54 |
#' aag_summary = NULL,
|
|
55 |
#' keys = c("STUDYID", "USUBJID", "AEDECOD", "ARM"),
|
|
56 |
#' baskets = "SMQ01NAM"
|
|
57 |
#' )
|
|
58 |
#'
|
|
59 |
#' @export
|
|
60 |
h_stack_by_baskets <- function(df, |
|
61 |
baskets = grep("^(SMQ|CQ).+NAM$", names(df), value = TRUE), |
|
62 |
smq_varlabel = "Standardized MedDRA Query", |
|
63 |
keys = c("STUDYID", "USUBJID", "ASTDTM", "AEDECOD", "AESEQ"), |
|
64 |
aag_summary = NULL, |
|
65 |
na_str = "<Missing>") { |
|
66 | 5x |
smq_nam <- baskets[startsWith(baskets, "SMQ")] |
67 |
# SC corresponding to NAM
|
|
68 | 5x |
smq_sc <- gsub(pattern = "NAM", replacement = "SC", x = smq_nam, fixed = TRUE) |
69 | 5x |
smq <- stats::setNames(smq_sc, smq_nam) |
70 | ||
71 | 5x |
checkmate::assert_character(baskets) |
72 | 5x |
checkmate::assert_string(smq_varlabel) |
73 | 5x |
checkmate::assert_data_frame(df) |
74 | 5x |
checkmate::assert_true(all(startsWith(baskets, "SMQ") | startsWith(baskets, "CQ"))) |
75 | 4x |
checkmate::assert_true(all(endsWith(baskets, "NAM"))) |
76 | 3x |
checkmate::assert_subset(baskets, names(df)) |
77 | 3x |
checkmate::assert_subset(keys, names(df)) |
78 | 3x |
checkmate::assert_subset(smq_sc, names(df)) |
79 | 3x |
checkmate::assert_string(na_str) |
80 | ||
81 | 3x |
if (!is.null(aag_summary)) { |
82 | 1x |
assert_df_with_variables( |
83 | 1x |
df = aag_summary, |
84 | 1x |
variables = list(val = c("basket", "basket_name")) |
85 |
)
|
|
86 |
# Warning in case there is no match between `aag_summary$basket` and `baskets` argument.
|
|
87 |
# Honestly, I think those should completely match. Target baskets should be the same.
|
|
88 | 1x |
if (length(intersect(baskets, unique(aag_summary$basket))) == 0) { |
89 | ! |
warning("There are 0 baskets in common between aag_summary$basket and `baskets` argument.") |
90 |
}
|
|
91 |
}
|
|
92 | ||
93 | 3x |
var_labels <- c(formatters::var_labels(df[, keys]), "SMQ" = smq_varlabel) |
94 | ||
95 |
# convert `na_str` records from baskets to NA for the later loop and from wide to long steps
|
|
96 | 3x |
df[, c(baskets, smq_sc)][df[, c(baskets, smq_sc)] == na_str] <- NA |
97 | ||
98 | 3x |
if (all(is.na(df[, baskets]))) { # in case there is no level for the target baskets |
99 | 1x |
df_long <- df[-seq_len(nrow(df)), keys] # we just need an empty data frame keeping all factor levels |
100 |
} else { |
|
101 |
# Concatenate SMQxxxNAM with corresponding SMQxxxSC
|
|
102 | 2x |
df_cnct <- df[, c(keys, baskets[startsWith(baskets, "CQ")])] |
103 | ||
104 | 2x |
for (nam in names(smq)) { |
105 | 4x |
sc <- smq[nam] # SMQxxxSC corresponding to SMQxxxNAM |
106 | 4x |
nam_notna <- !is.na(df[[nam]]) |
107 | 4x |
new_colname <- paste(nam, sc, sep = "_") |
108 | 4x |
df_cnct[nam_notna, new_colname] <- paste0(df[[nam]], "(", df[[sc]], ")")[nam_notna] |
109 |
}
|
|
110 | ||
111 | 2x |
df_cnct$unique_id <- seq(1, nrow(df_cnct)) |
112 | 2x |
var_cols <- names(df_cnct)[!(names(df_cnct) %in% c(keys, "unique_id"))] |
113 |
# have to convert df_cnct from tibble to data frame
|
|
114 |
# as it throws a warning otherwise about rownames.
|
|
115 |
# tibble do not support rownames and reshape creates rownames
|
|
116 | ||
117 | 2x |
df_long <- stats::reshape( |
118 | 2x |
data = as.data.frame(df_cnct), |
119 | 2x |
varying = var_cols, |
120 | 2x |
v.names = "SMQ", |
121 | 2x |
idvar = names(df_cnct)[names(df_cnct) %in% c(keys, "unique_id")], |
122 | 2x |
direction = "long", |
123 | 2x |
new.row.names = seq(prod(length(var_cols), nrow(df_cnct))) |
124 |
)
|
|
125 | ||
126 | 2x |
df_long <- df_long[!is.na(df_long[, "SMQ"]), !(names(df_long) %in% c("time", "unique_id"))] |
127 | 2x |
df_long$SMQ <- as.factor(df_long$SMQ) |
128 |
}
|
|
129 | ||
130 | 3x |
smq_levels <- setdiff(levels(df_long[["SMQ"]]), na_str) |
131 | ||
132 | 3x |
if (!is.null(aag_summary)) { |
133 |
# A warning in case there is no match between df and aag_summary records
|
|
134 | 1x |
if (length(intersect(smq_levels, unique(aag_summary$basket_name))) == 0) { |
135 | 1x |
warning("There are 0 basket levels in common between aag_summary$basket_name and df.") |
136 |
}
|
|
137 | 1x |
df_long[["SMQ"]] <- factor( |
138 | 1x |
df_long[["SMQ"]], |
139 | 1x |
levels = sort( |
140 | 1x |
c( |
141 | 1x |
smq_levels,
|
142 | 1x |
setdiff(unique(aag_summary$basket_name), smq_levels) |
143 |
)
|
|
144 |
)
|
|
145 |
)
|
|
146 |
} else { |
|
147 | 2x |
all_na_basket_flag <- vapply(df[, baskets], function(x) { |
148 | 6x |
all(is.na(x)) |
149 | 2x |
}, FUN.VALUE = logical(1)) |
150 | 2x |
all_na_basket <- baskets[all_na_basket_flag] |
151 | ||
152 | 2x |
df_long[["SMQ"]] <- factor( |
153 | 2x |
df_long[["SMQ"]], |
154 | 2x |
levels = sort(c(smq_levels, all_na_basket)) |
155 |
)
|
|
156 |
}
|
|
157 | 3x |
formatters::var_labels(df_long) <- var_labels |
158 | 3x |
tibble::tibble(df_long) |
159 |
}
|
1 |
#' Incidence rate estimation
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [estimate_incidence_rate()] creates a layout element to estimate an event rate adjusted for
|
|
6 |
#' person-years at risk, otherwise known as incidence rate. The primary analysis variable specified via `vars` is
|
|
7 |
#' the person-years at risk. In addition to this variable, the `n_events` variable for number of events observed (where
|
|
8 |
#' a value of 1 means an event was observed and 0 means that no event was observed) must also be specified.
|
|
9 |
#'
|
|
10 |
#' @inheritParams argument_convention
|
|
11 |
#' @param control (`list`)\cr parameters for estimation details, specified by using
|
|
12 |
#' the helper function [control_incidence_rate()]. Possible parameter options are:
|
|
13 |
#' * `conf_level` (`proportion`)\cr confidence level for the estimated incidence rate.
|
|
14 |
#' * `conf_type` (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar`
|
|
15 |
#' for confidence interval type.
|
|
16 |
#' * `input_time_unit` (`string`)\cr `day`, `week`, `month`, or `year` (default)
|
|
17 |
#' indicating time unit for data input.
|
|
18 |
#' * `num_pt_year` (`numeric`)\cr time unit for desired output (in person-years).
|
|
19 |
#' @param n_events (`string`)\cr name of integer variable indicating whether an event has been observed (1) or not (0).
|
|
20 |
#' @param id_var (`string`)\cr name of variable used as patient identifier if `"n_unique"` is included in `.stats`.
|
|
21 |
#' Defaults to `"USUBJID"`.
|
|
22 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
23 |
#'
|
|
24 |
#' Options are: ``r shQuote(get_stats("estimate_incidence_rate"), type = "sh")``
|
|
25 |
#' @param summarize (`flag`)\cr whether the function should act as an analyze function (`summarize = FALSE`), or a
|
|
26 |
#' summarize function (`summarize = TRUE`). Defaults to `FALSE`.
|
|
27 |
#' @param label_fmt (`string`)\cr how labels should be formatted after a row split occurs if `summarize = TRUE`. The
|
|
28 |
#' string should use `"%s"` to represent row split levels, and `"%.labels"` to represent labels supplied to the
|
|
29 |
#' `.labels` argument. Defaults to `"%s - %.labels"`.
|
|
30 |
#'
|
|
31 |
#' @seealso [control_incidence_rate()] and helper functions [h_incidence_rate].
|
|
32 |
#'
|
|
33 |
#' @examples
|
|
34 |
#' df <- data.frame(
|
|
35 |
#' USUBJID = as.character(seq(6)),
|
|
36 |
#' CNSR = c(0, 1, 1, 0, 0, 0),
|
|
37 |
#' AVAL = c(10.1, 20.4, 15.3, 20.8, 18.7, 23.4),
|
|
38 |
#' ARM = factor(c("A", "A", "A", "B", "B", "B")),
|
|
39 |
#' STRATA1 = factor(c("X", "Y", "Y", "X", "X", "Y"))
|
|
40 |
#' )
|
|
41 |
#' df$n_events <- 1 - df$CNSR
|
|
42 |
#'
|
|
43 |
#' @name incidence_rate
|
|
44 |
#' @order 1
|
|
45 |
NULL
|
|
46 | ||
47 |
#' @describeIn incidence_rate Statistics function which estimates the incidence rate and the
|
|
48 |
#' associated confidence interval.
|
|
49 |
#'
|
|
50 |
#' @return
|
|
51 |
#' * `s_incidence_rate()` returns the following statistics:
|
|
52 |
#' - `person_years`: Total person-years at risk.
|
|
53 |
#' - `n_events`: Total number of events observed.
|
|
54 |
#' - `rate`: Estimated incidence rate.
|
|
55 |
#' - `rate_ci`: Confidence interval for the incidence rate.
|
|
56 |
#' - `n_unique`: Total number of patients with at least one event observed.
|
|
57 |
#' - `n_rate`: Total number of events observed & estimated incidence rate.
|
|
58 |
#'
|
|
59 |
#' @keywords internal
|
|
60 |
s_incidence_rate <- function(df, |
|
61 |
.var,
|
|
62 |
...,
|
|
63 |
n_events,
|
|
64 |
is_event = lifecycle::deprecated(), |
|
65 |
id_var = "USUBJID", |
|
66 |
control = control_incidence_rate()) { |
|
67 | 17x |
if (lifecycle::is_present(is_event)) { |
68 | ! |
checkmate::assert_string(is_event) |
69 | ! |
lifecycle::deprecate_warn( |
70 | ! |
"0.9.6", "s_incidence_rate(is_event)", "s_incidence_rate(n_events)" |
71 |
)
|
|
72 | ! |
n_events <- is_event |
73 | ! |
df[[n_events]] <- as.numeric(df[[is_event]]) |
74 |
}
|
|
75 | ||
76 | 17x |
assert_df_with_variables(df, list(tte = .var, n_events = n_events)) |
77 | 17x |
checkmate::assert_string(.var) |
78 | 17x |
checkmate::assert_string(n_events) |
79 | 17x |
checkmate::assert_string(id_var) |
80 | 17x |
checkmate::assert_numeric(df[[.var]], any.missing = FALSE) |
81 | 17x |
checkmate::assert_integerish(df[[n_events]], any.missing = FALSE) |
82 | ||
83 | 17x |
n_unique <- n_available(unique(df[[id_var]][df[[n_events]] == 1])) |
84 | 17x |
input_time_unit <- control$input_time_unit |
85 | 17x |
num_pt_year <- control$num_pt_year |
86 | 17x |
conf_level <- control$conf_level |
87 | 17x |
person_years <- sum(df[[.var]], na.rm = TRUE) * ( |
88 | 17x |
1 * (input_time_unit == "year") + |
89 | 17x |
1 / 12 * (input_time_unit == "month") + |
90 | 17x |
1 / 52.14 * (input_time_unit == "week") + |
91 | 17x |
1 / 365.24 * (input_time_unit == "day") |
92 |
)
|
|
93 | 17x |
n_events <- sum(df[[n_events]], na.rm = TRUE) |
94 | ||
95 | 17x |
result <- h_incidence_rate( |
96 | 17x |
person_years,
|
97 | 17x |
n_events,
|
98 | 17x |
control
|
99 |
)
|
|
100 | 17x |
list( |
101 | 17x |
person_years = formatters::with_label(person_years, "Total patient-years at risk"), |
102 | 17x |
n_events = formatters::with_label(n_events, "Number of adverse events observed"), |
103 | 17x |
rate = formatters::with_label(result$rate, paste("AE rate per", num_pt_year, "patient-years")), |
104 | 17x |
rate_ci = formatters::with_label(result$rate_ci, f_conf_level(conf_level)), |
105 | 17x |
n_unique = formatters::with_label(n_unique, "Total number of patients with at least one adverse event"), |
106 | 17x |
n_rate = formatters::with_label( |
107 | 17x |
c(n_events, result$rate), |
108 | 17x |
paste("Number of adverse events observed (AE rate per", num_pt_year, "patient-years)") |
109 |
)
|
|
110 |
)
|
|
111 |
}
|
|
112 | ||
113 |
#' @describeIn incidence_rate Formatted analysis function which is used as `afun` in `estimate_incidence_rate()`.
|
|
114 |
#'
|
|
115 |
#' @return
|
|
116 |
#' * `a_incidence_rate()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
117 |
#'
|
|
118 |
#' @examples
|
|
119 |
#' a_incidence_rate(
|
|
120 |
#' df,
|
|
121 |
#' .var = "AVAL",
|
|
122 |
#' .df_row = df,
|
|
123 |
#' n_events = "n_events"
|
|
124 |
#' )
|
|
125 |
#'
|
|
126 |
#' @export
|
|
127 |
a_incidence_rate <- function(df, |
|
128 |
labelstr = "", |
|
129 |
label_fmt = "%s - %.labels", |
|
130 |
...,
|
|
131 |
.stats = NULL, |
|
132 |
.stat_names = NULL, |
|
133 |
.formats = NULL, |
|
134 |
.labels = NULL, |
|
135 |
.indent_mods = NULL) { |
|
136 | 16x |
checkmate::assert_string(label_fmt) |
137 | ||
138 |
# Check for additional parameters to the statistics function
|
|
139 | 16x |
dots_extra_args <- list(...) |
140 | 16x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
141 | 16x |
dots_extra_args$.additional_fun_parameters <- NULL |
142 | ||
143 |
# Check for user-defined functions
|
|
144 | 16x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
145 | 16x |
.stats <- default_and_custom_stats_list$all_stats |
146 | 16x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
147 | ||
148 |
# Main statistic calculations
|
|
149 | 16x |
x_stats <- .apply_stat_functions( |
150 | 16x |
default_stat_fnc = s_incidence_rate, |
151 | 16x |
custom_stat_fnc_list = custom_stat_functions, |
152 | 16x |
args_list = c( |
153 | 16x |
df = list(df), |
154 | 16x |
extra_afun_params,
|
155 | 16x |
dots_extra_args
|
156 |
)
|
|
157 |
)
|
|
158 | ||
159 |
# Fill in formatting defaults
|
|
160 | 16x |
.stats <- get_stats("estimate_incidence_rate", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) |
161 | 16x |
x_stats <- x_stats[.stats] |
162 | 16x |
.formats <- get_formats_from_stats(.stats, .formats) |
163 | 16x |
.labels <- get_labels_from_stats(.stats, .labels, tern_defaults = lapply(x_stats, attr, "label")) |
164 | 16x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods) |
165 | ||
166 |
# Apply label format
|
|
167 | 16x |
if (nzchar(labelstr) > 0) { |
168 | 8x |
.labels <- sapply(.labels, function(x) gsub("%.labels", x, gsub("%s", labelstr, label_fmt))) |
169 |
}
|
|
170 | ||
171 |
# Auto format handling
|
|
172 | 16x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
173 | ||
174 |
# Get and check statistical names
|
|
175 | 16x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
176 | ||
177 | 16x |
in_rows( |
178 | 16x |
.list = x_stats, |
179 | 16x |
.formats = .formats, |
180 | 16x |
.names = names(.labels), |
181 | 16x |
.stat_names = .stat_names, |
182 | 16x |
.labels = .labels %>% .unlist_keep_nulls(), |
183 | 16x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
184 |
)
|
|
185 |
}
|
|
186 | ||
187 |
#' @describeIn incidence_rate Layout-creating function which can take statistics function arguments
|
|
188 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
189 |
#'
|
|
190 |
#' @return
|
|
191 |
#' * `estimate_incidence_rate()` returns a layout object suitable for passing to further layouting functions,
|
|
192 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
193 |
#' the statistics from `s_incidence_rate()` to the table layout.
|
|
194 |
#'
|
|
195 |
#' @examples
|
|
196 |
#' basic_table(show_colcounts = TRUE) %>%
|
|
197 |
#' split_cols_by("ARM") %>%
|
|
198 |
#' estimate_incidence_rate(
|
|
199 |
#' vars = "AVAL",
|
|
200 |
#' n_events = "n_events",
|
|
201 |
#' control = control_incidence_rate(
|
|
202 |
#' input_time_unit = "month",
|
|
203 |
#' num_pt_year = 100
|
|
204 |
#' )
|
|
205 |
#' ) %>%
|
|
206 |
#' build_table(df)
|
|
207 |
#'
|
|
208 |
#' # summarize = TRUE
|
|
209 |
#' basic_table(show_colcounts = TRUE) %>%
|
|
210 |
#' split_cols_by("ARM") %>%
|
|
211 |
#' split_rows_by("STRATA1", child_labels = "visible") %>%
|
|
212 |
#' estimate_incidence_rate(
|
|
213 |
#' vars = "AVAL",
|
|
214 |
#' n_events = "n_events",
|
|
215 |
#' .stats = c("n_unique", "n_rate"),
|
|
216 |
#' summarize = TRUE,
|
|
217 |
#' label_fmt = "%.labels"
|
|
218 |
#' ) %>%
|
|
219 |
#' build_table(df)
|
|
220 |
#'
|
|
221 |
#' @export
|
|
222 |
#' @order 2
|
|
223 |
estimate_incidence_rate <- function(lyt, |
|
224 |
vars,
|
|
225 |
n_events,
|
|
226 |
id_var = "USUBJID", |
|
227 |
control = control_incidence_rate(), |
|
228 |
na_str = default_na_str(), |
|
229 |
nested = TRUE, |
|
230 |
summarize = FALSE, |
|
231 |
label_fmt = "%s - %.labels", |
|
232 |
...,
|
|
233 |
show_labels = "hidden", |
|
234 |
table_names = vars, |
|
235 |
.stats = c("person_years", "n_events", "rate", "rate_ci"), |
|
236 |
.stat_names = NULL, |
|
237 |
.formats = list(rate = "xx.xx", rate_ci = "(xx.xx, xx.xx)"), |
|
238 |
.labels = NULL, |
|
239 |
.indent_mods = NULL) { |
|
240 |
# Process standard extra arguments
|
|
241 | 5x |
extra_args <- list(".stats" = .stats) |
242 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
243 | 5x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
244 | 1x |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
245 | 1x |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
246 | ||
247 |
# Process additional arguments to the statistic function
|
|
248 | 5x |
extra_args <- c( |
249 | 5x |
extra_args,
|
250 | 5x |
n_events = n_events, id_var = id_var, control = list(control), label_fmt = label_fmt, |
251 |
...
|
|
252 |
)
|
|
253 | ||
254 |
# Adding additional info from layout to analysis function
|
|
255 | 5x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
256 | 5x |
formals(a_incidence_rate) <- c(formals(a_incidence_rate), extra_args[[".additional_fun_parameters"]]) |
257 | ||
258 | 5x |
if (!summarize) { |
259 | 3x |
analyze( |
260 | 3x |
lyt = lyt, |
261 | 3x |
vars = vars, |
262 | 3x |
afun = a_incidence_rate, |
263 | 3x |
na_str = na_str, |
264 | 3x |
nested = nested, |
265 | 3x |
extra_args = extra_args, |
266 | 3x |
show_labels = show_labels, |
267 | 3x |
table_names = table_names |
268 |
)
|
|
269 |
} else { |
|
270 | 2x |
summarize_row_groups( |
271 | 2x |
lyt = lyt, |
272 | 2x |
var = vars, |
273 | 2x |
cfun = a_incidence_rate, |
274 | 2x |
na_str = na_str, |
275 | 2x |
extra_args = extra_args |
276 |
)
|
|
277 |
}
|
|
278 |
}
|
1 |
#' Tabulate biomarker effects on survival by subgroup
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The [tabulate_survival_biomarkers()] function creates a layout element to tabulate the estimated effects of multiple
|
|
6 |
#' continuous biomarker variables on survival across subgroups, returning statistics including median survival time and
|
|
7 |
#' hazard ratio for each population subgroup. The table is created from `df`, a list of data frames returned by
|
|
8 |
#' [extract_survival_biomarkers()], with the statistics to include specified via the `vars` parameter.
|
|
9 |
#'
|
|
10 |
#' A forest plot can be created from the resulting table using the [g_forest()] function.
|
|
11 |
#'
|
|
12 |
#' @inheritParams fit_coxreg_multivar
|
|
13 |
#' @inheritParams survival_duration_subgroups
|
|
14 |
#' @inheritParams argument_convention
|
|
15 |
#' @param df (`data.frame`)\cr containing all analysis variables, as returned by
|
|
16 |
#' [extract_survival_biomarkers()].
|
|
17 |
#' @param vars (`character`)\cr the names of statistics to be reported among:
|
|
18 |
#' * `n_tot_events`: Total number of events per group.
|
|
19 |
#' * `n_tot`: Total number of observations per group.
|
|
20 |
#' * `median`: Median survival time.
|
|
21 |
#' * `hr`: Hazard ratio.
|
|
22 |
#' * `ci`: Confidence interval of hazard ratio.
|
|
23 |
#' * `pval`: p-value of the effect.
|
|
24 |
#' Note, one of the statistics `n_tot` and `n_tot_events`, as well as both `hr` and `ci` are required.
|
|
25 |
#'
|
|
26 |
#' @details These functions create a layout starting from a data frame which contains
|
|
27 |
#' the required statistics. The tables are then typically used as input for forest plots.
|
|
28 |
#'
|
|
29 |
#' @examples
|
|
30 |
#' library(dplyr)
|
|
31 |
#'
|
|
32 |
#' adtte <- tern_ex_adtte
|
|
33 |
#'
|
|
34 |
#' # Save variable labels before data processing steps.
|
|
35 |
#' adtte_labels <- formatters::var_labels(adtte)
|
|
36 |
#'
|
|
37 |
#' adtte_f <- adtte %>%
|
|
38 |
#' filter(PARAMCD == "OS") %>%
|
|
39 |
#' mutate(
|
|
40 |
#' AVALU = as.character(AVALU),
|
|
41 |
#' is_event = CNSR == 0
|
|
42 |
#' )
|
|
43 |
#' labels <- c("AVALU" = adtte_labels[["AVALU"]], "is_event" = "Event Flag")
|
|
44 |
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
|
|
45 |
#'
|
|
46 |
#' # Typical analysis of two continuous biomarkers `BMRKR1` and `AGE`,
|
|
47 |
#' # in multiple regression models containing one covariate `RACE`,
|
|
48 |
#' # as well as one stratification variable `STRATA1`. The subgroups
|
|
49 |
#' # are defined by the levels of `BMRKR2`.
|
|
50 |
#'
|
|
51 |
#' df <- extract_survival_biomarkers(
|
|
52 |
#' variables = list(
|
|
53 |
#' tte = "AVAL",
|
|
54 |
#' is_event = "is_event",
|
|
55 |
#' biomarkers = c("BMRKR1", "AGE"),
|
|
56 |
#' strata = "STRATA1",
|
|
57 |
#' covariates = "SEX",
|
|
58 |
#' subgroups = "BMRKR2"
|
|
59 |
#' ),
|
|
60 |
#' label_all = "Total Patients",
|
|
61 |
#' data = adtte_f
|
|
62 |
#' )
|
|
63 |
#' df
|
|
64 |
#'
|
|
65 |
#' # Here we group the levels of `BMRKR2` manually.
|
|
66 |
#' df_grouped <- extract_survival_biomarkers(
|
|
67 |
#' variables = list(
|
|
68 |
#' tte = "AVAL",
|
|
69 |
#' is_event = "is_event",
|
|
70 |
#' biomarkers = c("BMRKR1", "AGE"),
|
|
71 |
#' strata = "STRATA1",
|
|
72 |
#' covariates = "SEX",
|
|
73 |
#' subgroups = "BMRKR2"
|
|
74 |
#' ),
|
|
75 |
#' data = adtte_f,
|
|
76 |
#' groups_lists = list(
|
|
77 |
#' BMRKR2 = list(
|
|
78 |
#' "low" = "LOW",
|
|
79 |
#' "low/medium" = c("LOW", "MEDIUM"),
|
|
80 |
#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
|
|
81 |
#' )
|
|
82 |
#' )
|
|
83 |
#' )
|
|
84 |
#' df_grouped
|
|
85 |
#'
|
|
86 |
#' @name survival_biomarkers_subgroups
|
|
87 |
#' @order 1
|
|
88 |
NULL
|
|
89 | ||
90 |
#' Prepare survival data estimates for multiple biomarkers in a single data frame
|
|
91 |
#'
|
|
92 |
#' @description `r lifecycle::badge("stable")`
|
|
93 |
#'
|
|
94 |
#' Prepares estimates for number of events, patients and median survival times, as well as hazard ratio estimates,
|
|
95 |
#' confidence intervals and p-values, for multiple biomarkers across population subgroups in a single data frame.
|
|
96 |
#' `variables` corresponds to the names of variables found in `data`, passed as a named `list` and requires elements
|
|
97 |
#' `tte`, `is_event`, `biomarkers` (vector of continuous biomarker variables), and optionally `subgroups` and `strata`.
|
|
98 |
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
|
|
99 |
#'
|
|
100 |
#' @inheritParams argument_convention
|
|
101 |
#' @inheritParams fit_coxreg_multivar
|
|
102 |
#' @inheritParams survival_duration_subgroups
|
|
103 |
#'
|
|
104 |
#' @return A `data.frame` with columns `biomarker`, `biomarker_label`, `n_tot`, `n_tot_events`,
|
|
105 |
#' `median`, `hr`, `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`,
|
|
106 |
#' `var_label`, and `row_type`.
|
|
107 |
#'
|
|
108 |
#' @seealso [h_coxreg_mult_cont_df()] which is used internally, [tabulate_survival_biomarkers()].
|
|
109 |
#'
|
|
110 |
#' @export
|
|
111 |
extract_survival_biomarkers <- function(variables, |
|
112 |
data,
|
|
113 |
groups_lists = list(), |
|
114 |
control = control_coxreg(), |
|
115 |
label_all = "All Patients") { |
|
116 | 6x |
if ("strat" %in% names(variables)) { |
117 | ! |
warning( |
118 | ! |
"Warning: the `strat` element name of the `variables` list argument to `extract_survival_biomarkers() ",
|
119 | ! |
"was deprecated in tern 0.9.4.\n ",
|
120 | ! |
"Please use the name `strata` instead of `strat` in the `variables` argument."
|
121 |
)
|
|
122 | ! |
variables[["strata"]] <- variables[["strat"]] |
123 |
}
|
|
124 | ||
125 | 6x |
checkmate::assert_list(variables) |
126 | 6x |
checkmate::assert_character(variables$subgroups, null.ok = TRUE) |
127 | 6x |
checkmate::assert_string(label_all) |
128 | ||
129 |
# Start with all patients.
|
|
130 | 6x |
result_all <- h_coxreg_mult_cont_df( |
131 | 6x |
variables = variables, |
132 | 6x |
data = data, |
133 | 6x |
control = control |
134 |
)
|
|
135 | 6x |
result_all$subgroup <- label_all |
136 | 6x |
result_all$var <- "ALL" |
137 | 6x |
result_all$var_label <- label_all |
138 | 6x |
result_all$row_type <- "content" |
139 | 6x |
if (is.null(variables$subgroups)) { |
140 |
# Only return result for all patients.
|
|
141 | 1x |
result_all
|
142 |
} else { |
|
143 |
# Add subgroups results.
|
|
144 | 5x |
l_data <- h_split_by_subgroups( |
145 | 5x |
data,
|
146 | 5x |
variables$subgroups, |
147 | 5x |
groups_lists = groups_lists |
148 |
)
|
|
149 | 5x |
l_result <- lapply(l_data, function(grp) { |
150 | 25x |
result <- h_coxreg_mult_cont_df( |
151 | 25x |
variables = variables, |
152 | 25x |
data = grp$df, |
153 | 25x |
control = control |
154 |
)
|
|
155 | 25x |
result_labels <- grp$df_labels[rep(1, times = nrow(result)), ] |
156 | 25x |
cbind(result, result_labels) |
157 |
}) |
|
158 | 5x |
result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE)) |
159 | 5x |
result_subgroups$row_type <- "analysis" |
160 | 5x |
rbind( |
161 | 5x |
result_all,
|
162 | 5x |
result_subgroups
|
163 |
)
|
|
164 |
}
|
|
165 |
}
|
|
166 | ||
167 |
#' @describeIn survival_biomarkers_subgroups Table-creating function which creates a table
|
|
168 |
#' summarizing biomarker effects on survival by subgroup.
|
|
169 |
#'
|
|
170 |
#' @param label_all `r lifecycle::badge("deprecated")`\cr please assign the `label_all` parameter within the
|
|
171 |
#' [extract_survival_biomarkers()] function when creating `df`.
|
|
172 |
#'
|
|
173 |
#' @return An `rtables` table summarizing biomarker effects on survival by subgroup.
|
|
174 |
#'
|
|
175 |
#' @note In contrast to [tabulate_survival_subgroups()] this tabulation function does
|
|
176 |
#' not start from an input layout `lyt`. This is because internally the table is
|
|
177 |
#' created by combining multiple subtables.
|
|
178 |
#'
|
|
179 |
#' @seealso [extract_survival_biomarkers()]
|
|
180 |
#'
|
|
181 |
#' @examples
|
|
182 |
#' ## Table with default columns.
|
|
183 |
#' tabulate_survival_biomarkers(df)
|
|
184 |
#'
|
|
185 |
#' ## Table with a manually chosen set of columns: leave out "pval", reorder.
|
|
186 |
#' tab <- tabulate_survival_biomarkers(
|
|
187 |
#' df = df,
|
|
188 |
#' vars = c("n_tot_events", "ci", "n_tot", "median", "hr"),
|
|
189 |
#' time_unit = as.character(adtte_f$AVALU[1])
|
|
190 |
#' )
|
|
191 |
#'
|
|
192 |
#' ## Finally produce the forest plot.
|
|
193 |
#' \donttest{
|
|
194 |
#' g_forest(tab, xlim = c(0.8, 1.2))
|
|
195 |
#' }
|
|
196 |
#'
|
|
197 |
#' @export
|
|
198 |
#' @order 2
|
|
199 |
tabulate_survival_biomarkers <- function(df, |
|
200 |
vars = c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"), |
|
201 |
groups_lists = list(), |
|
202 |
control = control_coxreg(), |
|
203 |
label_all = lifecycle::deprecated(), |
|
204 |
time_unit = NULL, |
|
205 |
na_str = default_na_str(), |
|
206 |
...,
|
|
207 |
.stat_names = NULL, |
|
208 |
.formats = NULL, |
|
209 |
.labels = NULL, |
|
210 |
.indent_mods = NULL) { |
|
211 | 5x |
if (lifecycle::is_present(label_all)) { |
212 | 1x |
lifecycle::deprecate_warn( |
213 | 1x |
"0.9.5", "tabulate_survival_biomarkers(label_all)", |
214 | 1x |
details = paste( |
215 | 1x |
"Please assign the `label_all` parameter within the",
|
216 | 1x |
"`extract_survival_biomarkers()` function when creating `df`."
|
217 |
)
|
|
218 |
)
|
|
219 |
}
|
|
220 | ||
221 | 5x |
checkmate::assert_data_frame(df) |
222 | 5x |
checkmate::assert_character(df$biomarker) |
223 | 5x |
checkmate::assert_character(df$biomarker_label) |
224 | 5x |
checkmate::assert_subset(vars, get_stats("tabulate_survival_biomarkers")) |
225 | ||
226 |
# Process standard extra arguments
|
|
227 | 5x |
extra_args <- list(".stats" = vars) |
228 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
229 | ! |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
230 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
231 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
232 | ||
233 | 5x |
colvars <- d_survival_subgroups_colvars( |
234 | 5x |
vars,
|
235 | 5x |
conf_level = df$conf_level[1], |
236 | 5x |
method = df$pval_label[1], |
237 | 5x |
time_unit = time_unit |
238 |
)
|
|
239 | ||
240 |
# Process additional arguments to the statistic function
|
|
241 | 5x |
extra_args <- c( |
242 | 5x |
extra_args,
|
243 | 5x |
groups_lists = list(groups_lists), control = list(control), biomarker = TRUE, |
244 |
...
|
|
245 |
)
|
|
246 | ||
247 |
# Adding additional info from layout to analysis function
|
|
248 | 5x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
249 | 5x |
formals(a_survival_subgroups) <- c(formals(a_survival_subgroups), extra_args[[".additional_fun_parameters"]]) |
250 | ||
251 |
# Create "ci" column from "lcl" and "ucl"
|
|
252 | 5x |
df$ci <- combine_vectors(df$lcl, df$ucl) |
253 | ||
254 | 5x |
df_subs <- split(df, f = df$biomarker) |
255 | 5x |
tbls <- lapply( |
256 | 5x |
df_subs,
|
257 | 5x |
function(df) { |
258 | 9x |
lyt <- basic_table() |
259 | ||
260 |
# Split cols by the multiple variables to populate into columns.
|
|
261 | 9x |
lyt <- split_cols_by_multivar( |
262 | 9x |
lyt = lyt, |
263 | 9x |
vars = colvars$vars, |
264 | 9x |
varlabels = colvars$labels |
265 |
)
|
|
266 | ||
267 |
# Row split by biomarker
|
|
268 | 9x |
lyt <- split_rows_by( |
269 | 9x |
lyt = lyt, |
270 | 9x |
var = "biomarker_label", |
271 | 9x |
nested = FALSE |
272 |
)
|
|
273 | ||
274 |
# Add "All Patients" row
|
|
275 | 9x |
lyt <- split_rows_by( |
276 | 9x |
lyt = lyt, |
277 | 9x |
var = "row_type", |
278 | 9x |
split_fun = keep_split_levels("content"), |
279 | 9x |
nested = TRUE, |
280 | 9x |
child_labels = "hidden" |
281 |
)
|
|
282 | 9x |
lyt <- analyze_colvars( |
283 | 9x |
lyt = lyt, |
284 | 9x |
afun = a_survival_subgroups, |
285 | 9x |
na_str = na_str, |
286 | 9x |
extra_args = c(extra_args, overall = TRUE) |
287 |
)
|
|
288 | ||
289 |
# Add analysis rows
|
|
290 | 9x |
if ("analysis" %in% df$row_type) { |
291 | 6x |
lyt <- split_rows_by( |
292 | 6x |
lyt = lyt, |
293 | 6x |
var = "row_type", |
294 | 6x |
split_fun = keep_split_levels("analysis"), |
295 | 6x |
nested = TRUE, |
296 | 6x |
child_labels = "hidden" |
297 |
)
|
|
298 | 6x |
lyt <- split_rows_by( |
299 | 6x |
lyt = lyt, |
300 | 6x |
var = "var_label", |
301 | 6x |
nested = TRUE, |
302 | 6x |
indent_mod = 1L |
303 |
)
|
|
304 | 6x |
lyt <- analyze_colvars( |
305 | 6x |
lyt = lyt, |
306 | 6x |
afun = a_survival_subgroups, |
307 | 6x |
na_str = na_str, |
308 | 6x |
inclNAs = TRUE, |
309 | 6x |
extra_args = extra_args |
310 |
)
|
|
311 |
}
|
|
312 | 9x |
build_table(lyt, df = df) |
313 |
}
|
|
314 |
)
|
|
315 | ||
316 | 5x |
result <- do.call(rbind, tbls) |
317 | ||
318 | 5x |
n_tot_ids <- grep("^n_tot", vars) |
319 | 5x |
hr_id <- match("hr", vars) |
320 | 5x |
ci_id <- match("ci", vars) |
321 | 5x |
structure( |
322 | 5x |
result,
|
323 | 5x |
forest_header = paste0(c("Higher", "Lower"), "\nBetter"), |
324 | 5x |
col_x = hr_id, |
325 | 5x |
col_ci = ci_id, |
326 | 5x |
col_symbol_size = n_tot_ids[1] |
327 |
)
|
|
328 |
}
|
1 |
#' Summarize analysis of covariance (ANCOVA) results
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [summarize_ancova()] creates a layout element to summarize ANCOVA results.
|
|
6 |
#'
|
|
7 |
#' This function can be used to analyze multiple endpoints and/or multiple timepoints within the response variable(s)
|
|
8 |
#' specified as `vars`.
|
|
9 |
#'
|
|
10 |
#' Additional variables for the analysis, namely an arm (grouping) variable and covariate variables, can be defined
|
|
11 |
#' via the `variables` argument. See below for more details on how to specify `variables`. An interaction term can
|
|
12 |
#' be implemented in the model if needed. The interaction variable that should interact with the arm variable is
|
|
13 |
#' specified via the `interaction_term` parameter, and the specific value of `interaction_term` for which to extract
|
|
14 |
#' the ANCOVA results via the `interaction_y` parameter.
|
|
15 |
#'
|
|
16 |
#' @inheritParams h_ancova
|
|
17 |
#' @inheritParams argument_convention
|
|
18 |
#' @param interaction_y (`string` or `flag`)\cr a selected item inside of the `interaction_item` variable which will be
|
|
19 |
#' used to select the specific ANCOVA results. if the interaction is not needed, the default option is `FALSE`.
|
|
20 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
21 |
#'
|
|
22 |
#' Options are: ``r shQuote(get_stats("summarize_ancova"), type = "sh")``
|
|
23 |
#'
|
|
24 |
#' @name summarize_ancova
|
|
25 |
#' @order 1
|
|
26 |
NULL
|
|
27 | ||
28 |
#' Helper function to return results of a linear model
|
|
29 |
#'
|
|
30 |
#' @description `r lifecycle::badge("stable")`
|
|
31 |
#'
|
|
32 |
#' @inheritParams argument_convention
|
|
33 |
#' @param .df_row (`data.frame`)\cr data set that includes all the variables that are called in `.var` and `variables`.
|
|
34 |
#' @param variables (named `list` of `string`)\cr list of additional analysis variables, with expected elements:
|
|
35 |
#' * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple groups will be
|
|
36 |
#' summarized. Specifically, the first level of `arm` variable is taken as the reference group.
|
|
37 |
#' * `covariates` (`character`)\cr a vector that can contain single variable names (such as `"X1"`), and/or
|
|
38 |
#' interaction terms indicated by `"X1 * X2"`.
|
|
39 |
#' @param interaction_item (`string` or `NULL`)\cr name of the variable that should have interactions
|
|
40 |
#' with arm. if the interaction is not needed, the default option is `NULL`.
|
|
41 |
#'
|
|
42 |
#' @return The summary of a linear model.
|
|
43 |
#'
|
|
44 |
#' @examples
|
|
45 |
#' h_ancova(
|
|
46 |
#' .var = "Sepal.Length",
|
|
47 |
#' .df_row = iris,
|
|
48 |
#' variables = list(arm = "Species", covariates = c("Petal.Length * Petal.Width", "Sepal.Width"))
|
|
49 |
#' )
|
|
50 |
#'
|
|
51 |
#' @export
|
|
52 |
h_ancova <- function(.var, |
|
53 |
.df_row,
|
|
54 |
variables,
|
|
55 |
interaction_item = NULL) { |
|
56 | 27x |
checkmate::assert_string(.var) |
57 | 27x |
checkmate::assert_list(variables) |
58 | 27x |
checkmate::assert_subset(names(variables), c("arm", "covariates")) |
59 | 27x |
assert_df_with_variables(.df_row, list(rsp = .var)) |
60 | ||
61 | 26x |
arm <- variables$arm |
62 | 26x |
covariates <- variables$covariates |
63 | 26x |
if (!is.null(covariates) && length(covariates) > 0) { |
64 |
# Get all covariate variable names in the model.
|
|
65 | 11x |
var_list <- get_covariates(covariates) |
66 | 11x |
assert_df_with_variables(.df_row, var_list) |
67 |
}
|
|
68 | ||
69 | 25x |
covariates_part <- paste(covariates, collapse = " + ") |
70 | 25x |
if (covariates_part != "") { |
71 | 10x |
formula <- stats::as.formula(paste0(.var, " ~ ", covariates_part, " + ", arm)) |
72 |
} else { |
|
73 | 15x |
formula <- stats::as.formula(paste0(.var, " ~ ", arm)) |
74 |
}
|
|
75 | ||
76 | 25x |
if (is.null(interaction_item)) { |
77 | 21x |
specs <- arm |
78 |
} else { |
|
79 | 4x |
specs <- c(arm, interaction_item) |
80 |
}
|
|
81 | ||
82 | 25x |
lm_fit <- stats::lm( |
83 | 25x |
formula = formula, |
84 | 25x |
data = .df_row |
85 |
)
|
|
86 | 25x |
emmeans_fit <- emmeans::emmeans( |
87 | 25x |
lm_fit,
|
88 |
# Specify here the group variable over which EMM are desired.
|
|
89 | 25x |
specs = specs, |
90 |
# Pass the data again so that the factor levels of the arm variable can be inferred.
|
|
91 | 25x |
data = .df_row |
92 |
)
|
|
93 | ||
94 | 25x |
emmeans_fit
|
95 |
}
|
|
96 | ||
97 |
#' @describeIn summarize_ancova Statistics function that produces a named list of results
|
|
98 |
#' of the investigated linear model.
|
|
99 |
#'
|
|
100 |
#' @return
|
|
101 |
#' * `s_ancova()` returns a named list of 5 statistics:
|
|
102 |
#' * `n`: Count of complete sample size for the group.
|
|
103 |
#' * `lsmean`: Estimated marginal means in the group.
|
|
104 |
#' * `lsmean_diff`: Difference in estimated marginal means in comparison to the reference group.
|
|
105 |
#' If working with the reference group, this will be empty.
|
|
106 |
#' * `lsmean_diff_ci`: Confidence level for difference in estimated marginal means in comparison
|
|
107 |
#' to the reference group.
|
|
108 |
#' * `pval`: p-value (not adjusted for multiple comparisons).
|
|
109 |
#'
|
|
110 |
#' @keywords internal
|
|
111 |
s_ancova <- function(df, |
|
112 |
.var,
|
|
113 |
.df_row,
|
|
114 |
.ref_group,
|
|
115 |
.in_ref_col,
|
|
116 |
variables,
|
|
117 |
conf_level,
|
|
118 |
interaction_y = FALSE, |
|
119 |
interaction_item = NULL, |
|
120 |
...) { |
|
121 | 24x |
emmeans_fit <- h_ancova(.var = .var, variables = variables, .df_row = .df_row, interaction_item = interaction_item) |
122 | ||
123 | 24x |
sum_fit <- summary( |
124 | 24x |
emmeans_fit,
|
125 | 24x |
level = conf_level |
126 |
)
|
|
127 | ||
128 | 24x |
arm <- variables$arm |
129 | ||
130 | 24x |
sum_level <- as.character(unique(df[[arm]])) |
131 | ||
132 |
# Ensure that there is only one element in sum_level.
|
|
133 | 24x |
checkmate::assert_scalar(sum_level) |
134 | ||
135 | 23x |
sum_fit_level <- sum_fit[sum_fit[[arm]] == sum_level, ] |
136 | ||
137 |
# Get the index of the ref arm
|
|
138 | 23x |
if (interaction_y != FALSE) { |
139 | 4x |
y <- unlist(df[(df[[interaction_item]] == interaction_y), .var]) |
140 |
# convert characters selected in interaction_y into the numeric order
|
|
141 | 4x |
interaction_y <- which(sum_fit_level[[interaction_item]] == interaction_y) |
142 | 4x |
sum_fit_level <- sum_fit_level[interaction_y, ] |
143 |
# if interaction is called, reset the index
|
|
144 | 4x |
ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])]) |
145 | 4x |
ref_key <- tail(ref_key, n = 1) |
146 | 4x |
ref_key <- (interaction_y - 1) * length(unique(.df_row[[arm]])) + ref_key |
147 |
} else { |
|
148 | 19x |
y <- df[[.var]] |
149 |
# Get the index of the ref arm when interaction is not called
|
|
150 | 19x |
ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])]) |
151 | 19x |
ref_key <- tail(ref_key, n = 1) |
152 |
}
|
|
153 | ||
154 | 23x |
if (.in_ref_col) { |
155 | 8x |
list( |
156 | 8x |
n = length(y[!is.na(y)]), |
157 | 8x |
lsmean = formatters::with_label(sum_fit_level$emmean, "Adjusted Mean"), |
158 | 8x |
lsmean_diff = formatters::with_label(numeric(), "Difference in Adjusted Means"), |
159 | 8x |
lsmean_diff_ci = formatters::with_label(numeric(), f_conf_level(conf_level)), |
160 | 8x |
pval = formatters::with_label(numeric(), "p-value") |
161 |
)
|
|
162 |
} else { |
|
163 |
# Estimate the differences between the marginal means.
|
|
164 | 15x |
emmeans_contrasts <- emmeans::contrast( |
165 | 15x |
emmeans_fit,
|
166 |
# Compare all arms versus the control arm.
|
|
167 | 15x |
method = "trt.vs.ctrl", |
168 |
# Take the arm factor from .ref_group as the control arm.
|
|
169 | 15x |
ref = ref_key, |
170 | 15x |
level = conf_level |
171 |
)
|
|
172 | 15x |
sum_contrasts <- summary( |
173 | 15x |
emmeans_contrasts,
|
174 |
# Derive confidence intervals, t-tests and p-values.
|
|
175 | 15x |
infer = TRUE, |
176 |
# Do not adjust the p-values for multiplicity.
|
|
177 | 15x |
adjust = "none" |
178 |
)
|
|
179 | ||
180 | 15x |
contrast_lvls <- gsub( |
181 | 15x |
"^\\(|\\)$", "", gsub(paste0(" - \\(*", .ref_group[[arm]][1], ".*"), "", sum_contrasts$contrast) |
182 |
)
|
|
183 | 15x |
if (!is.null(interaction_item)) { |
184 | 2x |
sum_contrasts_level <- sum_contrasts[grepl(sum_level, contrast_lvls, fixed = TRUE), ] |
185 |
} else { |
|
186 | 13x |
sum_contrasts_level <- sum_contrasts[sum_level == contrast_lvls, ] |
187 |
}
|
|
188 | 15x |
if (interaction_y != FALSE) { |
189 | 2x |
sum_contrasts_level <- sum_contrasts_level[interaction_y, ] |
190 |
}
|
|
191 | ||
192 | 15x |
list( |
193 | 15x |
n = length(y[!is.na(y)]), |
194 | 15x |
lsmean = formatters::with_label(sum_fit_level$emmean, "Adjusted Mean"), |
195 | 15x |
lsmean_diff = formatters::with_label(sum_contrasts_level$estimate, "Difference in Adjusted Means"), |
196 | 15x |
lsmean_diff_ci = formatters::with_label( |
197 | 15x |
c(sum_contrasts_level$lower.CL, sum_contrasts_level$upper.CL), |
198 | 15x |
f_conf_level(conf_level) |
199 |
),
|
|
200 | 15x |
pval = formatters::with_label(sum_contrasts_level$p.value, "p-value") |
201 |
)
|
|
202 |
}
|
|
203 |
}
|
|
204 | ||
205 |
#' @describeIn summarize_ancova Formatted analysis function which is used as `afun` in `summarize_ancova()`.
|
|
206 |
#'
|
|
207 |
#' @return
|
|
208 |
#' * `a_ancova()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
209 |
#'
|
|
210 |
#' @keywords internal
|
|
211 |
a_ancova <- function(df, |
|
212 |
...,
|
|
213 |
.stats = NULL, |
|
214 |
.stat_names = NULL, |
|
215 |
.formats = NULL, |
|
216 |
.labels = NULL, |
|
217 |
.indent_mods = NULL) { |
|
218 |
# Check for additional parameters to the statistics function
|
|
219 | 21x |
dots_extra_args <- list(...) |
220 | 21x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
221 | 21x |
dots_extra_args$.additional_fun_parameters <- NULL |
222 | ||
223 |
# Check for user-defined functions
|
|
224 | 21x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
225 | 21x |
.stats <- default_and_custom_stats_list$all_stats |
226 | 21x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
227 | ||
228 |
# Apply statistics function
|
|
229 | 21x |
x_stats <- .apply_stat_functions( |
230 | 21x |
default_stat_fnc = s_ancova, |
231 | 21x |
custom_stat_fnc_list = custom_stat_functions, |
232 | 21x |
args_list = c( |
233 | 21x |
df = list(df), |
234 | 21x |
extra_afun_params,
|
235 | 21x |
dots_extra_args
|
236 |
)
|
|
237 |
)
|
|
238 | ||
239 |
# Fill in formatting defaults
|
|
240 | 21x |
.stats <- get_stats("summarize_ancova", |
241 | 21x |
stats_in = .stats, |
242 | 21x |
custom_stats_in = names(custom_stat_functions) |
243 |
)
|
|
244 | 21x |
x_stats <- x_stats[.stats] |
245 | 21x |
.formats <- get_formats_from_stats(.stats, .formats) |
246 | 21x |
.labels <- get_labels_from_stats( |
247 | 21x |
.stats, .labels, |
248 | 21x |
tern_defaults = c(lapply(x_stats[names(x_stats) != "n"], attr, "label"), tern_default_labels) |
249 |
)
|
|
250 | 21x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods) |
251 | ||
252 |
# Auto format handling
|
|
253 | 21x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
254 | ||
255 |
# Get and check statistical names
|
|
256 | 21x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
257 | ||
258 | 21x |
in_rows( |
259 | 21x |
.list = x_stats, |
260 | 21x |
.formats = .formats, |
261 | 21x |
.names = .labels %>% .unlist_keep_nulls(), |
262 | 21x |
.stat_names = .stat_names, |
263 | 21x |
.labels = .labels %>% .unlist_keep_nulls(), |
264 | 21x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
265 |
)
|
|
266 |
}
|
|
267 | ||
268 |
#' @describeIn summarize_ancova Layout-creating function which can take statistics function arguments
|
|
269 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
270 |
#'
|
|
271 |
#' @return
|
|
272 |
#' * `summarize_ancova()` returns a layout object suitable for passing to further layouting functions,
|
|
273 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
274 |
#' the statistics from `s_ancova()` to the table layout.
|
|
275 |
#'
|
|
276 |
#' @examples
|
|
277 |
#' basic_table() %>%
|
|
278 |
#' split_cols_by("Species", ref_group = "setosa") %>%
|
|
279 |
#' add_colcounts() %>%
|
|
280 |
#' summarize_ancova(
|
|
281 |
#' vars = "Petal.Length",
|
|
282 |
#' variables = list(arm = "Species", covariates = NULL),
|
|
283 |
#' table_names = "unadj",
|
|
284 |
#' conf_level = 0.95, var_labels = "Unadjusted comparison",
|
|
285 |
#' .labels = c(lsmean = "Mean", lsmean_diff = "Difference in Means")
|
|
286 |
#' ) %>%
|
|
287 |
#' summarize_ancova(
|
|
288 |
#' vars = "Petal.Length",
|
|
289 |
#' variables = list(arm = "Species", covariates = c("Sepal.Length", "Sepal.Width")),
|
|
290 |
#' table_names = "adj",
|
|
291 |
#' conf_level = 0.95, var_labels = "Adjusted comparison (covariates: Sepal.Length and Sepal.Width)"
|
|
292 |
#' ) %>%
|
|
293 |
#' build_table(iris)
|
|
294 |
#'
|
|
295 |
#' @export
|
|
296 |
#' @order 2
|
|
297 |
summarize_ancova <- function(lyt, |
|
298 |
vars,
|
|
299 |
variables,
|
|
300 |
conf_level,
|
|
301 |
interaction_y = FALSE, |
|
302 |
interaction_item = NULL, |
|
303 |
var_labels,
|
|
304 |
na_str = default_na_str(), |
|
305 |
nested = TRUE, |
|
306 |
...,
|
|
307 |
show_labels = "visible", |
|
308 |
table_names = vars, |
|
309 |
.stats = c("n", "lsmean", "lsmean_diff", "lsmean_diff_ci", "pval"), |
|
310 |
.stat_names = NULL, |
|
311 |
.formats = NULL, |
|
312 |
.labels = NULL, |
|
313 |
.indent_mods = list("lsmean_diff_ci" = 1L, "pval" = 1L)) { |
|
314 |
# Process standard extra arguments
|
|
315 | 7x |
extra_args <- list(".stats" = .stats) |
316 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
317 | ! |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
318 | 3x |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
319 | 7x |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
320 | ||
321 |
# Process additional arguments to the statistic function
|
|
322 | 7x |
extra_args <- c( |
323 | 7x |
extra_args,
|
324 | 7x |
variables = list(variables), conf_level = list(conf_level), interaction_y = list(interaction_y), |
325 | 7x |
interaction_item = list(interaction_item), |
326 |
...
|
|
327 |
)
|
|
328 | ||
329 |
# Append additional info from layout to the analysis function
|
|
330 | 7x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
331 | 7x |
formals(a_ancova) <- c(formals(a_ancova), extra_args[[".additional_fun_parameters"]]) |
332 | ||
333 | 7x |
analyze( |
334 | 7x |
lyt = lyt, |
335 | 7x |
vars = vars, |
336 | 7x |
afun = a_ancova, |
337 | 7x |
na_str = na_str, |
338 | 7x |
nested = nested, |
339 | 7x |
extra_args = extra_args, |
340 | 7x |
var_labels = var_labels, |
341 | 7x |
show_labels = show_labels, |
342 | 7x |
table_names = table_names |
343 |
)
|
|
344 |
}
|
1 |
#' Helper function for deriving analysis datasets for select laboratory tables
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Helper function that merges ADSL and ADLB datasets so that missing lab test records are inserted in the
|
|
6 |
#' output dataset. Remember that `na_level` must match the needed pre-processing
|
|
7 |
#' done with [df_explicit_na()] to have the desired output.
|
|
8 |
#'
|
|
9 |
#' @param adsl (`data.frame`)\cr ADSL data frame.
|
|
10 |
#' @param adlb (`data.frame`)\cr ADLB data frame.
|
|
11 |
#' @param worst_flag (named `character`)\cr worst post-baseline lab flag variable. See how this is implemented in the
|
|
12 |
#' following examples.
|
|
13 |
#' @param by_visit (`flag`)\cr defaults to `FALSE` to generate worst grade per patient.
|
|
14 |
#' If worst grade per patient per visit is specified for `worst_flag`, then
|
|
15 |
#' `by_visit` should be `TRUE` to generate worst grade patient per visit.
|
|
16 |
#' @param no_fillin_visits (named `character`)\cr visits that are not considered for post-baseline worst toxicity
|
|
17 |
#' grade. Defaults to `c("SCREENING", "BASELINE")`.
|
|
18 |
#'
|
|
19 |
#' @return `df` containing variables shared between `adlb` and `adsl` along with variables `PARAM`, `PARAMCD`,
|
|
20 |
#' `ATOXGR`, and `BTOXGR` relevant for analysis. Optionally, `AVISIT` are `AVISITN` are included when
|
|
21 |
#' `by_visit = TRUE` and `no_fillin_visits = c("SCREENING", "BASELINE")`.
|
|
22 |
#'
|
|
23 |
#' @details In the result data missing records will be created for the following situations:
|
|
24 |
#' * Patients who are present in `adsl` but have no lab data in `adlb` (both baseline and post-baseline).
|
|
25 |
#' * Patients who do not have any post-baseline lab values.
|
|
26 |
#' * Patients without any post-baseline values flagged as the worst.
|
|
27 |
#'
|
|
28 |
#' @examples
|
|
29 |
#' # `h_adsl_adlb_merge_using_worst_flag`
|
|
30 |
#' adlb_out <- h_adsl_adlb_merge_using_worst_flag(
|
|
31 |
#' tern_ex_adsl,
|
|
32 |
#' tern_ex_adlb,
|
|
33 |
#' worst_flag = c("WGRHIFL" = "Y")
|
|
34 |
#' )
|
|
35 |
#'
|
|
36 |
#' # `h_adsl_adlb_merge_using_worst_flag` by visit example
|
|
37 |
#' adlb_out_by_visit <- h_adsl_adlb_merge_using_worst_flag(
|
|
38 |
#' tern_ex_adsl,
|
|
39 |
#' tern_ex_adlb,
|
|
40 |
#' worst_flag = c("WGRLOVFL" = "Y"),
|
|
41 |
#' by_visit = TRUE
|
|
42 |
#' )
|
|
43 |
#'
|
|
44 |
#' @export
|
|
45 |
h_adsl_adlb_merge_using_worst_flag <- function(adsl, |
|
46 |
adlb,
|
|
47 |
worst_flag = c("WGRHIFL" = "Y"), |
|
48 |
by_visit = FALSE, |
|
49 |
no_fillin_visits = c("SCREENING", "BASELINE")) { |
|
50 | 5x |
col_names <- names(worst_flag) |
51 | 5x |
filter_values <- worst_flag |
52 | ||
53 | 5x |
temp <- Map( |
54 | 5x |
function(x, y) which(adlb[[x]] == y), |
55 | 5x |
col_names,
|
56 | 5x |
filter_values
|
57 |
)
|
|
58 | ||
59 | 5x |
position_satisfy_filters <- Reduce(intersect, temp) |
60 | ||
61 | 5x |
adsl_adlb_common_columns <- intersect(colnames(adsl), colnames(adlb)) |
62 | 5x |
columns_from_adlb <- c("USUBJID", "PARAM", "PARAMCD", "AVISIT", "AVISITN", "ATOXGR", "BTOXGR") |
63 | ||
64 | 5x |
adlb_f <- adlb[position_satisfy_filters, ] %>% |
65 | 5x |
dplyr::filter(!.data[["AVISIT"]] %in% no_fillin_visits) |
66 | 5x |
adlb_f <- adlb_f[, columns_from_adlb] |
67 | ||
68 | 5x |
avisits_grid <- adlb %>% |
69 | 5x |
dplyr::filter(!.data[["AVISIT"]] %in% no_fillin_visits) %>% |
70 | 5x |
dplyr::pull(.data[["AVISIT"]]) %>% |
71 | 5x |
unique() |
72 | ||
73 | 5x |
if (by_visit) { |
74 | 1x |
adsl_lb <- expand.grid( |
75 | 1x |
USUBJID = unique(adsl$USUBJID), |
76 | 1x |
AVISIT = avisits_grid, |
77 | 1x |
PARAMCD = unique(adlb$PARAMCD) |
78 |
)
|
|
79 | ||
80 | 1x |
adsl_lb <- adsl_lb %>% |
81 | 1x |
dplyr::left_join(unique(adlb[c("AVISIT", "AVISITN")]), by = "AVISIT") %>% |
82 | 1x |
dplyr::left_join(unique(adlb[c("PARAM", "PARAMCD")]), by = "PARAMCD") |
83 | ||
84 | 1x |
adsl1 <- adsl[, adsl_adlb_common_columns] |
85 | 1x |
adsl_lb <- adsl1 %>% merge(adsl_lb, by = "USUBJID") |
86 | ||
87 | 1x |
by_variables_from_adlb <- c("USUBJID", "AVISIT", "AVISITN", "PARAMCD", "PARAM") |
88 | ||
89 | 1x |
adlb_btoxgr <- adlb %>% |
90 | 1x |
dplyr::select(c("USUBJID", "PARAMCD", "BTOXGR")) %>% |
91 | 1x |
unique() %>% |
92 | 1x |
dplyr::rename("BTOXGR_MAP" = "BTOXGR") |
93 | ||
94 | 1x |
adlb_out <- merge( |
95 | 1x |
adlb_f,
|
96 | 1x |
adsl_lb,
|
97 | 1x |
by = by_variables_from_adlb, |
98 | 1x |
all = TRUE, |
99 | 1x |
sort = FALSE |
100 |
)
|
|
101 | 1x |
adlb_out <- adlb_out %>% |
102 | 1x |
dplyr::left_join(adlb_btoxgr, by = c("USUBJID", "PARAMCD")) %>% |
103 | 1x |
dplyr::mutate(BTOXGR = .data$BTOXGR_MAP) %>% |
104 | 1x |
dplyr::select(-"BTOXGR_MAP") |
105 | ||
106 | 1x |
adlb_var_labels <- c( |
107 | 1x |
formatters::var_labels(adlb[by_variables_from_adlb]), |
108 | 1x |
formatters::var_labels(adlb[columns_from_adlb[!columns_from_adlb %in% by_variables_from_adlb]]), |
109 | 1x |
formatters::var_labels(adsl[adsl_adlb_common_columns[adsl_adlb_common_columns != "USUBJID"]]) |
110 |
)
|
|
111 |
} else { |
|
112 | 4x |
adsl_lb <- expand.grid( |
113 | 4x |
USUBJID = unique(adsl$USUBJID), |
114 | 4x |
PARAMCD = unique(adlb$PARAMCD) |
115 |
)
|
|
116 | ||
117 | 4x |
adsl_lb <- adsl_lb %>% dplyr::left_join(unique(adlb[c("PARAM", "PARAMCD")]), by = "PARAMCD") |
118 | ||
119 | 4x |
adsl1 <- adsl[, adsl_adlb_common_columns] |
120 | 4x |
adsl_lb <- adsl1 %>% merge(adsl_lb, by = "USUBJID") |
121 | ||
122 | 4x |
by_variables_from_adlb <- c("USUBJID", "PARAMCD", "PARAM") |
123 | ||
124 | 4x |
adlb_out <- merge( |
125 | 4x |
adlb_f,
|
126 | 4x |
adsl_lb,
|
127 | 4x |
by = by_variables_from_adlb, |
128 | 4x |
all = TRUE, |
129 | 4x |
sort = FALSE |
130 |
)
|
|
131 | ||
132 | 4x |
adlb_var_labels <- c( |
133 | 4x |
formatters::var_labels(adlb[by_variables_from_adlb]), |
134 | 4x |
formatters::var_labels(adlb[columns_from_adlb[!columns_from_adlb %in% by_variables_from_adlb]]), |
135 | 4x |
formatters::var_labels(adsl[adsl_adlb_common_columns[adsl_adlb_common_columns != "USUBJID"]]) |
136 |
)
|
|
137 |
}
|
|
138 | ||
139 | 5x |
adlb_out$ATOXGR <- as.factor(adlb_out$ATOXGR) |
140 | 5x |
adlb_out$BTOXGR <- as.factor(adlb_out$BTOXGR) |
141 | ||
142 | 5x |
formatters::var_labels(adlb_out) <- adlb_var_labels |
143 | ||
144 | 5x |
adlb_out
|
145 |
}
|
1 |
#' Count the number of patients with particular flags
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [count_patients_with_flags()] creates a layout element to calculate counts of patients for
|
|
6 |
#' which user-specified flags are present.
|
|
7 |
#'
|
|
8 |
#' This function analyzes primary analysis variable `var` which indicates unique subject identifiers. Flags
|
|
9 |
#' variables to analyze are specified by the user via the `flag_variables` argument, and must either take value
|
|
10 |
#' `TRUE` (flag present) or `FALSE` (flag absent) for each record.
|
|
11 |
#'
|
|
12 |
#' If there are multiple records with the same flag present for a patient, only one occurrence is counted.
|
|
13 |
#'
|
|
14 |
#' @inheritParams argument_convention
|
|
15 |
#' @param flag_variables (`character`)\cr a vector specifying the names of `logical` variables from analysis dataset
|
|
16 |
#' used for counting the number of unique identifiers.
|
|
17 |
#' @param flag_labels (`character`)\cr vector of labels to use for flag variables. If any labels are also specified via
|
|
18 |
#' the `.labels` parameter, the `.labels` values will take precedence and replace these labels.
|
|
19 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
20 |
#'
|
|
21 |
#' Options are: ``r shQuote(get_stats("count_patients_with_flags"), type = "sh")``
|
|
22 |
#'
|
|
23 |
#' @seealso [count_patients_with_event]
|
|
24 |
#'
|
|
25 |
#' @name count_patients_with_flags
|
|
26 |
#' @order 1
|
|
27 |
NULL
|
|
28 | ||
29 |
#' @describeIn count_patients_with_flags Statistics function which counts the number of patients for which
|
|
30 |
#' a particular flag variable is `TRUE`.
|
|
31 |
#'
|
|
32 |
#' @inheritParams analyze_variables
|
|
33 |
#' @param .var (`string`)\cr name of the column that contains the unique identifier.
|
|
34 |
#'
|
|
35 |
#' @note If `flag_labels` is not specified, variables labels will be extracted from `df`. If variables are not
|
|
36 |
#' labeled, variable names will be used instead. Alternatively, a named `vector` can be supplied to
|
|
37 |
#' `flag_variables` such that within each name-value pair the name corresponds to the variable name and the value is
|
|
38 |
#' the label to use for this variable.
|
|
39 |
#'
|
|
40 |
#' @return
|
|
41 |
#' * `s_count_patients_with_flags()` returns the count and the fraction of unique identifiers with each particular
|
|
42 |
#' flag as a list of statistics `n`, `count`, `count_fraction`, and `n_blq`, with one element per flag.
|
|
43 |
#'
|
|
44 |
#' @examples
|
|
45 |
#' # `s_count_patients_with_flags()`
|
|
46 |
#'
|
|
47 |
#' s_count_patients_with_flags(
|
|
48 |
#' adae,
|
|
49 |
#' "SUBJID",
|
|
50 |
#' flag_variables = c("fl1", "fl2", "fl3", "fl4"),
|
|
51 |
#' denom = "N_col",
|
|
52 |
#' .N_col = 1000
|
|
53 |
#' )
|
|
54 |
#'
|
|
55 |
#' @export
|
|
56 |
s_count_patients_with_flags <- function(df, |
|
57 |
.var,
|
|
58 |
.N_col = ncol(df), # nolint |
|
59 |
.N_row = nrow(df), # nolint |
|
60 |
...,
|
|
61 |
flag_variables,
|
|
62 |
flag_labels = NULL, |
|
63 |
denom = c("n", "N_col", "N_row")) { |
|
64 | 41x |
checkmate::assert_character(flag_variables) |
65 | 41x |
if (!is.null(flag_labels)) { |
66 | 6x |
checkmate::assert_character(flag_labels, len = length(flag_variables), any.missing = FALSE) |
67 | 6x |
flag_names <- flag_labels |
68 |
} else { |
|
69 | 35x |
if (is.null(names(flag_variables))) { |
70 | 20x |
flag_names <- formatters::var_labels(df[flag_variables], fill = TRUE) |
71 |
} else { |
|
72 | 15x |
flag_names <- unname(flag_variables) |
73 | 15x |
flag_variables <- names(flag_variables) |
74 |
}
|
|
75 |
}
|
|
76 | 41x |
checkmate::assert_subset(flag_variables, colnames(df)) |
77 | ||
78 | 41x |
temp <- sapply(flag_variables, function(x) { |
79 | 123x |
tmp <- Map(function(y) which(df[[y]]), x) |
80 | 123x |
position_satisfy_flags <- Reduce(intersect, tmp) |
81 | 123x |
id_satisfy_flags <- as.character(unique(df[position_satisfy_flags, ][[.var]])) |
82 | 123x |
s_count_values( |
83 | 123x |
x = as.character(unique(df[[.var]])), |
84 | 123x |
values = id_satisfy_flags, |
85 | 123x |
denom = denom, |
86 | 123x |
.N_col = .N_col, |
87 | 123x |
.N_row = .N_row |
88 |
)
|
|
89 |
}) |
|
90 | 41x |
colnames(temp) <- flag_names |
91 | 41x |
temp <- data.frame(t(temp)) |
92 | 41x |
result <- as.list(temp) |
93 | 41x |
if (length(flag_variables) == 1) { |
94 | 1x |
for (i in seq(3)) names(result[[i]]) <- flag_names[1] |
95 |
}
|
|
96 | 41x |
result
|
97 |
}
|
|
98 | ||
99 |
#' @describeIn count_patients_with_flags Formatted analysis function which is used as `afun`
|
|
100 |
#' in `count_patients_with_flags()`.
|
|
101 |
#'
|
|
102 |
#' @return
|
|
103 |
#' * `a_count_patients_with_flags()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
104 |
#'
|
|
105 |
#' @examples
|
|
106 |
#' a_count_patients_with_flags(
|
|
107 |
#' adae,
|
|
108 |
#' .N_col = 10L,
|
|
109 |
#' .N_row = 10L,
|
|
110 |
#' .var = "USUBJID",
|
|
111 |
#' flag_variables = c("fl1", "fl2", "fl3", "fl4")
|
|
112 |
#' )
|
|
113 |
#'
|
|
114 |
#' @export
|
|
115 |
a_count_patients_with_flags <- function(df, |
|
116 |
labelstr = "", |
|
117 |
...,
|
|
118 |
.stats = NULL, |
|
119 |
.stat_names = NULL, |
|
120 |
.formats = NULL, |
|
121 |
.labels = NULL, |
|
122 |
.indent_mods = NULL) { |
|
123 |
# Check for additional parameters to the statistics function
|
|
124 | 31x |
dots_extra_args <- list(...) |
125 | 31x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
126 | 31x |
dots_extra_args$.additional_fun_parameters <- NULL |
127 | 31x |
flag_variables <- dots_extra_args[["flag_variables"]] |
128 | 31x |
flag_labels <- dots_extra_args[["flag_labels"]] |
129 | ||
130 | 17x |
if (is.null(names(flag_variables))) flag_variables <- formatters::var_labels(df, fill = TRUE)[flag_variables] |
131 | 26x |
if (is.null(flag_labels)) flag_labels <- flag_variables |
132 | ||
133 |
# Check for user-defined functions
|
|
134 | 31x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
135 | 31x |
.stats <- default_and_custom_stats_list$all_stats |
136 | 31x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
137 | ||
138 |
# Apply statistics function
|
|
139 | 31x |
x_stats <- .apply_stat_functions( |
140 | 31x |
default_stat_fnc = s_count_patients_with_flags, |
141 | 31x |
custom_stat_fnc_list = custom_stat_functions, |
142 | 31x |
args_list = c( |
143 | 31x |
df = list(df), |
144 | 31x |
extra_afun_params,
|
145 | 31x |
dots_extra_args
|
146 |
)
|
|
147 |
)
|
|
148 | ||
149 |
# Fill in formatting defaults
|
|
150 | 31x |
.stats <- get_stats("count_patients_with_flags", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) |
151 | 31x |
levels_per_stats <- rep(list(names(flag_variables)), length(.stats)) %>% stats::setNames(.stats) |
152 | 31x |
.formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) |
153 | 31x |
.labels <- get_labels_from_stats( |
154 | 31x |
.stats, .labels, levels_per_stats, |
155 | 31x |
tern_defaults = flag_labels %>% stats::setNames(names(flag_variables)) |
156 |
)
|
|
157 | 31x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) |
158 | ||
159 | 31x |
x_stats <- x_stats[.stats] %>% |
160 | 31x |
.unlist_keep_nulls() %>% |
161 | 31x |
setNames(names(.formats)) |
162 | ||
163 |
# Auto format handling
|
|
164 | 31x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
165 | ||
166 |
# Get and check statistical names
|
|
167 | 31x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
168 | ||
169 | 31x |
in_rows( |
170 | 31x |
.list = x_stats, |
171 | 31x |
.formats = .formats, |
172 | 31x |
.names = names(.labels), |
173 | 31x |
.stat_names = .stat_names, |
174 | 31x |
.labels = .labels %>% .unlist_keep_nulls(), |
175 | 31x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
176 |
)
|
|
177 |
}
|
|
178 | ||
179 |
#' @describeIn count_patients_with_flags Layout-creating function which can take statistics function
|
|
180 |
#' arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
181 |
#'
|
|
182 |
#' @return
|
|
183 |
#' * `count_patients_with_flags()` returns a layout object suitable for passing to further layouting functions,
|
|
184 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
185 |
#' the statistics from `s_count_patients_with_flags()` to the table layout.
|
|
186 |
#'
|
|
187 |
#' @examples
|
|
188 |
#' # Add labelled flag variables to analysis dataset.
|
|
189 |
#' adae <- tern_ex_adae %>%
|
|
190 |
#' dplyr::mutate(
|
|
191 |
#' fl1 = TRUE %>% with_label("Total AEs"),
|
|
192 |
#' fl2 = (TRTEMFL == "Y") %>%
|
|
193 |
#' with_label("Total number of patients with at least one adverse event"),
|
|
194 |
#' fl3 = (TRTEMFL == "Y" & AEOUT == "FATAL") %>%
|
|
195 |
#' with_label("Total number of patients with fatal AEs"),
|
|
196 |
#' fl4 = (TRTEMFL == "Y" & AEOUT == "FATAL" & AEREL == "Y") %>%
|
|
197 |
#' with_label("Total number of patients with related fatal AEs")
|
|
198 |
#' )
|
|
199 |
#'
|
|
200 |
#' lyt <- basic_table() %>%
|
|
201 |
#' split_cols_by("ARM") %>%
|
|
202 |
#' add_colcounts() %>%
|
|
203 |
#' count_patients_with_flags(
|
|
204 |
#' "SUBJID",
|
|
205 |
#' flag_variables = c("fl1", "fl2", "fl3", "fl4"),
|
|
206 |
#' denom = "N_col"
|
|
207 |
#' )
|
|
208 |
#'
|
|
209 |
#' build_table(lyt, adae, alt_counts_df = tern_ex_adsl)
|
|
210 |
#'
|
|
211 |
#' @export
|
|
212 |
#' @order 2
|
|
213 |
count_patients_with_flags <- function(lyt, |
|
214 |
var,
|
|
215 |
flag_variables,
|
|
216 |
flag_labels = NULL, |
|
217 |
var_labels = var, |
|
218 |
show_labels = "hidden", |
|
219 |
riskdiff = FALSE, |
|
220 |
na_str = default_na_str(), |
|
221 |
nested = TRUE, |
|
222 |
...,
|
|
223 |
table_names = paste0("tbl_flags_", var), |
|
224 |
.stats = "count_fraction", |
|
225 |
.stat_names = NULL, |
|
226 |
.formats = list(count_fraction = format_count_fraction_fixed_dp), |
|
227 |
.indent_mods = NULL, |
|
228 |
.labels = NULL) { |
|
229 | 11x |
checkmate::assert_flag(riskdiff) |
230 | 11x |
afun <- if (isFALSE(riskdiff)) a_count_patients_with_flags else afun_riskdiff |
231 | ||
232 |
# Process standard extra arguments
|
|
233 | 11x |
extra_args <- list(".stats" = .stats) |
234 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
235 | 11x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
236 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
237 | 1x |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
238 | ||
239 |
# Process additional arguments to the statistic function
|
|
240 | 11x |
extra_args <- c( |
241 | 11x |
extra_args,
|
242 | 11x |
flag_variables = list(flag_variables), flag_labels = list(flag_labels), |
243 | 11x |
if (!isFALSE(riskdiff)) list(afun = list("s_count_patients_with_flags" = a_count_patients_with_flags)), |
244 |
...
|
|
245 |
)
|
|
246 | ||
247 |
# Append additional info from layout to the analysis function
|
|
248 | 11x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
249 | 11x |
formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]]) |
250 | ||
251 | 11x |
analyze( |
252 | 11x |
lyt = lyt, |
253 | 11x |
vars = var, |
254 | 11x |
afun = afun, |
255 | 11x |
na_str = na_str, |
256 | 11x |
nested = nested, |
257 | 11x |
extra_args = extra_args, |
258 | 11x |
var_labels = var_labels, |
259 | 11x |
show_labels = show_labels, |
260 | 11x |
table_names = table_names |
261 |
)
|
|
262 |
}
|
1 |
## Deprecated ------------------------------------------------------------
|
|
2 | ||
3 |
#' Helper functions for tabulation of a single biomarker result
|
|
4 |
#'
|
|
5 |
#' @description `r lifecycle::badge("deprecated")`
|
|
6 |
#'
|
|
7 |
#' @inheritParams argument_convention
|
|
8 |
#' @inheritParams survival_duration_subgroups
|
|
9 |
#' @param df (`data.frame`)\cr results for a single biomarker. For `h_tab_rsp_one_biomarker()`, the results returned by
|
|
10 |
#' [extract_rsp_biomarkers()]. For `h_tab_surv_one_biomarker()`, the results returned by
|
|
11 |
#' [extract_survival_biomarkers()].
|
|
12 |
#' @param afuns (named `list` of `function`)\cr analysis functions.
|
|
13 |
#' @param colvars (named `list`)\cr named list with elements `vars` (variables to tabulate) and `labels` (their labels).
|
|
14 |
#'
|
|
15 |
#' @return An `rtables` table object with statistics in columns.
|
|
16 |
#'
|
|
17 |
#' @name h_biomarkers_subgroups
|
|
18 |
NULL
|
|
19 | ||
20 |
#' @describeIn h_biomarkers_subgroups Helper function to calculate statistics in columns for one biomarker.
|
|
21 |
#'
|
|
22 |
#' @export
|
|
23 |
h_tab_one_biomarker <- function(df, |
|
24 |
afuns,
|
|
25 |
colvars,
|
|
26 |
na_str = default_na_str(), |
|
27 |
...,
|
|
28 |
.stats = NULL, |
|
29 |
.stat_names = NULL, |
|
30 |
.formats = NULL, |
|
31 |
.labels = NULL, |
|
32 |
.indent_mods = NULL) { |
|
33 | 2x |
lifecycle::deprecate_warn( |
34 | 2x |
"0.9.8", "h_tab_one_biomarker()", |
35 | 2x |
details = "This function is no longer used within `tern`." |
36 |
)
|
|
37 | ||
38 |
# Process standard extra arguments
|
|
39 | 2x |
extra_args <- list(".stats" = .stats) |
40 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
41 | ! |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
42 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
43 | 2x |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
44 | ||
45 |
# Process additional arguments to the statistic function
|
|
46 | 2x |
extra_args <- c(extra_args, biomarker = TRUE, ...) |
47 | ||
48 |
# Adding additional info from layout to analysis function
|
|
49 | 2x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
50 | 2x |
formals(afuns) <- c(formals(afuns), extra_args[[".additional_fun_parameters"]]) |
51 | ||
52 |
# Create "ci" column from "lcl" and "ucl"
|
|
53 | 2x |
df$ci <- combine_vectors(df$lcl, df$ucl) |
54 | ||
55 | 2x |
colvars$vars <- intersect(colvars$vars, names(df)) |
56 | 2x |
colvars$labels <- colvars$labels[colvars$vars] |
57 | ||
58 | 2x |
lyt <- basic_table() |
59 | ||
60 |
# Split cols by the multiple variables to populate into columns.
|
|
61 | 2x |
lyt <- split_cols_by_multivar( |
62 | 2x |
lyt = lyt, |
63 | 2x |
vars = colvars$vars, |
64 | 2x |
varlabels = colvars$labels |
65 |
)
|
|
66 | ||
67 |
# Add "All Patients" row
|
|
68 | 2x |
lyt <- split_rows_by( |
69 | 2x |
lyt = lyt, |
70 | 2x |
var = "row_type", |
71 | 2x |
split_fun = keep_split_levels("content"), |
72 | 2x |
nested = TRUE, |
73 | 2x |
child_labels = "hidden" |
74 |
)
|
|
75 | 2x |
lyt <- analyze_colvars( |
76 | 2x |
lyt = lyt, |
77 | 2x |
afun = afuns, |
78 | 2x |
na_str = na_str, |
79 | 2x |
extra_args = c(extra_args) |
80 |
)
|
|
81 | ||
82 |
# Add analysis rows
|
|
83 | 2x |
if ("analysis" %in% df$row_type) { |
84 | ! |
lyt <- split_rows_by( |
85 | ! |
lyt = lyt, |
86 | ! |
var = "row_type", |
87 | ! |
split_fun = keep_split_levels("analysis"), |
88 | ! |
nested = TRUE, |
89 | ! |
child_labels = "hidden" |
90 |
)
|
|
91 | ! |
lyt <- split_rows_by( |
92 | ! |
lyt = lyt, |
93 | ! |
var = "var_label", |
94 | ! |
nested = TRUE, |
95 | ! |
indent_mod = 1L |
96 |
)
|
|
97 | ! |
lyt <- analyze_colvars( |
98 | ! |
lyt = lyt, |
99 | ! |
afun = afuns, |
100 | ! |
na_str = na_str, |
101 | ! |
inclNAs = TRUE, |
102 | ! |
extra_args = extra_args |
103 |
)
|
|
104 |
}
|
|
105 | ||
106 | 2x |
build_table(lyt, df = df) |
107 |
}
|
|
108 | ||
109 |
#' @describeIn h_biomarkers_subgroups Helper function that prepares a single response sub-table given the results for a
|
|
110 |
#' single biomarker.
|
|
111 |
#'
|
|
112 |
#' @examples
|
|
113 |
#' library(dplyr)
|
|
114 |
#' library(forcats)
|
|
115 |
#'
|
|
116 |
#' adrs <- tern_ex_adrs
|
|
117 |
#' adrs_labels <- formatters::var_labels(adrs)
|
|
118 |
#'
|
|
119 |
#' adrs_f <- adrs %>%
|
|
120 |
#' filter(PARAMCD == "BESRSPI") %>%
|
|
121 |
#' mutate(rsp = AVALC == "CR")
|
|
122 |
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
|
|
123 |
#'
|
|
124 |
#' # For a single population, separately estimate the effects of two biomarkers.
|
|
125 |
#' df <- h_logistic_mult_cont_df(
|
|
126 |
#' variables = list(
|
|
127 |
#' rsp = "rsp",
|
|
128 |
#' biomarkers = c("BMRKR1", "AGE"),
|
|
129 |
#' covariates = "SEX"
|
|
130 |
#' ),
|
|
131 |
#' data = adrs_f
|
|
132 |
#' )
|
|
133 |
#'
|
|
134 |
#' # Starting from above `df`, zoom in on one biomarker and add required columns.
|
|
135 |
#' df1 <- df[1, ]
|
|
136 |
#' df1$subgroup <- "All patients"
|
|
137 |
#' df1$row_type <- "content"
|
|
138 |
#' df1$var <- "ALL"
|
|
139 |
#' df1$var_label <- "All patients"
|
|
140 |
#'
|
|
141 |
#' h_tab_rsp_one_biomarker(
|
|
142 |
#' df1,
|
|
143 |
#' vars = c("n_tot", "n_rsp", "prop", "or", "ci", "pval")
|
|
144 |
#' )
|
|
145 |
#'
|
|
146 |
#' @export
|
|
147 |
h_tab_rsp_one_biomarker <- function(df, |
|
148 |
vars,
|
|
149 |
na_str = default_na_str(), |
|
150 |
.indent_mods = 0L, |
|
151 |
...) { |
|
152 | 1x |
lifecycle::deprecate_warn( |
153 | 1x |
"0.9.8", "h_tab_rsp_one_biomarker()", |
154 | 1x |
details = "This function is no longer used within `tern`." |
155 |
)
|
|
156 | ||
157 | 1x |
colvars <- d_rsp_subgroups_colvars( |
158 | 1x |
vars,
|
159 | 1x |
conf_level = df$conf_level[1], |
160 | 1x |
method = df$pval_label[1] |
161 |
)
|
|
162 | ||
163 | 1x |
h_tab_one_biomarker( |
164 | 1x |
df = df, |
165 | 1x |
afuns = a_response_subgroups, |
166 | 1x |
colvars = colvars, |
167 | 1x |
na_str = na_str, |
168 | 1x |
.indent_mods = .indent_mods, |
169 |
...
|
|
170 |
)
|
|
171 |
}
|
|
172 | ||
173 |
#' @describeIn h_biomarkers_subgroups Helper function that prepares a single survival sub-table given the results for a
|
|
174 |
#' single biomarker.
|
|
175 |
#'
|
|
176 |
#' @examples
|
|
177 |
#' adtte <- tern_ex_adtte
|
|
178 |
#'
|
|
179 |
#' # Save variable labels before data processing steps.
|
|
180 |
#' adtte_labels <- formatters::var_labels(adtte, fill = FALSE)
|
|
181 |
#'
|
|
182 |
#' adtte_f <- adtte %>%
|
|
183 |
#' filter(PARAMCD == "OS") %>%
|
|
184 |
#' mutate(
|
|
185 |
#' AVALU = as.character(AVALU),
|
|
186 |
#' is_event = CNSR == 0
|
|
187 |
#' )
|
|
188 |
#' labels <- c("AVALU" = adtte_labels[["AVALU"]], "is_event" = "Event Flag")
|
|
189 |
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
|
|
190 |
#'
|
|
191 |
#' # For a single population, separately estimate the effects of two biomarkers.
|
|
192 |
#' df <- h_coxreg_mult_cont_df(
|
|
193 |
#' variables = list(
|
|
194 |
#' tte = "AVAL",
|
|
195 |
#' is_event = "is_event",
|
|
196 |
#' biomarkers = c("BMRKR1", "AGE"),
|
|
197 |
#' covariates = "SEX",
|
|
198 |
#' strata = c("STRATA1", "STRATA2")
|
|
199 |
#' ),
|
|
200 |
#' data = adtte_f
|
|
201 |
#' )
|
|
202 |
#'
|
|
203 |
#' # Starting from above `df`, zoom in on one biomarker and add required columns.
|
|
204 |
#' df1 <- df[1, ]
|
|
205 |
#' df1$subgroup <- "All patients"
|
|
206 |
#' df1$row_type <- "content"
|
|
207 |
#' df1$var <- "ALL"
|
|
208 |
#' df1$var_label <- "All patients"
|
|
209 |
#' h_tab_surv_one_biomarker(
|
|
210 |
#' df1,
|
|
211 |
#' vars = c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"),
|
|
212 |
#' time_unit = "days"
|
|
213 |
#' )
|
|
214 |
#'
|
|
215 |
#' @export
|
|
216 |
h_tab_surv_one_biomarker <- function(df, |
|
217 |
vars,
|
|
218 |
time_unit,
|
|
219 |
na_str = default_na_str(), |
|
220 |
.indent_mods = 0L, |
|
221 |
...) { |
|
222 | 1x |
lifecycle::deprecate_warn( |
223 | 1x |
"0.9.8", "h_tab_surv_one_biomarker()", |
224 | 1x |
details = "This function is no longer used within `tern`." |
225 |
)
|
|
226 | ||
227 | 1x |
colvars <- d_survival_subgroups_colvars( |
228 | 1x |
vars,
|
229 | 1x |
conf_level = df$conf_level[1], |
230 | 1x |
method = df$pval_label[1], |
231 | 1x |
time_unit = time_unit |
232 |
)
|
|
233 | ||
234 | 1x |
h_tab_one_biomarker( |
235 | 1x |
df = df, |
236 | 1x |
afuns = a_survival_subgroups, |
237 | 1x |
colvars = colvars, |
238 | 1x |
na_str = na_str, |
239 | 1x |
.indent_mods = .indent_mods, |
240 |
...
|
|
241 |
)
|
|
242 |
}
|
1 |
#' Count number of patients and sum exposure across all patients in columns
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [analyze_patients_exposure_in_cols()] creates a layout element to count total numbers of
|
|
6 |
#' patients and sum an analysis value (i.e. exposure) across all patients in columns.
|
|
7 |
#'
|
|
8 |
#' The primary analysis variable `ex_var` is the exposure variable used to calculate the `sum_exposure` statistic. The
|
|
9 |
#' `id` variable is used to uniquely identify patients in the data such that only unique patients are counted in the
|
|
10 |
#' `n_patients` statistic, and the `var` variable is used to create a row split if needed. The percentage returned as
|
|
11 |
#' part of the `n_patients` statistic is the proportion of all records that correspond to a unique patient.
|
|
12 |
#'
|
|
13 |
#' The summarize function [summarize_patients_exposure_in_cols()] performs the same function as
|
|
14 |
#' [analyze_patients_exposure_in_cols()] except it creates content rows, not data rows, to summarize the current table
|
|
15 |
#' row/column context and operates on the level of the latest row split or the root of the table if no row splits have
|
|
16 |
#' occurred.
|
|
17 |
#'
|
|
18 |
#' If a column split has not yet been performed in the table, `col_split` must be set to `TRUE` for the first call of
|
|
19 |
#' [analyze_patients_exposure_in_cols()] or [summarize_patients_exposure_in_cols()].
|
|
20 |
#'
|
|
21 |
#' @inheritParams argument_convention
|
|
22 |
#' @param ex_var (`string`)\cr name of the variable in `df` containing exposure values.
|
|
23 |
#' @param custom_label (`string` or `NULL`)\cr if provided and `labelstr` is empty, this will be used as label.
|
|
24 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
25 |
#'
|
|
26 |
#' Options are: ``r shQuote(get_stats("analyze_patients_exposure_in_cols"), type = "sh")``
|
|
27 |
#'
|
|
28 |
#' @name summarize_patients_exposure_in_cols
|
|
29 |
#' @order 1
|
|
30 |
NULL
|
|
31 | ||
32 |
#' @describeIn summarize_patients_exposure_in_cols Statistics function which counts numbers
|
|
33 |
#' of patients and the sum of exposure across all patients.
|
|
34 |
#'
|
|
35 |
#' @return
|
|
36 |
#' * `s_count_patients_sum_exposure()` returns a named `list` with the statistics:
|
|
37 |
#' * `n_patients`: Number of unique patients in `df`.
|
|
38 |
#' * `sum_exposure`: Sum of `ex_var` across all patients in `df`.
|
|
39 |
#'
|
|
40 |
#' @keywords internal
|
|
41 |
s_count_patients_sum_exposure <- function(df, |
|
42 |
labelstr = "", |
|
43 |
.stats = c("n_patients", "sum_exposure"), |
|
44 |
.N_col, # nolint |
|
45 |
...,
|
|
46 |
ex_var = "AVAL", |
|
47 |
id = "USUBJID", |
|
48 |
custom_label = NULL, |
|
49 |
var_level = NULL) { |
|
50 | 56x |
assert_df_with_variables(df, list(ex_var = ex_var, id = id)) |
51 | 56x |
checkmate::assert_string(id) |
52 | 56x |
checkmate::assert_string(labelstr) |
53 | 56x |
checkmate::assert_string(custom_label, null.ok = TRUE) |
54 | 56x |
checkmate::assert_numeric(df[[ex_var]]) |
55 | 56x |
checkmate::assert_true(all(.stats %in% c("n_patients", "sum_exposure"))) |
56 | ||
57 | 56x |
row_label <- if (labelstr != "") { |
58 | ! |
labelstr
|
59 | 56x |
} else if (!is.null(var_level)) { |
60 | 42x |
var_level
|
61 | 56x |
} else if (!is.null(custom_label)) { |
62 | 6x |
custom_label
|
63 |
} else { |
|
64 | 8x |
"Total patients numbers/person time"
|
65 |
}
|
|
66 | ||
67 | 56x |
y <- list() |
68 | ||
69 | 56x |
if ("n_patients" %in% .stats) { |
70 | 56x |
y$n_patients <- |
71 | 56x |
formatters::with_label( |
72 | 56x |
s_num_patients_content( |
73 | 56x |
df = df, |
74 | 56x |
.N_col = .N_col, # nolint |
75 | 56x |
.var = id, |
76 | 56x |
labelstr = "" |
77 | 56x |
)$unique, |
78 | 56x |
row_label
|
79 |
)
|
|
80 |
}
|
|
81 | 56x |
if ("sum_exposure" %in% .stats) { |
82 | 56x |
y$sum_exposure <- formatters::with_label(sum(df[[ex_var]]), row_label) |
83 |
}
|
|
84 | 56x |
y
|
85 |
}
|
|
86 | ||
87 |
#' @describeIn summarize_patients_exposure_in_cols Analysis function which is used as `afun` in
|
|
88 |
#' [rtables::analyze_colvars()] within `analyze_patients_exposure_in_cols()` and as `cfun` in
|
|
89 |
#' [rtables::summarize_row_groups()] within `summarize_patients_exposure_in_cols()`.
|
|
90 |
#'
|
|
91 |
#' @return
|
|
92 |
#' * `a_count_patients_sum_exposure()` returns formatted [rtables::CellValue()].
|
|
93 |
#'
|
|
94 |
#' @export
|
|
95 |
a_count_patients_sum_exposure <- function(df, |
|
96 |
labelstr = "", |
|
97 |
...,
|
|
98 |
.stats = NULL, |
|
99 |
.stat_names = NULL, |
|
100 |
.formats = NULL, |
|
101 |
.labels = NULL, |
|
102 |
.indent_mods = NULL) { |
|
103 | 32x |
checkmate::assert_character(.stats, len = 1) |
104 | ||
105 |
# Check for additional parameters to the statistics function
|
|
106 | 32x |
dots_extra_args <- list(...) |
107 | 32x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
108 | 32x |
dots_extra_args$.additional_fun_parameters <- NULL |
109 | ||
110 | 32x |
add_total_level <- dots_extra_args$add_total_level |
111 | 32x |
checkmate::assert_flag(add_total_level) |
112 | ||
113 | 32x |
var <- dots_extra_args$var |
114 | 32x |
if (!is.null(var)) { |
115 | 21x |
assert_df_with_variables(df, list(var = var)) |
116 | 21x |
df[[var]] <- as.factor(df[[var]]) |
117 |
}
|
|
118 | ||
119 |
# Check for user-defined functions
|
|
120 | 32x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
121 | 32x |
.stats <- default_and_custom_stats_list$all_stats |
122 | 32x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
123 | ||
124 | 32x |
x_stats <- list() |
125 | 32x |
if (!is.null(var)) { |
126 | 21x |
for (lvl in levels(df[[var]])) { |
127 | 42x |
x_stats_i <- .apply_stat_functions( |
128 | 42x |
default_stat_fnc = s_count_patients_sum_exposure, |
129 | 42x |
custom_stat_fnc_list = custom_stat_functions, |
130 | 42x |
args_list = c( |
131 | 42x |
df = list(subset(df, get(var) == lvl)), |
132 | 42x |
labelstr = list(labelstr), |
133 | 42x |
var_level = lvl, |
134 | 42x |
extra_afun_params,
|
135 | 42x |
dots_extra_args
|
136 |
)
|
|
137 |
)
|
|
138 | 42x |
x_stats[[.stats]][[lvl]] <- x_stats_i[[.stats]] |
139 |
}
|
|
140 |
}
|
|
141 | ||
142 | 32x |
if (add_total_level || is.null(var)) { |
143 | 13x |
x_stats_total <- .apply_stat_functions( |
144 | 13x |
default_stat_fnc = s_count_patients_sum_exposure, |
145 | 13x |
custom_stat_fnc_list = custom_stat_functions, |
146 | 13x |
args_list = c( |
147 | 13x |
df = list(df), |
148 | 13x |
labelstr = list(labelstr), |
149 | 13x |
extra_afun_params,
|
150 | 13x |
dots_extra_args
|
151 |
)
|
|
152 |
)
|
|
153 | 13x |
x_stats[[.stats]][["Total"]] <- x_stats_total[[.stats]] |
154 |
}
|
|
155 | ||
156 |
# Fill in formatting defaults
|
|
157 | 32x |
.stats <- get_stats( |
158 | 32x |
"analyze_patients_exposure_in_cols",
|
159 | 32x |
stats_in = .stats, |
160 | 32x |
custom_stats_in = names(custom_stat_functions) |
161 |
)
|
|
162 | 32x |
x_stats <- x_stats[.stats] |
163 | 32x |
levels_per_stats <- lapply(x_stats, names) |
164 | 32x |
.formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) |
165 | 32x |
.labels <- get_labels_from_stats( |
166 | 32x |
.stats, .labels, levels_per_stats, |
167 | 32x |
tern_defaults = c(lapply(x_stats[[1]], attr, "label"), tern_default_labels) |
168 |
)
|
|
169 | 32x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) |
170 | ||
171 | 32x |
x_stats <- x_stats[.stats] %>% |
172 | 32x |
.unlist_keep_nulls() %>% |
173 | 32x |
setNames(names(.formats)) |
174 | ||
175 |
# Auto format handling
|
|
176 | 32x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
177 | ||
178 |
# Get and check statistical names
|
|
179 | 32x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
180 | ||
181 | 32x |
in_rows( |
182 | 32x |
.list = x_stats, |
183 | 32x |
.formats = .formats, |
184 | 32x |
.names = .labels %>% .unlist_keep_nulls(), |
185 | 32x |
.stat_names = .stat_names, |
186 | 32x |
.labels = .labels %>% .unlist_keep_nulls(), |
187 | 32x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
188 |
)
|
|
189 |
}
|
|
190 | ||
191 |
#' @describeIn summarize_patients_exposure_in_cols Layout-creating function which can take statistics
|
|
192 |
#' function arguments and additional format arguments. This function is a wrapper for
|
|
193 |
#' [rtables::split_cols_by_multivar()] and [rtables::summarize_row_groups()].
|
|
194 |
#'
|
|
195 |
#' @return
|
|
196 |
#' * `summarize_patients_exposure_in_cols()` returns a layout object suitable for passing to further
|
|
197 |
#' layouting functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will
|
|
198 |
#' add formatted content rows, with the statistics from `s_count_patients_sum_exposure()` arranged in
|
|
199 |
#' columns, to the table layout.
|
|
200 |
#'
|
|
201 |
#' @examples
|
|
202 |
#' lyt5 <- basic_table() %>%
|
|
203 |
#' summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE)
|
|
204 |
#'
|
|
205 |
#' result5 <- build_table(lyt5, df = df, alt_counts_df = adsl)
|
|
206 |
#' result5
|
|
207 |
#'
|
|
208 |
#' lyt6 <- basic_table() %>%
|
|
209 |
#' summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE, .stats = "sum_exposure")
|
|
210 |
#'
|
|
211 |
#' result6 <- build_table(lyt6, df = df, alt_counts_df = adsl)
|
|
212 |
#' result6
|
|
213 |
#'
|
|
214 |
#' @export
|
|
215 |
#' @order 3
|
|
216 |
summarize_patients_exposure_in_cols <- function(lyt, |
|
217 |
var,
|
|
218 |
ex_var = "AVAL", |
|
219 |
id = "USUBJID", |
|
220 |
add_total_level = FALSE, |
|
221 |
custom_label = NULL, |
|
222 |
col_split = TRUE, |
|
223 |
na_str = default_na_str(), |
|
224 |
...,
|
|
225 |
.stats = c("n_patients", "sum_exposure"), |
|
226 |
.stat_names = NULL, |
|
227 |
.formats = NULL, |
|
228 |
.labels = c(n_patients = "Patients", sum_exposure = "Person time"), |
|
229 |
.indent_mods = NULL) { |
|
230 |
# Process standard extra arguments
|
|
231 | 3x |
extra_args <- list() |
232 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
233 | ! |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
234 | 3x |
col_labels <- unlist(.labels[.stats]) |
235 | 3x |
.labels <- .labels[!names(.labels) %in% c("n_patients", "sum_exposure")] |
236 | 3x |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
237 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
238 | ||
239 |
# Process additional arguments to the statistic function
|
|
240 | 3x |
extra_args <- c( |
241 | 3x |
extra_args,
|
242 | 3x |
ex_var = ex_var, id = id, add_total_level = add_total_level, custom_label = custom_label, |
243 |
...
|
|
244 |
)
|
|
245 | ||
246 |
# Adding additional info from layout to analysis function
|
|
247 | 3x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
248 | 3x |
formals(a_count_patients_sum_exposure) <- c( |
249 | 3x |
formals(a_count_patients_sum_exposure), extra_args[[".additional_fun_parameters"]] |
250 |
)
|
|
251 | ||
252 | 3x |
if (col_split) { |
253 | 3x |
lyt <- split_cols_by_multivar( |
254 | 3x |
lyt = lyt, |
255 | 3x |
vars = rep(var, length(.stats)), |
256 | 3x |
varlabels = col_labels, |
257 | 3x |
extra_args = list(.stats = .stats) |
258 |
)
|
|
259 |
}
|
|
260 | 3x |
summarize_row_groups( |
261 | 3x |
lyt = lyt, |
262 | 3x |
var = var, |
263 | 3x |
cfun = a_count_patients_sum_exposure, |
264 | 3x |
na_str = na_str, |
265 | 3x |
extra_args = extra_args |
266 |
)
|
|
267 |
}
|
|
268 | ||
269 |
#' @describeIn summarize_patients_exposure_in_cols Layout-creating function which can take statistics
|
|
270 |
#' function arguments and additional format arguments. This function is a wrapper for
|
|
271 |
#' [rtables::split_cols_by_multivar()] and [rtables::analyze_colvars()].
|
|
272 |
#'
|
|
273 |
#' @param col_split (`flag`)\cr whether the columns should be split. Set to `FALSE` when the required
|
|
274 |
#' column split has been done already earlier in the layout pipe.
|
|
275 |
#'
|
|
276 |
#' @return
|
|
277 |
#' * `analyze_patients_exposure_in_cols()` returns a layout object suitable for passing to further
|
|
278 |
#' layouting functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will
|
|
279 |
#' add formatted data rows, with the statistics from `s_count_patients_sum_exposure()` arranged in
|
|
280 |
#' columns, to the table layout.
|
|
281 |
#'
|
|
282 |
#' @note As opposed to [summarize_patients_exposure_in_cols()] which generates content rows,
|
|
283 |
#' `analyze_patients_exposure_in_cols()` generates data rows which will _not_ be repeated on multiple
|
|
284 |
#' pages when pagination is used.
|
|
285 |
#'
|
|
286 |
#' @examples
|
|
287 |
#' set.seed(1)
|
|
288 |
#' df <- data.frame(
|
|
289 |
#' USUBJID = c(paste("id", seq(1, 12), sep = "")),
|
|
290 |
#' ARMCD = c(rep("ARM A", 6), rep("ARM B", 6)),
|
|
291 |
#' SEX = c(rep("Female", 6), rep("Male", 6)),
|
|
292 |
#' AVAL = as.numeric(sample(seq(1, 20), 12)),
|
|
293 |
#' stringsAsFactors = TRUE
|
|
294 |
#' )
|
|
295 |
#' adsl <- data.frame(
|
|
296 |
#' USUBJID = c(paste("id", seq(1, 12), sep = "")),
|
|
297 |
#' ARMCD = c(rep("ARM A", 2), rep("ARM B", 2)),
|
|
298 |
#' SEX = c(rep("Female", 2), rep("Male", 2)),
|
|
299 |
#' stringsAsFactors = TRUE
|
|
300 |
#' )
|
|
301 |
#'
|
|
302 |
#' lyt <- basic_table() %>%
|
|
303 |
#' split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>%
|
|
304 |
#' summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE) %>%
|
|
305 |
#' analyze_patients_exposure_in_cols(var = "SEX", col_split = FALSE)
|
|
306 |
#' result <- build_table(lyt, df = df, alt_counts_df = adsl)
|
|
307 |
#' result
|
|
308 |
#'
|
|
309 |
#' lyt2 <- basic_table() %>%
|
|
310 |
#' split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>%
|
|
311 |
#' summarize_patients_exposure_in_cols(
|
|
312 |
#' var = "AVAL", col_split = TRUE,
|
|
313 |
#' .stats = "n_patients", custom_label = "some custom label"
|
|
314 |
#' ) %>%
|
|
315 |
#' analyze_patients_exposure_in_cols(var = "SEX", col_split = FALSE, ex_var = "AVAL")
|
|
316 |
#' result2 <- build_table(lyt2, df = df, alt_counts_df = adsl)
|
|
317 |
#' result2
|
|
318 |
#'
|
|
319 |
#' lyt3 <- basic_table() %>%
|
|
320 |
#' analyze_patients_exposure_in_cols(var = "SEX", col_split = TRUE, ex_var = "AVAL")
|
|
321 |
#' result3 <- build_table(lyt3, df = df, alt_counts_df = adsl)
|
|
322 |
#' result3
|
|
323 |
#'
|
|
324 |
#' # Adding total levels and custom label
|
|
325 |
#' lyt4 <- basic_table(
|
|
326 |
#' show_colcounts = TRUE
|
|
327 |
#' ) %>%
|
|
328 |
#' analyze_patients_exposure_in_cols(
|
|
329 |
#' var = "ARMCD",
|
|
330 |
#' col_split = TRUE,
|
|
331 |
#' add_total_level = TRUE,
|
|
332 |
#' custom_label = "TOTAL"
|
|
333 |
#' ) %>%
|
|
334 |
#' append_topleft(c("", "Sex"))
|
|
335 |
#'
|
|
336 |
#' result4 <- build_table(lyt4, df = df, alt_counts_df = adsl)
|
|
337 |
#' result4
|
|
338 |
#'
|
|
339 |
#' @export
|
|
340 |
#' @order 2
|
|
341 |
analyze_patients_exposure_in_cols <- function(lyt, |
|
342 |
var = NULL, |
|
343 |
ex_var = "AVAL", |
|
344 |
id = "USUBJID", |
|
345 |
add_total_level = FALSE, |
|
346 |
custom_label = NULL, |
|
347 |
col_split = TRUE, |
|
348 |
na_str = default_na_str(), |
|
349 |
.stats = c("n_patients", "sum_exposure"), |
|
350 |
.stat_names = NULL, |
|
351 |
.formats = NULL, |
|
352 |
.labels = c(n_patients = "Patients", sum_exposure = "Person time"), |
|
353 |
.indent_mods = NULL, |
|
354 |
...) { |
|
355 |
# Process standard extra arguments
|
|
356 | 6x |
extra_args <- list() |
357 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
358 | ! |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
359 | 6x |
col_labels <- unlist(.labels[.stats]) |
360 | 6x |
.labels <- .labels[!names(.labels) %in% c("n_patients", "sum_exposure")] |
361 | 6x |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
362 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
363 | ||
364 |
# Process additional arguments to the statistic function
|
|
365 | 6x |
extra_args <- c( |
366 | 6x |
extra_args,
|
367 | 6x |
var = var, ex_var = ex_var, id = id, add_total_level = add_total_level, custom_label = custom_label, |
368 |
...
|
|
369 |
)
|
|
370 | ||
371 |
# Adding additional info from layout to analysis function
|
|
372 | 6x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
373 | 6x |
formals(a_count_patients_sum_exposure) <- c( |
374 | 6x |
formals(a_count_patients_sum_exposure), extra_args[[".additional_fun_parameters"]] |
375 |
)
|
|
376 | ||
377 | 6x |
if (col_split) { |
378 | 4x |
lyt <- split_cols_by_multivar( |
379 | 4x |
lyt = lyt, |
380 | 4x |
vars = rep(ex_var, length(.stats)), |
381 | 4x |
varlabels = col_labels, |
382 | 4x |
extra_args = list(.stats = .stats) |
383 |
)
|
|
384 |
}
|
|
385 | ||
386 | 6x |
analyze_colvars( |
387 | 6x |
lyt = lyt, |
388 | 6x |
afun = a_count_patients_sum_exposure, |
389 | 6x |
na_str = na_str, |
390 | 6x |
extra_args = extra_args |
391 |
)
|
|
392 |
}
|
1 |
#' Difference test for two proportions
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [test_proportion_diff()] creates a layout element to test the difference between two
|
|
6 |
#' proportions. The primary analysis variable, `vars`, indicates whether a response has occurred for each record. See
|
|
7 |
#' the `method` parameter for options of methods to use to calculate the p-value. Additionally, a stratification
|
|
8 |
#' variable can be supplied via the `strata` element of the `variables` argument.
|
|
9 |
#'
|
|
10 |
#' @inheritParams argument_convention
|
|
11 |
#' @param method (`string`)\cr one of `chisq`, `cmh`, `fisher`, or `schouten`; specifies the test used
|
|
12 |
#' to calculate the p-value.
|
|
13 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
14 |
#'
|
|
15 |
#' Options are: ``r shQuote(get_stats("test_proportion_diff"), type = "sh")``
|
|
16 |
#'
|
|
17 |
#' @seealso [h_prop_diff_test]
|
|
18 |
#'
|
|
19 |
#' @name prop_diff_test
|
|
20 |
#' @order 1
|
|
21 |
NULL
|
|
22 | ||
23 |
#' @describeIn prop_diff_test Statistics function which tests the difference between two proportions.
|
|
24 |
#'
|
|
25 |
#' @return
|
|
26 |
#' * `s_test_proportion_diff()` returns a named `list` with a single item `pval` with an attribute `label`
|
|
27 |
#' describing the method used. The p-value tests the null hypothesis that proportions in two groups are the same.
|
|
28 |
#'
|
|
29 |
#' @keywords internal
|
|
30 |
s_test_proportion_diff <- function(df, |
|
31 |
.var,
|
|
32 |
.ref_group,
|
|
33 |
.in_ref_col,
|
|
34 |
variables = list(strata = NULL), |
|
35 |
method = c("chisq", "schouten", "fisher", "cmh"), |
|
36 |
...) { |
|
37 | 58x |
method <- match.arg(method) |
38 | 58x |
y <- list(pval = numeric()) |
39 | ||
40 | 58x |
if (!.in_ref_col) { |
41 | 52x |
assert_df_with_variables(df, list(rsp = .var)) |
42 | 52x |
assert_df_with_variables(.ref_group, list(rsp = .var)) |
43 | 52x |
rsp <- factor( |
44 | 52x |
c(.ref_group[[.var]], df[[.var]]), |
45 | 52x |
levels = c("TRUE", "FALSE") |
46 |
)
|
|
47 | 52x |
grp <- factor( |
48 | 52x |
rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))), |
49 | 52x |
levels = c("ref", "Not-ref") |
50 |
)
|
|
51 | ||
52 | 52x |
if (!is.null(variables$strata) || method == "cmh") { |
53 | 14x |
strata <- variables$strata |
54 | 14x |
checkmate::assert_false(is.null(strata)) |
55 | 14x |
strata_vars <- stats::setNames(as.list(strata), strata) |
56 | 14x |
assert_df_with_variables(df, strata_vars) |
57 | 14x |
assert_df_with_variables(.ref_group, strata_vars) |
58 | 14x |
strata <- c(interaction(.ref_group[strata]), interaction(df[strata])) |
59 |
}
|
|
60 | ||
61 | 52x |
tbl <- switch(method, |
62 | 52x |
cmh = table(grp, rsp, strata), |
63 | 52x |
table(grp, rsp) |
64 |
)
|
|
65 | ||
66 | 52x |
y$pval <- switch(method, |
67 | 52x |
chisq = prop_chisq(tbl), |
68 | 52x |
cmh = prop_cmh(tbl), |
69 | 52x |
fisher = prop_fisher(tbl), |
70 | 52x |
schouten = prop_schouten(tbl) |
71 |
)
|
|
72 |
}
|
|
73 | ||
74 | 58x |
y$pval <- formatters::with_label(y$pval, d_test_proportion_diff(method)) |
75 | 58x |
y
|
76 |
}
|
|
77 | ||
78 |
#' Description of the difference test between two proportions
|
|
79 |
#'
|
|
80 |
#' @description `r lifecycle::badge("stable")`
|
|
81 |
#'
|
|
82 |
#' This is an auxiliary function that describes the analysis in `s_test_proportion_diff`.
|
|
83 |
#'
|
|
84 |
#' @inheritParams s_test_proportion_diff
|
|
85 |
#'
|
|
86 |
#' @return A `string` describing the test from which the p-value is derived.
|
|
87 |
#'
|
|
88 |
#' @export
|
|
89 |
d_test_proportion_diff <- function(method) { |
|
90 | 59x |
checkmate::assert_string(method) |
91 | 59x |
meth_part <- switch(method, |
92 | 59x |
"schouten" = "Chi-Squared Test with Schouten Correction", |
93 | 59x |
"chisq" = "Chi-Squared Test", |
94 | 59x |
"cmh" = "Cochran-Mantel-Haenszel Test", |
95 | 59x |
"fisher" = "Fisher's Exact Test", |
96 | 59x |
stop(paste(method, "does not have a description")) |
97 |
)
|
|
98 | 59x |
paste0("p-value (", meth_part, ")") |
99 |
}
|
|
100 | ||
101 |
#' @describeIn prop_diff_test Formatted analysis function which is used as `afun` in `test_proportion_diff()`.
|
|
102 |
#'
|
|
103 |
#' @return
|
|
104 |
#' * `a_test_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
105 |
#'
|
|
106 |
#' @keywords internal
|
|
107 |
a_test_proportion_diff <- function(df, |
|
108 |
...,
|
|
109 |
.stats = NULL, |
|
110 |
.stat_names = NULL, |
|
111 |
.formats = NULL, |
|
112 |
.labels = NULL, |
|
113 |
.indent_mods = NULL) { |
|
114 | 13x |
dots_extra_args <- list(...) |
115 | ||
116 |
# Check if there are user-defined functions
|
|
117 | 13x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
118 | 13x |
.stats <- default_and_custom_stats_list$all_stats |
119 | 13x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
120 | ||
121 |
# Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params)
|
|
122 | 13x |
extra_afun_params <- retrieve_extra_afun_params( |
123 | 13x |
names(dots_extra_args$.additional_fun_parameters) |
124 |
)
|
|
125 | 13x |
dots_extra_args$.additional_fun_parameters <- NULL # After extraction we do not need them anymore |
126 | ||
127 |
# Main statistical functions application
|
|
128 | 13x |
x_stats <- .apply_stat_functions( |
129 | 13x |
default_stat_fnc = s_test_proportion_diff, |
130 | 13x |
custom_stat_fnc_list = custom_stat_functions, |
131 | 13x |
args_list = c( |
132 | 13x |
df = list(df), |
133 | 13x |
extra_afun_params,
|
134 | 13x |
dots_extra_args
|
135 |
)
|
|
136 |
)
|
|
137 | ||
138 |
# Fill in with stats defaults if needed
|
|
139 | 13x |
.stats <- get_stats("test_proportion_diff", |
140 | 13x |
stats_in = .stats, |
141 | 13x |
custom_stats_in = names(custom_stat_functions) |
142 |
)
|
|
143 | ||
144 | 13x |
x_stats <- x_stats[.stats] |
145 | ||
146 |
# Fill in formats/indents/labels with custom input and defaults
|
|
147 | 13x |
.formats <- get_formats_from_stats(.stats, .formats) |
148 | 13x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods) |
149 | 13x |
if (is.null(.labels)) { |
150 | 13x |
.labels <- sapply(x_stats, attr, "label") |
151 | 13x |
.labels <- .labels[nzchar(.labels) & !sapply(.labels, is.null) & !is.na(.labels)] |
152 |
}
|
|
153 | 13x |
.labels <- get_labels_from_stats(.stats, .labels) |
154 | ||
155 |
# Auto format handling
|
|
156 | 13x |
.formats <- apply_auto_formatting( |
157 | 13x |
.formats,
|
158 | 13x |
x_stats,
|
159 | 13x |
extra_afun_params$.df_row, |
160 | 13x |
extra_afun_params$.var |
161 |
)
|
|
162 | ||
163 |
# Get and check statistical names from defaults
|
|
164 | 13x |
.stat_names <- get_stat_names(x_stats, .stat_names) # note is x_stats |
165 | ||
166 | 13x |
in_rows( |
167 | 13x |
.list = x_stats, |
168 | 13x |
.formats = .formats, |
169 | 13x |
.names = names(.labels), |
170 | 13x |
.stat_names = .stat_names, |
171 | 13x |
.labels = .labels %>% .unlist_keep_nulls(), |
172 | 13x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
173 |
)
|
|
174 |
}
|
|
175 | ||
176 |
#' @describeIn prop_diff_test Layout-creating function which can take statistics function arguments
|
|
177 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
178 |
#'
|
|
179 |
#' @return
|
|
180 |
#' * `test_proportion_diff()` returns a layout object suitable for passing to further layouting functions,
|
|
181 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
182 |
#' the statistics from `s_test_proportion_diff()` to the table layout.
|
|
183 |
#'
|
|
184 |
#' @examples
|
|
185 |
#' dta <- data.frame(
|
|
186 |
#' rsp = sample(c(TRUE, FALSE), 100, TRUE),
|
|
187 |
#' grp = factor(rep(c("A", "B"), each = 50)),
|
|
188 |
#' strata = factor(rep(c("V", "W", "X", "Y", "Z"), each = 20))
|
|
189 |
#' )
|
|
190 |
#'
|
|
191 |
#' # With `rtables` pipelines.
|
|
192 |
#' l <- basic_table() %>%
|
|
193 |
#' split_cols_by(var = "grp", ref_group = "B") %>%
|
|
194 |
#' test_proportion_diff(
|
|
195 |
#' vars = "rsp",
|
|
196 |
#' method = "cmh", variables = list(strata = "strata")
|
|
197 |
#' )
|
|
198 |
#'
|
|
199 |
#' build_table(l, df = dta)
|
|
200 |
#'
|
|
201 |
#' @export
|
|
202 |
#' @order 2
|
|
203 |
test_proportion_diff <- function(lyt, |
|
204 |
vars,
|
|
205 |
variables = list(strata = NULL), |
|
206 |
method = c("chisq", "schouten", "fisher", "cmh"), |
|
207 |
var_labels = vars, |
|
208 |
na_str = default_na_str(), |
|
209 |
nested = TRUE, |
|
210 |
show_labels = "hidden", |
|
211 |
table_names = vars, |
|
212 |
section_div = NA_character_, |
|
213 |
...,
|
|
214 |
na_rm = TRUE, |
|
215 |
.stats = c("pval"), |
|
216 |
.stat_names = NULL, |
|
217 |
.formats = c(pval = "x.xxxx | (<0.0001)"), |
|
218 |
.labels = NULL, |
|
219 |
.indent_mods = c(pval = 1L)) { |
|
220 |
# Depending on main functions
|
|
221 | 6x |
extra_args <- list( |
222 | 6x |
"na_rm" = na_rm, |
223 | 6x |
"variables" = variables, |
224 | 6x |
"method" = method, |
225 |
...
|
|
226 |
)
|
|
227 | ||
228 |
# Needed defaults
|
|
229 | 6x |
if (!is.null(.stats)) extra_args[[".stats"]] <- .stats |
230 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
231 | 6x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
232 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
233 | 6x |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
234 | ||
235 |
# Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params)
|
|
236 | 6x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
237 | 6x |
formals(a_test_proportion_diff) <- c( |
238 | 6x |
formals(a_test_proportion_diff), |
239 | 6x |
extra_args[[".additional_fun_parameters"]] |
240 |
)
|
|
241 | ||
242 |
# Main {rtables} structural call
|
|
243 | 6x |
analyze( |
244 | 6x |
lyt = lyt, |
245 | 6x |
vars = vars, |
246 | 6x |
var_labels = var_labels, |
247 | 6x |
afun = a_test_proportion_diff, |
248 | 6x |
na_str = na_str, |
249 | 6x |
inclNAs = !na_rm, |
250 | 6x |
nested = nested, |
251 | 6x |
extra_args = extra_args, |
252 | 6x |
show_labels = show_labels, |
253 | 6x |
table_names = table_names, |
254 | 6x |
section_div = section_div |
255 |
)
|
|
256 |
}
|
|
257 | ||
258 |
#' Helper functions to test proportion differences
|
|
259 |
#'
|
|
260 |
#' Helper functions to implement various tests on the difference between two proportions.
|
|
261 |
#'
|
|
262 |
#' @param tbl (`matrix`)\cr matrix with two groups in rows and the binary response (`TRUE`/`FALSE`) in columns.
|
|
263 |
#'
|
|
264 |
#' @return A p-value.
|
|
265 |
#'
|
|
266 |
#' @seealso [prop_diff_test()] for implementation of these helper functions.
|
|
267 |
#'
|
|
268 |
#' @name h_prop_diff_test
|
|
269 |
NULL
|
|
270 | ||
271 |
#' @describeIn h_prop_diff_test Performs Chi-Squared test. Internally calls [stats::prop.test()].
|
|
272 |
#'
|
|
273 |
#' @keywords internal
|
|
274 |
prop_chisq <- function(tbl) { |
|
275 | 41x |
checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2) |
276 | 41x |
tbl <- tbl[, c("TRUE", "FALSE")] |
277 | 41x |
if (any(colSums(tbl) == 0)) { |
278 | 2x |
return(1) |
279 |
}
|
|
280 | 39x |
stats::prop.test(tbl, correct = FALSE)$p.value |
281 |
}
|
|
282 | ||
283 |
#' @describeIn h_prop_diff_test Performs stratified Cochran-Mantel-Haenszel test. Internally calls
|
|
284 |
#' [stats::mantelhaen.test()]. Note that strata with less than two observations are automatically discarded.
|
|
285 |
#'
|
|
286 |
#' @param ary (`array`, 3 dimensions)\cr array with two groups in rows, the binary response
|
|
287 |
#' (`TRUE`/`FALSE`) in columns, and the strata in the third dimension.
|
|
288 |
#'
|
|
289 |
#' @keywords internal
|
|
290 |
prop_cmh <- function(ary) { |
|
291 | 16x |
checkmate::assert_array(ary) |
292 | 16x |
checkmate::assert_integer(c(ncol(ary), nrow(ary)), lower = 2, upper = 2) |
293 | 16x |
checkmate::assert_integer(length(dim(ary)), lower = 3, upper = 3) |
294 | 16x |
strata_sizes <- apply(ary, MARGIN = 3, sum) |
295 | 16x |
if (any(strata_sizes < 5)) { |
296 | 1x |
warning("<5 data points in some strata. CMH test may be incorrect.") |
297 | 1x |
ary <- ary[, , strata_sizes > 1] |
298 |
}
|
|
299 | ||
300 | 16x |
stats::mantelhaen.test(ary, correct = FALSE)$p.value |
301 |
}
|
|
302 | ||
303 |
#' @describeIn h_prop_diff_test Performs the Chi-Squared test with Schouten correction.
|
|
304 |
#'
|
|
305 |
#' @seealso Schouten correction is based upon \insertCite{Schouten1980-kd;textual}{tern}.
|
|
306 |
#'
|
|
307 |
#' @keywords internal
|
|
308 |
prop_schouten <- function(tbl) { |
|
309 | 100x |
checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2) |
310 | 100x |
tbl <- tbl[, c("TRUE", "FALSE")] |
311 | 100x |
if (any(colSums(tbl) == 0)) { |
312 | 1x |
return(1) |
313 |
}
|
|
314 | ||
315 | 99x |
n <- sum(tbl) |
316 | 99x |
n1 <- sum(tbl[1, ]) |
317 | 99x |
n2 <- sum(tbl[2, ]) |
318 | ||
319 | 99x |
ad <- diag(tbl) |
320 | 99x |
bc <- diag(apply(tbl, 2, rev)) |
321 | 99x |
ac <- tbl[, 1] |
322 | 99x |
bd <- tbl[, 2] |
323 | ||
324 | 99x |
t_schouten <- (n - 1) * |
325 | 99x |
(abs(prod(ad) - prod(bc)) - 0.5 * min(n1, n2))^2 / |
326 | 99x |
(n1 * n2 * sum(ac) * sum(bd)) |
327 | ||
328 | 99x |
1 - stats::pchisq(t_schouten, df = 1) |
329 |
}
|
|
330 | ||
331 |
#' @describeIn h_prop_diff_test Performs the Fisher's exact test. Internally calls [stats::fisher.test()].
|
|
332 |
#'
|
|
333 |
#' @keywords internal
|
|
334 |
prop_fisher <- function(tbl) { |
|
335 | 2x |
checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2) |
336 | 2x |
tbl <- tbl[, c("TRUE", "FALSE")] |
337 | 2x |
stats::fisher.test(tbl)$p.value |
338 |
}
|
1 |
#' Multivariate logistic regression table
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Layout-creating function which summarizes a logistic variable regression for binary outcome with
|
|
6 |
#' categorical/continuous covariates in model statement. For each covariate category (if categorical)
|
|
7 |
#' or specified values (if continuous), present degrees of freedom, regression parameter estimate and
|
|
8 |
#' standard error (SE) relative to reference group or category. Report odds ratios for each covariate
|
|
9 |
#' category or specified values and corresponding Wald confidence intervals as default but allow user
|
|
10 |
#' to specify other confidence levels. Report p-value for Wald chi-square test of the null hypothesis
|
|
11 |
#' that covariate has no effect on response in model containing all specified covariates.
|
|
12 |
#' Allow option to include one two-way interaction and present similar output for
|
|
13 |
#' each interaction degree of freedom.
|
|
14 |
#'
|
|
15 |
#' @inheritParams argument_convention
|
|
16 |
#' @param drop_and_remove_str (`string`)\cr string to be dropped and removed.
|
|
17 |
#'
|
|
18 |
#' @return A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
|
|
19 |
#' Adding this function to an `rtable` layout will add a logistic regression variable summary to the table layout.
|
|
20 |
#'
|
|
21 |
#' @note For the formula, the variable names need to be standard `data.frame` column names without
|
|
22 |
#' special characters.
|
|
23 |
#'
|
|
24 |
#' @examples
|
|
25 |
#' library(dplyr)
|
|
26 |
#' library(broom)
|
|
27 |
#'
|
|
28 |
#' adrs_f <- tern_ex_adrs %>%
|
|
29 |
#' filter(PARAMCD == "BESRSPI") %>%
|
|
30 |
#' filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
|
|
31 |
#' mutate(
|
|
32 |
#' Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
|
|
33 |
#' RACE = factor(RACE),
|
|
34 |
#' SEX = factor(SEX)
|
|
35 |
#' )
|
|
36 |
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
|
|
37 |
#' mod1 <- fit_logistic(
|
|
38 |
#' data = adrs_f,
|
|
39 |
#' variables = list(
|
|
40 |
#' response = "Response",
|
|
41 |
#' arm = "ARMCD",
|
|
42 |
#' covariates = c("AGE", "RACE")
|
|
43 |
#' )
|
|
44 |
#' )
|
|
45 |
#' mod2 <- fit_logistic(
|
|
46 |
#' data = adrs_f,
|
|
47 |
#' variables = list(
|
|
48 |
#' response = "Response",
|
|
49 |
#' arm = "ARMCD",
|
|
50 |
#' covariates = c("AGE", "RACE"),
|
|
51 |
#' interaction = "AGE"
|
|
52 |
#' )
|
|
53 |
#' )
|
|
54 |
#'
|
|
55 |
#' df <- tidy(mod1, conf_level = 0.99)
|
|
56 |
#' df2 <- tidy(mod2, conf_level = 0.99)
|
|
57 |
#'
|
|
58 |
#' # flagging empty strings with "_"
|
|
59 |
#' df <- df_explicit_na(df, na_level = "_")
|
|
60 |
#' df2 <- df_explicit_na(df2, na_level = "_")
|
|
61 |
#'
|
|
62 |
#' result1 <- basic_table() %>%
|
|
63 |
#' summarize_logistic(
|
|
64 |
#' conf_level = 0.95,
|
|
65 |
#' drop_and_remove_str = "_"
|
|
66 |
#' ) %>%
|
|
67 |
#' build_table(df = df)
|
|
68 |
#' result1
|
|
69 |
#'
|
|
70 |
#' result2 <- basic_table() %>%
|
|
71 |
#' summarize_logistic(
|
|
72 |
#' conf_level = 0.95,
|
|
73 |
#' drop_and_remove_str = "_"
|
|
74 |
#' ) %>%
|
|
75 |
#' build_table(df = df2)
|
|
76 |
#' result2
|
|
77 |
#'
|
|
78 |
#' @export
|
|
79 |
#' @order 1
|
|
80 |
summarize_logistic <- function(lyt, |
|
81 |
conf_level,
|
|
82 |
drop_and_remove_str = "", |
|
83 |
.indent_mods = NULL) { |
|
84 |
# checks
|
|
85 | 3x |
checkmate::assert_string(drop_and_remove_str) |
86 | ||
87 | 3x |
sum_logistic_variable_test <- logistic_summary_by_flag("is_variable_summary") |
88 | 3x |
sum_logistic_term_estimates <- logistic_summary_by_flag("is_term_summary", .indent_mods = .indent_mods) |
89 | 3x |
sum_logistic_odds_ratios <- logistic_summary_by_flag("is_reference_summary", .indent_mods = .indent_mods) |
90 | 3x |
split_fun <- drop_and_remove_levels(drop_and_remove_str) |
91 | ||
92 | 3x |
lyt <- logistic_regression_cols(lyt, conf_level = conf_level) |
93 | 3x |
lyt <- split_rows_by(lyt, var = "variable", labels_var = "variable_label", split_fun = split_fun) |
94 | 3x |
lyt <- sum_logistic_variable_test(lyt) |
95 | 3x |
lyt <- split_rows_by(lyt, var = "term", labels_var = "term_label", split_fun = split_fun) |
96 | 3x |
lyt <- sum_logistic_term_estimates(lyt) |
97 | 3x |
lyt <- split_rows_by(lyt, var = "interaction", labels_var = "interaction_label", split_fun = split_fun) |
98 | 3x |
lyt <- split_rows_by(lyt, var = "reference", labels_var = "reference_label", split_fun = split_fun) |
99 | 3x |
lyt <- sum_logistic_odds_ratios(lyt) |
100 | 3x |
lyt
|
101 |
}
|
|
102 | ||
103 |
#' Fit for logistic regression
|
|
104 |
#'
|
|
105 |
#' @description `r lifecycle::badge("stable")`
|
|
106 |
#'
|
|
107 |
#' Fit a (conditional) logistic regression model.
|
|
108 |
#'
|
|
109 |
#' @inheritParams argument_convention
|
|
110 |
#' @param data (`data.frame`)\cr the data frame on which the model was fit.
|
|
111 |
#' @param response_definition (`string`)\cr the definition of what an event is in terms of `response`.
|
|
112 |
#' This will be used when fitting the (conditional) logistic regression model on the left hand
|
|
113 |
#' side of the formula.
|
|
114 |
#'
|
|
115 |
#' @return A fitted logistic regression model.
|
|
116 |
#'
|
|
117 |
#' @section Model Specification:
|
|
118 |
#'
|
|
119 |
#' The `variables` list needs to include the following elements:
|
|
120 |
#' * `arm`: Treatment arm variable name.
|
|
121 |
#' * `response`: The response arm variable name. Usually this is a 0/1 variable.
|
|
122 |
#' * `covariates`: This is either `NULL` (no covariates) or a character vector of covariate variable names.
|
|
123 |
#' * `interaction`: This is either `NULL` (no interaction) or a string of a single covariate variable name already
|
|
124 |
#' included in `covariates`. Then the interaction with the treatment arm is included in the model.
|
|
125 |
#'
|
|
126 |
#' @examples
|
|
127 |
#' library(dplyr)
|
|
128 |
#'
|
|
129 |
#' adrs_f <- tern_ex_adrs %>%
|
|
130 |
#' filter(PARAMCD == "BESRSPI") %>%
|
|
131 |
#' filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
|
|
132 |
#' mutate(
|
|
133 |
#' Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
|
|
134 |
#' RACE = factor(RACE),
|
|
135 |
#' SEX = factor(SEX)
|
|
136 |
#' )
|
|
137 |
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
|
|
138 |
#' mod1 <- fit_logistic(
|
|
139 |
#' data = adrs_f,
|
|
140 |
#' variables = list(
|
|
141 |
#' response = "Response",
|
|
142 |
#' arm = "ARMCD",
|
|
143 |
#' covariates = c("AGE", "RACE")
|
|
144 |
#' )
|
|
145 |
#' )
|
|
146 |
#' mod2 <- fit_logistic(
|
|
147 |
#' data = adrs_f,
|
|
148 |
#' variables = list(
|
|
149 |
#' response = "Response",
|
|
150 |
#' arm = "ARMCD",
|
|
151 |
#' covariates = c("AGE", "RACE"),
|
|
152 |
#' interaction = "AGE"
|
|
153 |
#' )
|
|
154 |
#' )
|
|
155 |
#'
|
|
156 |
#' @export
|
|
157 |
fit_logistic <- function(data, |
|
158 |
variables = list( |
|
159 |
response = "Response", |
|
160 |
arm = "ARMCD", |
|
161 |
covariates = NULL, |
|
162 |
interaction = NULL, |
|
163 |
strata = NULL |
|
164 |
),
|
|
165 |
response_definition = "response") { |
|
166 | 75x |
assert_df_with_variables(data, variables) |
167 | 75x |
checkmate::assert_subset(names(variables), c("response", "arm", "covariates", "interaction", "strata")) |
168 | 75x |
checkmate::assert_string(response_definition) |
169 | 75x |
checkmate::assert_true(grepl("response", response_definition)) |
170 | ||
171 | 75x |
response_definition <- sub( |
172 | 75x |
pattern = "response", |
173 | 75x |
replacement = variables$response, |
174 | 75x |
x = response_definition, |
175 | 75x |
fixed = TRUE |
176 |
)
|
|
177 | 75x |
form <- paste0(response_definition, " ~ ", variables$arm) |
178 | 75x |
if (!is.null(variables$covariates)) { |
179 | 29x |
form <- paste0(form, " + ", paste(variables$covariates, collapse = " + ")) |
180 |
}
|
|
181 | 75x |
if (!is.null(variables$interaction)) { |
182 | 18x |
checkmate::assert_string(variables$interaction) |
183 | 18x |
checkmate::assert_subset(variables$interaction, variables$covariates) |
184 | 18x |
form <- paste0(form, " + ", variables$arm, ":", variables$interaction) |
185 |
}
|
|
186 | 75x |
if (!is.null(variables$strata)) { |
187 | 14x |
strata_arg <- if (length(variables$strata) > 1) { |
188 | 7x |
paste0("I(interaction(", paste0(variables$strata, collapse = ", "), "))") |
189 |
} else { |
|
190 | 7x |
variables$strata |
191 |
}
|
|
192 | 14x |
form <- paste0(form, "+ strata(", strata_arg, ")") |
193 |
}
|
|
194 | 75x |
formula <- stats::as.formula(form) |
195 | 75x |
if (is.null(variables$strata)) { |
196 | 61x |
stats::glm( |
197 | 61x |
formula = formula, |
198 | 61x |
data = data, |
199 | 61x |
family = stats::binomial("logit") |
200 |
)
|
|
201 |
} else { |
|
202 | 14x |
clogit_with_tryCatch( |
203 | 14x |
formula = formula, |
204 | 14x |
data = data, |
205 | 14x |
x = TRUE |
206 |
)
|
|
207 |
}
|
|
208 |
}
|
|
209 | ||
210 |
#' Custom tidy method for binomial GLM results
|
|
211 |
#'
|
|
212 |
#' @description `r lifecycle::badge("stable")`
|
|
213 |
#'
|
|
214 |
#' Helper method (for [broom::tidy()]) to prepare a data frame from a `glm` object
|
|
215 |
#' with `binomial` family.
|
|
216 |
#'
|
|
217 |
#' @inheritParams argument_convention
|
|
218 |
#' @param at (`numeric` or `NULL`)\cr optional values for the interaction variable. Otherwise the median is used.
|
|
219 |
#' @param x (`glm`)\cr logistic regression model fitted by [stats::glm()] with "binomial" family.
|
|
220 |
#'
|
|
221 |
#' @return A `data.frame` containing the tidied model.
|
|
222 |
#'
|
|
223 |
#' @method tidy glm
|
|
224 |
#'
|
|
225 |
#' @seealso [h_logistic_regression] for relevant helper functions.
|
|
226 |
#'
|
|
227 |
#' @examples
|
|
228 |
#' library(dplyr)
|
|
229 |
#' library(broom)
|
|
230 |
#'
|
|
231 |
#' adrs_f <- tern_ex_adrs %>%
|
|
232 |
#' filter(PARAMCD == "BESRSPI") %>%
|
|
233 |
#' filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
|
|
234 |
#' mutate(
|
|
235 |
#' Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
|
|
236 |
#' RACE = factor(RACE),
|
|
237 |
#' SEX = factor(SEX)
|
|
238 |
#' )
|
|
239 |
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
|
|
240 |
#' mod1 <- fit_logistic(
|
|
241 |
#' data = adrs_f,
|
|
242 |
#' variables = list(
|
|
243 |
#' response = "Response",
|
|
244 |
#' arm = "ARMCD",
|
|
245 |
#' covariates = c("AGE", "RACE")
|
|
246 |
#' )
|
|
247 |
#' )
|
|
248 |
#' mod2 <- fit_logistic(
|
|
249 |
#' data = adrs_f,
|
|
250 |
#' variables = list(
|
|
251 |
#' response = "Response",
|
|
252 |
#' arm = "ARMCD",
|
|
253 |
#' covariates = c("AGE", "RACE"),
|
|
254 |
#' interaction = "AGE"
|
|
255 |
#' )
|
|
256 |
#' )
|
|
257 |
#'
|
|
258 |
#' df <- tidy(mod1, conf_level = 0.99)
|
|
259 |
#' df2 <- tidy(mod2, conf_level = 0.99)
|
|
260 |
#'
|
|
261 |
#' @export
|
|
262 |
tidy.glm <- function(x, # nolint |
|
263 |
conf_level = 0.95, |
|
264 |
at = NULL, |
|
265 |
...) { |
|
266 | 5x |
checkmate::assert_class(x, "glm") |
267 | 5x |
checkmate::assert_set_equal(x$family$family, "binomial") |
268 | ||
269 | 5x |
terms_name <- attr(stats::terms(x), "term.labels") |
270 | 5x |
xs_class <- attr(x$terms, "dataClasses") |
271 | 5x |
interaction <- terms_name[which(!terms_name %in% names(xs_class))] |
272 | 5x |
df <- if (length(interaction) == 0) { |
273 | 2x |
h_logistic_simple_terms( |
274 | 2x |
x = terms_name, |
275 | 2x |
fit_glm = x, |
276 | 2x |
conf_level = conf_level |
277 |
)
|
|
278 |
} else { |
|
279 | 3x |
h_logistic_inter_terms( |
280 | 3x |
x = terms_name, |
281 | 3x |
fit_glm = x, |
282 | 3x |
conf_level = conf_level, |
283 | 3x |
at = at |
284 |
)
|
|
285 |
}
|
|
286 | 5x |
for (var in c("variable", "term", "interaction", "reference")) { |
287 | 20x |
df[[var]] <- factor(df[[var]], levels = unique(df[[var]])) |
288 |
}
|
|
289 | 5x |
df
|
290 |
}
|
|
291 | ||
292 |
#' Logistic regression multivariate column layout function
|
|
293 |
#'
|
|
294 |
#' @description `r lifecycle::badge("stable")`
|
|
295 |
#'
|
|
296 |
#' Layout-creating function which creates a multivariate column layout summarizing logistic
|
|
297 |
#' regression results. This function is a wrapper for [rtables::split_cols_by_multivar()].
|
|
298 |
#'
|
|
299 |
#' @inheritParams argument_convention
|
|
300 |
#'
|
|
301 |
#' @return A layout object suitable for passing to further layouting functions. Adding this
|
|
302 |
#' function to an `rtable` layout will split the table into columns corresponding to
|
|
303 |
#' statistics `df`, `estimate`, `std_error`, `odds_ratio`, `ci`, and `pvalue`.
|
|
304 |
#'
|
|
305 |
#' @export
|
|
306 |
logistic_regression_cols <- function(lyt, |
|
307 |
conf_level = 0.95) { |
|
308 | 4x |
vars <- c("df", "estimate", "std_error", "odds_ratio", "ci", "pvalue") |
309 | 4x |
var_labels <- c( |
310 | 4x |
df = "Degrees of Freedom", |
311 | 4x |
estimate = "Parameter Estimate", |
312 | 4x |
std_error = "Standard Error", |
313 | 4x |
odds_ratio = "Odds Ratio", |
314 | 4x |
ci = paste("Wald", f_conf_level(conf_level)), |
315 | 4x |
pvalue = "p-value" |
316 |
)
|
|
317 | 4x |
split_cols_by_multivar( |
318 | 4x |
lyt = lyt, |
319 | 4x |
vars = vars, |
320 | 4x |
varlabels = var_labels |
321 |
)
|
|
322 |
}
|
|
323 | ||
324 |
#' Logistic regression summary table
|
|
325 |
#'
|
|
326 |
#' @description `r lifecycle::badge("stable")`
|
|
327 |
#'
|
|
328 |
#' Constructor for content functions to be used in [`summarize_logistic()`] to summarize
|
|
329 |
#' logistic regression results. This function is a wrapper for [rtables::summarize_row_groups()].
|
|
330 |
#'
|
|
331 |
#' @inheritParams argument_convention
|
|
332 |
#' @param flag_var (`string`)\cr variable name identifying which row should be used in this
|
|
333 |
#' content function.
|
|
334 |
#'
|
|
335 |
#' @return A content function.
|
|
336 |
#'
|
|
337 |
#' @export
|
|
338 |
logistic_summary_by_flag <- function(flag_var, na_str = default_na_str(), .indent_mods = NULL) { |
|
339 | 10x |
checkmate::assert_string(flag_var) |
340 | 10x |
function(lyt) { |
341 | 10x |
cfun_list <- list( |
342 | 10x |
df = cfun_by_flag("df", flag_var, format = "xx.", .indent_mods = .indent_mods), |
343 | 10x |
estimate = cfun_by_flag("estimate", flag_var, format = "xx.xxx", .indent_mods = .indent_mods), |
344 | 10x |
std_error = cfun_by_flag("std_error", flag_var, format = "xx.xxx", .indent_mods = .indent_mods), |
345 | 10x |
odds_ratio = cfun_by_flag("odds_ratio", flag_var, format = ">999.99", .indent_mods = .indent_mods), |
346 | 10x |
ci = cfun_by_flag("ci", flag_var, format = format_extreme_values_ci(2L), .indent_mods = .indent_mods), |
347 | 10x |
pvalue = cfun_by_flag("pvalue", flag_var, format = "x.xxxx | (<0.0001)", .indent_mods = .indent_mods) |
348 |
)
|
|
349 | 10x |
summarize_row_groups( |
350 | 10x |
lyt = lyt, |
351 | 10x |
cfun = cfun_list, |
352 | 10x |
na_str = na_str |
353 |
)
|
|
354 |
}
|
|
355 |
}
|
1 |
#' Helper functions for tabulating biomarker effects on survival by subgroup
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Helper functions which are documented here separately to not confuse the user
|
|
6 |
#' when reading about the user-facing functions.
|
|
7 |
#'
|
|
8 |
#' @inheritParams survival_biomarkers_subgroups
|
|
9 |
#' @inheritParams argument_convention
|
|
10 |
#' @inheritParams fit_coxreg_multivar
|
|
11 |
#'
|
|
12 |
#' @examples
|
|
13 |
#' library(dplyr)
|
|
14 |
#' library(forcats)
|
|
15 |
#'
|
|
16 |
#' adtte <- tern_ex_adtte
|
|
17 |
#'
|
|
18 |
#' # Save variable labels before data processing steps.
|
|
19 |
#' adtte_labels <- formatters::var_labels(adtte, fill = FALSE)
|
|
20 |
#'
|
|
21 |
#' adtte_f <- adtte %>%
|
|
22 |
#' filter(PARAMCD == "OS") %>%
|
|
23 |
#' mutate(
|
|
24 |
#' AVALU = as.character(AVALU),
|
|
25 |
#' is_event = CNSR == 0
|
|
26 |
#' )
|
|
27 |
#' labels <- c("AVALU" = adtte_labels[["AVALU"]], "is_event" = "Event Flag")
|
|
28 |
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
|
|
29 |
#'
|
|
30 |
#' @name h_survival_biomarkers_subgroups
|
|
31 |
NULL
|
|
32 | ||
33 |
#' @describeIn h_survival_biomarkers_subgroups Helps with converting the "survival" function variable list
|
|
34 |
#' to the "Cox regression" variable list. The reason is that currently there is an inconsistency between the variable
|
|
35 |
#' names accepted by `extract_survival_subgroups()` and `fit_coxreg_multivar()`.
|
|
36 |
#'
|
|
37 |
#' @param biomarker (`string`)\cr the name of the biomarker variable.
|
|
38 |
#'
|
|
39 |
#' @return
|
|
40 |
#' * `h_surv_to_coxreg_variables()` returns a named `list` of elements `time`, `event`, `arm`,
|
|
41 |
#' `covariates`, and `strata`.
|
|
42 |
#'
|
|
43 |
#' @examples
|
|
44 |
#' # This is how the variable list is converted internally.
|
|
45 |
#' h_surv_to_coxreg_variables(
|
|
46 |
#' variables = list(
|
|
47 |
#' tte = "AVAL",
|
|
48 |
#' is_event = "EVNT",
|
|
49 |
#' covariates = c("A", "B"),
|
|
50 |
#' strata = "D"
|
|
51 |
#' ),
|
|
52 |
#' biomarker = "AGE"
|
|
53 |
#' )
|
|
54 |
#'
|
|
55 |
#' @export
|
|
56 |
h_surv_to_coxreg_variables <- function(variables, biomarker) { |
|
57 | 65x |
checkmate::assert_list(variables) |
58 | 65x |
checkmate::assert_string(variables$tte) |
59 | 65x |
checkmate::assert_string(variables$is_event) |
60 | 65x |
checkmate::assert_string(biomarker) |
61 | 65x |
list( |
62 | 65x |
time = variables$tte, |
63 | 65x |
event = variables$is_event, |
64 | 65x |
arm = biomarker, |
65 | 65x |
covariates = variables$covariates, |
66 | 65x |
strata = variables$strata |
67 |
)
|
|
68 |
}
|
|
69 | ||
70 |
#' @describeIn h_survival_biomarkers_subgroups Prepares estimates for number of events, patients and median survival
|
|
71 |
#' times, as well as hazard ratio estimates, confidence intervals and p-values, for multiple biomarkers
|
|
72 |
#' in a given single data set.
|
|
73 |
#' `variables` corresponds to names of variables found in `data`, passed as a named list and requires elements
|
|
74 |
#' `tte`, `is_event`, `biomarkers` (vector of continuous biomarker variables) and optionally `subgroups` and `strata`.
|
|
75 |
#'
|
|
76 |
#' @return
|
|
77 |
#' * `h_coxreg_mult_cont_df()` returns a `data.frame` containing estimates and statistics for the selected biomarkers.
|
|
78 |
#'
|
|
79 |
#' @examples
|
|
80 |
#' # For a single population, estimate separately the effects
|
|
81 |
#' # of two biomarkers.
|
|
82 |
#' df <- h_coxreg_mult_cont_df(
|
|
83 |
#' variables = list(
|
|
84 |
#' tte = "AVAL",
|
|
85 |
#' is_event = "is_event",
|
|
86 |
#' biomarkers = c("BMRKR1", "AGE"),
|
|
87 |
#' covariates = "SEX",
|
|
88 |
#' strata = c("STRATA1", "STRATA2")
|
|
89 |
#' ),
|
|
90 |
#' data = adtte_f
|
|
91 |
#' )
|
|
92 |
#' df
|
|
93 |
#'
|
|
94 |
#' # If the data set is empty, still the corresponding rows with missings are returned.
|
|
95 |
#' h_coxreg_mult_cont_df(
|
|
96 |
#' variables = list(
|
|
97 |
#' tte = "AVAL",
|
|
98 |
#' is_event = "is_event",
|
|
99 |
#' biomarkers = c("BMRKR1", "AGE"),
|
|
100 |
#' covariates = "REGION1",
|
|
101 |
#' strata = c("STRATA1", "STRATA2")
|
|
102 |
#' ),
|
|
103 |
#' data = adtte_f[NULL, ]
|
|
104 |
#' )
|
|
105 |
#'
|
|
106 |
#' @export
|
|
107 |
h_coxreg_mult_cont_df <- function(variables, |
|
108 |
data,
|
|
109 |
control = control_coxreg()) { |
|
110 | 33x |
if ("strat" %in% names(variables)) { |
111 | ! |
warning( |
112 | ! |
"Warning: the `strat` element name of the `variables` list argument to `h_coxreg_mult_cont_df() ",
|
113 | ! |
"was deprecated in tern 0.9.4.\n ",
|
114 | ! |
"Please use the name `strata` instead of `strat` in the `variables` argument."
|
115 |
)
|
|
116 | ! |
variables[["strata"]] <- variables[["strat"]] |
117 |
}
|
|
118 | ||
119 | 33x |
assert_df_with_variables(data, variables) |
120 | 33x |
checkmate::assert_list(control, names = "named") |
121 | 33x |
checkmate::assert_character(variables$biomarkers, min.len = 1, any.missing = FALSE) |
122 | 33x |
conf_level <- control[["conf_level"]] |
123 | 33x |
pval_label <- paste0( |
124 |
# the regex capitalizes the first letter of the string / senetence.
|
|
125 | 33x |
"p-value (", gsub("(^[a-z])", "\\U\\1", trimws(control[["pval_method"]]), perl = TRUE), ")" |
126 |
)
|
|
127 |
# If there is any data, run model, otherwise return empty results.
|
|
128 | 33x |
if (nrow(data) > 0) { |
129 | 32x |
bm_cols <- match(variables$biomarkers, names(data)) |
130 | 32x |
l_result <- lapply(variables$biomarkers, function(bm) { |
131 | 64x |
coxreg_list <- fit_coxreg_multivar( |
132 | 64x |
variables = h_surv_to_coxreg_variables(variables, bm), |
133 | 64x |
data = data, |
134 | 64x |
control = control |
135 |
)
|
|
136 | 64x |
result <- do.call( |
137 | 64x |
h_coxreg_multivar_extract,
|
138 | 64x |
c(list(var = bm), coxreg_list[c("mod", "data", "control")]) |
139 |
)
|
|
140 | 64x |
data_fit <- as.data.frame(as.matrix(coxreg_list$mod$y)) |
141 | 64x |
data_fit$status <- as.logical(data_fit$status) |
142 | 64x |
median <- s_surv_time( |
143 | 64x |
df = data_fit, |
144 | 64x |
.var = "time", |
145 | 64x |
is_event = "status" |
146 | 64x |
)$median |
147 | 64x |
data.frame( |
148 |
# Dummy column needed downstream to create a nested header.
|
|
149 | 64x |
biomarker = bm, |
150 | 64x |
biomarker_label = formatters::var_labels(data[bm], fill = TRUE), |
151 | 64x |
n_tot = coxreg_list$mod$n, |
152 | 64x |
n_tot_events = coxreg_list$mod$nevent, |
153 | 64x |
median = as.numeric(median), |
154 | 64x |
result[1L, c("hr", "lcl", "ucl")], |
155 | 64x |
conf_level = conf_level, |
156 | 64x |
pval = result[1L, "pval"], |
157 | 64x |
pval_label = pval_label, |
158 | 64x |
stringsAsFactors = FALSE |
159 |
)
|
|
160 |
}) |
|
161 | 32x |
do.call(rbind, args = c(l_result, make.row.names = FALSE)) |
162 |
} else { |
|
163 | 1x |
data.frame( |
164 | 1x |
biomarker = variables$biomarkers, |
165 | 1x |
biomarker_label = formatters::var_labels(data[variables$biomarkers], fill = TRUE), |
166 | 1x |
n_tot = 0L, |
167 | 1x |
n_tot_events = 0L, |
168 | 1x |
median = NA, |
169 | 1x |
hr = NA, |
170 | 1x |
lcl = NA, |
171 | 1x |
ucl = NA, |
172 | 1x |
conf_level = conf_level, |
173 | 1x |
pval = NA, |
174 | 1x |
pval_label = pval_label, |
175 | 1x |
row.names = seq_along(variables$biomarkers), |
176 | 1x |
stringsAsFactors = FALSE |
177 |
)
|
|
178 |
}
|
|
179 |
}
|
1 |
#' Survival time analysis
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [surv_time()] creates a layout element to analyze survival time by calculating survival time
|
|
6 |
#' median, median confidence interval, quantiles, and range (for all, censored, or event patients). The primary
|
|
7 |
#' analysis variable `vars` is the time variable and the secondary analysis variable `is_event` indicates whether or
|
|
8 |
#' not an event has occurred.
|
|
9 |
#'
|
|
10 |
#' @inheritParams argument_convention
|
|
11 |
#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function
|
|
12 |
#' [control_surv_time()]. Some possible parameter options are:
|
|
13 |
#' * `conf_level` (`proportion`)\cr confidence level of the interval for survival time.
|
|
14 |
#' * `conf_type` (`string`)\cr confidence interval type. Options are "plain" (default), "log", or "log-log",
|
|
15 |
#' see more in [survival::survfit()]. Note option "none" is not supported.
|
|
16 |
#' * `quantiles` (`numeric`)\cr vector of length two to specify the quantiles of survival time.
|
|
17 |
#' @param ref_fn_censor (`flag`)\cr whether referential footnotes indicating censored observations should be printed
|
|
18 |
#' when the `range` statistic is included.
|
|
19 |
#' @param .indent_mods (named `integer`)\cr indent modifiers for the labels. Each element of the vector
|
|
20 |
#' should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
|
|
21 |
#' for that statistic's row label.
|
|
22 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
23 |
#'
|
|
24 |
#' Options are: ``r shQuote(get_stats("surv_time"), type = "sh")``
|
|
25 |
#'
|
|
26 |
#' @examples
|
|
27 |
#' library(dplyr)
|
|
28 |
#'
|
|
29 |
#' adtte_f <- tern_ex_adtte %>%
|
|
30 |
#' filter(PARAMCD == "OS") %>%
|
|
31 |
#' mutate(
|
|
32 |
#' AVAL = day2month(AVAL),
|
|
33 |
#' is_event = CNSR == 0
|
|
34 |
#' )
|
|
35 |
#' df <- adtte_f %>% filter(ARMCD == "ARM A")
|
|
36 |
#'
|
|
37 |
#' @name survival_time
|
|
38 |
#' @order 1
|
|
39 |
NULL
|
|
40 | ||
41 |
#' @describeIn survival_time Statistics function which analyzes survival times.
|
|
42 |
#'
|
|
43 |
#' @return
|
|
44 |
#' * `s_surv_time()` returns the statistics:
|
|
45 |
#' * `median`: Median survival time.
|
|
46 |
#' * `median_ci`: Confidence interval for median time.
|
|
47 |
#' * `median_ci_3d`: Median with confidence interval for median time.
|
|
48 |
#' * `quantiles`: Survival time for two specified quantiles.
|
|
49 |
#' * `quantiles_lower`: quantile with confidence interval for the first specified quantile.
|
|
50 |
#' * `quantiles_upper`: quantile with confidence interval for the second specified quantile.
|
|
51 |
#' * `range_censor`: Survival time range for censored observations.
|
|
52 |
#' * `range_event`: Survival time range for observations with events.
|
|
53 |
#' * `range`: Survival time range for all observations.
|
|
54 |
#'
|
|
55 |
#' @keywords internal
|
|
56 |
s_surv_time <- function(df, |
|
57 |
.var,
|
|
58 |
...,
|
|
59 |
is_event,
|
|
60 |
control = control_surv_time()) { |
|
61 | 232x |
checkmate::assert_string(.var) |
62 | 232x |
assert_df_with_variables(df, list(tte = .var, is_event = is_event)) |
63 | 232x |
checkmate::assert_numeric(df[[.var]], min.len = 1, any.missing = FALSE) |
64 | 232x |
checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE) |
65 | ||
66 | 232x |
conf_type <- control$conf_type |
67 | 232x |
conf_level <- control$conf_level |
68 | 232x |
quantiles <- control$quantiles |
69 | ||
70 | 232x |
formula <- stats::as.formula(paste0("survival::Surv(", .var, ", ", is_event, ") ~ 1")) |
71 | 232x |
srv_fit <- survival::survfit( |
72 | 232x |
formula = formula, |
73 | 232x |
data = df, |
74 | 232x |
conf.int = conf_level, |
75 | 232x |
conf.type = conf_type |
76 |
)
|
|
77 | 232x |
srv_tab <- summary(srv_fit, extend = TRUE)$table |
78 | 232x |
srv_qt_tab_pre <- stats::quantile(srv_fit, probs = quantiles) |
79 | 232x |
srv_qt_tab <- srv_qt_tab_pre$quantile |
80 | 232x |
range_censor <- range_noinf(df[[.var]][!df[[is_event]]], na.rm = TRUE) |
81 | 232x |
range_event <- range_noinf(df[[.var]][df[[is_event]]], na.rm = TRUE) |
82 | 232x |
range <- range_noinf(df[[.var]], na.rm = TRUE) |
83 | ||
84 | 232x |
names(quantiles) <- as.character(100 * quantiles) |
85 | 232x |
srv_qt_tab_pre <- unlist(srv_qt_tab_pre) |
86 | 232x |
srv_qt_ci <- lapply(quantiles, function(x) { |
87 | 464x |
name <- as.character(100 * x) |
88 | ||
89 | 464x |
c( |
90 | 464x |
srv_qt_tab_pre[[paste0("quantile.", name)]], |
91 | 464x |
srv_qt_tab_pre[[paste0("lower.", name)]], |
92 | 464x |
srv_qt_tab_pre[[paste0("upper.", name)]] |
93 |
)
|
|
94 |
}) |
|
95 | ||
96 | 232x |
list( |
97 | 232x |
median = formatters::with_label(unname(srv_tab["median"]), "Median"), |
98 | 232x |
median_ci = formatters::with_label( |
99 | 232x |
unname(srv_tab[paste0(srv_fit$conf.int, c("LCL", "UCL"))]), f_conf_level(conf_level) |
100 |
),
|
|
101 | 232x |
quantiles = formatters::with_label( |
102 | 232x |
unname(srv_qt_tab), paste0(quantiles[1] * 100, "% and ", quantiles[2] * 100, "%-ile") |
103 |
),
|
|
104 | 232x |
range_censor = formatters::with_label(range_censor, "Range (censored)"), |
105 | 232x |
range_event = formatters::with_label(range_event, "Range (event)"), |
106 | 232x |
range = formatters::with_label(range, "Range"), |
107 | 232x |
median_ci_3d = formatters::with_label( |
108 | 232x |
c( |
109 | 232x |
unname(srv_tab["median"]), |
110 | 232x |
unname(srv_tab[paste0(srv_fit$conf.int, c("LCL", "UCL"))]) |
111 |
),
|
|
112 | 232x |
paste0("Median (", f_conf_level(conf_level), ")") |
113 |
),
|
|
114 | 232x |
quantiles_lower = formatters::with_label( |
115 | 232x |
unname(srv_qt_ci[[1]]), paste0(quantiles[1] * 100, "%-ile (", f_conf_level(conf_level), ")") |
116 |
),
|
|
117 | 232x |
quantiles_upper = formatters::with_label( |
118 | 232x |
unname(srv_qt_ci[[2]]), paste0(quantiles[2] * 100, "%-ile (", f_conf_level(conf_level), ")") |
119 |
)
|
|
120 |
)
|
|
121 |
}
|
|
122 | ||
123 |
#' @describeIn survival_time Formatted analysis function which is used as `afun` in `surv_time()`.
|
|
124 |
#'
|
|
125 |
#' @return
|
|
126 |
#' * `a_surv_time()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
127 |
#'
|
|
128 |
#' @examples
|
|
129 |
#' a_surv_time(
|
|
130 |
#' df,
|
|
131 |
#' .df_row = df,
|
|
132 |
#' .var = "AVAL",
|
|
133 |
#' is_event = "is_event"
|
|
134 |
#' )
|
|
135 |
#'
|
|
136 |
#' @export
|
|
137 |
a_surv_time <- function(df, |
|
138 |
labelstr = "", |
|
139 |
...,
|
|
140 |
.stats = NULL, |
|
141 |
.stat_names = NULL, |
|
142 |
.formats = NULL, |
|
143 |
.labels = NULL, |
|
144 |
.indent_mods = NULL) { |
|
145 |
# Check for additional parameters to the statistics function
|
|
146 | 14x |
dots_extra_args <- list(...) |
147 | 14x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
148 | 14x |
dots_extra_args$.additional_fun_parameters <- NULL |
149 | ||
150 |
# Check for user-defined functions
|
|
151 | 14x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
152 | 14x |
.stats <- default_and_custom_stats_list$all_stats |
153 | 14x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
154 | ||
155 |
# Main statistic calculations
|
|
156 | 14x |
x_stats <- .apply_stat_functions( |
157 | 14x |
default_stat_fnc = s_surv_time, |
158 | 14x |
custom_stat_fnc_list = custom_stat_functions, |
159 | 14x |
args_list = c( |
160 | 14x |
df = list(df), |
161 | 14x |
labelstr = list(labelstr), |
162 | 14x |
extra_afun_params,
|
163 | 14x |
dots_extra_args
|
164 |
)
|
|
165 |
)
|
|
166 | ||
167 | 14x |
rng_censor_lwr <- x_stats[["range_censor"]][1] |
168 | 14x |
rng_censor_upr <- x_stats[["range_censor"]][2] |
169 | ||
170 |
# Fill in formatting defaults
|
|
171 | 14x |
.stats <- get_stats("surv_time", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) |
172 | 14x |
.formats <- get_formats_from_stats(.stats, .formats) |
173 | 14x |
.labels <- get_labels_from_stats(.stats, .labels) %>% labels_use_control(dots_extra_args$control) |
174 | 14x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods) |
175 | ||
176 | 14x |
x_stats <- x_stats[.stats] |
177 | ||
178 |
# Auto format handling
|
|
179 | 14x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
180 | ||
181 |
# Get and check statistical names
|
|
182 | 14x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
183 | ||
184 |
# Get cell footnotes
|
|
185 | 14x |
cell_fns <- stats::setNames(vector("list", length = length(x_stats)), .labels) |
186 | 14x |
if ("range" %in% names(x_stats) && "ref_fn_censor" %in% names(dots_extra_args) && dots_extra_args$ref_fn_censor) { |
187 | 14x |
if (identical(x_stats[["range"]][1], rng_censor_lwr) && identical(x_stats[["range"]][2], rng_censor_upr)) { |
188 | 2x |
cell_fns[[.labels[["range"]]]] <- "Censored observations: range minimum & maximum" |
189 | 12x |
} else if (identical(x_stats[["range"]][1], rng_censor_lwr)) { |
190 | 2x |
cell_fns[[.labels[["range"]]]] <- "Censored observation: range minimum" |
191 | 10x |
} else if (identical(x_stats[["range"]][2], rng_censor_upr)) { |
192 | 1x |
cell_fns[[.labels[["range"]]]] <- "Censored observation: range maximum" |
193 |
}
|
|
194 |
}
|
|
195 | ||
196 | 14x |
in_rows( |
197 | 14x |
.list = x_stats, |
198 | 14x |
.formats = .formats, |
199 | 14x |
.names = names(.labels), |
200 | 14x |
.stat_names = .stat_names, |
201 | 14x |
.labels = .labels %>% .unlist_keep_nulls(), |
202 | 14x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls(), |
203 | 14x |
.cell_footnotes = cell_fns |
204 |
)
|
|
205 |
}
|
|
206 | ||
207 |
#' @describeIn survival_time Layout-creating function which can take statistics function arguments
|
|
208 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
209 |
#'
|
|
210 |
#' @return
|
|
211 |
#' * `surv_time()` returns a layout object suitable for passing to further layouting functions,
|
|
212 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
213 |
#' the statistics from `s_surv_time()` to the table layout.
|
|
214 |
#'
|
|
215 |
#' @examples
|
|
216 |
#' basic_table() %>%
|
|
217 |
#' split_cols_by(var = "ARMCD") %>%
|
|
218 |
#' add_colcounts() %>%
|
|
219 |
#' surv_time(
|
|
220 |
#' vars = "AVAL",
|
|
221 |
#' var_labels = "Survival Time (Months)",
|
|
222 |
#' is_event = "is_event",
|
|
223 |
#' control = control_surv_time(conf_level = 0.9, conf_type = "log-log")
|
|
224 |
#' ) %>%
|
|
225 |
#' build_table(df = adtte_f)
|
|
226 |
#'
|
|
227 |
#' @export
|
|
228 |
#' @order 2
|
|
229 |
surv_time <- function(lyt, |
|
230 |
vars,
|
|
231 |
is_event,
|
|
232 |
control = control_surv_time(), |
|
233 |
ref_fn_censor = TRUE, |
|
234 |
na_str = default_na_str(), |
|
235 |
nested = TRUE, |
|
236 |
...,
|
|
237 |
var_labels = "Time to Event", |
|
238 |
show_labels = "visible", |
|
239 |
table_names = vars, |
|
240 |
.stats = c("median", "median_ci", "quantiles", "range"), |
|
241 |
.stat_names = NULL, |
|
242 |
.formats = list( |
|
243 |
median_ci = "(xx.x, xx.x)", quantiles = "xx.x, xx.x", range = "xx.x to xx.x", |
|
244 |
quantiles_lower = "xx.x (xx.x - xx.x)", quantiles_upper = "xx.x (xx.x - xx.x)", |
|
245 |
median_ci_3d = "xx.x (xx.x - xx.x)" |
|
246 |
),
|
|
247 |
.labels = list(median_ci = "95% CI", range = "Range"), |
|
248 |
.indent_mods = list(median_ci = 1L)) { |
|
249 |
# Process standard extra arguments
|
|
250 | 3x |
extra_args <- list(".stats" = .stats) |
251 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
252 | 3x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
253 | 3x |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
254 | 3x |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
255 | ||
256 |
# Process additional arguments to the statistic function
|
|
257 | 3x |
extra_args <- c( |
258 | 3x |
extra_args,
|
259 | 3x |
is_event = is_event, control = list(control), ref_fn_censor = ref_fn_censor, |
260 |
...
|
|
261 |
)
|
|
262 | ||
263 |
# Adding additional info from layout to analysis function
|
|
264 | 3x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
265 | 3x |
formals(a_surv_time) <- c(formals(a_surv_time), extra_args[[".additional_fun_parameters"]]) |
266 | ||
267 | 3x |
analyze( |
268 | 3x |
lyt = lyt, |
269 | 3x |
vars = vars, |
270 | 3x |
afun = a_surv_time, |
271 | 3x |
var_labels = var_labels, |
272 | 3x |
show_labels = show_labels, |
273 | 3x |
table_names = table_names, |
274 | 3x |
na_str = na_str, |
275 | 3x |
nested = nested, |
276 | 3x |
extra_args = extra_args |
277 |
)
|
|
278 |
}
|
1 |
#' Missing data
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Substitute missing data with a string or factor level.
|
|
6 |
#'
|
|
7 |
#' @param x (`factor` or `character`)\cr values for which any missing values should be substituted.
|
|
8 |
#' @param label (`string`)\cr string that missing data should be replaced with.
|
|
9 |
#'
|
|
10 |
#' @return `x` with any `NA` values substituted by `label`.
|
|
11 |
#'
|
|
12 |
#' @examples
|
|
13 |
#' explicit_na(c(NA, "a", "b"))
|
|
14 |
#' is.na(explicit_na(c(NA, "a", "b")))
|
|
15 |
#'
|
|
16 |
#' explicit_na(factor(c(NA, "a", "b")))
|
|
17 |
#' is.na(explicit_na(factor(c(NA, "a", "b"))))
|
|
18 |
#'
|
|
19 |
#' explicit_na(sas_na(c("a", "")))
|
|
20 |
#'
|
|
21 |
#' @export
|
|
22 |
explicit_na <- function(x, label = "<Missing>") { |
|
23 | 256x |
checkmate::assert_string(label) |
24 | ||
25 | 256x |
if (is.factor(x)) { |
26 | 151x |
x <- forcats::fct_na_value_to_level(x, label) |
27 | 151x |
forcats::fct_drop(x, only = label) |
28 | 105x |
} else if (is.character(x)) { |
29 | 105x |
x[is.na(x)] <- label |
30 | 105x |
x
|
31 |
} else { |
|
32 | ! |
stop("only factors and character vectors allowed") |
33 |
}
|
|
34 |
}
|
|
35 | ||
36 |
#' Convert strings to `NA`
|
|
37 |
#'
|
|
38 |
#' @description `r lifecycle::badge("stable")`
|
|
39 |
#'
|
|
40 |
#' SAS imports missing data as empty strings or strings with whitespaces only. This helper function can be used to
|
|
41 |
#' convert these values to `NA`s.
|
|
42 |
#'
|
|
43 |
#' @inheritParams explicit_na
|
|
44 |
#' @param empty (`flag`)\cr if `TRUE`, empty strings get replaced by `NA`.
|
|
45 |
#' @param whitespaces (`flag`)\cr if `TRUE`, strings made from only whitespaces get replaced with `NA`.
|
|
46 |
#'
|
|
47 |
#' @return `x` with `""` and/or whitespace-only values substituted by `NA`, depending on the values of
|
|
48 |
#' `empty` and `whitespaces`.
|
|
49 |
#'
|
|
50 |
#' @examples
|
|
51 |
#' sas_na(c("1", "", " ", " ", "b"))
|
|
52 |
#' sas_na(factor(c("", " ", "b")))
|
|
53 |
#'
|
|
54 |
#' is.na(sas_na(c("1", "", " ", " ", "b")))
|
|
55 |
#'
|
|
56 |
#' @export
|
|
57 |
sas_na <- function(x, empty = TRUE, whitespaces = TRUE) { |
|
58 | 245x |
checkmate::assert_flag(empty) |
59 | 245x |
checkmate::assert_flag(whitespaces) |
60 | ||
61 | 245x |
if (is.factor(x)) { |
62 | 135x |
empty_levels <- levels(x) == "" |
63 | 11x |
if (empty && any(empty_levels)) levels(x)[empty_levels] <- NA |
64 | ||
65 | 135x |
ws_levels <- grepl("^\\s+$", levels(x)) |
66 | ! |
if (whitespaces && any(ws_levels)) levels(x)[ws_levels] <- NA |
67 | ||
68 | 135x |
x
|
69 | 110x |
} else if (is.character(x)) { |
70 | 110x |
if (empty) x[x == ""] <- NA_character_ |
71 | ||
72 | 110x |
if (whitespaces) x[grepl("^\\s+$", x)] <- NA_character_ |
73 | ||
74 | 110x |
x
|
75 |
} else { |
|
76 | ! |
stop("only factors and character vectors allowed") |
77 |
}
|
|
78 |
}
|
1 |
#' Custom split functions
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Collection of useful functions that are expanding on the core list of functions
|
|
6 |
#' provided by `rtables`. See [rtables::custom_split_funs] and [rtables::make_split_fun()]
|
|
7 |
#' for more information on how to make a custom split function. All these functions
|
|
8 |
#' work with [rtables::split_rows_by()] argument `split_fun` to modify the way the split
|
|
9 |
#' happens. For other split functions, consider consulting [`rtables::split_funcs`].
|
|
10 |
#'
|
|
11 |
#' @seealso [rtables::make_split_fun()]
|
|
12 |
#'
|
|
13 |
#' @name utils_split_funs
|
|
14 |
NULL
|
|
15 | ||
16 |
#' @describeIn utils_split_funs Split function to place reference group facet at a specific position
|
|
17 |
#' during post-processing stage.
|
|
18 |
#'
|
|
19 |
#' @param position (`string` or `integer`)\cr position to use for the reference group facet. Can be `"first"`,
|
|
20 |
#' `"last"`, or a specific position.
|
|
21 |
#'
|
|
22 |
#' @return
|
|
23 |
#' * `ref_group_position()` returns an utility function that puts the reference group
|
|
24 |
#' as first, last or at a certain position and needs to be assigned to `split_fun`.
|
|
25 |
#'
|
|
26 |
#' @examples
|
|
27 |
#' library(dplyr)
|
|
28 |
#'
|
|
29 |
#' dat <- data.frame(
|
|
30 |
#' x = factor(letters[1:5], levels = letters[5:1]),
|
|
31 |
#' y = 1:5
|
|
32 |
#' )
|
|
33 |
#'
|
|
34 |
#' # With rtables layout functions
|
|
35 |
#' basic_table() %>%
|
|
36 |
#' split_cols_by("x", ref_group = "c", split_fun = ref_group_position("last")) %>%
|
|
37 |
#' analyze("y") %>%
|
|
38 |
#' build_table(dat)
|
|
39 |
#'
|
|
40 |
#' # With tern layout funcitons
|
|
41 |
#' adtte_f <- tern_ex_adtte %>%
|
|
42 |
#' filter(PARAMCD == "OS") %>%
|
|
43 |
#' mutate(
|
|
44 |
#' AVAL = day2month(AVAL),
|
|
45 |
#' is_event = CNSR == 0
|
|
46 |
#' )
|
|
47 |
#'
|
|
48 |
#' basic_table() %>%
|
|
49 |
#' split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position("first")) %>%
|
|
50 |
#' add_colcounts() %>%
|
|
51 |
#' surv_time(
|
|
52 |
#' vars = "AVAL",
|
|
53 |
#' var_labels = "Survival Time (Months)",
|
|
54 |
#' is_event = "is_event",
|
|
55 |
#' ) %>%
|
|
56 |
#' build_table(df = adtte_f)
|
|
57 |
#'
|
|
58 |
#' basic_table() %>%
|
|
59 |
#' split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position(2)) %>%
|
|
60 |
#' add_colcounts() %>%
|
|
61 |
#' surv_time(
|
|
62 |
#' vars = "AVAL",
|
|
63 |
#' var_labels = "Survival Time (Months)",
|
|
64 |
#' is_event = "is_event",
|
|
65 |
#' ) %>%
|
|
66 |
#' build_table(df = adtte_f)
|
|
67 |
#'
|
|
68 |
#' @export
|
|
69 |
ref_group_position <- function(position = "first") { |
|
70 | 20x |
make_split_fun( |
71 | 20x |
post = list( |
72 | 20x |
function(splret, spl, fulldf) { |
73 | 57x |
if (!"ref_group_value" %in% methods::slotNames(spl)) { |
74 | 1x |
stop("Reference group is undefined.") |
75 |
}
|
|
76 | ||
77 | 56x |
spl_var <- rtables:::spl_payload(spl) |
78 | 56x |
fulldf[[spl_var]] <- factor(fulldf[[spl_var]]) |
79 | 56x |
init_lvls <- levels(fulldf[[spl_var]]) |
80 | ||
81 | 56x |
if (!all(names(splret$values) %in% init_lvls)) { |
82 | ! |
stop("This split function does not work with combination facets.") |
83 |
}
|
|
84 | ||
85 | 56x |
ref_group_pos <- which(init_lvls == rtables:::spl_ref_group(spl)) |
86 | 56x |
pos_choices <- c("first", "last") |
87 | 56x |
if (checkmate::test_choice(position, pos_choices) && position == "first") { |
88 | 41x |
pos <- 0 |
89 | 15x |
} else if (checkmate::test_choice(position, pos_choices) && position == "last") { |
90 | 12x |
pos <- length(init_lvls) |
91 | 3x |
} else if (checkmate::test_int(position, lower = 1, upper = length(init_lvls))) { |
92 | 3x |
pos <- position - 1 |
93 |
} else { |
|
94 | ! |
stop("Wrong input for ref group position. It must be 'first', 'last', or a integer.") |
95 |
}
|
|
96 | ||
97 | 56x |
reord_lvls <- append(init_lvls[-ref_group_pos], init_lvls[ref_group_pos], after = pos) |
98 | 56x |
ord <- match(reord_lvls, names(splret$values)) |
99 | ||
100 | 56x |
make_split_result( |
101 | 56x |
splret$values[ord], |
102 | 56x |
splret$datasplit[ord], |
103 | 56x |
splret$labels[ord] |
104 |
)
|
|
105 |
}
|
|
106 |
)
|
|
107 |
)
|
|
108 |
}
|
|
109 | ||
110 |
#' @describeIn utils_split_funs Split function to change level order based on an `integer`
|
|
111 |
#' vector or a `character` vector that represent the split variable's factor levels.
|
|
112 |
#'
|
|
113 |
#' @param order (`character` or `numeric`)\cr vector of ordering indices for the split facets.
|
|
114 |
#'
|
|
115 |
#' @return
|
|
116 |
#' * `level_order()` returns an utility function that changes the original levels' order,
|
|
117 |
#' depending on input `order` and split levels.
|
|
118 |
#'
|
|
119 |
#' @examples
|
|
120 |
#' # level_order --------
|
|
121 |
#' # Even if default would bring ref_group first, the original order puts it last
|
|
122 |
#' basic_table() %>%
|
|
123 |
#' split_cols_by("Species", split_fun = level_order(c(1, 3, 2))) %>%
|
|
124 |
#' analyze("Sepal.Length") %>%
|
|
125 |
#' build_table(iris)
|
|
126 |
#'
|
|
127 |
#' # character vector
|
|
128 |
#' new_order <- level_order(levels(iris$Species)[c(1, 3, 2)])
|
|
129 |
#' basic_table() %>%
|
|
130 |
#' split_cols_by("Species", ref_group = "virginica", split_fun = new_order) %>%
|
|
131 |
#' analyze("Sepal.Length") %>%
|
|
132 |
#' build_table(iris)
|
|
133 |
#'
|
|
134 |
#' @export
|
|
135 |
level_order <- function(order) { |
|
136 | 2x |
make_split_fun( |
137 | 2x |
post = list( |
138 | 2x |
function(splret, spl, fulldf) { |
139 | 4x |
if (checkmate::test_integerish(order)) { |
140 | 1x |
checkmate::assert_integerish(order, lower = 1, upper = length(splret$values)) |
141 | 1x |
ord <- order |
142 |
} else { |
|
143 | 3x |
checkmate::assert_character(order, len = length(splret$values)) |
144 | 3x |
checkmate::assert_set_equal(order, names(splret$values), ordered = FALSE) |
145 | 3x |
ord <- match(order, names(splret$values)) |
146 |
}
|
|
147 | 4x |
make_split_result( |
148 | 4x |
splret$values[ord], |
149 | 4x |
splret$datasplit[ord], |
150 | 4x |
splret$labels[ord] |
151 |
)
|
|
152 |
}
|
|
153 |
)
|
|
154 |
)
|
|
155 |
}
|
1 |
#' Count occurrences
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [count_occurrences()] creates a layout element to calculate occurrence counts for patients.
|
|
6 |
#'
|
|
7 |
#' This function analyzes the variable(s) supplied to `vars` and returns a table of occurrence counts for
|
|
8 |
#' each unique value (or level) of the variable(s). This variable (or variables) must be
|
|
9 |
#' non-numeric. The `id` variable is used to indicate unique subject identifiers (defaults to `USUBJID`).
|
|
10 |
#'
|
|
11 |
#' If there are multiple occurrences of the same value recorded for a patient, the value is only counted once.
|
|
12 |
#'
|
|
13 |
#' The summarize function [summarize_occurrences()] performs the same function as [count_occurrences()] except it
|
|
14 |
#' creates content rows, not data rows, to summarize the current table row/column context and operates on the level of
|
|
15 |
#' the latest row split or the root of the table if no row splits have occurred.
|
|
16 |
#'
|
|
17 |
#' @inheritParams argument_convention
|
|
18 |
#' @param drop (`flag`)\cr whether non-appearing occurrence levels should be dropped from the resulting table.
|
|
19 |
#' Note that in that case the remaining occurrence levels in the table are sorted alphabetically.
|
|
20 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
21 |
#'
|
|
22 |
#' Options are: ``r shQuote(get_stats("count_occurrences"), type = "sh")``
|
|
23 |
#'
|
|
24 |
#' @note By default, occurrences which don't appear in a given row split are dropped from the table and
|
|
25 |
#' the occurrences in the table are sorted alphabetically per row split. Therefore, the corresponding layout
|
|
26 |
#' needs to use `split_fun = drop_split_levels` in the `split_rows_by` calls. Use `drop = FALSE` if you would
|
|
27 |
#' like to show all occurrences.
|
|
28 |
#'
|
|
29 |
#' @examples
|
|
30 |
#' library(dplyr)
|
|
31 |
#' df <- data.frame(
|
|
32 |
#' USUBJID = as.character(c(
|
|
33 |
#' 1, 1, 2, 4, 4, 4,
|
|
34 |
#' 6, 6, 6, 7, 7, 8
|
|
35 |
#' )),
|
|
36 |
#' MHDECOD = c(
|
|
37 |
#' "MH1", "MH2", "MH1", "MH1", "MH1", "MH3",
|
|
38 |
#' "MH2", "MH2", "MH3", "MH1", "MH2", "MH4"
|
|
39 |
#' ),
|
|
40 |
#' ARM = rep(c("A", "B"), each = 6),
|
|
41 |
#' SEX = c("F", "F", "M", "M", "M", "M", "F", "F", "F", "M", "M", "F")
|
|
42 |
#' )
|
|
43 |
#' df_adsl <- df %>%
|
|
44 |
#' select(USUBJID, ARM) %>%
|
|
45 |
#' unique()
|
|
46 |
#'
|
|
47 |
#' @name count_occurrences
|
|
48 |
#' @order 1
|
|
49 |
NULL
|
|
50 | ||
51 |
#' @describeIn count_occurrences Statistics function which counts number of patients that report an
|
|
52 |
#' occurrence.
|
|
53 |
#'
|
|
54 |
#' @param denom (`string`)\cr choice of denominator for proportion. Options are:
|
|
55 |
#' * `N_col`: total number of patients in this column across rows.
|
|
56 |
#' * `n`: number of patients with any occurrences.
|
|
57 |
#' * `N_row`: total number of patients in this row across columns.
|
|
58 |
#'
|
|
59 |
#' @return
|
|
60 |
#' * `s_count_occurrences()` returns a list with:
|
|
61 |
#' * `count`: list of counts with one element per occurrence.
|
|
62 |
#' * `count_fraction`: list of counts and fractions with one element per occurrence.
|
|
63 |
#' * `fraction`: list of numerators and denominators with one element per occurrence.
|
|
64 |
#'
|
|
65 |
#' @examples
|
|
66 |
#' # Count unique occurrences per subject.
|
|
67 |
#' s_count_occurrences(
|
|
68 |
#' df,
|
|
69 |
#' .N_col = 4L,
|
|
70 |
#' .N_row = 4L,
|
|
71 |
#' .df_row = df,
|
|
72 |
#' .var = "MHDECOD",
|
|
73 |
#' id = "USUBJID"
|
|
74 |
#' )
|
|
75 |
#'
|
|
76 |
#' @export
|
|
77 |
s_count_occurrences <- function(df, |
|
78 |
.var = "MHDECOD", |
|
79 |
.N_col, # nolint |
|
80 |
.N_row, # nolint |
|
81 |
.df_row,
|
|
82 |
...,
|
|
83 |
drop = TRUE, |
|
84 |
id = "USUBJID", |
|
85 |
denom = c("N_col", "n", "N_row")) { |
|
86 | 126x |
checkmate::assert_flag(drop) |
87 | 126x |
assert_df_with_variables(df, list(range = .var, id = id)) |
88 | 126x |
checkmate::assert_count(.N_col) |
89 | 126x |
checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character")) |
90 | 126x |
checkmate::assert_multi_class(df[[id]], classes = c("factor", "character")) |
91 | ||
92 | 126x |
occurrences <- if (drop) { |
93 |
# Note that we don't try to preserve original level order here since a) that would required
|
|
94 |
# more time to look up in large original levels and b) that would fail for character input variable.
|
|
95 | 115x |
occurrence_levels <- sort(unique(.df_row[[.var]])) |
96 | 115x |
if (length(occurrence_levels) == 0) { |
97 | 1x |
stop( |
98 | 1x |
"no empty `.df_row` input allowed when `drop = TRUE`,",
|
99 | 1x |
" please use `split_fun = drop_split_levels` in the `rtables` `split_rows_by` calls"
|
100 |
)
|
|
101 |
}
|
|
102 | 114x |
factor(df[[.var]], levels = occurrence_levels) |
103 |
} else { |
|
104 | 11x |
df[[.var]] |
105 |
}
|
|
106 | 125x |
ids <- factor(df[[id]]) |
107 | 125x |
denom <- match.arg(denom) %>% |
108 | 125x |
switch( |
109 | 125x |
n = nlevels(ids), |
110 | 125x |
N_row = .N_row, |
111 | 125x |
N_col = .N_col |
112 |
)
|
|
113 | 125x |
has_occurrence_per_id <- table(occurrences, ids) > 0 |
114 | 125x |
n_ids_per_occurrence <- as.list(rowSums(has_occurrence_per_id)) |
115 | 125x |
cur_count_fraction <- lapply( |
116 | 125x |
n_ids_per_occurrence,
|
117 | 125x |
function(i, denom) { |
118 | 514x |
if (i == 0 && denom == 0) { |
119 | ! |
c(0, 0) |
120 |
} else { |
|
121 | 514x |
c(i, i / denom) |
122 |
}
|
|
123 |
},
|
|
124 | 125x |
denom = denom |
125 |
)
|
|
126 | ||
127 | 125x |
list( |
128 | 125x |
count = n_ids_per_occurrence, |
129 | 125x |
count_fraction = cur_count_fraction, |
130 | 125x |
count_fraction_fixed_dp = cur_count_fraction, |
131 | 125x |
fraction = lapply( |
132 | 125x |
n_ids_per_occurrence,
|
133 | 125x |
function(i, denom) c("num" = i, "denom" = denom), |
134 | 125x |
denom = denom |
135 |
)
|
|
136 |
)
|
|
137 |
}
|
|
138 | ||
139 |
#' @describeIn count_occurrences Formatted analysis function which is used as `afun`
|
|
140 |
#' in `count_occurrences()`.
|
|
141 |
#'
|
|
142 |
#' @return
|
|
143 |
#' * `a_count_occurrences()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
144 |
#'
|
|
145 |
#' @examples
|
|
146 |
#' a_count_occurrences(
|
|
147 |
#' df,
|
|
148 |
#' .N_col = 4L,
|
|
149 |
#' .df_row = df,
|
|
150 |
#' .var = "MHDECOD",
|
|
151 |
#' id = "USUBJID"
|
|
152 |
#' )
|
|
153 |
#'
|
|
154 |
#' @export
|
|
155 |
a_count_occurrences <- function(df, |
|
156 |
labelstr = "", |
|
157 |
...,
|
|
158 |
.stats = NULL, |
|
159 |
.stat_names = NULL, |
|
160 |
.formats = NULL, |
|
161 |
.labels = NULL, |
|
162 |
.indent_mods = NULL) { |
|
163 |
# Check for additional parameters to the statistics function
|
|
164 | 85x |
dots_extra_args <- list(...) |
165 | 85x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
166 | 85x |
dots_extra_args$.additional_fun_parameters <- NULL |
167 | ||
168 |
# Check for user-defined functions
|
|
169 | 85x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
170 | 85x |
.stats <- default_and_custom_stats_list$all_stats |
171 | 85x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
172 | ||
173 |
# Apply statistics function
|
|
174 | 85x |
x_stats <- .apply_stat_functions( |
175 | 85x |
default_stat_fnc = s_count_occurrences, |
176 | 85x |
custom_stat_fnc_list = custom_stat_functions, |
177 | 85x |
args_list = c( |
178 | 85x |
df = list(df), |
179 | 85x |
extra_afun_params,
|
180 | 85x |
dots_extra_args
|
181 |
)
|
|
182 |
)
|
|
183 | ||
184 |
# if empty, return NA
|
|
185 | 85x |
if (is.null(unlist(x_stats))) { |
186 | 3x |
return(in_rows(.list = as.list(rep(NA, length(.stats))) %>% stats::setNames(.stats))) |
187 |
}
|
|
188 | ||
189 |
# Fill in formatting defaults
|
|
190 | 82x |
.stats <- get_stats("count_occurrences", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) |
191 | 82x |
x_stats <- x_stats[.stats] |
192 | 82x |
levels_per_stats <- lapply(x_stats, names) |
193 | 82x |
.formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) |
194 | 82x |
.labels <- get_labels_from_stats(.stats, .labels, levels_per_stats) |
195 | 82x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) |
196 | ||
197 | 82x |
x_stats <- x_stats[.stats] %>% |
198 | 82x |
.unlist_keep_nulls() %>% |
199 | 82x |
setNames(names(.formats)) |
200 | ||
201 |
# Auto format handling
|
|
202 | 82x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
203 | ||
204 |
# Get and check statistical names
|
|
205 | 82x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
206 | ||
207 | 82x |
in_rows( |
208 | 82x |
.list = x_stats, |
209 | 82x |
.formats = .formats, |
210 | 82x |
.names = .labels %>% .unlist_keep_nulls(), |
211 | 82x |
.stat_names = .stat_names, |
212 | 82x |
.labels = .labels %>% .unlist_keep_nulls(), |
213 | 82x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
214 |
)
|
|
215 |
}
|
|
216 | ||
217 |
#' @describeIn count_occurrences Layout-creating function which can take statistics function arguments
|
|
218 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
219 |
#'
|
|
220 |
#' @return
|
|
221 |
#' * `count_occurrences()` returns a layout object suitable for passing to further layouting functions,
|
|
222 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
223 |
#' the statistics from `s_count_occurrences()` to the table layout.
|
|
224 |
#'
|
|
225 |
#' @examples
|
|
226 |
#' # Create table layout
|
|
227 |
#' lyt <- basic_table() %>%
|
|
228 |
#' split_cols_by("ARM") %>%
|
|
229 |
#' add_colcounts() %>%
|
|
230 |
#' count_occurrences(vars = "MHDECOD", .stats = c("count_fraction"))
|
|
231 |
#'
|
|
232 |
#' # Apply table layout to data and produce `rtable` object
|
|
233 |
#' tbl <- lyt %>%
|
|
234 |
#' build_table(df, alt_counts_df = df_adsl) %>%
|
|
235 |
#' prune_table()
|
|
236 |
#'
|
|
237 |
#' tbl
|
|
238 |
#'
|
|
239 |
#' @export
|
|
240 |
#' @order 2
|
|
241 |
count_occurrences <- function(lyt, |
|
242 |
vars,
|
|
243 |
id = "USUBJID", |
|
244 |
drop = TRUE, |
|
245 |
var_labels = vars, |
|
246 |
show_labels = "hidden", |
|
247 |
riskdiff = FALSE, |
|
248 |
na_str = default_na_str(), |
|
249 |
nested = TRUE, |
|
250 |
...,
|
|
251 |
table_names = vars, |
|
252 |
.stats = "count_fraction_fixed_dp", |
|
253 |
.stat_names = NULL, |
|
254 |
.formats = NULL, |
|
255 |
.labels = NULL, |
|
256 |
.indent_mods = NULL) { |
|
257 | 9x |
checkmate::assert_flag(riskdiff) |
258 | 9x |
afun <- if (isFALSE(riskdiff)) a_count_occurrences else afun_riskdiff |
259 | ||
260 |
# Process standard extra arguments
|
|
261 | 9x |
extra_args <- list(".stats" = .stats) |
262 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
263 | ! |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
264 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
265 | 1x |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
266 | ||
267 |
# Process additional arguments to the statistic function
|
|
268 | 9x |
extra_args <- c( |
269 | 9x |
extra_args,
|
270 | 9x |
id = id, drop = drop, |
271 | 9x |
if (!isFALSE(riskdiff)) list(afun = list("s_count_occurrences" = a_count_occurrences)), |
272 |
...
|
|
273 |
)
|
|
274 | ||
275 |
# Append additional info from layout to the analysis function
|
|
276 | 9x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
277 | 9x |
formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]]) |
278 | ||
279 | 9x |
analyze( |
280 | 9x |
lyt = lyt, |
281 | 9x |
vars = vars, |
282 | 9x |
afun = afun, |
283 | 9x |
na_str = na_str, |
284 | 9x |
nested = nested, |
285 | 9x |
extra_args = extra_args, |
286 | 9x |
var_labels = var_labels, |
287 | 9x |
show_labels = show_labels, |
288 | 9x |
table_names = table_names |
289 |
)
|
|
290 |
}
|
|
291 | ||
292 |
#' @describeIn count_occurrences Layout-creating function which can take content function arguments
|
|
293 |
#' and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
|
|
294 |
#'
|
|
295 |
#' @return
|
|
296 |
#' * `summarize_occurrences()` returns a layout object suitable for passing to further layouting functions,
|
|
297 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted content rows
|
|
298 |
#' containing the statistics from `s_count_occurrences()` to the table layout.
|
|
299 |
#'
|
|
300 |
#' @examples
|
|
301 |
#' # Layout creating function with custom format.
|
|
302 |
#' basic_table() %>%
|
|
303 |
#' add_colcounts() %>%
|
|
304 |
#' split_rows_by("SEX", child_labels = "visible") %>%
|
|
305 |
#' summarize_occurrences(
|
|
306 |
#' var = "MHDECOD",
|
|
307 |
#' .formats = c("count_fraction" = "xx.xx (xx.xx%)")
|
|
308 |
#' ) %>%
|
|
309 |
#' build_table(df, alt_counts_df = df_adsl)
|
|
310 |
#'
|
|
311 |
#' @export
|
|
312 |
#' @order 3
|
|
313 |
summarize_occurrences <- function(lyt, |
|
314 |
var,
|
|
315 |
id = "USUBJID", |
|
316 |
drop = TRUE, |
|
317 |
riskdiff = FALSE, |
|
318 |
na_str = default_na_str(), |
|
319 |
...,
|
|
320 |
.stats = "count_fraction_fixed_dp", |
|
321 |
.stat_names = NULL, |
|
322 |
.formats = NULL, |
|
323 |
.indent_mods = 0L, |
|
324 |
.labels = NULL) { |
|
325 | 5x |
checkmate::assert_flag(riskdiff) |
326 | 5x |
afun <- if (isFALSE(riskdiff)) a_count_occurrences else afun_riskdiff |
327 | ||
328 |
# Process standard extra arguments
|
|
329 | 5x |
extra_args <- list(".stats" = .stats) |
330 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
331 | ! |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
332 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
333 | 5x |
if (is.null(.indent_mods)) { |
334 | ! |
indent_mod <- 0L |
335 | 5x |
} else if (length(.indent_mods) == 1) { |
336 | 5x |
indent_mod <- .indent_mods |
337 |
} else { |
|
338 | ! |
indent_mod <- 0L |
339 | ! |
extra_args[[".indent_mods"]] <- .indent_mods |
340 |
}
|
|
341 | ||
342 |
# Process additional arguments to the statistic function
|
|
343 | 5x |
extra_args <- c( |
344 | 5x |
extra_args,
|
345 | 5x |
id = id, drop = drop, |
346 | 5x |
if (!isFALSE(riskdiff)) list(afun = list("s_count_occurrences" = a_count_occurrences)), |
347 |
...
|
|
348 |
)
|
|
349 | ||
350 |
# Append additional info from layout to the analysis function
|
|
351 | 5x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
352 | 5x |
formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]]) |
353 | ||
354 | 5x |
summarize_row_groups( |
355 | 5x |
lyt = lyt, |
356 | 5x |
var = var, |
357 | 5x |
cfun = afun, |
358 | 5x |
na_str = na_str, |
359 | 5x |
extra_args = extra_args, |
360 | 5x |
indent_mod = indent_mod |
361 |
)
|
|
362 |
}
|
1 |
#' Control function for Cox-PH model
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' This is an auxiliary function for controlling arguments for Cox-PH model, typically used internally to specify
|
|
6 |
#' details of Cox-PH model for [s_coxph_pairwise()]. `conf_level` refers to Hazard Ratio estimation.
|
|
7 |
#'
|
|
8 |
#' @inheritParams argument_convention
|
|
9 |
#' @param pval_method (`string`)\cr p-value method for testing hazard ratio = 1.
|
|
10 |
#' Default method is `"log-rank"`, can also be set to `"wald"` or `"likelihood"`.
|
|
11 |
#' @param ties (`string`)\cr string specifying the method for tie handling. Default is `"efron"`,
|
|
12 |
#' can also be set to `"breslow"` or `"exact"`. See more in [survival::coxph()].
|
|
13 |
#'
|
|
14 |
#' @return A list of components with the same names as the arguments.
|
|
15 |
#'
|
|
16 |
#' @export
|
|
17 |
control_coxph <- function(pval_method = c("log-rank", "wald", "likelihood"), |
|
18 |
ties = c("efron", "breslow", "exact"), |
|
19 |
conf_level = 0.95) { |
|
20 | 53x |
pval_method <- match.arg(pval_method) |
21 | 52x |
ties <- match.arg(ties) |
22 | 52x |
assert_proportion_value(conf_level) |
23 | ||
24 | 51x |
list(pval_method = pval_method, ties = ties, conf_level = conf_level) |
25 |
}
|
|
26 | ||
27 |
#' Control function for `survfit` models for survival time
|
|
28 |
#'
|
|
29 |
#' @description `r lifecycle::badge("stable")`
|
|
30 |
#'
|
|
31 |
#' This is an auxiliary function for controlling arguments for `survfit` model, typically used internally to specify
|
|
32 |
#' details of `survfit` model for [s_surv_time()]. `conf_level` refers to survival time estimation.
|
|
33 |
#'
|
|
34 |
#' @inheritParams argument_convention
|
|
35 |
#' @param conf_type (`string`)\cr confidence interval type. Options are "plain" (default), "log", "log-log",
|
|
36 |
#' see more in [survival::survfit()]. Note option "none" is no longer supported.
|
|
37 |
#' @param quantiles (`numeric(2)`)\cr vector of length two specifying the quantiles of survival time.
|
|
38 |
#'
|
|
39 |
#' @return A list of components with the same names as the arguments.
|
|
40 |
#'
|
|
41 |
#' @export
|
|
42 |
control_surv_time <- function(conf_level = 0.95, |
|
43 |
conf_type = c("plain", "log", "log-log"), |
|
44 |
quantiles = c(0.25, 0.75)) { |
|
45 | 229x |
conf_type <- match.arg(conf_type) |
46 | 228x |
checkmate::assert_numeric(quantiles, lower = 0, upper = 1, len = 2, unique = TRUE, sorted = TRUE) |
47 | 227x |
nullo <- lapply(quantiles, assert_proportion_value) |
48 | 227x |
assert_proportion_value(conf_level) |
49 | 226x |
list(conf_level = conf_level, conf_type = conf_type, quantiles = quantiles) |
50 |
}
|
|
51 | ||
52 |
#' Control function for `survfit` models for patients' survival rate at time points
|
|
53 |
#'
|
|
54 |
#' @description `r lifecycle::badge("stable")`
|
|
55 |
#'
|
|
56 |
#' This is an auxiliary function for controlling arguments for `survfit` model, typically used internally to specify
|
|
57 |
#' details of `survfit` model for [s_surv_timepoint()]. `conf_level` refers to patient risk estimation at a time point.
|
|
58 |
#'
|
|
59 |
#' @inheritParams argument_convention
|
|
60 |
#' @inheritParams control_surv_time
|
|
61 |
#'
|
|
62 |
#' @return A list of components with the same names as the arguments.
|
|
63 |
#'
|
|
64 |
#' @export
|
|
65 |
control_surv_timepoint <- function(conf_level = 0.95, |
|
66 |
conf_type = c("plain", "log", "log-log")) { |
|
67 | 24x |
conf_type <- match.arg(conf_type) |
68 | 23x |
assert_proportion_value(conf_level) |
69 | 22x |
list( |
70 | 22x |
conf_level = conf_level, |
71 | 22x |
conf_type = conf_type |
72 |
)
|
|
73 |
}
|
1 |
#' Helper functions for Cox proportional hazards regression
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Helper functions used in [fit_coxreg_univar()] and [fit_coxreg_multivar()].
|
|
6 |
#'
|
|
7 |
#' @inheritParams argument_convention
|
|
8 |
#' @inheritParams h_coxreg_univar_extract
|
|
9 |
#' @inheritParams cox_regression_inter
|
|
10 |
#' @inheritParams control_coxreg
|
|
11 |
#'
|
|
12 |
#' @seealso [cox_regression]
|
|
13 |
#'
|
|
14 |
#' @name h_cox_regression
|
|
15 |
NULL
|
|
16 | ||
17 |
#' @describeIn h_cox_regression Helper for Cox regression formula. Creates a list of formulas. It is used
|
|
18 |
#' internally by [fit_coxreg_univar()] for the comparison of univariate Cox regression models.
|
|
19 |
#'
|
|
20 |
#' @return
|
|
21 |
#' * `h_coxreg_univar_formulas()` returns a `character` vector coercible into formulas (e.g [stats::as.formula()]).
|
|
22 |
#'
|
|
23 |
#' @examples
|
|
24 |
#' # `h_coxreg_univar_formulas`
|
|
25 |
#'
|
|
26 |
#' ## Simple formulas.
|
|
27 |
#' h_coxreg_univar_formulas(
|
|
28 |
#' variables = list(
|
|
29 |
#' time = "time", event = "status", arm = "armcd", covariates = c("X", "y")
|
|
30 |
#' )
|
|
31 |
#' )
|
|
32 |
#'
|
|
33 |
#' ## Addition of an optional strata.
|
|
34 |
#' h_coxreg_univar_formulas(
|
|
35 |
#' variables = list(
|
|
36 |
#' time = "time", event = "status", arm = "armcd", covariates = c("X", "y"),
|
|
37 |
#' strata = "SITE"
|
|
38 |
#' )
|
|
39 |
#' )
|
|
40 |
#'
|
|
41 |
#' ## Inclusion of the interaction term.
|
|
42 |
#' h_coxreg_univar_formulas(
|
|
43 |
#' variables = list(
|
|
44 |
#' time = "time", event = "status", arm = "armcd", covariates = c("X", "y"),
|
|
45 |
#' strata = "SITE"
|
|
46 |
#' ),
|
|
47 |
#' interaction = TRUE
|
|
48 |
#' )
|
|
49 |
#'
|
|
50 |
#' ## Only covariates fitted in separate models.
|
|
51 |
#' h_coxreg_univar_formulas(
|
|
52 |
#' variables = list(
|
|
53 |
#' time = "time", event = "status", covariates = c("X", "y")
|
|
54 |
#' )
|
|
55 |
#' )
|
|
56 |
#'
|
|
57 |
#' @export
|
|
58 |
h_coxreg_univar_formulas <- function(variables, |
|
59 |
interaction = FALSE) { |
|
60 | 50x |
checkmate::assert_list(variables, names = "named") |
61 | 50x |
has_arm <- "arm" %in% names(variables) |
62 | 50x |
arm_name <- if (has_arm) "arm" else NULL |
63 | ||
64 | 50x |
checkmate::assert_character(variables$covariates, null.ok = TRUE) |
65 | ||
66 | 50x |
checkmate::assert_flag(interaction) |
67 | ||
68 | 50x |
if (!has_arm || is.null(variables$covariates)) { |
69 | 10x |
checkmate::assert_false(interaction) |
70 |
}
|
|
71 | ||
72 | 48x |
assert_list_of_variables(variables[c(arm_name, "event", "time")]) |
73 | ||
74 | 48x |
if (!is.null(variables$covariates)) { |
75 | 47x |
forms <- paste0( |
76 | 47x |
"survival::Surv(", variables$time, ", ", variables$event, ") ~ ", |
77 | 47x |
ifelse(has_arm, variables$arm, "1"), |
78 | 47x |
ifelse(interaction, " * ", " + "), |
79 | 47x |
variables$covariates, |
80 | 47x |
ifelse( |
81 | 47x |
!is.null(variables$strata), |
82 | 47x |
paste0(" + strata(", paste0(variables$strata, collapse = ", "), ")"), |
83 |
""
|
|
84 |
)
|
|
85 |
)
|
|
86 |
} else { |
|
87 | 1x |
forms <- NULL |
88 |
}
|
|
89 | 48x |
nams <- variables$covariates |
90 | 48x |
if (has_arm) { |
91 | 41x |
ref <- paste0( |
92 | 41x |
"survival::Surv(", variables$time, ", ", variables$event, ") ~ ", |
93 | 41x |
variables$arm, |
94 | 41x |
ifelse( |
95 | 41x |
!is.null(variables$strata), |
96 | 41x |
paste0( |
97 | 41x |
" + strata(", paste0(variables$strata, collapse = ", "), ")" |
98 |
),
|
|
99 |
""
|
|
100 |
)
|
|
101 |
)
|
|
102 | 41x |
forms <- c(ref, forms) |
103 | 41x |
nams <- c("ref", nams) |
104 |
}
|
|
105 | 48x |
stats::setNames(forms, nams) |
106 |
}
|
|
107 | ||
108 |
#' @describeIn h_cox_regression Helper for multivariate Cox regression formula. Creates a formulas
|
|
109 |
#' string. It is used internally by [fit_coxreg_multivar()] for the comparison of multivariate Cox
|
|
110 |
#' regression models. Interactions will not be included in multivariate Cox regression model.
|
|
111 |
#'
|
|
112 |
#' @return
|
|
113 |
#' * `h_coxreg_multivar_formula()` returns a `string` coercible into a formula (e.g [stats::as.formula()]).
|
|
114 |
#'
|
|
115 |
#' @examples
|
|
116 |
#' # `h_coxreg_multivar_formula`
|
|
117 |
#'
|
|
118 |
#' h_coxreg_multivar_formula(
|
|
119 |
#' variables = list(
|
|
120 |
#' time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE")
|
|
121 |
#' )
|
|
122 |
#' )
|
|
123 |
#'
|
|
124 |
#' # Addition of an optional strata.
|
|
125 |
#' h_coxreg_multivar_formula(
|
|
126 |
#' variables = list(
|
|
127 |
#' time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE"),
|
|
128 |
#' strata = "SITE"
|
|
129 |
#' )
|
|
130 |
#' )
|
|
131 |
#'
|
|
132 |
#' # Example without treatment arm.
|
|
133 |
#' h_coxreg_multivar_formula(
|
|
134 |
#' variables = list(
|
|
135 |
#' time = "AVAL", event = "event", covariates = c("RACE", "AGE"),
|
|
136 |
#' strata = "SITE"
|
|
137 |
#' )
|
|
138 |
#' )
|
|
139 |
#'
|
|
140 |
#' @export
|
|
141 |
h_coxreg_multivar_formula <- function(variables) { |
|
142 | 89x |
checkmate::assert_list(variables, names = "named") |
143 | 89x |
has_arm <- "arm" %in% names(variables) |
144 | 89x |
arm_name <- if (has_arm) "arm" else NULL |
145 | ||
146 | 89x |
checkmate::assert_character(variables$covariates, null.ok = TRUE) |
147 | ||
148 | 89x |
assert_list_of_variables(variables[c(arm_name, "event", "time")]) |
149 | ||
150 | 89x |
y <- paste0( |
151 | 89x |
"survival::Surv(", variables$time, ", ", variables$event, ") ~ ", |
152 | 89x |
ifelse(has_arm, variables$arm, "1") |
153 |
)
|
|
154 | 89x |
if (length(variables$covariates) > 0) { |
155 | 26x |
y <- paste(y, paste(variables$covariates, collapse = " + "), sep = " + ") |
156 |
}
|
|
157 | 89x |
if (!is.null(variables$strata)) { |
158 | 5x |
y <- paste0(y, " + strata(", paste0(variables$strata, collapse = ", "), ")") |
159 |
}
|
|
160 | 89x |
y
|
161 |
}
|
|
162 | ||
163 |
#' @describeIn h_cox_regression Utility function to help tabulate the result of
|
|
164 |
#' a univariate Cox regression model.
|
|
165 |
#'
|
|
166 |
#' @param effect (`string`)\cr the treatment variable.
|
|
167 |
#' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()].
|
|
168 |
#'
|
|
169 |
#' @return
|
|
170 |
#' * `h_coxreg_univar_extract()` returns a `data.frame` with variables `effect`, `term`, `term_label`, `level`,
|
|
171 |
#' `n`, `hr`, `lcl`, `ucl`, and `pval`.
|
|
172 |
#'
|
|
173 |
#' @examples
|
|
174 |
#' library(survival)
|
|
175 |
#'
|
|
176 |
#' dta_simple <- data.frame(
|
|
177 |
#' time = c(5, 5, 10, 10, 5, 5, 10, 10),
|
|
178 |
#' status = c(0, 0, 1, 0, 0, 1, 1, 1),
|
|
179 |
#' armcd = factor(LETTERS[c(1, 1, 1, 1, 2, 2, 2, 2)], levels = c("A", "B")),
|
|
180 |
#' var1 = c(45, 55, 65, 75, 55, 65, 85, 75),
|
|
181 |
#' var2 = c("F", "M", "F", "M", "F", "M", "F", "U")
|
|
182 |
#' )
|
|
183 |
#' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple)
|
|
184 |
#' result <- h_coxreg_univar_extract(
|
|
185 |
#' effect = "armcd", covar = "armcd", mod = mod, data = dta_simple
|
|
186 |
#' )
|
|
187 |
#' result
|
|
188 |
#'
|
|
189 |
#' @export
|
|
190 |
h_coxreg_univar_extract <- function(effect, |
|
191 |
covar,
|
|
192 |
data,
|
|
193 |
mod,
|
|
194 |
control = control_coxreg()) { |
|
195 | 66x |
checkmate::assert_string(covar) |
196 | 66x |
checkmate::assert_string(effect) |
197 | 66x |
checkmate::assert_class(mod, "coxph") |
198 | 66x |
test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method] |
199 | ||
200 | 66x |
mod_aov <- muffled_car_anova(mod, test_statistic) |
201 | 66x |
msum <- summary(mod, conf.int = control$conf_level) |
202 | 66x |
sum_cox <- broom::tidy(msum) |
203 | ||
204 |
# Combine results together.
|
|
205 | 66x |
effect_aov <- mod_aov[effect, , drop = TRUE] |
206 | 66x |
pval <- effect_aov[[grep(pattern = "Pr", x = names(effect_aov)), drop = TRUE]] |
207 | 66x |
sum_main <- sum_cox[grepl(effect, sum_cox$level), ] |
208 | ||
209 | 66x |
term_label <- if (effect == covar) { |
210 | 34x |
paste0( |
211 | 34x |
levels(data[[covar]])[2], |
212 | 34x |
" vs control (",
|
213 | 34x |
levels(data[[covar]])[1], |
214 |
")"
|
|
215 |
)
|
|
216 |
} else { |
|
217 | 32x |
unname(labels_or_names(data[covar])) |
218 |
}
|
|
219 | 66x |
data.frame( |
220 | 66x |
effect = ifelse(covar == effect, "Treatment:", "Covariate:"), |
221 | 66x |
term = covar, |
222 | 66x |
term_label = term_label, |
223 | 66x |
level = levels(data[[effect]])[2], |
224 | 66x |
n = mod[["n"]], |
225 | 66x |
hr = unname(sum_main["exp(coef)"]), |
226 | 66x |
lcl = unname(sum_main[grep("lower", names(sum_main))]), |
227 | 66x |
ucl = unname(sum_main[grep("upper", names(sum_main))]), |
228 | 66x |
pval = pval, |
229 | 66x |
stringsAsFactors = FALSE |
230 |
)
|
|
231 |
}
|
|
232 | ||
233 |
#' @describeIn h_cox_regression Tabulation of multivariate Cox regressions. Utility function to help
|
|
234 |
#' tabulate the result of a multivariate Cox regression model for a treatment/covariate variable.
|
|
235 |
#'
|
|
236 |
#' @return
|
|
237 |
#' * `h_coxreg_multivar_extract()` returns a `data.frame` with variables `pval`, `hr`, `lcl`, `ucl`, `level`,
|
|
238 |
#' `n`, `term`, and `term_label`.
|
|
239 |
#'
|
|
240 |
#' @examples
|
|
241 |
#' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple)
|
|
242 |
#' result <- h_coxreg_multivar_extract(
|
|
243 |
#' var = "var1", mod = mod, data = dta_simple
|
|
244 |
#' )
|
|
245 |
#' result
|
|
246 |
#'
|
|
247 |
#' @export
|
|
248 |
h_coxreg_multivar_extract <- function(var, |
|
249 |
data,
|
|
250 |
mod,
|
|
251 |
control = control_coxreg()) { |
|
252 | 132x |
test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method] |
253 | 132x |
mod_aov <- muffled_car_anova(mod, test_statistic) |
254 | ||
255 | 132x |
msum <- summary(mod, conf.int = control$conf_level) |
256 | 132x |
sum_anova <- broom::tidy(mod_aov) |
257 | 132x |
sum_cox <- broom::tidy(msum) |
258 | ||
259 | 132x |
ret_anova <- sum_anova[sum_anova$term == var, c("term", "p.value")] |
260 | 132x |
names(ret_anova)[2] <- "pval" |
261 | 132x |
if (is.factor(data[[var]])) { |
262 | 53x |
ret_cox <- sum_cox[startsWith(prefix = var, x = sum_cox$level), !(names(sum_cox) %in% "exp(-coef)")] |
263 |
} else { |
|
264 | 79x |
ret_cox <- sum_cox[(var == sum_cox$level), !(names(sum_cox) %in% "exp(-coef)")] |
265 |
}
|
|
266 | 132x |
names(ret_cox)[1:4] <- c("pval", "hr", "lcl", "ucl") |
267 | 132x |
varlab <- unname(labels_or_names(data[var])) |
268 | 132x |
ret_cox$term <- varlab |
269 | ||
270 | 132x |
if (is.numeric(data[[var]])) { |
271 | 79x |
ret <- ret_cox |
272 | 79x |
ret$term_label <- ret$term |
273 | 53x |
} else if (length(levels(data[[var]])) <= 2) { |
274 | 34x |
ret_anova$pval <- NA |
275 | 34x |
ret_anova$term_label <- paste0(varlab, " (reference = ", levels(data[[var]])[1], ")") |
276 | 34x |
ret_cox$level <- gsub(var, "", ret_cox$level) |
277 | 34x |
ret_cox$term_label <- ret_cox$level |
278 | 34x |
ret <- dplyr::bind_rows(ret_anova, ret_cox) |
279 |
} else { |
|
280 | 19x |
ret_anova$term_label <- paste0(varlab, " (reference = ", levels(data[[var]])[1], ")") |
281 | 19x |
ret_cox$level <- gsub(var, "", ret_cox$level) |
282 | 19x |
ret_cox$term_label <- ret_cox$level |
283 | 19x |
ret <- dplyr::bind_rows(ret_anova, ret_cox) |
284 |
}
|
|
285 | ||
286 | 132x |
as.data.frame(ret) |
287 |
}
|
1 |
#' Factor utilities
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' A collection of utility functions for factors.
|
|
6 |
#'
|
|
7 |
#' @param x (`factor`)\cr factor variable or object to convert (for `as_factor_keep_attributes`).
|
|
8 |
#'
|
|
9 |
#' @seealso [cut_quantile_bins()] for splitting numeric vectors into quantile bins.
|
|
10 |
#'
|
|
11 |
#' @name factor_utils
|
|
12 |
NULL
|
|
13 | ||
14 |
#' @describeIn factor_utils Combine specified old factor Levels in a single new level.
|
|
15 |
#'
|
|
16 |
#' @param levels (`character`)\cr level names to be combined.
|
|
17 |
#' @param new_level (`string`)\cr name of new level.
|
|
18 |
#'
|
|
19 |
#' @return
|
|
20 |
#' * `combine_levels`: A `factor` with the new levels.
|
|
21 |
#'
|
|
22 |
#' @examples
|
|
23 |
#' x <- factor(letters[1:5], levels = letters[5:1])
|
|
24 |
#' combine_levels(x, levels = c("a", "b"))
|
|
25 |
#'
|
|
26 |
#' combine_levels(x, c("e", "b"))
|
|
27 |
#'
|
|
28 |
#' @export
|
|
29 |
combine_levels <- function(x, levels, new_level = paste(levels, collapse = "/")) { |
|
30 | 4x |
checkmate::assert_factor(x) |
31 | 4x |
checkmate::assert_subset(levels, levels(x)) |
32 | ||
33 | 4x |
lvls <- levels(x) |
34 | ||
35 | 4x |
lvls[lvls %in% levels] <- new_level |
36 | ||
37 | 4x |
levels(x) <- lvls |
38 | ||
39 | 4x |
x
|
40 |
}
|
|
41 | ||
42 |
#' Conversion of a vector to a factor
|
|
43 |
#'
|
|
44 |
#' @describeIn factor_utils Converts `x` to a factor and keeps its attributes. Warns appropriately such that the user
|
|
45 |
#' can decide whether they prefer converting to factor manually (e.g. for full control of
|
|
46 |
#' factor levels).
|
|
47 |
#'
|
|
48 |
#' @param x_name (`string`)\cr name of `x`.
|
|
49 |
#' @param na_level (`string`)\cr the explicit missing level which should be used when converting a character vector.
|
|
50 |
#' @param verbose (`flag`)\cr defaults to `TRUE`. It prints out warnings and messages.
|
|
51 |
#'
|
|
52 |
#' @return
|
|
53 |
#' * `as_factor_keep_attributes`: A `factor` with same attributes (except class) as `x`.
|
|
54 |
#' Does not modify `x` if already a `factor`.
|
|
55 |
#'
|
|
56 |
#' @examples
|
|
57 |
#' a_chr_with_labels <- c("a", "b", NA)
|
|
58 |
#' attr(a_chr_with_labels, "label") <- "A character vector with labels"
|
|
59 |
#' as_factor_keep_attributes(a_chr_with_labels)
|
|
60 |
#'
|
|
61 |
#' @export
|
|
62 |
as_factor_keep_attributes <- function(x, |
|
63 |
x_name = deparse(substitute(x)), |
|
64 |
na_level = "<Missing>", |
|
65 |
verbose = TRUE) { |
|
66 | 221x |
checkmate::assert_atomic(x) |
67 | 221x |
checkmate::assert_string(x_name) |
68 | 221x |
checkmate::assert_string(na_level) |
69 | 221x |
checkmate::assert_flag(verbose) |
70 | 221x |
if (is.factor(x)) { |
71 | 200x |
return(x) |
72 |
}
|
|
73 | 21x |
x_class <- class(x)[1] |
74 | 21x |
if (verbose) { |
75 | 15x |
warning(paste( |
76 | 15x |
"automatically converting", x_class, "variable", x_name, |
77 | 15x |
"to factor, better manually convert to factor to avoid failures"
|
78 |
)) |
|
79 |
}
|
|
80 | 21x |
if (identical(length(x), 0L)) { |
81 | 1x |
warning(paste( |
82 | 1x |
x_name, "has length 0, this can lead to tabulation failures, better convert to factor" |
83 |
)) |
|
84 |
}
|
|
85 | 21x |
if (is.character(x)) { |
86 | 21x |
x_no_na <- explicit_na(sas_na(x), label = na_level) |
87 | 21x |
if (any(na_level %in% x_no_na)) { |
88 | 3x |
do.call( |
89 | 3x |
structure,
|
90 | 3x |
c( |
91 | 3x |
list(.Data = forcats::fct_relevel(x_no_na, na_level, after = Inf)), |
92 | 3x |
attributes(x) |
93 |
)
|
|
94 |
)
|
|
95 |
} else { |
|
96 | 18x |
do.call(structure, c(list(.Data = as.factor(x)), attributes(x))) |
97 |
}
|
|
98 |
} else { |
|
99 | ! |
do.call(structure, c(list(.Data = as.factor(x)), attributes(x))) |
100 |
}
|
|
101 |
}
|
|
102 | ||
103 |
#' Labels for bins in percent
|
|
104 |
#'
|
|
105 |
#' This creates labels for quantile based bins in percent. This assumes the right-closed
|
|
106 |
#' intervals as produced by [cut_quantile_bins()].
|
|
107 |
#'
|
|
108 |
#' @param probs (`numeric`)\cr the probabilities identifying the quantiles.
|
|
109 |
#' This is a sorted vector of unique `proportion` values, i.e. between 0 and 1, where
|
|
110 |
#' the boundaries 0 and 1 must not be included.
|
|
111 |
#' @param digits (`integer(1)`)\cr number of decimal places to round the percent numbers.
|
|
112 |
#'
|
|
113 |
#' @return A `character` vector with labels in the format `[0%,20%]`, `(20%,50%]`, etc.
|
|
114 |
#'
|
|
115 |
#' @keywords internal
|
|
116 |
bins_percent_labels <- function(probs, |
|
117 |
digits = 0) { |
|
118 | 3x |
if (isFALSE(0 %in% probs)) probs <- c(0, probs) |
119 | 3x |
if (isFALSE(1 %in% probs)) probs <- c(probs, 1) |
120 | 10x |
checkmate::assert_numeric(probs, lower = 0, upper = 1, unique = TRUE, sorted = TRUE) |
121 | 10x |
percent <- round(probs * 100, digits = digits) |
122 | 10x |
left <- paste0(utils::head(percent, -1), "%") |
123 | 10x |
right <- paste0(utils::tail(percent, -1), "%") |
124 | 10x |
without_left_bracket <- paste0(left, ",", right, "]") |
125 | 10x |
with_left_bracket <- paste0("[", utils::head(without_left_bracket, 1)) |
126 | 10x |
if (length(without_left_bracket) > 1) { |
127 | 7x |
with_left_bracket <- c( |
128 | 7x |
with_left_bracket,
|
129 | 7x |
paste0("(", utils::tail(without_left_bracket, -1)) |
130 |
)
|
|
131 |
}
|
|
132 | 10x |
with_left_bracket
|
133 |
}
|
|
134 | ||
135 |
#' Cut numeric vector into empirical quantile bins
|
|
136 |
#'
|
|
137 |
#' @description `r lifecycle::badge("stable")`
|
|
138 |
#'
|
|
139 |
#' This cuts a numeric vector into sample quantile bins.
|
|
140 |
#'
|
|
141 |
#' @inheritParams bins_percent_labels
|
|
142 |
#' @param x (`numeric`)\cr the continuous variable values which should be cut into
|
|
143 |
#' quantile bins. This may contain `NA` values, which are then
|
|
144 |
#' not used for the quantile calculations, but included in the return vector.
|
|
145 |
#' @param labels (`character`)\cr the unique labels for the quantile bins. When there are `n`
|
|
146 |
#' probabilities in `probs`, then this must be `n + 1` long.
|
|
147 |
#' @param type (`integer(1)`)\cr type of quantiles to use, see [stats::quantile()] for details.
|
|
148 |
#' @param ordered (`flag`)\cr should the result be an ordered factor.
|
|
149 |
#'
|
|
150 |
#' @return
|
|
151 |
#' * `cut_quantile_bins`: A `factor` variable with appropriately-labeled bins as levels.
|
|
152 |
#'
|
|
153 |
#' @note Intervals are closed on the right side. That is, the first bin is the interval
|
|
154 |
#' `[-Inf, q1]` where `q1` is the first quantile, the second bin is then `(q1, q2]`, etc.,
|
|
155 |
#' and the last bin is `(qn, +Inf]` where `qn` is the last quantile.
|
|
156 |
#'
|
|
157 |
#' @examples
|
|
158 |
#' # Default is to cut into quartile bins.
|
|
159 |
#' cut_quantile_bins(cars$speed)
|
|
160 |
#'
|
|
161 |
#' # Use custom quantiles.
|
|
162 |
#' cut_quantile_bins(cars$speed, probs = c(0.1, 0.2, 0.6, 0.88))
|
|
163 |
#'
|
|
164 |
#' # Use custom labels.
|
|
165 |
#' cut_quantile_bins(cars$speed, labels = paste0("Q", 1:4))
|
|
166 |
#'
|
|
167 |
#' # NAs are preserved in result factor.
|
|
168 |
#' ozone_binned <- cut_quantile_bins(airquality$Ozone)
|
|
169 |
#' which(is.na(ozone_binned))
|
|
170 |
#' # So you might want to make these explicit.
|
|
171 |
#' explicit_na(ozone_binned)
|
|
172 |
#'
|
|
173 |
#' @export
|
|
174 |
cut_quantile_bins <- function(x, |
|
175 |
probs = c(0.25, 0.5, 0.75), |
|
176 |
labels = NULL, |
|
177 |
type = 7, |
|
178 |
ordered = TRUE) { |
|
179 | 8x |
checkmate::assert_flag(ordered) |
180 | 8x |
checkmate::assert_numeric(x) |
181 | 7x |
if (isFALSE(0 %in% probs)) probs <- c(0, probs) |
182 | 7x |
if (isFALSE(1 %in% probs)) probs <- c(probs, 1) |
183 | 8x |
checkmate::assert_numeric(probs, lower = 0, upper = 1, unique = TRUE, sorted = TRUE) |
184 | 7x |
if (is.null(labels)) labels <- bins_percent_labels(probs) |
185 | 8x |
checkmate::assert_character(labels, len = length(probs) - 1, any.missing = FALSE, unique = TRUE) |
186 | ||
187 | 8x |
if (all(is.na(x))) { |
188 |
# Early return if there are only NAs in input.
|
|
189 | 1x |
return(factor(x, ordered = ordered, levels = labels)) |
190 |
}
|
|
191 | ||
192 | 7x |
quantiles <- stats::quantile( |
193 | 7x |
x,
|
194 | 7x |
probs = probs, |
195 | 7x |
type = type, |
196 | 7x |
na.rm = TRUE |
197 |
)
|
|
198 | ||
199 | 7x |
checkmate::assert_numeric(quantiles, unique = TRUE) |
200 | ||
201 | 6x |
cut( |
202 | 6x |
x,
|
203 | 6x |
breaks = quantiles, |
204 | 6x |
labels = labels, |
205 | 6x |
ordered_result = ordered, |
206 | 6x |
include.lowest = TRUE, |
207 | 6x |
right = TRUE |
208 |
)
|
|
209 |
}
|
|
210 | ||
211 |
#' @describeIn factor_utils This discards the observations as well as the levels specified from a factor.
|
|
212 |
#'
|
|
213 |
#' @param discard (`character`)\cr levels to discard.
|
|
214 |
#'
|
|
215 |
#' @return
|
|
216 |
#' * `fct_discard`: A modified `factor` with observations as well as levels from `discard` dropped.
|
|
217 |
#'
|
|
218 |
#' @examples
|
|
219 |
#' fct_discard(factor(c("a", "b", "c")), "c")
|
|
220 |
#'
|
|
221 |
#' @export
|
|
222 |
fct_discard <- function(x, discard) { |
|
223 | 321x |
checkmate::assert_factor(x) |
224 | 321x |
checkmate::assert_character(discard, any.missing = FALSE) |
225 | 321x |
new_obs <- x[!(x %in% discard)] |
226 | 321x |
new_levels <- setdiff(levels(x), discard) |
227 | 321x |
factor(new_obs, levels = new_levels) |
228 |
}
|
|
229 | ||
230 |
#' @describeIn factor_utils This inserts explicit missing values in a factor based on a condition. Additionally,
|
|
231 |
#' existing `NA` values will be explicitly converted to given `na_level`.
|
|
232 |
#'
|
|
233 |
#' @param condition (`logical`)\cr positions at which to insert missing values.
|
|
234 |
#' @param na_level (`string`)\cr which level to use for missing values.
|
|
235 |
#'
|
|
236 |
#' @return
|
|
237 |
#' * `fct_explicit_na_if`: A modified `factor` with inserted and existing `NA` converted to `na_level`.
|
|
238 |
#'
|
|
239 |
#' @seealso [forcats::fct_na_value_to_level()] which is used internally.
|
|
240 |
#'
|
|
241 |
#' @examples
|
|
242 |
#' fct_explicit_na_if(factor(c("a", "b", NA)), c(TRUE, FALSE, FALSE))
|
|
243 |
#'
|
|
244 |
#' @export
|
|
245 |
fct_explicit_na_if <- function(x, condition, na_level = "<Missing>") { |
|
246 | 1x |
checkmate::assert_factor(x, len = length(condition)) |
247 | 1x |
checkmate::assert_logical(condition) |
248 | 1x |
x[condition] <- NA |
249 | 1x |
x <- forcats::fct_na_value_to_level(x, level = na_level) |
250 | 1x |
forcats::fct_drop(x, only = na_level) |
251 |
}
|
|
252 | ||
253 |
#' @describeIn factor_utils This collapses levels and only keeps those new group levels, in the order provided.
|
|
254 |
#' The returned factor has levels in the order given, with the possible missing level last (this will
|
|
255 |
#' only be included if there are missing values).
|
|
256 |
#'
|
|
257 |
#' @param .f (`factor` or `character`)\cr original vector.
|
|
258 |
#' @param ... (named `character`)\cr levels in each vector provided will be collapsed into
|
|
259 |
#' the new level given by the respective name.
|
|
260 |
#' @param .na_level (`string`)\cr which level to use for other levels, which should be missing in the
|
|
261 |
#' new factor. Note that this level must not be contained in the new levels specified in `...`.
|
|
262 |
#'
|
|
263 |
#' @return
|
|
264 |
#' * `fct_collapse_only`: A modified `factor` with collapsed levels. Values and levels which are not included
|
|
265 |
#' in the given `character` vector input will be set to the missing level `.na_level`.
|
|
266 |
#'
|
|
267 |
#' @note Any existing `NA`s in the input vector will not be replaced by the missing level. If needed,
|
|
268 |
#' [explicit_na()] can be called separately on the result.
|
|
269 |
#'
|
|
270 |
#' @seealso [forcats::fct_collapse()], [forcats::fct_relevel()] which are used internally.
|
|
271 |
#'
|
|
272 |
#' @examples
|
|
273 |
#' fct_collapse_only(factor(c("a", "b", "c", "d")), TRT = "b", CTRL = c("c", "d"))
|
|
274 |
#'
|
|
275 |
#' @export
|
|
276 |
fct_collapse_only <- function(.f, ..., .na_level = "<Missing>") { |
|
277 | 4x |
new_lvls <- names(list(...)) |
278 | 4x |
if (checkmate::test_subset(.na_level, new_lvls)) { |
279 | 1x |
stop(paste0(".na_level currently set to '", .na_level, "' must not be contained in the new levels")) |
280 |
}
|
|
281 | 3x |
x <- forcats::fct_collapse(.f, ..., other_level = .na_level) |
282 | 3x |
do.call(forcats::fct_relevel, args = c(list(.f = x), as.list(new_lvls))) |
283 |
}
|
1 |
#' Count patient events in columns
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The summarize function [summarize_patients_events_in_cols()] creates a layout element to summarize patient
|
|
6 |
#' event counts in columns.
|
|
7 |
#'
|
|
8 |
#' This function analyzes the elements (events) supplied via the `filters_list` parameter and returns a row
|
|
9 |
#' with counts of number of patients for each event as well as the total numbers of patients and events.
|
|
10 |
#' The `id` variable is used to indicate unique subject identifiers (defaults to `USUBJID`).
|
|
11 |
#'
|
|
12 |
#' If there are multiple occurrences of the same event recorded for a patient, the event is only counted once.
|
|
13 |
#'
|
|
14 |
#' @inheritParams argument_convention
|
|
15 |
#' @param filters_list (named `list` of `character`)\cr list where each element in this list describes one
|
|
16 |
#' type of event describe by filters, in the same format as [s_count_patients_with_event()].
|
|
17 |
#' If it has a label, then this will be used for the column title.
|
|
18 |
#' @param empty_stats (`character`)\cr optional names of the statistics that should be returned empty such
|
|
19 |
#' that corresponding table cells will stay blank.
|
|
20 |
#' @param custom_label (`string` or `NULL`)\cr if provided and `labelstr` is empty then this will
|
|
21 |
#' be used as label.
|
|
22 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
23 |
#'
|
|
24 |
#' In addition to any statistics added using `filters_list`, statistic options are:
|
|
25 |
#' ``r shQuote(get_stats("summarize_patients_events_in_cols"), type = "sh")``
|
|
26 |
#'
|
|
27 |
#' @name count_patients_events_in_cols
|
|
28 |
#' @order 1
|
|
29 |
NULL
|
|
30 | ||
31 |
#' @describeIn count_patients_events_in_cols Statistics function which counts numbers of patients and multiple
|
|
32 |
#' events defined by filters. Used as analysis function `afun` in `summarize_patients_events_in_cols()`.
|
|
33 |
#'
|
|
34 |
#' @return
|
|
35 |
#' * `s_count_patients_and_multiple_events()` returns a list with the statistics:
|
|
36 |
#' - `unique`: number of unique patients in `df`.
|
|
37 |
#' - `all`: number of rows in `df`.
|
|
38 |
#' - one element with the same name as in `filters_list`: number of rows in `df`,
|
|
39 |
#' i.e. events, fulfilling the filter condition.
|
|
40 |
#'
|
|
41 |
#' @keywords internal
|
|
42 |
s_count_patients_and_multiple_events <- function(df, |
|
43 |
id,
|
|
44 |
filters_list,
|
|
45 |
empty_stats = character(), |
|
46 |
labelstr = "", |
|
47 |
custom_label = NULL) { |
|
48 | 9x |
checkmate::assert_list(filters_list, names = "named") |
49 | 9x |
checkmate::assert_data_frame(df) |
50 | 9x |
checkmate::assert_string(id) |
51 | 9x |
checkmate::assert_disjunct(c("unique", "all"), names(filters_list)) |
52 | 9x |
checkmate::assert_character(empty_stats) |
53 | 9x |
checkmate::assert_string(labelstr) |
54 | 9x |
checkmate::assert_string(custom_label, null.ok = TRUE) |
55 | ||
56 |
# Below we want to count each row in `df` once, therefore introducing this helper index column.
|
|
57 | 9x |
df$.row_index <- as.character(seq_len(nrow(df))) |
58 | 9x |
y <- list() |
59 | 9x |
row_label <- if (labelstr != "") { |
60 | ! |
labelstr
|
61 | 9x |
} else if (!is.null(custom_label)) { |
62 | 2x |
custom_label
|
63 |
} else { |
|
64 | 7x |
"counts"
|
65 |
}
|
|
66 | 9x |
y$unique <- formatters::with_label( |
67 | 9x |
s_num_patients_content(df = df, .N_col = 1, .var = id, required = NULL)$unique[1L], |
68 | 9x |
row_label
|
69 |
)
|
|
70 | 9x |
y$all <- formatters::with_label( |
71 | 9x |
nrow(df), |
72 | 9x |
row_label
|
73 |
)
|
|
74 | 9x |
events <- Map( |
75 | 9x |
function(filters) { |
76 | 25x |
formatters::with_label( |
77 | 25x |
s_count_patients_with_event(df = df, .var = ".row_index", filters = filters, .N_col = 1, .N_row = 1)$count, |
78 | 25x |
row_label
|
79 |
)
|
|
80 |
},
|
|
81 | 9x |
filters = filters_list |
82 |
)
|
|
83 | 9x |
y_complete <- c(y, events) |
84 | 9x |
y <- if (length(empty_stats) > 0) { |
85 | 3x |
y_reduced <- y_complete |
86 | 3x |
for (stat in intersect(names(y_complete), empty_stats)) { |
87 | 4x |
y_reduced[[stat]] <- formatters::with_label(character(), obj_label(y_reduced[[stat]])) |
88 |
}
|
|
89 | 3x |
y_reduced
|
90 |
} else { |
|
91 | 6x |
y_complete
|
92 |
}
|
|
93 | 9x |
y
|
94 |
}
|
|
95 | ||
96 |
#' @describeIn count_patients_events_in_cols Layout-creating function which can take statistics function
|
|
97 |
#' arguments and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
|
|
98 |
#'
|
|
99 |
#' @param col_split (`flag`)\cr whether the columns should be split.
|
|
100 |
#' Set to `FALSE` when the required column split has been done already earlier in the layout pipe.
|
|
101 |
#'
|
|
102 |
#' @return
|
|
103 |
#' * `summarize_patients_events_in_cols()` returns a layout object suitable for passing to further layouting functions,
|
|
104 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted content rows
|
|
105 |
#' containing the statistics from `s_count_patients_and_multiple_events()` to the table layout.
|
|
106 |
#'
|
|
107 |
#' @examples
|
|
108 |
#' df <- data.frame(
|
|
109 |
#' USUBJID = rep(c("id1", "id2", "id3", "id4"), c(2, 3, 1, 1)),
|
|
110 |
#' ARM = c("A", "A", "B", "B", "B", "B", "A"),
|
|
111 |
#' AESER = rep("Y", 7),
|
|
112 |
#' AESDTH = c("Y", "Y", "N", "Y", "Y", "N", "N"),
|
|
113 |
#' AEREL = c("Y", "Y", "N", "Y", "Y", "N", "Y"),
|
|
114 |
#' AEDECOD = c("A", "A", "A", "B", "B", "C", "D"),
|
|
115 |
#' AEBODSYS = rep(c("SOC1", "SOC2", "SOC3"), c(3, 3, 1))
|
|
116 |
#' )
|
|
117 |
#'
|
|
118 |
#' # `summarize_patients_events_in_cols()`
|
|
119 |
#' basic_table() %>%
|
|
120 |
#' summarize_patients_events_in_cols(
|
|
121 |
#' filters_list = list(
|
|
122 |
#' related = formatters::with_label(c(AEREL = "Y"), "Events (Related)"),
|
|
123 |
#' fatal = c(AESDTH = "Y"),
|
|
124 |
#' fatal_related = c(AEREL = "Y", AESDTH = "Y")
|
|
125 |
#' ),
|
|
126 |
#' custom_label = "%s Total number of patients and events"
|
|
127 |
#' ) %>%
|
|
128 |
#' build_table(df)
|
|
129 |
#'
|
|
130 |
#' @export
|
|
131 |
#' @order 2
|
|
132 |
summarize_patients_events_in_cols <- function(lyt, |
|
133 |
id = "USUBJID", |
|
134 |
filters_list = list(), |
|
135 |
empty_stats = character(), |
|
136 |
na_str = default_na_str(), |
|
137 |
...,
|
|
138 |
.stats = c( |
|
139 |
"unique",
|
|
140 |
"all",
|
|
141 |
names(filters_list) |
|
142 |
),
|
|
143 |
.labels = c( |
|
144 |
unique = "Patients (All)", |
|
145 |
all = "Events (All)", |
|
146 |
labels_or_names(filters_list) |
|
147 |
),
|
|
148 |
col_split = TRUE) { |
|
149 | 2x |
extra_args <- list(id = id, filters_list = filters_list, empty_stats = empty_stats, ...) |
150 | ||
151 | 2x |
afun_list <- Map( |
152 | 2x |
function(stat) { |
153 | 7x |
make_afun( |
154 | 7x |
s_count_patients_and_multiple_events,
|
155 | 7x |
.stats = stat, |
156 | 7x |
.formats = "xx." |
157 |
)
|
|
158 |
},
|
|
159 | 2x |
stat = .stats |
160 |
)
|
|
161 | 2x |
if (col_split) { |
162 | 2x |
lyt <- split_cols_by_multivar( |
163 | 2x |
lyt = lyt, |
164 | 2x |
vars = rep(id, length(.stats)), |
165 | 2x |
varlabels = .labels[.stats] |
166 |
)
|
|
167 |
}
|
|
168 | 2x |
summarize_row_groups( |
169 | 2x |
lyt = lyt, |
170 | 2x |
cfun = afun_list, |
171 | 2x |
na_str = na_str, |
172 | 2x |
extra_args = extra_args |
173 |
)
|
|
174 |
}
|
1 |
#' Estimate proportions of each level of a variable
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze & summarize function [estimate_multinomial_response()] creates a layout element to estimate the
|
|
6 |
#' proportion and proportion confidence interval for each level of a factor variable. The primary analysis variable,
|
|
7 |
#' `var`, should be a factor variable, the values of which will be used as labels within the output table.
|
|
8 |
#'
|
|
9 |
#' @inheritParams argument_convention
|
|
10 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
11 |
#'
|
|
12 |
#' Options are: ``r shQuote(get_stats("estimate_multinomial_response"), type = "sh")``
|
|
13 |
#'
|
|
14 |
#' @seealso Relevant description function [d_onco_rsp_label()].
|
|
15 |
#'
|
|
16 |
#' @name estimate_multinomial_rsp
|
|
17 |
#' @order 1
|
|
18 |
NULL
|
|
19 | ||
20 |
#' Description of standard oncology response
|
|
21 |
#'
|
|
22 |
#' @description `r lifecycle::badge("stable")`
|
|
23 |
#'
|
|
24 |
#' Describe the oncology response in a standard way.
|
|
25 |
#'
|
|
26 |
#' @param x (`character`)\cr the standard oncology codes to be described.
|
|
27 |
#'
|
|
28 |
#' @return Response labels.
|
|
29 |
#'
|
|
30 |
#' @seealso [estimate_multinomial_rsp()]
|
|
31 |
#'
|
|
32 |
#' @examples
|
|
33 |
#' d_onco_rsp_label(
|
|
34 |
#' c("CR", "PR", "SD", "NON CR/PD", "PD", "NE", "Missing", "<Missing>", "NE/Missing")
|
|
35 |
#' )
|
|
36 |
#'
|
|
37 |
#' # Adding some values not considered in d_onco_rsp_label
|
|
38 |
#'
|
|
39 |
#' d_onco_rsp_label(
|
|
40 |
#' c("CR", "PR", "hello", "hi")
|
|
41 |
#' )
|
|
42 |
#'
|
|
43 |
#' @export
|
|
44 |
d_onco_rsp_label <- function(x) { |
|
45 | 2x |
x <- as.character(x) |
46 | 2x |
desc <- c( |
47 | 2x |
CR = "Complete Response (CR)", |
48 | 2x |
PR = "Partial Response (PR)", |
49 | 2x |
MR = "Minimal/Minor Response (MR)", |
50 | 2x |
MRD = "Minimal Residual Disease (MRD)", |
51 | 2x |
SD = "Stable Disease (SD)", |
52 | 2x |
PD = "Progressive Disease (PD)", |
53 | 2x |
`NON CR/PD` = "Non-CR or Non-PD (NON CR/PD)", |
54 | 2x |
NE = "Not Evaluable (NE)", |
55 | 2x |
`NE/Missing` = "Missing or unevaluable", |
56 | 2x |
Missing = "Missing", |
57 | 2x |
`NA` = "Not Applicable (NA)", |
58 | 2x |
ND = "Not Done (ND)" |
59 |
)
|
|
60 | ||
61 | 2x |
values_label <- vapply( |
62 | 2x |
X = x, |
63 | 2x |
FUN.VALUE = character(1), |
64 | 2x |
function(val) { |
65 | ! |
if (val %in% names(desc)) desc[val] else val |
66 |
}
|
|
67 |
)
|
|
68 | ||
69 | 2x |
factor(values_label, levels = c(intersect(desc, values_label), setdiff(values_label, desc))) |
70 |
}
|
|
71 | ||
72 |
#' @describeIn estimate_multinomial_rsp Statistics function which feeds the length of `x` as number
|
|
73 |
#' of successes, and `.N_col` as total number of successes and failures into [s_proportion()].
|
|
74 |
#'
|
|
75 |
#' @return
|
|
76 |
#' * `s_length_proportion()` returns statistics from [s_proportion()].
|
|
77 |
#'
|
|
78 |
#' @examples
|
|
79 |
#' s_length_proportion(rep("CR", 10), .N_col = 100)
|
|
80 |
#' s_length_proportion(factor(character(0)), .N_col = 100)
|
|
81 |
#'
|
|
82 |
#' @export
|
|
83 |
s_length_proportion <- function(x, |
|
84 |
...,
|
|
85 |
.N_col) { # nolint |
|
86 | 10x |
checkmate::assert_multi_class(x, classes = c("factor", "character")) |
87 | 9x |
checkmate::assert_vector(x, min.len = 0, max.len = .N_col) |
88 | 7x |
checkmate::assert_vector(unique(x), min.len = 0, max.len = 1) |
89 | ||
90 | 7x |
n_true <- length(x) |
91 | 7x |
n_false <- .N_col - n_true |
92 | 7x |
x_logical <- rep(c(TRUE, FALSE), c(n_true, n_false)) |
93 | 7x |
s_proportion(df = x_logical, ...) |
94 |
}
|
|
95 | ||
96 |
#' @describeIn estimate_multinomial_rsp Formatted analysis function which is used as `afun`
|
|
97 |
#' in `estimate_multinomial_response()`.
|
|
98 |
#'
|
|
99 |
#' @return
|
|
100 |
#' * `a_length_proportion()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
101 |
#'
|
|
102 |
#' @examples
|
|
103 |
#' a_length_proportion(rep("CR", 10), .N_col = 100)
|
|
104 |
#' a_length_proportion(factor(character(0)), .N_col = 100)
|
|
105 |
#'
|
|
106 |
#' @export
|
|
107 |
a_length_proportion <- function(x, |
|
108 |
...,
|
|
109 |
.stats = NULL, |
|
110 |
.stat_names = NULL, |
|
111 |
.formats = NULL, |
|
112 |
.labels = NULL, |
|
113 |
.indent_mods = NULL) { |
|
114 |
# Check for additional parameters to the statistics function
|
|
115 | 6x |
dots_extra_args <- list(...) |
116 | 6x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
117 | 6x |
dots_extra_args$.additional_fun_parameters <- NULL |
118 | ||
119 |
# Check for user-defined functions
|
|
120 | 6x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
121 | 6x |
.stats <- default_and_custom_stats_list$all_stats |
122 | 6x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
123 | ||
124 |
# Apply statistics function
|
|
125 | 6x |
x_stats <- .apply_stat_functions( |
126 | 6x |
default_stat_fnc = s_length_proportion, |
127 | 6x |
custom_stat_fnc_list = custom_stat_functions, |
128 | 6x |
args_list = c( |
129 | 6x |
x = list(x), |
130 | 6x |
extra_afun_params,
|
131 | 6x |
dots_extra_args
|
132 |
)
|
|
133 |
)
|
|
134 | ||
135 |
# Fill in formatting defaults
|
|
136 | 6x |
.stats <- get_stats("estimate_multinomial_response", |
137 | 6x |
stats_in = .stats, |
138 | 6x |
custom_stats_in = names(custom_stat_functions) |
139 |
)
|
|
140 | 6x |
x_stats <- x_stats[.stats] |
141 | 6x |
.formats <- get_formats_from_stats(.stats, .formats) |
142 | 6x |
.labels <- get_labels_from_stats( |
143 | 6x |
.stats, .labels, |
144 | 6x |
tern_defaults = c(lapply(x_stats, attr, "label"), tern_default_labels) |
145 |
)
|
|
146 | 6x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods) |
147 | ||
148 |
# Auto format handling
|
|
149 | 6x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
150 | ||
151 |
# Get and check statistical names
|
|
152 | 6x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
153 | ||
154 | 6x |
in_rows( |
155 | 6x |
.list = x_stats, |
156 | 6x |
.formats = .formats, |
157 | 6x |
.names = .labels %>% .unlist_keep_nulls(), |
158 | 6x |
.stat_names = .stat_names, |
159 | 6x |
.labels = .labels %>% .unlist_keep_nulls(), |
160 | 6x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
161 |
)
|
|
162 |
}
|
|
163 | ||
164 |
#' @describeIn estimate_multinomial_rsp Layout-creating function which can take statistics function arguments
|
|
165 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()] and
|
|
166 |
#' [rtables::summarize_row_groups()].
|
|
167 |
#'
|
|
168 |
#' @return
|
|
169 |
#' * `estimate_multinomial_response()` returns a layout object suitable for passing to further layouting functions,
|
|
170 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
171 |
#' the statistics from `s_length_proportion()` to the table layout.
|
|
172 |
#'
|
|
173 |
#' @examples
|
|
174 |
#' library(dplyr)
|
|
175 |
#'
|
|
176 |
#' # Use of the layout creating function.
|
|
177 |
#' dta_test <- data.frame(
|
|
178 |
#' USUBJID = paste0("S", 1:12),
|
|
179 |
#' ARM = factor(rep(LETTERS[1:3], each = 4)),
|
|
180 |
#' AVAL = c(A = c(1, 1, 1, 1), B = c(0, 0, 1, 1), C = c(0, 0, 0, 0))
|
|
181 |
#' ) %>% mutate(
|
|
182 |
#' AVALC = factor(AVAL,
|
|
183 |
#' levels = c(0, 1),
|
|
184 |
#' labels = c("Complete Response (CR)", "Partial Response (PR)")
|
|
185 |
#' )
|
|
186 |
#' )
|
|
187 |
#'
|
|
188 |
#' lyt <- basic_table() %>%
|
|
189 |
#' split_cols_by("ARM") %>%
|
|
190 |
#' estimate_multinomial_response(var = "AVALC")
|
|
191 |
#'
|
|
192 |
#' tbl <- build_table(lyt, dta_test)
|
|
193 |
#'
|
|
194 |
#' tbl
|
|
195 |
#'
|
|
196 |
#' @export
|
|
197 |
#' @order 2
|
|
198 |
estimate_multinomial_response <- function(lyt, |
|
199 |
var,
|
|
200 |
na_str = default_na_str(), |
|
201 |
nested = TRUE, |
|
202 |
...,
|
|
203 |
show_labels = "hidden", |
|
204 |
table_names = var, |
|
205 |
.stats = "prop_ci", |
|
206 |
.stat_names = NULL, |
|
207 |
.formats = list(prop_ci = "(xx.xx, xx.xx)"), |
|
208 |
.labels = NULL, |
|
209 |
.indent_mods = NULL) { |
|
210 |
# Process standard extra arguments
|
|
211 | 1x |
extra_args <- list(".stats" = .stats) |
212 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
213 | 1x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
214 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
215 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
216 | ||
217 |
# Process additional arguments to the statistic function
|
|
218 | 1x |
extra_args <- c(extra_args, ...) |
219 | ||
220 |
# Append additional info from layout to the analysis function
|
|
221 | 1x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
222 | 1x |
formals(a_length_proportion) <- c(formals(a_length_proportion), extra_args[[".additional_fun_parameters"]]) |
223 | ||
224 | 1x |
lyt <- split_rows_by(lyt, var = var) |
225 | 1x |
lyt <- summarize_row_groups(lyt, na_str = na_str) |
226 | ||
227 | 1x |
analyze( |
228 | 1x |
lyt = lyt, |
229 | 1x |
vars = var, |
230 | 1x |
afun = a_length_proportion, |
231 | 1x |
na_str = na_str, |
232 | 1x |
nested = nested, |
233 | 1x |
extra_args = extra_args, |
234 | 1x |
show_labels = show_labels, |
235 | 1x |
table_names = table_names |
236 |
)
|
|
237 |
}
|
1 |
#' Count number of patients
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [analyze_num_patients()] creates a layout element to count total numbers of unique or
|
|
6 |
#' non-unique patients. The primary analysis variable `vars` is used to uniquely identify patients.
|
|
7 |
#'
|
|
8 |
#' The `count_by` variable can be used to identify non-unique patients such that the number of patients with a unique
|
|
9 |
#' combination of values in `vars` and `count_by` will be returned instead as the `nonunique` statistic. The `required`
|
|
10 |
#' variable can be used to specify a variable required to be non-missing for the record to be included in the counts.
|
|
11 |
#'
|
|
12 |
#' The summarize function [summarize_num_patients()] performs the same function as [analyze_num_patients()] except it
|
|
13 |
#' creates content rows, not data rows, to summarize the current table row/column context and operates on the level of
|
|
14 |
#' the latest row split or the root of the table if no row splits have occurred.
|
|
15 |
#'
|
|
16 |
#' @inheritParams argument_convention
|
|
17 |
#' @param required (`character` or `NULL`)\cr name of a variable that is required to be non-missing.
|
|
18 |
#' @param count_by (`character` or `NULL`)\cr name of a variable to be combined with `vars` when counting
|
|
19 |
#' `nonunique` records.
|
|
20 |
#' @param unique_count_suffix (`flag`)\cr whether the `"(n)"` suffix should be added to `unique_count` labels.
|
|
21 |
#' Defaults to `TRUE`.
|
|
22 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
23 |
#'
|
|
24 |
#' Options are: ``r shQuote(get_stats("summarize_num_patients"), type = "sh")``
|
|
25 |
#'
|
|
26 |
#' @name summarize_num_patients
|
|
27 |
#' @order 1
|
|
28 |
NULL
|
|
29 | ||
30 |
#' @describeIn summarize_num_patients Statistics function which counts the number of
|
|
31 |
#' unique patients, the corresponding percentage taken with respect to the
|
|
32 |
#' total number of patients, and the number of non-unique patients.
|
|
33 |
#'
|
|
34 |
#' @param x (`character` or `factor`)\cr vector of patient IDs.
|
|
35 |
#'
|
|
36 |
#' @return
|
|
37 |
#' * `s_num_patients()` returns a named `list` of 3 statistics:
|
|
38 |
#' * `unique`: Vector of counts and percentages.
|
|
39 |
#' * `nonunique`: Vector of counts.
|
|
40 |
#' * `unique_count`: Counts.
|
|
41 |
#'
|
|
42 |
#' @examples
|
|
43 |
#' # Use the statistics function to count number of unique and nonunique patients.
|
|
44 |
#' s_num_patients(x = as.character(c(1, 1, 1, 2, 4, NA)), labelstr = "", .N_col = 6L)
|
|
45 |
#' s_num_patients(
|
|
46 |
#' x = as.character(c(1, 1, 1, 2, 4, NA)),
|
|
47 |
#' labelstr = "",
|
|
48 |
#' .N_col = 6L,
|
|
49 |
#' count_by = c(1, 1, 2, 1, 1, 1)
|
|
50 |
#' )
|
|
51 |
#'
|
|
52 |
#' @export
|
|
53 |
s_num_patients <- function(x, |
|
54 |
labelstr,
|
|
55 |
.N_col, # nolint |
|
56 |
...,
|
|
57 |
count_by = NULL, |
|
58 |
unique_count_suffix = TRUE) { |
|
59 | 181x |
checkmate::assert_string(labelstr) |
60 | 181x |
checkmate::assert_count(.N_col) |
61 | 181x |
checkmate::assert_multi_class(x, classes = c("factor", "character")) |
62 | 181x |
checkmate::assert_flag(unique_count_suffix) |
63 | ||
64 | 181x |
count1 <- n_available(unique(x)) |
65 | 181x |
count2 <- n_available(x) |
66 | ||
67 | 181x |
if (!is.null(count_by)) { |
68 | 16x |
checkmate::assert_vector(count_by, len = length(x)) |
69 | 16x |
count2 <- n_available(unique(interaction(x, count_by))) |
70 |
}
|
|
71 | ||
72 | 181x |
out <- list( |
73 | 181x |
unique = formatters::with_label(c(count1, ifelse(count1 == 0 && .N_col == 0, 0, count1 / .N_col)), labelstr), |
74 | 181x |
nonunique = formatters::with_label(count2, labelstr), |
75 | 181x |
unique_count = formatters::with_label( |
76 | 181x |
count1, ifelse(unique_count_suffix, paste0(labelstr, if (nzchar(labelstr)) " ", "(n)"), labelstr) |
77 |
)
|
|
78 |
)
|
|
79 | ||
80 | 181x |
out
|
81 |
}
|
|
82 | ||
83 |
#' @describeIn summarize_num_patients Statistics function which counts the number of unique patients
|
|
84 |
#' in a column (variable), the corresponding percentage taken with respect to the total number of
|
|
85 |
#' patients, and the number of non-unique patients in the column.
|
|
86 |
#'
|
|
87 |
#' @return
|
|
88 |
#' * `s_num_patients_content()` returns the same values as `s_num_patients()`.
|
|
89 |
#'
|
|
90 |
#' @examples
|
|
91 |
#' # Count number of unique and non-unique patients.
|
|
92 |
#'
|
|
93 |
#' df <- data.frame(
|
|
94 |
#' USUBJID = as.character(c(1, 2, 1, 4, NA)),
|
|
95 |
#' EVENT = as.character(c(10, 15, 10, 17, 8))
|
|
96 |
#' )
|
|
97 |
#' s_num_patients_content(df, .N_col = 5, .var = "USUBJID")
|
|
98 |
#'
|
|
99 |
#' df_by_event <- data.frame(
|
|
100 |
#' USUBJID = as.character(c(1, 2, 1, 4, NA)),
|
|
101 |
#' EVENT = c(10, 15, 10, 17, 8)
|
|
102 |
#' )
|
|
103 |
#' s_num_patients_content(df_by_event, .N_col = 5, .var = "USUBJID", count_by = "EVENT")
|
|
104 |
#'
|
|
105 |
#' @export
|
|
106 |
s_num_patients_content <- function(df, |
|
107 |
labelstr = "", |
|
108 |
.N_col, # nolint |
|
109 |
.var,
|
|
110 |
...,
|
|
111 |
required = NULL, |
|
112 |
count_by = NULL, |
|
113 |
unique_count_suffix = TRUE) { |
|
114 | 175x |
checkmate::assert_string(.var) |
115 | 175x |
checkmate::assert_data_frame(df) |
116 | 175x |
if (is.null(count_by)) { |
117 | 162x |
assert_df_with_variables(df, list(id = .var)) |
118 |
} else { |
|
119 | 13x |
assert_df_with_variables(df, list(id = .var, count_by = count_by)) |
120 |
}
|
|
121 | 175x |
if (!is.null(required)) { |
122 | ! |
checkmate::assert_string(required) |
123 | ! |
assert_df_with_variables(df, list(required = required)) |
124 | ! |
df <- df[!is.na(df[[required]]), , drop = FALSE] |
125 |
}
|
|
126 | ||
127 | 175x |
x <- df[[.var]] |
128 | 175x |
y <- if (is.null(count_by)) NULL else df[[count_by]] |
129 | ||
130 | 175x |
s_num_patients( |
131 | 175x |
x = x, |
132 | 175x |
labelstr = labelstr, |
133 | 175x |
.N_col = .N_col, |
134 | 175x |
count_by = y, |
135 | 175x |
unique_count_suffix = unique_count_suffix |
136 |
)
|
|
137 |
}
|
|
138 | ||
139 |
#' @describeIn summarize_num_patients Formatted analysis function which is used as `afun`
|
|
140 |
#' in `analyze_num_patients()` and as `cfun` in `summarize_num_patients()`.
|
|
141 |
#'
|
|
142 |
#' @return
|
|
143 |
#' * `a_num_patients()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
144 |
#'
|
|
145 |
#' @keywords internal
|
|
146 |
a_num_patients <- function(df, |
|
147 |
labelstr = "", |
|
148 |
...,
|
|
149 |
.stats = NULL, |
|
150 |
.stat_names = NULL, |
|
151 |
.formats = NULL, |
|
152 |
.labels = NULL, |
|
153 |
.indent_mods = NULL) { |
|
154 |
# Check for additional parameters to the statistics function
|
|
155 | 86x |
dots_extra_args <- list(...) |
156 | 86x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
157 | 86x |
dots_extra_args$.additional_fun_parameters <- NULL |
158 | ||
159 |
# Check for user-defined functions
|
|
160 | 86x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
161 | 86x |
.stats <- default_and_custom_stats_list$all_stats |
162 | 86x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
163 | ||
164 |
# Apply statistics function
|
|
165 | 86x |
x_stats <- .apply_stat_functions( |
166 | 86x |
default_stat_fnc = s_num_patients_content, |
167 | 86x |
custom_stat_fnc_list = custom_stat_functions, |
168 | 86x |
args_list = c( |
169 | 86x |
df = list(df), |
170 | 86x |
labelstr = list(labelstr), |
171 | 86x |
extra_afun_params,
|
172 | 86x |
dots_extra_args
|
173 |
)
|
|
174 |
)
|
|
175 | ||
176 |
# Fill in formatting defaults
|
|
177 | 86x |
.stats <- get_stats("summarize_num_patients", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) |
178 | 86x |
.formats <- get_formats_from_stats(.stats, .formats) |
179 | 86x |
.labels <- get_labels_from_stats( |
180 | 86x |
.stats, .labels, |
181 | 86x |
tern_defaults = c(lapply(x_stats, attr, "label")[nchar(lapply(x_stats, attr, "label")) > 0], tern_default_labels) |
182 |
)
|
|
183 | 86x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods) |
184 | ||
185 | 86x |
x_stats <- x_stats[.stats] |
186 | ||
187 |
# Auto format handling
|
|
188 | 86x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
189 | ||
190 |
# Get and check statistical names
|
|
191 | 86x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
192 | ||
193 | 86x |
in_rows( |
194 | 86x |
.list = x_stats, |
195 | 86x |
.formats = .formats, |
196 | 86x |
.names = .labels %>% .unlist_keep_nulls(), |
197 | 86x |
.stat_names = .stat_names, |
198 | 86x |
.labels = .labels %>% .unlist_keep_nulls(), |
199 | 86x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
200 |
)
|
|
201 |
}
|
|
202 | ||
203 |
#' @describeIn summarize_num_patients Layout-creating function which can take statistics function arguments
|
|
204 |
#' and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
|
|
205 |
#'
|
|
206 |
#' @return
|
|
207 |
#' * `summarize_num_patients()` returns a layout object suitable for passing to further layouting functions,
|
|
208 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
209 |
#' the statistics from `s_num_patients_content()` to the table layout.
|
|
210 |
#'
|
|
211 |
#' @examples
|
|
212 |
#' # summarize_num_patients
|
|
213 |
#' tbl <- basic_table() %>%
|
|
214 |
#' split_cols_by("ARM") %>%
|
|
215 |
#' split_rows_by("SEX") %>%
|
|
216 |
#' summarize_num_patients("USUBJID", .stats = "unique_count") %>%
|
|
217 |
#' build_table(df)
|
|
218 |
#'
|
|
219 |
#' tbl
|
|
220 |
#'
|
|
221 |
#' @export
|
|
222 |
#' @order 3
|
|
223 |
summarize_num_patients <- function(lyt, |
|
224 |
var,
|
|
225 |
required = NULL, |
|
226 |
count_by = NULL, |
|
227 |
unique_count_suffix = TRUE, |
|
228 |
na_str = default_na_str(), |
|
229 |
riskdiff = FALSE, |
|
230 |
...,
|
|
231 |
.stats = c("unique", "nonunique", "unique_count"), |
|
232 |
.stat_names = NULL, |
|
233 |
.formats = NULL, |
|
234 |
.labels = list( |
|
235 |
unique = "Number of patients with at least one event", |
|
236 |
nonunique = "Number of events" |
|
237 |
),
|
|
238 |
.indent_mods = 0L) { |
|
239 | 17x |
checkmate::assert_flag(riskdiff) |
240 | 17x |
afun <- if (isFALSE(riskdiff)) a_num_patients else afun_riskdiff |
241 | ||
242 |
# Process standard extra arguments
|
|
243 | 17x |
extra_args <- list(".stats" = .stats) |
244 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
245 | 1x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
246 | 17x |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
247 | 17x |
if (is.null(.indent_mods)) { |
248 | ! |
indent_mod <- 0L |
249 | 17x |
} else if (length(.indent_mods) == 1) { |
250 | 17x |
indent_mod <- .indent_mods |
251 |
} else { |
|
252 | ! |
indent_mod <- 0L |
253 | ! |
extra_args[[".indent_mods"]] <- .indent_mods |
254 |
}
|
|
255 | ||
256 |
# Process additional arguments to the statistic function
|
|
257 | 17x |
extra_args <- c( |
258 | 17x |
extra_args,
|
259 | 17x |
required = required, count_by = count_by, unique_count_suffix = unique_count_suffix, |
260 | 17x |
if (!isFALSE(riskdiff)) list(afun = list("s_num_patients_content" = a_num_patients)), |
261 |
...
|
|
262 |
)
|
|
263 | ||
264 |
# Append additional info from layout to the analysis function
|
|
265 | 17x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
266 | 17x |
formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]]) |
267 | ||
268 | 17x |
summarize_row_groups( |
269 | 17x |
lyt = lyt, |
270 | 17x |
var = var, |
271 | 17x |
cfun = afun, |
272 | 17x |
na_str = na_str, |
273 | 17x |
extra_args = extra_args, |
274 | 17x |
indent_mod = indent_mod |
275 |
)
|
|
276 |
}
|
|
277 | ||
278 |
#' @describeIn summarize_num_patients Layout-creating function which can take statistics function arguments
|
|
279 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
280 |
#'
|
|
281 |
#' @return
|
|
282 |
#' * `analyze_num_patients()` returns a layout object suitable for passing to further layouting functions,
|
|
283 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
284 |
#' the statistics from `s_num_patients_content()` to the table layout.
|
|
285 |
#'
|
|
286 |
#' @details In general, functions that starts with `analyze*` are expected to
|
|
287 |
#' work like [rtables::analyze()], while functions that starts with `summarize*`
|
|
288 |
#' are based upon [rtables::summarize_row_groups()]. The latter provides a
|
|
289 |
#' value for each dividing split in the row and column space, but, being it
|
|
290 |
#' bound to the fundamental splits, it is repeated by design in every page
|
|
291 |
#' when pagination is involved.
|
|
292 |
#'
|
|
293 |
#' @note As opposed to [summarize_num_patients()], this function does not repeat the produced rows.
|
|
294 |
#'
|
|
295 |
#' @examples
|
|
296 |
#' df <- data.frame(
|
|
297 |
#' USUBJID = as.character(c(1, 2, 1, 4, NA, 6, 6, 8, 9)),
|
|
298 |
#' ARM = c("A", "A", "A", "A", "A", "B", "B", "B", "B"),
|
|
299 |
#' AGE = c(10, 15, 10, 17, 8, 11, 11, 19, 17),
|
|
300 |
#' SEX = c("M", "M", "M", "F", "F", "F", "M", "F", "M")
|
|
301 |
#' )
|
|
302 |
#'
|
|
303 |
#' # analyze_num_patients
|
|
304 |
#' tbl <- basic_table() %>%
|
|
305 |
#' split_cols_by("ARM") %>%
|
|
306 |
#' add_colcounts() %>%
|
|
307 |
#' analyze_num_patients("USUBJID", .stats = c("unique")) %>%
|
|
308 |
#' build_table(df)
|
|
309 |
#'
|
|
310 |
#' tbl
|
|
311 |
#'
|
|
312 |
#' @export
|
|
313 |
#' @order 2
|
|
314 |
analyze_num_patients <- function(lyt, |
|
315 |
vars,
|
|
316 |
required = NULL, |
|
317 |
count_by = NULL, |
|
318 |
unique_count_suffix = TRUE, |
|
319 |
na_str = default_na_str(), |
|
320 |
nested = TRUE, |
|
321 |
show_labels = c("default", "visible", "hidden"), |
|
322 |
riskdiff = FALSE, |
|
323 |
...,
|
|
324 |
.stats = c("unique", "nonunique", "unique_count"), |
|
325 |
.stat_names = NULL, |
|
326 |
.formats = NULL, |
|
327 |
.labels = list( |
|
328 |
unique = "Number of patients with at least one event", |
|
329 |
nonunique = "Number of events" |
|
330 |
),
|
|
331 |
.indent_mods = NULL) { |
|
332 | 4x |
checkmate::assert_flag(riskdiff) |
333 | 4x |
afun <- if (isFALSE(riskdiff)) a_num_patients else afun_riskdiff |
334 | ||
335 |
# Process standard extra arguments
|
|
336 | 4x |
extra_args <- list(".stats" = .stats) |
337 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
338 | ! |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
339 | 4x |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
340 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
341 | ||
342 |
# Process additional arguments to the statistic function
|
|
343 | 4x |
extra_args <- c( |
344 | 4x |
extra_args,
|
345 | 4x |
required = required, count_by = count_by, unique_count_suffix = unique_count_suffix, |
346 | 4x |
if (!isFALSE(riskdiff)) list(afun = list("s_num_patients_content" = a_num_patients)), |
347 |
...
|
|
348 |
)
|
|
349 | ||
350 |
# Append additional info from layout to the analysis function
|
|
351 | 4x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
352 | 4x |
formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]]) |
353 | ||
354 | 4x |
analyze( |
355 | 4x |
lyt = lyt, |
356 | 4x |
vars = vars, |
357 | 4x |
afun = afun, |
358 | 4x |
na_str = na_str, |
359 | 4x |
nested = nested, |
360 | 4x |
extra_args = extra_args, |
361 | 4x |
show_labels = show_labels |
362 |
)
|
|
363 |
}
|
1 |
#' Confidence interval for mean
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Convenient function for calculating the mean confidence interval. It calculates the arithmetic as well as the
|
|
6 |
#' geometric mean. It can be used as a `ggplot` helper function for plotting.
|
|
7 |
#'
|
|
8 |
#' @inheritParams argument_convention
|
|
9 |
#' @param n_min (`numeric(1)`)\cr a minimum number of non-missing `x` to estimate the confidence interval for mean.
|
|
10 |
#' @param gg_helper (`flag`)\cr whether output should be aligned for use with `ggplot`s.
|
|
11 |
#' @param geom_mean (`flag`)\cr whether the geometric mean should be calculated.
|
|
12 |
#'
|
|
13 |
#' @return A named `vector` of values `mean_ci_lwr` and `mean_ci_upr`.
|
|
14 |
#'
|
|
15 |
#' @examples
|
|
16 |
#' stat_mean_ci(sample(10), gg_helper = FALSE)
|
|
17 |
#'
|
|
18 |
#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) +
|
|
19 |
#' ggplot2::geom_point()
|
|
20 |
#'
|
|
21 |
#' p + ggplot2::stat_summary(
|
|
22 |
#' fun.data = stat_mean_ci,
|
|
23 |
#' geom = "errorbar"
|
|
24 |
#' )
|
|
25 |
#'
|
|
26 |
#' p + ggplot2::stat_summary(
|
|
27 |
#' fun.data = stat_mean_ci,
|
|
28 |
#' fun.args = list(conf_level = 0.5),
|
|
29 |
#' geom = "errorbar"
|
|
30 |
#' )
|
|
31 |
#'
|
|
32 |
#' p + ggplot2::stat_summary(
|
|
33 |
#' fun.data = stat_mean_ci,
|
|
34 |
#' fun.args = list(conf_level = 0.5, geom_mean = TRUE),
|
|
35 |
#' geom = "errorbar"
|
|
36 |
#' )
|
|
37 |
#'
|
|
38 |
#' @export
|
|
39 |
stat_mean_ci <- function(x, |
|
40 |
conf_level = 0.95, |
|
41 |
na.rm = TRUE, # nolint |
|
42 |
n_min = 2, |
|
43 |
gg_helper = TRUE, |
|
44 |
geom_mean = FALSE) { |
|
45 | 2301x |
if (na.rm) { |
46 | 10x |
x <- stats::na.omit(x) |
47 |
}
|
|
48 | 2301x |
n <- length(x) |
49 | ||
50 | 2301x |
if (!geom_mean) { |
51 | 1158x |
m <- mean(x) |
52 |
} else { |
|
53 | 1143x |
negative_values_exist <- any(is.na(x[!is.na(x)]) <- x[!is.na(x)] <= 0) |
54 | 1143x |
if (negative_values_exist) { |
55 | 26x |
m <- NA_real_ |
56 |
} else { |
|
57 | 1117x |
x <- log(x) |
58 | 1117x |
m <- mean(x) |
59 |
}
|
|
60 |
}
|
|
61 | ||
62 | 2301x |
if (n < n_min || is.na(m)) { |
63 | 306x |
ci <- c(mean_ci_lwr = NA_real_, mean_ci_upr = NA_real_) |
64 |
} else { |
|
65 | 1995x |
hci <- stats::qt((1 + conf_level) / 2, df = n - 1) * stats::sd(x) / sqrt(n) |
66 | 1995x |
ci <- c(mean_ci_lwr = m - hci, mean_ci_upr = m + hci) |
67 | 1995x |
if (geom_mean) { |
68 | 986x |
ci <- exp(ci) |
69 |
}
|
|
70 |
}
|
|
71 | ||
72 | 2301x |
if (gg_helper) { |
73 | 4x |
m <- ifelse(is.na(m), NA_real_, m) |
74 | 4x |
ci <- data.frame(y = ifelse(geom_mean, exp(m), m), ymin = ci[[1]], ymax = ci[[2]]) |
75 |
}
|
|
76 | ||
77 | 2301x |
return(ci) |
78 |
}
|
|
79 | ||
80 |
#' Confidence interval for median
|
|
81 |
#'
|
|
82 |
#' @description `r lifecycle::badge("stable")`
|
|
83 |
#'
|
|
84 |
#' Convenient function for calculating the median confidence interval. It can be used as a `ggplot` helper
|
|
85 |
#' function for plotting.
|
|
86 |
#'
|
|
87 |
#' @inheritParams argument_convention
|
|
88 |
#' @param gg_helper (`flag`)\cr whether output should be aligned for use with `ggplot`s.
|
|
89 |
#'
|
|
90 |
#' @details This function was adapted from `DescTools/versions/0.99.35/source`
|
|
91 |
#'
|
|
92 |
#' @return A named `vector` of values `median_ci_lwr` and `median_ci_upr`.
|
|
93 |
#'
|
|
94 |
#' @examples
|
|
95 |
#' stat_median_ci(sample(10), gg_helper = FALSE)
|
|
96 |
#'
|
|
97 |
#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) +
|
|
98 |
#' ggplot2::geom_point()
|
|
99 |
#' p + ggplot2::stat_summary(
|
|
100 |
#' fun.data = stat_median_ci,
|
|
101 |
#' geom = "errorbar"
|
|
102 |
#' )
|
|
103 |
#'
|
|
104 |
#' @export
|
|
105 |
stat_median_ci <- function(x, |
|
106 |
conf_level = 0.95, |
|
107 |
na.rm = TRUE, # nolint |
|
108 |
gg_helper = TRUE) { |
|
109 | 1156x |
x <- unname(x) |
110 | 1156x |
if (na.rm) { |
111 | 9x |
x <- x[!is.na(x)] |
112 |
}
|
|
113 | 1156x |
n <- length(x) |
114 | 1156x |
med <- stats::median(x) |
115 | ||
116 | 1156x |
k <- stats::qbinom(p = (1 - conf_level) / 2, size = n, prob = 0.5, lower.tail = TRUE) |
117 | ||
118 |
# k == 0 - for small samples (e.g. n <= 5) ci can be outside the observed range
|
|
119 | 1156x |
if (k == 0 || is.na(med)) { |
120 | 248x |
ci <- c(median_ci_lwr = NA_real_, median_ci_upr = NA_real_) |
121 | 248x |
empir_conf_level <- NA_real_ |
122 |
} else { |
|
123 | 908x |
x_sort <- sort(x) |
124 | 908x |
ci <- c(median_ci_lwr = x_sort[k], median_ci_upr = x_sort[n - k + 1]) |
125 | 908x |
empir_conf_level <- 1 - 2 * stats::pbinom(k - 1, size = n, prob = 0.5) |
126 |
}
|
|
127 | ||
128 | 1156x |
if (gg_helper) { |
129 | 4x |
ci <- data.frame(y = med, ymin = ci[[1]], ymax = ci[[2]]) |
130 |
}
|
|
131 | ||
132 | 1156x |
attr(ci, "conf_level") <- empir_conf_level |
133 | ||
134 | 1156x |
return(ci) |
135 |
}
|
|
136 | ||
137 |
#' p-Value of the mean
|
|
138 |
#'
|
|
139 |
#' @description `r lifecycle::badge("stable")`
|
|
140 |
#'
|
|
141 |
#' Convenient function for calculating the two-sided p-value of the mean.
|
|
142 |
#'
|
|
143 |
#' @inheritParams argument_convention
|
|
144 |
#' @param n_min (`numeric(1)`)\cr a minimum number of non-missing `x` to estimate the p-value of the mean.
|
|
145 |
#' @param test_mean (`numeric(1)`)\cr mean value to test under the null hypothesis.
|
|
146 |
#'
|
|
147 |
#' @return A p-value.
|
|
148 |
#'
|
|
149 |
#' @examples
|
|
150 |
#' stat_mean_pval(sample(10))
|
|
151 |
#'
|
|
152 |
#' stat_mean_pval(rnorm(10), test_mean = 0.5)
|
|
153 |
#'
|
|
154 |
#' @export
|
|
155 |
stat_mean_pval <- function(x, |
|
156 |
na.rm = TRUE, # nolint |
|
157 |
n_min = 2, |
|
158 |
test_mean = 0) { |
|
159 | 1156x |
if (na.rm) { |
160 | 9x |
x <- stats::na.omit(x) |
161 |
}
|
|
162 | 1156x |
n <- length(x) |
163 | ||
164 | 1156x |
x_mean <- mean(x) |
165 | 1156x |
x_sd <- stats::sd(x) |
166 | ||
167 | 1156x |
if (n < n_min) { |
168 | 140x |
pv <- c(p_value = NA_real_) |
169 |
} else { |
|
170 | 1016x |
x_se <- stats::sd(x) / sqrt(n) |
171 | 1016x |
ttest <- (x_mean - test_mean) / x_se |
172 | 1016x |
pv <- c(p_value = 2 * stats::pt(-abs(ttest), df = n - 1)) |
173 |
}
|
|
174 | ||
175 | 1156x |
return(pv) |
176 |
}
|
|
177 | ||
178 |
#' Proportion difference and confidence interval
|
|
179 |
#'
|
|
180 |
#' @description `r lifecycle::badge("stable")`
|
|
181 |
#'
|
|
182 |
#' Function for calculating the proportion (or risk) difference and confidence interval between arm
|
|
183 |
#' X (reference group) and arm Y. Risk difference is calculated by subtracting cumulative incidence
|
|
184 |
#' in arm Y from cumulative incidence in arm X.
|
|
185 |
#'
|
|
186 |
#' @inheritParams argument_convention
|
|
187 |
#' @param x (`list` of `integer`)\cr list of number of occurrences in arm X (reference group).
|
|
188 |
#' @param y (`list` of `integer`)\cr list of number of occurrences in arm Y. Must be of equal length to `x`.
|
|
189 |
#' @param N_x (`numeric(1)`)\cr total number of records in arm X.
|
|
190 |
#' @param N_y (`numeric(1)`)\cr total number of records in arm Y.
|
|
191 |
#' @param list_names (`character`)\cr names of each variable/level corresponding to pair of proportions in
|
|
192 |
#' `x` and `y`. Must be of equal length to `x` and `y`.
|
|
193 |
#' @param pct (`flag`)\cr whether output should be returned as percentages. Defaults to `TRUE`.
|
|
194 |
#'
|
|
195 |
#' @return List of proportion differences and CIs corresponding to each pair of number of occurrences in `x` and
|
|
196 |
#' `y`. Each list element consists of 3 statistics: proportion difference, CI lower bound, and CI upper bound.
|
|
197 |
#'
|
|
198 |
#' @seealso Split function [add_riskdiff()] which, when used as `split_fun` within [rtables::split_cols_by()]
|
|
199 |
#' with `riskdiff` argument is set to `TRUE` in subsequent analyze functions, adds a column containing
|
|
200 |
#' proportion (risk) difference to an `rtables` layout.
|
|
201 |
#'
|
|
202 |
#' @examples
|
|
203 |
#' stat_propdiff_ci(
|
|
204 |
#' x = list(0.375), y = list(0.01), N_x = 5, N_y = 5, list_names = "x", conf_level = 0.9
|
|
205 |
#' )
|
|
206 |
#'
|
|
207 |
#' stat_propdiff_ci(
|
|
208 |
#' x = list(0.5, 0.75, 1), y = list(0.25, 0.05, 0.5), N_x = 10, N_y = 20, pct = FALSE
|
|
209 |
#' )
|
|
210 |
#'
|
|
211 |
#' @export
|
|
212 |
stat_propdiff_ci <- function(x, |
|
213 |
y,
|
|
214 |
N_x, # nolint |
|
215 |
N_y, # nolint |
|
216 |
list_names = NULL, |
|
217 |
conf_level = 0.95, |
|
218 |
pct = TRUE) { |
|
219 | 62x |
checkmate::assert_list(x, types = "numeric") |
220 | 62x |
checkmate::assert_list(y, types = "numeric", len = length(x)) |
221 | 62x |
checkmate::assert_character(list_names, len = length(x), null.ok = TRUE) |
222 | 62x |
rd_list <- lapply(seq_along(x), function(i) { |
223 | 145x |
p_x <- x[[i]] / N_x |
224 | 145x |
p_y <- y[[i]] / N_y |
225 | 145x |
rd_ci <- p_x - p_y + c(-1, 1) * stats::qnorm((1 + conf_level) / 2) * |
226 | 145x |
sqrt(p_x * (1 - p_x) / N_x + p_y * (1 - p_y) / N_y) |
227 | 145x |
c(p_x - p_y, rd_ci) * ifelse(pct, 100, 1) |
228 |
}) |
|
229 | 62x |
names(rd_list) <- list_names |
230 | 62x |
rd_list
|
231 |
}
|
1 |
#' Summarize change from baseline values or absolute baseline values
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [summarize_change()] creates a layout element to summarize the change from baseline or absolute
|
|
6 |
#' baseline values. The primary analysis variable `vars` indicates the numerical change from baseline results.
|
|
7 |
#'
|
|
8 |
#' Required secondary analysis variables `value` and `baseline_flag` can be supplied to the function via
|
|
9 |
#' the `variables` argument. The `value` element should be the name of the analysis value variable, and the
|
|
10 |
#' `baseline_flag` element should be the name of the flag variable that indicates whether or not records contain
|
|
11 |
#' baseline values. Depending on the baseline flag given, either the absolute baseline values (at baseline)
|
|
12 |
#' or the change from baseline values (post-baseline) are then summarized.
|
|
13 |
#'
|
|
14 |
#' @inheritParams argument_convention
|
|
15 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
16 |
#'
|
|
17 |
#' Options are: ``r shQuote(get_stats("analyze_vars_numeric"), type = "sh")``
|
|
18 |
#'
|
|
19 |
#' @name summarize_change
|
|
20 |
#' @order 1
|
|
21 |
NULL
|
|
22 | ||
23 |
#' @describeIn summarize_change Statistics function that summarizes baseline or post-baseline visits.
|
|
24 |
#'
|
|
25 |
#' @return
|
|
26 |
#' * `s_change_from_baseline()` returns the same values returned by [s_summary.numeric()].
|
|
27 |
#'
|
|
28 |
#' @note The data in `df` must be either all be from baseline or post-baseline visits. Otherwise
|
|
29 |
#' an error will be thrown.
|
|
30 |
#'
|
|
31 |
#' @keywords internal
|
|
32 |
s_change_from_baseline <- function(df, ...) { |
|
33 | 10x |
args_list <- list(...) |
34 | 10x |
.var <- args_list[[".var"]] |
35 | 10x |
variables <- args_list[["variables"]] |
36 | ||
37 | 10x |
checkmate::assert_numeric(df[[variables$value]]) |
38 | 10x |
checkmate::assert_numeric(df[[.var]]) |
39 | 10x |
checkmate::assert_logical(df[[variables$baseline_flag]]) |
40 | 10x |
checkmate::assert_vector(unique(df[[variables$baseline_flag]]), max.len = 1) |
41 | 10x |
assert_df_with_variables(df, c(variables, list(chg = .var))) |
42 | ||
43 | 10x |
combined <- ifelse( |
44 | 10x |
df[[variables$baseline_flag]], |
45 | 10x |
df[[variables$value]], |
46 | 10x |
df[[.var]] |
47 |
)
|
|
48 | 10x |
if (is.logical(combined) && identical(length(combined), 0L)) { |
49 | 1x |
combined <- numeric(0) |
50 |
}
|
|
51 | 10x |
s_summary(combined, ...) |
52 |
}
|
|
53 | ||
54 |
#' @describeIn summarize_change Formatted analysis function which is used as `afun` in `summarize_change()`.
|
|
55 |
#'
|
|
56 |
#' @return
|
|
57 |
#' * `a_change_from_baseline()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
58 |
#'
|
|
59 |
#' @keywords internal
|
|
60 |
a_change_from_baseline <- function(df, |
|
61 |
...,
|
|
62 |
.stats = NULL, |
|
63 |
.stat_names = NULL, |
|
64 |
.formats = NULL, |
|
65 |
.labels = NULL, |
|
66 |
.indent_mods = NULL) { |
|
67 |
# Check for additional parameters to the statistics function
|
|
68 | 8x |
dots_extra_args <- list(...) |
69 | 8x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
70 | 8x |
dots_extra_args$.additional_fun_parameters <- NULL |
71 | ||
72 |
# Check for user-defined functions
|
|
73 | 8x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
74 | 8x |
.stats <- default_and_custom_stats_list$all_stats |
75 | 8x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
76 | ||
77 |
# Apply statistics function
|
|
78 | 8x |
x_stats <- .apply_stat_functions( |
79 | 8x |
default_stat_fnc = s_change_from_baseline, |
80 | 8x |
custom_stat_fnc_list = custom_stat_functions, |
81 | 8x |
args_list = c( |
82 | 8x |
df = list(df), |
83 | 8x |
extra_afun_params,
|
84 | 8x |
dots_extra_args
|
85 |
)
|
|
86 |
)
|
|
87 | ||
88 |
# Fill in with formatting defaults
|
|
89 | 6x |
.stats <- get_stats("analyze_vars_numeric", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) |
90 | 6x |
.formats <- get_formats_from_stats(.stats, .formats) |
91 | 6x |
.labels <- get_labels_from_stats(.stats, .labels) |
92 | 6x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods) |
93 | ||
94 | 6x |
x_stats <- x_stats[.stats] |
95 | ||
96 |
# Auto format handling
|
|
97 | 6x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
98 | ||
99 |
# Get and check statistical names
|
|
100 | 6x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
101 | ||
102 | 6x |
in_rows( |
103 | 6x |
.list = x_stats, |
104 | 6x |
.formats = .formats, |
105 | 6x |
.names = names(.labels), |
106 | 6x |
.stat_names = .stat_names, |
107 | 6x |
.labels = .labels %>% .unlist_keep_nulls(), |
108 | 6x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
109 |
)
|
|
110 |
}
|
|
111 | ||
112 |
#' @describeIn summarize_change Layout-creating function which can take statistics function arguments
|
|
113 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
114 |
#'
|
|
115 |
#' @return
|
|
116 |
#' * `summarize_change()` returns a layout object suitable for passing to further layouting functions,
|
|
117 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
118 |
#' the statistics from `s_change_from_baseline()` to the table layout.
|
|
119 |
#'
|
|
120 |
#' @note To be used after a split on visits in the layout, such that each data subset only contains
|
|
121 |
#' either baseline or post-baseline data.
|
|
122 |
#'
|
|
123 |
#' @examples
|
|
124 |
#' library(dplyr)
|
|
125 |
#'
|
|
126 |
#' # Fabricate dataset
|
|
127 |
#' dta_test <- data.frame(
|
|
128 |
#' USUBJID = rep(1:6, each = 3),
|
|
129 |
#' AVISIT = rep(paste0("V", 1:3), 6),
|
|
130 |
#' ARM = rep(LETTERS[1:3], rep(6, 3)),
|
|
131 |
#' AVAL = c(9:1, rep(NA, 9))
|
|
132 |
#' ) %>%
|
|
133 |
#' mutate(ABLFLL = AVISIT == "V1") %>%
|
|
134 |
#' group_by(USUBJID) %>%
|
|
135 |
#' mutate(
|
|
136 |
#' BLVAL = AVAL[ABLFLL],
|
|
137 |
#' CHG = AVAL - BLVAL
|
|
138 |
#' ) %>%
|
|
139 |
#' ungroup()
|
|
140 |
#'
|
|
141 |
#' results <- basic_table() %>%
|
|
142 |
#' split_cols_by("ARM") %>%
|
|
143 |
#' split_rows_by("AVISIT") %>%
|
|
144 |
#' summarize_change("CHG", variables = list(value = "AVAL", baseline_flag = "ABLFLL")) %>%
|
|
145 |
#' build_table(dta_test)
|
|
146 |
#'
|
|
147 |
#' results
|
|
148 |
#'
|
|
149 |
#' @export
|
|
150 |
#' @order 2
|
|
151 |
summarize_change <- function(lyt, |
|
152 |
vars,
|
|
153 |
variables,
|
|
154 |
var_labels = vars, |
|
155 |
na_str = default_na_str(), |
|
156 |
na_rm = TRUE, |
|
157 |
nested = TRUE, |
|
158 |
show_labels = "default", |
|
159 |
table_names = vars, |
|
160 |
section_div = NA_character_, |
|
161 |
...,
|
|
162 |
.stats = c("n", "mean_sd", "median", "range"), |
|
163 |
.stat_names = NULL, |
|
164 |
.formats = c( |
|
165 |
mean_sd = "xx.xx (xx.xx)", |
|
166 |
mean_se = "xx.xx (xx.xx)", |
|
167 |
median = "xx.xx", |
|
168 |
range = "xx.xx - xx.xx", |
|
169 |
mean_pval = "xx.xx" |
|
170 |
),
|
|
171 |
.labels = NULL, |
|
172 |
.indent_mods = NULL) { |
|
173 |
# Process standard extra arguments
|
|
174 | 4x |
extra_args <- list(".stats" = .stats) |
175 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
176 | 4x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
177 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
178 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
179 | ||
180 |
# Process additional arguments to the statistic function
|
|
181 | 4x |
extra_args <- c( |
182 | 4x |
extra_args,
|
183 | 4x |
variables = list(variables), |
184 | 4x |
na_rm = na_rm, |
185 |
...
|
|
186 |
)
|
|
187 | ||
188 |
# Append additional info from layout to the analysis function
|
|
189 | 4x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
190 | 4x |
formals(a_change_from_baseline) <- c(formals(a_change_from_baseline), extra_args[[".additional_fun_parameters"]]) |
191 | ||
192 | 4x |
analyze( |
193 | 4x |
lyt = lyt, |
194 | 4x |
vars = vars, |
195 | 4x |
afun = a_change_from_baseline, |
196 | 4x |
na_str = na_str, |
197 | 4x |
nested = nested, |
198 | 4x |
extra_args = extra_args, |
199 | 4x |
var_labels = var_labels, |
200 | 4x |
show_labels = show_labels, |
201 | 4x |
table_names = table_names, |
202 | 4x |
inclNAs = !na_rm, |
203 | 4x |
section_div = section_div |
204 |
)
|
|
205 |
}
|
1 |
#' Control function for subgroup treatment effect pattern (STEP) calculations
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' This is an auxiliary function for controlling arguments for STEP calculations.
|
|
6 |
#'
|
|
7 |
#' @param biomarker (`numeric` or `NULL`)\cr optional provision of the numeric biomarker variable, which
|
|
8 |
#' could be used to infer `bandwidth`, see below.
|
|
9 |
#' @param use_percentile (`flag`)\cr if `TRUE`, the running windows are created according to
|
|
10 |
#' quantiles rather than actual values, i.e. the bandwidth refers to the percentage of data
|
|
11 |
#' covered in each window. Suggest `TRUE` if the biomarker variable is not uniformly
|
|
12 |
#' distributed.
|
|
13 |
#' @param bandwidth (`numeric(1)` or `NULL`)\cr indicating the bandwidth of each window.
|
|
14 |
#' Depending on the argument `use_percentile`, it can be either the length of actual-value
|
|
15 |
#' windows on the real biomarker scale, or percentage windows.
|
|
16 |
#' If `use_percentile = TRUE`, it should be a number between 0 and 1.
|
|
17 |
#' If `NULL`, treat the bandwidth to be infinity, which means only one global model will be fitted.
|
|
18 |
#' By default, `0.25` is used for percentage windows and one quarter of the range of the `biomarker`
|
|
19 |
#' variable for actual-value windows.
|
|
20 |
#' @param degree (`integer(1)`)\cr the degree of polynomial function of the biomarker as an interaction term
|
|
21 |
#' with the treatment arm fitted at each window. If 0 (default), then the biomarker variable
|
|
22 |
#' is not included in the model fitted in each biomarker window.
|
|
23 |
#' @param num_points (`integer(1)`)\cr the number of points at which the hazard ratios are estimated. The
|
|
24 |
#' smallest number is 2.
|
|
25 |
#'
|
|
26 |
#' @return A list of components with the same names as the arguments, except `biomarker` which is
|
|
27 |
#' just used to calculate the `bandwidth` in case that actual biomarker windows are requested.
|
|
28 |
#'
|
|
29 |
#' @examples
|
|
30 |
#' # Provide biomarker values and request actual values to be used,
|
|
31 |
#' # so that bandwidth is chosen from range.
|
|
32 |
#' control_step(biomarker = 1:10, use_percentile = FALSE)
|
|
33 |
#'
|
|
34 |
#' # Use a global model with quadratic biomarker interaction term.
|
|
35 |
#' control_step(bandwidth = NULL, degree = 2)
|
|
36 |
#'
|
|
37 |
#' # Reduce number of points to be used.
|
|
38 |
#' control_step(num_points = 10)
|
|
39 |
#'
|
|
40 |
#' @export
|
|
41 |
control_step <- function(biomarker = NULL, |
|
42 |
use_percentile = TRUE, |
|
43 |
bandwidth,
|
|
44 |
degree = 0L, |
|
45 |
num_points = 39L) { |
|
46 | 31x |
checkmate::assert_numeric(biomarker, null.ok = TRUE) |
47 | 30x |
checkmate::assert_flag(use_percentile) |
48 | 30x |
checkmate::assert_int(num_points, lower = 2) |
49 | 29x |
checkmate::assert_count(degree) |
50 | ||
51 | 29x |
if (missing(bandwidth)) { |
52 |
# Infer bandwidth
|
|
53 | 21x |
bandwidth <- if (use_percentile) { |
54 | 18x |
0.25
|
55 | 21x |
} else if (!is.null(biomarker)) { |
56 | 3x |
diff(range(biomarker, na.rm = TRUE)) / 4 |
57 |
} else { |
|
58 | ! |
NULL
|
59 |
}
|
|
60 |
} else { |
|
61 |
# Check bandwidth
|
|
62 | 8x |
if (!is.null(bandwidth)) { |
63 | 5x |
if (use_percentile) { |
64 | 4x |
assert_proportion_value(bandwidth) |
65 |
} else { |
|
66 | 1x |
checkmate::assert_scalar(bandwidth) |
67 | 1x |
checkmate::assert_true(bandwidth > 0) |
68 |
}
|
|
69 |
}
|
|
70 |
}
|
|
71 | 28x |
list( |
72 | 28x |
use_percentile = use_percentile, |
73 | 28x |
bandwidth = bandwidth, |
74 | 28x |
degree = as.integer(degree), |
75 | 28x |
num_points = as.integer(num_points) |
76 |
)
|
|
77 |
}
|
1 |
#' Count number of patients with missed doses by thresholds
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function creates a layout element to calculate cumulative counts of patients with number of missed
|
|
6 |
#' doses at least equal to user-specified threshold values.
|
|
7 |
#'
|
|
8 |
#' This function analyzes numeric variable `vars`, a variable with numbers of missed doses,
|
|
9 |
#' against the threshold values supplied to the `thresholds` argument as a numeric vector. This function
|
|
10 |
#' assumes that every row of the given data frame corresponds to a unique patient.
|
|
11 |
#'
|
|
12 |
#' @inheritParams s_count_cumulative
|
|
13 |
#' @inheritParams argument_convention
|
|
14 |
#' @param thresholds (`numeric`)\cr minimum number of missed doses the patients had.
|
|
15 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
16 |
#'
|
|
17 |
#' Options are: ``r shQuote(get_stats("count_missed_doses"), type = "sh")``
|
|
18 |
#'
|
|
19 |
#' @seealso
|
|
20 |
#' * Relevant description function [d_count_missed_doses()] which generates labels for [count_missed_doses()].
|
|
21 |
#' * Similar analyze function [count_cumulative()] which more generally counts cumulative values and has more
|
|
22 |
#' options for threshold handling, but uses different labels.
|
|
23 |
#'
|
|
24 |
#' @name count_missed_doses
|
|
25 |
#' @order 1
|
|
26 |
NULL
|
|
27 | ||
28 |
#' Description function that calculates labels for `s_count_missed_doses()`
|
|
29 |
#'
|
|
30 |
#' @description `r lifecycle::badge("stable")`
|
|
31 |
#'
|
|
32 |
#' @inheritParams s_count_missed_doses
|
|
33 |
#'
|
|
34 |
#' @return [d_count_missed_doses()] returns a named `character` vector with the labels.
|
|
35 |
#'
|
|
36 |
#' @seealso [s_count_missed_doses()]
|
|
37 |
#'
|
|
38 |
#' @export
|
|
39 |
d_count_missed_doses <- function(thresholds) { |
|
40 | 8x |
paste0("At least ", thresholds, " missed dose", ifelse(thresholds > 1, "s", "")) |
41 |
}
|
|
42 | ||
43 |
#' @describeIn count_missed_doses Statistics function to count patients with missed doses.
|
|
44 |
#'
|
|
45 |
#' @return
|
|
46 |
#' * `s_count_missed_doses()` returns the statistics `n` and `count_fraction` with one element for each threshold.
|
|
47 |
#'
|
|
48 |
#' @keywords internal
|
|
49 |
s_count_missed_doses <- function(x, |
|
50 |
thresholds,
|
|
51 |
.N_col, # nolint |
|
52 |
.N_row, # nolint |
|
53 |
denom = c("N_col", "n", "N_row"), |
|
54 |
...) { |
|
55 | 7x |
stat <- s_count_cumulative( |
56 | 7x |
x = x, |
57 | 7x |
thresholds = thresholds, |
58 | 7x |
lower_tail = FALSE, |
59 | 7x |
include_eq = TRUE, |
60 | 7x |
.N_col = .N_col, |
61 | 7x |
.N_row = .N_row, |
62 | 7x |
denom = denom, |
63 |
...
|
|
64 |
)
|
|
65 | 7x |
labels <- d_count_missed_doses(thresholds) |
66 | 7x |
for (i in seq_along(stat$count_fraction)) { |
67 | 14x |
stat$count_fraction[[i]] <- formatters::with_label(stat$count_fraction[[i]], label = labels[i]) |
68 |
}
|
|
69 | ||
70 | 7x |
c(list(n = n_available(x)), stat) |
71 |
}
|
|
72 | ||
73 |
#' @describeIn count_missed_doses Formatted analysis function which is used as `afun`
|
|
74 |
#' in `count_missed_doses()`.
|
|
75 |
#'
|
|
76 |
#' @return
|
|
77 |
#' * `a_count_missed_doses()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
78 |
#'
|
|
79 |
#' @keywords internal
|
|
80 |
a_count_missed_doses <- function(x, |
|
81 |
...,
|
|
82 |
.stats = NULL, |
|
83 |
.stat_names = NULL, |
|
84 |
.formats = NULL, |
|
85 |
.labels = NULL, |
|
86 |
.indent_mods = NULL) { |
|
87 | 6x |
dots_extra_args <- list(...) |
88 | ||
89 |
# Check if there are user-defined functions
|
|
90 | 6x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
91 | 6x |
.stats <- default_and_custom_stats_list$all_stats |
92 | 6x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
93 | ||
94 |
# Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params)
|
|
95 | 6x |
extra_afun_params <- retrieve_extra_afun_params( |
96 | 6x |
names(dots_extra_args$.additional_fun_parameters) |
97 |
)
|
|
98 | 6x |
dots_extra_args$.additional_fun_parameters <- NULL # After extraction we do not need them anymore |
99 | ||
100 |
# Main statistical functions application
|
|
101 | 6x |
x_stats <- .apply_stat_functions( |
102 | 6x |
default_stat_fnc = s_count_missed_doses, |
103 | 6x |
custom_stat_fnc_list = custom_stat_functions, |
104 | 6x |
args_list = c( |
105 | 6x |
x = list(x), |
106 | 6x |
extra_afun_params,
|
107 | 6x |
dots_extra_args
|
108 |
)
|
|
109 |
)
|
|
110 | ||
111 |
# Fill in with stats defaults if needed
|
|
112 | 6x |
.stats <- get_stats("count_missed_doses", |
113 | 6x |
stats_in = .stats, |
114 | 6x |
custom_stats_in = names(custom_stat_functions) |
115 |
)
|
|
116 | ||
117 | 6x |
x_stats <- x_stats[.stats] |
118 | 6x |
levels_per_stats <- lapply(x_stats, names) |
119 | ||
120 |
# Fill in formats/indents/labels with custom input and defaults
|
|
121 | 6x |
.formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) |
122 | 6x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) |
123 | 6x |
.labels <- get_labels_from_stats( |
124 | 6x |
.stats, .labels, levels_per_stats, |
125 | 6x |
label_attr_from_stats = sapply(.unlist_keep_nulls(x_stats), attr, "label") |
126 |
)
|
|
127 | ||
128 |
# Unlist stats
|
|
129 | 6x |
x_stats <- x_stats %>% |
130 | 6x |
.unlist_keep_nulls() %>% |
131 | 6x |
setNames(names(.formats)) |
132 | ||
133 |
# Auto format handling
|
|
134 | 6x |
.formats <- apply_auto_formatting( |
135 | 6x |
.formats,
|
136 | 6x |
x_stats,
|
137 | 6x |
extra_afun_params$.df_row, |
138 | 6x |
extra_afun_params$.var |
139 |
)
|
|
140 | ||
141 |
# Get and check statistical names from defaults
|
|
142 | 6x |
.stat_names <- get_stat_names(x_stats, .stat_names) # note is x_stats |
143 | ||
144 | 6x |
in_rows( |
145 | 6x |
.list = x_stats, |
146 | 6x |
.formats = .formats, |
147 | 6x |
.names = names(.labels), |
148 | 6x |
.stat_names = .stat_names, |
149 | 6x |
.labels = .labels %>% .unlist_keep_nulls(), |
150 | 6x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
151 |
)
|
|
152 |
}
|
|
153 | ||
154 |
#' @describeIn count_missed_doses Layout-creating function which can take statistics function arguments
|
|
155 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
156 |
#'
|
|
157 |
#' @return
|
|
158 |
#' * `count_missed_doses()` returns a layout object suitable for passing to further layouting functions,
|
|
159 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
160 |
#' the statistics from `s_count_missed_doses()` to the table layout.
|
|
161 |
#'
|
|
162 |
#' @examples
|
|
163 |
#' library(dplyr)
|
|
164 |
#'
|
|
165 |
#' anl <- tern_ex_adsl %>%
|
|
166 |
#' distinct(STUDYID, USUBJID, ARM) %>%
|
|
167 |
#' mutate(
|
|
168 |
#' PARAMCD = "TNDOSMIS",
|
|
169 |
#' PARAM = "Total number of missed doses during study",
|
|
170 |
#' AVAL = sample(0:20, size = nrow(tern_ex_adsl), replace = TRUE),
|
|
171 |
#' AVALC = ""
|
|
172 |
#' )
|
|
173 |
#'
|
|
174 |
#' basic_table() %>%
|
|
175 |
#' split_cols_by("ARM") %>%
|
|
176 |
#' add_colcounts() %>%
|
|
177 |
#' count_missed_doses("AVAL", thresholds = c(1, 5, 10, 15), var_labels = "Missed Doses") %>%
|
|
178 |
#' build_table(anl, alt_counts_df = tern_ex_adsl)
|
|
179 |
#'
|
|
180 |
#' @export
|
|
181 |
#' @order 2
|
|
182 |
count_missed_doses <- function(lyt, |
|
183 |
vars,
|
|
184 |
thresholds,
|
|
185 |
var_labels = vars, |
|
186 |
show_labels = "visible", |
|
187 |
na_str = default_na_str(), |
|
188 |
nested = TRUE, |
|
189 |
table_names = vars, |
|
190 |
...,
|
|
191 |
na_rm = TRUE, |
|
192 |
.stats = c("n", "count_fraction"), |
|
193 |
.stat_names = NULL, |
|
194 |
.formats = NULL, |
|
195 |
.labels = NULL, |
|
196 |
.indent_mods = NULL) { |
|
197 |
# Depending on main functions
|
|
198 | 2x |
extra_args <- list( |
199 | 2x |
"na_rm" = na_rm, |
200 | 2x |
"thresholds" = thresholds, |
201 |
...
|
|
202 |
)
|
|
203 | ||
204 |
# Needed defaults
|
|
205 | 2x |
if (!is.null(.stats)) extra_args[[".stats"]] <- .stats |
206 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
207 | ! |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
208 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
209 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
210 | ||
211 |
# Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params)
|
|
212 | 2x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
213 | 2x |
formals(a_count_missed_doses) <- c( |
214 | 2x |
formals(a_count_missed_doses), |
215 | 2x |
extra_args[[".additional_fun_parameters"]] |
216 |
)
|
|
217 | ||
218 |
# Main {rtables} structural call
|
|
219 | 2x |
analyze( |
220 | 2x |
lyt,
|
221 | 2x |
vars,
|
222 | 2x |
afun = a_count_missed_doses, |
223 | 2x |
na_str = na_str, |
224 | 2x |
inclNAs = !na_rm, |
225 | 2x |
table_names = table_names, |
226 | 2x |
var_labels = var_labels, |
227 | 2x |
show_labels = show_labels, |
228 | 2x |
nested = nested, |
229 | 2x |
extra_args = extra_args |
230 |
)
|
|
231 |
}
|
1 |
#' Count patients with abnormal range values
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [count_abnormal()] creates a layout element to count patients with abnormal analysis range
|
|
6 |
#' values in each direction.
|
|
7 |
#'
|
|
8 |
#' This function analyzes primary analysis variable `var` which indicates abnormal range results.
|
|
9 |
#' Additional analysis variables that can be supplied as a list via the `variables` parameter are
|
|
10 |
#' `id` (defaults to `USUBJID`), a variable to indicate unique subject identifiers, and `baseline`
|
|
11 |
#' (defaults to `BNRIND`), a variable to indicate baseline reference ranges.
|
|
12 |
#'
|
|
13 |
#' For each direction specified via the `abnormal` parameter (e.g. High or Low), a fraction of
|
|
14 |
#' patient counts is returned, with numerator and denominator calculated as follows:
|
|
15 |
#' * `num`: The number of patients with this abnormality recorded while on treatment.
|
|
16 |
#' * `denom`: The total number of patients with at least one post-baseline assessment.
|
|
17 |
#'
|
|
18 |
#' This function assumes that `df` has been filtered to only include post-baseline records.
|
|
19 |
#'
|
|
20 |
#' @inheritParams argument_convention
|
|
21 |
#' @param abnormal (named `list`)\cr list identifying the abnormal range level(s) in `var`. Defaults to
|
|
22 |
#' `list(Low = "LOW", High = "HIGH")` but you can also group different levels into the named list,
|
|
23 |
#' for example, `abnormal = list(Low = c("LOW", "LOW LOW"), High = c("HIGH", "HIGH HIGH"))`.
|
|
24 |
#' @param exclude_base_abn (`flag`)\cr whether to exclude subjects with baseline abnormality
|
|
25 |
#' from numerator and denominator.
|
|
26 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
27 |
#'
|
|
28 |
#' Options are: ``r shQuote(get_stats("abnormal"), type = "sh")``
|
|
29 |
#'
|
|
30 |
#' @note
|
|
31 |
#' * `count_abnormal()` only considers a single variable that contains multiple abnormal levels.
|
|
32 |
#' * `df` should be filtered to only include post-baseline records.
|
|
33 |
#' * The denominator includes patients that may have other abnormal levels at baseline,
|
|
34 |
#' and patients missing baseline records. Patients with these abnormalities at
|
|
35 |
#' baseline can be optionally excluded from numerator and denominator via the
|
|
36 |
#' `exclude_base_abn` parameter.
|
|
37 |
#'
|
|
38 |
#' @name abnormal
|
|
39 |
#' @include formatting_functions.R
|
|
40 |
#' @order 1
|
|
41 |
NULL
|
|
42 | ||
43 |
#' @describeIn abnormal Statistics function which counts patients with abnormal range values
|
|
44 |
#' for a single `abnormal` level.
|
|
45 |
#'
|
|
46 |
#' @return
|
|
47 |
#' * `s_count_abnormal()` returns the statistic `fraction` which is a vector with `num` and `denom` counts of patients.
|
|
48 |
#'
|
|
49 |
#' @keywords internal
|
|
50 |
s_count_abnormal <- function(df, |
|
51 |
.var,
|
|
52 |
abnormal = list(Low = "LOW", High = "HIGH"), |
|
53 |
variables = list(id = "USUBJID", baseline = "BNRIND"), |
|
54 |
exclude_base_abn = FALSE, |
|
55 |
...) { |
|
56 | 8x |
checkmate::assert_list(abnormal, types = "character", names = "named", len = 2, any.missing = FALSE) |
57 | 8x |
checkmate::assert_true(any(unlist(abnormal) %in% levels(df[[.var]]))) |
58 | 8x |
checkmate::assert_factor(df[[.var]]) |
59 | 8x |
checkmate::assert_flag(exclude_base_abn) |
60 | 8x |
assert_df_with_variables(df, c(range = .var, variables)) |
61 | 8x |
checkmate::assert_multi_class(df[[variables$baseline]], classes = c("factor", "character")) |
62 | 8x |
checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character")) |
63 | ||
64 | 8x |
count_abnormal_single <- function(abn_name, abn) { |
65 |
# Patients in the denominator fulfill:
|
|
66 |
# - have at least one post-baseline visit
|
|
67 |
# - their baseline must not be abnormal if `exclude_base_abn`.
|
|
68 | 16x |
if (exclude_base_abn) { |
69 | 8x |
denom_select <- !(df[[variables$baseline]] %in% abn) |
70 |
} else { |
|
71 | 8x |
denom_select <- TRUE |
72 |
}
|
|
73 | 16x |
denom <- length(unique(df[denom_select, variables$id, drop = TRUE])) |
74 | ||
75 |
# Patients in the numerator fulfill:
|
|
76 |
# - have at least one post-baseline visit with the required abnormality level
|
|
77 |
# - are part of the denominator patients.
|
|
78 | 16x |
num_select <- (df[[.var]] %in% abn) & denom_select |
79 | 16x |
num <- length(unique(df[num_select, variables$id, drop = TRUE])) |
80 | ||
81 | 16x |
formatters::with_label(c(num = num, denom = denom), abn_name) |
82 |
}
|
|
83 | ||
84 |
# This will define the abnormal levels theoretically possible for a specific lab parameter
|
|
85 |
# within a split level of a layout.
|
|
86 | 8x |
abnormal_lev <- lapply(abnormal, intersect, levels(df[[.var]])) |
87 | 8x |
abnormal_lev <- abnormal_lev[vapply(abnormal_lev, function(x) length(x) > 0, logical(1))] |
88 | ||
89 | 8x |
result <- sapply(names(abnormal_lev), function(i) count_abnormal_single(i, abnormal_lev[[i]]), simplify = FALSE) |
90 | 8x |
result <- list(fraction = result) |
91 | 8x |
result
|
92 |
}
|
|
93 | ||
94 |
#' @describeIn abnormal Formatted analysis function which is used as `afun` in `count_abnormal()`.
|
|
95 |
#'
|
|
96 |
#' @return
|
|
97 |
#' * `a_count_abnormal()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
98 |
#'
|
|
99 |
#' @keywords internal
|
|
100 |
a_count_abnormal <- function(df, |
|
101 |
...,
|
|
102 |
.stats = NULL, |
|
103 |
.stat_names = NULL, |
|
104 |
.formats = NULL, |
|
105 |
.labels = NULL, |
|
106 |
.indent_mods = NULL) { |
|
107 |
# Check for additional parameters to the statistics function
|
|
108 | 4x |
dots_extra_args <- list(...) |
109 | 4x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
110 | 4x |
dots_extra_args$.additional_fun_parameters <- NULL |
111 | ||
112 |
# Check for user-defined functions
|
|
113 | 4x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
114 | 4x |
.stats <- default_and_custom_stats_list$all_stats |
115 | 4x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
116 | ||
117 |
# Apply statistics function
|
|
118 | 4x |
x_stats <- .apply_stat_functions( |
119 | 4x |
default_stat_fnc = s_count_abnormal, |
120 | 4x |
custom_stat_fnc_list = custom_stat_functions, |
121 | 4x |
args_list = c( |
122 | 4x |
df = list(df), |
123 | 4x |
extra_afun_params,
|
124 | 4x |
dots_extra_args
|
125 |
)
|
|
126 |
)
|
|
127 | ||
128 |
# Fill in formatting defaults
|
|
129 | 4x |
.stats <- get_stats("abnormal", stats_in = .stats, custom_stats_in = names(custom_stat_functions)) |
130 | 4x |
levels_per_stats <- lapply(x_stats, names) |
131 | 4x |
.formats <- get_formats_from_stats(.stats, .formats, levels_per_stats) |
132 | 4x |
.labels <- get_labels_from_stats(.stats, .labels, levels_per_stats) |
133 | 4x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats) |
134 | ||
135 | 4x |
x_stats <- x_stats[.stats] %>% |
136 | 4x |
.unlist_keep_nulls() %>% |
137 | 4x |
setNames(names(.formats)) |
138 | ||
139 |
# Auto format handling
|
|
140 | 4x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
141 | ||
142 |
# Get and check statistical names
|
|
143 | 4x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
144 | ||
145 | 4x |
in_rows( |
146 | 4x |
.list = x_stats, |
147 | 4x |
.formats = .formats, |
148 | 4x |
.names = .labels %>% .unlist_keep_nulls(), |
149 | 4x |
.stat_names = .stat_names, |
150 | 4x |
.labels = .labels %>% .unlist_keep_nulls(), |
151 | 4x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
152 |
)
|
|
153 |
}
|
|
154 | ||
155 |
#' @describeIn abnormal Layout-creating function which can take statistics function arguments
|
|
156 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
157 |
#'
|
|
158 |
#' @return
|
|
159 |
#' * `count_abnormal()` returns a layout object suitable for passing to further layouting functions,
|
|
160 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
161 |
#' the statistics from `s_count_abnormal()` to the table layout.
|
|
162 |
#'
|
|
163 |
#' @examples
|
|
164 |
#' library(dplyr)
|
|
165 |
#'
|
|
166 |
#' df <- data.frame(
|
|
167 |
#' USUBJID = as.character(c(1, 1, 2, 2)),
|
|
168 |
#' ANRIND = factor(c("NORMAL", "LOW", "HIGH", "HIGH")),
|
|
169 |
#' BNRIND = factor(c("NORMAL", "NORMAL", "HIGH", "HIGH")),
|
|
170 |
#' ONTRTFL = c("", "Y", "", "Y"),
|
|
171 |
#' stringsAsFactors = FALSE
|
|
172 |
#' )
|
|
173 |
#'
|
|
174 |
#' # Select only post-baseline records.
|
|
175 |
#' df <- df %>%
|
|
176 |
#' filter(ONTRTFL == "Y")
|
|
177 |
#'
|
|
178 |
#' # Layout creating function.
|
|
179 |
#' basic_table() %>%
|
|
180 |
#' count_abnormal(var = "ANRIND", abnormal = list(high = "HIGH", low = "LOW")) %>%
|
|
181 |
#' build_table(df)
|
|
182 |
#'
|
|
183 |
#' # Passing of statistics function and formatting arguments.
|
|
184 |
#' df2 <- data.frame(
|
|
185 |
#' ID = as.character(c(1, 1, 2, 2)),
|
|
186 |
#' RANGE = factor(c("NORMAL", "LOW", "HIGH", "HIGH")),
|
|
187 |
#' BL_RANGE = factor(c("NORMAL", "NORMAL", "HIGH", "HIGH")),
|
|
188 |
#' ONTRTFL = c("", "Y", "", "Y"),
|
|
189 |
#' stringsAsFactors = FALSE
|
|
190 |
#' )
|
|
191 |
#'
|
|
192 |
#' # Select only post-baseline records.
|
|
193 |
#' df2 <- df2 %>%
|
|
194 |
#' filter(ONTRTFL == "Y")
|
|
195 |
#'
|
|
196 |
#' basic_table() %>%
|
|
197 |
#' count_abnormal(
|
|
198 |
#' var = "RANGE",
|
|
199 |
#' abnormal = list(low = "LOW", high = "HIGH"),
|
|
200 |
#' variables = list(id = "ID", baseline = "BL_RANGE")
|
|
201 |
#' ) %>%
|
|
202 |
#' build_table(df2)
|
|
203 |
#'
|
|
204 |
#' @export
|
|
205 |
#' @order 2
|
|
206 |
count_abnormal <- function(lyt, |
|
207 |
var,
|
|
208 |
abnormal = list(Low = "LOW", High = "HIGH"), |
|
209 |
variables = list(id = "USUBJID", baseline = "BNRIND"), |
|
210 |
exclude_base_abn = FALSE, |
|
211 |
na_str = default_na_str(), |
|
212 |
nested = TRUE, |
|
213 |
...,
|
|
214 |
table_names = var, |
|
215 |
.stats = "fraction", |
|
216 |
.stat_names = NULL, |
|
217 |
.formats = list(fraction = format_fraction), |
|
218 |
.labels = NULL, |
|
219 |
.indent_mods = NULL) { |
|
220 |
# Process standard extra arguments
|
|
221 | 3x |
extra_args <- list(".stats" = .stats) |
222 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
223 | 3x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
224 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
225 | 1x |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
226 | ||
227 |
# Process additional arguments to the statistic function
|
|
228 | 3x |
extra_args <- c( |
229 | 3x |
extra_args,
|
230 | 3x |
"abnormal" = list(abnormal), "variables" = list(variables), "exclude_base_abn" = exclude_base_abn, |
231 |
...
|
|
232 |
)
|
|
233 | ||
234 |
# Append additional info from layout to the analysis function
|
|
235 | 3x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
236 | 3x |
formals(a_count_abnormal) <- c(formals(a_count_abnormal), extra_args[[".additional_fun_parameters"]]) |
237 | ||
238 | 3x |
analyze( |
239 | 3x |
lyt = lyt, |
240 | 3x |
vars = var, |
241 | 3x |
afun = a_count_abnormal, |
242 | 3x |
na_str = na_str, |
243 | 3x |
nested = nested, |
244 | 3x |
extra_args = extra_args, |
245 | 3x |
show_labels = "hidden", |
246 | 3x |
table_names = table_names |
247 |
)
|
|
248 |
}
|
1 |
#' Analyze a pairwise Cox-PH model
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [coxph_pairwise()] creates a layout element to analyze a pairwise Cox-PH model.
|
|
6 |
#'
|
|
7 |
#' This function can return statistics including p-value, hazard ratio (HR), and HR confidence intervals from both
|
|
8 |
#' stratified and unstratified Cox-PH models. The variable(s) to be analyzed is specified via the `vars` argument and
|
|
9 |
#' any stratification factors via the `strata` argument.
|
|
10 |
#'
|
|
11 |
#' @inheritParams argument_convention
|
|
12 |
#' @inheritParams s_surv_time
|
|
13 |
#' @param strata (`character` or `NULL`)\cr variable names indicating stratification factors.
|
|
14 |
#' @param strat `r lifecycle::badge("deprecated")` Please use the `strata` argument instead.
|
|
15 |
#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function
|
|
16 |
#' [control_coxph()]. Some possible parameter options are:
|
|
17 |
#' * `pval_method` (`string`)\cr p-value method for testing the null hypothesis that hazard ratio = 1. Default
|
|
18 |
#' method is `"log-rank"` which comes from [survival::survdiff()], can also be set to `"wald"` or `"likelihood"`
|
|
19 |
#' (from [survival::coxph()]).
|
|
20 |
#' * `ties` (`string`)\cr specifying the method for tie handling. Default is `"efron"`,
|
|
21 |
#' can also be set to `"breslow"` or `"exact"`. See more in [survival::coxph()].
|
|
22 |
#' * `conf_level` (`proportion`)\cr confidence level of the interval for HR.
|
|
23 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
24 |
#'
|
|
25 |
#' Options are: ``r shQuote(get_stats("coxph_pairwise"), type = "sh")``
|
|
26 |
#'
|
|
27 |
#' @name survival_coxph_pairwise
|
|
28 |
#' @order 1
|
|
29 |
NULL
|
|
30 | ||
31 |
#' @describeIn survival_coxph_pairwise Statistics function which analyzes HR, CIs of HR, and p-value of a Cox-PH model.
|
|
32 |
#'
|
|
33 |
#' @return
|
|
34 |
#' * `s_coxph_pairwise()` returns the statistics:
|
|
35 |
#' * `pvalue`: p-value to test the null hypothesis that hazard ratio = 1.
|
|
36 |
#' * `hr`: Hazard ratio.
|
|
37 |
#' * `hr_ci`: Confidence interval for hazard ratio.
|
|
38 |
#' * `n_tot`: Total number of observations.
|
|
39 |
#' * `n_tot_events`: Total number of events.
|
|
40 |
#'
|
|
41 |
#' @keywords internal
|
|
42 |
s_coxph_pairwise <- function(df, |
|
43 |
.ref_group,
|
|
44 |
.in_ref_col,
|
|
45 |
.var,
|
|
46 |
is_event,
|
|
47 |
strata = NULL, |
|
48 |
strat = lifecycle::deprecated(), |
|
49 |
control = control_coxph(), |
|
50 |
...) { |
|
51 | 110x |
if (lifecycle::is_present(strat)) { |
52 | ! |
lifecycle::deprecate_warn("0.9.4", "s_coxph_pairwise(strat)", "s_coxph_pairwise(strata)") |
53 | ! |
strata <- strat |
54 |
}
|
|
55 | ||
56 | 110x |
checkmate::assert_string(.var) |
57 | 110x |
checkmate::assert_numeric(df[[.var]]) |
58 | 110x |
checkmate::assert_logical(df[[is_event]]) |
59 | 110x |
assert_df_with_variables(df, list(tte = .var, is_event = is_event)) |
60 | 110x |
pval_method <- control$pval_method |
61 | 110x |
ties <- control$ties |
62 | 110x |
conf_level <- control$conf_level |
63 | ||
64 | 110x |
if (.in_ref_col) { |
65 | 6x |
return( |
66 | 6x |
list( |
67 | 6x |
pvalue = formatters::with_label(numeric(), paste0("p-value (", pval_method, ")")), |
68 | 6x |
hr = formatters::with_label(numeric(), "Hazard Ratio"), |
69 | 6x |
hr_ci = formatters::with_label(numeric(), f_conf_level(conf_level)), |
70 | 6x |
hr_ci_3d = formatters::with_label(numeric(), paste0("Hazard Ratio (", f_conf_level(conf_level), ")")), |
71 | 6x |
n_tot = formatters::with_label(numeric(), "Total n"), |
72 | 6x |
n_tot_events = formatters::with_label(numeric(), "Total events") |
73 |
)
|
|
74 |
)
|
|
75 |
}
|
|
76 | 104x |
data <- rbind(.ref_group, df) |
77 | 104x |
group <- factor(rep(c("ref", "x"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "x")) |
78 | ||
79 | 104x |
df_cox <- data.frame( |
80 | 104x |
tte = data[[.var]], |
81 | 104x |
is_event = data[[is_event]], |
82 | 104x |
arm = group |
83 |
)
|
|
84 | 104x |
if (is.null(strata)) { |
85 | 91x |
formula_cox <- survival::Surv(tte, is_event) ~ arm |
86 |
} else { |
|
87 | 13x |
formula_cox <- stats::as.formula( |
88 | 13x |
paste0( |
89 | 13x |
"survival::Surv(tte, is_event) ~ arm + strata(",
|
90 | 13x |
paste(strata, collapse = ","), |
91 |
")"
|
|
92 |
)
|
|
93 |
)
|
|
94 | 13x |
df_cox <- cbind(df_cox, data[strata]) |
95 |
}
|
|
96 | 104x |
cox_fit <- survival::coxph( |
97 | 104x |
formula = formula_cox, |
98 | 104x |
data = df_cox, |
99 | 104x |
ties = ties |
100 |
)
|
|
101 | 104x |
sum_cox <- summary(cox_fit, conf.int = conf_level, extend = TRUE) |
102 | 104x |
orginal_survdiff <- survival::survdiff( |
103 | 104x |
formula_cox,
|
104 | 104x |
data = df_cox |
105 |
)
|
|
106 | 104x |
log_rank_pvalue <- 1 - pchisq(orginal_survdiff$chisq, length(orginal_survdiff$n) - 1) |
107 | ||
108 | 104x |
pval <- switch(pval_method, |
109 | 104x |
"wald" = sum_cox$waldtest["pvalue"], |
110 | 104x |
"log-rank" = log_rank_pvalue, # pvalue from original log-rank test survival::survdiff() |
111 | 104x |
"likelihood" = sum_cox$logtest["pvalue"] |
112 |
)
|
|
113 | 104x |
list( |
114 | 104x |
pvalue = formatters::with_label(unname(pval), paste0("p-value (", pval_method, ")")), |
115 | 104x |
hr = formatters::with_label(sum_cox$conf.int[1, 1], "Hazard Ratio"), |
116 | 104x |
hr_ci = formatters::with_label(unname(sum_cox$conf.int[1, 3:4]), f_conf_level(conf_level)), |
117 | 104x |
hr_ci_3d = formatters::with_label( |
118 | 104x |
c(sum_cox$conf.int[1, 1], unname(sum_cox$conf.int[1, 3:4])), |
119 | 104x |
paste0("Hazard Ratio (", f_conf_level(conf_level), ")") |
120 |
),
|
|
121 | 104x |
n_tot = formatters::with_label(sum_cox$n, "Total n"), |
122 | 104x |
n_tot_events = formatters::with_label(sum_cox$nevent, "Total events") |
123 |
)
|
|
124 |
}
|
|
125 | ||
126 |
#' @describeIn survival_coxph_pairwise Formatted analysis function which is used as `afun` in `coxph_pairwise()`.
|
|
127 |
#'
|
|
128 |
#' @return
|
|
129 |
#' * `a_coxph_pairwise()` returns the corresponding list with formatted [rtables::CellValue()].
|
|
130 |
#'
|
|
131 |
#' @keywords internal
|
|
132 |
a_coxph_pairwise <- function(df, |
|
133 |
...,
|
|
134 |
.stats = NULL, |
|
135 |
.stat_names = NULL, |
|
136 |
.formats = NULL, |
|
137 |
.labels = NULL, |
|
138 |
.indent_mods = NULL) { |
|
139 |
# Check for additional parameters to the statistics function
|
|
140 | 18x |
dots_extra_args <- list(...) |
141 | 18x |
extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters)) |
142 | 18x |
dots_extra_args$.additional_fun_parameters <- NULL |
143 | ||
144 |
# Check for user-defined functions
|
|
145 | 18x |
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) |
146 | 18x |
.stats <- default_and_custom_stats_list$all_stats |
147 | 18x |
custom_stat_functions <- default_and_custom_stats_list$custom_stats |
148 | ||
149 |
# Apply statistics function
|
|
150 | 18x |
x_stats <- .apply_stat_functions( |
151 | 18x |
default_stat_fnc = s_coxph_pairwise, |
152 | 18x |
custom_stat_fnc_list = custom_stat_functions, |
153 | 18x |
args_list = c( |
154 | 18x |
df = list(df), |
155 | 18x |
extra_afun_params,
|
156 | 18x |
dots_extra_args
|
157 |
)
|
|
158 |
)
|
|
159 | ||
160 |
# Fill in formatting defaults
|
|
161 | 18x |
.stats <- get_stats("coxph_pairwise", |
162 | 18x |
stats_in = .stats, |
163 | 18x |
custom_stats_in = names(custom_stat_functions) |
164 |
)
|
|
165 | 18x |
x_stats <- x_stats[.stats] |
166 | 18x |
.formats <- get_formats_from_stats(.stats, .formats) |
167 | 18x |
.labels <- get_labels_from_stats( |
168 | 18x |
.stats, .labels, |
169 | 18x |
tern_defaults = c(lapply(x_stats, attr, "label"), tern_default_labels) |
170 |
)
|
|
171 | 18x |
.indent_mods <- get_indents_from_stats(.stats, .indent_mods) |
172 | ||
173 |
# Auto format handling
|
|
174 | 18x |
.formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var) |
175 | ||
176 |
# Get and check statistical names
|
|
177 | 18x |
.stat_names <- get_stat_names(x_stats, .stat_names) |
178 | ||
179 | 18x |
in_rows( |
180 | 18x |
.list = x_stats, |
181 | 18x |
.formats = .formats, |
182 | 18x |
.names = .labels %>% .unlist_keep_nulls(), |
183 | 18x |
.stat_names = .stat_names, |
184 | 18x |
.labels = .labels %>% .unlist_keep_nulls(), |
185 | 18x |
.indent_mods = .indent_mods %>% .unlist_keep_nulls() |
186 |
)
|
|
187 |
}
|
|
188 | ||
189 |
#' @describeIn survival_coxph_pairwise Layout-creating function which can take statistics function arguments
|
|
190 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
191 |
#'
|
|
192 |
#' @return
|
|
193 |
#' * `coxph_pairwise()` returns a layout object suitable for passing to further layouting functions,
|
|
194 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
195 |
#' the statistics from `s_coxph_pairwise()` to the table layout.
|
|
196 |
#'
|
|
197 |
#' @examples
|
|
198 |
#' library(dplyr)
|
|
199 |
#'
|
|
200 |
#' adtte_f <- tern_ex_adtte %>%
|
|
201 |
#' filter(PARAMCD == "OS") %>%
|
|
202 |
#' mutate(is_event = CNSR == 0)
|
|
203 |
#'
|
|
204 |
#' df <- adtte_f %>% filter(ARMCD == "ARM A")
|
|
205 |
#' df_ref_group <- adtte_f %>% filter(ARMCD == "ARM B")
|
|
206 |
#'
|
|
207 |
#' basic_table() %>%
|
|
208 |
#' split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
|
|
209 |
#' add_colcounts() %>%
|
|
210 |
#' coxph_pairwise(
|
|
211 |
#' vars = "AVAL",
|
|
212 |
#' is_event = "is_event",
|
|
213 |
#' var_labels = "Unstratified Analysis"
|
|
214 |
#' ) %>%
|
|
215 |
#' build_table(df = adtte_f)
|
|
216 |
#'
|
|
217 |
#' basic_table() %>%
|
|
218 |
#' split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
|
|
219 |
#' add_colcounts() %>%
|
|
220 |
#' coxph_pairwise(
|
|
221 |
#' vars = "AVAL",
|
|
222 |
#' is_event = "is_event",
|
|
223 |
#' var_labels = "Stratified Analysis",
|
|
224 |
#' strata = "SEX",
|
|
225 |
#' control = control_coxph(pval_method = "wald")
|
|
226 |
#' ) %>%
|
|
227 |
#' build_table(df = adtte_f)
|
|
228 |
#'
|
|
229 |
#' @export
|
|
230 |
#' @order 2
|
|
231 |
coxph_pairwise <- function(lyt, |
|
232 |
vars,
|
|
233 |
strata = NULL, |
|
234 |
control = control_coxph(), |
|
235 |
na_str = default_na_str(), |
|
236 |
nested = TRUE, |
|
237 |
...,
|
|
238 |
var_labels = "CoxPH", |
|
239 |
show_labels = "visible", |
|
240 |
table_names = vars, |
|
241 |
.stats = c("pvalue", "hr", "hr_ci"), |
|
242 |
.stat_names = NULL, |
|
243 |
.formats = NULL, |
|
244 |
.labels = NULL, |
|
245 |
.indent_mods = NULL) { |
|
246 |
# Process standard extra arguments
|
|
247 | 6x |
extra_args <- list(".stats" = .stats) |
248 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
249 | 1x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
250 | ! |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
251 | ! |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
252 | ||
253 |
# Process additional arguments to the statistic function
|
|
254 | 6x |
extra_args <- c( |
255 | 6x |
extra_args,
|
256 | 6x |
strata = list(strata), control = list(control), |
257 |
...
|
|
258 |
)
|
|
259 | ||
260 |
# Append additional info from layout to the analysis function
|
|
261 | 6x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
262 | 6x |
formals(a_coxph_pairwise) <- c(formals(a_coxph_pairwise), extra_args[[".additional_fun_parameters"]]) |
263 | ||
264 | 6x |
analyze( |
265 | 6x |
lyt = lyt, |
266 | 6x |
vars = vars, |
267 | 6x |
afun = a_coxph_pairwise, |
268 | 6x |
na_str = na_str, |
269 | 6x |
nested = nested, |
270 | 6x |
extra_args = extra_args, |
271 | 6x |
var_labels = var_labels, |
272 | 6x |
show_labels = show_labels, |
273 | 6x |
table_names = table_names |
274 |
)
|
|
275 |
}
|
1 |
#' Add titles, footnotes, page Number, and a bounding box to a grid grob
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' This function is useful to label grid grobs (also `ggplot2`, and `lattice` plots)
|
|
6 |
#' with title, footnote, and page numbers.
|
|
7 |
#'
|
|
8 |
#' @inheritParams grid::grob
|
|
9 |
#' @param grob (`grob`)\cr a grid grob object, optionally `NULL` if only a `grob` with the decoration should be shown.
|
|
10 |
#' @param titles (`character`)\cr titles given as a vector of strings that are each separated by a newline and wrapped
|
|
11 |
#' according to the page width.
|
|
12 |
#' @param footnotes (`character`)\cr footnotes. Uses the same formatting rules as `titles`.
|
|
13 |
#' @param page (`string` or `NULL`)\cr page numeration. If `NULL` then no page number is displayed.
|
|
14 |
#' @param width_titles (`grid::unit`)\cr width of titles. Usually defined as all the available space
|
|
15 |
#' `grid::unit(1, "npc")`, it is affected by the parameter `outer_margins`. Right margins (`outer_margins[4]`)
|
|
16 |
#' need to be subtracted to the allowed width.
|
|
17 |
#' @param width_footnotes (`grid::unit`)\cr width of footnotes. Same default and margin correction as `width_titles`.
|
|
18 |
#' @param border (`flag`)\cr whether a border should be drawn around the plot or not.
|
|
19 |
#' @param padding (`grid::unit`)\cr padding. A unit object of length 4. Innermost margin between the plot (`grob`)
|
|
20 |
#' and, possibly, the border of the plot. Usually expressed in 4 identical values (usually `"lines"`). It defaults
|
|
21 |
#' to `grid::unit(rep(1, 4), "lines")`.
|
|
22 |
#' @param margins (`grid::unit`)\cr margins. A unit object of length 4. Margins between the plot and the other
|
|
23 |
#' elements in the list (e.g. titles, plot, and footers). This is usually expressed in 4 `"lines"`, where the
|
|
24 |
#' lateral ones are 0s, while top and bottom are 1s. It defaults to `grid::unit(c(1, 0, 1, 0), "lines")`.
|
|
25 |
#' @param outer_margins (`grid::unit`)\cr outer margins. A unit object of length 4. It defines the general margin of
|
|
26 |
#' the plot, considering also decorations like titles, footnotes, and page numbers. It defaults to
|
|
27 |
#' `grid::unit(c(2, 1.5, 3, 1.5), "cm")`.
|
|
28 |
#' @param gp_titles (`gpar`)\cr a `gpar` object. Mainly used to set different `"fontsize"`.
|
|
29 |
#' @param gp_footnotes (`gpar`)\cr a `gpar` object. Mainly used to set different `"fontsize"`.
|
|
30 |
#'
|
|
31 |
#' @return A grid grob (`gTree`).
|
|
32 |
#'
|
|
33 |
#' @details The titles and footnotes will be ragged, i.e. each title will be wrapped individually.
|
|
34 |
#'
|
|
35 |
#' @examples
|
|
36 |
#' library(grid)
|
|
37 |
#'
|
|
38 |
#' titles <- c(
|
|
39 |
#' "Edgar Anderson's Iris Data",
|
|
40 |
#' paste(
|
|
41 |
#' "This famous (Fisher's or Anderson's) iris data set gives the measurements",
|
|
42 |
#' "in centimeters of the variables sepal length and width and petal length",
|
|
43 |
#' "and width, respectively, for 50 flowers from each of 3 species of iris."
|
|
44 |
#' )
|
|
45 |
#' )
|
|
46 |
#'
|
|
47 |
#' footnotes <- c(
|
|
48 |
#' "The species are Iris setosa, versicolor, and virginica.",
|
|
49 |
#' paste(
|
|
50 |
#' "iris is a data frame with 150 cases (rows) and 5 variables (columns) named",
|
|
51 |
#' "Sepal.Length, Sepal.Width, Petal.Length, Petal.Width, and Species."
|
|
52 |
#' )
|
|
53 |
#' )
|
|
54 |
#'
|
|
55 |
#' ## empty plot
|
|
56 |
#' grid.newpage()
|
|
57 |
#'
|
|
58 |
#' grid.draw(
|
|
59 |
#' decorate_grob(
|
|
60 |
#' NULL,
|
|
61 |
#' titles = titles,
|
|
62 |
#' footnotes = footnotes,
|
|
63 |
#' page = "Page 4 of 10"
|
|
64 |
#' )
|
|
65 |
#' )
|
|
66 |
#'
|
|
67 |
#' # grid
|
|
68 |
#' p <- gTree(
|
|
69 |
#' children = gList(
|
|
70 |
#' rectGrob(),
|
|
71 |
#' xaxisGrob(),
|
|
72 |
#' yaxisGrob(),
|
|
73 |
#' textGrob("Sepal.Length", y = unit(-4, "lines")),
|
|
74 |
#' textGrob("Petal.Length", x = unit(-3.5, "lines"), rot = 90),
|
|
75 |
#' pointsGrob(iris$Sepal.Length, iris$Petal.Length, gp = gpar(col = iris$Species), pch = 16)
|
|
76 |
#' ),
|
|
77 |
#' vp = vpStack(plotViewport(), dataViewport(xData = iris$Sepal.Length, yData = iris$Petal.Length))
|
|
78 |
#' )
|
|
79 |
#' grid.newpage()
|
|
80 |
#' grid.draw(p)
|
|
81 |
#'
|
|
82 |
#' grid.newpage()
|
|
83 |
#' grid.draw(
|
|
84 |
#' decorate_grob(
|
|
85 |
#' grob = p,
|
|
86 |
#' titles = titles,
|
|
87 |
#' footnotes = footnotes,
|
|
88 |
#' page = "Page 6 of 129"
|
|
89 |
#' )
|
|
90 |
#' )
|
|
91 |
#'
|
|
92 |
#' ## with ggplot2
|
|
93 |
#' library(ggplot2)
|
|
94 |
#'
|
|
95 |
#' p_gg <- ggplot2::ggplot(iris, aes(Sepal.Length, Sepal.Width, col = Species)) +
|
|
96 |
#' ggplot2::geom_point()
|
|
97 |
#' p_gg
|
|
98 |
#' p <- ggplotGrob(p_gg)
|
|
99 |
#' grid.newpage()
|
|
100 |
#' grid.draw(
|
|
101 |
#' decorate_grob(
|
|
102 |
#' grob = p,
|
|
103 |
#' titles = titles,
|
|
104 |
#' footnotes = footnotes,
|
|
105 |
#' page = "Page 6 of 129"
|
|
106 |
#' )
|
|
107 |
#' )
|
|
108 |
#'
|
|
109 |
#' ## with lattice
|
|
110 |
#' library(lattice)
|
|
111 |
#'
|
|
112 |
#' xyplot(Sepal.Length ~ Petal.Length, data = iris, col = iris$Species)
|
|
113 |
#' p <- grid.grab()
|
|
114 |
#' grid.newpage()
|
|
115 |
#' grid.draw(
|
|
116 |
#' decorate_grob(
|
|
117 |
#' grob = p,
|
|
118 |
#' titles = titles,
|
|
119 |
#' footnotes = footnotes,
|
|
120 |
#' page = "Page 6 of 129"
|
|
121 |
#' )
|
|
122 |
#' )
|
|
123 |
#'
|
|
124 |
#' # with gridExtra - no borders
|
|
125 |
#' library(gridExtra)
|
|
126 |
#' grid.newpage()
|
|
127 |
#' grid.draw(
|
|
128 |
#' decorate_grob(
|
|
129 |
#' tableGrob(
|
|
130 |
#' head(mtcars)
|
|
131 |
#' ),
|
|
132 |
#' titles = "title",
|
|
133 |
#' footnotes = "footnote",
|
|
134 |
#' border = FALSE
|
|
135 |
#' )
|
|
136 |
#' )
|
|
137 |
#'
|
|
138 |
#' @export
|
|
139 |
decorate_grob <- function(grob, |
|
140 |
titles,
|
|
141 |
footnotes,
|
|
142 |
page = "", |
|
143 |
width_titles = grid::unit(1, "npc"), |
|
144 |
width_footnotes = grid::unit(1, "npc"), |
|
145 |
border = TRUE, |
|
146 |
padding = grid::unit(rep(1, 4), "lines"), |
|
147 |
margins = grid::unit(c(1, 0, 1, 0), "lines"), |
|
148 |
outer_margins = grid::unit(c(2, 1.5, 3, 1.5), "cm"), |
|
149 |
gp_titles = grid::gpar(), |
|
150 |
gp_footnotes = grid::gpar(fontsize = 8), |
|
151 |
name = NULL, |
|
152 |
gp = grid::gpar(), |
|
153 |
vp = NULL) { |
|
154 |
# External margins need to be taken into account when defining the width of titles and footers
|
|
155 |
# because the text is split in advance depending on only the width of the viewport.
|
|
156 | 9x |
if (any(as.numeric(outer_margins) > 0)) { |
157 | 9x |
width_titles <- width_titles - outer_margins[4] |
158 | 9x |
width_footnotes <- width_footnotes - outer_margins[4] |
159 |
}
|
|
160 | ||
161 | 9x |
st_titles <- split_text_grob( |
162 | 9x |
titles,
|
163 | 9x |
x = 0, y = 1, |
164 | 9x |
just = c("left", "top"), |
165 | 9x |
width = width_titles, |
166 | 9x |
vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 1), |
167 | 9x |
gp = gp_titles |
168 |
)
|
|
169 | ||
170 | 9x |
st_footnotes <- split_text_grob( |
171 | 9x |
footnotes,
|
172 | 9x |
x = 0, y = 1, |
173 | 9x |
just = c("left", "top"), |
174 | 9x |
width = width_footnotes, |
175 | 9x |
vp = grid::viewport(layout.pos.row = 3, layout.pos.col = 1), |
176 | 9x |
gp = gp_footnotes |
177 |
)
|
|
178 | ||
179 | 9x |
pg_footnote <- grid::textGrob( |
180 | 9x |
paste("\n", page), |
181 | 9x |
x = 1, y = 0, |
182 | 9x |
just = c("right", "bottom"), |
183 | 9x |
vp = grid::viewport(layout.pos.row = 4, layout.pos.col = 1), |
184 | 9x |
gp = gp_footnotes |
185 |
)
|
|
186 | ||
187 |
# Initial decoration of the grob -> border, paddings, and margins are used here
|
|
188 | 9x |
main_plot <- grid::gTree( |
189 | 9x |
children = grid::gList( |
190 | 9x |
if (border) grid::rectGrob(), |
191 | 9x |
grid::gTree( |
192 | 9x |
children = grid::gList( |
193 | 9x |
grob
|
194 |
),
|
|
195 | 9x |
vp = grid::plotViewport(margins = padding) # innermost margins of the grob plot |
196 |
)
|
|
197 |
),
|
|
198 | 9x |
vp = grid::vpStack( |
199 | 9x |
grid::viewport(layout.pos.row = 2, layout.pos.col = 1), |
200 | 9x |
grid::plotViewport(margins = margins) # margins around the border plot |
201 |
)
|
|
202 |
)
|
|
203 | ||
204 | 9x |
grid::gTree( |
205 | 9x |
grob = grob, |
206 | 9x |
titles = titles, |
207 | 9x |
footnotes = footnotes, |
208 | 9x |
page = page, |
209 | 9x |
width_titles = width_titles, |
210 | 9x |
width_footnotes = width_footnotes, |
211 | 9x |
outer_margins = outer_margins, |
212 | 9x |
gp_titles = gp_titles, |
213 | 9x |
gp_footnotes = gp_footnotes, |
214 | 9x |
children = grid::gList( |
215 | 9x |
grid::gTree( |
216 | 9x |
children = grid::gList( |
217 | 9x |
st_titles,
|
218 | 9x |
main_plot, # main plot with border, padding, and margins |
219 | 9x |
st_footnotes,
|
220 | 9x |
pg_footnote
|
221 |
),
|
|
222 | 9x |
childrenvp = NULL, |
223 | 9x |
name = "titles_grob_footnotes", |
224 | 9x |
vp = grid::vpStack( |
225 | 9x |
grid::plotViewport(margins = outer_margins), # Main external margins |
226 | 9x |
grid::viewport( |
227 | 9x |
layout = grid::grid.layout( |
228 | 9x |
nrow = 4, ncol = 1, |
229 | 9x |
heights = grid::unit.c( |
230 | 9x |
grid::grobHeight(st_titles), |
231 | 9x |
grid::unit(1, "null"), |
232 | 9x |
grid::grobHeight(st_footnotes), |
233 | 9x |
grid::grobHeight(pg_footnote) |
234 |
)
|
|
235 |
)
|
|
236 |
)
|
|
237 |
)
|
|
238 |
)
|
|
239 |
),
|
|
240 | 9x |
name = name, |
241 | 9x |
gp = gp, |
242 | 9x |
vp = vp, |
243 | 9x |
cl = "decoratedGrob" |
244 |
)
|
|
245 |
}
|
|
246 | ||
247 |
# nocov start
|
|
248 |
#' @importFrom grid validDetails
|
|
249 |
#' @noRd
|
|
250 |
validDetails.decoratedGrob <- function(x) { |
|
251 |
checkmate::assert_character(x$titles) |
|
252 |
checkmate::assert_character(x$footnotes) |
|
253 | ||
254 |
if (!is.null(x$grob)) { |
|
255 |
checkmate::assert_true(grid::is.grob(x$grob)) |
|
256 |
}
|
|
257 |
if (length(x$page) == 1) { |
|
258 |
checkmate::assert_character(x$page) |
|
259 |
}
|
|
260 |
if (!grid::is.unit(x$outer_margins)) { |
|
261 |
checkmate::assert_vector(x$outer_margins, len = 4) |
|
262 |
}
|
|
263 |
if (!grid::is.unit(x$margins)) { |
|
264 |
checkmate::assert_vector(x$margins, len = 4) |
|
265 |
}
|
|
266 |
if (!grid::is.unit(x$padding)) { |
|
267 |
checkmate::assert_vector(x$padding, len = 4) |
|
268 |
}
|
|
269 | ||
270 |
x
|
|
271 |
}
|
|
272 | ||
273 |
#' @importFrom grid widthDetails
|
|
274 |
#' @noRd
|
|
275 |
widthDetails.decoratedGrob <- function(x) { |
|
276 |
grid::unit(1, "null") |
|
277 |
}
|
|
278 | ||
279 |
#' @importFrom grid heightDetails
|
|
280 |
#' @noRd
|
|
281 |
heightDetails.decoratedGrob <- function(x) { |
|
282 |
grid::unit(1, "null") |
|
283 |
}
|
|
284 | ||
285 |
#' Split text according to available text width
|
|
286 |
#'
|
|
287 |
#' Dynamically wrap text.
|
|
288 |
#'
|
|
289 |
#' @inheritParams grid::grid.text
|
|
290 |
#' @param text (`string`)\cr the text to wrap.
|
|
291 |
#' @param width (`grid::unit`)\cr a unit object specifying maximum width of text.
|
|
292 |
#'
|
|
293 |
#' @return A text `grob`.
|
|
294 |
#'
|
|
295 |
#' @details This code is taken from `R Graphics by Paul Murell, 2nd edition`
|
|
296 |
#'
|
|
297 |
#' @keywords internal
|
|
298 |
split_text_grob <- function(text, |
|
299 |
x = grid::unit(0.5, "npc"), |
|
300 |
y = grid::unit(0.5, "npc"), |
|
301 |
width = grid::unit(1, "npc"), |
|
302 |
just = "centre", |
|
303 |
hjust = NULL, |
|
304 |
vjust = NULL, |
|
305 |
default.units = "npc", # nolint |
|
306 |
name = NULL, |
|
307 |
gp = grid::gpar(), |
|
308 |
vp = NULL) { |
|
309 |
text <- gsub("\\\\n", "\n", text) # fixing cases of mixed behavior (\n and \\n) |
|
310 | ||
311 |
if (!grid::is.unit(x)) x <- grid::unit(x, default.units) |
|
312 |
if (!grid::is.unit(y)) y <- grid::unit(y, default.units) |
|
313 |
if (!grid::is.unit(width)) width <- grid::unit(width, default.units) |
|
314 |
if (grid::unitType(x) %in% c("sum", "min", "max")) x <- grid::convertUnit(x, default.units) |
|
315 |
if (grid::unitType(y) %in% c("sum", "min", "max")) y <- grid::convertUnit(y, default.units) |
|
316 |
if (grid::unitType(width) %in% c("sum", "min", "max")) width <- grid::convertUnit(width, default.units) |
|
317 | ||
318 |
if (length(gp) > 0) { # account for effect of gp on text width -> it was bugging when text was empty |
|
319 |
horizontal_npc_width_no_gp <- grid::convertWidth( |
|
320 |
grid::grobWidth( |
|
321 |
grid::textGrob( |
|
322 |
paste0(text, collapse = "\n") |
|
323 |
)
|
|
324 |
), "npc", |
|
325 |
valueOnly = TRUE |
|
326 |
)
|
|
327 |
horizontal_npc_width_with_gp <- grid::convertWidth(grid::grobWidth( |
|
328 |
grid::textGrob( |
|
329 |
paste0(text, collapse = "\n"), |
|
330 |
gp = gp |
|
331 |
)
|
|
332 |
), "npc", valueOnly = TRUE) |
|
333 | ||
334 |
# Adapting width to the input gpar (it is normalized so does not matter what is text)
|
|
335 |
width <- width * horizontal_npc_width_no_gp / horizontal_npc_width_with_gp |
|
336 |
}
|
|
337 | ||
338 |
## if it is a fixed unit then we do not need to recalculate when viewport resized
|
|
339 |
if (!inherits(width, "unit.arithmetic") && !is.null(attr(width, "unit")) && |
|
340 |
attr(width, "unit") %in% c("cm", "inches", "mm", "points", "picas", "bigpts", "dida", "cicero", "scaledpts")) { # nolint |
|
341 |
attr(text, "fixed_text") <- paste(vapply(text, split_string, character(1), width = width), collapse = "\n") |
|
342 |
}
|
|
343 | ||
344 |
# Fix for split_string in case of residual \n (otherwise is counted as character)
|
|
345 |
text2 <- unlist( |
|
346 |
strsplit( |
|
347 |
paste0(text, collapse = "\n"), # for "" cases |
|
348 |
"\n"
|
|
349 |
)
|
|
350 |
)
|
|
351 | ||
352 |
# Final grid text with cat-friendly split_string
|
|
353 |
grid::grid.text( |
|
354 |
label = split_string(text2, width), |
|
355 |
x = x, y = y, |
|
356 |
just = just, |
|
357 |
hjust = hjust, |
|
358 |
vjust = vjust, |
|
359 |
rot = 0, |
|
360 |
check.overlap = FALSE, |
|
361 |
name = name, |
|
362 |
gp = gp, |
|
363 |
vp = vp, |
|
364 |
draw = FALSE |
|
365 |
)
|
|
366 |
}
|
|
367 | ||
368 |
#' @importFrom grid validDetails
|
|
369 |
#' @noRd
|
|
370 |
validDetails.dynamicSplitText <- function(x) { |
|
371 |
checkmate::assert_character(x$text) |
|
372 |
checkmate::assert_true(grid::is.unit(x$width)) |
|
373 |
checkmate::assert_vector(x$width, len = 1) |
|
374 |
x
|
|
375 |
}
|
|
376 | ||
377 |
#' @importFrom grid heightDetails
|
|
378 |
#' @noRd
|
|
379 |
heightDetails.dynamicSplitText <- function(x) { |
|
380 |
txt <- if (!is.null(attr(x$text, "fixed_text"))) { |
|
381 |
attr(x$text, "fixed_text") |
|
382 |
} else { |
|
383 |
paste(vapply(x$text, split_string, character(1), width = x$width), collapse = "\n") |
|
384 |
}
|
|
385 |
grid::stringHeight(txt) |
|
386 |
}
|
|
387 | ||
388 |
#' @importFrom grid widthDetails
|
|
389 |
#' @noRd
|
|
390 |
widthDetails.dynamicSplitText <- function(x) { |
|
391 |
x$width |
|
392 |
}
|
|
393 | ||
394 |
#' @importFrom grid drawDetails
|
|
395 |
#' @noRd
|
|
396 |
drawDetails.dynamicSplitText <- function(x, recording) { |
|
397 |
txt <- if (!is.null(attr(x$text, "fixed_text"))) { |
|
398 |
attr(x$text, "fixed_text") |
|
399 |
} else { |
|
400 |
paste(vapply(x$text, split_string, character(1), width = x$width), collapse = "\n") |
|
401 |
}
|
|
402 | ||
403 |
x$width <- NULL |
|
404 |
x$label <- txt |
|
405 |
x$text <- NULL |
|
406 |
class(x) <- c("text", class(x)[-1]) |
|
407 | ||
408 |
grid::grid.draw(x) |
|
409 |
}
|
|
410 |
# nocov end
|
|
411 | ||
412 |
# Adapted from Paul Murell R Graphics 2nd Edition
|
|
413 |
# https://www.stat.auckland.ac.nz/~paul/RG2e/interactgrid-splittext.R
|
|
414 |
split_string <- function(text, width) { |
|
415 | 26x |
strings <- strsplit(text, " ") |
416 | 26x |
out_string <- NA |
417 | 26x |
for (string_i in seq_along(strings)) { |
418 | 48x |
newline_str <- strings[[string_i]] |
419 | 6x |
if (length(newline_str) == 0) newline_str <- "" |
420 | 48x |
if (is.na(out_string[string_i])) { |
421 | 48x |
out_string[string_i] <- newline_str[[1]][[1]] |
422 | 48x |
linewidth <- grid::stringWidth(out_string[string_i]) |
423 |
}
|
|
424 | 48x |
gapwidth <- grid::stringWidth(" ") |
425 | 48x |
availwidth <- as.numeric(width) |
426 | 48x |
if (length(newline_str) > 1) { |
427 | 12x |
for (i in seq(2, length(newline_str))) { |
428 | 184x |
width_i <- grid::stringWidth(newline_str[i]) |
429 |
# Main conversion of allowed text width -> npc units are 0<npc<1. External viewport is used for conversion
|
|
430 | 184x |
if (grid::convertWidth(linewidth + gapwidth + width_i, grid::unitType(width), valueOnly = TRUE) < availwidth) { |
431 | 177x |
sep <- " " |
432 | 177x |
linewidth <- linewidth + gapwidth + width_i |
433 |
} else { |
|
434 | 7x |
sep <- "\n" |
435 | 7x |
linewidth <- width_i |
436 |
}
|
|
437 | 184x |
out_string[string_i] <- paste(out_string[string_i], newline_str[i], sep = sep) |
438 |
}
|
|
439 |
}
|
|
440 |
}
|
|
441 | 26x |
paste(out_string, collapse = "\n") |
442 |
}
|
|
443 | ||
444 |
#' Update page number
|
|
445 |
#'
|
|
446 |
#' Automatically updates page number.
|
|
447 |
#'
|
|
448 |
#' @param npages (`numeric(1)`)\cr total number of pages.
|
|
449 |
#' @param ... arguments passed on to [decorate_grob()].
|
|
450 |
#'
|
|
451 |
#' @return Closure that increments the page number.
|
|
452 |
#'
|
|
453 |
#' @keywords internal
|
|
454 |
decorate_grob_factory <- function(npages, ...) { |
|
455 | 2x |
current_page <- 0 |
456 | 2x |
function(grob) { |
457 | 7x |
current_page <<- current_page + 1 |
458 | 7x |
if (current_page > npages) { |
459 | 1x |
stop(paste("current page is", current_page, "but max.", npages, "specified.")) |
460 |
}
|
|
461 | 6x |
decorate_grob(grob = grob, page = paste("Page", current_page, "of", npages), ...) |
462 |
}
|
|
463 |
}
|
|
464 | ||
465 |
#' Decorate set of `grob`s and add page numbering
|
|
466 |
#'
|
|
467 |
#' @description `r lifecycle::badge("stable")`
|
|
468 |
#'
|
|
469 |
#' Note that this uses the [decorate_grob_factory()] function.
|
|
470 |
#'
|
|
471 |
#' @param grobs (`list` of `grob`)\cr a list of grid grobs.
|
|
472 |
#' @param ... arguments passed on to [decorate_grob()].
|
|
473 |
#'
|
|
474 |
#' @return A decorated grob.
|
|
475 |
#'
|
|
476 |
#' @examples
|
|
477 |
#' library(ggplot2)
|
|
478 |
#' library(grid)
|
|
479 |
#' g <- with(data = iris, {
|
|
480 |
#' list(
|
|
481 |
#' ggplot2::ggplotGrob(
|
|
482 |
#' ggplot2::ggplot(mapping = aes(Sepal.Length, Sepal.Width, col = Species)) +
|
|
483 |
#' ggplot2::geom_point()
|
|
484 |
#' ),
|
|
485 |
#' ggplot2::ggplotGrob(
|
|
486 |
#' ggplot2::ggplot(mapping = aes(Sepal.Length, Petal.Length, col = Species)) +
|
|
487 |
#' ggplot2::geom_point()
|
|
488 |
#' ),
|
|
489 |
#' ggplot2::ggplotGrob(
|
|
490 |
#' ggplot2::ggplot(mapping = aes(Sepal.Length, Petal.Width, col = Species)) +
|
|
491 |
#' ggplot2::geom_point()
|
|
492 |
#' ),
|
|
493 |
#' ggplot2::ggplotGrob(
|
|
494 |
#' ggplot2::ggplot(mapping = aes(Sepal.Width, Petal.Length, col = Species)) +
|
|
495 |
#' ggplot2::geom_point()
|
|
496 |
#' ),
|
|
497 |
#' ggplot2::ggplotGrob(
|
|
498 |
#' ggplot2::ggplot(mapping = aes(Sepal.Width, Petal.Width, col = Species)) +
|
|
499 |
#' ggplot2::geom_point()
|
|
500 |
#' ),
|
|
501 |
#' ggplot2::ggplotGrob(
|
|
502 |
#' ggplot2::ggplot(mapping = aes(Petal.Length, Petal.Width, col = Species)) +
|
|
503 |
#' ggplot2::geom_point()
|
|
504 |
#' )
|
|
505 |
#' )
|
|
506 |
#' })
|
|
507 |
#' lg <- decorate_grob_set(grobs = g, titles = "Hello\nOne\nTwo\nThree", footnotes = "")
|
|
508 |
#'
|
|
509 |
#' draw_grob(lg[[1]])
|
|
510 |
#' draw_grob(lg[[2]])
|
|
511 |
#' draw_grob(lg[[6]])
|
|
512 |
#'
|
|
513 |
#' @export
|
|
514 |
decorate_grob_set <- function(grobs, ...) { |
|
515 | 1x |
n <- length(grobs) |
516 | 1x |
lgf <- decorate_grob_factory(npages = n, ...) |
517 | 1x |
lapply(grobs, lgf) |
518 |
}
|
1 |
#' Apply 1/3 or 1/2 imputation rule to data
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' @inheritParams argument_convention
|
|
6 |
#' @param x_stats (named `list`)\cr a named list of statistics, typically the results of [s_summary()].
|
|
7 |
#' @param stat (`string`)\cr statistic to return the value/NA level of according to the imputation
|
|
8 |
#' rule applied.
|
|
9 |
#' @param imp_rule (`string`)\cr imputation rule setting. Set to `"1/3"` to implement 1/3 imputation
|
|
10 |
#' rule or `"1/2"` to implement 1/2 imputation rule.
|
|
11 |
#' @param post (`flag`)\cr whether the data corresponds to a post-dose time-point (defaults to `FALSE`).
|
|
12 |
#' This parameter is only used when `imp_rule` is set to `"1/3"`.
|
|
13 |
#' @param avalcat_var (`string`)\cr name of variable that indicates whether a row in `df` corresponds
|
|
14 |
#' to an analysis value in category `"BLQ"`, `"LTR"`, `"<PCLLOQ"`, or none of the above
|
|
15 |
#' (defaults to `"AVALCAT1"`). Variable `avalcat_var` must be present in `df`.
|
|
16 |
#'
|
|
17 |
#' @return A `list` containing statistic value (`val`) and NA level (`na_str`) that should be displayed
|
|
18 |
#' according to the specified imputation rule.
|
|
19 |
#'
|
|
20 |
#' @seealso [analyze_vars_in_cols()] where this function can be implemented by setting the `imp_rule`
|
|
21 |
#' argument.
|
|
22 |
#'
|
|
23 |
#' @examples
|
|
24 |
#' set.seed(1)
|
|
25 |
#' df <- data.frame(
|
|
26 |
#' AVAL = runif(50, 0, 1),
|
|
27 |
#' AVALCAT1 = sample(c(1, "BLQ"), 50, replace = TRUE)
|
|
28 |
#' )
|
|
29 |
#' x_stats <- s_summary(df$AVAL)
|
|
30 |
#' imputation_rule(df, x_stats, "max", "1/3")
|
|
31 |
#' imputation_rule(df, x_stats, "geom_mean", "1/3")
|
|
32 |
#' imputation_rule(df, x_stats, "mean", "1/2")
|
|
33 |
#'
|
|
34 |
#' @export
|
|
35 |
imputation_rule <- function(df, x_stats, stat, imp_rule, post = FALSE, avalcat_var = "AVALCAT1") { |
|
36 | 128x |
checkmate::assert_choice(avalcat_var, names(df)) |
37 | 128x |
checkmate::assert_choice(imp_rule, c("1/3", "1/2")) |
38 | 128x |
n_blq <- sum(grepl("BLQ|LTR|<[1-9]|<PCLLOQ", df[[avalcat_var]])) |
39 | 128x |
ltr_blq_ratio <- n_blq / max(1, nrow(df)) |
40 | ||
41 |
# defaults
|
|
42 | 128x |
val <- x_stats[[stat]] |
43 | 128x |
na_str <- "NE" |
44 | ||
45 | 128x |
if (imp_rule == "1/3") { |
46 | 2x |
if (!post && stat == "geom_mean") val <- NA # 1/3_pre_LT, 1/3_pre_GT |
47 | 84x |
if (ltr_blq_ratio > 1 / 3) { |
48 | 63x |
if (stat != "geom_mean") na_str <- "ND" # 1/3_pre_GT, 1/3_post_GT |
49 | 9x |
if (!post && !stat %in% c("median", "max")) val <- NA # 1/3_pre_GT |
50 | 39x |
if (post && !stat %in% c("median", "max", "geom_mean")) val <- NA # 1/3_post_GT |
51 |
}
|
|
52 | 44x |
} else if (imp_rule == "1/2") { |
53 | 44x |
if (ltr_blq_ratio > 1 / 2 && !stat == "max") { |
54 | 12x |
val <- NA # 1/2_GT |
55 | 12x |
na_str <- "ND" # 1/2_GT |
56 |
}
|
|
57 |
}
|
|
58 | ||
59 | 128x |
list(val = val, na_str = na_str) |
60 |
}
|
1 |
#' Cox proportional hazards regression
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Fits a Cox regression model and estimates hazard ratio to describe the effect size in a survival analysis.
|
|
6 |
#'
|
|
7 |
#' @inheritParams argument_convention
|
|
8 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
9 |
#'
|
|
10 |
#' Options are: ``r shQuote(get_stats("summarize_coxreg"), type = "sh")``
|
|
11 |
#'
|
|
12 |
#' @details Cox models are the most commonly used methods to estimate the magnitude of
|
|
13 |
#' the effect in survival analysis. It assumes proportional hazards: the ratio
|
|
14 |
#' of the hazards between groups (e.g., two arms) is constant over time.
|
|
15 |
#' This ratio is referred to as the "hazard ratio" (HR) and is one of the
|
|
16 |
#' most commonly reported metrics to describe the effect size in survival
|
|
17 |
#' analysis (NEST Team, 2020).
|
|
18 |
#'
|
|
19 |
#' @seealso [fit_coxreg] for relevant fitting functions, [h_cox_regression] for relevant
|
|
20 |
#' helper functions, and [tidy_coxreg] for custom tidy methods.
|
|
21 |
#'
|
|
22 |
#' @examples
|
|
23 |
#' library(survival)
|
|
24 |
#'
|
|
25 |
#' # Testing dataset [survival::bladder].
|
|
26 |
#' set.seed(1, kind = "Mersenne-Twister")
|
|
27 |
#' dta_bladder <- with(
|
|
28 |
#' data = bladder[bladder$enum < 5, ],
|
|
29 |
#' tibble::tibble(
|
|
30 |
#' TIME = stop,
|
|
31 |
#' STATUS = event,
|
|
32 |
#' ARM = as.factor(rx),
|
|
33 |
#' COVAR1 = as.factor(enum) %>% formatters::with_label("A Covariate Label"),
|
|
34 |
#' COVAR2 = factor(
|
|
35 |
#' sample(as.factor(enum)),
|
|
36 |
#' levels = 1:4, labels = c("F", "F", "M", "M")
|
|
37 |
#' ) %>% formatters::with_label("Sex (F/M)")
|
|
38 |
#' )
|
|
39 |
#' )
|
|
40 |
#' dta_bladder$AGE <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
|
|
41 |
#' dta_bladder$STUDYID <- factor("X")
|
|
42 |
#'
|
|
43 |
#' u1_variables <- list(
|
|
44 |
#' time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2")
|
|
45 |
#' )
|
|
46 |
#'
|
|
47 |
#' u2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2"))
|
|
48 |
#'
|
|
49 |
#' m1_variables <- list(
|
|
50 |
#' time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2")
|
|
51 |
#' )
|
|
52 |
#'
|
|
53 |
#' m2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2"))
|
|
54 |
#'
|
|
55 |
#' @name cox_regression
|
|
56 |
#' @order 1
|
|
57 |
NULL
|
|
58 | ||
59 |
#' @describeIn cox_regression Statistics function that transforms results tabulated
|
|
60 |
#' from [fit_coxreg_univar()] or [fit_coxreg_multivar()] into a list.
|
|
61 |
#'
|
|
62 |
#' @param model_df (`data.frame`)\cr contains the resulting model fit from a [fit_coxreg]
|
|
63 |
#' function with tidying applied via [broom::tidy()].
|
|
64 |
#' @param .stats (`character`)\cr the names of statistics to be reported among:
|
|
65 |
#' * `n`: number of observations (univariate only)
|
|
66 |
#' * `hr`: hazard ratio
|
|
67 |
#' * `ci`: confidence interval
|
|
68 |
#' * `pval`: p-value of the treatment effect
|
|
69 |
#' * `pval_inter`: p-value of the interaction effect between the treatment and the covariate (univariate only)
|
|
70 |
#' @param .which_vars (`character`)\cr which rows should statistics be returned for from the given model.
|
|
71 |
#' Defaults to `"all"`. Other options include `"var_main"` for main effects, `"inter"` for interaction effects,
|
|
72 |
#' and `"multi_lvl"` for multivariate model covariate level rows. When `.which_vars` is `"all"`, specific
|
|
73 |
#' variables can be selected by specifying `.var_nms`.
|
|
74 |
#' @param .var_nms (`character`)\cr the `term` value of rows in `df` for which `.stats` should be returned. Typically
|
|
75 |
#' this is the name of a variable. If using variable labels, `var` should be a vector of both the desired
|
|
76 |
#' variable name and the variable label in that order to see all `.stats` related to that variable. When `.which_vars`
|
|
77 |
#' is `"var_main"`, `.var_nms` should be only the variable name.
|
|
78 |
#'
|
|
79 |
#' @return
|
|
80 |
#' * `s_coxreg()` returns the selected statistic for from the Cox regression model for the selected variable(s).
|
|
81 |
#'
|
|
82 |
#' @examples
|
|
83 |
#' # s_coxreg
|
|
84 |
#'
|
|
85 |
#' # Univariate
|
|
86 |
#' univar_model <- fit_coxreg_univar(variables = u1_variables, data = dta_bladder)
|
|
87 |
#' df1 <- broom::tidy(univar_model)
|
|
88 |
#'
|
|
89 |
#' s_coxreg(model_df = df1, .stats = "hr")
|
|
90 |
#'
|
|
91 |
#' # Univariate with interactions
|
|
92 |
#' univar_model_inter <- fit_coxreg_univar(
|
|
93 |
#' variables = u1_variables, control = control_coxreg(interaction = TRUE), data = dta_bladder
|
|
94 |
#' )
|
|
95 |
#' df1_inter <- broom::tidy(univar_model_inter)
|
|
96 |
#'
|
|
97 |
#' s_coxreg(model_df = df1_inter, .stats = "hr", .which_vars = "inter", .var_nms = "COVAR1")
|
|
98 |
#'
|
|
99 |
#' # Univariate without treatment arm - only "COVAR2" covariate effects
|
|
100 |
#' univar_covs_model <- fit_coxreg_univar(variables = u2_variables, data = dta_bladder)
|
|
101 |
#' df1_covs <- broom::tidy(univar_covs_model)
|
|
102 |
#'
|
|
103 |
#' s_coxreg(model_df = df1_covs, .stats = "hr", .var_nms = c("COVAR2", "Sex (F/M)"))
|
|
104 |
#'
|
|
105 |
#' # Multivariate.
|
|
106 |
#' multivar_model <- fit_coxreg_multivar(variables = m1_variables, data = dta_bladder)
|
|
107 |
#' df2 <- broom::tidy(multivar_model)
|
|
108 |
#'
|
|
109 |
#' s_coxreg(model_df = df2, .stats = "pval", .which_vars = "var_main", .var_nms = "COVAR1")
|
|
110 |
#' s_coxreg(
|
|
111 |
#' model_df = df2, .stats = "pval", .which_vars = "multi_lvl",
|
|
112 |
#' .var_nms = c("COVAR1", "A Covariate Label")
|
|
113 |
#' )
|
|
114 |
#'
|
|
115 |
#' # Multivariate without treatment arm - only "COVAR1" main effect
|
|
116 |
#' multivar_covs_model <- fit_coxreg_multivar(variables = m2_variables, data = dta_bladder)
|
|
117 |
#' df2_covs <- broom::tidy(multivar_covs_model)
|
|
118 |
#'
|
|
119 |
#' s_coxreg(model_df = df2_covs, .stats = "hr")
|
|
120 |
#'
|
|
121 |
#' @export
|
|
122 |
s_coxreg <- function(model_df, .stats, .which_vars = "all", .var_nms = NULL) { |
|
123 | 291x |
assert_df_with_variables(model_df, list(term = "term", stat = .stats)) |
124 | 291x |
checkmate::assert_multi_class(model_df$term, classes = c("factor", "character")) |
125 | 291x |
model_df$term <- as.character(model_df$term) |
126 | 291x |
.var_nms <- .var_nms[!is.na(.var_nms)] |
127 | ||
128 | 289x |
if (length(.var_nms) > 0) model_df <- model_df[model_df$term %in% .var_nms, ] |
129 | 69x |
if (.which_vars == "multi_lvl") model_df$term <- tail(.var_nms, 1) |
130 | ||
131 |
# We need a list with names corresponding to the stats to display of equal length to the list of stats.
|
|
132 | 291x |
y <- split(model_df, f = model_df$term, drop = FALSE) |
133 | 291x |
y <- stats::setNames(y, nm = rep(.stats, length(y))) |
134 | ||
135 | 291x |
if (.which_vars == "var_main") { |
136 | 128x |
y <- lapply(y, function(x) x[1, ]) # only main effect |
137 | 163x |
} else if (.which_vars %in% c("inter", "multi_lvl")) { |
138 | 120x |
y <- lapply(y, function(x) if (nrow(y[[1]]) > 1) x[-1, ] else x) # exclude main effect |
139 |
}
|
|
140 | ||
141 | 291x |
lapply( |
142 | 291x |
X = y, |
143 | 291x |
FUN = function(x) { |
144 | 295x |
z <- as.list(x[[.stats]]) |
145 | 295x |
stats::setNames(z, nm = x$term_label) |
146 |
}
|
|
147 |
)
|
|
148 |
}
|
|
149 | ||
150 |
#' @describeIn cox_regression Analysis function which is used as `afun` in [rtables::analyze()]
|
|
151 |
#' and `cfun` in [rtables::summarize_row_groups()] within `summarize_coxreg()`.
|
|
152 |
#'
|
|
153 |
#' @param eff (`flag`)\cr whether treatment effect should be calculated. Defaults to `FALSE`.
|
|
154 |
#' @param var_main (`flag`)\cr whether main effects should be calculated. Defaults to `FALSE`.
|
|
155 |
#' @param na_str (`string`)\cr custom string to replace all `NA` values with. Defaults to `""`.
|
|
156 |
#' @param cache_env (`environment`)\cr an environment object used to cache the regression model in order to
|
|
157 |
#' avoid repeatedly fitting the same model for every row in the table. Defaults to `NULL` (no caching).
|
|
158 |
#' @param varlabels (`list`)\cr a named list corresponds to the names of variables found in data, passed
|
|
159 |
#' as a named list and corresponding to time, event, arm, strata, and covariates terms. If arm is missing
|
|
160 |
#' from variables, then only Cox model(s) including the covariates will be fitted and the corresponding
|
|
161 |
#' effect estimates will be tabulated later.
|
|
162 |
#'
|
|
163 |
#' @return
|
|
164 |
#' * `a_coxreg()` returns formatted [rtables::CellValue()].
|
|
165 |
#'
|
|
166 |
#' @examples
|
|
167 |
#' a_coxreg(
|
|
168 |
#' df = dta_bladder,
|
|
169 |
#' labelstr = "Label 1",
|
|
170 |
#' variables = u1_variables,
|
|
171 |
#' .spl_context = list(value = "COVAR1"),
|
|
172 |
#' .stats = "n",
|
|
173 |
#' .formats = "xx"
|
|
174 |
#' )
|
|
175 |
#'
|
|
176 |
#' a_coxreg(
|
|
177 |
#' df = dta_bladder,
|
|
178 |
#' labelstr = "",
|
|
179 |
#' variables = u1_variables,
|
|
180 |
#' .spl_context = list(value = "COVAR2"),
|
|
181 |
#' .stats = "pval",
|
|
182 |
#' .formats = "xx.xxxx"
|
|
183 |
#' )
|
|
184 |
#'
|
|
185 |
#' @export
|
|
186 |
a_coxreg <- function(df, |
|
187 |
labelstr,
|
|
188 |
eff = FALSE, |
|
189 |
var_main = FALSE, |
|
190 |
multivar = FALSE, |
|
191 |
variables,
|
|
192 |
at = list(), |
|
193 |
control = control_coxreg(), |
|
194 |
.spl_context,
|
|
195 |
.stats,
|
|
196 |
.formats,
|
|
197 |
.indent_mods = NULL, |
|
198 |
na_str = "", |
|
199 |
cache_env = NULL) { |
|
200 | 288x |
cov_no_arm <- !multivar && !"arm" %in% names(variables) && control$interaction # special case: univar no arm |
201 | 288x |
cov <- tail(.spl_context$value, 1) # current variable/covariate |
202 | 288x |
var_lbl <- formatters::var_labels(df)[cov] # check for df labels |
203 | 288x |
if (length(labelstr) > 1) { |
204 | 8x |
labelstr <- if (cov %in% names(labelstr)) labelstr[[cov]] else var_lbl # use df labels if none |
205 | 280x |
} else if (!is.na(var_lbl) && labelstr == cov && cov %in% variables$covariates) { |
206 | 67x |
labelstr <- var_lbl |
207 |
}
|
|
208 | 288x |
if (eff || multivar || cov_no_arm) { |
209 | 143x |
control$interaction <- FALSE |
210 |
} else { |
|
211 | 145x |
variables$covariates <- cov |
212 | 50x |
if (var_main) control$interaction <- TRUE |
213 |
}
|
|
214 | ||
215 | 288x |
if (is.null(cache_env[[cov]])) { |
216 | 47x |
if (!multivar) { |
217 | 32x |
model <- fit_coxreg_univar(variables = variables, data = df, at = at, control = control) %>% broom::tidy() |
218 |
} else { |
|
219 | 15x |
model <- fit_coxreg_multivar(variables = variables, data = df, control = control) %>% broom::tidy() |
220 |
}
|
|
221 | 47x |
cache_env[[cov]] <- model |
222 |
} else { |
|
223 | 241x |
model <- cache_env[[cov]] |
224 |
}
|
|
225 | 148x |
if (!multivar && !var_main) model[, "pval_inter"] <- NA_real_ |
226 | ||
227 | 288x |
if (cov_no_arm || (!cov_no_arm && !"arm" %in% names(variables) && is.numeric(df[[cov]]))) { |
228 | 15x |
multivar <- TRUE |
229 | 3x |
if (!cov_no_arm) var_main <- TRUE |
230 |
}
|
|
231 | ||
232 | 288x |
vars_coxreg <- list(which_vars = "all", var_nms = NULL) |
233 | 288x |
if (eff) { |
234 | 65x |
if (multivar && !var_main) { # multivar treatment level |
235 | 12x |
var_lbl_arm <- formatters::var_labels(df)[[variables$arm]] |
236 | 12x |
vars_coxreg[c("var_nms", "which_vars")] <- list(c(variables$arm, var_lbl_arm), "multi_lvl") |
237 |
} else { # treatment effect |
|
238 | 53x |
vars_coxreg["var_nms"] <- variables$arm |
239 | 12x |
if (var_main) vars_coxreg["which_vars"] <- "var_main" |
240 |
}
|
|
241 |
} else { |
|
242 | 223x |
if (!multivar || (multivar && var_main && !is.numeric(df[[cov]]))) { # covariate effect/level |
243 | 166x |
vars_coxreg[c("var_nms", "which_vars")] <- list(cov, "var_main") |
244 | 57x |
} else if (multivar) { # multivar covariate level |
245 | 57x |
vars_coxreg[c("var_nms", "which_vars")] <- list(c(cov, var_lbl), "multi_lvl") |
246 | 12x |
if (var_main) model[cov, .stats] <- NA_real_ |
247 |
}
|
|
248 | 50x |
if (!multivar && !var_main && control$interaction) vars_coxreg["which_vars"] <- "inter" # interaction effect |
249 |
}
|
|
250 | 288x |
var_vals <- s_coxreg(model, .stats, .which_vars = vars_coxreg$which_vars, .var_nms = vars_coxreg$var_nms)[[1]] |
251 | 288x |
var_names <- if (all(grepl("\\(reference = ", names(var_vals))) && labelstr != tail(.spl_context$value, 1)) { |
252 | 27x |
paste(c(labelstr, tail(strsplit(names(var_vals), " ")[[1]], 3)), collapse = " ") # "reference" main effect labels |
253 | 288x |
} else if ((!multivar && !eff && !(!var_main && control$interaction) && nchar(labelstr) > 0) || |
254 | 288x |
(multivar && var_main && is.numeric(df[[cov]]))) { # nolint |
255 | 71x |
labelstr # other main effect labels |
256 | 288x |
} else if (multivar && !eff && !var_main && is.numeric(df[[cov]])) { |
257 | 12x |
"All" # multivar numeric covariate |
258 |
} else { |
|
259 | 178x |
names(var_vals) |
260 |
}
|
|
261 | 288x |
in_rows( |
262 | 288x |
.list = var_vals, .names = var_names, .labels = var_names, .indent_mods = .indent_mods, |
263 | 288x |
.formats = stats::setNames(rep(.formats, length(var_names)), var_names), |
264 | 288x |
.format_na_strs = stats::setNames(rep(na_str, length(var_names)), var_names) |
265 |
)
|
|
266 |
}
|
|
267 | ||
268 |
#' @describeIn cox_regression Layout-creating function which creates a Cox regression summary table
|
|
269 |
#' layout. This function is a wrapper for several `rtables` layouting functions. This function
|
|
270 |
#' is a wrapper for [rtables::analyze_colvars()] and [rtables::summarize_row_groups()].
|
|
271 |
#'
|
|
272 |
#' @inheritParams fit_coxreg_univar
|
|
273 |
#' @param multivar (`flag`)\cr whether multivariate Cox regression should run (defaults to `FALSE`), otherwise
|
|
274 |
#' univariate Cox regression will run.
|
|
275 |
#' @param common_var (`string`)\cr the name of a factor variable in the dataset which takes the same value
|
|
276 |
#' for all rows. This should be created during pre-processing if no such variable currently exists.
|
|
277 |
#' @param .section_div (`string` or `NA`)\cr string which should be repeated as a section divider between sections.
|
|
278 |
#' Defaults to `NA` for no section divider. If a vector of two strings are given, the first will be used between
|
|
279 |
#' treatment and covariate sections and the second between different covariates.
|
|
280 |
#'
|
|
281 |
#' @return
|
|
282 |
#' * `summarize_coxreg()` returns a layout object suitable for passing to further layouting functions,
|
|
283 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add a Cox regression table
|
|
284 |
#' containing the chosen statistics to the table layout.
|
|
285 |
#'
|
|
286 |
#' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()] which also take the `variables`, `data`,
|
|
287 |
#' `at` (univariate only), and `control` arguments but return unformatted univariate and multivariate
|
|
288 |
#' Cox regression models, respectively.
|
|
289 |
#'
|
|
290 |
#' @examples
|
|
291 |
#' # summarize_coxreg
|
|
292 |
#'
|
|
293 |
#' result_univar <- basic_table() %>%
|
|
294 |
#' summarize_coxreg(variables = u1_variables) %>%
|
|
295 |
#' build_table(dta_bladder)
|
|
296 |
#' result_univar
|
|
297 |
#'
|
|
298 |
#' result_univar_covs <- basic_table() %>%
|
|
299 |
#' summarize_coxreg(
|
|
300 |
#' variables = u2_variables,
|
|
301 |
#' ) %>%
|
|
302 |
#' build_table(dta_bladder)
|
|
303 |
#' result_univar_covs
|
|
304 |
#'
|
|
305 |
#' result_multivar <- basic_table() %>%
|
|
306 |
#' summarize_coxreg(
|
|
307 |
#' variables = m1_variables,
|
|
308 |
#' multivar = TRUE,
|
|
309 |
#' ) %>%
|
|
310 |
#' build_table(dta_bladder)
|
|
311 |
#' result_multivar
|
|
312 |
#'
|
|
313 |
#' result_multivar_covs <- basic_table() %>%
|
|
314 |
#' summarize_coxreg(
|
|
315 |
#' variables = m2_variables,
|
|
316 |
#' multivar = TRUE,
|
|
317 |
#' varlabels = c("Covariate 1", "Covariate 2") # custom labels
|
|
318 |
#' ) %>%
|
|
319 |
#' build_table(dta_bladder)
|
|
320 |
#' result_multivar_covs
|
|
321 |
#'
|
|
322 |
#' @export
|
|
323 |
#' @order 2
|
|
324 |
summarize_coxreg <- function(lyt, |
|
325 |
variables,
|
|
326 |
control = control_coxreg(), |
|
327 |
at = list(), |
|
328 |
multivar = FALSE, |
|
329 |
common_var = "STUDYID", |
|
330 |
.stats = c("n", "hr", "ci", "pval", "pval_inter"), |
|
331 |
.formats = c( |
|
332 |
n = "xx", hr = "xx.xx", ci = "(xx.xx, xx.xx)", |
|
333 |
pval = "x.xxxx | (<0.0001)", pval_inter = "x.xxxx | (<0.0001)" |
|
334 |
),
|
|
335 |
varlabels = NULL, |
|
336 |
.indent_mods = NULL, |
|
337 |
na_str = "", |
|
338 |
.section_div = NA_character_) { |
|
339 | 16x |
if (multivar && control$interaction) { |
340 | 1x |
warning(paste( |
341 | 1x |
"Interactions are not available for multivariate cox regression using summarize_coxreg.",
|
342 | 1x |
"The model will be calculated without interaction effects."
|
343 |
)) |
|
344 |
}
|
|
345 | 16x |
if (control$interaction && !"arm" %in% names(variables)) { |
346 | 1x |
stop("To include interactions please specify 'arm' in variables.") |
347 |
}
|
|
348 | ||
349 | 15x |
.stats <- if (!"arm" %in% names(variables) || multivar) { # only valid statistics |
350 | 6x |
intersect(c("hr", "ci", "pval"), .stats) |
351 | 15x |
} else if (control$interaction) { |
352 | 5x |
intersect(c("n", "hr", "ci", "pval", "pval_inter"), .stats) |
353 |
} else { |
|
354 | 4x |
intersect(c("n", "hr", "ci", "pval"), .stats) |
355 |
}
|
|
356 | 15x |
stat_labels <- c( |
357 | 15x |
n = "n", hr = "Hazard Ratio", ci = paste0(control$conf_level * 100, "% CI"), |
358 | 15x |
pval = "p-value", pval_inter = "Interaction p-value" |
359 |
)
|
|
360 | 15x |
stat_labels <- stat_labels[names(stat_labels) %in% .stats] |
361 | 15x |
.formats <- .formats[names(.formats) %in% .stats] |
362 | 15x |
env <- new.env() # create caching environment |
363 | ||
364 | 15x |
lyt <- lyt %>% |
365 | 15x |
split_cols_by_multivar( |
366 | 15x |
vars = rep(common_var, length(.stats)), |
367 | 15x |
varlabels = stat_labels, |
368 | 15x |
extra_args = list( |
369 | 15x |
.stats = .stats, .formats = .formats, .indent_mods = .indent_mods, na_str = rep(na_str, length(.stats)), |
370 | 15x |
cache_env = replicate(length(.stats), list(env)) |
371 |
)
|
|
372 |
)
|
|
373 | ||
374 | 15x |
if ("arm" %in% names(variables)) { # treatment effect |
375 | 13x |
lyt <- lyt %>% |
376 | 13x |
split_rows_by( |
377 | 13x |
common_var,
|
378 | 13x |
split_label = "Treatment:", |
379 | 13x |
label_pos = "visible", |
380 | 13x |
child_labels = "hidden", |
381 | 13x |
section_div = head(.section_div, 1) |
382 |
)
|
|
383 | 13x |
if (!multivar) { |
384 | 9x |
lyt <- lyt %>% |
385 | 9x |
analyze_colvars( |
386 | 9x |
afun = a_coxreg, |
387 | 9x |
na_str = na_str, |
388 | 9x |
extra_args = list( |
389 | 9x |
variables = variables, control = control, multivar = multivar, eff = TRUE, var_main = multivar, |
390 | 9x |
labelstr = "" |
391 |
)
|
|
392 |
)
|
|
393 |
} else { # treatment level effects |
|
394 | 4x |
lyt <- lyt %>% |
395 | 4x |
summarize_row_groups( |
396 | 4x |
cfun = a_coxreg, |
397 | 4x |
na_str = na_str, |
398 | 4x |
extra_args = list( |
399 | 4x |
variables = variables, control = control, multivar = multivar, eff = TRUE, var_main = multivar |
400 |
)
|
|
401 |
) %>% |
|
402 | 4x |
analyze_colvars( |
403 | 4x |
afun = a_coxreg, |
404 | 4x |
na_str = na_str, |
405 | 4x |
extra_args = list(eff = TRUE, control = control, variables = variables, multivar = multivar, labelstr = "") |
406 |
)
|
|
407 |
}
|
|
408 |
}
|
|
409 | ||
410 | 15x |
if ("covariates" %in% names(variables)) { # covariate main effects |
411 | 15x |
lyt <- lyt %>% |
412 | 15x |
split_rows_by_multivar( |
413 | 15x |
vars = variables$covariates, |
414 | 15x |
varlabels = varlabels, |
415 | 15x |
split_label = "Covariate:", |
416 | 15x |
nested = FALSE, |
417 | 15x |
child_labels = if (multivar || control$interaction || !"arm" %in% names(variables)) "default" else "hidden", |
418 | 15x |
section_div = tail(.section_div, 1) |
419 |
)
|
|
420 | 15x |
if (multivar || control$interaction || !"arm" %in% names(variables)) { |
421 | 11x |
lyt <- lyt %>% |
422 | 11x |
summarize_row_groups( |
423 | 11x |
cfun = a_coxreg, |
424 | 11x |
na_str = na_str, |
425 | 11x |
extra_args = list( |
426 | 11x |
variables = variables, at = at, control = control, multivar = multivar, |
427 | 11x |
var_main = if (multivar) multivar else control$interaction |
428 |
)
|
|
429 |
)
|
|
430 |
} else { |
|
431 | 1x |
if (!is.null(varlabels)) names(varlabels) <- variables$covariates |
432 | 4x |
lyt <- lyt %>% |
433 | 4x |
analyze_colvars( |
434 | 4x |
afun = a_coxreg, |
435 | 4x |
na_str = na_str, |
436 | 4x |
extra_args = list( |
437 | 4x |
variables = variables, at = at, control = control, multivar = multivar, |
438 | 4x |
var_main = if (multivar) multivar else control$interaction, |
439 | 4x |
labelstr = if (is.null(varlabels)) "" else varlabels |
440 |
)
|
|
441 |
)
|
|
442 |
}
|
|
443 | ||
444 | 2x |
if (!"arm" %in% names(variables)) control$interaction <- TRUE # special case: univar no arm |
445 | 15x |
if (multivar || control$interaction) { # covariate level effects |
446 | 11x |
lyt <- lyt %>% |
447 | 11x |
analyze_colvars( |
448 | 11x |
afun = a_coxreg, |
449 | 11x |
na_str = na_str, |
450 | 11x |
extra_args = list(variables = variables, at = at, control = control, multivar = multivar, labelstr = ""), |
451 | 11x |
indent_mod = if (!"arm" %in% names(variables) || multivar) 0L else -1L |
452 |
)
|
|
453 |
}
|
|
454 |
}
|
|
455 | ||
456 | 15x |
lyt
|
457 |
}
|
1 |
#' Encode categorical missing values in a data frame
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' This is a helper function to encode missing entries across groups of categorical
|
|
6 |
#' variables in a data frame.
|
|
7 |
#'
|
|
8 |
#' @details Missing entries are those with `NA` or empty strings and will
|
|
9 |
#' be replaced with a specified value. If factor variables include missing
|
|
10 |
#' values, the missing value will be inserted as the last level.
|
|
11 |
#' Similarly, in case character or logical variables should be converted to factors
|
|
12 |
#' with the `char_as_factor` or `logical_as_factor` options, the missing values will
|
|
13 |
#' be set as the last level.
|
|
14 |
#'
|
|
15 |
#' @param data (`data.frame`)\cr data set.
|
|
16 |
#' @param omit_columns (`character`)\cr names of variables from `data` that should
|
|
17 |
#' not be modified by this function.
|
|
18 |
#' @param char_as_factor (`flag`)\cr whether to convert character variables
|
|
19 |
#' in `data` to factors.
|
|
20 |
#' @param logical_as_factor (`flag`)\cr whether to convert logical variables
|
|
21 |
#' in `data` to factors.
|
|
22 |
#' @param na_level (`string`)\cr string used to replace all `NA` or empty
|
|
23 |
#' values inside non-`omit_columns` columns.
|
|
24 |
#'
|
|
25 |
#' @return A `data.frame` with the chosen modifications applied.
|
|
26 |
#'
|
|
27 |
#' @seealso [sas_na()] and [explicit_na()] for other missing data helper functions.
|
|
28 |
#'
|
|
29 |
#' @examples
|
|
30 |
#' my_data <- data.frame(
|
|
31 |
#' u = c(TRUE, FALSE, NA, TRUE),
|
|
32 |
#' v = factor(c("A", NA, NA, NA), levels = c("Z", "A")),
|
|
33 |
#' w = c("A", "B", NA, "C"),
|
|
34 |
#' x = c("D", "E", "F", NA),
|
|
35 |
#' y = c("G", "H", "I", ""),
|
|
36 |
#' z = c(1, 2, 3, 4),
|
|
37 |
#' stringsAsFactors = FALSE
|
|
38 |
#' )
|
|
39 |
#'
|
|
40 |
#' # Example 1
|
|
41 |
#' # Encode missing values in all character or factor columns.
|
|
42 |
#' df_explicit_na(my_data)
|
|
43 |
#' # Also convert logical columns to factor columns.
|
|
44 |
#' df_explicit_na(my_data, logical_as_factor = TRUE)
|
|
45 |
#' # Encode missing values in a subset of columns.
|
|
46 |
#' df_explicit_na(my_data, omit_columns = c("x", "y"))
|
|
47 |
#'
|
|
48 |
#' # Example 2
|
|
49 |
#' # Here we purposefully convert all `M` values to `NA` in the `SEX` variable.
|
|
50 |
#' # After running `df_explicit_na` the `NA` values are encoded as `<Missing>` but they are not
|
|
51 |
#' # included when generating `rtables`.
|
|
52 |
#' adsl <- tern_ex_adsl
|
|
53 |
#' adsl$SEX[adsl$SEX == "M"] <- NA
|
|
54 |
#' adsl <- df_explicit_na(adsl)
|
|
55 |
#'
|
|
56 |
#' # If you want the `Na` values to be displayed in the table use the `na_level` argument.
|
|
57 |
#' adsl <- tern_ex_adsl
|
|
58 |
#' adsl$SEX[adsl$SEX == "M"] <- NA
|
|
59 |
#' adsl <- df_explicit_na(adsl, na_level = "Missing Values")
|
|
60 |
#'
|
|
61 |
#' # Example 3
|
|
62 |
#' # Numeric variables that have missing values are not altered. This means that any `NA` value in
|
|
63 |
#' # a numeric variable will not be included in the summary statistics, nor will they be included
|
|
64 |
#' # in the denominator value for calculating the percent values.
|
|
65 |
#' adsl <- tern_ex_adsl
|
|
66 |
#' adsl$AGE[adsl$AGE < 30] <- NA
|
|
67 |
#' adsl <- df_explicit_na(adsl)
|
|
68 |
#'
|
|
69 |
#' @export
|
|
70 |
df_explicit_na <- function(data, |
|
71 |
omit_columns = NULL, |
|
72 |
char_as_factor = TRUE, |
|
73 |
logical_as_factor = FALSE, |
|
74 |
na_level = "<Missing>") { |
|
75 | 24x |
checkmate::assert_character(omit_columns, null.ok = TRUE, min.len = 1, any.missing = FALSE) |
76 | 23x |
checkmate::assert_data_frame(data) |
77 | 22x |
checkmate::assert_flag(char_as_factor) |
78 | 21x |
checkmate::assert_flag(logical_as_factor) |
79 | 21x |
checkmate::assert_string(na_level) |
80 | ||
81 | 19x |
target_vars <- if (is.null(omit_columns)) { |
82 | 17x |
names(data) |
83 |
} else { |
|
84 | 2x |
setdiff(names(data), omit_columns) # May have duplicates. |
85 |
}
|
|
86 | 19x |
if (length(target_vars) == 0) { |
87 | 1x |
return(data) |
88 |
}
|
|
89 | ||
90 | 18x |
l_target_vars <- split(target_vars, target_vars) |
91 | ||
92 |
# Makes sure target_vars exist in data and names are not duplicated.
|
|
93 | 18x |
assert_df_with_variables(data, l_target_vars) |
94 | ||
95 | 18x |
for (x in target_vars) { |
96 | 306x |
xi <- data[[x]] |
97 | 306x |
xi_label <- obj_label(xi) |
98 | ||
99 |
# Determine whether to convert character or logical input.
|
|
100 | 306x |
do_char_conversion <- is.character(xi) && char_as_factor |
101 | 306x |
do_logical_conversion <- is.logical(xi) && logical_as_factor |
102 | ||
103 |
# Pre-convert logical to character to deal correctly with replacing NA
|
|
104 |
# values below.
|
|
105 | 306x |
if (do_logical_conversion) { |
106 | 2x |
xi <- as.character(xi) |
107 |
}
|
|
108 | ||
109 | 306x |
if (is.factor(xi) || is.character(xi)) { |
110 |
# Handle empty strings and NA values.
|
|
111 | 219x |
xi <- explicit_na(sas_na(xi), label = na_level) |
112 | ||
113 |
# Convert to factors if requested for the original type,
|
|
114 |
# set na_level as the last value.
|
|
115 | 219x |
if (do_char_conversion || do_logical_conversion) { |
116 | 78x |
levels_xi <- setdiff(sort(unique(xi)), na_level) |
117 | 78x |
if (na_level %in% unique(xi)) { |
118 | 18x |
levels_xi <- c(levels_xi, na_level) |
119 |
}
|
|
120 | ||
121 | 78x |
xi <- factor(xi, levels = levels_xi) |
122 |
}
|
|
123 | ||
124 | 219x |
data[, x] <- formatters::with_label(xi, label = xi_label) |
125 |
}
|
|
126 |
}
|
|
127 | 18x |
return(data) |
128 |
}
|
1 |
#' Subgroup treatment effect pattern (STEP) fit for survival outcome
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' This fits the subgroup treatment effect pattern (STEP) models for a survival outcome. The treatment arm
|
|
6 |
#' variable must have exactly 2 levels, where the first one is taken as reference and the estimated
|
|
7 |
#' hazard ratios are for the comparison of the second level vs. the first one.
|
|
8 |
#'
|
|
9 |
#' The model which is fit is:
|
|
10 |
#'
|
|
11 |
#' `Surv(time, event) ~ arm * poly(biomarker, degree) + covariates + strata(strata)`
|
|
12 |
#'
|
|
13 |
#' where `degree` is specified by `control_step()`.
|
|
14 |
#'
|
|
15 |
#' @inheritParams argument_convention
|
|
16 |
#' @param variables (named `list` of `character`)\cr list of analysis variables: needs `time`, `event`,
|
|
17 |
#' `arm`, `biomarker`, and optional `covariates` and `strata`.
|
|
18 |
#' @param control (named `list`)\cr combined control list from [control_step()] and [control_coxph()].
|
|
19 |
#'
|
|
20 |
#' @return A matrix of class `step`. The first part of the columns describe the subgroup intervals used
|
|
21 |
#' for the biomarker variable, including where the center of the intervals are and their bounds. The
|
|
22 |
#' second part of the columns contain the estimates for the treatment arm comparison.
|
|
23 |
#'
|
|
24 |
#' @note For the default degree 0 the `biomarker` variable is not included in the model.
|
|
25 |
#'
|
|
26 |
#' @seealso [control_step()] and [control_coxph()] for the available customization options.
|
|
27 |
#'
|
|
28 |
#' @examples
|
|
29 |
#' # Testing dataset with just two treatment arms.
|
|
30 |
#' library(dplyr)
|
|
31 |
#'
|
|
32 |
#' adtte_f <- tern_ex_adtte %>%
|
|
33 |
#' filter(
|
|
34 |
#' PARAMCD == "OS",
|
|
35 |
#' ARM %in% c("B: Placebo", "A: Drug X")
|
|
36 |
#' ) %>%
|
|
37 |
#' mutate(
|
|
38 |
#' # Reorder levels of ARM to display reference arm before treatment arm.
|
|
39 |
#' ARM = droplevels(forcats::fct_relevel(ARM, "B: Placebo")),
|
|
40 |
#' is_event = CNSR == 0
|
|
41 |
#' )
|
|
42 |
#' labels <- c("ARM" = "Treatment Arm", "is_event" = "Event Flag")
|
|
43 |
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
|
|
44 |
#'
|
|
45 |
#' variables <- list(
|
|
46 |
#' arm = "ARM",
|
|
47 |
#' biomarker = "BMRKR1",
|
|
48 |
#' covariates = c("AGE", "BMRKR2"),
|
|
49 |
#' event = "is_event",
|
|
50 |
#' time = "AVAL"
|
|
51 |
#' )
|
|
52 |
#'
|
|
53 |
#' # Fit default STEP models: Here a constant treatment effect is estimated in each subgroup.
|
|
54 |
#' step_matrix <- fit_survival_step(
|
|
55 |
#' variables = variables,
|
|
56 |
#' data = adtte_f
|
|
57 |
#' )
|
|
58 |
#' dim(step_matrix)
|
|
59 |
#' head(step_matrix)
|
|
60 |
#'
|
|
61 |
#' # Specify different polynomial degree for the biomarker interaction to use more flexible local
|
|
62 |
#' # models. Or specify different Cox regression options.
|
|
63 |
#' step_matrix2 <- fit_survival_step(
|
|
64 |
#' variables = variables,
|
|
65 |
#' data = adtte_f,
|
|
66 |
#' control = c(control_coxph(conf_level = 0.9), control_step(degree = 2))
|
|
67 |
#' )
|
|
68 |
#'
|
|
69 |
#' # Use a global model with cubic interaction and only 5 points.
|
|
70 |
#' step_matrix3 <- fit_survival_step(
|
|
71 |
#' variables = variables,
|
|
72 |
#' data = adtte_f,
|
|
73 |
#' control = c(control_coxph(), control_step(bandwidth = NULL, degree = 3, num_points = 5L))
|
|
74 |
#' )
|
|
75 |
#'
|
|
76 |
#' @export
|
|
77 |
fit_survival_step <- function(variables, |
|
78 |
data,
|
|
79 |
control = c(control_step(), control_coxph())) { |
|
80 | 4x |
checkmate::assert_list(control) |
81 | 4x |
assert_df_with_variables(data, variables) |
82 | 4x |
data <- data[!is.na(data[[variables$biomarker]]), ] |
83 | 4x |
window_sel <- h_step_window(x = data[[variables$biomarker]], control = control) |
84 | 4x |
interval_center <- window_sel$interval[, "Interval Center"] |
85 | 4x |
form <- h_step_survival_formula(variables = variables, control = control) |
86 | 4x |
estimates <- if (is.null(control$bandwidth)) { |
87 | 1x |
h_step_survival_est( |
88 | 1x |
formula = form, |
89 | 1x |
data = data, |
90 | 1x |
variables = variables, |
91 | 1x |
x = interval_center, |
92 | 1x |
control = control |
93 |
)
|
|
94 |
} else { |
|
95 | 3x |
tmp <- mapply( |
96 | 3x |
FUN = h_step_survival_est, |
97 | 3x |
x = interval_center, |
98 | 3x |
subset = as.list(as.data.frame(window_sel$sel)), |
99 | 3x |
MoreArgs = list( |
100 | 3x |
formula = form, |
101 | 3x |
data = data, |
102 | 3x |
variables = variables, |
103 | 3x |
control = control |
104 |
)
|
|
105 |
)
|
|
106 |
# Maybe we find a more elegant solution than this.
|
|
107 | 3x |
rownames(tmp) <- c("n", "events", "loghr", "se", "ci_lower", "ci_upper") |
108 | 3x |
t(tmp) |
109 |
}
|
|
110 | 4x |
result <- cbind(window_sel$interval, estimates) |
111 | 4x |
structure( |
112 | 4x |
result,
|
113 | 4x |
class = c("step", "matrix"), |
114 | 4x |
variables = variables, |
115 | 4x |
control = control |
116 |
)
|
|
117 |
}
|
1 |
#' Re-implemented `range()` default S3 method for numerical objects
|
|
2 |
#'
|
|
3 |
#' This function returns `c(NA, NA)` instead of `c(-Inf, Inf)` for zero-length data
|
|
4 |
#' without any warnings.
|
|
5 |
#'
|
|
6 |
#' @param x (`numeric`)\cr a sequence of numbers for which the range is computed.
|
|
7 |
#' @param na.rm (`flag`)\cr flag indicating if `NA` should be omitted.
|
|
8 |
#' @param finite (`flag`)\cr flag indicating if non-finite elements should be removed.
|
|
9 |
#'
|
|
10 |
#' @return A 2-element vector of class `numeric`.
|
|
11 |
#'
|
|
12 |
#' @keywords internal
|
|
13 |
range_noinf <- function(x, na.rm = FALSE, finite = FALSE) { # nolint |
|
14 | ||
15 | 1887x |
checkmate::assert_numeric(x) |
16 | ||
17 | 1887x |
if (finite) { |
18 | 24x |
x <- x[is.finite(x)] # removes NAs too |
19 | 1863x |
} else if (na.rm) { |
20 | 708x |
x <- x[!is.na(x)] |
21 |
}
|
|
22 | ||
23 | 1887x |
if (length(x) == 0) { |
24 | 111x |
rval <- c(NA, NA) |
25 | 111x |
mode(rval) <- typeof(x) |
26 |
} else { |
|
27 | 1776x |
rval <- c(min(x, na.rm = FALSE), max(x, na.rm = FALSE)) |
28 |
}
|
|
29 | ||
30 | 1887x |
return(rval) |
31 |
}
|
|
32 | ||
33 |
#' Utility function to create label for confidence interval
|
|
34 |
#'
|
|
35 |
#' @description `r lifecycle::badge("stable")`
|
|
36 |
#'
|
|
37 |
#' @inheritParams argument_convention
|
|
38 |
#'
|
|
39 |
#' @return A `string`.
|
|
40 |
#'
|
|
41 |
#' @export
|
|
42 |
f_conf_level <- function(conf_level) { |
|
43 | 8302x |
assert_proportion_value(conf_level) |
44 | 8300x |
paste0(conf_level * 100, "% CI") |
45 |
}
|
|
46 | ||
47 |
#' Utility function to create label for p-value
|
|
48 |
#'
|
|
49 |
#' @description `r lifecycle::badge("stable")`
|
|
50 |
#'
|
|
51 |
#' @param test_mean (`numeric(1)`)\cr mean value to test under the null hypothesis.
|
|
52 |
#'
|
|
53 |
#' @return A `string`.
|
|
54 |
#'
|
|
55 |
#' @export
|
|
56 |
f_pval <- function(test_mean) { |
|
57 | 1148x |
checkmate::assert_numeric(test_mean, len = 1) |
58 | 1146x |
paste0("p-value (H0: mean = ", test_mean, ")") |
59 |
}
|
|
60 | ||
61 |
#' Utility function to return a named list of covariate names
|
|
62 |
#'
|
|
63 |
#' @param covariates (`character`)\cr a vector that can contain single variable names (such as
|
|
64 |
#' `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
|
|
65 |
#'
|
|
66 |
#' @return A named `list` of `character` vector.
|
|
67 |
#'
|
|
68 |
#' @keywords internal
|
|
69 |
get_covariates <- function(covariates) { |
|
70 | 14x |
checkmate::assert_character(covariates) |
71 | 12x |
cov_vars <- unique(trimws(unlist(strsplit(covariates, "\\*")))) |
72 | 12x |
stats::setNames(as.list(cov_vars), cov_vars) |
73 |
}
|
|
74 | ||
75 |
#' Replicate entries of a vector if required
|
|
76 |
#'
|
|
77 |
#' @description `r lifecycle::badge("stable")`
|
|
78 |
#'
|
|
79 |
#' Replicate entries of a vector if required.
|
|
80 |
#'
|
|
81 |
#' @inheritParams argument_convention
|
|
82 |
#' @param n (`integer(1)`)\cr number of entries that are needed.
|
|
83 |
#'
|
|
84 |
#' @return `x` if it has the required length already or is `NULL`,
|
|
85 |
#' otherwise if it is scalar the replicated version of it with `n` entries.
|
|
86 |
#'
|
|
87 |
#' @note This function will fail if `x` is not of length `n` and/or is not a scalar.
|
|
88 |
#'
|
|
89 |
#' @export
|
|
90 |
to_n <- function(x, n) { |
|
91 | 5x |
if (is.null(x)) { |
92 | 1x |
NULL
|
93 | 4x |
} else if (length(x) == 1) { |
94 | 1x |
rep(x, n) |
95 | 3x |
} else if (length(x) == n) { |
96 | 2x |
x
|
97 |
} else { |
|
98 | 1x |
stop("dimension mismatch") |
99 |
}
|
|
100 |
}
|
|
101 | ||
102 |
#' Check element dimension
|
|
103 |
#'
|
|
104 |
#' Checks if the elements in `...` have the same dimension.
|
|
105 |
#'
|
|
106 |
#' @param ... (`data.frame` or `vector`)\cr any data frames or vectors.
|
|
107 |
#' @param omit_null (`flag`)\cr whether `NULL` elements in `...` should be omitted from the check.
|
|
108 |
#'
|
|
109 |
#' @return A `logical` value.
|
|
110 |
#'
|
|
111 |
#' @keywords internal
|
|
112 |
check_same_n <- function(..., omit_null = TRUE) { |
|
113 | 2x |
dots <- list(...) |
114 | ||
115 | 2x |
n_list <- Map( |
116 | 2x |
function(x, name) { |
117 | 5x |
if (is.null(x)) { |
118 | ! |
if (omit_null) { |
119 | 2x |
NA_integer_
|
120 |
} else { |
|
121 | ! |
stop("arg", name, "is not supposed to be NULL") |
122 |
}
|
|
123 | 5x |
} else if (is.data.frame(x)) { |
124 | ! |
nrow(x) |
125 | 5x |
} else if (is.atomic(x)) { |
126 | 5x |
length(x) |
127 |
} else { |
|
128 | ! |
stop("data structure for ", name, "is currently not supported") |
129 |
}
|
|
130 |
},
|
|
131 | 2x |
dots, names(dots) |
132 |
)
|
|
133 | ||
134 | 2x |
n <- stats::na.omit(unlist(n_list)) |
135 | ||
136 | 2x |
if (length(unique(n)) > 1) { |
137 | ! |
sel <- which(n != n[1]) |
138 | ! |
stop("Dimension mismatch:", paste(names(n)[sel], collapse = ", "), " do not have N=", n[1]) |
139 |
}
|
|
140 | ||
141 | 2x |
TRUE
|
142 |
}
|
|
143 | ||
144 |
#' Utility function to check if a float value is equal to another float value
|
|
145 |
#'
|
|
146 |
#' Uses `.Machine$double.eps` as the tolerance for the comparison.
|
|
147 |
#'
|
|
148 |
#' @param x (`numeric(1)`)\cr a float number.
|
|
149 |
#' @param y (`numeric(1)`)\cr a float number.
|
|
150 |
#'
|
|
151 |
#' @return `TRUE` if identical, otherwise `FALSE`.
|
|
152 |
#'
|
|
153 |
#' @keywords internal
|
|
154 |
.is_equal_float <- function(x, y) { |
|
155 | 2990x |
checkmate::assert_number(x) |
156 | 2990x |
checkmate::assert_number(y) |
157 | ||
158 |
# Define a tolerance
|
|
159 | 2990x |
tolerance <- .Machine$double.eps |
160 | ||
161 |
# Check if x is close enough to y
|
|
162 | 2990x |
abs(x - y) < tolerance |
163 |
}
|
|
164 | ||
165 |
#' Make names without dots
|
|
166 |
#'
|
|
167 |
#' @param nams (`character`)\cr vector of original names.
|
|
168 |
#'
|
|
169 |
#' @return A `character` `vector` of proper names, which does not use dots in contrast to [make.names()].
|
|
170 |
#'
|
|
171 |
#' @keywords internal
|
|
172 |
make_names <- function(nams) { |
|
173 | 6x |
orig <- make.names(nams) |
174 | 6x |
gsub(".", "", x = orig, fixed = TRUE) |
175 |
}
|
|
176 | ||
177 |
#' Conversion of months to days
|
|
178 |
#'
|
|
179 |
#' @description `r lifecycle::badge("stable")`
|
|
180 |
#'
|
|
181 |
#' Conversion of months to days. This is an approximative calculation because it
|
|
182 |
#' considers each month as having an average of 30.4375 days.
|
|
183 |
#'
|
|
184 |
#' @param x (`numeric(1)`)\cr time in months.
|
|
185 |
#'
|
|
186 |
#' @return A `numeric` vector with the time in days.
|
|
187 |
#'
|
|
188 |
#' @examples
|
|
189 |
#' x <- c(13.25, 8.15, 1, 2.834)
|
|
190 |
#' month2day(x)
|
|
191 |
#'
|
|
192 |
#' @export
|
|
193 |
month2day <- function(x) { |
|
194 | 1x |
checkmate::assert_numeric(x) |
195 | 1x |
x * 30.4375 |
196 |
}
|
|
197 | ||
198 |
#' Conversion of days to months
|
|
199 |
#'
|
|
200 |
#' @param x (`numeric(1)`)\cr time in days.
|
|
201 |
#'
|
|
202 |
#' @return A `numeric` vector with the time in months.
|
|
203 |
#'
|
|
204 |
#' @examples
|
|
205 |
#' x <- c(403, 248, 30, 86)
|
|
206 |
#' day2month(x)
|
|
207 |
#'
|
|
208 |
#' @export
|
|
209 |
day2month <- function(x) { |
|
210 | 19x |
checkmate::assert_numeric(x) |
211 | 19x |
x / 30.4375 |
212 |
}
|
|
213 | ||
214 |
#' Return an empty numeric if all elements are `NA`.
|
|
215 |
#'
|
|
216 |
#' @param x (`numeric`)\cr vector.
|
|
217 |
#'
|
|
218 |
#' @return An empty `numeric` if all elements of `x` are `NA`, otherwise `x`.
|
|
219 |
#'
|
|
220 |
#' @examples
|
|
221 |
#' x <- c(NA, NA, NA)
|
|
222 |
#' # Internal function - empty_vector_if_na
|
|
223 |
#' @keywords internal
|
|
224 |
empty_vector_if_na <- function(x) { |
|
225 | 1017x |
if (all(is.na(x))) { |
226 | 310x |
numeric() |
227 |
} else { |
|
228 | 707x |
x
|
229 |
}
|
|
230 |
}
|
|
231 | ||
232 |
#' Element-wise combination of two vectors
|
|
233 |
#'
|
|
234 |
#' @param x (`vector`)\cr first vector to combine.
|
|
235 |
#' @param y (`vector`)\cr second vector to combine.
|
|
236 |
#'
|
|
237 |
#' @return A `list` where each element combines corresponding elements of `x` and `y`.
|
|
238 |
#'
|
|
239 |
#' @examples
|
|
240 |
#' combine_vectors(1:3, 4:6)
|
|
241 |
#'
|
|
242 |
#' @export
|
|
243 |
combine_vectors <- function(x, y) { |
|
244 | 42x |
checkmate::assert_vector(x) |
245 | 42x |
checkmate::assert_vector(y, len = length(x)) |
246 | ||
247 | 42x |
result <- lapply(as.data.frame(rbind(x, y)), `c`) |
248 | 42x |
names(result) <- NULL |
249 | 42x |
result
|
250 |
}
|
|
251 | ||
252 |
#' Extract elements by name
|
|
253 |
#'
|
|
254 |
#' This utility function extracts elements from a vector `x` by `names`.
|
|
255 |
#' Differences to the standard `[` function are:
|
|
256 |
#'
|
|
257 |
#' - If `x` is `NULL`, then still always `NULL` is returned (same as in base function).
|
|
258 |
#' - If `x` is not `NULL`, then the intersection of its names is made with `names` and those
|
|
259 |
#' elements are returned. That is, `names` which don't appear in `x` are not returned as `NA`s.
|
|
260 |
#'
|
|
261 |
#' @param x (named `vector`)\cr where to extract named elements from.
|
|
262 |
#' @param names (`character`)\cr vector of names to extract.
|
|
263 |
#'
|
|
264 |
#' @return `NULL` if `x` is `NULL`, otherwise the extracted elements from `x`.
|
|
265 |
#'
|
|
266 |
#' @keywords internal
|
|
267 |
extract_by_name <- function(x, names) { |
|
268 | 3x |
if (is.null(x)) { |
269 | 1x |
return(NULL) |
270 |
}
|
|
271 | 2x |
checkmate::assert_named(x) |
272 | 2x |
checkmate::assert_character(names) |
273 | 2x |
which_extract <- intersect(names(x), names) |
274 | 2x |
if (length(which_extract) > 0) { |
275 | 1x |
x[which_extract] |
276 |
} else { |
|
277 | 1x |
NULL
|
278 |
}
|
|
279 |
}
|
|
280 | ||
281 |
#' Labels for adverse event baskets
|
|
282 |
#'
|
|
283 |
#' @description `r lifecycle::badge("stable")`
|
|
284 |
#'
|
|
285 |
#' @param aesi (`character`)\cr vector with standardized MedDRA query name (e.g. `SMQxxNAM`) or customized query
|
|
286 |
#' name (e.g. `CQxxNAM`).
|
|
287 |
#' @param scope (`character`)\cr vector with scope of query (e.g. `SMQxxSC`).
|
|
288 |
#'
|
|
289 |
#' @return A `string` with the standard label for the AE basket.
|
|
290 |
#'
|
|
291 |
#' @examples
|
|
292 |
#' adae <- tern_ex_adae
|
|
293 |
#'
|
|
294 |
#' # Standardized query label includes scope.
|
|
295 |
#' aesi_label(adae$SMQ01NAM, scope = adae$SMQ01SC)
|
|
296 |
#'
|
|
297 |
#' # Customized query label.
|
|
298 |
#' aesi_label(adae$CQ01NAM)
|
|
299 |
#'
|
|
300 |
#' @export
|
|
301 |
aesi_label <- function(aesi, scope = NULL) { |
|
302 | 4x |
checkmate::assert_character(aesi) |
303 | 4x |
checkmate::assert_character(scope, null.ok = TRUE) |
304 | 4x |
aesi_label <- obj_label(aesi) |
305 | 4x |
aesi <- sas_na(aesi) |
306 | 4x |
aesi <- unique(aesi)[!is.na(unique(aesi))] |
307 | ||
308 | 4x |
lbl <- if (length(aesi) == 1 && !is.null(scope)) { |
309 | 1x |
scope <- sas_na(scope) |
310 | 1x |
scope <- unique(scope)[!is.na(unique(scope))] |
311 | 1x |
checkmate::assert_string(scope) |
312 | 1x |
paste0(aesi, " (", scope, ")") |
313 | 4x |
} else if (length(aesi) == 1 && is.null(scope)) { |
314 | 1x |
aesi
|
315 |
} else { |
|
316 | 2x |
aesi_label
|
317 |
}
|
|
318 | ||
319 | 4x |
lbl
|
320 |
}
|
|
321 | ||
322 |
#' Indicate study arm variable in formula
|
|
323 |
#'
|
|
324 |
#' We use `study_arm` to indicate the study arm variable in `tern` formulas.
|
|
325 |
#'
|
|
326 |
#' @param x arm information
|
|
327 |
#'
|
|
328 |
#' @return `x`
|
|
329 |
#'
|
|
330 |
#' @keywords internal
|
|
331 |
study_arm <- function(x) { |
|
332 | ! |
structure(x, varname = deparse(substitute(x))) |
333 |
}
|
|
334 | ||
335 |
#' Smooth function with optional grouping
|
|
336 |
#'
|
|
337 |
#' @description `r lifecycle::badge("stable")`
|
|
338 |
#'
|
|
339 |
#' This produces `loess` smoothed estimates of `y` with Student confidence intervals.
|
|
340 |
#'
|
|
341 |
#' @param df (`data.frame`)\cr data set containing all analysis variables.
|
|
342 |
#' @param x (`string`)\cr x column name.
|
|
343 |
#' @param y (`string`)\cr y column name.
|
|
344 |
#' @param groups (`character` or `NULL`)\cr vector with optional grouping variables names.
|
|
345 |
#' @param level (`proportion`)\cr level of confidence interval to use (0.95 by default).
|
|
346 |
#'
|
|
347 |
#' @return A `data.frame` with original `x`, smoothed `y`, `ylow`, and `yhigh`, and
|
|
348 |
#' optional `groups` variables formatted as `factor` type.
|
|
349 |
#'
|
|
350 |
#' @export
|
|
351 |
get_smooths <- function(df, x, y, groups = NULL, level = 0.95) { |
|
352 | 5x |
checkmate::assert_data_frame(df) |
353 | 5x |
df_cols <- colnames(df) |
354 | 5x |
checkmate::assert_string(x) |
355 | 5x |
checkmate::assert_subset(x, df_cols) |
356 | 5x |
checkmate::assert_numeric(df[[x]]) |
357 | 5x |
checkmate::assert_string(y) |
358 | 5x |
checkmate::assert_subset(y, df_cols) |
359 | 5x |
checkmate::assert_numeric(df[[y]]) |
360 | ||
361 | 5x |
if (!is.null(groups)) { |
362 | 4x |
checkmate::assert_character(groups) |
363 | 4x |
checkmate::assert_subset(groups, df_cols) |
364 |
}
|
|
365 | ||
366 | 5x |
smooths <- function(x, y) { |
367 | 18x |
stats::predict(stats::loess(y ~ x), se = TRUE) |
368 |
}
|
|
369 | ||
370 | 5x |
if (!is.null(groups)) { |
371 | 4x |
cc <- stats::complete.cases(df[c(x, y, groups)]) |
372 | 4x |
df_c <- df[cc, c(x, y, groups)] |
373 | 4x |
df_c_ordered <- df_c[do.call("order", as.list(df_c[, groups, drop = FALSE])), , drop = FALSE] |
374 | 4x |
df_c_g <- data.frame(Map(as.factor, df_c_ordered[groups])) |
375 | ||
376 | 4x |
df_smooth_raw <- |
377 | 4x |
by(df_c_ordered, df_c_g, function(d) { |
378 | 17x |
plx <- smooths(d[[x]], d[[y]]) |
379 | 17x |
data.frame( |
380 | 17x |
x = d[[x]], |
381 | 17x |
y = plx$fit, |
382 | 17x |
ylow = plx$fit - stats::qt(level, plx$df) * plx$se.fit, |
383 | 17x |
yhigh = plx$fit + stats::qt(level, plx$df) * plx$se.fit |
384 |
)
|
|
385 |
}) |
|
386 | ||
387 | 4x |
df_smooth <- do.call(rbind, df_smooth_raw) |
388 | 4x |
df_smooth[groups] <- df_c_g |
389 | ||
390 | 4x |
df_smooth
|
391 |
} else { |
|
392 | 1x |
cc <- stats::complete.cases(df[c(x, y)]) |
393 | 1x |
df_c <- df[cc, ] |
394 | 1x |
plx <- smooths(df_c[[x]], df_c[[y]]) |
395 | ||
396 | 1x |
df_smooth <- data.frame( |
397 | 1x |
x = df_c[[x]], |
398 | 1x |
y = plx$fit, |
399 | 1x |
ylow = plx$fit - stats::qt(level, plx$df) * plx$se.fit, |
400 | 1x |
yhigh = plx$fit + stats::qt(level, plx$df) * plx$se.fit |
401 |
)
|
|
402 | ||
403 | 1x |
df_smooth
|
404 |
}
|
|
405 |
}
|
|
406 | ||
407 |
#' Number of available (non-missing entries) in a vector
|
|
408 |
#'
|
|
409 |
#' Small utility function for better readability.
|
|
410 |
#'
|
|
411 |
#' @param x (`vector`)\cr vector in which to count non-missing values.
|
|
412 |
#'
|
|
413 |
#' @return Number of non-missing values.
|
|
414 |
#'
|
|
415 |
#' @keywords internal
|
|
416 |
n_available <- function(x) { |
|
417 | 423x |
sum(!is.na(x)) |
418 |
}
|
|
419 | ||
420 |
#' Reapply variable labels
|
|
421 |
#'
|
|
422 |
#' This is a helper function that is used in tests.
|
|
423 |
#'
|
|
424 |
#' @param x (`vector`)\cr vector of elements that needs new labels.
|
|
425 |
#' @param varlabels (`character`)\cr vector of labels for `x`.
|
|
426 |
#' @param ... further parameters to be added to the list.
|
|
427 |
#'
|
|
428 |
#' @return `x` with variable labels reapplied.
|
|
429 |
#'
|
|
430 |
#' @export
|
|
431 |
reapply_varlabels <- function(x, varlabels, ...) { |
|
432 | 11x |
named_labels <- c(as.list(varlabels), list(...)) |
433 | 11x |
formatters::var_labels(x)[names(named_labels)] <- as.character(named_labels) |
434 | 11x |
x
|
435 |
}
|
|
436 | ||
437 |
# Wrapper function of survival::clogit so that when model fitting failed, a more useful message would show
|
|
438 |
clogit_with_tryCatch <- function(formula, data, ...) { # nolint |
|
439 | 33x |
tryCatch( |
440 | 33x |
survival::clogit(formula = formula, data = data, ...), |
441 | 33x |
error = function(e) stop("model not built successfully with survival::clogit") |
442 |
)
|
|
443 |
}
|
1 |
#' Control function for incidence rate
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' This is an auxiliary function for controlling arguments for the incidence rate, used
|
|
6 |
#' internally to specify details in `s_incidence_rate()`.
|
|
7 |
#'
|
|
8 |
#' @inheritParams argument_convention
|
|
9 |
#' @param conf_type (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar`
|
|
10 |
#' for confidence interval type.
|
|
11 |
#' @param input_time_unit (`string`)\cr `day`, `week`, `month`, or `year` (default)
|
|
12 |
#' indicating time unit for data input.
|
|
13 |
#' @param num_pt_year (`numeric(1)`)\cr number of patient-years to use when calculating adverse event rates.
|
|
14 |
#'
|
|
15 |
#' @return A list of components with the same names as the arguments.
|
|
16 |
#'
|
|
17 |
#' @seealso [incidence_rate]
|
|
18 |
#'
|
|
19 |
#' @examples
|
|
20 |
#' control_incidence_rate(0.9, "exact", "month", 100)
|
|
21 |
#'
|
|
22 |
#' @export
|
|
23 |
control_incidence_rate <- function(conf_level = 0.95, |
|
24 |
conf_type = c("normal", "normal_log", "exact", "byar"), |
|
25 |
input_time_unit = c("year", "day", "week", "month"), |
|
26 |
num_pt_year = 100) { |
|
27 | 14x |
conf_type <- match.arg(conf_type) |
28 | 13x |
input_time_unit <- match.arg(input_time_unit) |
29 | 12x |
checkmate::assert_number(num_pt_year) |
30 | 11x |
assert_proportion_value(conf_level) |
31 | ||
32 | 10x |
list( |
33 | 10x |
conf_level = conf_level, |
34 | 10x |
conf_type = conf_type, |
35 | 10x |
input_time_unit = input_time_unit, |
36 | 10x |
num_pt_year = num_pt_year |
37 |
)
|
|
38 |
}
|
1 |
#' Summarize variables in columns
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [summarize_colvars()] uses the statistics function [s_summary()] to analyze variables that are
|
|
6 |
#' arranged in columns. The variables to analyze should be specified in the table layout via column splits (see
|
|
7 |
#' [rtables::split_cols_by()] and [rtables::split_cols_by_multivar()]) prior to using [summarize_colvars()].
|
|
8 |
#'
|
|
9 |
#' The function is a minimal wrapper for [rtables::analyze_colvars()], a function typically used to apply different
|
|
10 |
#' analysis methods in rows for each column variable. To use the analysis methods as column labels, please refer to
|
|
11 |
#' the [analyze_vars_in_cols()] function.
|
|
12 |
#'
|
|
13 |
#' @inheritParams argument_convention
|
|
14 |
#' @param ... arguments passed to [s_summary()].
|
|
15 |
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
|
|
16 |
#' should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
|
|
17 |
#' for that statistic's row label.
|
|
18 |
#'
|
|
19 |
#' @return
|
|
20 |
#' A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
|
|
21 |
#' Adding this function to an `rtable` layout will summarize the given variables, arrange the output
|
|
22 |
#' in columns, and add it to the table layout.
|
|
23 |
#'
|
|
24 |
#' @seealso [rtables::split_cols_by_multivar()] and [`analyze_colvars_functions`].
|
|
25 |
#'
|
|
26 |
#' @examples
|
|
27 |
#' dta_test <- data.frame(
|
|
28 |
#' USUBJID = rep(1:6, each = 3),
|
|
29 |
#' PARAMCD = rep("lab", 6 * 3),
|
|
30 |
#' AVISIT = rep(paste0("V", 1:3), 6),
|
|
31 |
#' ARM = rep(LETTERS[1:3], rep(6, 3)),
|
|
32 |
#' AVAL = c(9:1, rep(NA, 9)),
|
|
33 |
#' CHG = c(1:9, rep(NA, 9))
|
|
34 |
#' )
|
|
35 |
#'
|
|
36 |
#' ## Default output within a `rtables` pipeline.
|
|
37 |
#' basic_table() %>%
|
|
38 |
#' split_cols_by("ARM") %>%
|
|
39 |
#' split_rows_by("AVISIT") %>%
|
|
40 |
#' split_cols_by_multivar(vars = c("AVAL", "CHG")) %>%
|
|
41 |
#' summarize_colvars() %>%
|
|
42 |
#' build_table(dta_test)
|
|
43 |
#'
|
|
44 |
#' ## Selection of statistics, formats and labels also work.
|
|
45 |
#' basic_table() %>%
|
|
46 |
#' split_cols_by("ARM") %>%
|
|
47 |
#' split_rows_by("AVISIT") %>%
|
|
48 |
#' split_cols_by_multivar(vars = c("AVAL", "CHG")) %>%
|
|
49 |
#' summarize_colvars(
|
|
50 |
#' .stats = c("n", "mean_sd"),
|
|
51 |
#' .formats = c("mean_sd" = "xx.x, xx.x"),
|
|
52 |
#' .labels = c(n = "n", mean_sd = "Mean, SD")
|
|
53 |
#' ) %>%
|
|
54 |
#' build_table(dta_test)
|
|
55 |
#'
|
|
56 |
#' ## Use arguments interpreted by `s_summary`.
|
|
57 |
#' basic_table() %>%
|
|
58 |
#' split_cols_by("ARM") %>%
|
|
59 |
#' split_rows_by("AVISIT") %>%
|
|
60 |
#' split_cols_by_multivar(vars = c("AVAL", "CHG")) %>%
|
|
61 |
#' summarize_colvars(na.rm = FALSE) %>%
|
|
62 |
#' build_table(dta_test)
|
|
63 |
#'
|
|
64 |
#' @export
|
|
65 |
summarize_colvars <- function(lyt, |
|
66 |
na_str = default_na_str(), |
|
67 |
...,
|
|
68 |
.stats = c("n", "mean_sd", "median", "range", "count_fraction"), |
|
69 |
.stat_names = NULL, |
|
70 |
.formats = NULL, |
|
71 |
.labels = NULL, |
|
72 |
.indent_mods = NULL) { |
|
73 |
# Process standard extra arguments
|
|
74 | 3x |
extra_args <- list(".stats" = .stats) |
75 | ! |
if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names |
76 | 1x |
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats |
77 | 1x |
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels |
78 | 1x |
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods |
79 | ||
80 |
# Adding additional info from layout to analysis function
|
|
81 | 3x |
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) |
82 | 3x |
formals(a_summary) <- c(formals(a_summary), extra_args[[".additional_fun_parameters"]]) |
83 | ||
84 | 3x |
analyze_colvars( |
85 | 3x |
lyt,
|
86 | 3x |
afun = a_summary, |
87 | 3x |
na_str = na_str, |
88 | 3x |
extra_args = extra_args |
89 |
)
|
|
90 |
}
|
1 |
#' Sort pharmacokinetic data by `PARAM` variable
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' @param pk_data (`data.frame`)\cr pharmacokinetic data frame.
|
|
6 |
#' @param key_var (`string`)\cr key variable used to merge pk_data and metadata created by [d_pkparam()].
|
|
7 |
#'
|
|
8 |
#' @return A pharmacokinetic `data.frame` sorted by a `PARAM` variable.
|
|
9 |
#'
|
|
10 |
#' @examples
|
|
11 |
#' library(dplyr)
|
|
12 |
#'
|
|
13 |
#' adpp <- tern_ex_adpp %>% mutate(PKPARAM = factor(paste0(PARAM, " (", AVALU, ")")))
|
|
14 |
#' pk_ordered_data <- h_pkparam_sort(adpp)
|
|
15 |
#'
|
|
16 |
#' @export
|
|
17 |
h_pkparam_sort <- function(pk_data, key_var = "PARAMCD") { |
|
18 | 4x |
assert_df_with_variables(pk_data, list(key_var = key_var)) |
19 | 4x |
pk_data$PARAMCD <- pk_data[[key_var]] |
20 | ||
21 | 4x |
ordered_pk_data <- d_pkparam() |
22 | ||
23 |
# Add the numeric values from ordered_pk_data to pk_data
|
|
24 | 4x |
joined_data <- merge(pk_data, ordered_pk_data, by = "PARAMCD", suffixes = c("", ".y")) |
25 | ||
26 | 4x |
joined_data <- joined_data[, -grep(".*.y$", colnames(joined_data))] |
27 | ||
28 | 4x |
joined_data$TLG_ORDER <- as.numeric(joined_data$TLG_ORDER) |
29 | ||
30 |
# Then order PARAM based on this column
|
|
31 | 4x |
joined_data$PARAM <- factor(joined_data$PARAM, |
32 | 4x |
levels = unique(joined_data$PARAM[order(joined_data$TLG_ORDER)]), |
33 | 4x |
ordered = TRUE |
34 |
)
|
|
35 | ||
36 | 4x |
joined_data$TLG_DISPLAY <- factor(joined_data$TLG_DISPLAY, |
37 | 4x |
levels = unique(joined_data$TLG_DISPLAY[order(joined_data$TLG_ORDER)]), |
38 | 4x |
ordered = TRUE |
39 |
)
|
|
40 | ||
41 | 4x |
joined_data
|
42 |
}
|
1 |
#' Occurrence table sorting
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Functions to score occurrence table subtables and rows which can be used in the
|
|
6 |
#' sorting of occurrence tables.
|
|
7 |
#'
|
|
8 |
#' @name score_occurrences
|
|
9 |
NULL
|
|
10 | ||
11 |
#' @describeIn score_occurrences Scoring function which sums the counts across all
|
|
12 |
#' columns. It will fail if anything else but counts are used.
|
|
13 |
#'
|
|
14 |
#' @inheritParams rtables_access
|
|
15 |
#'
|
|
16 |
#' @return
|
|
17 |
#' * `score_occurrences()` returns the sum of counts across all columns of a table row.
|
|
18 |
#'
|
|
19 |
#' @seealso [h_row_first_values()]
|
|
20 |
#'
|
|
21 |
#' @examples
|
|
22 |
#' lyt <- basic_table() %>%
|
|
23 |
#' split_cols_by("ARM") %>%
|
|
24 |
#' add_colcounts() %>%
|
|
25 |
#' analyze_num_patients(
|
|
26 |
#' vars = "USUBJID",
|
|
27 |
#' .stats = c("unique"),
|
|
28 |
#' .labels = c("Total number of patients with at least one event")
|
|
29 |
#' ) %>%
|
|
30 |
#' split_rows_by("AEBODSYS", child_labels = "visible", nested = FALSE) %>%
|
|
31 |
#' summarize_num_patients(
|
|
32 |
#' var = "USUBJID",
|
|
33 |
#' .stats = c("unique", "nonunique"),
|
|
34 |
#' .labels = c(
|
|
35 |
#' "Total number of patients with at least one event",
|
|
36 |
#' "Total number of events"
|
|
37 |
#' )
|
|
38 |
#' ) %>%
|
|
39 |
#' count_occurrences(vars = "AEDECOD")
|
|
40 |
#'
|
|
41 |
#' tbl <- build_table(lyt, tern_ex_adae, alt_counts_df = tern_ex_adsl) %>%
|
|
42 |
#' prune_table()
|
|
43 |
#'
|
|
44 |
#' tbl_sorted <- tbl %>%
|
|
45 |
#' sort_at_path(path = c("AEBODSYS", "*", "AEDECOD"), scorefun = score_occurrences)
|
|
46 |
#'
|
|
47 |
#' tbl_sorted
|
|
48 |
#'
|
|
49 |
#' @export
|
|
50 |
score_occurrences <- function(table_row) { |
|
51 | 37x |
row_counts <- h_row_counts(table_row) |
52 | 37x |
sum(row_counts) |
53 |
}
|
|
54 | ||
55 |
#' @describeIn score_occurrences Scoring functions can be produced by this constructor to only include
|
|
56 |
#' specific columns in the scoring. See [h_row_counts()] for further information.
|
|
57 |
#'
|
|
58 |
#' @inheritParams has_count_in_cols
|
|
59 |
#'
|
|
60 |
#' @return
|
|
61 |
#' * `score_occurrences_cols()` returns a function that sums counts across all specified columns
|
|
62 |
#' of a table row.
|
|
63 |
#'
|
|
64 |
#' @seealso [h_row_counts()]
|
|
65 |
#'
|
|
66 |
#' @examples
|
|
67 |
#' score_cols_a_and_b <- score_occurrences_cols(col_names = c("A: Drug X", "B: Placebo"))
|
|
68 |
#'
|
|
69 |
#' # Note that this here just sorts the AEDECOD inside the AEBODSYS. The AEBODSYS are not sorted.
|
|
70 |
#' # That would require a second pass of `sort_at_path`.
|
|
71 |
#' tbl_sorted <- tbl %>%
|
|
72 |
#' sort_at_path(path = c("AEBODSYS", "*", "AEDECOD"), scorefun = score_cols_a_and_b)
|
|
73 |
#'
|
|
74 |
#' tbl_sorted
|
|
75 |
#'
|
|
76 |
#' @export
|
|
77 |
score_occurrences_cols <- function(...) { |
|
78 | 4x |
function(table_row) { |
79 | 20x |
row_counts <- h_row_counts(table_row, ...) |
80 | 20x |
sum(row_counts) |
81 |
}
|
|
82 |
}
|
|
83 | ||
84 |
#' @describeIn score_occurrences Scoring functions produced by this constructor can be used on
|
|
85 |
#' subtables: They sum up all specified column counts in the subtable. This is useful when
|
|
86 |
#' there is no available content row summing up these counts.
|
|
87 |
#'
|
|
88 |
#' @return
|
|
89 |
#' * `score_occurrences_subtable()` returns a function that sums counts in each subtable
|
|
90 |
#' across all specified columns.
|
|
91 |
#'
|
|
92 |
#' @examples
|
|
93 |
#' score_subtable_all <- score_occurrences_subtable(col_names = names(tbl))
|
|
94 |
#'
|
|
95 |
#' # Note that this code just sorts the AEBODSYS, not the AEDECOD within AEBODSYS. That
|
|
96 |
#' # would require a second pass of `sort_at_path`.
|
|
97 |
#' tbl_sorted <- tbl %>%
|
|
98 |
#' sort_at_path(path = c("AEBODSYS"), scorefun = score_subtable_all, decreasing = FALSE)
|
|
99 |
#'
|
|
100 |
#' tbl_sorted
|
|
101 |
#'
|
|
102 |
#' @export
|
|
103 |
score_occurrences_subtable <- function(...) { |
|
104 | 1x |
score_table_row <- score_occurrences_cols(...) |
105 | 1x |
function(table_tree) { |
106 | 2x |
table_rows <- collect_leaves(table_tree) |
107 | 2x |
counts <- vapply(table_rows, score_table_row, numeric(1)) |
108 | 2x |
sum(counts) |
109 |
}
|
|
110 |
}
|
|
111 | ||
112 |
#' @describeIn score_occurrences Produces a score function for sorting table by summing the first content row in
|
|
113 |
#' specified columns. Note that this is extending [rtables::cont_n_onecol()] and [rtables::cont_n_allcols()].
|
|
114 |
#'
|
|
115 |
#' @return
|
|
116 |
#' * `score_occurrences_cont_cols()` returns a function that sums counts in the first content row in
|
|
117 |
#' specified columns.
|
|
118 |
#'
|
|
119 |
#' @export
|
|
120 |
score_occurrences_cont_cols <- function(...) { |
|
121 | 1x |
score_table_row <- score_occurrences_cols(...) |
122 | 1x |
function(table_tree) { |
123 | 2x |
if (inherits(table_tree, "ContentRow")) { |
124 | ! |
return(NA) |
125 |
}
|
|
126 | 2x |
content_row <- h_content_first_row(table_tree) |
127 | 2x |
score_table_row(content_row) |
128 |
}
|
|
129 |
}
|
1 |
#' Occurrence table pruning
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' Family of constructor and condition functions to flexibly prune occurrence tables.
|
|
6 |
#' The condition functions always return whether the row result is higher than the threshold.
|
|
7 |
#' Since they are of class [CombinationFunction()] they can be logically combined with other condition
|
|
8 |
#' functions.
|
|
9 |
#'
|
|
10 |
#' @note Since most table specifications are worded positively, we name our constructor and condition
|
|
11 |
#' functions positively, too. However, note that the result of [keep_rows()] says what
|
|
12 |
#' should be pruned, to conform with the [rtables::prune_table()] interface.
|
|
13 |
#'
|
|
14 |
#' @examples
|
|
15 |
#' \donttest{
|
|
16 |
#' tab <- basic_table() %>%
|
|
17 |
#' split_cols_by("ARM") %>%
|
|
18 |
#' split_rows_by("RACE") %>%
|
|
19 |
#' split_rows_by("STRATA1") %>%
|
|
20 |
#' summarize_row_groups() %>%
|
|
21 |
#' analyze_vars("COUNTRY", .stats = "count_fraction") %>%
|
|
22 |
#' build_table(DM)
|
|
23 |
#' }
|
|
24 |
#'
|
|
25 |
#' @name prune_occurrences
|
|
26 |
NULL
|
|
27 | ||
28 |
#' @describeIn prune_occurrences Constructor for creating pruning functions based on
|
|
29 |
#' a row condition function. This removes all analysis rows (`TableRow`) that should be
|
|
30 |
#' pruned, i.e., don't fulfill the row condition. It removes the sub-tree if there are no
|
|
31 |
#' children left.
|
|
32 |
#'
|
|
33 |
#' @param row_condition (`CombinationFunction`)\cr condition function which works on individual
|
|
34 |
#' analysis rows and flags whether these should be kept in the pruned table.
|
|
35 |
#'
|
|
36 |
#' @return
|
|
37 |
#' * `keep_rows()` returns a pruning function that can be used with [rtables::prune_table()]
|
|
38 |
#' to prune an `rtables` table.
|
|
39 |
#'
|
|
40 |
#' @examples
|
|
41 |
#' \donttest{
|
|
42 |
#' # `keep_rows`
|
|
43 |
#' is_non_empty <- !CombinationFunction(all_zero_or_na)
|
|
44 |
#' prune_table(tab, keep_rows(is_non_empty))
|
|
45 |
#' }
|
|
46 |
#'
|
|
47 |
#' @export
|
|
48 |
keep_rows <- function(row_condition) { |
|
49 | 6x |
checkmate::assert_function(row_condition) |
50 | 6x |
function(table_tree) { |
51 | 2256x |
if (inherits(table_tree, "TableRow")) { |
52 | 1872x |
return(!row_condition(table_tree)) |
53 |
}
|
|
54 | 384x |
children <- tree_children(table_tree) |
55 | 384x |
identical(length(children), 0L) |
56 |
}
|
|
57 |
}
|
|
58 | ||
59 |
#' @describeIn prune_occurrences Constructor for creating pruning functions based on
|
|
60 |
#' a condition for the (first) content row in leaf tables. This removes all leaf tables where
|
|
61 |
#' the first content row does not fulfill the condition. It does not check individual rows.
|
|
62 |
#' It then proceeds recursively by removing the sub tree if there are no children left.
|
|
63 |
#'
|
|
64 |
#' @param content_row_condition (`CombinationFunction`)\cr condition function which works on individual
|
|
65 |
#' first content rows of leaf tables and flags whether these leaf tables should be kept in the pruned table.
|
|
66 |
#'
|
|
67 |
#' @return
|
|
68 |
#' * `keep_content_rows()` returns a pruning function that checks the condition on the first content
|
|
69 |
#' row of leaf tables in the table.
|
|
70 |
#'
|
|
71 |
#' @examples
|
|
72 |
#' # `keep_content_rows`
|
|
73 |
#' \donttest{
|
|
74 |
#' more_than_twenty <- has_count_in_cols(atleast = 20L, col_names = names(tab))
|
|
75 |
#' prune_table(tab, keep_content_rows(more_than_twenty))
|
|
76 |
#' }
|
|
77 |
#'
|
|
78 |
#' @export
|
|
79 |
keep_content_rows <- function(content_row_condition) { |
|
80 | 1x |
checkmate::assert_function(content_row_condition) |
81 | 1x |
function(table_tree) { |
82 | 166x |
if (is_leaf_table(table_tree)) { |
83 | 24x |
content_row <- h_content_first_row(table_tree) |
84 | 24x |
return(!content_row_condition(content_row)) |
85 |
}
|
|
86 | 142x |
if (inherits(table_tree, "DataRow")) { |
87 | 120x |
return(FALSE) |
88 |
}
|
|
89 | 22x |
children <- tree_children(table_tree) |
90 | 22x |
identical(length(children), 0L) |
91 |
}
|
|
92 |
}
|
|
93 | ||
94 |
#' @describeIn prune_occurrences Constructor for creating condition functions on total counts in the specified columns.
|
|
95 |
#'
|
|
96 |
#' @param atleast (`numeric(1)`)\cr threshold which should be met in order to keep the row.
|
|
97 |
#' @param ... arguments for row or column access, see [`rtables_access`]: either `col_names` (`character`) including
|
|
98 |
#' the names of the columns which should be used, or alternatively `col_indices` (`integer`) giving the indices
|
|
99 |
#' directly instead.
|
|
100 |
#'
|
|
101 |
#' @return
|
|
102 |
#' * `has_count_in_cols()` returns a condition function that sums the counts in the specified column.
|
|
103 |
#'
|
|
104 |
#' @examples
|
|
105 |
#' \donttest{
|
|
106 |
#' more_than_one <- has_count_in_cols(atleast = 1L, col_names = names(tab))
|
|
107 |
#' prune_table(tab, keep_rows(more_than_one))
|
|
108 |
#' }
|
|
109 |
#'
|
|
110 |
#' @export
|
|
111 |
has_count_in_cols <- function(atleast, ...) { |
|
112 | 6x |
checkmate::assert_count(atleast) |
113 | 6x |
CombinationFunction(function(table_row) { |
114 | 337x |
row_counts <- h_row_counts(table_row, ...) |
115 | 337x |
total_count <- sum(row_counts) |
116 | 337x |
total_count >= atleast |
117 |
}) |
|
118 |
}
|
|
119 | ||
120 |
#' @describeIn prune_occurrences Constructor for creating condition functions on any of the counts in
|
|
121 |
#' the specified columns satisfying a threshold.
|
|
122 |
#'
|
|
123 |
#' @param atleast (`numeric(1)`)\cr threshold which should be met in order to keep the row.
|
|
124 |
#'
|
|
125 |
#' @return
|
|
126 |
#' * `has_count_in_any_col()` returns a condition function that compares the counts in the
|
|
127 |
#' specified columns with the threshold.
|
|
128 |
#'
|
|
129 |
#' @examples
|
|
130 |
#' \donttest{
|
|
131 |
#' # `has_count_in_any_col`
|
|
132 |
#' any_more_than_one <- has_count_in_any_col(atleast = 1L, col_names = names(tab))
|
|
133 |
#' prune_table(tab, keep_rows(any_more_than_one))
|
|
134 |
#' }
|
|
135 |
#'
|
|
136 |
#' @export
|
|
137 |
has_count_in_any_col <- function(atleast, ...) { |
|
138 | 3x |
checkmate::assert_count(atleast) |
139 | 3x |
CombinationFunction(function(table_row) { |
140 | 3x |
row_counts <- h_row_counts(table_row, ...) |
141 | 3x |
any(row_counts >= atleast) |
142 |
}) |
|
143 |
}
|
|
144 | ||
145 |
#' @describeIn prune_occurrences Constructor for creating condition functions on total fraction in
|
|
146 |
#' the specified columns.
|
|
147 |
#'
|
|
148 |
#' @return
|
|
149 |
#' * `has_fraction_in_cols()` returns a condition function that sums the counts in the
|
|
150 |
#' specified column, and computes the fraction by dividing by the total column counts.
|
|
151 |
#'
|
|
152 |
#' @examples
|
|
153 |
#' \donttest{
|
|
154 |
#' # `has_fraction_in_cols`
|
|
155 |
#' more_than_five_percent <- has_fraction_in_cols(atleast = 0.05, col_names = names(tab))
|
|
156 |
#' prune_table(tab, keep_rows(more_than_five_percent))
|
|
157 |
#' }
|
|
158 |
#'
|
|
159 |
#' @export
|
|
160 |
has_fraction_in_cols <- function(atleast, ...) { |
|
161 | 4x |
assert_proportion_value(atleast, include_boundaries = TRUE) |
162 | 4x |
CombinationFunction(function(table_row) { |
163 | 306x |
row_counts <- h_row_counts(table_row, ...) |
164 | 306x |
total_count <- sum(row_counts) |
165 | 306x |
col_counts <- h_col_counts(table_row, ...) |
166 | 306x |
total_n <- sum(col_counts) |
167 | 306x |
total_percent <- total_count / total_n |
168 | 306x |
total_percent >= atleast |
169 |
}) |
|
170 |
}
|
|
171 | ||
172 |
#' @describeIn prune_occurrences Constructor for creating condition functions on any fraction in
|
|
173 |
#' the specified columns.
|
|
174 |
#'
|
|
175 |
#' @return
|
|
176 |
#' * `has_fraction_in_any_col()` returns a condition function that looks at the fractions
|
|
177 |
#' in the specified columns and checks whether any of them fulfill the threshold.
|
|
178 |
#'
|
|
179 |
#' @examples
|
|
180 |
#' \donttest{
|
|
181 |
#' # `has_fraction_in_any_col`
|
|
182 |
#' any_atleast_five_percent <- has_fraction_in_any_col(atleast = 0.05, col_names = names(tab))
|
|
183 |
#' prune_table(tab, keep_rows(any_atleast_five_percent))
|
|
184 |
#' }
|
|
185 |
#'
|
|
186 |
#' @export
|
|
187 |
has_fraction_in_any_col <- function(atleast, ...) { |
|
188 | 3x |
assert_proportion_value(atleast, include_boundaries = TRUE) |
189 | 3x |
CombinationFunction(function(table_row) { |
190 | 3x |
row_fractions <- h_row_fractions(table_row, ...) |
191 | 3x |
any(row_fractions >= atleast) |
192 |
}) |
|
193 |
}
|
|
194 | ||
195 |
#' @describeIn prune_occurrences Constructor for creating condition function that checks the difference
|
|
196 |
#' between the fractions reported in each specified column.
|
|
197 |
#'
|
|
198 |
#' @return
|
|
199 |
#' * `has_fractions_difference()` returns a condition function that extracts the fractions of each
|
|
200 |
#' specified column, and computes the difference of the minimum and maximum.
|
|
201 |
#'
|
|
202 |
#' @examples
|
|
203 |
#' \donttest{
|
|
204 |
#' # `has_fractions_difference`
|
|
205 |
#' more_than_five_percent_diff <- has_fractions_difference(atleast = 0.05, col_names = names(tab))
|
|
206 |
#' prune_table(tab, keep_rows(more_than_five_percent_diff))
|
|
207 |
#' }
|
|
208 |
#'
|
|
209 |
#' @export
|
|
210 |
has_fractions_difference <- function(atleast, ...) { |
|
211 | 4x |
assert_proportion_value(atleast, include_boundaries = TRUE) |
212 | 4x |
CombinationFunction(function(table_row) { |
213 | 246x |
fractions <- h_row_fractions(table_row, ...) |
214 | 246x |
difference <- diff(range(fractions)) |
215 | 246x |
difference >= atleast |
216 |
}) |
|
217 |
}
|
|
218 | ||
219 |
#' @describeIn prune_occurrences Constructor for creating condition function that checks the difference
|
|
220 |
#' between the counts reported in each specified column.
|
|
221 |
#'
|
|
222 |
#' @return
|
|
223 |
#' * `has_counts_difference()` returns a condition function that extracts the counts of each
|
|
224 |
#' specified column, and computes the difference of the minimum and maximum.
|
|
225 |
#'
|
|
226 |
#' @examples
|
|
227 |
#' \donttest{
|
|
228 |
#' more_than_one_diff <- has_counts_difference(atleast = 1L, col_names = names(tab))
|
|
229 |
#' prune_table(tab, keep_rows(more_than_one_diff))
|
|
230 |
#' }
|
|
231 |
#'
|
|
232 |
#' @export
|
|
233 |
has_counts_difference <- function(atleast, ...) { |
|
234 | 4x |
checkmate::assert_count(atleast) |
235 | 4x |
CombinationFunction(function(table_row) { |
236 | 30x |
counts <- h_row_counts(table_row, ...) |
237 | 30x |
difference <- diff(range(counts)) |
238 | 30x |
difference >= atleast |
239 |
}) |
|
240 |
}
|
1 |
#' Helper functions for incidence rate
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' @param control (`list`)\cr parameters for estimation details, specified by using
|
|
6 |
#' the helper function [control_incidence_rate()]. Possible parameter options are:
|
|
7 |
#' * `conf_level`: (`proportion`)\cr confidence level for the estimated incidence rate.
|
|
8 |
#' * `conf_type`: (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar`
|
|
9 |
#' for confidence interval type.
|
|
10 |
#' * `input_time_unit`: (`string`)\cr `day`, `week`, `month`, or `year` (default)
|
|
11 |
#' indicating time unit for data input.
|
|
12 |
#' * `num_pt_year`: (`numeric`)\cr time unit for desired output (in person-years).
|
|
13 |
#' @param person_years (`numeric(1)`)\cr total person-years at risk.
|
|
14 |
#' @param alpha (`numeric(1)`)\cr two-sided alpha-level for confidence interval.
|
|
15 |
#' @param n_events (`integer(1)`)\cr number of events observed.
|
|
16 |
#'
|
|
17 |
#' @return Estimated incidence rate, `rate`, and associated confidence interval, `rate_ci`.
|
|
18 |
#'
|
|
19 |
#' @seealso [incidence_rate]
|
|
20 |
#'
|
|
21 |
#' @name h_incidence_rate
|
|
22 |
NULL
|
|
23 | ||
24 |
#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
|
|
25 |
#' associated confidence interval.
|
|
26 |
#'
|
|
27 |
#' @keywords internal
|
|
28 |
h_incidence_rate <- function(person_years, |
|
29 |
n_events,
|
|
30 |
control = control_incidence_rate()) { |
|
31 | 18x |
alpha <- 1 - control$conf_level |
32 | 18x |
est <- switch(control$conf_type, |
33 | 18x |
normal = h_incidence_rate_normal(person_years, n_events, alpha), |
34 | 18x |
normal_log = h_incidence_rate_normal_log(person_years, n_events, alpha), |
35 | 18x |
exact = h_incidence_rate_exact(person_years, n_events, alpha), |
36 | 18x |
byar = h_incidence_rate_byar(person_years, n_events, alpha) |
37 |
)
|
|
38 | ||
39 | 18x |
num_pt_year <- control$num_pt_year |
40 | 18x |
list( |
41 | 18x |
rate = est$rate * num_pt_year, |
42 | 18x |
rate_ci = est$rate_ci * num_pt_year |
43 |
)
|
|
44 |
}
|
|
45 | ||
46 |
#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
|
|
47 |
#' associated confidence interval based on the normal approximation for the
|
|
48 |
#' incidence rate. Unit is one person-year.
|
|
49 |
#'
|
|
50 |
#' @examples
|
|
51 |
#' h_incidence_rate_normal(200, 2)
|
|
52 |
#'
|
|
53 |
#' @export
|
|
54 |
h_incidence_rate_normal <- function(person_years, |
|
55 |
n_events,
|
|
56 |
alpha = 0.05) { |
|
57 | 14x |
checkmate::assert_number(person_years) |
58 | 14x |
checkmate::assert_number(n_events) |
59 | 14x |
assert_proportion_value(alpha) |
60 | ||
61 | 14x |
est <- n_events / person_years |
62 | 14x |
se <- sqrt(est / person_years) |
63 | 14x |
ci <- est + c(-1, 1) * stats::qnorm(1 - alpha / 2) * se |
64 | ||
65 | 14x |
list(rate = est, rate_ci = ci) |
66 |
}
|
|
67 | ||
68 |
#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
|
|
69 |
#' associated confidence interval based on the normal approximation for the
|
|
70 |
#' logarithm of the incidence rate. Unit is one person-year.
|
|
71 |
#'
|
|
72 |
#' @examples
|
|
73 |
#' h_incidence_rate_normal_log(200, 2)
|
|
74 |
#'
|
|
75 |
#' @export
|
|
76 |
h_incidence_rate_normal_log <- function(person_years, |
|
77 |
n_events,
|
|
78 |
alpha = 0.05) { |
|
79 | 6x |
checkmate::assert_number(person_years) |
80 | 6x |
checkmate::assert_number(n_events) |
81 | 6x |
assert_proportion_value(alpha) |
82 | ||
83 | 6x |
rate_est <- n_events / person_years |
84 | 6x |
rate_se <- sqrt(rate_est / person_years) |
85 | 6x |
lrate_est <- log(rate_est) |
86 | 6x |
lrate_se <- rate_se / rate_est |
87 | 6x |
ci <- exp(lrate_est + c(-1, 1) * stats::qnorm(1 - alpha / 2) * lrate_se) |
88 | ||
89 | 6x |
list(rate = rate_est, rate_ci = ci) |
90 |
}
|
|
91 | ||
92 |
#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
|
|
93 |
#' associated exact confidence interval. Unit is one person-year.
|
|
94 |
#'
|
|
95 |
#' @examples
|
|
96 |
#' h_incidence_rate_exact(200, 2)
|
|
97 |
#'
|
|
98 |
#' @export
|
|
99 |
h_incidence_rate_exact <- function(person_years, |
|
100 |
n_events,
|
|
101 |
alpha = 0.05) { |
|
102 | 1x |
checkmate::assert_number(person_years) |
103 | 1x |
checkmate::assert_number(n_events) |
104 | 1x |
assert_proportion_value(alpha) |
105 | ||
106 | 1x |
est <- n_events / person_years |
107 | 1x |
lcl <- stats::qchisq(p = (alpha) / 2, df = 2 * n_events) / (2 * person_years) |
108 | 1x |
ucl <- stats::qchisq(p = 1 - (alpha) / 2, df = 2 * n_events + 2) / (2 * person_years) |
109 | ||
110 | 1x |
list(rate = est, rate_ci = c(lcl, ucl)) |
111 |
}
|
|
112 | ||
113 |
#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
|
|
114 |
#' associated Byar's confidence interval. Unit is one person-year.
|
|
115 |
#'
|
|
116 |
#' @examples
|
|
117 |
#' h_incidence_rate_byar(200, 2)
|
|
118 |
#'
|
|
119 |
#' @export
|
|
120 |
h_incidence_rate_byar <- function(person_years, |
|
121 |
n_events,
|
|
122 |
alpha = 0.05) { |
|
123 | 1x |
checkmate::assert_number(person_years) |
124 | 1x |
checkmate::assert_number(n_events) |
125 | 1x |
assert_proportion_value(alpha) |
126 | ||
127 | 1x |
est <- n_events / person_years |
128 | 1x |
seg_1 <- n_events + 0.5 |
129 | 1x |
seg_2 <- 1 - 1 / (9 * (n_events + 0.5)) |
130 | 1x |
seg_3 <- stats::qnorm(1 - alpha / 2) * sqrt(1 / (n_events + 0.5)) / 3 |
131 | 1x |
lcl <- seg_1 * ((seg_2 - seg_3)^3) / person_years |
132 | 1x |
ucl <- seg_1 * ((seg_2 + seg_3)^3) / person_years |
133 | ||
134 | 1x |
list(rate = est, rate_ci = c(lcl, ucl)) |
135 |
}
|
1 |
#' Control function for logistic regression model fitting
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' This is an auxiliary function for controlling arguments for logistic regression models.
|
|
6 |
#' `conf_level` refers to the confidence level used for the Odds Ratio CIs.
|
|
7 |
#'
|
|
8 |
#' @inheritParams argument_convention
|
|
9 |
#' @param response_definition (`string`)\cr the definition of what an event is in terms of `response`.
|
|
10 |
#' This will be used when fitting the logistic regression model on the left hand side of the formula.
|
|
11 |
#' Note that the evaluated expression should result in either a logical vector or a factor with 2
|
|
12 |
#' levels. By default this is just `"response"` such that the original response variable is used
|
|
13 |
#' and not modified further.
|
|
14 |
#'
|
|
15 |
#' @return A list of components with the same names as the arguments.
|
|
16 |
#'
|
|
17 |
#' @examples
|
|
18 |
#' # Standard options.
|
|
19 |
#' control_logistic()
|
|
20 |
#'
|
|
21 |
#' # Modify confidence level.
|
|
22 |
#' control_logistic(conf_level = 0.9)
|
|
23 |
#'
|
|
24 |
#' # Use a different response definition.
|
|
25 |
#' control_logistic(response_definition = "I(response %in% c('CR', 'PR'))")
|
|
26 |
#'
|
|
27 |
#' @export
|
|
28 |
control_logistic <- function(response_definition = "response", |
|
29 |
conf_level = 0.95) { |
|
30 | 29x |
checkmate::assert_true(grepl("response", response_definition)) |
31 | 28x |
checkmate::assert_string(response_definition) |
32 | 28x |
assert_proportion_value(conf_level) |
33 | 27x |
list( |
34 | 27x |
response_definition = response_definition, |
35 | 27x |
conf_level = conf_level |
36 |
)
|
|
37 |
}
|
1 |
#' Subgroup treatment effect pattern (STEP) fit for binary (response) outcome
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' This fits the Subgroup Treatment Effect Pattern logistic regression models for a binary
|
|
6 |
#' (response) outcome. The treatment arm variable must have exactly 2 levels,
|
|
7 |
#' where the first one is taken as reference and the estimated odds ratios are
|
|
8 |
#' for the comparison of the second level vs. the first one.
|
|
9 |
#'
|
|
10 |
#' The (conditional) logistic regression model which is fit is:
|
|
11 |
#'
|
|
12 |
#' `response ~ arm * poly(biomarker, degree) + covariates + strata(strata)`
|
|
13 |
#'
|
|
14 |
#' where `degree` is specified by `control_step()`.
|
|
15 |
#'
|
|
16 |
#' @inheritParams argument_convention
|
|
17 |
#' @param variables (named `list` of `character`)\cr list of analysis variables:
|
|
18 |
#' needs `response`, `arm`, `biomarker`, and optional `covariates` and `strata`.
|
|
19 |
#' @param control (named `list`)\cr combined control list from [control_step()]
|
|
20 |
#' and [control_logistic()].
|
|
21 |
#'
|
|
22 |
#' @return A matrix of class `step`. The first part of the columns describe the
|
|
23 |
#' subgroup intervals used for the biomarker variable, including where the
|
|
24 |
#' center of the intervals are and their bounds. The second part of the
|
|
25 |
#' columns contain the estimates for the treatment arm comparison.
|
|
26 |
#'
|
|
27 |
#' @note For the default degree 0 the `biomarker` variable is not included in the model.
|
|
28 |
#'
|
|
29 |
#' @seealso [control_step()] and [control_logistic()] for the available
|
|
30 |
#' customization options.
|
|
31 |
#'
|
|
32 |
#' @examples
|
|
33 |
#' # Testing dataset with just two treatment arms.
|
|
34 |
#' library(survival)
|
|
35 |
#' library(dplyr)
|
|
36 |
#'
|
|
37 |
#' adrs_f <- tern_ex_adrs %>%
|
|
38 |
#' filter(
|
|
39 |
#' PARAMCD == "BESRSPI",
|
|
40 |
#' ARM %in% c("B: Placebo", "A: Drug X")
|
|
41 |
#' ) %>%
|
|
42 |
#' mutate(
|
|
43 |
#' # Reorder levels of ARM to have Placebo as reference arm for Odds Ratio calculations.
|
|
44 |
#' ARM = droplevels(forcats::fct_relevel(ARM, "B: Placebo")),
|
|
45 |
#' RSP = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
|
|
46 |
#' SEX = factor(SEX)
|
|
47 |
#' )
|
|
48 |
#'
|
|
49 |
#' variables <- list(
|
|
50 |
#' arm = "ARM",
|
|
51 |
#' biomarker = "BMRKR1",
|
|
52 |
#' covariates = "AGE",
|
|
53 |
#' response = "RSP"
|
|
54 |
#' )
|
|
55 |
#'
|
|
56 |
#' # Fit default STEP models: Here a constant treatment effect is estimated in each subgroup.
|
|
57 |
#' # We use a large enough bandwidth to avoid too small subgroups and linear separation in those.
|
|
58 |
#' step_matrix <- fit_rsp_step(
|
|
59 |
#' variables = variables,
|
|
60 |
#' data = adrs_f,
|
|
61 |
#' control = c(control_logistic(), control_step(bandwidth = 0.9))
|
|
62 |
#' )
|
|
63 |
#' dim(step_matrix)
|
|
64 |
#' head(step_matrix)
|
|
65 |
#'
|
|
66 |
#' # Specify different polynomial degree for the biomarker interaction to use more flexible local
|
|
67 |
#' # models. Or specify different logistic regression options, including confidence level.
|
|
68 |
#' step_matrix2 <- fit_rsp_step(
|
|
69 |
#' variables = variables,
|
|
70 |
#' data = adrs_f,
|
|
71 |
#' control = c(control_logistic(conf_level = 0.9), control_step(bandwidth = NULL, degree = 1))
|
|
72 |
#' )
|
|
73 |
#'
|
|
74 |
#' # Use a global constant model. This is helpful as a reference for the subgroup models.
|
|
75 |
#' step_matrix3 <- fit_rsp_step(
|
|
76 |
#' variables = variables,
|
|
77 |
#' data = adrs_f,
|
|
78 |
#' control = c(control_logistic(), control_step(bandwidth = NULL, num_points = 2L))
|
|
79 |
#' )
|
|
80 |
#'
|
|
81 |
#' # It is also possible to use strata, i.e. use conditional logistic regression models.
|
|
82 |
#' variables2 <- list(
|
|
83 |
#' arm = "ARM",
|
|
84 |
#' biomarker = "BMRKR1",
|
|
85 |
#' covariates = "AGE",
|
|
86 |
#' response = "RSP",
|
|
87 |
#' strata = c("STRATA1", "STRATA2")
|
|
88 |
#' )
|
|
89 |
#'
|
|
90 |
#' step_matrix4 <- fit_rsp_step(
|
|
91 |
#' variables = variables2,
|
|
92 |
#' data = adrs_f,
|
|
93 |
#' control = c(control_logistic(), control_step(bandwidth = NULL))
|
|
94 |
#' )
|
|
95 |
#'
|
|
96 |
#' @export
|
|
97 |
fit_rsp_step <- function(variables, |
|
98 |
data,
|
|
99 |
control = c(control_step(), control_logistic())) { |
|
100 | 5x |
assert_df_with_variables(data, variables) |
101 | 5x |
checkmate::assert_list(control, names = "named") |
102 | 5x |
data <- data[!is.na(data[[variables$biomarker]]), ] |
103 | 5x |
window_sel <- h_step_window(x = data[[variables$biomarker]], control = control) |
104 | 5x |
interval_center <- window_sel$interval[, "Interval Center"] |
105 | 5x |
form <- h_step_rsp_formula(variables = variables, control = control) |
106 | 5x |
estimates <- if (is.null(control$bandwidth)) { |
107 | 1x |
h_step_rsp_est( |
108 | 1x |
formula = form, |
109 | 1x |
data = data, |
110 | 1x |
variables = variables, |
111 | 1x |
x = interval_center, |
112 | 1x |
control = control |
113 |
)
|
|
114 |
} else { |
|
115 | 4x |
tmp <- mapply( |
116 | 4x |
FUN = h_step_rsp_est, |
117 | 4x |
x = interval_center, |
118 | 4x |
subset = as.list(as.data.frame(window_sel$sel)), |
119 | 4x |
MoreArgs = list( |
120 | 4x |
formula = form, |
121 | 4x |
data = data, |
122 | 4x |
variables = variables, |
123 | 4x |
control = control |
124 |
)
|
|
125 |
)
|
|
126 |
# Maybe we find a more elegant solution than this.
|
|
127 | 4x |
rownames(tmp) <- c("n", "logor", "se", "ci_lower", "ci_upper") |
128 | 4x |
t(tmp) |
129 |
}
|
|
130 | 5x |
result <- cbind(window_sel$interval, estimates) |
131 | 5x |
structure( |
132 | 5x |
result,
|
133 | 5x |
class = c("step", "matrix"), |
134 | 5x |
variables = variables, |
135 | 5x |
control = control |
136 |
)
|
|
137 |
}
|
1 |
#' Class for `CombinationFunction`
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' `CombinationFunction` is an S4 class which extends standard functions. These are special functions that
|
|
6 |
#' can be combined and negated with the logical operators.
|
|
7 |
#'
|
|
8 |
#' @param e1 (`CombinationFunction`)\cr left hand side of logical operator.
|
|
9 |
#' @param e2 (`CombinationFunction`)\cr right hand side of logical operator.
|
|
10 |
#' @param x (`CombinationFunction`)\cr the function which should be negated.
|
|
11 |
#'
|
|
12 |
#' @return A logical value indicating whether the left hand side of the equation equals the right hand side.
|
|
13 |
#'
|
|
14 |
#' @examples
|
|
15 |
#' higher <- function(a) {
|
|
16 |
#' force(a)
|
|
17 |
#' CombinationFunction(
|
|
18 |
#' function(x) {
|
|
19 |
#' x > a
|
|
20 |
#' }
|
|
21 |
#' )
|
|
22 |
#' }
|
|
23 |
#'
|
|
24 |
#' lower <- function(b) {
|
|
25 |
#' force(b)
|
|
26 |
#' CombinationFunction(
|
|
27 |
#' function(x) {
|
|
28 |
#' x < b
|
|
29 |
#' }
|
|
30 |
#' )
|
|
31 |
#' }
|
|
32 |
#'
|
|
33 |
#' c1 <- higher(5)
|
|
34 |
#' c2 <- lower(10)
|
|
35 |
#' c3 <- higher(5) & lower(10)
|
|
36 |
#' c3(7)
|
|
37 |
#'
|
|
38 |
#' @name combination_function
|
|
39 |
#' @aliases CombinationFunction-class
|
|
40 |
#' @exportClass CombinationFunction
|
|
41 |
#' @export CombinationFunction
|
|
42 |
CombinationFunction <- methods::setClass("CombinationFunction", contains = "function") # nolint |
|
43 | ||
44 |
#' @describeIn combination_function Logical "AND" combination of `CombinationFunction` functions.
|
|
45 |
#' The resulting object is of the same class, and evaluates the two argument functions. The result
|
|
46 |
#' is then the "AND" of the two individual results.
|
|
47 |
#'
|
|
48 |
#' @export
|
|
49 |
methods::setMethod( |
|
50 |
"&",
|
|
51 |
signature = c(e1 = "CombinationFunction", e2 = "CombinationFunction"), |
|
52 |
definition = function(e1, e2) { |
|
53 | 4x |
CombinationFunction(function(...) { |
54 | 490x |
e1(...) && e2(...) |
55 |
}) |
|
56 |
}
|
|
57 |
)
|
|
58 | ||
59 |
#' @describeIn combination_function Logical "OR" combination of `CombinationFunction` functions.
|
|
60 |
#' The resulting object is of the same class, and evaluates the two argument functions. The result
|
|
61 |
#' is then the "OR" of the two individual results.
|
|
62 |
#'
|
|
63 |
#' @export
|
|
64 |
methods::setMethod( |
|
65 |
"|",
|
|
66 |
signature = c(e1 = "CombinationFunction", e2 = "CombinationFunction"), |
|
67 |
definition = function(e1, e2) { |
|
68 | 2x |
CombinationFunction(function(...) { |
69 | 4x |
e1(...) || e2(...) |
70 |
}) |
|
71 |
}
|
|
72 |
)
|
|
73 | ||
74 |
#' @describeIn combination_function Logical negation of `CombinationFunction` functions.
|
|
75 |
#' The resulting object is of the same class, and evaluates the original function. The result
|
|
76 |
#' is then the opposite of this results.
|
|
77 |
#'
|
|
78 |
#' @export
|
|
79 |
methods::setMethod( |
|
80 |
"!",
|
|
81 |
signature = c(x = "CombinationFunction"), |
|
82 |
definition = function(x) { |
|
83 | 2x |
CombinationFunction(function(...) { |
84 | 305x |
!x(...) |
85 |
}) |
|
86 |
}
|
|
87 |
)
|
1 |
#' Compare variables between groups
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' The analyze function [compare_vars()] creates a layout element to summarize and compare one or more variables, using
|
|
6 |
#' the S3 generic function [s_summary()] to calculate a list of summary statistics. A list of all available statistics
|
|
7 |
#' for numeric variables can be viewed by running `get_stats("analyze_vars_numeric", add_pval = TRUE)` and for
|
|
8 |
#' non-numeric variables by running `get_stats("analyze_vars_counts", add_pval = TRUE)`. Use the `.stats` parameter to
|
|
9 |
#' specify the statistics to include in your output summary table.
|
|
10 |
#'
|
|
11 |
#' Prior to using this function in your table layout you must use [rtables::split_cols_by()] to create a column
|
|
12 |
#' split on the variable to be used in comparisons, and specify a reference group via the `ref_group` parameter.
|
|
13 |
#' Comparisons can be performed for each group (column) against the specified reference group by including the p-value
|
|
14 |
#' statistic.
|
|
15 |
#'
|
|
16 |
#' @inheritParams argument_convention
|
|
17 |
#' @param .stats (`character`)\cr statistics to select for the table.
|
|
18 |
#'
|
|
19 |
#' Options for numeric variables are: ``r shQuote(get_stats("analyze_vars_numeric", add_pval = TRUE), type = "sh")``
|
|
20 |
#'
|
|
21 |
#' Options for non-numeric variables are: ``r shQuote(get_stats("analyze_vars_counts", add_pval = TRUE), type = "sh")``
|
|
22 |
#'
|
|
23 |
#' @note
|
|
24 |
#' * For factor variables, `denom` for factor proportions can only be `n` since the purpose is to compare proportions
|
|
25 |
#' between columns, therefore a row-based proportion would not make sense. Proportion based on `N_col` would
|
|
26 |
#' be difficult since we use counts for the chi-squared test statistic, therefore missing values should be accounted
|
|
27 |
#' for as explicit factor levels.
|
|
28 |
#' * If factor variables contain `NA`, these `NA` values are excluded by default. To include `NA` values
|
|
29 |
#' set `na.rm = FALSE` and missing values will be displayed as an `NA` level. Alternatively, an explicit
|
|
30 |
#' factor level can be defined for `NA` values during pre-processing via [df_explicit_na()] - the
|
|
31 |
#' default `na_level` (`"<Missing>"`) will also be excluded when `na.rm` is set to `TRUE`.
|
|
32 |
#' * For character variables, automatic conversion to factor does not guarantee that the table
|
|
33 |
#' will be generated correctly. In particular for sparse tables this very likely can fail.
|
|
34 |
#' Therefore it is always better to manually convert character variables to factors during pre-processing.
|
|
35 |
#' * For `compare_vars()`, the column split must define a reference group via `ref_group` so that the comparison
|
|
36 |
#' is well defined.
|
|
37 |
#'
|
|
38 |
#' @seealso [s_summary()] which is used internally to compute a summary within `s_compare()`, and [a_summary()]
|
|
39 |
#' which is used (with `compare = TRUE`) as the analysis function for `compare_vars()`.
|
|
40 |
#'
|
|
41 |
#' @name compare_variables
|
|
42 |
#' @include analyze_variables.R
|
|
43 |
#' @order 1
|
|
44 |
NULL
|
|
45 | ||
46 |
#' @describeIn compare_variables S3 generic function to produce a comparison summary.
|
|
47 |
#'
|
|
48 |
#' @return
|
|
49 |
#' * `s_compare()` returns output of [s_summary()] and comparisons versus the reference group in the form of p-values.
|
|
50 |
#'
|
|
51 |
#' @export
|
|
52 |
s_compare <- function(x, |
|
53 |
...) { |
|
54 | 9x |
UseMethod("s_compare", x) |
55 |
}
|
|
56 | ||
57 |
#' @describeIn compare_variables Method for `numeric` class. This uses the standard t-test
|
|
58 |
#' to calculate the p-value.
|
|
59 |
#'
|
|
60 |
#' @method s_compare numeric
|
|
61 |
#'
|
|
62 |
#' @examples
|
|
63 |
#' # `s_compare.numeric`
|
|
64 |
#'
|
|
65 |
#' ## Usual case where both this and the reference group vector have more than 1 value.
|
|
66 |
#' s_compare(rnorm(10, 5, 1), .ref_group = rnorm(5, -5, 1), .in_ref_col = FALSE)
|
|
67 |
#'
|
|
68 |
#' ## If one group has not more than 1 value, then p-value is not calculated.
|
|
69 |
#' s_compare(rnorm(10, 5, 1), .ref_group = 1, .in_ref_col = FALSE)
|
|
70 |
#'
|
|
71 |
#' ## Empty numeric does not fail, it returns NA-filled items and no p-value.
|
|
72 |
#' s_compare(numeric(), .ref_group = numeric(), .in_ref_col = FALSE)
|
|
73 |
#'
|
|
74 |
#' @export
|
|
75 |
s_compare.numeric <- function(x, ...) { |
|
76 | 2x |
s_summary.numeric(x = x, compare_with_ref_group = TRUE, ...) |
77 |
}
|
|
78 | ||
79 |
#' @describeIn compare_variables Method for `factor` class. This uses the chi-squared test
|
|
80 |
#' to calculate the p-value.
|
|
81 |
#'
|
|
82 |
#' @method s_compare factor
|
|
83 |
#'
|
|
84 |
#' @examples
|
|
85 |
#' # `s_compare.factor`
|
|
86 |
#'
|
|
87 |
#' ## Basic usage:
|
|
88 |
#' x <- factor(c("a", "a", "b", "c", "a"))
|
|
89 |
#' y <- factor(c("a", "b", "c"))
|
|
90 |
#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE)
|
|
91 |
#'
|
|
92 |
#' ## Management of NA values.
|
|
93 |
#' x <- explicit_na(factor(c("a", "a", "b", "c", "a", NA, NA)))
|
|
94 |
#' y <- explicit_na(factor(c("a", "b", "c", NA)))
|
|
95 |
#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE, na_rm = TRUE)
|
|
96 |
#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE, na_rm = FALSE)
|
|
97 |
#'
|
|
98 |
#' @export
|
|
99 |
s_compare.factor <- function(x, ...) { |
|
100 | 3x |
s_summary.factor( |
101 | 3x |
x = x, |
102 | 3x |
compare_with_ref_group = TRUE, |
103 |
...
|
|
104 |
)
|
|
105 |
}
|
|
106 | ||
107 |
#' @describeIn compare_variables Method for `character` class. This makes an automatic
|
|
108 |
#' conversion to `factor` (with a warning) and then forwards to the method for factors.
|
|
109 |
#'
|
|
110 |
#' @method s_compare character
|
|
111 |
#'
|
|
112 |
#' @examples
|
|
113 |
#' # `s_compare.character`
|
|
114 |
#'
|
|
115 |
#' ## Basic usage:
|
|
116 |
#' x <- c("a", "a", "b", "c", "a")
|
|
117 |
#' y <- c("a", "b", "c")
|
|
118 |
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, .var = "x", verbose = FALSE)
|
|
119 |
#'
|
|
120 |
#' ## Note that missing values handling can make a large difference:
|
|
121 |
#' x <- c("a", "a", "b", "c", "a", NA)
|
|
122 |
#' y <- c("a", "b", "c", rep(NA, 20))
|
|
123 |
#' s_compare(x,
|
|
124 |
#' .ref_group = y, .in_ref_col = FALSE,
|
|
125 |
#' .var = "x", verbose = FALSE
|
|
126 |
#' )
|
|
127 |
#' s_compare(x,
|
|
128 |
#' .ref_group = y, .in_ref_col = FALSE, .var = "x",
|
|
129 |
#' na.rm = FALSE, verbose = FALSE
|
|
130 |
#' )
|
|
131 |
#'
|
|
132 |
#' @export
|
|
133 |
s_compare.character <- function(x, ...) { |
|
134 | 1x |
s_summary.character( |
135 | 1x |
x,
|
136 | 1x |
compare_with_ref_group = TRUE, |
137 |
...
|
|
138 |
)
|
|
139 |
}
|
|
140 | ||
141 |
#' @describeIn compare_variables Method for `logical` class. A chi-squared test
|
|
142 |
#' is used. If missing values are not removed, then they are counted as `FALSE`.
|
|
143 |
#'
|
|
144 |
#' @method s_compare logical
|
|
145 |
#'
|
|
146 |
#' @examples
|
|
147 |
#' # `s_compare.logical`
|
|
148 |
#'
|
|
149 |
#' ## Basic usage:
|
|
150 |
#' x <- c(TRUE, FALSE, TRUE, TRUE)
|
|
151 |
#' y <- c(FALSE, FALSE, TRUE)
|
|
152 |
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE)
|
|
153 |
#'
|
|
154 |
#' ## Management of NA values.
|
|
155 |
#' x <- c(NA, TRUE, FALSE)
|
|
156 |
#' y <- c(NA, NA, NA, NA, FALSE)
|
|
157 |
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, na_rm = TRUE)
|
|
158 |
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, na_rm = FALSE)
|
|
159 |
#'
|
|
160 |
#' @export
|
|
161 |
s_compare.logical <- function(x, ...) { |
|
162 | 3x |
s_summary.logical( |
163 | 3x |
x = x, |
164 | 3x |
compare_with_ref_group = TRUE, |
165 |
...
|
|
166 |
)
|
|
167 |
}
|
|
168 | ||
169 |
#' @describeIn compare_variables Layout-creating function which can take statistics function arguments
|
|
170 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
|
|
171 |
#'
|
|
172 |
#' @param ... additional arguments passed to `s_compare()`, including:
|
|
173 |
#' * `denom`: (`string`) choice of denominator. Options are `c("n", "N_col", "N_row")`. For factor variables, can
|
|
174 |
#' only be `"n"` (number of values in this row and column intersection).
|
|
175 |
#' * `.N_row`: (`numeric(1)`) Row-wise N (row group count) for the group of observations being analyzed (i.e. with no
|
|
176 |
#' column-based subsetting).
|
|
177 |
#' * `.N_col`: (`numeric(1)`) Column-wise N (column count) for the full column being tabulated within.
|
|
178 |
#' * `verbose`: (`flag`) Whether additional warnings and messages should be printed. Mainly used to print out
|
|
179 |
#' information about factor casting. Defaults to `TRUE`. Used for `character`/`factor` variables only.
|
|
180 |
#' @param .indent_mods (named `integer`)\cr indent modifiers for the labels. Each element of the vector
|
|
181 |
#' should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
|
|
182 |
#' for that statistic's row label.
|
|
183 |
#'
|
|
184 |
#' @return
|
|
185 |
#' * `compare_vars()` returns a layout object suitable for passing to further layouting functions,
|
|
186 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
|
|
187 |
#' the statistics from `s_compare()` to the table layout.
|
|
188 |
#'
|
|
189 |
#' @examples
|
|
190 |
#' # `compare_vars()` in `rtables` pipelines
|
|
191 |
#'
|
|
192 |
#' ## Default output within a `rtables` pipeline.
|
|
193 |
#' lyt <- basic_table() %>%
|
|
194 |
#' split_cols_by("ARMCD", ref_group = "ARM B") %>%
|
|
195 |
#' compare_vars(c("AGE", "SEX"))
|
|
196 |
#' build_table(lyt, tern_ex_adsl)
|
|
197 |
#'
|
|
198 |
#' ## Select and format statistics output.
|
|
199 |
#' lyt <- basic_table() %>%
|
|
200 |
#' split_cols_by("ARMCD", ref_group = "ARM C") %>%
|
|
201 |
#' compare_vars(
|
|
202 |
#' vars = "AGE",
|
|
203 |
#' .stats = c("mean_sd", "pval"),
|
|
204 |
#' .formats = c(mean_sd = "xx.x, xx.x"),
|
|
205 |
#' .labels = c(mean_sd = "Mean, SD")
|
|
206 |
#' )
|
|
207 |
#' build_table(lyt, df = tern_ex_adsl)
|
|
208 |
#'
|
|
209 |
#' @export
|
|
210 |
#' @order 2
|
|
211 |
compare_vars <- function(lyt, |
|
212 |
vars,
|
|
213 |
var_labels = vars, |
|
214 |
na_str = default_na_str(), |
|
215 |
nested = TRUE, |
|
216 |
...,
|
|
217 |
na_rm = TRUE, |
|
218 |
show_labels = "default", |
|
219 |
table_names = vars, |
|
220 |
section_div = NA_character_, |
|
221 |
.stats = c("n", "mean_sd", "count_fraction", "pval"), |
|
222 |
.stat_names = NULL, |
|
223 |
.formats = NULL, |
|
224 |
.labels = NULL, |
|
225 |
.indent_mods = NULL) { |
|
226 | 4x |
analyze_vars( |
227 | 4x |
lyt = lyt, |
228 | 4x |
compare_with_ref_group = TRUE, |
229 | 4x |
vars = vars, |
230 | 4x |
var_labels = var_labels, |
231 | 4x |
na_str = na_str, |
232 | 4x |
nested = nested, |
233 | 4x |
na_rm = na_rm, |
234 | 4x |
show_labels = show_labels, |
235 | 4x |
table_names = table_names, |
236 | 4x |
section_div = section_div, |
237 | 4x |
.stats = .stats, |
238 | 4x |
.stat_names = .stat_names, |
239 | 4x |
.formats = .formats, |
240 | 4x |
.labels = .labels, |
241 | 4x |
.indent_mods = .indent_mods, |
242 |
...
|
|
243 |
)
|
|
244 |
}
|
1 |
#' Generate PK reference dataset
|
|
2 |
#'
|
|
3 |
#' @description `r lifecycle::badge("stable")`
|
|
4 |
#'
|
|
5 |
#' @return A `data.frame` of PK parameters.
|
|
6 |
#'
|
|
7 |
#' @examples
|
|
8 |
#' pk_reference_dataset <- d_pkparam()
|
|
9 |
#'
|
|
10 |
#' @export
|
|
11 |
d_pkparam <- function() { |
|
12 | 4x |
pk_dataset <- as.data.frame(matrix( |
13 | 4x |
c( |
14 | 4x |
"TMAX", "Time of CMAX", "Tmax", "Plasma/Blood/Serum", "1", |
15 | 4x |
"CMAX", "Max Conc", "Cmax", "Plasma/Blood/Serum", "2", |
16 | 4x |
"CMAXD", "Max Conc Norm by Dose", "Cmax/D", "Plasma/Blood/Serum", "3", |
17 | 4x |
"AUCIFO", "AUC Infinity Obs", "AUCinf obs", "Plasma/Blood/Serum", "4", |
18 | 4x |
"AUCIFP", "AUC Infinity Pred", "AUCinf pred", "Plasma/Blood/Serum", "5", |
19 | 4x |
"AUCIFOD", "AUC Infinity Obs Norm by Dose", "AUCinf/D obs", "Plasma/Blood/Serum", "6", |
20 | 4x |
"AUCIFD", "AUC Infinity Pred Norm by Dose", "AUCinf/D pred", "Plasma/Blood/Serum", "7", |
21 | 4x |
"AUCPEO", "AUC %Extrapolation Obs", "AUCinf extrap obs", "Plasma/Blood/Serum", "8", |
22 | 4x |
"AUCPEP", "AUC %Extrapolation Pred", "AUCinf extrap pred", "Plasma/Blood/Serum", "9", |
23 | 4x |
"AUCINT", "AUC from T1 to T2", "AUCupper-lower ", "Plasma/Blood/Serum", "10", |
24 | 4x |
"AUCTAU", "AUC Over Dosing Interval", "AUCtau", "Plasma/Blood/Serum", "11", |
25 | 4x |
"AUCLST", "AUC to Last Nonzero Conc", "AUClast", "Plasma/Blood/Serum", "12", |
26 | 4x |
"AUCALL", "AUC All", "AUCall", "Plasma/Blood/Serum", "13", |
27 | 4x |
"AUMCIFO", "AUMC Infinity Obs", "AUMCinf obs", "Plasma/Blood/Serum", "14", |
28 | 4x |
"AUMCIFP", "AUMC Infinity Pred", "AUMCinf pred", "Plasma/Blood/Serum", "15", |
29 | 4x |
"AUMCPEO", "AUMC % Extrapolation Obs", "AUMC extrap obs", "Plasma/Blood/Serum", "16", |
30 | 4x |
"AUMCPEP", "AUMC % Extrapolation Pred", "AUMC extrap pred", "Plasma/Blood/Serum", "17", |
31 | 4x |
"AUMCTAU", "AUMC Over Dosing Interval", "AUMCtau", "Plasma/Blood/Serum", "18", |
32 | 4x |
"AUMCLST", "AUMC to Last Nonzero Conc", "AUMClast", "Plasma/Blood/Serum", "19", |
33 | 4x |
"AURCIFO", "AURC Infinity Obs", "AURCinf obs", "Plasma/Blood/Serum", "20", |
34 | 4x |
"AURCIFP", "AURC Infinity Pred", "AURCinf pred", "Plasma/Blood/Serum", "21", |
35 | 4x |
"AURCPEO", "AURC % Extrapolation Obs", "AURC extrap obs", "Plasma/Blood/Serum", "22", |
36 | 4x |
"AURCPEP", "AURC % Extrapolation Pred", "AURC extrap pred", "Plasma/Blood/Serum", "23", |
37 | 4x |
"AURCLST", "AURC Dosing to Last Conc", "AURClast", "Plasma/Blood/Serum", "24", |
38 | 4x |
"AURCALL", "AURC All", "AURCall", "Plasma/Blood/Serum", "25", |
39 | 4x |
"TLST", "Time of Last Nonzero Conc", "Tlast", "Plasma/Blood/Serum", "26", |
40 | 4x |
"CO", "Initial Conc", "CO", "Plasma/Blood/Serum", "27", |
41 | 4x |
"C0", "Initial Conc", "C0", "Plasma/Blood/Serum", "28", |
42 | 4x |
"CAVG", "Average Conc", "Cavg", "Plasma/Blood/Serum", "29", |
43 | 4x |
"CLST", "Last Nonzero Conc", "Clast", "Plasma/Blood/Serum", "30", |
44 | 4x |
"CMIN", "Min Conc", "Cmin", "Plasma/Blood/Serum", "31", |
45 | 4x |
"LAMZHL", "Half-Life Lambda z", "t1/2", "Plasma/Blood/Serum", "32", |
46 | 4x |
"CLFO", "Total CL Obs by F", "CL/F obs", "Plasma/Blood/Serum", "33", |
47 | 4x |
"CLFP", "Total CL Pred by F", "CL/F pred", "Plasma/Blood/Serum", "34", |
48 | 4x |
"CLO", "Total CL Obs", "CL obs", "Plasma/Blood/Serum", "35", |
49 | 4x |
"CLP", "Total CL Pred", "CL pred", "Plasma/Blood/Serum", "36", |
50 | 4x |
"CLSS", "Total CL Steady State Pred", "CLss", "Plasma/Blood/Serum", "37", |
51 | 4x |
"CLSSF", "Total CL Steady State Pred by F", "CLss/F", "Plasma/Blood/Serum", "38", |
52 | 4x |
"VZFO", "Vz Obs by F", "Vz/F obs", "Plasma/Blood/Serum", "39", |
53 | 4x |
"VZFP", "Vz Pred by F", "Vz/F pred", "Plasma/Blood/Serum", "40", |
54 | 4x |
"VZO", "Vz Obs", "Vz obs", "Plasma/Blood/Serum", "41", |
55 | 4x |
"VZP", "Vz Pred", "Vz pred", "Plasma/Blood/Serum", "42", |
56 | 4x |
"VSSO", "Vol Dist Steady State Obs", "Vss obs", "Plasma/Blood/Serum", "43", |
57 | 4x |
"VSSP", "Vol Dist Steady State Pred", "Vss pred", "Plasma/Blood/Serum", "44", |
58 | 4x |
"LAMZ", "Lambda z", "Lambda z", "Plasma/Blood/Serum", "45", |
59 | 4x |
"LAMZLL", "Lambda z Lower Limit", "Lambda z lower", "Plasma/Blood/Serum", "46", |
60 | 4x |
"LAMZUL", "Lambda z Upper Limit", "Lambda z upper", "Plasma/Blood/Serum", "47", |
61 | 4x |
"LAMZNPT", "Number of Points for Lambda z", "No points Lambda z", "Plasma/Blood/Serum", "48", |
62 | 4x |
"MRTIFO", "MRT Infinity Obs", "MRTinf obs", "Plasma/Blood/Serum", "49", |
63 | 4x |
"MRTIFP", "MRT Infinity Pred", "MRTinf pred", "Plasma/Blood/Serum", "50", |
64 | 4x |
"MRTLST", "MRT to Last Nonzero Conc", "MRTlast", "Plasma/Blood/Serum", "51", |
65 | 4x |
"R2", "R Squared", "Rsq", "Plasma/Blood/Serum", "52", |
66 | 4x |
"R2ADJ", "R Squared Adjusted", "Rsq adjusted", "Plasma/Blood/Serum", "53", |
67 | 4x |
"TLAG", "Time Until First Nonzero Conc", "TIag", "Plasma/Blood/Serum", "54", |
68 | 4x |
"TMIN", "Time of CMIN Observation", "Tmin", "Plasma/Blood/Serum", "55", |
69 | 4x |
"ACCI", "Accumulation Index", "Accumulation Index", "Plasma/Blood/Serum/Urine", "56", |
70 | 4x |
"FLUCP", "Fluctuation%", "Fluctuation", "Plasma/Blood/Serum", "57", |
71 | 4x |
"CORRXY", "Correlation Between TimeX and Log ConcY", "Corr xy", "Plasma/Blood/Serum", "58", |
72 | 4x |
"RCAMINT", "Amt Rec from T1 to T2", "Ae", "Urine", "59", |
73 | 4x |
"RCPCINT", "Pct Rec from T1 to T2", "Fe", "Urine", "60", |
74 | 4x |
"VOLPK", "Sum of Urine Vol", "Urine volume", "Urine", "61", |
75 | 4x |
"RENALCL", "Renal CL", "CLR", "Plasma/Blood/Serum/Urine", "62", |
76 | 4x |
"ERTMAX", "Time of Max Excretion Rate", "Tmax Rate", "Urine", "63", |
77 | 4x |
"RMAX", "Time of Maximum Response", "Rmax", "Matrix of PD", "64", |
78 | 4x |
"RMIN", "Time of Minimum Response", "Rmin", "Matrix of PD", "65", |
79 | 4x |
"ERMAX", "Max Excretion Rate", "Max excretion rate", "Urine", "66", |
80 | 4x |
"MIDPTLST", "Midpoint of Collection Interval", "Midpoint last", "Urine", "67", |
81 | 4x |
"ERLST", "Last Meas Excretion Rate", "Rate last", "Urine", "68", |
82 | 4x |
"TON", "Time to Onset", "Tonset", "Matrix of PD", "69", |
83 | 4x |
"TOFF", "Time to Offset", "Toffset", "Matrix of PD", "70", |
84 | 4x |
"TBBLP", "Time Below Baseline %", "Time %Below Baseline", "Matrix of PD", "71", |
85 | 4x |
"TBTP", "Time Below Threshold %", "Time %Below Threshold", "Matrix of PD", "72", |
86 | 4x |
"TABL", "Time Above Baseline", "Time Above Baseline", "Matrix of PD", "73", |
87 | 4x |
"TAT", "Time Above Threshold", "Time Above Threshold", "Matrix of PD", "74", |
88 | 4x |
"TBT", "Time Below Threshold", "Time Below Threshold", "Matrix of PD", "75", |
89 | 4x |
"TBLT", "Time Between Baseline and Threshold", "Time Between Baseline Threshold", "Matrix of PD", "76", |
90 | 4x |
"BLRSP", "Baseline Response", "Baseline", "Matrix of PD", "77", |
91 | 4x |
"TSHDRSP", "Response Threshold", "Threshold", "Matrix of PD", "78", |
92 | 4x |
"AUCABL", "AUC Above Baseline", "AUC above baseline", "Matrix of PD", "79", |
93 | 4x |
"AUCAT", "AUC Above Threshold", "AUC above threshold", "Matrix of PD", "80", |
94 | 4x |
"AUCBBL", "AUC Below Baseline", "AUC below baseline", "Matrix of PD", "81", |
95 | 4x |
"AUCBT", "AUC Below Threshold", "AUC below threshold", "Matrix of PD", "82", |
96 | 4x |
"AUCBLDIF", "Diff AUC Above Base and AUC Below Base", "AUC diff baseline", "Matrix of PD", "83", |
97 | 4x |
"AUCTDIF", "Diff AUC Above Thr and AUC Below Thr", "AUCnet threshold", "Matrix of PD", "84", |
98 | 4x |
"TDIFF", "Diff Time to Offset and Time to Onset", "Diff toffset-tonset", "Matrix of PD", "85", |
99 | 4x |
"AUCPBEO", "AUC %Back Extrapolation Obs", "AUC%Back extrap obs", "Plasma/Blood/Serum", "86", |
100 | 4x |
"AUCPBEP", "AUC %Back Extrapolation Pred", "AUC%Back extrap pred", "Plasma/Blood/Serum", "87", |
101 | 4x |
"TSLP1L", "Lower Time Limit Slope 1st", "Slope1 lower", "Matrix of PD", "88", |
102 | 4x |
"TSLP1U", "Upper Time Limit Slope 1st Segment", "Slope1 upper", "Matrix of PD", "89", |
103 | 4x |
"TSLP2L", "Lower Time Limit Slope 2nd Segment", "Slope2 lower", "Matrix of PD", "90", |
104 | 4x |
"TSLP2U", "Upper Time Limit Slope 2nd Segment", "Slope2 upper", "Matrix of PD", "91", |
105 | 4x |
"SLP1", "Slope, 1st Segment", "Slope1", "Matrix of PD", "92", |
106 | 4x |
"SLP2", "Slope, 2nd Segment", "Slope2", "Matrix of PD", "93", |
107 | 4x |
"SLP1PT", "Number of Points for Slope 1st Segment", "No points slope1", "Matrix of PD", "94", |
108 | 4x |
"SLP2PT", "Number of Points for Slope 2nd Segment", "No points slope2", "Matrix of PD", "95", |
109 | 4x |
"R2ADJS1", "R-Squared Adjusted Slope, 1st Segment", "Rsq adjusted slope1", "Matrix of PD", "96", |
110 | 4x |
"R2ADJS2", "R-Squared Adjusted Slope, 2nd Segment", "Rsq adjusted slope2", "Matrix of PD", "97", |
111 | 4x |
"R2SLP1", "R Squared, Slope, 1st Segment", "Rsq slope1", "Matrix of PD", "98", |
112 | 4x |
"R2SLP2", "R Squared, Slope, 2nd Segment", "Rsq slope2", "Matrix of PD", "99", |
113 | 4x |
"CORRXYS1", "Corr Btw TimeX and Log ConcY, Slope 1st", "Corr xy slope1", "Plasma/Blood/Serum", "100", |
114 | 4x |
"CORRXYS2", "Corr Btw TimeX and Log ConcY, Slope 1st Slope 2nd", "Corr xy slope2", "Plasma/Blood/Serum", "101", |
115 | 4x |
"AILAMZ", "Accumulation Index using Lambda z", "AILAMZ", "Plasma/Blood/Serum", "102", |
116 | 4x |
"ARAUC", "Accumulation Ratio AUCTAU", "ARAUC", "Plasma/Blood/Serum", "103", |
117 | 4x |
"ARAUCD", "Accum Ratio AUCTAU norm by dose", "ARAUCD", "Plasma/Blood/Serum", "104", |
118 | 4x |
"ARAUCIFO", "Accum Ratio AUC Infinity Obs", "ARAUCIFO", "Plasma/Blood/Serum", "105", |
119 | 4x |
"ARAUCIFP", "Accum Ratio AUC Infinity Pred", "ARAUCIFP", "Plasma/Blood/Serum", "106", |
120 | 4x |
"ARAUCIND", "Accum Ratio AUC T1 to T2 norm by dose", "ARAUCIND_T1_T2_UNIT", "Plasma/Blood/Serum", "107", |
121 | 4x |
"ARAUCINT", "Accumulation Ratio AUC from T1 to T2", "ARAUCINT_T1_T2_UNIT", "Plasma/Blood/Serum", "108", |
122 | 4x |
"ARAUCIOD", "Accum Ratio AUCIFO Norm by Dose", "ARAUCIOD", "Plasma/Blood/Serum", "109", |
123 | 4x |
"ARAUCIPD", "Accum Ratio AUCIFP Norm by Dose", "ARAUCIPD", "Plasma/Blood/Serum", "110", |
124 | 4x |
"ARAUCLST", "Accum Ratio AUC to Last Nonzero Conc", "ARAUCLST", "Plasma/Blood/Serum", "111", |
125 | 4x |
"ARCMAX", "Accumulation Ratio Cmax", "ARCMAX", "Plasma/Blood/Serum", "112", |
126 | 4x |
"ARCMAXD", "Accum Ratio Cmax norm by dose", "ARCMAXD", "Plasma/Blood/Serum", "113", |
127 | 4x |
"ARCMIN", "Accumulation Ratio Cmin", "ARCMIN", "Plasma/Blood/Serum", "114", |
128 | 4x |
"ARCMIND", "Accum Ratio Cmin norm by dose", "ARCMIND", "Plasma/Blood/Serum", "115", |
129 | 4x |
"ARCTROUD", "Accum Ratio Ctrough norm by dose", "ARCTROUD", "Plasma/Blood/Serum", "116", |
130 | 4x |
"ARCTROUG", "Accumulation Ratio Ctrough", "ARCTROUG", "Plasma/Blood/Serum", "117", |
131 | 4x |
"AUCALLB", "AUC All Norm by BMI", "AUCall_B", "Plasma/Blood/Serum", "118", |
132 | 4x |
"AUCALLD", "AUC All Norm by Dose", "AUCall_D", "Plasma/Blood/Serum", "119", |
133 | 4x |
"AUCALLS", "AUC All Norm by SA", "AUCall_S", "Plasma/Blood/Serum", "120", |
134 | 4x |
"AUCALLW", "AUC All Norm by WT", "AUCall_W", "Plasma/Blood/Serum", "121", |
135 | 4x |
"AUCIFOB", "AUC Infinity Obs Norm by BMI", "AUCINF_obs_B", "Plasma/Blood/Serum", "122", |
136 | 4x |
"AUCIFOLN", "AUC Infinity Obs LN Transformed", "AUCIFOLN", "Plasma/Blood/Serum", "123", |
137 | 4x |
"AUCIFOS", "AUC Infinity Obs Norm by SA", "AUCINF_obs_S", "Plasma/Blood/Serum", "124", |
138 | 4x |
"AUCIFOUB", "AUC Infinity Obs, Unbound Drug", "AUCIFOUB", "Plasma/Blood/Serum", "125", |
139 | 4x |
"AUCIFOW", "AUC Infinity Obs Norm by WT", "AUCINF_obs_W", "Plasma/Blood/Serum", "126", |
140 | 4x |
"AUCIFPB", "AUC Infinity Pred Norm by BMI", "AUCINF_pred_B", "Plasma/Blood/Serum", "127", |
141 | 4x |
"AUCIFPD", "AUC Infinity Pred Norm by Dose", "AUCINF_pred_D", "Plasma/Blood/Serum", "128", |
142 | 4x |
"AUCIFPS", "AUC Infinity Pred Norm by SA", "AUCINF_pred_S", "Plasma/Blood/Serum", "129", |
143 | 4x |
"AUCIFPUB", "AUC Infinity Pred, Unbound Drug", "AUCIFPUB", "Plasma/Blood/Serum", "130", |
144 | 4x |
"AUCIFPW", "AUC Infinity Pred Norm by WT", "AUCINF_pred_W", "Plasma/Blood/Serum", "131", |
145 | 4x |
"AUCINTB", "AUC from T1 to T2 Norm by BMI", "AUC_B_T1_T2_UNIT", "Plasma/Blood/Serum", "132", |
146 | 4x |
"AUCINTD", "AUC from T1 to T2 Norm by Dose", "AUC_D_T1_T2_UNIT", "Plasma/Blood/Serum", "133", |
147 | 4x |
"AUCINTS", "AUC from T1 to T2 Norm by SA", "AUC_S_T1_T2_UNIT", "Plasma/Blood/Serum", "134", |
148 | 4x |
"AUCINTW", "AUC from T1 to T2 Norm by WT", "AUC_W_T1_T2_UNIT", "Plasma/Blood/Serum", "135", |
149 | 4x |
"AUCLSTB", "AUC to Last Nonzero Conc Norm by BMI", "AUClast_B", "Plasma/Blood/Serum", "136", |
150 | 4x |
"AUCLSTD", "AUC to Last Nonzero Conc Norm by Dose", "AUClast_D", "Plasma/Blood/Serum", "137", |
151 | 4x |
"AUCLSTLN", "AUC to Last Nonzero Conc LN Transformed", "AUCLSTLN", "Plasma/Blood/Serum", "138", |
152 | 4x |
"AUCLSTS", "AUC to Last Nonzero Conc Norm by SA", "AUClast_S", "Plasma/Blood/Serum", "139", |
153 | 4x |
"AUCLSTUB", "AUC to Last Nonzero Conc, Unbound Drug", "AUCLSTUB", "Plasma/Blood/Serum", "140", |
154 | 4x |
"AUCLSTW", "AUC to Last Nonzero Conc Norm by WT", "AUClast_W", "Plasma/Blood/Serum", "141", |
155 | 4x |
"AUCTAUB", "AUC Over Dosing Interval Norm by BMI", "AUC_TAU_B", "Plasma/Blood/Serum", "142", |
156 | 4x |
"AUCTAUD", "AUC Over Dosing Interval Norm by Dose", "AUC_TAU_D", "Plasma/Blood/Serum", "143", |
157 | 4x |
"AUCTAUS", "AUC Over Dosing Interval Norm by SA", "AUC_TAU_S", "Plasma/Blood/Serum", "144", |
158 | 4x |
"AUCTAUW", "AUC Over Dosing Interval Norm by WT", "AUC_TAU_W", "Plasma/Blood/Serum", "145", |
159 | 4x |
"AUMCIFOB", "AUMC Infinity Obs Norm by BMI", "AUMCINF_obs_B", "Plasma/Blood/Serum", "146", |
160 | 4x |
"AUMCIFOD", "AUMC Infinity Obs Norm by Dose", "AUMCINF_obs_D", "Plasma/Blood/Serum", "147", |
161 | 4x |
"AUMCIFOS", "AUMC Infinity Obs Norm by SA", "AUMCINF_obs_S", "Plasma/Blood/Serum", "148", |
162 | 4x |
"AUMCIFOW", "AUMC Infinity Obs Norm by WT", "AUMCINF_obs_W", "Plasma/Blood/Serum", "149", |
163 | 4x |
"AUMCIFPB", "AUMC Infinity Pred Norm by BMI", "AUMCINF_pred_B", "Plasma/Blood/Serum", "150", |
164 | 4x |
"AUMCIFPD", "AUMC Infinity Pred Norm by Dose", "AUMCINF_pred_D", "Plasma/Blood/Serum", "151", |
165 | 4x |
"AUMCIFPS", "AUMC Infinity Pred Norm by SA", "AUMCINF_pred_S", "Plasma/Blood/Serum", "152", |
166 | 4x |
"AUMCIFPW", "AUMC Infinity Pred Norm by WT", "AUMCINF_pred_W", "Plasma/Blood/Serum", "153", |
167 | 4x |
"AUMCLSTB", "AUMC to Last Nonzero Conc Norm by BMI", "AUMClast_B", "Plasma/Blood/Serum", "154", |
168 | 4x |
"AUMCLSTD", "AUMC to Last Nonzero Conc Norm by Dose", "AUMClast_D", "Plasma/Blood/Serum", "155", |
169 | 4x |
"AUMCLSTS", "AUMC to Last Nonzero Conc Norm by SA", "AUMClast_S", "Plasma/Blood/Serum", "156", |
170 | 4x |
"AUMCLSTW", "AUMC to Last Nonzero Conc Norm by WT", "AUMClast_W", "Plasma/Blood/Serum", "157", |
171 | 4x |
"AUMCTAUB", "AUMC Over Dosing Interval Norm by BMI", "AUMCTAUB", "Plasma/Blood/Serum", "158", |
172 | 4x |
"AUMCTAUD", "AUMC Over Dosing Interval Norm by Dose", "AUMCTAUD", "Plasma/Blood/Serum", "159", |
173 | 4x |
"AUMCTAUS", "AUMC Over Dosing Interval Norm by SA", "AUMCTAUS", "Plasma/Blood/Serum", "160", |
174 | 4x |
"AUMCTAUW", "AUMC Over Dosing Interval Norm by WT", "AUMCTAUW", "Plasma/Blood/Serum", "161", |
175 | 4x |
"AURCALLB", "AURC All Norm by BMI", "AURCALLB", "Plasma/Blood/Serum", "162", |
176 | 4x |
"AURCALLD", "AURC All Norm by Dose", "AURCALLD", "Plasma/Blood/Serum", "163", |
177 | 4x |
"AURCALLS", "AURC All Norm by SA", "AURCALLS", "Plasma/Blood/Serum", "164", |
178 | 4x |
"AURCALLW", "AURC All Norm by WT", "AURCALLW", "Plasma/Blood/Serum", "165", |
179 | 4x |
"AURCIFOB", "AURC Infinity Obs Norm by BMI", "AURCIFOB", "Plasma/Blood/Serum", "166", |
180 | 4x |
"AURCIFOD", "AURC Infinity Obs Norm by Dose", "AURCIFOD", "Plasma/Blood/Serum", "167", |
181 | 4x |
"AURCIFOS", "AURC Infinity Obs Norm by SA", "AURCIFOS", "Plasma/Blood/Serum", "168", |
182 | 4x |
"AURCIFOW", "AURC Infinity Obs Norm by WT", "AURCIFOW", "Plasma/Blood/Serum", "169", |
183 | 4x |
"AURCIFPB", "AURC Infinity Pred Norm by BMI", "AURCIFPB", "Plasma/Blood/Serum", "170", |
184 | 4x |
"AURCIFPD", "AURC Infinity Pred Norm by Dose", "AURCIFPD", "Plasma/Blood/Serum", "171", |
185 | 4x |
"AURCIFPS", "AURC Infinity Pred Norm by SA", "AURCIFPS", "Plasma/Blood/Serum", "172", |
186 | 4x |
"AURCIFPW", "AURC Infinity Pred Norm by WT", "AURCIFPW", "Plasma/Blood/Serum", "173", |
187 | 4x |
"AURCINT", "AURC from T1 to T2", "AURCINT_T1_T2_UNIT", "Plasma/Blood/Serum", "174", |
188 | 4x |
"AURCINTB", "AURC from T1 to T2 Norm by BMI", "AURCINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "175", |
189 | 4x |
"AURCINTD", "AURC from T1 to T2 Norm by Dose", "AURCINTD_T1_T2_UNIT", "Plasma/Blood/Serum", "176", |
190 | 4x |
"AURCINTS", "AURC from T1 to T2 Norm by SA", "AURCINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "177", |
191 | 4x |
"AURCINTW", "AURC from T1 to T2 Norm by WT", "AURCINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "178", |
192 | 4x |
"AURCLSTB", "AURC to Last Nonzero Rate Norm by BMI", "AURCLSTB", "Plasma/Blood/Serum", "179", |
193 | 4x |
"AURCLSTD", "AURC to Last Nonzero Rate Norm by Dose", "AURCLSTD", "Plasma/Blood/Serum", "180", |
194 | 4x |
"AURCLSTS", "AURC to Last Nonzero Rate Norm by SA", "AURCLSTS", "Plasma/Blood/Serum", "181", |
195 | 4x |
"AURCLSTW", "AURC to Last Nonzero Rate Norm by WT", "AURCLSTW", "Plasma/Blood/Serum", "182", |
196 | 4x |
"C0B", "Initial Conc Norm by BMI", "C0B", "Plasma/Blood/Serum", "183", |
197 | 4x |
"C0D", "Initial Conc Norm by Dose", "C0D", "Plasma/Blood/Serum", "184", |
198 | 4x |
"C0S", "Initial Conc Norm by SA", "C0S", "Plasma/Blood/Serum", "185", |
199 | 4x |
"C0W", "Initial Conc Norm by WT", "C0W", "Plasma/Blood/Serum", "186", |
200 | 4x |
"CAVGB", "Average Conc Norm by BMI", "CAVGB", "Plasma/Blood/Serum", "187", |
201 | 4x |
"CAVGD", "Average Conc Norm by Dose", "CAVGD", "Plasma/Blood/Serum", "188", |
202 | 4x |
"CAVGINT", "Average Conc from T1 to T2", "CAVGINT_T1_T2_UNIT", "Plasma/Blood/Serum", "189", |
203 | 4x |
"CAVGINTB", "Average Conc from T1 to T2 Norm by BMI", "CAVGINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "190", |
204 | 4x |
"CAVGINTD", "Average Conc from T1 to T2 Norm by Dose", "CAVGINTD_T1_T2_UNIT", "Plasma/Blood/Serum", "191", |
205 | 4x |
"CAVGINTS", "Average Conc from T1 to T2 Norm by SA", "CAVGINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "192", |
206 | 4x |
"CAVGINTW", "Average Conc from T1 to T2 Norm by WT", "CAVGINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "193", |
207 | 4x |
"CAVGS", "Average Conc Norm by SA", "CAVGS", "Plasma/Blood/Serum", "194", |
208 | 4x |
"CAVGW", "Average Conc Norm by WT", "CAVGW", "Plasma/Blood/Serum", "195", |
209 | 4x |
"CHTMAX", "Concentration at Half Tmax", "CHTMAX", "Plasma/Blood/Serum", "196", |
210 | 4x |
"CLFOB", "Total CL Obs by F Norm by BMI", "CLFOB", "Plasma/Blood/Serum", "197", |
211 | 4x |
"CLFOD", "Total CL Obs by F Norm by Dose", "CLFOD", "Plasma/Blood/Serum", "198", |
212 | 4x |
"CLFOS", "Total CL Obs by F Norm by SA", "CLFOS", "Plasma/Blood/Serum", "199", |
213 | 4x |
"CLFOW", "Total CL Obs by F Norm by WT", "CLFOW", "Plasma/Blood/Serum", "200", |
214 | 4x |
"CLFPB", "Total CL Pred by F Norm by BMI", "CLFPB", "Plasma/Blood/Serum", "201", |
215 | 4x |
"CLFPD", "Total CL Pred by F Norm by Dose", "CLFPD", "Plasma/Blood/Serum", "202", |
216 | 4x |
"CLFPS", "Total CL Pred by F Norm by SA", "CLFPS", "Plasma/Blood/Serum", "203", |
217 | 4x |
"CLFPW", "Total CL Pred by F Norm by WT", "CLFPW", "Plasma/Blood/Serum", "204", |
218 | 4x |
"CLFTAU", "Total CL by F for Dose Int", "CLFTAU", "Plasma/Blood/Serum", "205", |
219 | 4x |
"CLFTAUB", "Total CL by F for Dose Int Norm by BMI", "CLFTAUB", "Plasma/Blood/Serum", "206", |
220 | 4x |
"CLFTAUD", "Total CL by F for Dose Int Norm by Dose", "CLFTAUD", "Plasma/Blood/Serum", "207", |
221 | 4x |
"CLFTAUS", "Total CL by F for Dose Int Norm by SA", "CLFTAUS", "Plasma/Blood/Serum", "208", |
222 | 4x |
"CLFTAUW", "Total CL by F for Dose Int Norm by WT", "CLFTAUW", "Plasma/Blood/Serum", "209", |
223 | 4x |
"CLFUB", "Apparent CL for Unbound Drug", "CLFUB", "Plasma/Blood/Serum", "210", |
224 | 4x |
"CLOB", "Total CL Obs Norm by BMI", "CLOB", "Plasma/Blood/Serum", "211", |
225 | 4x |
"CLOD", "Total CL Obs Norm by Dose", "CLOD", "Plasma/Blood/Serum", "212", |
226 | 4x |
"CLOS", "Total CL Obs Norm by SA", "CLOS", "Plasma/Blood/Serum", "213", |
227 | 4x |
"CLOUB", "Total CL Obs for Unbound Drug", "CLOUB", "Plasma/Blood/Serum", "214", |
228 | 4x |
"CLOW", "Total CL Obs Norm by WT", "CLOW", "Plasma/Blood/Serum", "215", |
229 | 4x |
"CLPB", "Total CL Pred Norm by BMI", "CLPB", "Plasma/Blood/Serum", "216", |
230 | 4x |
"CLPD", "Total CL Pred Norm by Dose", "CLPD", "Plasma/Blood/Serum", "217", |
231 | 4x |
"CLPS", "Total CL Pred Norm by SA", "CLPS", "Plasma/Blood/Serum", "218", |
232 | 4x |
"CLPUB", "Total CL Pred for Unbound Drug", "CLPUB", "Plasma/Blood/Serum", "219", |
233 | 4x |
"CLPW", "Total CL Pred Norm by WT", "CLPW", "Plasma/Blood/Serum", "220", |
234 | 4x |
"CLRPCLEV", "Renal CL as Pct CL EV", "CLRPCLEV", "Urine", "221", |
235 | 4x |
"CLRPCLIV", "Renal CL as Pct CL IV", "CLRPCLIV", "Urine", "222", |
236 | 4x |
"CLSTB", "Last Nonzero Conc Norm by BMI", "CLSTB", "Plasma/Blood/Serum", "223", |
237 | 4x |
"CLSTD", "Last Nonzero Conc Norm by Dose", "CLSTD", "Plasma/Blood/Serum", "224", |
238 | 4x |
"CLSTS", "Last Nonzero Conc Norm by SA", "CLSTS", "Plasma/Blood/Serum", "225", |
239 | 4x |
"CLSTW", "Last Nonzero Conc Norm by WT", "CLSTW", "Plasma/Blood/Serum", "226", |
240 | 4x |
"CLTAU", "Total CL for Dose Int", "CLTAU", "Plasma/Blood/Serum", "227", |
241 | 4x |
"CLTAUB", "Total CL for Dose Int Norm by BMI", "CLTAUB", "Plasma/Blood/Serum", "228", |
242 | 4x |
"CLTAUD", "Total CL for Dose Int Norm by Dose", "CLTAUD", "Plasma/Blood/Serum", "229", |
243 | 4x |
"CLTAUS", "Total CL for Dose Int Norm by SA", "CLTAUS", "Plasma/Blood/Serum", "230", |
244 | 4x |
"CLTAUW", "Total CL for Dose Int Norm by WT", "CLTAUW", "Plasma/Blood/Serum", "231", |
245 | 4x |
"CMAXB", "Max Conc Norm by BMI", "CMAX_B", "Plasma/Blood/Serum", "232", |
246 | 4x |
"CMAXLN", "Max Conc LN Transformed", "CMAXLN", "Plasma/Blood/Serum", "233", |
247 | 4x |
"CMAXS", "Max Conc Norm by SA", "CMAXS", "Plasma/Blood/Serum", "234", |
248 | 4x |
"CMAXUB", "Max Conc, Unbound Drug", "CMAXUB", "Plasma/Blood/Serum", "235", |
249 | 4x |
"CMAXW", "Max Conc Norm by WT", "CMAXW", "Plasma/Blood/Serum", "236", |
250 | 4x |
"CMINB", "Min Conc Norm by BMI", "CMINB", "Plasma/Blood/Serum", "237", |
251 | 4x |
"CMIND", "Min Conc Norm by Dose", "CMIND", "Plasma/Blood/Serum", "238", |
252 | 4x |
"CMINS", "Min Conc Norm by SA", "CMINS", "Plasma/Blood/Serum", "239", |
253 | 4x |
"CMINW", "Min Conc Norm by WT", "CMINW", "Plasma/Blood/Serum", "240", |
254 | 4x |
"CONC", "Concentration", "CONC", "Plasma/Blood/Serum", "241", |
255 | 4x |
"CONCB", "Conc by BMI", "CONCB", "Plasma/Blood/Serum", "242", |
256 | 4x |
"CONCD", "Conc by Dose", "CONCD", "Plasma/Blood/Serum", "243", |
257 | 4x |
"CONCS", "Conc by SA", "CONCS", "Plasma/Blood/Serum", "244", |
258 | 4x |
"CONCW", "Conc by WT", "CONCW", "Plasma/Blood/Serum", "245", |
259 | 4x |
"CTROUGH", "Conc Trough", "CTROUGH", "Plasma/Blood/Serum", "246", |
260 | 4x |
"CTROUGHB", "Conc Trough by BMI", "CTROUGHB", "Plasma/Blood/Serum", "247", |
261 | 4x |
"CTROUGHD", "Conc Trough by Dose", "CTROUGHD", "Plasma/Blood/Serum", "248", |
262 | 4x |
"CTROUGHS", "Conc Trough by SA", "CTROUGHS", "Plasma/Blood/Serum", "249", |
263 | 4x |
"CTROUGHW", "Conc Trough by WT", "CTROUGHW", "Plasma/Blood/Serum", "250", |
264 | 4x |
"EFFHL", "Effective Half-Life", "EFFHL", "Plasma/Blood/Serum", "251", |
265 | 4x |
"ERINT", "Excret Rate from T1 to T2", "ERINT_T1_T2_UNIT", "Plasma/Blood/Serum", "252", |
266 | 4x |
"ERINTB", "Excret Rate from T1 to T2 Norm by BMI", "ERINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "253", |
267 | 4x |
"ERINTD", "Excret Rate from T1 to T2 Norm by Dose", "ERINTD_T1_T2_UNIT", "Plasma/Blood/Serum", "254", |
268 | 4x |
"ERINTS", "Excret Rate from T1 to T2 Norm by SA", "ERINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "255", |
269 | 4x |
"ERINTW", "Excret Rate from T1 to T2 Norm by WT", "ERINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "256", |
270 | 4x |
"ERLSTB", "Last Meas Excretion Rate Norm by BMI", "ERLSTB", "Plasma/Blood/Serum", "257", |
271 | 4x |
"ERLSTD", "Last Meas Excretion Rate Norm by Dose", "ERLSTD", "Plasma/Blood/Serum", "258", |
272 | 4x |
"ERLSTS", "Last Meas Excretion Rate Norm by SA", "ERLSTS", "Plasma/Blood/Serum", "259", |
273 | 4x |
"ERLSTW", "Last Meas Excretion Rate Norm by WT", "ERLSTW", "Plasma/Blood/Serum", "260", |
274 | 4x |
"ERMAXB", "Max Excretion Rate Norm by BMI", "ERMAXB", "Plasma/Blood/Serum", "261", |
275 | 4x |
"ERMAXD", "Max Excretion Rate Norm by Dose", "ERMAXD", "Plasma/Blood/Serum", "262", |
276 | 4x |
"ERMAXS", "Max Excretion Rate Norm by SA", "ERMAXS", "Plasma/Blood/Serum", "263", |
277 | 4x |
"ERMAXW", "Max Excretion Rate Norm by WT", "ERMAXW", "Plasma/Blood/Serum", "264", |
278 | 4x |
"ERTLST", "Midpoint of Interval of Last Nonzero ER", "ERTLST", "Plasma/Blood/Serum", "265", |
279 | 4x |
"FABS", "Absolute Bioavailability", "FABS", "Plasma/Blood/Serum", "266", |
280 | 4x |
"FB", "Fraction Bound", "FB", "Plasma/Blood/Serum", "267", |
281 | 4x |
"FREL", "Relative Bioavailability", "FREL", "Plasma/Blood/Serum", "268", |
282 | 4x |
"FREXINT", "Fract Excr from T1 to T2", "FREXINT_T1_T2_UNIT", "Plasma/Blood/Serum", "269", |
283 | 4x |
"FU", "Fraction Unbound", "FU", "Plasma/Blood/Serum", "270", |
284 | 4x |
"HDCL", "Hemodialysis Clearance", "HDCL", "Plasma/Blood/Serum", "271", |
285 | 4x |
"HDER", "Hemodialysis Extraction Ratio", "HDER", "Plasma/Blood/Serum", "272", |
286 | 4x |
"HTMAX", "Half Tmax", "HTMAX", "Plasma/Blood/Serum", "273", |
287 | 4x |
"LAMZLTAU", "Lambda z Lower Limit TAU", "LAMZLTAU", "Plasma/Blood/Serum", "274", |
288 | 4x |
"LAMZNTAU", "Number of Points for Lambda z TAU", "LAMZNTAU", "Plasma/Blood/Serum", "275", |
289 | 4x |
"LAMZSPN", "Lambda z Span", "LAMZSPN", "Plasma/Blood/Serum", "276", |
290 | 4x |
"LAMZTAU", "Lambda z TAU", "LAMZTAU", "Plasma/Blood/Serum", "277", |
291 | 4x |
"LAMZUTAU", "Lambda z Upper Limit TAU", "LAMZUTAU", "Plasma/Blood/Serum", "278", |
292 | 4x |
"MAT", "Mean Absorption Time", "MAT", "Plasma/Blood/Serum", "279", |
293 | 4x |
"MRAUCIFO", "Metabolite Ratio for AUC Infinity Obs", "MRAUCIFO", "Plasma/Blood/Serum", "280", |
294 | 4x |
"MRAUCIFP", "Metabolite Ratio for AUC Infinity Pred", "MRAUCIFP", "Plasma/Blood/Serum", "281", |
295 | 4x |
"MRAUCINT", "Metabolite Ratio AUC from T1 to T2", "MRAUCINT_T1_T2_UNIT", "Plasma/Blood/Serum", "282", |
296 | 4x |
"MRAUCLST", "Metabolite Ratio AUC Last Nonzero Conc", "MRAUCLST", "Plasma/Blood/Serum", "283", |
297 | 4x |
"MRAUCTAU", "Metabolite Ratio for AUC Dosing Interval", "MRAUCTAU", "Plasma/Blood/Serum", "284", |
298 | 4x |
"MRCMAX", "Metabolite Ratio for Max Conc", "MRCMAX", "Plasma/Blood/Serum", "285", |
299 | 4x |
"MRTEVIFO", "MRT Extravasc Infinity Obs", "MRTEVIFO", "Plasma/Blood/Serum", "286", |
300 | 4x |
"MRTEVIFP", "MRT Extravasc Infinity Pred", "MRTEVIFP", "Plasma/Blood/Serum", "287", |
301 | 4x |
"MRTEVLST", "MRT Extravasc to Last Nonzero Conc", "MRTEVLST", "Plasma/Blood/Serum", "288", |
302 | 4x |
"MRTIVIFO", "MRT Intravasc Infinity Obs", "MRTIVIFO", "Plasma/Blood/Serum", "289", |
303 | 4x |
"MRTIVIFP", "MRT Intravasc Infinity Pred", "MRTIVIFP", "Plasma/Blood/Serum", "290", |
304 | 4x |
"MRTIVLST", "MRT Intravasc to Last Nonzero Conc", "MRTIVLST", "Plasma/Blood/Serum", "291", |
305 | 4x |
"NRENALCL", "Nonrenal CL", "NRENALCL", "Urine", "292", |
306 | 4x |
"NRENLCLB", "Nonrenal CL Norm by BMI", "NRENLCLB", "Urine", "293", |
307 | 4x |
"NRENLCLD", "Nonrenal CL Norm by Dose", "NRENLCLD", "Urine", "294", |
308 | 4x |
"NRENLCLS", "Nonrenal CL Norm by SA", "NRENLCLS", "Urine", "295", |
309 | 4x |
"NRENLCLW", "Nonrenal CL Norm by WT", "NRENLCLW", "Urine", "296", |
310 | 4x |
"PTROUGHR", "Peak Trough Ratio", "PTROUGHR", "Plasma/Blood/Serum", "297", |
311 | 4x |
"RAAUC", "Ratio AUC", "RAAUC", "Plasma/Blood/Serum", "298", |
312 | 4x |
"RAAUCIFO", "Ratio AUC Infinity Obs", "RAAUCIFO", "Plasma/Blood/Serum", "299", |
313 | 4x |
"RAAUCIFP", "Ratio AUC Infinity Pred", "RAAUCIFP", "Plasma/Blood/Serum", "300", |
314 | 4x |
"RACMAX", "Ratio CMAX", "RACMAX", "Plasma/Blood/Serum", "301", |
315 | 4x |
"RAMAXMIN", "Ratio of CMAX to CMIN", "RAMAXMIN", "Plasma/Blood/Serum", "302", |
316 | 4x |
"RCAMIFO", "Amt Rec Infinity Obs", "RCAMIFO", "Plasma/Blood/Serum", "303", |
317 | 4x |
"RCAMIFOB", "Amt Rec Infinity Obs Norm by BMI", "RCAMIFOB", "Plasma/Blood/Serum", "304", |
318 | 4x |
"RCAMIFOS", "Amt Rec Infinity Obs Norm by SA", "RCAMIFOS", "Plasma/Blood/Serum", "305", |
319 | 4x |
"RCAMIFOW", "Amt Rec Infinity Obs Norm by WT", "RCAMIFOW", "Plasma/Blood/Serum", "306", |
320 | 4x |
"RCAMIFP", "Amt Rec Infinity Pred", "RCAMIFP", "Plasma/Blood/Serum", "307", |
321 | 4x |
"RCAMIFPB", "Amt Rec Infinity Pred Norm by BMI", "RCAMIFPB", "Plasma/Blood/Serum", "308", |
322 | 4x |
"RCAMIFPS", "Amt Rec Infinity Pred Norm by SA", "RCAMIFPS", "Plasma/Blood/Serum", "309", |
323 | 4x |
"RCAMIFPW", "Amt Rec Infinity Pred Norm by WT", "RCAMIFPW", "Plasma/Blood/Serum", "310", |
324 | 4x |
"RCAMINTB", "Amt Rec from T1 to T2 Norm by BMI", "RCAMINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "311", |
325 | 4x |
"RCAMINTS", "Amt Rec from T1 to T2 Norm by SA", "RCAMINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "312", |
326 | 4x |
"RCAMINTW", "Amt Rec from T1 to T2 Norm by WT", "RCAMINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "313", |
327 | 4x |
"RCAMTAU", "Amt Rec Over Dosing Interval", "RCAMTAU", "Plasma/Blood/Serum", "314", |
328 | 4x |
"RCAMTAUB", "Amt Rec Over Dosing Interval Norm by BMI", "RCAMTAUB", "Plasma/Blood/Serum", "315", |
329 | 4x |
"RCAMTAUS", "Amt Rec Over Dosing Interval Norm by SA", "RCAMTAUS", "Plasma/Blood/Serum", "316", |
330 | 4x |
"RCAMTAUW", "Amt Rec Over Dosing Interval Norm by WT", "RCAMTAUW", "Plasma/Blood/Serum", "317", |
331 | 4x |
"RCPCIFO", "Pct Rec Infinity Obs", "RCPCIFO", "Plasma/Blood/Serum", "318", |
332 | 4x |
"RCPCIFOB", "Pct Rec Infinity Obs Norm by BMI", "RCPCIFOB", "Plasma/Blood/Serum", "319", |
333 | 4x |
"RCPCIFOS", "Pct Rec Infinity Obs Norm by SA", "RCPCIFOS", "Plasma/Blood/Serum", "320", |
334 | 4x |
"RCPCIFOW", "Pct Rec Infinity Obs Norm by WT", "RCPCIFOW", "Plasma/Blood/Serum", "321", |
335 | 4x |
"RCPCIFP", "Pct Rec Infinity Pred", "RCPCIFP", "Plasma/Blood/Serum", "322", |
336 | 4x |
"RCPCIFPB", "Pct Rec Infinity Pred Norm by BMI", "RCPCIFPB", "Plasma/Blood/Serum", "323", |
337 | 4x |
"RCPCIFPS", "Pct Rec Infinity Pred Norm by SA", "RCPCIFPS", "Plasma/Blood/Serum", "324", |
338 | 4x |
"RCPCIFPW", "Pct Rec Infinity Pred Norm by WT", "RCPCIFPW", "Plasma/Blood/Serum", "325", |
339 | 4x |
"RCPCINTB", "Pct Rec from T1 to T2 Norm by BMI", "RCPCINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "326", |
340 | 4x |
"RCPCINTS", "Pct Rec from T1 to T2 Norm by SA", "RCPCINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "327", |
341 | 4x |
"RCPCINTW", "Pct Rec from T1 to T2 Norm by WT", "RCPCINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "328", |
342 | 4x |
"RCPCLST", "Pct Rec to Last Nonzero Conc", "RCPCLST", "Plasma/Blood/Serum", "329", |
343 | 4x |
"RCPCTAU", "Pct Rec Over Dosing Interval", "RCPCTAU", "Plasma/Blood/Serum", "330", |
344 | 4x |
"RCPCTAUB", "Pct Rec Over Dosing Interval Norm by BMI", "RCPCTAUB", "Plasma/Blood/Serum", "331", |
345 | 4x |
"RCPCTAUS", "Pct Rec Over Dosing Interval Norm by SA", "RCPCTAUS", "Plasma/Blood/Serum", "332", |
346 | 4x |
"RCPCTAUW", "Pct Rec Over Dosing Interval Norm by WT", "RCPCTAUW", "Plasma/Blood/Serum", "333", |
347 | 4x |
"RENALCLB", "Renal CL Norm by BMI", "RENALCLB", "Urine", "334", |
348 | 4x |
"RENALCLD", "Renal CL Norm by Dose", "RENALCLD", "Urine", "335", |
349 | 4x |
"RENALCLS", "Renal CL Norm by SA", "RENALCLS", "Urine", "336", |
350 | 4x |
"RENALCLW", "Renal CL Norm by WT", "RENALCLW", "Urine", "337", |
351 | 4x |
"RENCLTAU", "Renal CL for Dose Int", "RENCLTAU", "Urine", "338", |
352 | 4x |
"RNCLINT", "Renal CL from T1 to T2", "RNCLINT_T1_T2_UNIT", "Urine", "339", |
353 | 4x |
"RNCLINTB", "Renal CL from T1 to T2 Norm by BMI", "RNCLINTB_T1_T2_UNIT", "Urine", "340", |
354 | 4x |
"RNCLINTD", "Renal CL from T1 to T2 Norm by Dose", "RNCLINTD_T1_T2_UNIT", "Urine", "341", |
355 | 4x |
"RNCLINTS", "Renal CL from T1 to T2 Norm by SA", "RNCLINTS_T1_T2_UNIT", "Urine", "342", |
356 | 4x |
"RNCLINTW", "Renal CL from T1 to T2 Norm by WT", "RNCLINTW_T1_T2_UNIT", "Urine", "343", |
357 | 4x |
"RNCLTAUB", "Renal CL for Dose Int Norm by BMI", "RNCLTAUB", "Urine", "344", |
358 | 4x |
"RNCLTAUD", "Renal CL for Dose Int Norm by Dose", "RNCLTAUD", "Urine", "345", |
359 | 4x |
"RNCLTAUS", "Renal CL for Dose Int Norm by SA", "RNCLTAUS", "Urine", "346", |
360 | 4x |
"RNCLTAUW", "Renal CL for Dose Int Norm by WT", "RNCLTAUW", "Urine", "347", |
361 | 4x |
"RNCLUB", "Renal CL for Unbound Drug", "RNCLUB", "Urine", "348", |
362 | 4x |
"SRAUC", "Stationarity Ratio AUC", "SRAUC", "Plasma/Blood/Serum", "349", |
363 | 4x |
"SWING", "Swing", "SWING", "Plasma/Blood/Serum", "350", |
364 | 4x |
"TAUHL", "Half-Life TAU", "TAUHL", "Plasma/Blood/Serum", "351", |
365 | 4x |
"TBBL", "Time Below Baseline", "Time_Below_B", "Plasma/Blood/Serum", "352", |
366 | 4x |
"TROUGHPR", "Trough Peak Ratio", "TROUGHPR", "Plasma/Blood/Serum", "353", |
367 | 4x |
"V0", "Vol Dist Initial", "V0", "Plasma/Blood/Serum", "354", |
368 | 4x |
"V0B", "Vol Dist Initial Norm by BMI", "V0B", "Plasma/Blood/Serum", "355", |
369 | 4x |
"V0D", "Vol Dist Initial Norm by Dose", "V0D", "Plasma/Blood/Serum", "356", |
370 | 4x |
"V0S", "Vol Dist Initial Norm by SA", "V0S", "Plasma/Blood/Serum", "357", |
371 | 4x |
"V0W", "Vol Dist Initial Norm by WT", "V0W", "Plasma/Blood/Serum", "358", |
372 | 4x |
"VSSOB", "Vol Dist Steady State Obs Norm by BMI", "VSSOB", "Plasma/Blood/Serum", "359", |
373 | 4x |
"VSSOBD", "Vol Dist Steady State Obs by B", "VSSOBD", "Plasma/Blood/Serum", "360", |
374 | 4x |
"VSSOD", "Vol Dist Steady State Obs Norm by Dose", "VSSOD", "Plasma/Blood/Serum", "361", |
375 | 4x |
"VSSOF", "Vol Dist Steady State Obs by F", "VSSOF", "Plasma/Blood/Serum", "362", |
376 | 4x |
"VSSOS", "Vol Dist Steady State Obs Norm by SA", "VSSOS", "Plasma/Blood/Serum", "363", |
377 | 4x |
"VSSOUB", "Vol Dist Steady State Obs by UB", "VSSOUB", "Plasma/Blood/Serum", "364", |
378 | 4x |
"VSSOW", "Vol Dist Steady State Obs Norm by WT", "VSSOW", "Plasma/Blood/Serum", "365", |
379 | 4x |
"VSSPB", "Vol Dist Steady State Pred Norm by BMI", "VSSPB", "Plasma/Blood/Serum", "366", |
380 | 4x |
"VSSPBD", "Vol Dist Steady State Pred by B", "VSSPBD", "Plasma/Blood/Serum", "367", |
381 | 4x |
"VSSPD", "Vol Dist Steady State Pred Norm by Dose", "VSSPD", "Plasma/Blood/Serum", "368", |
382 | 4x |
"VSSPF", "Vol Dist Steady State Pred by F", "VSSPF", "Plasma/Blood/Serum", "369", |
383 | 4x |
"VSSPS", "Vol Dist Steady State Pred Norm by SA", "VSSPS", "Plasma/Blood/Serum", "370", |
384 | 4x |
"VSSPUB", "Vol Dist Steady State Pred by UB", "VSSPUB", "Plasma/Blood/Serum", "371", |
385 | 4x |
"VSSPW", "Vol Dist Steady State Pred Norm by WT", "VSSPW", "Plasma/Blood/Serum", "372", |
386 | 4x |
"VZ", "Vol Z", "Vz", "Plasma/Blood/Serum", "373", |
387 | 4x |
"VZF", "Vol Z by F", "Vz_F", "Plasma/Blood/Serum", "374", |
388 | 4x |
"VZFOB", "Vz Obs by F Norm by BMI", "VZFOB", "Plasma/Blood/Serum", "375", |
389 | 4x |
"VZFOD", "Vz Obs by F Norm by Dose", "VZFOD", "Plasma/Blood/Serum", "376", |
390 | 4x |
"VZFOS", "Vz Obs by F Norm by SA", "VZFOS", "Plasma/Blood/Serum", "377", |
391 | 4x |
"VZFOUB", "Vz Obs by F for UB", "VZFOUB", "Plasma/Blood/Serum", "378", |
392 | 4x |
"VZFOW", "Vz Obs by F Norm by WT", "VZFOW", "Plasma/Blood/Serum", "379", |
393 | 4x |
"VZFPB", "Vz Pred by F Norm by BMI", "VZFPB", "Plasma/Blood/Serum", "380", |
394 | 4x |
"VZFPD", "Vz Pred by F Norm by Dose", "VZFPD", "Plasma/Blood/Serum", "381", |
395 | 4x |
"VZFPS", "Vz Pred by F Norm by SA", "VZFPS", "Plasma/Blood/Serum", "382", |
396 | 4x |
"VZFPUB", "Vz Pred by F for UB", "VZFPUB", "Plasma/Blood/Serum", "383", |
397 | 4x |
"VZFPW", "Vz Pred by F Norm by WT", "VZFPW", "Plasma/Blood/Serum", "384", |
398 | 4x |
"VZFTAU", "Vz for Dose Int by F", "VZFTAU", "Plasma/Blood/Serum", "385", |
399 | 4x |
"VZFTAUB", "Vz for Dose Int by F Norm by BMI", "VZFTAUB", "Plasma/Blood/Serum", "386", |
400 | 4x |
"VZFTAUD", "Vz for Dose Int by F Norm by Dose", "VZFTAUD", "Plasma/Blood/Serum", "387", |
401 | 4x |
"VZFTAUS", "Vz for Dose Int by F Norm by SA", "VZFTAUS", "Plasma/Blood/Serum", "388", |
402 | 4x |
"VZFTAUW", "Vz for Dose Int by F Norm by WT", "VZFTAUW", "Plasma/Blood/Serum", "389", |
403 | 4x |
"VZOB", "Vz Obs Norm by BMI", "VZOB", "Plasma/Blood/Serum", "390", |
404 | 4x |
"VZOD", "Vz Obs Norm by Dose", "VZOD", "Plasma/Blood/Serum", "391", |
405 | 4x |
"VZOS", "Vz Obs Norm by SA", "VZOS", "Plasma/Blood/Serum", "392", |
406 | 4x |
"VZOUB", "Vz Obs for UB", "VZOUB", "Plasma/Blood/Serum", "393", |
407 | 4x |
"VZOW", "Vz Obs Norm by WT", "VZOW", "Plasma/Blood/Serum", "394", |
408 | 4x |
"VZPB", "Vz Pred Norm by BMI", "VZPB", "Plasma/Blood/Serum", "395", |
409 | 4x |
"VZPD", "Vz Pred Norm by Dose", "VZPD", "Plasma/Blood/Serum", "396", |
410 | 4x |
"VZPS", "Vz Pred Norm by SA", "VZPS", "Plasma/Blood/Serum", "397", |
411 | 4x |
"VZPUB", "Vz Pred for UB", "VZPUB", "Plasma/Blood/Serum", "398" |
412 |
),
|
|
413 | 4x |
ncol = 5, |
414 | 4x |
byrow = TRUE |
415 |
)) |
|
416 | 4x |
colnames(pk_dataset) <- c("PARAMCD", "PARAM", "TLG_DISPLAY", "MATRIX", "TLG_ORDER") |
417 | 4x |
pk_dataset
|
418 |
}
|