1 |
#' Formatting functions |
|
2 |
#' |
|
3 |
#' See below for the list of formatting functions created in `tern` to work with `rtables`. |
|
4 |
#' |
|
5 |
#' Other available formats can be listed via [`formatters::list_valid_format_labels()`]. Additional |
|
6 |
#' custom formats can be created via the [`formatters::sprintf_format()`] function. |
|
7 |
#' |
|
8 |
#' @family formatting functions |
|
9 |
#' @name formatting_functions |
|
10 |
NULL |
|
11 | ||
12 |
#' Format fraction and percentage |
|
13 |
#' |
|
14 |
#' @description `r lifecycle::badge("stable")` |
|
15 |
#' |
|
16 |
#' Formats a fraction together with ratio in percent. |
|
17 |
#' |
|
18 |
#' @param x (named `integer`)\cr vector with elements `num` and `denom`. |
|
19 |
#' @param ... not used. Required for `rtables` interface. |
|
20 |
#' |
|
21 |
#' @return A string in the format `num / denom (ratio %)`. If `num` is 0, the format is `num / denom`. |
|
22 |
#' |
|
23 |
#' @examples |
|
24 |
#' format_fraction(x = c(num = 2L, denom = 3L)) |
|
25 |
#' format_fraction(x = c(num = 0L, denom = 3L)) |
|
26 |
#' |
|
27 |
#' @family formatting functions |
|
28 |
#' @export |
|
29 |
format_fraction <- function(x, ...) { |
|
30 | 4x |
attr(x, "label") <- NULL |
31 | ||
32 | 4x |
checkmate::assert_vector(x) |
33 | 4x |
checkmate::assert_count(x["num"]) |
34 | 2x |
checkmate::assert_count(x["denom"]) |
35 | ||
36 | 2x |
result <- if (x["num"] == 0) { |
37 | 1x |
paste0(x["num"], "/", x["denom"]) |
38 |
} else { |
|
39 | 1x |
paste0( |
40 | 1x |
x["num"], "/", x["denom"], |
41 | 1x |
" (", round(x["num"] / x["denom"] * 100, 1), "%)" |
42 |
) |
|
43 |
} |
|
44 | ||
45 | 2x |
return(result) |
46 |
} |
|
47 | ||
48 |
#' Format fraction and percentage with fixed single decimal place |
|
49 |
#' |
|
50 |
#' @description `r lifecycle::badge("stable")` |
|
51 |
#' |
|
52 |
#' Formats a fraction together with ratio in percent with fixed single decimal place. |
|
53 |
#' Includes trailing zero in case of whole number percentages to always keep one decimal place. |
|
54 |
#' |
|
55 |
#' @inheritParams format_fraction |
|
56 |
#' |
|
57 |
#' @return A string in the format `num / denom (ratio %)`. If `num` is 0, the format is `num / denom`. |
|
58 |
#' |
|
59 |
#' @examples |
|
60 |
#' format_fraction_fixed_dp(x = c(num = 1L, denom = 2L)) |
|
61 |
#' format_fraction_fixed_dp(x = c(num = 1L, denom = 4L)) |
|
62 |
#' format_fraction_fixed_dp(x = c(num = 0L, denom = 3L)) |
|
63 |
#' |
|
64 |
#' @family formatting functions |
|
65 |
#' @export |
|
66 |
format_fraction_fixed_dp <- function(x, ...) { |
|
67 | 3x |
attr(x, "label") <- NULL |
68 | 3x |
checkmate::assert_vector(x) |
69 | 3x |
checkmate::assert_count(x["num"]) |
70 | 3x |
checkmate::assert_count(x["denom"]) |
71 | ||
72 | 3x |
result <- if (x["num"] == 0) { |
73 | 1x |
paste0(x["num"], "/", x["denom"]) |
74 |
} else { |
|
75 | 2x |
paste0( |
76 | 2x |
x["num"], "/", x["denom"], |
77 | 2x |
" (", sprintf("%.1f", round(x["num"] / x["denom"] * 100, 1)), "%)" |
78 |
) |
|
79 |
} |
|
80 | 3x |
return(result) |
81 |
} |
|
82 | ||
83 |
#' Format count and fraction |
|
84 |
#' |
|
85 |
#' @description `r lifecycle::badge("stable")` |
|
86 |
#' |
|
87 |
#' Formats a count together with fraction with special consideration when count is `0`. |
|
88 |
#' |
|
89 |
#' @param x (`numeric(2)`)\cr vector of length 2 with count and fraction, respectively. |
|
90 |
#' @param ... not used. Required for `rtables` interface. |
|
91 |
#' |
|
92 |
#' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`. |
|
93 |
#' |
|
94 |
#' @examples |
|
95 |
#' format_count_fraction(x = c(2, 0.6667)) |
|
96 |
#' format_count_fraction(x = c(0, 0)) |
|
97 |
#' |
|
98 |
#' @family formatting functions |
|
99 |
#' @export |
|
100 |
format_count_fraction <- function(x, ...) { |
|
101 | 3x |
attr(x, "label") <- NULL |
102 | ||
103 | 3x |
if (any(is.na(x))) { |
104 | 1x |
return("NA") |
105 |
} |
|
106 | ||
107 | 2x |
checkmate::assert_vector(x) |
108 | 2x |
checkmate::assert_integerish(x[1]) |
109 | 2x |
assert_proportion_value(x[2], include_boundaries = TRUE) |
110 | ||
111 | 2x |
result <- if (x[1] == 0) { |
112 | 1x |
"0" |
113 |
} else { |
|
114 | 1x |
paste0(x[1], " (", round(x[2] * 100, 1), "%)") |
115 |
} |
|
116 | ||
117 | 2x |
return(result) |
118 |
} |
|
119 | ||
120 |
#' Format count and percentage with fixed single decimal place |
|
121 |
#' |
|
122 |
#' @description `r lifecycle::badge("experimental")` |
|
123 |
#' |
|
124 |
#' Formats a count together with fraction with special consideration when count is `0`. |
|
125 |
#' |
|
126 |
#' @inheritParams format_count_fraction |
|
127 |
#' |
|
128 |
#' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`. |
|
129 |
#' |
|
130 |
#' @examples |
|
131 |
#' format_count_fraction_fixed_dp(x = c(2, 0.6667)) |
|
132 |
#' format_count_fraction_fixed_dp(x = c(2, 0.5)) |
|
133 |
#' format_count_fraction_fixed_dp(x = c(0, 0)) |
|
134 |
#' |
|
135 |
#' @family formatting functions |
|
136 |
#' @export |
|
137 |
format_count_fraction_fixed_dp <- function(x, ...) { |
|
138 | 1093x |
attr(x, "label") <- NULL |
139 | ||
140 | 1093x |
if (any(is.na(x))) { |
141 | ! |
return("NA") |
142 |
} |
|
143 | ||
144 | 1093x |
checkmate::assert_vector(x) |
145 | 1093x |
checkmate::assert_integerish(x[1]) |
146 | 1093x |
assert_proportion_value(x[2], include_boundaries = TRUE) |
147 | ||
148 | 1093x |
result <- if (x[1] == 0) { |
149 | 177x |
"0" |
150 | 1093x |
} else if (.is_equal_float(x[2], 1)) { |
151 | 543x |
sprintf("%d (100%%)", x[1]) |
152 |
} else { |
|
153 | 373x |
sprintf("%d (%.1f%%)", x[1], x[2] * 100) |
154 |
} |
|
155 | ||
156 | 1093x |
return(result) |
157 |
} |
|
158 | ||
159 |
#' Format count and fraction with special case for count < 10 |
|
160 |
#' |
|
161 |
#' @description `r lifecycle::badge("stable")` |
|
162 |
#' |
|
163 |
#' Formats a count together with fraction with special consideration when count is less than 10. |
|
164 |
#' |
|
165 |
#' @inheritParams format_count_fraction |
|
166 |
#' |
|
167 |
#' @return A string in the format `count (fraction %)`. If `count` is less than 10, only `count` is printed. |
|
168 |
#' |
|
169 |
#' @examples |
|
170 |
#' format_count_fraction_lt10(x = c(275, 0.9673)) |
|
171 |
#' format_count_fraction_lt10(x = c(2, 0.6667)) |
|
172 |
#' format_count_fraction_lt10(x = c(9, 1)) |
|
173 |
#' |
|
174 |
#' @family formatting functions |
|
175 |
#' @export |
|
176 |
format_count_fraction_lt10 <- function(x, ...) { |
|
177 | 7x |
attr(x, "label") <- NULL |
178 | ||
179 | 7x |
if (any(is.na(x))) { |
180 | 1x |
return("NA") |
181 |
} |
|
182 | ||
183 | 6x |
checkmate::assert_vector(x) |
184 | 6x |
checkmate::assert_integerish(x[1]) |
185 | 6x |
assert_proportion_value(x[2], include_boundaries = TRUE) |
186 | ||
187 | 6x |
result <- if (x[1] < 10) { |
188 | 3x |
paste0(x[1]) |
189 |
} else { |
|
190 | 3x |
paste0(x[1], " (", round(x[2] * 100, 1), "%)") |
191 |
} |
|
192 | ||
193 | 6x |
return(result) |
194 |
} |
|
195 | ||
196 |
#' Format XX as a formatting function |
|
197 |
#' |
|
198 |
#' Translate a string where x and dots are interpreted as number place |
|
199 |
#' holders, and others as formatting elements. |
|
200 |
#' |
|
201 |
#' @param str (`string`)\cr template. |
|
202 |
#' |
|
203 |
#' @return An `rtables` formatting function. |
|
204 |
#' |
|
205 |
#' @examples |
|
206 |
#' test <- list(c(1.658, 0.5761), c(1e1, 785.6)) |
|
207 |
#' |
|
208 |
#' z <- format_xx("xx (xx.x)") |
|
209 |
#' sapply(test, z) |
|
210 |
#' |
|
211 |
#' z <- format_xx("xx.x - xx.x") |
|
212 |
#' sapply(test, z) |
|
213 |
#' |
|
214 |
#' z <- format_xx("xx.x, incl. xx.x% NE") |
|
215 |
#' sapply(test, z) |
|
216 |
#' |
|
217 |
#' @family formatting functions |
|
218 |
#' @export |
|
219 |
format_xx <- function(str) { |
|
220 |
# Find position in the string. |
|
221 | 1x |
positions <- gregexpr(pattern = "x+\\.x+|x+", text = str, perl = TRUE) |
222 | 1x |
x_positions <- regmatches(x = str, m = positions)[[1]] |
223 | ||
224 |
# Roundings depends on the number of x behind [.]. |
|
225 | 1x |
roundings <- lapply( |
226 | 1x |
X = x_positions, |
227 | 1x |
function(x) { |
228 | 2x |
y <- strsplit(split = "\\.", x = x)[[1]] |
229 | 2x |
rounding <- function(x) { |
230 | 4x |
round(x, digits = ifelse(length(y) > 1, nchar(y[2]), 0)) |
231 |
} |
|
232 | 2x |
return(rounding) |
233 |
} |
|
234 |
) |
|
235 | ||
236 | 1x |
rtable_format <- function(x, output) { |
237 | 2x |
values <- Map(y = x, fun = roundings, function(y, fun) fun(y)) |
238 | 2x |
regmatches(x = str, m = positions)[[1]] <- values |
239 | 2x |
return(str) |
240 |
} |
|
241 | ||
242 | 1x |
return(rtable_format) |
243 |
} |
|
244 | ||
245 |
#' Format numeric values by significant figures |
|
246 |
#' |
|
247 |
#' Format numeric values to print with a specified number of significant figures. |
|
248 |
#' |
|
249 |
#' @param sigfig (`integer(1)`)\cr number of significant figures to display. |
|
250 |
#' @param format (`string`)\cr the format label (string) to apply when printing the value. Decimal |
|
251 |
#' places in string are ignored in favor of formatting by significant figures. Formats options are: |
|
252 |
#' `"xx"`, `"xx / xx"`, `"(xx, xx)"`, `"xx - xx"`, and `"xx (xx)"`. |
|
253 |
#' @param num_fmt (`string`)\cr numeric format modifiers to apply to the value. Defaults to `"fg"` for |
|
254 |
#' standard significant figures formatting - fixed (non-scientific notation) format (`"f"`) |
|
255 |
#' and `sigfig` equal to number of significant figures instead of decimal places (`"g"`). See the |
|
256 |
#' [formatC()] `format` argument for more options. |
|
257 |
#' |
|
258 |
#' @return An `rtables` formatting function. |
|
259 |
#' |
|
260 |
#' @examples |
|
261 |
#' fmt_3sf <- format_sigfig(3) |
|
262 |
#' fmt_3sf(1.658) |
|
263 |
#' fmt_3sf(1e1) |
|
264 |
#' |
|
265 |
#' fmt_5sf <- format_sigfig(5) |
|
266 |
#' fmt_5sf(0.57) |
|
267 |
#' fmt_5sf(0.000025645) |
|
268 |
#' |
|
269 |
#' @family formatting functions |
|
270 |
#' @export |
|
271 |
format_sigfig <- function(sigfig, format = "xx", num_fmt = "fg") { |
|
272 | 3x |
checkmate::assert_integerish(sigfig) |
273 | 3x |
format <- gsub("xx\\.|xx\\.x+", "xx", format) |
274 | 3x |
checkmate::assert_choice(format, c("xx", "xx / xx", "(xx, xx)", "xx - xx", "xx (xx)")) |
275 | 3x |
function(x, ...) { |
276 | ! |
if (!is.numeric(x)) stop("`format_sigfig` cannot be used for non-numeric values. Please choose another format.") |
277 | 12x |
num <- formatC(signif(x, digits = sigfig), digits = sigfig, format = num_fmt, flag = "#") |
278 | 12x |
num <- gsub("\\.$", "", num) # remove trailing "." |
279 | ||
280 | 12x |
format_value(num, format) |
281 |
} |
|
282 |
} |
|
283 | ||
284 |
#' Format fraction with lower threshold |
|
285 |
#' |
|
286 |
#' @description `r lifecycle::badge("stable")` |
|
287 |
#' |
|
288 |
#' Formats a fraction when the second element of the input `x` is the fraction. It applies |
|
289 |
#' a lower threshold, below which it is just stated that the fraction is smaller than that. |
|
290 |
#' |
|
291 |
#' @param threshold (`proportion`)\cr lower threshold. |
|
292 |
#' |
|
293 |
#' @return An `rtables` formatting function that takes numeric input `x` where the second |
|
294 |
#' element is the fraction that is formatted. If the fraction is above or equal to the threshold, |
|
295 |
#' then it is displayed in percentage. If it is positive but below the threshold, it returns, |
|
296 |
#' e.g. "<1" if the threshold is `0.01`. If it is zero, then just "0" is returned. |
|
297 |
#' |
|
298 |
#' @examples |
|
299 |
#' format_fun <- format_fraction_threshold(0.05) |
|
300 |
#' format_fun(x = c(20, 0.1)) |
|
301 |
#' format_fun(x = c(2, 0.01)) |
|
302 |
#' format_fun(x = c(0, 0)) |
|
303 |
#' |
|
304 |
#' @family formatting functions |
|
305 |
#' @export |
|
306 |
format_fraction_threshold <- function(threshold) { |
|
307 | 1x |
assert_proportion_value(threshold) |
308 | 1x |
string_below_threshold <- paste0("<", round(threshold * 100)) |
309 | 1x |
function(x, ...) { |
310 | 3x |
assert_proportion_value(x[2], include_boundaries = TRUE) |
311 | 3x |
ifelse( |
312 | 3x |
x[2] > 0.01, |
313 | 3x |
round(x[2] * 100), |
314 | 3x |
ifelse( |
315 | 3x |
x[2] == 0, |
316 | 3x |
"0", |
317 | 3x |
string_below_threshold |
318 |
) |
|
319 |
) |
|
320 |
} |
|
321 |
} |
|
322 | ||
323 |
#' Format extreme values |
|
324 |
#' |
|
325 |
#' @description `r lifecycle::badge("stable")` |
|
326 |
#' |
|
327 |
#' `rtables` formatting functions that handle extreme values. |
|
328 |
#' |
|
329 |
#' @param digits (`integer(1)`)\cr number of decimal places to display. |
|
330 |
#' |
|
331 |
#' @details For each input, apply a format to the specified number of `digits`. If the value is |
|
332 |
#' below a threshold, it returns "<0.01" e.g. if the number of `digits` is 2. If the value is |
|
333 |
#' above a threshold, it returns ">999.99" e.g. if the number of `digits` is 2. |
|
334 |
#' If it is zero, then returns "0.00". |
|
335 |
#' |
|
336 |
#' @family formatting functions |
|
337 |
#' @name extreme_format |
|
338 |
NULL |
|
339 | ||
340 |
#' @describeIn extreme_format Internal helper function to calculate the threshold and create formatted strings |
|
341 |
#' used in Formatting Functions. Returns a list with elements `threshold` and `format_string`. |
|
342 |
#' |
|
343 |
#' @return |
|
344 |
#' * `h_get_format_threshold()` returns a `list` of 2 elements: `threshold`, with `low` and `high` thresholds, |
|
345 |
#' and `format_string`, with thresholds formatted as strings. |
|
346 |
#' |
|
347 |
#' @examples |
|
348 |
#' h_get_format_threshold(2L) |
|
349 |
#' |
|
350 |
#' @export |
|
351 |
h_get_format_threshold <- function(digits = 2L) { |
|
352 | 2113x |
checkmate::assert_integerish(digits) |
353 | ||
354 | 2113x |
low_threshold <- 1 / (10 ^ digits) # styler: off |
355 | 2113x |
high_threshold <- 1000 - (1 / (10 ^ digits)) # styler: off |
356 | ||
357 | 2113x |
string_below_threshold <- paste0("<", low_threshold) |
358 | 2113x |
string_above_threshold <- paste0(">", high_threshold) |
359 | ||
360 | 2113x |
list( |
361 | 2113x |
"threshold" = c(low = low_threshold, high = high_threshold), |
362 | 2113x |
"format_string" = c(low = string_below_threshold, high = string_above_threshold) |
363 |
) |
|
364 |
} |
|
365 | ||
366 |
#' @describeIn extreme_format Internal helper function to apply a threshold format to a value. |
|
367 |
#' Creates a formatted string to be used in Formatting Functions. |
|
368 |
#' |
|
369 |
#' @param x (`numeric(1)`)\cr value to format. |
|
370 |
#' |
|
371 |
#' @return |
|
372 |
#' * `h_format_threshold()` returns the given value, or if the value is not within the digit threshold the relation |
|
373 |
#' of the given value to the digit threshold, as a formatted string. |
|
374 |
#' |
|
375 |
#' @examples |
|
376 |
#' h_format_threshold(0.001) |
|
377 |
#' h_format_threshold(1000) |
|
378 |
#' |
|
379 |
#' @export |
|
380 |
h_format_threshold <- function(x, digits = 2L) { |
|
381 | 2115x |
if (is.na(x)) { |
382 | 4x |
return(x) |
383 |
} |
|
384 | ||
385 | 2111x |
checkmate::assert_numeric(x, lower = 0) |
386 | ||
387 | 2111x |
l_fmt <- h_get_format_threshold(digits) |
388 | ||
389 | 2111x |
result <- if (x < l_fmt$threshold["low"] && 0 < x) { |
390 | 44x |
l_fmt$format_string["low"] |
391 | 2111x |
} else if (x > l_fmt$threshold["high"]) { |
392 | 99x |
l_fmt$format_string["high"] |
393 |
} else { |
|
394 | 1968x |
sprintf(fmt = paste0("%.", digits, "f"), x) |
395 |
} |
|
396 | ||
397 | 2111x |
unname(result) |
398 |
} |
|
399 | ||
400 |
#' Format a single extreme value |
|
401 |
#' |
|
402 |
#' @description `r lifecycle::badge("stable")` |
|
403 |
#' |
|
404 |
#' Create a formatting function for a single extreme value. |
|
405 |
#' |
|
406 |
#' @inheritParams extreme_format |
|
407 |
#' |
|
408 |
#' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme value. |
|
409 |
#' |
|
410 |
#' @examples |
|
411 |
#' format_fun <- format_extreme_values(2L) |
|
412 |
#' format_fun(x = 0.127) |
|
413 |
#' format_fun(x = Inf) |
|
414 |
#' format_fun(x = 0) |
|
415 |
#' format_fun(x = 0.009) |
|
416 |
#' |
|
417 |
#' @family formatting functions |
|
418 |
#' @export |
|
419 |
format_extreme_values <- function(digits = 2L) { |
|
420 | 63x |
function(x, ...) { |
421 | 657x |
checkmate::assert_scalar(x, na.ok = TRUE) |
422 | ||
423 | 657x |
h_format_threshold(x = x, digits = digits) |
424 |
} |
|
425 |
} |
|
426 | ||
427 |
#' Format extreme values part of a confidence interval |
|
428 |
#' |
|
429 |
#' @description `r lifecycle::badge("stable")` |
|
430 |
#' |
|
431 |
#' Formatting Function for extreme values part of a confidence interval. Values |
|
432 |
#' are formatted as e.g. "(xx.xx, xx.xx)" if the number of `digits` is 2. |
|
433 |
#' |
|
434 |
#' @inheritParams extreme_format |
|
435 |
#' |
|
436 |
#' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme |
|
437 |
#' values confidence interval. |
|
438 |
#' |
|
439 |
#' @examples |
|
440 |
#' format_fun <- format_extreme_values_ci(2L) |
|
441 |
#' format_fun(x = c(0.127, Inf)) |
|
442 |
#' format_fun(x = c(0, 0.009)) |
|
443 |
#' |
|
444 |
#' @family formatting functions |
|
445 |
#' @export |
|
446 |
format_extreme_values_ci <- function(digits = 2L) { |
|
447 | 71x |
function(x, ...) { |
448 | 726x |
checkmate::assert_vector(x, len = 2) |
449 | 726x |
l_result <- h_format_threshold(x = x[1], digits = digits) |
450 | 726x |
h_result <- h_format_threshold(x = x[2], digits = digits) |
451 | ||
452 | 726x |
paste0("(", l_result, ", ", h_result, ")") |
453 |
} |
|
454 |
} |
|
455 | ||
456 |
#' Format automatically using data significant digits |
|
457 |
#' |
|
458 |
#' @description `r lifecycle::badge("stable")` |
|
459 |
#' |
|
460 |
#' Formatting function for the majority of default methods used in [analyze_vars()]. |
|
461 |
#' For non-derived values, the significant digits of data is used (e.g. range), while derived |
|
462 |
#' values have one more digits (measure of location and dispersion like mean, standard deviation). |
|
463 |
#' This function can be called internally with "auto" like, for example, |
|
464 |
#' `.formats = c("mean" = "auto")`. See details to see how this works with the inner function. |
|
465 |
#' |
|
466 |
#' @param dt_var (`numeric`)\cr variable data the statistics were calculated from. Used only to |
|
467 |
#' find significant digits. In [analyze_vars] this comes from `.df_row` (see |
|
468 |
#' [rtables::additional_fun_params]), and it is the row data after the above row splits. No |
|
469 |
#' column split is considered. |
|
470 |
#' @param x_stat (`string`)\cr string indicating the current statistical method used. |
|
471 |
#' |
|
472 |
#' @return A string that `rtables` prints in a table cell. |
|
473 |
#' |
|
474 |
#' @details |
|
475 |
#' The internal function is needed to work with `rtables` default structure for |
|
476 |
#' format functions, i.e. `function(x, ...)`, where is x are results from statistical evaluation. |
|
477 |
#' It can be more than one element (e.g. for `.stats = "mean_sd"`). |
|
478 |
#' |
|
479 |
#' @examples |
|
480 |
#' x_todo <- c(0.001, 0.2, 0.0011000, 3, 4) |
|
481 |
#' res <- c(mean(x_todo[1:3]), sd(x_todo[1:3])) |
|
482 |
#' |
|
483 |
#' # x is the result coming into the formatting function -> res!! |
|
484 |
#' format_auto(dt_var = x_todo, x_stat = "mean_sd")(x = res) |
|
485 |
#' format_auto(x_todo, "range")(x = range(x_todo)) |
|
486 |
#' no_sc_x <- c(0.0000001, 1) |
|
487 |
#' format_auto(no_sc_x, "range")(x = no_sc_x) |
|
488 |
#' |
|
489 |
#' @family formatting functions |
|
490 |
#' @export |
|
491 |
format_auto <- function(dt_var, x_stat) { |
|
492 | 10x |
function(x = "", ...) { |
493 | 18x |
checkmate::assert_numeric(x, min.len = 1) |
494 | 18x |
checkmate::assert_numeric(dt_var, min.len = 1) |
495 |
# Defaults - they may be a param in the future |
|
496 | 18x |
der_stats <- c( |
497 | 18x |
"mean", "sd", "se", "median", "geom_mean", "quantiles", "iqr", |
498 | 18x |
"mean_sd", "mean_se", "mean_se", "mean_ci", "mean_sei", "mean_sdi", |
499 | 18x |
"median_ci" |
500 |
) |
|
501 | 18x |
nonder_stats <- c("n", "range", "min", "max") |
502 | ||
503 |
# Safenet for miss-modifications |
|
504 | 18x |
stopifnot(length(intersect(der_stats, nonder_stats)) == 0) # nolint |
505 | 18x |
checkmate::assert_choice(x_stat, c(der_stats, nonder_stats)) |
506 | ||
507 |
# Finds the max number of digits in data |
|
508 | 18x |
detect_dig <- vapply(dt_var, count_decimalplaces, FUN.VALUE = numeric(1)) %>% |
509 | 18x |
max() |
510 | ||
511 | 18x |
if (x_stat %in% der_stats) { |
512 | 8x |
detect_dig <- detect_dig + 1 |
513 |
} |
|
514 | ||
515 |
# Render input |
|
516 | 18x |
str_vals <- formatC(x, digits = detect_dig, format = "f") |
517 | 18x |
def_fmt <- get_formats_from_stats(x_stat)[[x_stat]] |
518 | 18x |
str_fmt <- str_extract(def_fmt, invert = FALSE)[[1]] |
519 | 18x |
if (length(str_fmt) != length(str_vals)) { |
520 | 2x |
stop( |
521 | 2x |
"Number of inserted values as result (", length(str_vals), |
522 | 2x |
") is not the same as there should be in the default tern formats for ", |
523 | 2x |
x_stat, " (-> ", def_fmt, " needs ", length(str_fmt), " values). ", |
524 | 2x |
"See tern_default_formats to check all of them." |
525 |
) |
|
526 |
} |
|
527 | ||
528 |
# Squashing them together |
|
529 | 16x |
inv_str_fmt <- str_extract(def_fmt, invert = TRUE)[[1]] |
530 | 16x |
stopifnot(length(inv_str_fmt) == length(str_vals) + 1) # nolint |
531 | ||
532 | 16x |
out <- vector("character", length = length(inv_str_fmt) + length(str_vals)) |
533 | 16x |
is_even <- seq_along(out) %% 2 == 0 |
534 | 16x |
out[is_even] <- str_vals |
535 | 16x |
out[!is_even] <- inv_str_fmt |
536 | ||
537 | 16x |
return(paste0(out, collapse = "")) |
538 |
} |
|
539 |
} |
|
540 | ||
541 |
# Utility function that could be useful in general |
|
542 |
str_extract <- function(string, pattern = "xx|xx\\.|xx\\.x+", invert = FALSE) { |
|
543 | 34x |
regmatches(string, gregexpr(pattern, string), invert = invert) |
544 |
} |
|
545 | ||
546 |
# Helper function |
|
547 |
count_decimalplaces <- function(dec) { |
|
548 | 161x |
if (is.na(dec)) { |
549 | 6x |
return(0) |
550 | 155x |
} else if (abs(dec - round(dec)) > .Machine$double.eps^0.5) { # For precision |
551 | 122x |
nchar(strsplit(format(dec, scientific = FALSE, trim = FALSE), ".", fixed = TRUE)[[1]][[2]]) |
552 |
} else { |
|
553 | 33x |
return(0) |
554 |
} |
|
555 |
} |
|
556 | ||
557 |
#' Apply automatic formatting |
|
558 |
#' |
|
559 |
#' Checks if any of the listed formats in `.formats` are `"auto"`, and replaces `"auto"` with |
|
560 |
#' the correct implementation of `format_auto` for the given statistics, data, and variable. |
|
561 |
#' |
|
562 |
#' @inheritParams argument_convention |
|
563 |
#' @param x_stats (named `list`)\cr a named list of statistics where each element corresponds |
|
564 |
#' to an element in `.formats`, with matching names. |
|
565 |
#' |
|
566 |
#' @keywords internal |
|
567 |
apply_auto_formatting <- function(.formats, x_stats, .df_row, .var) { |
|
568 | 476x |
is_auto_fmt <- vapply(.formats, function(ii) is.character(ii) && ii == "auto", logical(1)) |
569 | 476x |
if (any(is_auto_fmt)) { |
570 | 3x |
auto_stats <- x_stats[is_auto_fmt] |
571 | 3x |
var_df <- .df_row[[.var]] # xxx this can be extended for the WHOLE data or single facets |
572 | 3x |
.formats[is_auto_fmt] <- lapply(names(auto_stats), format_auto, dt_var = var_df) |
573 |
} |
|
574 | 476x |
.formats |
575 |
} |
1 |
#' Horizontal waterfall plot |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' This basic waterfall plot visualizes a quantity `height` ordered by value with some markup. |
|
6 |
#' |
|
7 |
#' @param height (`numeric`)\cr vector containing values to be plotted as the waterfall bars. |
|
8 |
#' @param id (`character`)\cr vector containing identifiers to use as the x-axis label for the waterfall bars. |
|
9 |
#' @param col (`character`)\cr color(s). |
|
10 |
#' @param col_var (`factor`, `character`, or `NULL`)\cr categorical variable for bar coloring. `NULL` by default. |
|
11 |
#' @param xlab (`string`)\cr x label. Default is `"ID"`. |
|
12 |
#' @param ylab (`string`)\cr y label. Default is `"Value"`. |
|
13 |
#' @param title (`string`)\cr text to be displayed as plot title. |
|
14 |
#' @param col_legend_title (`string`)\cr text to be displayed as legend title. |
|
15 |
#' |
|
16 |
#' @return A `ggplot` waterfall plot. |
|
17 |
#' |
|
18 |
#' @examples |
|
19 |
#' library(dplyr) |
|
20 |
#' |
|
21 |
#' g_waterfall(height = c(3, 5, -1), id = letters[1:3]) |
|
22 |
#' |
|
23 |
#' g_waterfall( |
|
24 |
#' height = c(3, 5, -1), |
|
25 |
#' id = letters[1:3], |
|
26 |
#' col_var = letters[1:3] |
|
27 |
#' ) |
|
28 |
#' |
|
29 |
#' adsl_f <- tern_ex_adsl %>% |
|
30 |
#' select(USUBJID, STUDYID, ARM, ARMCD, SEX) |
|
31 |
#' |
|
32 |
#' adrs_f <- tern_ex_adrs %>% |
|
33 |
#' filter(PARAMCD == "OVRINV") %>% |
|
34 |
#' mutate(pchg = rnorm(n(), 10, 50)) |
|
35 |
#' |
|
36 |
#' adrs_f <- head(adrs_f, 30) |
|
37 |
#' adrs_f <- adrs_f[!duplicated(adrs_f$USUBJID), ] |
|
38 |
#' head(adrs_f) |
|
39 |
#' |
|
40 |
#' g_waterfall( |
|
41 |
#' height = adrs_f$pchg, |
|
42 |
#' id = adrs_f$USUBJID, |
|
43 |
#' col_var = adrs_f$AVALC |
|
44 |
#' ) |
|
45 |
#' |
|
46 |
#' g_waterfall( |
|
47 |
#' height = adrs_f$pchg, |
|
48 |
#' id = paste("asdfdsfdsfsd", adrs_f$USUBJID), |
|
49 |
#' col_var = adrs_f$SEX |
|
50 |
#' ) |
|
51 |
#' |
|
52 |
#' g_waterfall( |
|
53 |
#' height = adrs_f$pchg, |
|
54 |
#' id = paste("asdfdsfdsfsd", adrs_f$USUBJID), |
|
55 |
#' xlab = "ID", |
|
56 |
#' ylab = "Percentage Change", |
|
57 |
#' title = "Waterfall plot" |
|
58 |
#' ) |
|
59 |
#' |
|
60 |
#' @export |
|
61 |
g_waterfall <- function(height, |
|
62 |
id, |
|
63 |
col_var = NULL, |
|
64 |
col = getOption("ggplot2.discrete.colour"), |
|
65 |
xlab = NULL, |
|
66 |
ylab = NULL, |
|
67 |
col_legend_title = NULL, |
|
68 |
title = NULL) { |
|
69 | 2x |
if (!is.null(col_var)) { |
70 | 1x |
check_same_n(height = height, id = id, col_var = col_var) |
71 |
} else { |
|
72 | 1x |
check_same_n(height = height, id = id) |
73 |
} |
|
74 | ||
75 | 2x |
checkmate::assert_multi_class(col_var, c("character", "factor"), null.ok = TRUE) |
76 | 2x |
checkmate::assert_character(col, null.ok = TRUE) |
77 | ||
78 | 2x |
xlabel <- deparse(substitute(id)) |
79 | 2x |
ylabel <- deparse(substitute(height)) |
80 | ||
81 | 2x |
col_label <- if (!missing(col_var)) { |
82 | 1x |
deparse(substitute(col_var)) |
83 |
} |
|
84 | ||
85 | 2x |
xlab <- if (is.null(xlab)) xlabel else xlab |
86 | 2x |
ylab <- if (is.null(ylab)) ylabel else ylab |
87 | 2x |
col_legend_title <- if (is.null(col_legend_title)) col_label else col_legend_title |
88 | ||
89 | 2x |
plot_data <- data.frame( |
90 | 2x |
height = height, |
91 | 2x |
id = as.character(id), |
92 | 2x |
col_var = if (is.null(col_var)) "x" else to_n(col_var, length(height)), |
93 | 2x |
stringsAsFactors = FALSE |
94 |
) |
|
95 | ||
96 | 2x |
plot_data_ord <- plot_data[order(plot_data$height, decreasing = TRUE), ] |
97 | ||
98 | 2x |
p <- ggplot2::ggplot(plot_data_ord, ggplot2::aes(x = factor(id, levels = id), y = height)) + |
99 | 2x |
ggplot2::geom_col() + |
100 | 2x |
ggplot2::geom_text( |
101 | 2x |
label = format(plot_data_ord$height, digits = 2), |
102 | 2x |
vjust = ifelse(plot_data_ord$height >= 0, -0.5, 1.5) |
103 |
) + |
|
104 | 2x |
ggplot2::xlab(xlab) + |
105 | 2x |
ggplot2::ylab(ylab) + |
106 | 2x |
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, hjust = 0, vjust = .5)) |
107 | ||
108 | 2x |
if (!is.null(col_var)) { |
109 | 1x |
p <- p + |
110 | 1x |
ggplot2::aes(fill = col_var) + |
111 | 1x |
ggplot2::labs(fill = col_legend_title) + |
112 | 1x |
ggplot2::theme( |
113 | 1x |
legend.position = "bottom", |
114 | 1x |
legend.background = ggplot2::element_blank(), |
115 | 1x |
legend.title = ggplot2::element_text(face = "bold"), |
116 | 1x |
legend.box.background = ggplot2::element_rect(colour = "black") |
117 |
) |
|
118 |
} |
|
119 | ||
120 | 2x |
if (!is.null(col)) { |
121 | 1x |
p <- p + |
122 | 1x |
ggplot2::scale_fill_manual(values = col) |
123 |
} |
|
124 | ||
125 | 2x |
if (!is.null(title)) { |
126 | 1x |
p <- p + |
127 | 1x |
ggplot2::labs(title = title) + |
128 | 1x |
ggplot2::theme(plot.title = ggplot2::element_text(face = "bold")) |
129 |
} |
|
130 | ||
131 | 2x |
p |
132 |
} |
1 |
#' Control functions for Kaplan-Meier plot annotation tables |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Auxiliary functions for controlling arguments for formatting the annotation tables that can be added to plots |
|
6 |
#' generated via [g_km()]. |
|
7 |
#' |
|
8 |
#' @param x (`proportion`)\cr x-coordinate for center of annotation table. |
|
9 |
#' @param y (`proportion`)\cr y-coordinate for center of annotation table. |
|
10 |
#' @param w (`proportion`)\cr relative width of the annotation table. |
|
11 |
#' @param h (`proportion`)\cr relative height of the annotation table. |
|
12 |
#' @param fill (`flag` or `character`)\cr whether the annotation table should have a background fill color. |
|
13 |
#' Can also be a color code to use as the background fill color. If `TRUE`, color code defaults to `"#00000020"`. |
|
14 |
#' |
|
15 |
#' @return A list of components with the same names as the arguments. |
|
16 |
#' |
|
17 |
#' @seealso [g_km()] |
|
18 |
#' |
|
19 |
#' @name control_annot |
|
20 |
NULL |
|
21 | ||
22 |
#' @describeIn control_annot Control function for formatting the median survival time annotation table. This annotation |
|
23 |
#' table can be added in [g_km()] by setting `annot_surv_med=TRUE`, and can be configured using the |
|
24 |
#' `control_surv_med_annot()` function by setting it as the `control_annot_surv_med` argument. |
|
25 |
#' |
|
26 |
#' @examples |
|
27 |
#' control_surv_med_annot() |
|
28 |
#' |
|
29 |
#' @export |
|
30 |
control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) { |
|
31 | 22x |
assert_proportion_value(x) |
32 | 22x |
assert_proportion_value(y) |
33 | 22x |
assert_proportion_value(w) |
34 | 22x |
assert_proportion_value(h) |
35 | ||
36 | 22x |
list(x = x, y = y, w = w, h = h, fill = fill) |
37 |
} |
|
38 | ||
39 |
#' @describeIn control_annot Control function for formatting the Cox-PH annotation table. This annotation table can be |
|
40 |
#' added in [g_km()] by setting `annot_coxph=TRUE`, and can be configured using the `control_coxph_annot()` function |
|
41 |
#' by setting it as the `control_annot_coxph` argument. |
|
42 |
#' |
|
43 |
#' @param ref_lbls (`flag`)\cr whether the reference group should be explicitly printed in labels for the |
|
44 |
#' annotation table. If `FALSE` (default), only comparison groups will be printed in the table labels. |
|
45 |
#' |
|
46 |
#' @examples |
|
47 |
#' control_coxph_annot() |
|
48 |
#' |
|
49 |
#' @export |
|
50 |
control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = TRUE, ref_lbls = FALSE) { |
|
51 | 11x |
checkmate::assert_logical(ref_lbls, any.missing = FALSE) |
52 | ||
53 | 11x |
res <- c(control_surv_med_annot(x = x, y = y, w = w, h = h), list(ref_lbls = ref_lbls)) |
54 | 11x |
res |
55 |
} |
|
56 | ||
57 |
#' Helper function to calculate x-tick positions |
|
58 |
#' |
|
59 |
#' @description `r lifecycle::badge("stable")` |
|
60 |
#' |
|
61 |
#' Calculate the positions of ticks on the x-axis. However, if `xticks` already |
|
62 |
#' exists it is kept as is. It is based on the same function `ggplot2` relies on, |
|
63 |
#' and is required in the graphic and the patient-at-risk annotation table. |
|
64 |
#' |
|
65 |
#' @inheritParams g_km |
|
66 |
#' @inheritParams h_ggkm |
|
67 |
#' |
|
68 |
#' @return A vector of positions to use for x-axis ticks on a `ggplot` object. |
|
69 |
#' |
|
70 |
#' @examples |
|
71 |
#' library(dplyr) |
|
72 |
#' library(survival) |
|
73 |
#' |
|
74 |
#' data <- tern_ex_adtte %>% |
|
75 |
#' filter(PARAMCD == "OS") %>% |
|
76 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>% |
|
77 |
#' h_data_plot() |
|
78 |
#' |
|
79 |
#' h_xticks(data) |
|
80 |
#' h_xticks(data, xticks = seq(0, 3000, 500)) |
|
81 |
#' h_xticks(data, xticks = 500) |
|
82 |
#' h_xticks(data, xticks = 500, max_time = 6000) |
|
83 |
#' h_xticks(data, xticks = c(0, 500), max_time = 300) |
|
84 |
#' h_xticks(data, xticks = 500, max_time = 300) |
|
85 |
#' |
|
86 |
#' @export |
|
87 |
h_xticks <- function(data, xticks = NULL, max_time = NULL) { |
|
88 | 18x |
if (is.null(xticks)) { |
89 | 13x |
if (is.null(max_time)) { |
90 | 11x |
labeling::extended(range(data$time)[1], range(data$time)[2], m = 5) |
91 |
} else { |
|
92 | 2x |
labeling::extended(range(data$time)[1], max(range(data$time)[2], max_time), m = 5) |
93 |
} |
|
94 | 5x |
} else if (checkmate::test_number(xticks)) { |
95 | 2x |
if (is.null(max_time)) { |
96 | 1x |
seq(0, max(data$time), xticks) |
97 |
} else { |
|
98 | 1x |
seq(0, max(data$time, max_time), xticks) |
99 |
} |
|
100 | 3x |
} else if (is.numeric(xticks)) { |
101 | 2x |
xticks |
102 |
} else { |
|
103 | 1x |
stop( |
104 | 1x |
paste( |
105 | 1x |
"xticks should be either `NULL`", |
106 | 1x |
"or a single number (interval between x ticks)", |
107 | 1x |
"or a numeric vector (position of ticks on the x axis)" |
108 |
) |
|
109 |
) |
|
110 |
} |
|
111 |
} |
|
112 | ||
113 |
#' Helper function for survival estimations |
|
114 |
#' |
|
115 |
#' @description `r lifecycle::badge("stable")` |
|
116 |
#' |
|
117 |
#' Transform a survival fit to a table with groups in rows characterized by N, median and confidence interval. |
|
118 |
#' |
|
119 |
#' @inheritParams h_data_plot |
|
120 |
#' |
|
121 |
#' @return A summary table with statistics `N`, `Median`, and `XX% CI` (`XX` taken from `fit_km`). |
|
122 |
#' |
|
123 |
#' @examples |
|
124 |
#' library(dplyr) |
|
125 |
#' library(survival) |
|
126 |
#' |
|
127 |
#' adtte <- tern_ex_adtte %>% filter(PARAMCD == "OS") |
|
128 |
#' fit <- survfit( |
|
129 |
#' formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, |
|
130 |
#' data = adtte |
|
131 |
#' ) |
|
132 |
#' h_tbl_median_surv(fit_km = fit) |
|
133 |
#' |
|
134 |
#' @export |
|
135 |
h_tbl_median_surv <- function(fit_km, armval = "All") { |
|
136 | 10x |
y <- if (is.null(fit_km$strata)) { |
137 | ! |
as.data.frame(t(summary(fit_km)$table), row.names = armval) |
138 |
} else { |
|
139 | 10x |
tbl <- summary(fit_km)$table |
140 | 10x |
rownames_lst <- strsplit(sub("=", "equals", rownames(tbl)), "equals") |
141 | 10x |
rownames(tbl) <- matrix(unlist(rownames_lst), ncol = 2, byrow = TRUE)[, 2] |
142 | 10x |
as.data.frame(tbl) |
143 |
} |
|
144 | 10x |
conf.int <- summary(fit_km)$conf.int # nolint |
145 | 10x |
y$records <- round(y$records) |
146 | 10x |
y$median <- signif(y$median, 4) |
147 | 10x |
y$`CI` <- paste0( |
148 | 10x |
"(", signif(y[[paste0(conf.int, "LCL")]], 4), ", ", signif(y[[paste0(conf.int, "UCL")]], 4), ")" |
149 |
) |
|
150 | 10x |
stats::setNames( |
151 | 10x |
y[c("records", "median", "CI")], |
152 | 10x |
c("N", "Median", f_conf_level(conf.int)) |
153 |
) |
|
154 |
} |
|
155 | ||
156 |
#' Helper function for generating a pairwise Cox-PH table |
|
157 |
#' |
|
158 |
#' @description `r lifecycle::badge("stable")` |
|
159 |
#' |
|
160 |
#' Create a `data.frame` of pairwise stratified or unstratified Cox-PH analysis results. |
|
161 |
#' |
|
162 |
#' @inheritParams g_km |
|
163 |
#' @param annot_coxph_ref_lbls (`flag`)\cr whether the reference group should be explicitly printed in labels for the |
|
164 |
#' `annot_coxph` table. If `FALSE` (default), only comparison groups will be printed in `annot_coxph` table labels. |
|
165 |
#' |
|
166 |
#' @return A `data.frame` containing statistics `HR`, `XX% CI` (`XX` taken from `control_coxph_pw`), |
|
167 |
#' and `p-value (log-rank)`. |
|
168 |
#' |
|
169 |
#' @examples |
|
170 |
#' library(dplyr) |
|
171 |
#' |
|
172 |
#' adtte <- tern_ex_adtte %>% |
|
173 |
#' filter(PARAMCD == "OS") %>% |
|
174 |
#' mutate(is_event = CNSR == 0) |
|
175 |
#' |
|
176 |
#' h_tbl_coxph_pairwise( |
|
177 |
#' df = adtte, |
|
178 |
#' variables = list(tte = "AVAL", is_event = "is_event", arm = "ARM"), |
|
179 |
#' control_coxph_pw = control_coxph(conf_level = 0.9) |
|
180 |
#' ) |
|
181 |
#' |
|
182 |
#' @export |
|
183 |
h_tbl_coxph_pairwise <- function(df, |
|
184 |
variables, |
|
185 |
ref_group_coxph = NULL, |
|
186 |
control_coxph_pw = control_coxph(), |
|
187 |
annot_coxph_ref_lbls = FALSE) { |
|
188 | 4x |
if ("strat" %in% names(variables)) { |
189 | ! |
warning( |
190 | ! |
"Warning: the `strat` element name of the `variables` list argument to `h_tbl_coxph_pairwise() ", |
191 | ! |
"was deprecated in tern 0.9.4.\n ", |
192 | ! |
"Please use the name `strata` instead of `strat` in the `variables` argument." |
193 |
) |
|
194 | ! |
variables[["strata"]] <- variables[["strat"]] |
195 |
} |
|
196 | ||
197 | 4x |
assert_df_with_variables(df, variables) |
198 | 4x |
checkmate::assert_choice(ref_group_coxph, levels(df[[variables$arm]]), null.ok = TRUE) |
199 | 4x |
checkmate::assert_flag(annot_coxph_ref_lbls) |
200 | ||
201 | 4x |
arm <- variables$arm |
202 | 4x |
df[[arm]] <- factor(df[[arm]]) |
203 | ||
204 | 4x |
ref_group <- if (!is.null(ref_group_coxph)) ref_group_coxph else levels(df[[variables$arm]])[1] |
205 | 4x |
comp_group <- setdiff(levels(df[[arm]]), ref_group) |
206 | ||
207 | 4x |
results <- Map(function(comp) { |
208 | 8x |
res <- s_coxph_pairwise( |
209 | 8x |
df = df[df[[arm]] == comp, , drop = FALSE], |
210 | 8x |
.ref_group = df[df[[arm]] == ref_group, , drop = FALSE], |
211 | 8x |
.in_ref_col = FALSE, |
212 | 8x |
.var = variables$tte, |
213 | 8x |
is_event = variables$is_event, |
214 | 8x |
strata = variables$strata, |
215 | 8x |
control = control_coxph_pw |
216 |
) |
|
217 | 8x |
res_df <- data.frame( |
218 | 8x |
hr = format(round(res$hr, 2), nsmall = 2), |
219 | 8x |
hr_ci = paste0( |
220 | 8x |
"(", format(round(res$hr_ci[1], 2), nsmall = 2), ", ", |
221 | 8x |
format(round(res$hr_ci[2], 2), nsmall = 2), ")" |
222 |
), |
|
223 | 8x |
pvalue = if (res$pvalue < 0.0001) "<0.0001" else format(round(res$pvalue, 4), 4), |
224 | 8x |
stringsAsFactors = FALSE |
225 |
) |
|
226 | 8x |
colnames(res_df) <- c("HR", vapply(res[c("hr_ci", "pvalue")], obj_label, FUN.VALUE = "character")) |
227 | 8x |
row.names(res_df) <- comp |
228 | 8x |
res_df |
229 | 4x |
}, comp_group) |
230 | 1x |
if (annot_coxph_ref_lbls) names(results) <- paste(comp_group, "vs.", ref_group) |
231 | ||
232 | 4x |
do.call(rbind, results) |
233 |
} |
|
234 | ||
235 |
#' Helper function to tidy survival fit data |
|
236 |
#' |
|
237 |
#' @description `r lifecycle::badge("stable")` |
|
238 |
#' |
|
239 |
#' Convert the survival fit data into a data frame designed for plotting |
|
240 |
#' within `g_km`. |
|
241 |
#' |
|
242 |
#' This starts from the [broom::tidy()] result, and then: |
|
243 |
#' * Post-processes the `strata` column into a factor. |
|
244 |
#' * Extends each stratum by an additional first row with time 0 and probability 1 so that |
|
245 |
#' downstream plot lines start at those coordinates. |
|
246 |
#' * Adds a `censor` column. |
|
247 |
#' * Filters the rows before `max_time`. |
|
248 |
#' |
|
249 |
#' @inheritParams g_km |
|
250 |
#' @param fit_km (`survfit`)\cr result of [survival::survfit()]. |
|
251 |
#' @param armval (`string`)\cr used as strata name when treatment arm variable only has one level. Default is `"All"`. |
|
252 |
#' |
|
253 |
#' @return A `tibble` with columns `time`, `n.risk`, `n.event`, `n.censor`, `estimate`, `std.error`, `conf.high`, |
|
254 |
#' `conf.low`, `strata`, and `censor`. |
|
255 |
#' |
|
256 |
#' @examples |
|
257 |
#' library(dplyr) |
|
258 |
#' library(survival) |
|
259 |
#' |
|
260 |
#' # Test with multiple arms |
|
261 |
#' tern_ex_adtte %>% |
|
262 |
#' filter(PARAMCD == "OS") %>% |
|
263 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>% |
|
264 |
#' h_data_plot() |
|
265 |
#' |
|
266 |
#' # Test with single arm |
|
267 |
#' tern_ex_adtte %>% |
|
268 |
#' filter(PARAMCD == "OS", ARMCD == "ARM B") %>% |
|
269 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>% |
|
270 |
#' h_data_plot(armval = "ARM B") |
|
271 |
#' |
|
272 |
#' @export |
|
273 |
h_data_plot <- function(fit_km, |
|
274 |
armval = "All", |
|
275 |
max_time = NULL) { |
|
276 | 18x |
y <- broom::tidy(fit_km) |
277 | ||
278 | 18x |
if (!is.null(fit_km$strata)) { |
279 | 18x |
fit_km_var_level <- strsplit(sub("=", "equals", names(fit_km$strata)), "equals") |
280 | 18x |
strata_levels <- vapply(fit_km_var_level, FUN = "[", FUN.VALUE = "a", i = 2) |
281 | 18x |
strata_var_level <- strsplit(sub("=", "equals", y$strata), "equals") |
282 | 18x |
y$strata <- factor( |
283 | 18x |
vapply(strata_var_level, FUN = "[", FUN.VALUE = "a", i = 2), |
284 | 18x |
levels = strata_levels |
285 |
) |
|
286 |
} else { |
|
287 | ! |
y$strata <- armval |
288 |
} |
|
289 | ||
290 | 18x |
y_by_strata <- split(y, y$strata) |
291 | 18x |
y_by_strata_extended <- lapply( |
292 | 18x |
y_by_strata, |
293 | 18x |
FUN = function(tbl) { |
294 | 53x |
first_row <- tbl[1L, ] |
295 | 53x |
first_row$time <- 0 |
296 | 53x |
first_row$n.risk <- sum(first_row[, c("n.risk", "n.event", "n.censor")]) |
297 | 53x |
first_row$n.event <- first_row$n.censor <- 0 |
298 | 53x |
first_row$estimate <- first_row$conf.high <- first_row$conf.low <- 1 |
299 | 53x |
first_row$std.error <- 0 |
300 | 53x |
rbind( |
301 | 53x |
first_row, |
302 | 53x |
tbl |
303 |
) |
|
304 |
} |
|
305 |
) |
|
306 | 18x |
y <- do.call(rbind, y_by_strata_extended) |
307 | ||
308 | 18x |
y$censor <- ifelse(y$n.censor > 0, y$estimate, NA) |
309 | 18x |
if (!is.null(max_time)) { |
310 | 1x |
y <- y[y$time <= max(max_time), ] |
311 |
} |
|
312 | 18x |
y |
313 |
} |
|
314 | ||
315 |
## Deprecated Functions ---- |
|
316 | ||
317 |
#' Helper function to create a KM plot |
|
318 |
#' |
|
319 |
#' @description `r lifecycle::badge("deprecated")` |
|
320 |
#' |
|
321 |
#' Draw the Kaplan-Meier plot using `ggplot2`. |
|
322 |
#' |
|
323 |
#' @inheritParams g_km |
|
324 |
#' @param data (`data.frame`)\cr survival data as pre-processed by `h_data_plot`. |
|
325 |
#' |
|
326 |
#' @return A `ggplot` object. |
|
327 |
#' |
|
328 |
#' @examples |
|
329 |
#' \donttest{ |
|
330 |
#' library(dplyr) |
|
331 |
#' library(survival) |
|
332 |
#' |
|
333 |
#' fit_km <- tern_ex_adtte %>% |
|
334 |
#' filter(PARAMCD == "OS") %>% |
|
335 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) |
|
336 |
#' data_plot <- h_data_plot(fit_km = fit_km) |
|
337 |
#' xticks <- h_xticks(data = data_plot) |
|
338 |
#' gg <- h_ggkm( |
|
339 |
#' data = data_plot, |
|
340 |
#' censor_show = TRUE, |
|
341 |
#' xticks = xticks, |
|
342 |
#' xlab = "Days", |
|
343 |
#' yval = "Survival", |
|
344 |
#' ylab = "Survival Probability", |
|
345 |
#' title = "Survival" |
|
346 |
#' ) |
|
347 |
#' gg |
|
348 |
#' } |
|
349 |
#' |
|
350 |
#' @export |
|
351 |
h_ggkm <- function(data, |
|
352 |
xticks = NULL, |
|
353 |
yval = "Survival", |
|
354 |
censor_show, |
|
355 |
xlab, |
|
356 |
ylab, |
|
357 |
ylim = NULL, |
|
358 |
title, |
|
359 |
footnotes = NULL, |
|
360 |
max_time = NULL, |
|
361 |
lwd = 1, |
|
362 |
lty = NULL, |
|
363 |
pch = 3, |
|
364 |
size = 2, |
|
365 |
col = NULL, |
|
366 |
ci_ribbon = FALSE, |
|
367 |
ggtheme = nestcolor::theme_nest()) { |
|
368 | 1x |
lifecycle::deprecate_warn( |
369 | 1x |
"0.9.4", |
370 | 1x |
"h_ggkm()", |
371 | 1x |
details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." |
372 |
) |
|
373 | 1x |
checkmate::assert_numeric(lty, null.ok = TRUE) |
374 | 1x |
checkmate::assert_character(col, null.ok = TRUE) |
375 | ||
376 | 1x |
if (is.null(ylim)) { |
377 | 1x |
data_lims <- data |
378 | ! |
if (yval == "Failure") data_lims[["estimate"]] <- 1 - data_lims[["estimate"]] |
379 | 1x |
if (!is.null(max_time)) { |
380 | ! |
y_lwr <- min(data_lims[data_lims$time < max_time, ][["estimate"]]) |
381 | ! |
y_upr <- max(data_lims[data_lims$time < max_time, ][["estimate"]]) |
382 |
} else { |
|
383 | 1x |
y_lwr <- min(data_lims[["estimate"]]) |
384 | 1x |
y_upr <- max(data_lims[["estimate"]]) |
385 |
} |
|
386 | 1x |
ylim <- c(y_lwr, y_upr) |
387 |
} |
|
388 | 1x |
checkmate::assert_numeric(ylim, finite = TRUE, any.missing = FALSE, len = 2, sorted = TRUE) |
389 | ||
390 |
# change estimates of survival to estimates of failure (1 - survival) |
|
391 | 1x |
if (yval == "Failure") { |
392 | ! |
data$estimate <- 1 - data$estimate |
393 | ! |
data[c("conf.high", "conf.low")] <- list(1 - data$conf.low, 1 - data$conf.high) |
394 | ! |
data$censor <- 1 - data$censor |
395 |
} |
|
396 | ||
397 | 1x |
gg <- { |
398 | 1x |
ggplot2::ggplot( |
399 | 1x |
data = data, |
400 | 1x |
mapping = ggplot2::aes( |
401 | 1x |
x = .data[["time"]], |
402 | 1x |
y = .data[["estimate"]], |
403 | 1x |
ymin = .data[["conf.low"]], |
404 | 1x |
ymax = .data[["conf.high"]], |
405 | 1x |
color = .data[["strata"]], |
406 | 1x |
fill = .data[["strata"]] |
407 |
) |
|
408 |
) + |
|
409 | 1x |
ggplot2::geom_hline(yintercept = 0) |
410 |
} |
|
411 | ||
412 | 1x |
if (ci_ribbon) { |
413 | ! |
gg <- gg + ggplot2::geom_ribbon(alpha = .3, lty = 0) |
414 |
} |
|
415 | ||
416 | 1x |
gg <- if (is.null(lty)) { |
417 | 1x |
gg + |
418 | 1x |
ggplot2::geom_step(linewidth = lwd) |
419 | 1x |
} else if (checkmate::test_number(lty)) { |
420 | ! |
gg + |
421 | ! |
ggplot2::geom_step(linewidth = lwd, lty = lty) |
422 | 1x |
} else if (is.numeric(lty)) { |
423 | ! |
gg + |
424 | ! |
ggplot2::geom_step(mapping = ggplot2::aes(linetype = .data[["strata"]]), linewidth = lwd) + |
425 | ! |
ggplot2::scale_linetype_manual(values = lty) |
426 |
} |
|
427 | ||
428 | 1x |
gg <- gg + |
429 | 1x |
ggplot2::coord_cartesian(ylim = ylim) + |
430 | 1x |
ggplot2::labs(x = xlab, y = ylab, title = title, caption = footnotes) |
431 | ||
432 | 1x |
if (!is.null(col)) { |
433 | ! |
gg <- gg + |
434 | ! |
ggplot2::scale_color_manual(values = col) + |
435 | ! |
ggplot2::scale_fill_manual(values = col) |
436 |
} |
|
437 | 1x |
if (censor_show) { |
438 | 1x |
dt <- data[data$n.censor != 0, ] |
439 | 1x |
dt$censor_lbl <- factor("Censored") |
440 | ||
441 | 1x |
gg <- gg + ggplot2::geom_point( |
442 | 1x |
data = dt, |
443 | 1x |
ggplot2::aes( |
444 | 1x |
x = .data[["time"]], |
445 | 1x |
y = .data[["censor"]], |
446 | 1x |
shape = .data[["censor_lbl"]] |
447 |
), |
|
448 | 1x |
size = size, |
449 | 1x |
show.legend = TRUE, |
450 | 1x |
inherit.aes = TRUE |
451 |
) + |
|
452 | 1x |
ggplot2::scale_shape_manual(name = NULL, values = pch) + |
453 | 1x |
ggplot2::guides( |
454 | 1x |
shape = ggplot2::guide_legend(override.aes = list(linetype = NA)), |
455 | 1x |
fill = ggplot2::guide_legend(override.aes = list(shape = NA)) |
456 |
) |
|
457 |
} |
|
458 | ||
459 | 1x |
if (!is.null(max_time) && !is.null(xticks)) { |
460 | ! |
gg <- gg + ggplot2::scale_x_continuous(breaks = xticks, limits = c(min(0, xticks), max(c(xticks, max_time)))) |
461 | 1x |
} else if (!is.null(xticks)) { |
462 | 1x |
if (max(data$time) <= max(xticks)) { |
463 | 1x |
gg <- gg + ggplot2::scale_x_continuous(breaks = xticks, limits = c(min(0, min(xticks)), max(xticks))) |
464 |
} else { |
|
465 | ! |
gg <- gg + ggplot2::scale_x_continuous(breaks = xticks) |
466 |
} |
|
467 | ! |
} else if (!is.null(max_time)) { |
468 | ! |
gg <- gg + ggplot2::scale_x_continuous(limits = c(0, max_time)) |
469 |
} |
|
470 | ||
471 | 1x |
if (!is.null(ggtheme)) { |
472 | 1x |
gg <- gg + ggtheme |
473 |
} |
|
474 | ||
475 | 1x |
gg + ggplot2::theme( |
476 | 1x |
legend.position = "bottom", |
477 | 1x |
legend.title = ggplot2::element_blank(), |
478 | 1x |
legend.key.height = unit(0.02, "npc"), |
479 | 1x |
panel.grid.major.x = ggplot2::element_line(linewidth = 2) |
480 |
) |
|
481 |
} |
|
482 | ||
483 |
#' `ggplot` decomposition |
|
484 |
#' |
|
485 |
#' @description `r lifecycle::badge("deprecated")` |
|
486 |
#' |
|
487 |
#' The elements composing the `ggplot` are extracted and organized in a `list`. |
|
488 |
#' |
|
489 |
#' @param gg (`ggplot`)\cr a graphic to decompose. |
|
490 |
#' |
|
491 |
#' @return A named `list` with elements: |
|
492 |
#' * `panel`: The panel. |
|
493 |
#' * `yaxis`: The y-axis. |
|
494 |
#' * `xaxis`: The x-axis. |
|
495 |
#' * `xlab`: The x-axis label. |
|
496 |
#' * `ylab`: The y-axis label. |
|
497 |
#' * `guide`: The legend. |
|
498 |
#' |
|
499 |
#' @examples |
|
500 |
#' \donttest{ |
|
501 |
#' library(dplyr) |
|
502 |
#' library(survival) |
|
503 |
#' library(grid) |
|
504 |
#' |
|
505 |
#' fit_km <- tern_ex_adtte %>% |
|
506 |
#' filter(PARAMCD == "OS") %>% |
|
507 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) |
|
508 |
#' data_plot <- h_data_plot(fit_km = fit_km) |
|
509 |
#' xticks <- h_xticks(data = data_plot) |
|
510 |
#' gg <- h_ggkm( |
|
511 |
#' data = data_plot, |
|
512 |
#' yval = "Survival", |
|
513 |
#' censor_show = TRUE, |
|
514 |
#' xticks = xticks, xlab = "Days", ylab = "Survival Probability", |
|
515 |
#' title = "tt", |
|
516 |
#' footnotes = "ff" |
|
517 |
#' ) |
|
518 |
#' |
|
519 |
#' g_el <- h_decompose_gg(gg) |
|
520 |
#' grid::grid.newpage() |
|
521 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "red", fill = "gray85", lwd = 5)) |
|
522 |
#' grid::grid.draw(g_el$panel) |
|
523 |
#' |
|
524 |
#' grid::grid.newpage() |
|
525 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "royalblue", fill = "gray85", lwd = 5)) |
|
526 |
#' grid::grid.draw(with(g_el, cbind(ylab, yaxis))) |
|
527 |
#' } |
|
528 |
#' |
|
529 |
#' @export |
|
530 |
h_decompose_gg <- function(gg) { |
|
531 | 1x |
lifecycle::deprecate_warn( |
532 | 1x |
"0.9.4", |
533 | 1x |
"h_decompose_gg()", |
534 | 1x |
details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." |
535 |
) |
|
536 | 1x |
g_el <- ggplot2::ggplotGrob(gg) |
537 | 1x |
y <- c( |
538 | 1x |
panel = "panel", |
539 | 1x |
yaxis = "axis-l", |
540 | 1x |
xaxis = "axis-b", |
541 | 1x |
xlab = "xlab-b", |
542 | 1x |
ylab = "ylab-l", |
543 | 1x |
guide = "guide" |
544 |
) |
|
545 | 1x |
lapply(X = y, function(x) gtable::gtable_filter(g_el, x)) |
546 |
} |
|
547 | ||
548 |
#' Helper function to prepare a KM layout |
|
549 |
#' |
|
550 |
#' @description `r lifecycle::badge("deprecated")` |
|
551 |
#' |
|
552 |
#' Prepares a (5 rows) x (2 cols) layout for the Kaplan-Meier curve. |
|
553 |
#' |
|
554 |
#' @inheritParams g_km |
|
555 |
#' @inheritParams h_ggkm |
|
556 |
#' @param g_el (`list` of `gtable`)\cr list as obtained by `h_decompose_gg()`. |
|
557 |
#' @param annot_at_risk (`flag`)\cr compute and add the annotation table reporting the number of |
|
558 |
#' patient at risk matching the main grid of the Kaplan-Meier curve. |
|
559 |
#' |
|
560 |
#' @return A grid layout. |
|
561 |
#' |
|
562 |
#' @details The layout corresponds to a grid of two columns and five rows of unequal dimensions. Most of the |
|
563 |
#' dimension are fixed, only the curve is flexible and will accommodate with the remaining free space. |
|
564 |
#' * The left column gets the annotation of the `ggplot` (y-axis) and the names of the strata for the patient |
|
565 |
#' at risk tabulation. The main constraint is about the width of the columns which must allow the writing of |
|
566 |
#' the strata name. |
|
567 |
#' * The right column receive the `ggplot`, the legend, the x-axis and the patient at risk table. |
|
568 |
#' |
|
569 |
#' @examples |
|
570 |
#' \donttest{ |
|
571 |
#' library(dplyr) |
|
572 |
#' library(survival) |
|
573 |
#' library(grid) |
|
574 |
#' |
|
575 |
#' fit_km <- tern_ex_adtte %>% |
|
576 |
#' filter(PARAMCD == "OS") %>% |
|
577 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) |
|
578 |
#' data_plot <- h_data_plot(fit_km = fit_km) |
|
579 |
#' xticks <- h_xticks(data = data_plot) |
|
580 |
#' gg <- h_ggkm( |
|
581 |
#' data = data_plot, |
|
582 |
#' censor_show = TRUE, |
|
583 |
#' xticks = xticks, xlab = "Days", ylab = "Survival Probability", |
|
584 |
#' title = "tt", footnotes = "ff", yval = "Survival" |
|
585 |
#' ) |
|
586 |
#' g_el <- h_decompose_gg(gg) |
|
587 |
#' lyt <- h_km_layout(data = data_plot, g_el = g_el, title = "t", footnotes = "f") |
|
588 |
#' grid.show.layout(lyt) |
|
589 |
#' } |
|
590 |
#' |
|
591 |
#' @export |
|
592 |
h_km_layout <- function(data, g_el, title, footnotes, annot_at_risk = TRUE, annot_at_risk_title = TRUE) { |
|
593 | 1x |
lifecycle::deprecate_warn( |
594 | 1x |
"0.9.4", |
595 | 1x |
"h_km_layout()", |
596 | 1x |
details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." |
597 |
) |
|
598 | 1x |
txtlines <- levels(as.factor(data$strata)) |
599 | 1x |
nlines <- nlevels(as.factor(data$strata)) |
600 | 1x |
col_annot_width <- max( |
601 | 1x |
c( |
602 | 1x |
as.numeric(grid::convertX(g_el$yaxis$widths + g_el$ylab$widths, "pt")), |
603 | 1x |
as.numeric( |
604 | 1x |
grid::convertX( |
605 | 1x |
grid::stringWidth(txtlines) + grid::unit(7, "pt"), "pt" |
606 |
) |
|
607 |
) |
|
608 |
) |
|
609 |
) |
|
610 | ||
611 | 1x |
ttl_row <- as.numeric(!is.null(title)) |
612 | 1x |
foot_row <- as.numeric(!is.null(footnotes)) |
613 | 1x |
no_tbl_ind <- c() |
614 | 1x |
ht_x <- c() |
615 | 1x |
ht_units <- c() |
616 | ||
617 | 1x |
if (ttl_row == 1) { |
618 | 1x |
no_tbl_ind <- c(no_tbl_ind, TRUE) |
619 | 1x |
ht_x <- c(ht_x, 2) |
620 | 1x |
ht_units <- c(ht_units, "lines") |
621 |
} |
|
622 | ||
623 | 1x |
no_tbl_ind <- c(no_tbl_ind, rep(TRUE, 3), rep(FALSE, 2)) |
624 | 1x |
ht_x <- c( |
625 | 1x |
ht_x, |
626 | 1x |
1, |
627 | 1x |
grid::convertX(with(g_el, xaxis$heights + ylab$widths), "pt") + grid::unit(5, "pt"), |
628 | 1x |
grid::convertX(g_el$guide$heights, "pt") + grid::unit(2, "pt"), |
629 | 1x |
1, |
630 | 1x |
nlines + 0.5, |
631 | 1x |
grid::convertX(with(g_el, xaxis$heights + ylab$widths), "pt") |
632 |
) |
|
633 | 1x |
ht_units <- c( |
634 | 1x |
ht_units, |
635 | 1x |
"null", |
636 | 1x |
"pt", |
637 | 1x |
"pt", |
638 | 1x |
"lines", |
639 | 1x |
"lines", |
640 | 1x |
"pt" |
641 |
) |
|
642 | ||
643 | 1x |
if (foot_row == 1) { |
644 | 1x |
no_tbl_ind <- c(no_tbl_ind, TRUE) |
645 | 1x |
ht_x <- c(ht_x, 1) |
646 | 1x |
ht_units <- c(ht_units, "lines") |
647 |
} |
|
648 | 1x |
if (annot_at_risk) { |
649 | 1x |
no_at_risk_tbl <- rep(TRUE, 6 + ttl_row + foot_row) |
650 | 1x |
if (!annot_at_risk_title) { |
651 | ! |
no_at_risk_tbl[length(no_at_risk_tbl) - 2 - foot_row] <- FALSE |
652 |
} |
|
653 |
} else { |
|
654 | ! |
no_at_risk_tbl <- no_tbl_ind |
655 |
} |
|
656 | ||
657 | 1x |
grid::grid.layout( |
658 | 1x |
nrow = sum(no_at_risk_tbl), ncol = 2, |
659 | 1x |
widths = grid::unit(c(col_annot_width, 1), c("pt", "null")), |
660 | 1x |
heights = grid::unit( |
661 | 1x |
x = ht_x[no_at_risk_tbl], |
662 | 1x |
units = ht_units[no_at_risk_tbl] |
663 |
) |
|
664 |
) |
|
665 |
} |
|
666 | ||
667 |
#' Helper function to create patient-at-risk grobs |
|
668 |
#' |
|
669 |
#' @description `r lifecycle::badge("deprecated")` |
|
670 |
#' |
|
671 |
#' Two graphical objects are obtained, one corresponding to row labeling and the second to the table of |
|
672 |
#' numbers of patients at risk. If `title = TRUE`, a third object corresponding to the table title is |
|
673 |
#' also obtained. |
|
674 |
#' |
|
675 |
#' @inheritParams g_km |
|
676 |
#' @inheritParams h_ggkm |
|
677 |
#' @param annot_tbl (`data.frame`)\cr annotation as prepared by [survival::summary.survfit()] which |
|
678 |
#' includes the number of patients at risk at given time points. |
|
679 |
#' @param xlim (`numeric(1)`)\cr the maximum value on the x-axis (used to ensure the at risk table aligns with the KM |
|
680 |
#' graph). |
|
681 |
#' @param title (`flag`)\cr whether the "Patients at Risk" title should be added above the `annot_at_risk` |
|
682 |
#' table. Has no effect if `annot_at_risk` is `FALSE`. Defaults to `TRUE`. |
|
683 |
#' |
|
684 |
#' @return A named `list` of two `gTree` objects if `title = FALSE`: `at_risk` and `label`, or three |
|
685 |
#' `gTree` objects if `title = TRUE`: `at_risk`, `label`, and `title`. |
|
686 |
#' |
|
687 |
#' @examples |
|
688 |
#' \donttest{ |
|
689 |
#' library(dplyr) |
|
690 |
#' library(survival) |
|
691 |
#' library(grid) |
|
692 |
#' |
|
693 |
#' fit_km <- tern_ex_adtte %>% |
|
694 |
#' filter(PARAMCD == "OS") %>% |
|
695 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) |
|
696 |
#' |
|
697 |
#' data_plot <- h_data_plot(fit_km = fit_km) |
|
698 |
#' |
|
699 |
#' xticks <- h_xticks(data = data_plot) |
|
700 |
#' |
|
701 |
#' gg <- h_ggkm( |
|
702 |
#' data = data_plot, |
|
703 |
#' censor_show = TRUE, |
|
704 |
#' xticks = xticks, xlab = "Days", ylab = "Survival Probability", |
|
705 |
#' title = "tt", footnotes = "ff", yval = "Survival" |
|
706 |
#' ) |
|
707 |
#' |
|
708 |
#' # The annotation table reports the patient at risk for a given strata and |
|
709 |
#' # times (`xticks`). |
|
710 |
#' annot_tbl <- summary(fit_km, times = xticks) |
|
711 |
#' if (is.null(fit_km$strata)) { |
|
712 |
#' annot_tbl <- with(annot_tbl, data.frame(n.risk = n.risk, time = time, strata = "All")) |
|
713 |
#' } else { |
|
714 |
#' strata_lst <- strsplit(sub("=", "equals", levels(annot_tbl$strata)), "equals") |
|
715 |
#' levels(annot_tbl$strata) <- matrix(unlist(strata_lst), ncol = 2, byrow = TRUE)[, 2] |
|
716 |
#' annot_tbl <- data.frame( |
|
717 |
#' n.risk = annot_tbl$n.risk, |
|
718 |
#' time = annot_tbl$time, |
|
719 |
#' strata = annot_tbl$strata |
|
720 |
#' ) |
|
721 |
#' } |
|
722 |
#' |
|
723 |
#' # The annotation table is transformed into a grob. |
|
724 |
#' tbl <- h_grob_tbl_at_risk(data = data_plot, annot_tbl = annot_tbl, xlim = max(xticks)) |
|
725 |
#' |
|
726 |
#' # For the representation, the layout is estimated for which the decomposition |
|
727 |
#' # of the graphic element is necessary. |
|
728 |
#' g_el <- h_decompose_gg(gg) |
|
729 |
#' lyt <- h_km_layout(data = data_plot, g_el = g_el, title = "t", footnotes = "f") |
|
730 |
#' |
|
731 |
#' grid::grid.newpage() |
|
732 |
#' pushViewport(viewport(layout = lyt, height = .95, width = .95)) |
|
733 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "purple", fill = "gray85", lwd = 1)) |
|
734 |
#' pushViewport(viewport(layout.pos.row = 3:4, layout.pos.col = 2)) |
|
735 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "orange", fill = "gray85", lwd = 1)) |
|
736 |
#' grid::grid.draw(tbl$at_risk) |
|
737 |
#' popViewport() |
|
738 |
#' pushViewport(viewport(layout.pos.row = 3:4, layout.pos.col = 1)) |
|
739 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "green3", fill = "gray85", lwd = 1)) |
|
740 |
#' grid::grid.draw(tbl$label) |
|
741 |
#' } |
|
742 |
#' |
|
743 |
#' @export |
|
744 |
h_grob_tbl_at_risk <- function(data, annot_tbl, xlim, title = TRUE) { |
|
745 | 1x |
lifecycle::deprecate_warn( |
746 | 1x |
"0.9.4", |
747 | 1x |
"h_grob_tbl_at_risk()", |
748 | 1x |
details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." |
749 |
) |
|
750 | 1x |
txtlines <- levels(as.factor(data$strata)) |
751 | 1x |
nlines <- nlevels(as.factor(data$strata)) |
752 | 1x |
y_int <- annot_tbl$time[2] - annot_tbl$time[1] |
753 | 1x |
annot_tbl <- expand.grid( |
754 | 1x |
time = seq(0, xlim, y_int), |
755 | 1x |
strata = unique(annot_tbl$strata) |
756 | 1x |
) %>% dplyr::left_join(annot_tbl, by = c("time", "strata")) |
757 | 1x |
annot_tbl[is.na(annot_tbl)] <- 0 |
758 | 1x |
y_str_unit <- as.numeric(annot_tbl$strata) |
759 | 1x |
vp_table <- grid::plotViewport(margins = grid::unit(c(0, 0, 0, 0), "lines")) |
760 | 1x |
if (title) { |
761 | 1x |
gb_table_title <- grid::gList( |
762 | 1x |
grid::textGrob( |
763 | 1x |
label = "Patients at Risk:", |
764 | 1x |
x = 1, |
765 | 1x |
y = grid::unit(0.2, "native"), |
766 | 1x |
gp = grid::gpar(fontface = "bold", fontsize = 10) |
767 |
) |
|
768 |
) |
|
769 |
} |
|
770 | 1x |
gb_table_left_annot <- grid::gList( |
771 | 1x |
grid::rectGrob( |
772 | 1x |
x = 0, y = grid::unit(c(1:nlines) - 1, "lines"), |
773 | 1x |
gp = grid::gpar(fill = c("gray95", "gray90"), alpha = 1, col = "white"), |
774 | 1x |
height = grid::unit(1, "lines"), just = "bottom", hjust = 0 |
775 |
), |
|
776 | 1x |
grid::textGrob( |
777 | 1x |
label = unique(annot_tbl$strata), |
778 | 1x |
x = 0.5, |
779 | 1x |
y = grid::unit( |
780 | 1x |
(max(unique(y_str_unit)) - unique(y_str_unit)) + 0.75, |
781 | 1x |
"native" |
782 |
), |
|
783 | 1x |
gp = grid::gpar(fontface = "italic", fontsize = 10) |
784 |
) |
|
785 |
) |
|
786 | 1x |
gb_patient_at_risk <- grid::gList( |
787 | 1x |
grid::rectGrob( |
788 | 1x |
x = 0, y = grid::unit(c(1:nlines) - 1, "lines"), |
789 | 1x |
gp = grid::gpar(fill = c("gray95", "gray90"), alpha = 1, col = "white"), |
790 | 1x |
height = grid::unit(1, "lines"), just = "bottom", hjust = 0 |
791 |
), |
|
792 | 1x |
grid::textGrob( |
793 | 1x |
label = annot_tbl$n.risk, |
794 | 1x |
x = grid::unit(annot_tbl$time, "native"), |
795 | 1x |
y = grid::unit( |
796 | 1x |
(max(y_str_unit) - y_str_unit) + .5, |
797 | 1x |
"line" |
798 | 1x |
) # maybe native |
799 |
) |
|
800 |
) |
|
801 | ||
802 | 1x |
ret <- list( |
803 | 1x |
at_risk = grid::gList( |
804 | 1x |
grid::gTree( |
805 | 1x |
vp = vp_table, |
806 | 1x |
children = grid::gList( |
807 | 1x |
grid::gTree( |
808 | 1x |
vp = grid::dataViewport( |
809 | 1x |
xscale = c(0, xlim) + c(-0.05, 0.05) * xlim, |
810 | 1x |
yscale = c(0, nlines + 1), |
811 | 1x |
extension = c(0.05, 0) |
812 |
), |
|
813 | 1x |
children = grid::gList(gb_patient_at_risk) |
814 |
) |
|
815 |
) |
|
816 |
) |
|
817 |
), |
|
818 | 1x |
label = grid::gList( |
819 | 1x |
grid::gTree( |
820 | 1x |
vp = grid::viewport(width = max(grid::stringWidth(txtlines))), |
821 | 1x |
children = grid::gList( |
822 | 1x |
grid::gTree( |
823 | 1x |
vp = grid::dataViewport( |
824 | 1x |
xscale = 0:1, |
825 | 1x |
yscale = c(0, nlines + 1), |
826 | 1x |
extension = c(0.0, 0) |
827 |
), |
|
828 | 1x |
children = grid::gList(gb_table_left_annot) |
829 |
) |
|
830 |
) |
|
831 |
) |
|
832 |
) |
|
833 |
) |
|
834 | ||
835 | 1x |
if (title) { |
836 | 1x |
ret[["title"]] <- grid::gList( |
837 | 1x |
grid::gTree( |
838 | 1x |
vp = grid::viewport(width = max(grid::stringWidth(txtlines))), |
839 | 1x |
children = grid::gList( |
840 | 1x |
grid::gTree( |
841 | 1x |
vp = grid::dataViewport( |
842 | 1x |
xscale = 0:1, |
843 | 1x |
yscale = c(0, 1), |
844 | 1x |
extension = c(0, 0) |
845 |
), |
|
846 | 1x |
children = grid::gList(gb_table_title) |
847 |
) |
|
848 |
) |
|
849 |
) |
|
850 |
) |
|
851 |
} |
|
852 | ||
853 | 1x |
ret |
854 |
} |
|
855 | ||
856 |
#' Helper function to create survival estimation grobs |
|
857 |
#' |
|
858 |
#' @description `r lifecycle::badge("deprecated")` |
|
859 |
#' |
|
860 |
#' The survival fit is transformed in a grob containing a table with groups in |
|
861 |
#' rows characterized by N, median and 95% confidence interval. |
|
862 |
#' |
|
863 |
#' @inheritParams g_km |
|
864 |
#' @inheritParams h_data_plot |
|
865 |
#' @param ttheme (`list`)\cr see [gridExtra::ttheme_default()]. |
|
866 |
#' @param x (`proportion`)\cr a value between 0 and 1 specifying x-location. |
|
867 |
#' @param y (`proportion`)\cr a value between 0 and 1 specifying y-location. |
|
868 |
#' @param width (`grid::unit`)\cr width (as a unit) to use when printing the grob. |
|
869 |
#' |
|
870 |
#' @return A `grob` of a table containing statistics `N`, `Median`, and `XX% CI` (`XX` taken from `fit_km`). |
|
871 |
#' |
|
872 |
#' @examples |
|
873 |
#' \donttest{ |
|
874 |
#' library(dplyr) |
|
875 |
#' library(survival) |
|
876 |
#' library(grid) |
|
877 |
#' |
|
878 |
#' grid::grid.newpage() |
|
879 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "pink", fill = "gray85", lwd = 1)) |
|
880 |
#' tern_ex_adtte %>% |
|
881 |
#' filter(PARAMCD == "OS") %>% |
|
882 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>% |
|
883 |
#' h_grob_median_surv() %>% |
|
884 |
#' grid::grid.draw() |
|
885 |
#' } |
|
886 |
#' |
|
887 |
#' @export |
|
888 |
h_grob_median_surv <- function(fit_km, |
|
889 |
armval = "All", |
|
890 |
x = 0.9, |
|
891 |
y = 0.9, |
|
892 |
width = grid::unit(0.3, "npc"), |
|
893 |
ttheme = gridExtra::ttheme_default()) { |
|
894 | 1x |
lifecycle::deprecate_warn( |
895 | 1x |
"0.9.4", |
896 | 1x |
"h_grob_median_surv()", |
897 | 1x |
details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." |
898 |
) |
|
899 | 1x |
data <- h_tbl_median_surv(fit_km, armval = armval) |
900 | ||
901 | 1x |
width <- grid::convertUnit(grid::unit(as.numeric(width), grid::unitType(width)), "in") |
902 | 1x |
height <- width * (nrow(data) + 1) / 12 |
903 | ||
904 | 1x |
w <- paste(" ", c( |
905 | 1x |
rownames(data)[which.max(nchar(rownames(data)))], |
906 | 1x |
sapply(names(data), function(x) c(x, data[[x]])[which.max(nchar(c(x, data[[x]])))]) |
907 |
)) |
|
908 | 1x |
w_unit <- grid::convertWidth(grid::stringWidth(w), "in", valueOnly = TRUE) |
909 | ||
910 | 1x |
w_txt <- sapply(1:64, function(x) { |
911 | 64x |
graphics::par(ps = x) |
912 | 64x |
graphics::strwidth(w[4], units = "in") |
913 |
}) |
|
914 | 1x |
f_size_w <- which.max(w_txt[w_txt < as.numeric((w_unit / sum(w_unit)) * width)[4]]) |
915 | ||
916 | 1x |
h_txt <- sapply(1:64, function(x) { |
917 | 64x |
graphics::par(ps = x) |
918 | 64x |
graphics::strheight(grid::stringHeight("X"), units = "in") |
919 |
}) |
|
920 | 1x |
f_size_h <- which.max(h_txt[h_txt < as.numeric(grid::unit(as.numeric(height) / 4, grid::unitType(height)))]) |
921 | ||
922 | 1x |
if (ttheme$core$fg_params$fontsize == 12) { |
923 | 1x |
ttheme$core$fg_params$fontsize <- min(f_size_w, f_size_h) |
924 | 1x |
ttheme$colhead$fg_params$fontsize <- min(f_size_w, f_size_h) |
925 | 1x |
ttheme$rowhead$fg_params$fontsize <- min(f_size_w, f_size_h) |
926 |
} |
|
927 | ||
928 | 1x |
gt <- gridExtra::tableGrob( |
929 | 1x |
d = data, |
930 | 1x |
theme = ttheme |
931 |
) |
|
932 | 1x |
gt$widths <- ((w_unit / sum(w_unit)) * width) |
933 | 1x |
gt$heights <- rep(grid::unit(as.numeric(height) / 4, grid::unitType(height)), nrow(gt)) |
934 | ||
935 | 1x |
vp <- grid::viewport( |
936 | 1x |
x = grid::unit(x, "npc") + grid::unit(1, "lines"), |
937 | 1x |
y = grid::unit(y, "npc") + grid::unit(1.5, "lines"), |
938 | 1x |
height = height, |
939 | 1x |
width = width, |
940 | 1x |
just = c("right", "top") |
941 |
) |
|
942 | ||
943 | 1x |
grid::gList( |
944 | 1x |
grid::gTree( |
945 | 1x |
vp = vp, |
946 | 1x |
children = grid::gList(gt) |
947 |
) |
|
948 |
) |
|
949 |
} |
|
950 | ||
951 |
#' Helper function to create grid object with y-axis annotation |
|
952 |
#' |
|
953 |
#' @description `r lifecycle::badge("deprecated")` |
|
954 |
#' |
|
955 |
#' Build the y-axis annotation from a decomposed `ggplot`. |
|
956 |
#' |
|
957 |
#' @param ylab (`gtable`)\cr the y-lab as a graphical object derived from a `ggplot`. |
|
958 |
#' @param yaxis (`gtable`)\cr the y-axis as a graphical object derived from a `ggplot`. |
|
959 |
#' |
|
960 |
#' @return A `gTree` object containing the y-axis annotation from a `ggplot`. |
|
961 |
#' |
|
962 |
#' @examples |
|
963 |
#' \donttest{ |
|
964 |
#' library(dplyr) |
|
965 |
#' library(survival) |
|
966 |
#' library(grid) |
|
967 |
#' |
|
968 |
#' fit_km <- tern_ex_adtte %>% |
|
969 |
#' filter(PARAMCD == "OS") %>% |
|
970 |
#' survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) |
|
971 |
#' data_plot <- h_data_plot(fit_km = fit_km) |
|
972 |
#' xticks <- h_xticks(data = data_plot) |
|
973 |
#' gg <- h_ggkm( |
|
974 |
#' data = data_plot, |
|
975 |
#' censor_show = TRUE, |
|
976 |
#' xticks = xticks, xlab = "Days", ylab = "Survival Probability", |
|
977 |
#' title = "title", footnotes = "footnotes", yval = "Survival" |
|
978 |
#' ) |
|
979 |
#' |
|
980 |
#' g_el <- h_decompose_gg(gg) |
|
981 |
#' |
|
982 |
#' grid::grid.newpage() |
|
983 |
#' pvp <- grid::plotViewport(margins = c(5, 4, 2, 20)) |
|
984 |
#' pushViewport(pvp) |
|
985 |
#' grid::grid.draw(h_grob_y_annot(ylab = g_el$ylab, yaxis = g_el$yaxis)) |
|
986 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "gray35", fill = NA)) |
|
987 |
#' } |
|
988 |
#' |
|
989 |
#' @export |
|
990 |
h_grob_y_annot <- function(ylab, yaxis) { |
|
991 | 1x |
lifecycle::deprecate_warn( |
992 | 1x |
"0.9.4", |
993 | 1x |
"h_grob_y_annot()", |
994 | 1x |
details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." |
995 |
) |
|
996 | 1x |
grid::gList( |
997 | 1x |
grid::gTree( |
998 | 1x |
vp = grid::viewport( |
999 | 1x |
width = grid::convertX(yaxis$widths + ylab$widths, "pt"), |
1000 | 1x |
x = grid::unit(1, "npc"), |
1001 | 1x |
just = "right" |
1002 |
), |
|
1003 | 1x |
children = grid::gList(cbind(ylab, yaxis)) |
1004 |
) |
|
1005 |
) |
|
1006 |
} |
|
1007 | ||
1008 |
#' Helper function to create Cox-PH grobs |
|
1009 |
#' |
|
1010 |
#' @description `r lifecycle::badge("deprecated")` |
|
1011 |
#' |
|
1012 |
#' Grob of `rtable` output from [h_tbl_coxph_pairwise()] |
|
1013 |
#' |
|
1014 |
#' @inheritParams h_grob_median_surv |
|
1015 |
#' @param ... arguments to pass to [h_tbl_coxph_pairwise()]. |
|
1016 |
#' @param x (`proportion`)\cr a value between 0 and 1 specifying x-location. |
|
1017 |
#' @param y (`proportion`)\cr a value between 0 and 1 specifying y-location. |
|
1018 |
#' @param width (`grid::unit`)\cr width (as a unit) to use when printing the grob. |
|
1019 |
#' |
|
1020 |
#' @return A `grob` of a table containing statistics `HR`, `XX% CI` (`XX` taken from `control_coxph_pw`), |
|
1021 |
#' and `p-value (log-rank)`. |
|
1022 |
#' |
|
1023 |
#' @examples |
|
1024 |
#' \donttest{ |
|
1025 |
#' library(dplyr) |
|
1026 |
#' library(survival) |
|
1027 |
#' library(grid) |
|
1028 |
#' |
|
1029 |
#' grid::grid.newpage() |
|
1030 |
#' grid.rect(gp = grid::gpar(lty = 1, col = "pink", fill = "gray85", lwd = 1)) |
|
1031 |
#' data <- tern_ex_adtte %>% |
|
1032 |
#' filter(PARAMCD == "OS") %>% |
|
1033 |
#' mutate(is_event = CNSR == 0) |
|
1034 |
#' tbl_grob <- h_grob_coxph( |
|
1035 |
#' df = data, |
|
1036 |
#' variables = list(tte = "AVAL", is_event = "is_event", arm = "ARMCD"), |
|
1037 |
#' control_coxph_pw = control_coxph(conf_level = 0.9), x = 0.5, y = 0.5 |
|
1038 |
#' ) |
|
1039 |
#' grid::grid.draw(tbl_grob) |
|
1040 |
#' } |
|
1041 |
#' |
|
1042 |
#' @export |
|
1043 |
h_grob_coxph <- function(..., |
|
1044 |
x = 0, |
|
1045 |
y = 0, |
|
1046 |
width = grid::unit(0.4, "npc"), |
|
1047 |
ttheme = gridExtra::ttheme_default( |
|
1048 |
padding = grid::unit(c(1, .5), "lines"), |
|
1049 |
core = list(bg_params = list(fill = c("grey95", "grey90"), alpha = .5)) |
|
1050 |
)) { |
|
1051 | 1x |
lifecycle::deprecate_warn( |
1052 | 1x |
"0.9.4", |
1053 | 1x |
"h_grob_coxph()", |
1054 | 1x |
details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`." |
1055 |
) |
|
1056 | 1x |
data <- h_tbl_coxph_pairwise(...) |
1057 | ||
1058 | 1x |
width <- grid::convertUnit(grid::unit(as.numeric(width), grid::unitType(width)), "in") |
1059 | 1x |
height <- width * (nrow(data) + 1) / 12 |
1060 | ||
1061 | 1x |
w <- paste(" ", c( |
1062 | 1x |
rownames(data)[which.max(nchar(rownames(data)))], |
1063 | 1x |
sapply(names(data), function(x) c(x, data[[x]])[which.max(nchar(c(x, data[[x]])))]) |
1064 |
)) |
|
1065 | 1x |
w_unit <- grid::convertWidth(grid::stringWidth(w), "in", valueOnly = TRUE) |
1066 | ||
1067 | 1x |
w_txt <- sapply(1:64, function(x) { |
1068 | 64x |
graphics::par(ps = x) |
1069 | 64x |
graphics::strwidth(w[4], units = "in") |
1070 |
}) |
|
1071 | 1x |
f_size_w <- which.max(w_txt[w_txt < as.numeric((w_unit / sum(w_unit)) * width)[4]]) |
1072 | ||
1073 | 1x |
h_txt <- sapply(1:64, function(x) { |
1074 | 64x |
graphics::par(ps = x) |
1075 | 64x |
graphics::strheight(grid::stringHeight("X"), units = "in") |
1076 |
}) |
|
1077 | 1x |
f_size_h <- which.max(h_txt[h_txt < as.numeric(grid::unit(as.numeric(height) / 4, grid::unitType(height)))]) |
1078 | ||
1079 | 1x |
if (ttheme$core$fg_params$fontsize == 12) { |
1080 | 1x |
ttheme$core$fg_params$fontsize <- min(f_size_w, f_size_h) |
1081 | 1x |
ttheme$colhead$fg_params$fontsize <- min(f_size_w, f_size_h) |
1082 | 1x |
ttheme$rowhead$fg_params$fontsize <- min(f_size_w, f_size_h) |
1083 |
} |
|
1084 | ||
1085 | 1x |
tryCatch( |
1086 | 1x |
expr = { |
1087 | 1x |
gt <- gridExtra::tableGrob( |
1088 | 1x |
d = data, |
1089 | 1x |
theme = ttheme |
1090 | 1x |
) # ERROR 'data' must be of a vector type, was 'NULL' |
1091 | 1x |
gt$widths <- ((w_unit / sum(w_unit)) * width) |
1092 | 1x |
gt$heights <- rep(grid::unit(as.numeric(height) / 4, grid::unitType(height)), nrow(gt)) |
1093 | 1x |
vp <- grid::viewport( |
1094 | 1x |
x = grid::unit(x, "npc") + grid::unit(1, "lines"), |
1095 | 1x |
y = grid::unit(y, "npc") + grid::unit(1.5, "lines"), |
1096 | 1x |
height = height, |
1097 | 1x |
width = width, |
1098 | 1x |
just = c("left", "bottom") |
1099 |
) |
|
1100 | 1x |
grid::gList( |
1101 | 1x |
grid::gTree( |
1102 | 1x |
vp = vp, |
1103 | 1x |
children = grid::gList(gt) |
1104 |
) |
|
1105 |
) |
|
1106 |
}, |
|
1107 | 1x |
error = function(w) { |
1108 | ! |
message(paste( |
1109 | ! |
"Warning: Cox table will not be displayed as there is", |
1110 | ! |
"not any level to be compared in the arm variable." |
1111 |
)) |
|
1112 | ! |
return( |
1113 | ! |
grid::gList( |
1114 | ! |
grid::gTree( |
1115 | ! |
vp = NULL, |
1116 | ! |
children = NULL |
1117 |
) |
|
1118 |
) |
|
1119 |
) |
|
1120 |
} |
|
1121 |
) |
|
1122 |
} |
1 |
#' Helper functions for multivariate logistic regression |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Helper functions used in calculations for logistic regression. |
|
6 |
#' |
|
7 |
#' @inheritParams argument_convention |
|
8 |
#' @param fit_glm (`glm`)\cr logistic regression model fitted by [stats::glm()] with "binomial" family. |
|
9 |
#' Limited functionality is also available for conditional logistic regression models fitted by |
|
10 |
#' [survival::clogit()], currently this is used only by [extract_rsp_biomarkers()]. |
|
11 |
#' @param x (`character`)\cr a variable or interaction term in `fit_glm` (depending on the helper function used). |
|
12 |
#' |
|
13 |
#' @examples |
|
14 |
#' library(dplyr) |
|
15 |
#' library(broom) |
|
16 |
#' |
|
17 |
#' adrs_f <- tern_ex_adrs %>% |
|
18 |
#' filter(PARAMCD == "BESRSPI") %>% |
|
19 |
#' filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>% |
|
20 |
#' mutate( |
|
21 |
#' Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0), |
|
22 |
#' RACE = factor(RACE), |
|
23 |
#' SEX = factor(SEX) |
|
24 |
#' ) |
|
25 |
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response") |
|
26 |
#' mod1 <- fit_logistic( |
|
27 |
#' data = adrs_f, |
|
28 |
#' variables = list( |
|
29 |
#' response = "Response", |
|
30 |
#' arm = "ARMCD", |
|
31 |
#' covariates = c("AGE", "RACE") |
|
32 |
#' ) |
|
33 |
#' ) |
|
34 |
#' mod2 <- fit_logistic( |
|
35 |
#' data = adrs_f, |
|
36 |
#' variables = list( |
|
37 |
#' response = "Response", |
|
38 |
#' arm = "ARMCD", |
|
39 |
#' covariates = c("AGE", "RACE"), |
|
40 |
#' interaction = "AGE" |
|
41 |
#' ) |
|
42 |
#' ) |
|
43 |
#' |
|
44 |
#' @name h_logistic_regression |
|
45 |
NULL |
|
46 | ||
47 |
#' @describeIn h_logistic_regression Helper function to extract interaction variable names from a fitted |
|
48 |
#' model assuming only one interaction term. |
|
49 |
#' |
|
50 |
#' @return Vector of names of interaction variables. |
|
51 |
#' |
|
52 |
#' @export |
|
53 |
h_get_interaction_vars <- function(fit_glm) { |
|
54 | 34x |
checkmate::assert_class(fit_glm, "glm") |
55 | 34x |
terms_name <- attr(stats::terms(fit_glm), "term.labels") |
56 | 34x |
terms_order <- attr(stats::terms(fit_glm), "order") |
57 | 34x |
interaction_term <- terms_name[terms_order == 2] |
58 | 34x |
checkmate::assert_string(interaction_term) |
59 | 34x |
strsplit(interaction_term, split = ":")[[1]] |
60 |
} |
|
61 | ||
62 |
#' @describeIn h_logistic_regression Helper function to get the right coefficient name from the |
|
63 |
#' interaction variable names and the given levels. The main value here is that the order |
|
64 |
#' of first and second variable is checked in the `interaction_vars` input. |
|
65 |
#' |
|
66 |
#' @param interaction_vars (`character(2)`)\cr interaction variable names. |
|
67 |
#' @param first_var_with_level (`character(2)`)\cr the first variable name with the interaction level. |
|
68 |
#' @param second_var_with_level (`character(2)`)\cr the second variable name with the interaction level. |
|
69 |
#' |
|
70 |
#' @return Name of coefficient. |
|
71 |
#' |
|
72 |
#' @export |
|
73 |
h_interaction_coef_name <- function(interaction_vars, |
|
74 |
first_var_with_level, |
|
75 |
second_var_with_level) { |
|
76 | 55x |
checkmate::assert_character(interaction_vars, len = 2, any.missing = FALSE) |
77 | 55x |
checkmate::assert_character(first_var_with_level, len = 2, any.missing = FALSE) |
78 | 55x |
checkmate::assert_character(second_var_with_level, len = 2, any.missing = FALSE) |
79 | 55x |
checkmate::assert_subset(c(first_var_with_level[1], second_var_with_level[1]), interaction_vars) |
80 | ||
81 | 55x |
first_name <- paste(first_var_with_level, collapse = "") |
82 | 55x |
second_name <- paste(second_var_with_level, collapse = "") |
83 | 55x |
if (first_var_with_level[1] == interaction_vars[1]) { |
84 | 36x |
paste(first_name, second_name, sep = ":") |
85 | 19x |
} else if (second_var_with_level[1] == interaction_vars[1]) { |
86 | 19x |
paste(second_name, first_name, sep = ":") |
87 |
} |
|
88 |
} |
|
89 | ||
90 |
#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates |
|
91 |
#' for the case when both the odds ratio and the interaction variable are categorical. |
|
92 |
#' |
|
93 |
#' @param odds_ratio_var (`string`)\cr the odds ratio variable. |
|
94 |
#' @param interaction_var (`string`)\cr the interaction variable. |
|
95 |
#' |
|
96 |
#' @return Odds ratio. |
|
97 |
#' |
|
98 |
#' @export |
|
99 |
h_or_cat_interaction <- function(odds_ratio_var, |
|
100 |
interaction_var, |
|
101 |
fit_glm, |
|
102 |
conf_level = 0.95) { |
|
103 | 8x |
interaction_vars <- h_get_interaction_vars(fit_glm) |
104 | 8x |
checkmate::assert_string(odds_ratio_var) |
105 | 8x |
checkmate::assert_string(interaction_var) |
106 | 8x |
checkmate::assert_subset(c(odds_ratio_var, interaction_var), interaction_vars) |
107 | 8x |
checkmate::assert_vector(interaction_vars, len = 2) |
108 | ||
109 | 8x |
xs_level <- fit_glm$xlevels |
110 | 8x |
xs_coef <- stats::coef(fit_glm) |
111 | 8x |
xs_vcov <- stats::vcov(fit_glm) |
112 | 8x |
y <- list() |
113 | 8x |
for (var_level in xs_level[[odds_ratio_var]][-1]) { |
114 | 14x |
x <- list() |
115 | 14x |
for (ref_level in xs_level[[interaction_var]]) { |
116 | 38x |
coef_names <- paste0(odds_ratio_var, var_level) |
117 | 38x |
if (ref_level != xs_level[[interaction_var]][1]) { |
118 | 24x |
interaction_coef_name <- h_interaction_coef_name( |
119 | 24x |
interaction_vars, |
120 | 24x |
c(odds_ratio_var, var_level), |
121 | 24x |
c(interaction_var, ref_level) |
122 |
) |
|
123 | 24x |
coef_names <- c( |
124 | 24x |
coef_names, |
125 | 24x |
interaction_coef_name |
126 |
) |
|
127 |
} |
|
128 | 38x |
if (length(coef_names) > 1) { |
129 | 24x |
ones <- t(c(1, 1)) |
130 | 24x |
est <- as.numeric(ones %*% xs_coef[coef_names]) |
131 | 24x |
se <- sqrt(as.numeric(ones %*% xs_vcov[coef_names, coef_names] %*% t(ones))) |
132 |
} else { |
|
133 | 14x |
est <- xs_coef[coef_names] |
134 | 14x |
se <- sqrt(as.numeric(xs_vcov[coef_names, coef_names])) |
135 |
} |
|
136 | 38x |
or <- exp(est) |
137 | 38x |
ci <- exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se) |
138 | 38x |
x[[ref_level]] <- list(or = or, ci = ci) |
139 |
} |
|
140 | 14x |
y[[var_level]] <- x |
141 |
} |
|
142 | 8x |
y |
143 |
} |
|
144 | ||
145 |
#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates |
|
146 |
#' for the case when either the odds ratio or the interaction variable is continuous. |
|
147 |
#' |
|
148 |
#' @param at (`numeric` or `NULL`)\cr optional values for the interaction variable. Otherwise |
|
149 |
#' the median is used. |
|
150 |
#' |
|
151 |
#' @return Odds ratio. |
|
152 |
#' |
|
153 |
#' @note We don't provide a function for the case when both variables are continuous because |
|
154 |
#' this does not arise in this table, as the treatment arm variable will always be involved |
|
155 |
#' and categorical. |
|
156 |
#' |
|
157 |
#' @export |
|
158 |
h_or_cont_interaction <- function(odds_ratio_var, |
|
159 |
interaction_var, |
|
160 |
fit_glm, |
|
161 |
at = NULL, |
|
162 |
conf_level = 0.95) { |
|
163 | 13x |
interaction_vars <- h_get_interaction_vars(fit_glm) |
164 | 13x |
checkmate::assert_string(odds_ratio_var) |
165 | 13x |
checkmate::assert_string(interaction_var) |
166 | 13x |
checkmate::assert_subset(c(odds_ratio_var, interaction_var), interaction_vars) |
167 | 13x |
checkmate::assert_vector(interaction_vars, len = 2) |
168 | 13x |
checkmate::assert_numeric(at, min.len = 1, null.ok = TRUE, any.missing = FALSE) |
169 | 13x |
xs_level <- fit_glm$xlevels |
170 | 13x |
xs_coef <- stats::coef(fit_glm) |
171 | 13x |
xs_vcov <- stats::vcov(fit_glm) |
172 | 13x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
173 | 13x |
model_data <- fit_glm$model |
174 | 13x |
if (!is.null(at)) { |
175 | 3x |
checkmate::assert_set_equal(xs_class[interaction_var], "numeric") |
176 |
} |
|
177 | 12x |
y <- list() |
178 | 12x |
if (xs_class[interaction_var] == "numeric") { |
179 | 7x |
if (is.null(at)) { |
180 | 5x |
at <- ceiling(stats::median(model_data[[interaction_var]])) |
181 |
} |
|
182 | ||
183 | 7x |
for (var_level in xs_level[[odds_ratio_var]][-1]) { |
184 | 14x |
x <- list() |
185 | 14x |
for (increment in at) { |
186 | 20x |
coef_names <- paste0(odds_ratio_var, var_level) |
187 | 20x |
if (increment != 0) { |
188 | 20x |
interaction_coef_name <- h_interaction_coef_name( |
189 | 20x |
interaction_vars, |
190 | 20x |
c(odds_ratio_var, var_level), |
191 | 20x |
c(interaction_var, "") |
192 |
) |
|
193 | 20x |
coef_names <- c( |
194 | 20x |
coef_names, |
195 | 20x |
interaction_coef_name |
196 |
) |
|
197 |
} |
|
198 | 20x |
if (length(coef_names) > 1) { |
199 | 20x |
xvec <- t(c(1, increment)) |
200 | 20x |
est <- as.numeric(xvec %*% xs_coef[coef_names]) |
201 | 20x |
se <- sqrt(as.numeric(xvec %*% xs_vcov[coef_names, coef_names] %*% t(xvec))) |
202 |
} else { |
|
203 | ! |
est <- xs_coef[coef_names] |
204 | ! |
se <- sqrt(as.numeric(xs_vcov[coef_names, coef_names])) |
205 |
} |
|
206 | 20x |
or <- exp(est) |
207 | 20x |
ci <- exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se) |
208 | 20x |
x[[as.character(increment)]] <- list(or = or, ci = ci) |
209 |
} |
|
210 | 14x |
y[[var_level]] <- x |
211 |
} |
|
212 |
} else { |
|
213 | 5x |
checkmate::assert_set_equal(xs_class[odds_ratio_var], "numeric") |
214 | 5x |
checkmate::assert_set_equal(xs_class[interaction_var], "factor") |
215 | 5x |
for (var_level in xs_level[[interaction_var]]) { |
216 | 15x |
coef_names <- odds_ratio_var |
217 | 15x |
if (var_level != xs_level[[interaction_var]][1]) { |
218 | 10x |
interaction_coef_name <- h_interaction_coef_name( |
219 | 10x |
interaction_vars, |
220 | 10x |
c(odds_ratio_var, ""), |
221 | 10x |
c(interaction_var, var_level) |
222 |
) |
|
223 | 10x |
coef_names <- c( |
224 | 10x |
coef_names, |
225 | 10x |
interaction_coef_name |
226 |
) |
|
227 |
} |
|
228 | 15x |
if (length(coef_names) > 1) { |
229 | 10x |
xvec <- t(c(1, 1)) |
230 | 10x |
est <- as.numeric(xvec %*% xs_coef[coef_names]) |
231 | 10x |
se <- sqrt(as.numeric(xvec %*% xs_vcov[coef_names, coef_names] %*% t(xvec))) |
232 |
} else { |
|
233 | 5x |
est <- xs_coef[coef_names] |
234 | 5x |
se <- sqrt(as.numeric(xs_vcov[coef_names, coef_names])) |
235 |
} |
|
236 | 15x |
or <- exp(est) |
237 | 15x |
ci <- exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se) |
238 | 15x |
y[[var_level]] <- list(or = or, ci = ci) |
239 |
} |
|
240 |
} |
|
241 | 12x |
y |
242 |
} |
|
243 | ||
244 |
#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates |
|
245 |
#' in case of an interaction. This is a wrapper for [h_or_cont_interaction()] and |
|
246 |
#' [h_or_cat_interaction()]. |
|
247 |
#' |
|
248 |
#' @return Odds ratio. |
|
249 |
#' |
|
250 |
#' @export |
|
251 |
h_or_interaction <- function(odds_ratio_var, |
|
252 |
interaction_var, |
|
253 |
fit_glm, |
|
254 |
at = NULL, |
|
255 |
conf_level = 0.95) { |
|
256 | 15x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
257 | 15x |
if (any(xs_class[c(odds_ratio_var, interaction_var)] == "numeric")) { |
258 | 9x |
h_or_cont_interaction( |
259 | 9x |
odds_ratio_var, |
260 | 9x |
interaction_var, |
261 | 9x |
fit_glm, |
262 | 9x |
at = at, |
263 | 9x |
conf_level = conf_level |
264 |
) |
|
265 | 6x |
} else if (all(xs_class[c(odds_ratio_var, interaction_var)] == "factor")) { |
266 | 6x |
h_or_cat_interaction( |
267 | 6x |
odds_ratio_var, |
268 | 6x |
interaction_var, |
269 | 6x |
fit_glm, |
270 | 6x |
conf_level = conf_level |
271 |
) |
|
272 |
} else { |
|
273 | ! |
stop("wrong interaction variable class, the interaction variable is not a numeric nor a factor") |
274 |
} |
|
275 |
} |
|
276 | ||
277 |
#' @describeIn h_logistic_regression Helper function to construct term labels from simple terms and the table |
|
278 |
#' of numbers of patients. |
|
279 |
#' |
|
280 |
#' @param terms (`character`)\cr simple terms. |
|
281 |
#' @param table (`table`)\cr table containing numbers for terms. |
|
282 |
#' |
|
283 |
#' @return Term labels containing numbers of patients. |
|
284 |
#' |
|
285 |
#' @export |
|
286 |
h_simple_term_labels <- function(terms, |
|
287 |
table) { |
|
288 | 54x |
checkmate::assert_true(is.table(table)) |
289 | 54x |
checkmate::assert_multi_class(terms, classes = c("factor", "character")) |
290 | 54x |
terms <- as.character(terms) |
291 | 54x |
term_n <- table[terms] |
292 | 54x |
paste0(terms, ", n = ", term_n) |
293 |
} |
|
294 | ||
295 |
#' @describeIn h_logistic_regression Helper function to construct term labels from interaction terms and the table |
|
296 |
#' of numbers of patients. |
|
297 |
#' |
|
298 |
#' @param terms1 (`character`)\cr terms for first dimension (rows). |
|
299 |
#' @param terms2 (`character`)\cr terms for second dimension (rows). |
|
300 |
#' @param any (`flag`)\cr whether any of `term1` and `term2` can be fulfilled to count the |
|
301 |
#' number of patients. In that case they can only be scalar (strings). |
|
302 |
#' |
|
303 |
#' @return Term labels containing numbers of patients. |
|
304 |
#' |
|
305 |
#' @export |
|
306 |
h_interaction_term_labels <- function(terms1, |
|
307 |
terms2, |
|
308 |
table, |
|
309 |
any = FALSE) { |
|
310 | 8x |
checkmate::assert_true(is.table(table)) |
311 | 8x |
checkmate::assert_flag(any) |
312 | 8x |
checkmate::assert_multi_class(terms1, classes = c("factor", "character")) |
313 | 8x |
checkmate::assert_multi_class(terms2, classes = c("factor", "character")) |
314 | 8x |
terms1 <- as.character(terms1) |
315 | 8x |
terms2 <- as.character(terms2) |
316 | 8x |
if (any) { |
317 | 4x |
checkmate::assert_scalar(terms1) |
318 | 4x |
checkmate::assert_scalar(terms2) |
319 | 4x |
paste0( |
320 | 4x |
terms1, " or ", terms2, ", n = ", |
321 |
# Note that we double count in the initial sum the cell [terms1, terms2], therefore subtract. |
|
322 | 4x |
sum(c(table[terms1, ], table[, terms2])) - table[terms1, terms2] |
323 |
) |
|
324 |
} else { |
|
325 | 4x |
term_n <- table[cbind(terms1, terms2)] |
326 | 4x |
paste0(terms1, " * ", terms2, ", n = ", term_n) |
327 |
} |
|
328 |
} |
|
329 | ||
330 |
#' @describeIn h_logistic_regression Helper function to tabulate the main effect |
|
331 |
#' results of a (conditional) logistic regression model. |
|
332 |
#' |
|
333 |
#' @return Tabulated main effect results from a logistic regression model. |
|
334 |
#' |
|
335 |
#' @examples |
|
336 |
#' h_glm_simple_term_extract("AGE", mod1) |
|
337 |
#' h_glm_simple_term_extract("ARMCD", mod1) |
|
338 |
#' |
|
339 |
#' @export |
|
340 |
h_glm_simple_term_extract <- function(x, fit_glm) { |
|
341 | 78x |
checkmate::assert_multi_class(fit_glm, c("glm", "clogit")) |
342 | 78x |
checkmate::assert_string(x) |
343 | ||
344 | 78x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
345 | 78x |
xs_level <- fit_glm$xlevels |
346 | 78x |
xs_coef <- summary(fit_glm)$coefficients |
347 | 78x |
stats <- if (inherits(fit_glm, "glm")) { |
348 | 66x |
c("estimate" = "Estimate", "std_error" = "Std. Error", "pvalue" = "Pr(>|z|)") |
349 |
} else { |
|
350 | 12x |
c("estimate" = "coef", "std_error" = "se(coef)", "pvalue" = "Pr(>|z|)") |
351 |
} |
|
352 |
# Make sure x is not an interaction term. |
|
353 | 78x |
checkmate::assert_subset(x, names(xs_class)) |
354 | 78x |
x_sel <- if (xs_class[x] == "numeric") x else paste0(x, xs_level[[x]][-1]) |
355 | 78x |
x_stats <- as.data.frame(xs_coef[x_sel, stats, drop = FALSE], stringsAsFactors = FALSE) |
356 | 78x |
colnames(x_stats) <- names(stats) |
357 | 78x |
x_stats$estimate <- as.list(x_stats$estimate) |
358 | 78x |
x_stats$std_error <- as.list(x_stats$std_error) |
359 | 78x |
x_stats$pvalue <- as.list(x_stats$pvalue) |
360 | 78x |
x_stats$df <- as.list(1) |
361 | 78x |
if (xs_class[x] == "numeric") { |
362 | 60x |
x_stats$term <- x |
363 | 60x |
x_stats$term_label <- if (inherits(fit_glm, "glm")) { |
364 | 48x |
formatters::var_labels(fit_glm$data[x], fill = TRUE) |
365 |
} else { |
|
366 |
# We just fill in here with the `term` itself as we don't have the data available. |
|
367 | 12x |
x |
368 |
} |
|
369 | 60x |
x_stats$is_variable_summary <- FALSE |
370 | 60x |
x_stats$is_term_summary <- TRUE |
371 |
} else { |
|
372 | 18x |
checkmate::assert_class(fit_glm, "glm") |
373 |
# The reason is that we don't have the original data set in the `clogit` object |
|
374 |
# and therefore cannot determine the `x_numbers` here. |
|
375 | 18x |
x_numbers <- table(fit_glm$data[[x]]) |
376 | 18x |
x_stats$term <- xs_level[[x]][-1] |
377 | 18x |
x_stats$term_label <- h_simple_term_labels(x_stats$term, x_numbers) |
378 | 18x |
x_stats$is_variable_summary <- FALSE |
379 | 18x |
x_stats$is_term_summary <- TRUE |
380 | 18x |
main_effects <- car::Anova(fit_glm, type = 3, test.statistic = "Wald") |
381 | 18x |
x_main <- data.frame( |
382 | 18x |
pvalue = main_effects[x, "Pr(>Chisq)", drop = TRUE], |
383 | 18x |
term = xs_level[[x]][1], |
384 | 18x |
term_label = paste("Reference", h_simple_term_labels(xs_level[[x]][1], x_numbers)), |
385 | 18x |
df = main_effects[x, "Df", drop = TRUE], |
386 | 18x |
stringsAsFactors = FALSE |
387 |
) |
|
388 | 18x |
x_main$pvalue <- as.list(x_main$pvalue) |
389 | 18x |
x_main$df <- as.list(x_main$df) |
390 | 18x |
x_main$estimate <- list(numeric(0)) |
391 | 18x |
x_main$std_error <- list(numeric(0)) |
392 | 18x |
if (length(xs_level[[x]][-1]) == 1) { |
393 | 8x |
x_main$pvalue <- list(numeric(0)) |
394 | 8x |
x_main$df <- list(numeric(0)) |
395 |
} |
|
396 | 18x |
x_main$is_variable_summary <- TRUE |
397 | 18x |
x_main$is_term_summary <- FALSE |
398 | 18x |
x_stats <- rbind(x_main, x_stats) |
399 |
} |
|
400 | 78x |
x_stats$variable <- x |
401 | 78x |
x_stats$variable_label <- if (inherits(fit_glm, "glm")) { |
402 | 66x |
formatters::var_labels(fit_glm$data[x], fill = TRUE) |
403 |
} else { |
|
404 | 12x |
x |
405 |
} |
|
406 | 78x |
x_stats$interaction <- "" |
407 | 78x |
x_stats$interaction_label <- "" |
408 | 78x |
x_stats$reference <- "" |
409 | 78x |
x_stats$reference_label <- "" |
410 | 78x |
rownames(x_stats) <- NULL |
411 | 78x |
x_stats[c( |
412 | 78x |
"variable", |
413 | 78x |
"variable_label", |
414 | 78x |
"term", |
415 | 78x |
"term_label", |
416 | 78x |
"interaction", |
417 | 78x |
"interaction_label", |
418 | 78x |
"reference", |
419 | 78x |
"reference_label", |
420 | 78x |
"estimate", |
421 | 78x |
"std_error", |
422 | 78x |
"df", |
423 | 78x |
"pvalue", |
424 | 78x |
"is_variable_summary", |
425 | 78x |
"is_term_summary" |
426 |
)] |
|
427 |
} |
|
428 | ||
429 |
#' @describeIn h_logistic_regression Helper function to tabulate the interaction term |
|
430 |
#' results of a logistic regression model. |
|
431 |
#' |
|
432 |
#' @return Tabulated interaction term results from a logistic regression model. |
|
433 |
#' |
|
434 |
#' @examples |
|
435 |
#' h_glm_interaction_extract("ARMCD:AGE", mod2) |
|
436 |
#' |
|
437 |
#' @export |
|
438 |
h_glm_interaction_extract <- function(x, fit_glm) { |
|
439 | 7x |
vars <- h_get_interaction_vars(fit_glm) |
440 | 7x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
441 | ||
442 | 7x |
checkmate::assert_string(x) |
443 | ||
444 |
# Only take two-way interaction |
|
445 | 7x |
checkmate::assert_vector(vars, len = 2) |
446 | ||
447 |
# Only consider simple case: first variable in interaction is arm, a categorical variable |
|
448 | 7x |
checkmate::assert_disjunct(xs_class[vars[1]], "numeric") |
449 | ||
450 | 7x |
xs_level <- fit_glm$xlevels |
451 | 7x |
xs_coef <- summary(fit_glm)$coefficients |
452 | 7x |
main_effects <- car::Anova(fit_glm, type = 3, test.statistic = "Wald") |
453 | 7x |
stats <- c("estimate" = "Estimate", "std_error" = "Std. Error", "pvalue" = "Pr(>|z|)") |
454 | 7x |
v1_comp <- xs_level[[vars[1]]][-1] |
455 | 7x |
if (xs_class[vars[2]] == "numeric") { |
456 | 4x |
x_stats <- as.data.frame( |
457 | 4x |
xs_coef[paste0(vars[1], v1_comp, ":", vars[2]), stats, drop = FALSE], |
458 | 4x |
stringsAsFactors = FALSE |
459 |
) |
|
460 | 4x |
colnames(x_stats) <- names(stats) |
461 | 4x |
x_stats$term <- v1_comp |
462 | 4x |
x_numbers <- table(fit_glm$data[[vars[1]]]) |
463 | 4x |
x_stats$term_label <- h_simple_term_labels(v1_comp, x_numbers) |
464 | 4x |
v1_ref <- xs_level[[vars[1]]][1] |
465 | 4x |
term_main <- v1_ref |
466 | 4x |
ref_label <- h_simple_term_labels(v1_ref, x_numbers) |
467 | 3x |
} else if (xs_class[vars[2]] != "numeric") { |
468 | 3x |
v2_comp <- xs_level[[vars[2]]][-1] |
469 | 3x |
v1_v2_grid <- expand.grid(v1 = v1_comp, v2 = v2_comp) |
470 | 3x |
x_sel <- paste( |
471 | 3x |
paste0(vars[1], v1_v2_grid$v1), |
472 | 3x |
paste0(vars[2], v1_v2_grid$v2), |
473 | 3x |
sep = ":" |
474 |
) |
|
475 | 3x |
x_stats <- as.data.frame(xs_coef[x_sel, stats, drop = FALSE], stringsAsFactors = FALSE) |
476 | 3x |
colnames(x_stats) <- names(stats) |
477 | 3x |
x_stats$term <- paste(v1_v2_grid$v1, "*", v1_v2_grid$v2) |
478 | 3x |
x_numbers <- table(fit_glm$data[[vars[1]]], fit_glm$data[[vars[2]]]) |
479 | 3x |
x_stats$term_label <- h_interaction_term_labels(v1_v2_grid$v1, v1_v2_grid$v2, x_numbers) |
480 | 3x |
v1_ref <- xs_level[[vars[1]]][1] |
481 | 3x |
v2_ref <- xs_level[[vars[2]]][1] |
482 | 3x |
term_main <- paste(vars[1], vars[2], sep = " * ") |
483 | 3x |
ref_label <- h_interaction_term_labels(v1_ref, v2_ref, x_numbers, any = TRUE) |
484 |
} |
|
485 | 7x |
x_stats$df <- as.list(1) |
486 | 7x |
x_stats$pvalue <- as.list(x_stats$pvalue) |
487 | 7x |
x_stats$is_variable_summary <- FALSE |
488 | 7x |
x_stats$is_term_summary <- TRUE |
489 | 7x |
x_main <- data.frame( |
490 | 7x |
pvalue = main_effects[x, "Pr(>Chisq)", drop = TRUE], |
491 | 7x |
term = term_main, |
492 | 7x |
term_label = paste("Reference", ref_label), |
493 | 7x |
df = main_effects[x, "Df", drop = TRUE], |
494 | 7x |
stringsAsFactors = FALSE |
495 |
) |
|
496 | 7x |
x_main$pvalue <- as.list(x_main$pvalue) |
497 | 7x |
x_main$df <- as.list(x_main$df) |
498 | 7x |
x_main$estimate <- list(numeric(0)) |
499 | 7x |
x_main$std_error <- list(numeric(0)) |
500 | 7x |
x_main$is_variable_summary <- TRUE |
501 | 7x |
x_main$is_term_summary <- FALSE |
502 | ||
503 | 7x |
x_stats <- rbind(x_main, x_stats) |
504 | 7x |
x_stats$variable <- x |
505 | 7x |
x_stats$variable_label <- paste( |
506 | 7x |
"Interaction of", |
507 | 7x |
formatters::var_labels(fit_glm$data[vars[1]], fill = TRUE), |
508 |
"*", |
|
509 | 7x |
formatters::var_labels(fit_glm$data[vars[2]], fill = TRUE) |
510 |
) |
|
511 | 7x |
x_stats$interaction <- "" |
512 | 7x |
x_stats$interaction_label <- "" |
513 | 7x |
x_stats$reference <- "" |
514 | 7x |
x_stats$reference_label <- "" |
515 | 7x |
rownames(x_stats) <- NULL |
516 | 7x |
x_stats[c( |
517 | 7x |
"variable", |
518 | 7x |
"variable_label", |
519 | 7x |
"term", |
520 | 7x |
"term_label", |
521 | 7x |
"interaction", |
522 | 7x |
"interaction_label", |
523 | 7x |
"reference", |
524 | 7x |
"reference_label", |
525 | 7x |
"estimate", |
526 | 7x |
"std_error", |
527 | 7x |
"df", |
528 | 7x |
"pvalue", |
529 | 7x |
"is_variable_summary", |
530 | 7x |
"is_term_summary" |
531 |
)] |
|
532 |
} |
|
533 | ||
534 |
#' @describeIn h_logistic_regression Helper function to tabulate the interaction |
|
535 |
#' results of a logistic regression model. This basically is a wrapper for |
|
536 |
#' [h_or_interaction()] and [h_glm_simple_term_extract()] which puts the results |
|
537 |
#' in the right data frame format. |
|
538 |
#' |
|
539 |
#' @return A `data.frame` of tabulated interaction term results from a logistic regression model. |
|
540 |
#' |
|
541 |
#' @examples |
|
542 |
#' h_glm_inter_term_extract("AGE", "ARMCD", mod2) |
|
543 |
#' |
|
544 |
#' @export |
|
545 |
h_glm_inter_term_extract <- function(odds_ratio_var, |
|
546 |
interaction_var, |
|
547 |
fit_glm, |
|
548 |
...) { |
|
549 |
# First obtain the main effects. |
|
550 | 13x |
main_stats <- h_glm_simple_term_extract(odds_ratio_var, fit_glm) |
551 | 13x |
main_stats$is_reference_summary <- FALSE |
552 | 13x |
main_stats$odds_ratio <- NA |
553 | 13x |
main_stats$lcl <- NA |
554 | 13x |
main_stats$ucl <- NA |
555 | ||
556 |
# Then we get the odds ratio estimates and put into df form. |
|
557 | 13x |
or_numbers <- h_or_interaction(odds_ratio_var, interaction_var, fit_glm, ...) |
558 | 13x |
is_num_or_var <- attr(fit_glm$terms, "dataClasses")[odds_ratio_var] == "numeric" |
559 | ||
560 | 13x |
if (is_num_or_var) { |
561 |
# Numeric OR variable case. |
|
562 | 4x |
references <- names(or_numbers) |
563 | 4x |
n_ref <- length(references) |
564 | ||
565 | 4x |
extract_from_list <- function(l, name, pos = 1) { |
566 | 12x |
unname(unlist( |
567 | 12x |
lapply(or_numbers, function(x) { |
568 | 36x |
x[[name]][pos] |
569 |
}) |
|
570 |
)) |
|
571 |
} |
|
572 | 4x |
or_stats <- data.frame( |
573 | 4x |
variable = odds_ratio_var, |
574 | 4x |
variable_label = unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE)), |
575 | 4x |
term = odds_ratio_var, |
576 | 4x |
term_label = unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE)), |
577 | 4x |
interaction = interaction_var, |
578 | 4x |
interaction_label = unname(formatters::var_labels(fit_glm$data[interaction_var], fill = TRUE)), |
579 | 4x |
reference = references, |
580 | 4x |
reference_label = references, |
581 | 4x |
estimate = NA, |
582 | 4x |
std_error = NA, |
583 | 4x |
odds_ratio = extract_from_list(or_numbers, "or"), |
584 | 4x |
lcl = extract_from_list(or_numbers, "ci", pos = "lcl"), |
585 | 4x |
ucl = extract_from_list(or_numbers, "ci", pos = "ucl"), |
586 | 4x |
df = NA, |
587 | 4x |
pvalue = NA, |
588 | 4x |
is_variable_summary = FALSE, |
589 | 4x |
is_term_summary = FALSE, |
590 | 4x |
is_reference_summary = TRUE |
591 |
) |
|
592 |
} else { |
|
593 |
# Categorical OR variable case. |
|
594 | 9x |
references <- names(or_numbers[[1]]) |
595 | 9x |
n_ref <- length(references) |
596 | ||
597 | 9x |
extract_from_list <- function(l, name, pos = 1) { |
598 | 27x |
unname(unlist( |
599 | 27x |
lapply(or_numbers, function(x) { |
600 | 48x |
lapply(x, function(y) y[[name]][pos]) |
601 |
}) |
|
602 |
)) |
|
603 |
} |
|
604 | 9x |
or_stats <- data.frame( |
605 | 9x |
variable = odds_ratio_var, |
606 | 9x |
variable_label = unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE)), |
607 | 9x |
term = rep(names(or_numbers), each = n_ref), |
608 | 9x |
term_label = h_simple_term_labels(rep(names(or_numbers), each = n_ref), table(fit_glm$data[[odds_ratio_var]])), |
609 | 9x |
interaction = interaction_var, |
610 | 9x |
interaction_label = unname(formatters::var_labels(fit_glm$data[interaction_var], fill = TRUE)), |
611 | 9x |
reference = unlist(lapply(or_numbers, names)), |
612 | 9x |
reference_label = unlist(lapply(or_numbers, names)), |
613 | 9x |
estimate = NA, |
614 | 9x |
std_error = NA, |
615 | 9x |
odds_ratio = extract_from_list(or_numbers, "or"), |
616 | 9x |
lcl = extract_from_list(or_numbers, "ci", pos = "lcl"), |
617 | 9x |
ucl = extract_from_list(or_numbers, "ci", pos = "ucl"), |
618 | 9x |
df = NA, |
619 | 9x |
pvalue = NA, |
620 | 9x |
is_variable_summary = FALSE, |
621 | 9x |
is_term_summary = FALSE, |
622 | 9x |
is_reference_summary = TRUE |
623 |
) |
|
624 |
} |
|
625 | ||
626 | 13x |
df <- rbind( |
627 | 13x |
main_stats[, names(or_stats)], |
628 | 13x |
or_stats |
629 |
) |
|
630 | 13x |
df[order(-df$is_variable_summary, df$term, -df$is_term_summary, df$reference), ] |
631 |
} |
|
632 | ||
633 |
#' @describeIn h_logistic_regression Helper function to tabulate the results including |
|
634 |
#' odds ratios and confidence intervals of simple terms. |
|
635 |
#' |
|
636 |
#' @return Tabulated statistics for the given variable(s) from the logistic regression model. |
|
637 |
#' |
|
638 |
#' @examples |
|
639 |
#' h_logistic_simple_terms("AGE", mod1) |
|
640 |
#' |
|
641 |
#' @export |
|
642 |
h_logistic_simple_terms <- function(x, fit_glm, conf_level = 0.95) { |
|
643 | 53x |
checkmate::assert_multi_class(fit_glm, c("glm", "clogit")) |
644 | 53x |
if (inherits(fit_glm, "glm")) { |
645 | 42x |
checkmate::assert_set_equal(fit_glm$family$family, "binomial") |
646 |
} |
|
647 | 53x |
terms_name <- attr(stats::terms(fit_glm), "term.labels") |
648 | 53x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
649 | 53x |
interaction <- terms_name[which(!terms_name %in% names(xs_class))] |
650 | 53x |
checkmate::assert_subset(x, terms_name) |
651 | 53x |
if (length(interaction) != 0) { |
652 |
# Make sure any item in x is not part of interaction term |
|
653 | 2x |
checkmate::assert_disjunct(x, unlist(strsplit(interaction, ":"))) |
654 |
} |
|
655 | 53x |
x_stats <- lapply(x, h_glm_simple_term_extract, fit_glm) |
656 | 53x |
x_stats <- do.call(rbind, x_stats) |
657 | 53x |
q_norm <- stats::qnorm((1 + conf_level) / 2) |
658 | 53x |
x_stats$odds_ratio <- lapply(x_stats$estimate, exp) |
659 | 53x |
x_stats$lcl <- Map(function(or, se) exp(log(or) - q_norm * se), x_stats$odds_ratio, x_stats$std_error) |
660 | 53x |
x_stats$ucl <- Map(function(or, se) exp(log(or) + q_norm * se), x_stats$odds_ratio, x_stats$std_error) |
661 | 53x |
x_stats$ci <- Map(function(lcl, ucl) c(lcl, ucl), lcl = x_stats$lcl, ucl = x_stats$ucl) |
662 | 53x |
x_stats |
663 |
} |
|
664 | ||
665 |
#' @describeIn h_logistic_regression Helper function to tabulate the results including |
|
666 |
#' odds ratios and confidence intervals of interaction terms. |
|
667 |
#' |
|
668 |
#' @return Tabulated statistics for the given variable(s) from the logistic regression model. |
|
669 |
#' |
|
670 |
#' @examples |
|
671 |
#' h_logistic_inter_terms(c("RACE", "AGE", "ARMCD", "AGE:ARMCD"), mod2) |
|
672 |
#' |
|
673 |
#' @export |
|
674 |
h_logistic_inter_terms <- function(x, |
|
675 |
fit_glm, |
|
676 |
conf_level = 0.95, |
|
677 |
at = NULL) { |
|
678 |
# Find out the interaction variables and interaction term. |
|
679 | 5x |
inter_vars <- h_get_interaction_vars(fit_glm) |
680 | 5x |
checkmate::assert_vector(inter_vars, len = 2) |
681 | ||
682 | ||
683 | 5x |
inter_term_index <- intersect(grep(inter_vars[1], x), grep(inter_vars[2], x)) |
684 | 5x |
inter_term <- x[inter_term_index] |
685 | ||
686 |
# For the non-interaction vars we need the standard stuff. |
|
687 | 5x |
normal_terms <- setdiff(x, union(inter_vars, inter_term)) |
688 | ||
689 | 5x |
x_stats <- lapply(normal_terms, h_glm_simple_term_extract, fit_glm) |
690 | 5x |
x_stats <- do.call(rbind, x_stats) |
691 | 5x |
q_norm <- stats::qnorm((1 + conf_level) / 2) |
692 | 5x |
x_stats$odds_ratio <- lapply(x_stats$estimate, exp) |
693 | 5x |
x_stats$lcl <- Map(function(or, se) exp(log(or) - q_norm * se), x_stats$odds_ratio, x_stats$std_error) |
694 | 5x |
x_stats$ucl <- Map(function(or, se) exp(log(or) + q_norm * se), x_stats$odds_ratio, x_stats$std_error) |
695 | 5x |
normal_stats <- x_stats |
696 | 5x |
normal_stats$is_reference_summary <- FALSE |
697 | ||
698 |
# Now the interaction term itself. |
|
699 | 5x |
inter_term_stats <- h_glm_interaction_extract(inter_term, fit_glm) |
700 | 5x |
inter_term_stats$odds_ratio <- NA |
701 | 5x |
inter_term_stats$lcl <- NA |
702 | 5x |
inter_term_stats$ucl <- NA |
703 | 5x |
inter_term_stats$is_reference_summary <- FALSE |
704 | ||
705 | 5x |
is_intervar1_numeric <- attr(fit_glm$terms, "dataClasses")[inter_vars[1]] == "numeric" |
706 | ||
707 |
# Interaction stuff. |
|
708 | 5x |
inter_stats_one <- h_glm_inter_term_extract( |
709 | 5x |
inter_vars[1], |
710 | 5x |
inter_vars[2], |
711 | 5x |
fit_glm, |
712 | 5x |
conf_level = conf_level, |
713 | 5x |
at = `if`(is_intervar1_numeric, NULL, at) |
714 |
) |
|
715 | 5x |
inter_stats_two <- h_glm_inter_term_extract( |
716 | 5x |
inter_vars[2], |
717 | 5x |
inter_vars[1], |
718 | 5x |
fit_glm, |
719 | 5x |
conf_level = conf_level, |
720 | 5x |
at = `if`(is_intervar1_numeric, at, NULL) |
721 |
) |
|
722 | ||
723 |
# Now just combine everything in one data frame. |
|
724 | 5x |
col_names <- c( |
725 | 5x |
"variable", |
726 | 5x |
"variable_label", |
727 | 5x |
"term", |
728 | 5x |
"term_label", |
729 | 5x |
"interaction", |
730 | 5x |
"interaction_label", |
731 | 5x |
"reference", |
732 | 5x |
"reference_label", |
733 | 5x |
"estimate", |
734 | 5x |
"std_error", |
735 | 5x |
"df", |
736 | 5x |
"pvalue", |
737 | 5x |
"odds_ratio", |
738 | 5x |
"lcl", |
739 | 5x |
"ucl", |
740 | 5x |
"is_variable_summary", |
741 | 5x |
"is_term_summary", |
742 | 5x |
"is_reference_summary" |
743 |
) |
|
744 | 5x |
df <- rbind( |
745 | 5x |
inter_stats_one[, col_names], |
746 | 5x |
inter_stats_two[, col_names], |
747 | 5x |
inter_term_stats[, col_names] |
748 |
) |
|
749 | 5x |
if (length(normal_terms) > 0) { |
750 | 5x |
df <- rbind( |
751 | 5x |
normal_stats[, col_names], |
752 | 5x |
df |
753 |
) |
|
754 |
} |
|
755 | 5x |
df$ci <- combine_vectors(df$lcl, df$ucl) |
756 | 5x |
df |
757 |
} |
1 |
#' Create a STEP graph |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Based on the STEP results, creates a `ggplot` graph showing the estimated HR or OR |
|
6 |
#' along the continuous biomarker value subgroups. |
|
7 |
#' |
|
8 |
#' @param df (`tibble`)\cr result of [tidy.step()]. |
|
9 |
#' @param use_percentile (`flag`)\cr whether to use percentiles for the x axis or actual |
|
10 |
#' biomarker values. |
|
11 |
#' @param est (named `list`)\cr `col` and `lty` settings for estimate line. |
|
12 |
#' @param ci_ribbon (named `list` or `NULL`)\cr `fill` and `alpha` settings for the confidence interval |
|
13 |
#' ribbon area, or `NULL` to not plot a CI ribbon. |
|
14 |
#' @param col (`character`)\cr color(s). |
|
15 |
#' |
|
16 |
#' @return A `ggplot` STEP graph. |
|
17 |
#' |
|
18 |
#' @seealso Custom tidy method [tidy.step()]. |
|
19 |
#' |
|
20 |
#' @examples |
|
21 |
#' library(survival) |
|
22 |
#' lung$sex <- factor(lung$sex) |
|
23 |
#' |
|
24 |
#' # Survival example. |
|
25 |
#' vars <- list( |
|
26 |
#' time = "time", |
|
27 |
#' event = "status", |
|
28 |
#' arm = "sex", |
|
29 |
#' biomarker = "age" |
|
30 |
#' ) |
|
31 |
#' |
|
32 |
#' step_matrix <- fit_survival_step( |
|
33 |
#' variables = vars, |
|
34 |
#' data = lung, |
|
35 |
#' control = c(control_coxph(), control_step(num_points = 10, degree = 2)) |
|
36 |
#' ) |
|
37 |
#' step_data <- broom::tidy(step_matrix) |
|
38 |
#' |
|
39 |
#' # Default plot. |
|
40 |
#' g_step(step_data) |
|
41 |
#' |
|
42 |
#' # Add the reference 1 horizontal line. |
|
43 |
#' library(ggplot2) |
|
44 |
#' g_step(step_data) + |
|
45 |
#' ggplot2::geom_hline(ggplot2::aes(yintercept = 1), linetype = 2) |
|
46 |
#' |
|
47 |
#' # Use actual values instead of percentiles, different color for estimate and no CI, |
|
48 |
#' # use log scale for y axis. |
|
49 |
#' g_step( |
|
50 |
#' step_data, |
|
51 |
#' use_percentile = FALSE, |
|
52 |
#' est = list(col = "blue", lty = 1), |
|
53 |
#' ci_ribbon = NULL |
|
54 |
#' ) + scale_y_log10() |
|
55 |
#' |
|
56 |
#' # Adding another curve based on additional column. |
|
57 |
#' step_data$extra <- exp(step_data$`Percentile Center`) |
|
58 |
#' g_step(step_data) + |
|
59 |
#' ggplot2::geom_line(ggplot2::aes(y = extra), linetype = 2, color = "green") |
|
60 |
#' |
|
61 |
#' # Response example. |
|
62 |
#' vars <- list( |
|
63 |
#' response = "status", |
|
64 |
#' arm = "sex", |
|
65 |
#' biomarker = "age" |
|
66 |
#' ) |
|
67 |
#' |
|
68 |
#' step_matrix <- fit_rsp_step( |
|
69 |
#' variables = vars, |
|
70 |
#' data = lung, |
|
71 |
#' control = c( |
|
72 |
#' control_logistic(response_definition = "I(response == 2)"), |
|
73 |
#' control_step() |
|
74 |
#' ) |
|
75 |
#' ) |
|
76 |
#' step_data <- broom::tidy(step_matrix) |
|
77 |
#' g_step(step_data) |
|
78 |
#' |
|
79 |
#' @export |
|
80 |
g_step <- function(df, |
|
81 |
use_percentile = "Percentile Center" %in% names(df), |
|
82 |
est = list(col = "blue", lty = 1), |
|
83 |
ci_ribbon = list(fill = getOption("ggplot2.discrete.colour")[1], alpha = 0.5), |
|
84 |
col = getOption("ggplot2.discrete.colour")) { |
|
85 | 2x |
checkmate::assert_tibble(df) |
86 | 2x |
checkmate::assert_flag(use_percentile) |
87 | 2x |
checkmate::assert_character(col, null.ok = TRUE) |
88 | 2x |
checkmate::assert_list(est, names = "named") |
89 | 2x |
checkmate::assert_list(ci_ribbon, names = "named", null.ok = TRUE) |
90 | ||
91 | 2x |
x_var <- ifelse(use_percentile, "Percentile Center", "Interval Center") |
92 | 2x |
df$x <- df[[x_var]] |
93 | 2x |
attrs <- attributes(df) |
94 | 2x |
df$y <- df[[attrs$estimate]] |
95 | ||
96 |
# Set legend names. To be modified also at call level |
|
97 | 2x |
legend_names <- c("Estimate", "CI 95%") |
98 | ||
99 | 2x |
p <- ggplot2::ggplot(df, ggplot2::aes(x = .data[["x"]], y = .data[["y"]])) |
100 | ||
101 | 2x |
if (!is.null(col)) { |
102 | 2x |
p <- p + |
103 | 2x |
ggplot2::scale_color_manual(values = col) |
104 |
} |
|
105 | ||
106 | 2x |
if (!is.null(ci_ribbon)) { |
107 | 1x |
if (is.null(ci_ribbon$fill)) { |
108 | ! |
ci_ribbon$fill <- "lightblue" |
109 |
} |
|
110 | 1x |
p <- p + ggplot2::geom_ribbon( |
111 | 1x |
ggplot2::aes( |
112 | 1x |
ymin = .data[["ci_lower"]], ymax = .data[["ci_upper"]], |
113 | 1x |
fill = legend_names[2] |
114 |
), |
|
115 | 1x |
alpha = ci_ribbon$alpha |
116 |
) + |
|
117 | 1x |
scale_fill_manual( |
118 | 1x |
name = "", values = c("CI 95%" = ci_ribbon$fill) |
119 |
) |
|
120 |
} |
|
121 | 2x |
suppressMessages(p <- p + |
122 | 2x |
ggplot2::geom_line( |
123 | 2x |
ggplot2::aes(y = .data[["y"]], color = legend_names[1]), |
124 | 2x |
linetype = est$lty |
125 |
) + |
|
126 | 2x |
scale_colour_manual( |
127 | 2x |
name = "", values = c("Estimate" = "blue") |
128 |
)) |
|
129 | ||
130 | 2x |
p <- p + ggplot2::labs(x = attrs$biomarker, y = attrs$estimate) |
131 | 2x |
if (use_percentile) { |
132 | 1x |
p <- p + ggplot2::scale_x_continuous(labels = scales::percent) |
133 |
} |
|
134 | 2x |
p |
135 |
} |
|
136 | ||
137 |
#' Custom tidy method for STEP results |
|
138 |
#' |
|
139 |
#' @description `r lifecycle::badge("stable")` |
|
140 |
#' |
|
141 |
#' Tidy the STEP results into a `tibble` format ready for plotting. |
|
142 |
#' |
|
143 |
#' @param x (`matrix`)\cr results from [fit_survival_step()]. |
|
144 |
#' @param ... not used. |
|
145 |
#' |
|
146 |
#' @return A `tibble` with one row per STEP subgroup. The estimates and CIs are on the HR or OR scale, |
|
147 |
#' respectively. Additional attributes carry metadata also used for plotting. |
|
148 |
#' |
|
149 |
#' @seealso [g_step()] which consumes the result from this function. |
|
150 |
#' |
|
151 |
#' @method tidy step |
|
152 |
#' |
|
153 |
#' @examples |
|
154 |
#' library(survival) |
|
155 |
#' lung$sex <- factor(lung$sex) |
|
156 |
#' vars <- list( |
|
157 |
#' time = "time", |
|
158 |
#' event = "status", |
|
159 |
#' arm = "sex", |
|
160 |
#' biomarker = "age" |
|
161 |
#' ) |
|
162 |
#' step_matrix <- fit_survival_step( |
|
163 |
#' variables = vars, |
|
164 |
#' data = lung, |
|
165 |
#' control = c(control_coxph(), control_step(num_points = 10, degree = 2)) |
|
166 |
#' ) |
|
167 |
#' broom::tidy(step_matrix) |
|
168 |
#' |
|
169 |
#' @export |
|
170 |
tidy.step <- function(x, ...) { # nolint |
|
171 | 7x |
checkmate::assert_class(x, "step") |
172 | 7x |
dat <- as.data.frame(x) |
173 | 7x |
nams <- names(dat) |
174 | 7x |
is_surv <- "loghr" %in% names(dat) |
175 | 7x |
est_var <- ifelse(is_surv, "loghr", "logor") |
176 | 7x |
new_est_var <- ifelse(is_surv, "Hazard Ratio", "Odds Ratio") |
177 | 7x |
new_y_vars <- c(new_est_var, c("ci_lower", "ci_upper")) |
178 | 7x |
names(dat)[match(est_var, nams)] <- new_est_var |
179 | 7x |
dat[, new_y_vars] <- exp(dat[, new_y_vars]) |
180 | 7x |
any_is_na <- any(is.na(dat[, new_y_vars])) |
181 | 7x |
any_is_very_large <- any(abs(dat[, new_y_vars]) > 1e10, na.rm = TRUE) |
182 | 7x |
if (any_is_na) { |
183 | 2x |
warning(paste( |
184 | 2x |
"Missing values in the point estimate or CI columns,", |
185 | 2x |
"this will lead to holes in the `g_step()` plot" |
186 |
)) |
|
187 |
} |
|
188 | 7x |
if (any_is_very_large) { |
189 | 2x |
warning(paste( |
190 | 2x |
"Very large absolute values in the point estimate or CI columns,", |
191 | 2x |
"consider adding `scale_y_log10()` to the `g_step()` result for plotting" |
192 |
)) |
|
193 |
} |
|
194 | 7x |
if (any_is_na || any_is_very_large) { |
195 | 4x |
warning("Consider using larger `bandwidth`, less `num_points` in `control_step()` settings for fitting") |
196 |
} |
|
197 | 7x |
structure( |
198 | 7x |
tibble::as_tibble(dat), |
199 | 7x |
estimate = new_est_var, |
200 | 7x |
biomarker = attr(x, "variables")$biomarker, |
201 | 7x |
ci = f_conf_level(attr(x, "control")$conf_level) |
202 |
) |
|
203 |
} |
1 |
# summarize_glm_count ---------------------------------------------------------- |
|
2 |
#' Summarize Poisson negative binomial regression |
|
3 |
#' |
|
4 |
#' @description `r lifecycle::badge("experimental")` |
|
5 |
#' |
|
6 |
#' Summarize results of a Poisson negative binomial regression. |
|
7 |
#' This can be used to analyze count and/or frequency data using a linear model. |
|
8 |
#' It is specifically useful for analyzing count data (using the Poisson or Negative |
|
9 |
#' Binomial distribution) that is result of a generalized linear model of one (e.g. arm) or more |
|
10 |
#' covariates. |
|
11 |
#' |
|
12 |
#' @inheritParams h_glm_count |
|
13 |
#' @inheritParams argument_convention |
|
14 |
#' @param rate_mean_method (`character(1)`)\cr method used to estimate the mean odds ratio. Defaults to `emmeans`. |
|
15 |
#' see details for more information. |
|
16 |
#' @param scale (`numeric(1)`)\cr linear scaling factor for rate and confidence intervals. Defaults to `1`. |
|
17 |
#' @param .stats (`character`)\cr statistics to select for the table. |
|
18 |
#' |
|
19 |
#' Options are: ``r shQuote(get_stats("summarize_glm_count"))`` |
|
20 |
#' |
|
21 |
#' @details |
|
22 |
#' `summarize_glm_count()` uses `s_glm_count()` to calculate the statistics for the table. This |
|
23 |
#' analysis function uses [h_glm_count()] to estimate the GLM with [stats::glm()] for Poisson and Quasi-Poisson |
|
24 |
#' distributions or [MASS::glm.nb()] for Negative Binomial distribution. All methods assume a |
|
25 |
#' logarithmic link function. |
|
26 |
#' |
|
27 |
#' At this point, rates and confidence intervals are estimated from the model using |
|
28 |
#' either [emmeans::emmeans()] when `rate_mean_method = "emmeans"` or [h_ppmeans()] |
|
29 |
#' when `rate_mean_method = "ppmeans"`. |
|
30 |
#' |
|
31 |
#' If a reference group is specified while building the table with `split_cols_by(ref_group)`, |
|
32 |
#' no rate ratio or `p-value` are calculated. Otherwise, we use [emmeans::contrast()] to |
|
33 |
#' calculate the rate ratio and `p-value` for the reference group. Values are always estimated |
|
34 |
#' with `method = "trt.vs.ctrl"` and `ref` equal to the first `arm` value. |
|
35 |
#' |
|
36 |
#' @name summarize_glm_count |
|
37 |
NULL |
|
38 | ||
39 |
#' @describeIn summarize_glm_count Layout-creating function which can take statistics function arguments |
|
40 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()]. |
|
41 |
#' |
|
42 |
#' @return |
|
43 |
#' * `summarize_glm_count()` returns a layout object suitable for passing to further layouting functions, |
|
44 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing |
|
45 |
#' the statistics from `s_glm_count()` to the table layout. |
|
46 |
#' |
|
47 |
#' @examples |
|
48 |
#' library(dplyr) |
|
49 |
#' |
|
50 |
#' anl <- tern_ex_adtte %>% filter(PARAMCD == "TNE") |
|
51 |
#' anl$AVAL_f <- as.factor(anl$AVAL) |
|
52 |
#' |
|
53 |
#' lyt <- basic_table() %>% |
|
54 |
#' split_cols_by("ARM", ref_group = "B: Placebo") %>% |
|
55 |
#' add_colcounts() %>% |
|
56 |
#' analyze_vars( |
|
57 |
#' "AVAL_f", |
|
58 |
#' var_labels = "Number of exacerbations per patient", |
|
59 |
#' .stats = c("count_fraction"), |
|
60 |
#' .formats = c("count_fraction" = "xx (xx.xx%)"), |
|
61 |
#' .labels = c("Number of exacerbations per patient") |
|
62 |
#' ) %>% |
|
63 |
#' summarize_glm_count( |
|
64 |
#' vars = "AVAL", |
|
65 |
#' variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = NULL), |
|
66 |
#' conf_level = 0.95, |
|
67 |
#' distribution = "poisson", |
|
68 |
#' rate_mean_method = "emmeans", |
|
69 |
#' var_labels = "Adjusted (P) exacerbation rate (per year)", |
|
70 |
#' table_names = "adjP", |
|
71 |
#' .stats = c("rate"), |
|
72 |
#' .labels = c(rate = "Rate") |
|
73 |
#' ) %>% |
|
74 |
#' summarize_glm_count( |
|
75 |
#' vars = "AVAL", |
|
76 |
#' variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = c("REGION1")), |
|
77 |
#' conf_level = 0.95, |
|
78 |
#' distribution = "quasipoisson", |
|
79 |
#' rate_mean_method = "ppmeans", |
|
80 |
#' var_labels = "Adjusted (QP) exacerbation rate (per year)", |
|
81 |
#' table_names = "adjQP", |
|
82 |
#' .stats = c("rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"), |
|
83 |
#' .labels = c( |
|
84 |
#' rate = "Rate", rate_ci = "Rate CI", rate_ratio = "Rate Ratio", |
|
85 |
#' rate_ratio_ci = "Rate Ratio CI", pval = "p value" |
|
86 |
#' ) |
|
87 |
#' ) |
|
88 |
#' |
|
89 |
#' build_table(lyt = lyt, df = anl) |
|
90 |
#' |
|
91 |
#' @export |
|
92 |
summarize_glm_count <- function(lyt, |
|
93 |
vars, |
|
94 |
variables, |
|
95 |
distribution, |
|
96 |
conf_level, |
|
97 |
rate_mean_method = c("emmeans", "ppmeans")[1], |
|
98 |
weights = stats::weights, |
|
99 |
scale = 1, |
|
100 |
var_labels, |
|
101 |
na_str = default_na_str(), |
|
102 |
nested = TRUE, |
|
103 |
..., |
|
104 |
show_labels = "visible", |
|
105 |
table_names = vars, |
|
106 |
.stats = get_stats("summarize_glm_count"), |
|
107 |
.formats = NULL, |
|
108 |
.labels = NULL, |
|
109 |
.indent_mods = c( |
|
110 |
"n" = 0L, |
|
111 |
"rate" = 0L, |
|
112 |
"rate_ci" = 1L, |
|
113 |
"rate_ratio" = 0L, |
|
114 |
"rate_ratio_ci" = 1L, |
|
115 |
"pval" = 1L |
|
116 |
)) { |
|
117 | 3x |
checkmate::assert_choice(rate_mean_method, c("emmeans", "ppmeans")) |
118 | ||
119 | 3x |
extra_args <- list( |
120 | 3x |
variables = variables, distribution = distribution, conf_level = conf_level, |
121 | 3x |
rate_mean_method = rate_mean_method, weights = weights, scale = scale, ... |
122 |
) |
|
123 | ||
124 |
# Selecting parameters following the statistics |
|
125 | 3x |
.formats <- get_formats_from_stats(.stats, formats_in = .formats) |
126 | 3x |
.labels <- get_labels_from_stats(.stats, labels_in = .labels) |
127 | 3x |
.indent_mods <- get_indents_from_stats(.stats, indents_in = .indent_mods) |
128 | ||
129 | 3x |
afun <- make_afun( |
130 | 3x |
s_glm_count, |
131 | 3x |
.stats = .stats, |
132 | 3x |
.formats = .formats, |
133 | 3x |
.labels = .labels, |
134 | 3x |
.indent_mods = .indent_mods, |
135 | 3x |
.null_ref_cells = FALSE |
136 |
) |
|
137 | ||
138 | 3x |
analyze( |
139 | 3x |
lyt, |
140 | 3x |
vars, |
141 | 3x |
var_labels = var_labels, |
142 | 3x |
show_labels = show_labels, |
143 | 3x |
table_names = table_names, |
144 | 3x |
afun = afun, |
145 | 3x |
na_str = na_str, |
146 | 3x |
nested = nested, |
147 | 3x |
extra_args = extra_args |
148 |
) |
|
149 |
} |
|
150 | ||
151 |
#' @describeIn summarize_glm_count Statistics function that produces a named list of results |
|
152 |
#' of the investigated Poisson model. |
|
153 |
#' |
|
154 |
#' @return |
|
155 |
#' * `s_glm_count()` returns a named `list` of 5 statistics: |
|
156 |
#' * `n`: Count of complete sample size for the group. |
|
157 |
#' * `rate`: Estimated event rate per follow-up time. |
|
158 |
#' * `rate_ci`: Confidence level for estimated rate per follow-up time. |
|
159 |
#' * `rate_ratio`: Ratio of event rates in each treatment arm to the reference arm. |
|
160 |
#' * `rate_ratio_ci`: Confidence level for the rate ratio. |
|
161 |
#' * `pval`: p-value. |
|
162 |
#' |
|
163 |
#' @keywords internal |
|
164 |
s_glm_count <- function(df, |
|
165 |
.var, |
|
166 |
.df_row, |
|
167 |
variables, |
|
168 |
.ref_group, |
|
169 |
.in_ref_col, |
|
170 |
distribution, |
|
171 |
conf_level, |
|
172 |
rate_mean_method, |
|
173 |
weights, |
|
174 |
scale = 1) { |
|
175 | 14x |
arm <- variables$arm |
176 | ||
177 | 14x |
y <- df[[.var]] |
178 | 13x |
smry_level <- as.character(unique(df[[arm]])) |
179 | ||
180 |
# ensure there is only 1 value |
|
181 | 13x |
checkmate::assert_scalar(smry_level) |
182 | ||
183 | 13x |
results <- h_glm_count( |
184 | 13x |
.var = .var, |
185 | 13x |
.df_row = .df_row, |
186 | 13x |
variables = variables, |
187 | 13x |
distribution = distribution, |
188 | 13x |
weights |
189 |
) |
|
190 | ||
191 | 13x |
if (rate_mean_method == "emmeans") { |
192 | 13x |
emmeans_smry <- summary(results$emmeans_fit, level = conf_level) |
193 | ! |
} else if (rate_mean_method == "ppmeans") { |
194 | ! |
emmeans_smry <- h_ppmeans(results$glm_fit, .df_row, arm, conf_level) |
195 |
} |
|
196 | ||
197 | 13x |
emmeans_smry_level <- emmeans_smry[emmeans_smry[[arm]] == smry_level, ] |
198 | ||
199 |
# This happens if there is a reference col. No Ratio is calculated? |
|
200 | 13x |
if (.in_ref_col) { |
201 | 5x |
list( |
202 | 5x |
n = length(y[!is.na(y)]), |
203 | 5x |
rate = formatters::with_label( |
204 | 5x |
ifelse(distribution == "negbin", emmeans_smry_level$response * scale, emmeans_smry_level$rate * scale), |
205 | 5x |
"Adjusted Rate" |
206 |
), |
|
207 | 5x |
rate_ci = formatters::with_label( |
208 | 5x |
c(emmeans_smry_level$asymp.LCL * scale, emmeans_smry_level$asymp.UCL * scale), |
209 | 5x |
f_conf_level(conf_level) |
210 |
), |
|
211 | 5x |
rate_ratio = formatters::with_label(character(), "Adjusted Rate Ratio"), |
212 | 5x |
rate_ratio_ci = formatters::with_label(character(), f_conf_level(conf_level)), |
213 | 5x |
pval = formatters::with_label(character(), "p-value") |
214 |
) |
|
215 |
} else { |
|
216 | 8x |
emmeans_contrasts <- emmeans::contrast( |
217 | 8x |
results$emmeans_fit, |
218 | 8x |
method = "trt.vs.ctrl", |
219 | 8x |
ref = grep( |
220 | 8x |
as.character(unique(.ref_group[[arm]])), |
221 | 8x |
as.data.frame(results$emmeans_fit)[[arm]] |
222 |
) |
|
223 |
) |
|
224 | ||
225 | 8x |
contrasts_smry <- summary( |
226 | 8x |
emmeans_contrasts, |
227 | 8x |
infer = TRUE, |
228 | 8x |
adjust = "none" |
229 |
) |
|
230 | ||
231 | 8x |
smry_contrasts_level <- contrasts_smry[grepl(smry_level, contrasts_smry$contrast), ] |
232 | ||
233 | 8x |
list( |
234 | 8x |
n = length(y[!is.na(y)]), |
235 | 8x |
rate = formatters::with_label( |
236 | 8x |
ifelse(distribution == "negbin", |
237 | 8x |
emmeans_smry_level$response * scale, |
238 | 8x |
emmeans_smry_level$rate * scale |
239 |
), |
|
240 | 8x |
"Adjusted Rate" |
241 |
), |
|
242 | 8x |
rate_ci = formatters::with_label( |
243 | 8x |
c(emmeans_smry_level$asymp.LCL * scale, emmeans_smry_level$asymp.UCL * scale), |
244 | 8x |
f_conf_level(conf_level) |
245 |
), |
|
246 | 8x |
rate_ratio = formatters::with_label( |
247 | 8x |
smry_contrasts_level$ratio, |
248 | 8x |
"Adjusted Rate Ratio" |
249 |
), |
|
250 | 8x |
rate_ratio_ci = formatters::with_label( |
251 | 8x |
c(smry_contrasts_level$asymp.LCL, smry_contrasts_level$asymp.UCL), |
252 | 8x |
f_conf_level(conf_level) |
253 |
), |
|
254 | 8x |
pval = formatters::with_label( |
255 | 8x |
smry_contrasts_level$p.value, |
256 | 8x |
"p-value" |
257 |
) |
|
258 |
) |
|
259 |
} |
|
260 |
} |
|
261 |
# h_glm_count ------------------------------------------------------------------ |
|
262 |
#' Helper functions for Poisson models |
|
263 |
#' |
|
264 |
#' @description `r lifecycle::badge("experimental")` |
|
265 |
#' |
|
266 |
#' Helper functions that returns the results of [stats::glm()] when Poisson or Quasi-Poisson |
|
267 |
#' distributions are needed (see `family` parameter), or [MASS::glm.nb()] for Negative Binomial |
|
268 |
#' distributions. Link function for the GLM is `log`. |
|
269 |
#' |
|
270 |
#' @inheritParams argument_convention |
|
271 |
#' |
|
272 |
#' @seealso [summarize_glm_count] |
|
273 |
#' |
|
274 |
#' @name h_glm_count |
|
275 |
NULL |
|
276 | ||
277 |
#' @describeIn h_glm_count Helper function to return the results of the |
|
278 |
#' selected model (Poisson, Quasi-Poisson, negative binomial). |
|
279 |
#' |
|
280 |
#' @param .df_row (`data.frame`)\cr dataset that includes all the variables that are called |
|
281 |
#' in `.var` and `variables`. |
|
282 |
#' @param variables (named `list` of `string`)\cr list of additional analysis variables, with |
|
283 |
#' expected elements: |
|
284 |
#' * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple |
|
285 |
#' groups will be summarized. Specifically, the first level of `arm` variable is taken as the |
|
286 |
#' reference group. |
|
287 |
#' * `covariates` (`character`)\cr a vector that can contain single variable names (such as |
|
288 |
#' `"X1"`), and/or interaction terms indicated by `"X1 * X2"`. |
|
289 |
#' * `offset` (`numeric`)\cr a numeric vector or scalar adding an offset. |
|
290 |
#' @param distribution (`character`)\cr a character value specifying the distribution |
|
291 |
#' used in the regression (Poisson, Quasi-Poisson, negative binomial). |
|
292 |
#' @param weights (`character`)\cr a character vector specifying weights used |
|
293 |
#' in averaging predictions. Number of weights must equal the number of levels included in the covariates. |
|
294 |
#' Weights option passed to [emmeans::emmeans()]. |
|
295 |
#' |
|
296 |
#' @return |
|
297 |
#' * `h_glm_count()` returns the results of the selected model. |
|
298 |
#' |
|
299 |
#' @keywords internal |
|
300 |
h_glm_count <- function(.var, |
|
301 |
.df_row, |
|
302 |
variables, |
|
303 |
distribution, |
|
304 |
weights) { |
|
305 | 21x |
checkmate::assert_subset(distribution, c("poisson", "quasipoisson", "negbin"), empty.ok = FALSE) |
306 | 19x |
switch(distribution, |
307 | 13x |
poisson = h_glm_poisson(.var, .df_row, variables, weights), |
308 | 1x |
quasipoisson = h_glm_quasipoisson(.var, .df_row, variables, weights), |
309 | 5x |
negbin = h_glm_negbin(.var, .df_row, variables, weights) |
310 |
) |
|
311 |
} |
|
312 | ||
313 |
#' @describeIn h_glm_count Helper function to return results of a Poisson model. |
|
314 |
#' |
|
315 |
#' @return |
|
316 |
#' * `h_glm_poisson()` returns the results of a Poisson model. |
|
317 |
#' |
|
318 |
#' @keywords internal |
|
319 |
h_glm_poisson <- function(.var, |
|
320 |
.df_row, |
|
321 |
variables, |
|
322 |
weights) { |
|
323 | 17x |
arm <- variables$arm |
324 | 17x |
covariates <- variables$covariates |
325 | 17x |
offset <- .df_row[[variables$offset]] |
326 | ||
327 | 15x |
formula <- stats::as.formula(paste0( |
328 | 15x |
.var, " ~ ", |
329 |
" + ", |
|
330 | 15x |
paste(covariates, collapse = " + "), |
331 |
" + ", |
|
332 | 15x |
arm |
333 |
)) |
|
334 | ||
335 | 15x |
glm_fit <- stats::glm( |
336 | 15x |
formula = formula, |
337 | 15x |
offset = offset, |
338 | 15x |
data = .df_row, |
339 | 15x |
family = stats::poisson(link = "log") |
340 |
) |
|
341 | ||
342 | 15x |
emmeans_fit <- emmeans::emmeans( |
343 | 15x |
glm_fit, |
344 | 15x |
specs = arm, |
345 | 15x |
data = .df_row, |
346 | 15x |
type = "response", |
347 | 15x |
offset = 0, |
348 | 15x |
weights = weights |
349 |
) |
|
350 | ||
351 | 15x |
list( |
352 | 15x |
glm_fit = glm_fit, |
353 | 15x |
emmeans_fit = emmeans_fit |
354 |
) |
|
355 |
} |
|
356 | ||
357 |
#' @describeIn h_glm_count Helper function to return results of a Quasi-Poisson model. |
|
358 |
#' |
|
359 |
#' @return |
|
360 |
#' * `h_glm_quasipoisson()` returns the results of a Quasi-Poisson model. |
|
361 |
#' |
|
362 |
#' @keywords internal |
|
363 |
h_glm_quasipoisson <- function(.var, |
|
364 |
.df_row, |
|
365 |
variables, |
|
366 |
weights) { |
|
367 | 5x |
arm <- variables$arm |
368 | 5x |
covariates <- variables$covariates |
369 | 5x |
offset <- .df_row[[variables$offset]] |
370 | ||
371 | 3x |
formula <- stats::as.formula(paste0( |
372 | 3x |
.var, " ~ ", |
373 |
" + ", |
|
374 | 3x |
paste(covariates, collapse = " + "), |
375 |
" + ", |
|
376 | 3x |
arm |
377 |
)) |
|
378 | ||
379 | 3x |
glm_fit <- stats::glm( |
380 | 3x |
formula = formula, |
381 | 3x |
offset = offset, |
382 | 3x |
data = .df_row, |
383 | 3x |
family = stats::quasipoisson(link = "log") |
384 |
) |
|
385 | ||
386 | 3x |
emmeans_fit <- emmeans::emmeans( |
387 | 3x |
glm_fit, |
388 | 3x |
specs = arm, |
389 | 3x |
data = .df_row, |
390 | 3x |
type = "response", |
391 | 3x |
offset = 0, |
392 | 3x |
weights = weights |
393 |
) |
|
394 | ||
395 | 3x |
list( |
396 | 3x |
glm_fit = glm_fit, |
397 | 3x |
emmeans_fit = emmeans_fit |
398 |
) |
|
399 |
} |
|
400 | ||
401 |
#' @describeIn h_glm_count Helper function to return results of a negative binomial model. |
|
402 |
#' |
|
403 |
#' @return |
|
404 |
#' * `h_glm_negbin()` returns the results of a negative binomial model. |
|
405 |
#' |
|
406 |
#' @keywords internal |
|
407 |
h_glm_negbin <- function(.var, |
|
408 |
.df_row, |
|
409 |
variables, |
|
410 |
weights) { |
|
411 | 9x |
arm <- variables$arm |
412 | 9x |
covariates <- variables$covariates |
413 | ||
414 | 9x |
formula <- stats::as.formula(paste0( |
415 | 9x |
.var, " ~ ", |
416 |
" + ", |
|
417 | 9x |
paste(covariates, collapse = " + "), |
418 |
" + ", |
|
419 | 9x |
arm |
420 |
)) |
|
421 | ||
422 | 9x |
glm_fit <- MASS::glm.nb( |
423 | 9x |
formula = formula, |
424 | 9x |
data = .df_row, |
425 | 9x |
link = "log" |
426 |
) |
|
427 | ||
428 | 7x |
emmeans_fit <- emmeans::emmeans( |
429 | 7x |
glm_fit, |
430 | 7x |
specs = arm, |
431 | 7x |
data = .df_row, |
432 | 7x |
type = "response", |
433 | 7x |
offset = 0, |
434 | 7x |
weights = weights |
435 |
) |
|
436 | ||
437 | 7x |
list( |
438 | 7x |
glm_fit = glm_fit, |
439 | 7x |
emmeans_fit = emmeans_fit |
440 |
) |
|
441 |
} |
|
442 | ||
443 |
# h_ppmeans -------------------------------------------------------------------- |
|
444 |
#' Function to return the estimated means using predicted probabilities |
|
445 |
#' |
|
446 |
#' @description |
|
447 |
#' For each arm level, the predicted mean rate is calculated using the fitted model object, with `newdata` |
|
448 |
#' set to the result of `stats::model.frame`, a reconstructed data or the original data, depending on the |
|
449 |
#' object formula (coming from the fit). The confidence interval is derived using the `conf_level` parameter. |
|
450 |
#' |
|
451 |
#' @param obj (`glm.fit`)\cr fitted model object used to derive the mean rate estimates in each treatment arm. |
|
452 |
#' @param .df_row (`data.frame`)\cr dataset that includes all the variables that are called in `.var` and `variables`. |
|
453 |
#' @param arm (`string`)\cr group variable, for which the covariate adjusted means of multiple groups will be |
|
454 |
#' summarized. Specifically, the first level of `arm` variable is taken as the reference group. |
|
455 |
#' @param conf_level (`proportion`)\cr value used to derive the confidence interval for the rate. |
|
456 |
#' |
|
457 |
#' @return |
|
458 |
#' * `h_ppmeans()` returns the estimated means. |
|
459 |
#' |
|
460 |
#' @seealso [summarize_glm_count()]. |
|
461 |
#' |
|
462 |
#' @export |
|
463 |
h_ppmeans <- function(obj, .df_row, arm, conf_level) { |
|
464 | 1x |
alpha <- 1 - conf_level |
465 | 1x |
p <- 1 - alpha / 2 |
466 | ||
467 | 1x |
arm_levels <- levels(.df_row[[arm]]) |
468 | ||
469 | 1x |
out <- lapply(arm_levels, function(lev) { |
470 | 3x |
temp <- .df_row |
471 | 3x |
temp[[arm]] <- factor(lev, levels = arm_levels) |
472 | ||
473 | 3x |
mf <- stats::model.frame(obj$formula, data = temp) |
474 | 3x |
X <- stats::model.matrix(obj$formula, data = mf) # nolint |
475 | ||
476 | 3x |
rate <- stats::predict(obj, newdata = mf, type = "response") |
477 | 3x |
rate_hat <- mean(rate) |
478 | ||
479 | 3x |
zz <- colMeans(rate * X) |
480 | 3x |
se <- sqrt(as.numeric(t(zz) %*% stats::vcov(obj) %*% zz)) |
481 | 3x |
rate_lwr <- rate_hat * exp(-stats::qnorm(p) * se / rate_hat) |
482 | 3x |
rate_upr <- rate_hat * exp(stats::qnorm(p) * se / rate_hat) |
483 | ||
484 | 3x |
c(rate_hat, rate_lwr, rate_upr) |
485 |
}) |
|
486 | ||
487 | 1x |
names(out) <- arm_levels |
488 | 1x |
out <- do.call(rbind, out) |
489 | 1x |
if ("negbin" %in% class(obj)) { |
490 | ! |
colnames(out) <- c("response", "asymp.LCL", "asymp.UCL") |
491 |
} else { |
|
492 | 1x |
colnames(out) <- c("rate", "asymp.LCL", "asymp.UCL") |
493 |
} |
|
494 | 1x |
out <- as.data.frame(out) |
495 | 1x |
out[[arm]] <- rownames(out) |
496 | 1x |
out |
497 |
} |
1 |
#' Compare variables between groups |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' The analyze function [compare_vars()] creates a layout element to summarize and compare one or more variables, using |
|
6 |
#' the S3 generic function [s_summary()] to calculate a list of summary statistics. A list of all available statistics |
|
7 |
#' for numeric variables can be viewed by running `get_stats("analyze_vars_numeric", add_pval = TRUE)` and for |
|
8 |
#' non-numeric variables by running `get_stats("analyze_vars_counts", add_pval = TRUE)`. Use the `.stats` parameter to |
|
9 |
#' specify the statistics to include in your output summary table. |
|
10 |
#' |
|
11 |
#' Prior to using this function in your table layout you must use [rtables::split_cols_by()] to create a column |
|
12 |
#' split on the variable to be used in comparisons, and specify a reference group via the `ref_group` parameter. |
|
13 |
#' Comparisons can be performed for each group (column) against the specified reference group by including the p-value |
|
14 |
#' statistic. |
|
15 |
#' |
|
16 |
#' @inheritParams argument_convention |
|
17 |
#' @param .stats (`character`)\cr statistics to select for the table. |
|
18 |
#' |
|
19 |
#' Options for numeric variables are: ``r shQuote(get_stats("analyze_vars_numeric", add_pval = TRUE))`` |
|
20 |
#' |
|
21 |
#' Options for non-numeric variables are: ``r shQuote(get_stats("analyze_vars_counts", add_pval = TRUE))`` |
|
22 |
#' |
|
23 |