1 |
#' Confidence Intervals for a Difference of Binomials |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("experimental")` |
|
4 |
#' |
|
5 |
#' Several confidence intervals for the difference between proportions. |
|
6 |
#' |
|
7 |
#' @param grp (`factor`)\cr vector assigning observations to one out of two groups |
|
8 |
#' (e.g. reference and treatment group). |
|
9 |
#' |
|
10 |
#' @name desctools_binom |
|
11 |
NULL |
|
12 | ||
13 |
#' Recycle List of Parameters |
|
14 |
#' |
|
15 |
#' This function recycles all supplied elements to the maximal dimension. |
|
16 |
#' |
|
17 |
#' @param ... (`any`)\cr Elements to recycle. |
|
18 |
#' |
|
19 |
#' @return A `list`. |
|
20 |
#' |
|
21 |
#' @keywords internal |
|
22 |
#' @noRd |
|
23 |
h_recycle <- function(...) { |
|
24 | 60x |
lst <- list(...) |
25 | 60x |
maxdim <- max(lengths(lst)) |
26 | 60x |
res <- lapply(lst, rep, length.out = maxdim) |
27 | 60x |
attr(res, "maxdim") <- maxdim |
28 | 60x |
return(res) |
29 |
} |
|
30 | ||
31 |
#' @describeIn desctools_binom Several confidence intervals for the difference between proportions. |
|
32 |
#' |
|
33 |
#' @return A `matrix` of 3 values: |
|
34 |
#' * `est`: estimate of proportion difference. |
|
35 |
#' * `lwr.ci`: estimate of lower end of the confidence interval. |
|
36 |
#' * `upr.ci`: estimate of upper end of the confidence interval. |
|
37 |
#' |
|
38 |
#' @examples |
|
39 |
#' # Internal function - desctools_binom |
|
40 |
#' \dontrun{ |
|
41 |
#' set.seed(2) |
|
42 |
#' rsp <- sample(c(TRUE, FALSE), replace = TRUE, size = 20) |
|
43 |
#' grp <- factor(c(rep("A", 10), rep("B", 10))) |
|
44 |
#' tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE))) |
|
45 |
#' desctools_binom( |
|
46 |
#' tbl[1], sum(tbl[1], tbl[3]), tbl[2], sum(tbl[2], tbl[4]), |
|
47 |
#' conf.level = 0.90, method = "waldcc" |
|
48 |
#' ) |
|
49 |
#' } |
|
50 |
#' |
|
51 |
#' @keywords internal |
|
52 |
desctools_binom <- function(x1, n1, x2, n2, conf.level = 0.95, sides = c( # nolint |
|
53 |
"two.sided", |
|
54 |
"left", "right" |
|
55 |
), method = c( |
|
56 |
"ac", "wald", "waldcc", "score", |
|
57 |
"scorecc", "mn", "mee", "blj", "ha", "hal", "jp" |
|
58 |
)) { |
|
59 | 18x |
if (missing(sides)) { |
60 | 18x |
sides <- match.arg(sides) |
61 |
} |
|
62 | 18x |
if (missing(method)) { |
63 | 1x |
method <- match.arg(method) |
64 |
} |
|
65 | 18x |
iBinomDiffCI <- function(x1, n1, x2, n2, conf.level, sides, # nolint |
66 | 18x |
method) { |
67 | 18x |
if (sides != "two.sided") { |
68 | ! |
conf.level <- 1 - 2 * (1 - conf.level) # nolint |
69 |
} |
|
70 | 18x |
alpha <- 1 - conf.level |
71 | 18x |
kappa <- stats::qnorm(1 - alpha / 2) |
72 | 18x |
p1_hat <- x1 / n1 |
73 | 18x |
p2_hat <- x2 / n2 |
74 | 18x |
est <- p1_hat - p2_hat |
75 | 18x |
switch(method, |
76 | 18x |
wald = { |
77 | 2x |
vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2 |
78 | 2x |
term2 <- kappa * sqrt(vd) |
79 | 2x |
ci_lwr <- max(-1, est - term2) |
80 | 2x |
ci_upr <- min(1, est + term2) |
81 |
}, |
|
82 | 18x |
waldcc = { |
83 | 2x |
vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2 |
84 | 2x |
term2 <- kappa * sqrt(vd) |
85 | 2x |
term2 <- term2 + 0.5 * (1 / n1 + 1 / n2) |
86 | 2x |
ci_lwr <- max(-1, est - term2) |
87 | 2x |
ci_upr <- min(1, est + term2) |
88 |
}, |
|
89 | 18x |
ac = { |
90 | 2x |
n1 <- n1 + 2 |
91 | 2x |
n2 <- n2 + 2 |
92 | 2x |
x1 <- x1 + 1 |
93 | 2x |
x2 <- x2 + 1 |
94 | 2x |
p1_hat <- x1 / n1 |
95 | 2x |
p2_hat <- x2 / n2 |
96 | 2x |
est1 <- p1_hat - p2_hat |
97 | 2x |
vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2 |
98 | 2x |
term2 <- kappa * sqrt(vd) |
99 | 2x |
ci_lwr <- max(-1, est1 - term2) |
100 | 2x |
ci_upr <- min(1, est1 + term2) |
101 |
}, |
|
102 | 18x |
exact = { |
103 | ! |
ci_lwr <- NA |
104 | ! |
ci_upr <- NA |
105 |
}, |
|
106 | 18x |
score = { |
107 | 2x |
w1 <- desctools_binomci( |
108 | 2x |
x = x1, n = n1, conf.level = conf.level, |
109 | 2x |
method = "wilson" |
110 |
) |
|
111 | 2x |
w2 <- desctools_binomci( |
112 | 2x |
x = x2, n = n2, conf.level = conf.level, |
113 | 2x |
method = "wilson" |
114 |
) |
|
115 | 2x |
l1 <- w1[2] |
116 | 2x |
u1 <- w1[3] |
117 | 2x |
l2 <- w2[2] |
118 | 2x |
u2 <- w2[3] |
119 | 2x |
ci_lwr <- est - kappa * sqrt(l1 * (1 - l1) / n1 + |
120 | 2x |
u2 * (1 - u2) / n2) |
121 | 2x |
ci_upr <- est + kappa * sqrt(u1 * (1 - u1) / n1 + |
122 | 2x |
l2 * (1 - l2) / n2) |
123 |
}, |
|
124 | 18x |
scorecc = { |
125 | 1x |
w1 <- desctools_binomci( |
126 | 1x |
x = x1, n = n1, conf.level = conf.level, |
127 | 1x |
method = "wilsoncc" |
128 |
) |
|
129 | 1x |
w2 <- desctools_binomci( |
130 | 1x |
x = x2, n = n2, conf.level = conf.level, |
131 | 1x |
method = "wilsoncc" |
132 |
) |
|
133 | 1x |
l1 <- w1[2] |
134 | 1x |
u1 <- w1[3] |
135 | 1x |
l2 <- w2[2] |
136 | 1x |
u2 <- w2[3] |
137 | 1x |
ci_lwr <- max(-1, est - sqrt((p1_hat - l1)^2 + |
138 | 1x |
(u2 - p2_hat)^2)) |
139 | 1x |
ci_upr <- min(1, est + sqrt((u1 - p1_hat)^2 + (p2_hat - |
140 | 1x |
l2)^2)) |
141 |
}, |
|
142 | 18x |
mee = { |
143 | 1x |
.score <- function(p1, n1, p2, n2, dif) { |
144 | ! |
if (dif > 1) dif <- 1 |
145 | ! |
if (dif < -1) dif <- -1 |
146 | 24x |
diff <- p1 - p2 - dif |
147 | 24x |
if (abs(diff) == 0) { |
148 | ! |
res <- 0 |
149 |
} else { |
|
150 | 24x |
t <- n2 / n1 |
151 | 24x |
a <- 1 + t |
152 | 24x |
b <- -(1 + t + p1 + t * p2 + dif * (t + 2)) |
153 | 24x |
c <- dif * dif + dif * (2 * p1 + t + 1) + p1 + |
154 | 24x |
t * p2 |
155 | 24x |
d <- -p1 * dif * (1 + dif) |
156 | 24x |
v <- (b / a / 3)^3 - b * c / (6 * a * a) + d / a / 2 |
157 | 24x |
if (abs(v) < .Machine$double.eps) v <- 0 |
158 | 24x |
s <- sqrt((b / a / 3)^2 - c / a / 3) |
159 | 24x |
u <- ifelse(v > 0, 1, -1) * s |
160 | 24x |
w <- (3.141592654 + acos(v / u^3)) / 3 |
161 | 24x |
p1d <- 2 * u * cos(w) - b / a / 3 |
162 | 24x |
p2d <- p1d - dif |
163 | 24x |
n <- n1 + n2 |
164 | 24x |
res <- (p1d * (1 - p1d) / n1 + p2d * (1 - p2d) / n2) |
165 |
} |
|
166 | 24x |
return(sqrt(res)) |
167 |
} |
|
168 | 1x |
pval <- function(delta) { |
169 | 24x |
z <- (est - delta) / .score( |
170 | 24x |
p1_hat, n1, p2_hat, |
171 | 24x |
n2, delta |
172 |
) |
|
173 | 24x |
2 * min(stats::pnorm(z), 1 - stats::pnorm(z)) |
174 |
} |
|
175 | 1x |
ci_lwr <- max(-1, stats::uniroot(function(delta) { |
176 | 12x |
pval(delta) - |
177 | 12x |
alpha |
178 | 1x |
}, interval = c(-1 + 1e-06, est - 1e-06))$root) |
179 | 1x |
ci_upr <- min(1, stats::uniroot(function(delta) { |
180 | 12x |
pval(delta) - |
181 | 12x |
alpha |
182 | 1x |
}, interval = c(est + 1e-06, 1 - 1e-06))$root) |
183 |
}, |
|
184 | 18x |
blj = { |
185 | 1x |
p1_dash <- (x1 + 0.5) / (n1 + 1) |
186 | 1x |
p2_dash <- (x2 + 0.5) / (n2 + 1) |
187 | 1x |
vd <- p1_dash * (1 - p1_dash) / n1 + p2_dash * (1 - |
188 | 1x |
p2_dash) / n2 |
189 | 1x |
term2 <- kappa * sqrt(vd) |
190 | 1x |
est_dash <- p1_dash - p2_dash |
191 | 1x |
ci_lwr <- max(-1, est_dash - term2) |
192 | 1x |
ci_upr <- min(1, est_dash + term2) |
193 |
}, |
|
194 | 18x |
ha = { |
195 | 4x |
term2 <- 1 / (2 * min(n1, n2)) + kappa * sqrt(p1_hat * |
196 | 4x |
(1 - p1_hat) / (n1 - 1) + p2_hat * (1 - p2_hat) / (n2 - |
197 | 4x |
1)) |
198 | 4x |
ci_lwr <- max(-1, est - term2) |
199 | 4x |
ci_upr <- min(1, est + term2) |
200 |
}, |
|
201 | 18x |
mn = { |
202 | 1x |
.conf <- function(x1, n1, x2, n2, z, lower = FALSE) { |
203 | 2x |
p1 <- x1 / n1 |
204 | 2x |
p2 <- x2 / n2 |
205 | 2x |
p_hat <- p1 - p2 |
206 | 2x |
dp <- 1 + ifelse(lower, 1, -1) * p_hat |
207 | 2x |
i <- 1 |
208 | 2x |
while (i <= 50) { |
209 | 46x |
dp <- 0.5 * dp |
210 | 46x |
y <- p_hat + ifelse(lower, -1, 1) * dp |
211 | 46x |
score <- .score(p1, n1, p2, n2, y) |
212 | 46x |
if (score < z) { |
213 | 20x |
p_hat <- y |
214 |
} |
|
215 | 46x |
if ((dp < 1e-07) || (abs(z - score) < 1e-06)) { |
216 | 2x |
(break)() |
217 |
} else { |
|
218 | 44x |
i <- i + |
219 | 44x |
1 |
220 |
} |
|
221 |
} |
|
222 | 2x |
return(y) |
223 |
} |
|
224 | 1x |
.score <- function(p1, n1, p2, n2, dif) { |
225 | 46x |
diff <- p1 - p2 - dif |
226 | 46x |
if (abs(diff) == 0) { |
227 | ! |
res <- 0 |
228 |
} else { |
|
229 | 46x |
t <- n2 / n1 |
230 | 46x |
a <- 1 + t |
231 | 46x |
b <- -(1 + t + p1 + t * p2 + dif * (t + 2)) |
232 | 46x |
c <- dif * dif + dif * (2 * p1 + t + 1) + p1 + |
233 | 46x |
t * p2 |
234 | 46x |
d <- -p1 * dif * (1 + dif) |
235 | 46x |
v <- (b / a / 3)^3 - b * c / (6 * a * a) + d / a / 2 |
236 | 46x |
s <- sqrt((b / a / 3)^2 - c / a / 3) |
237 | 46x |
u <- ifelse(v > 0, 1, -1) * s |
238 | 46x |
w <- (3.141592654 + acos(v / u^3)) / 3 |
239 | 46x |
p1d <- 2 * u * cos(w) - b / a / 3 |
240 | 46x |
p2d <- p1d - dif |
241 | 46x |
n <- n1 + n2 |
242 | 46x |
var <- (p1d * (1 - p1d) / n1 + p2d * (1 - p2d) / n2) * |
243 | 46x |
n / (n - 1) |
244 | 46x |
res <- diff^2 / var |
245 |
} |
|
246 | 46x |
return(res) |
247 |
} |
|
248 | 1x |
z <- stats::qchisq(conf.level, 1) |
249 | 1x |
ci_lwr <- max(-1, .conf(x1, n1, x2, n2, z, TRUE)) |
250 | 1x |
ci_upr <- min(1, .conf(x1, n1, x2, n2, z, FALSE)) |
251 |
}, |
|
252 | 18x |
beal = { |
253 | ! |
a <- p1_hat + p2_hat |
254 | ! |
b <- p1_hat - p2_hat |
255 | ! |
u <- ((1 / n1) + (1 / n2)) / 4 |
256 | ! |
v <- ((1 / n1) - (1 / n2)) / 4 |
257 | ! |
V <- u * ((2 - a) * a - b^2) + 2 * v * (1 - a) * b # nolint |
258 | ! |
z <- stats::qchisq(p = 1 - alpha / 2, df = 1) |
259 | ! |
A <- sqrt(z * (V + z * u^2 * (2 - a) * a + z * v^2 * (1 - a)^2)) # nolint |
260 | ! |
B <- (b + z * v * (1 - a)) / (1 + z * u) # nolint |
261 | ! |
ci_lwr <- max(-1, B - A / (1 + z * u)) |
262 | ! |
ci_upr <- min(1, B + A / (1 + z * u)) |
263 |
}, |
|
264 | 18x |
hal = { |
265 | 1x |
psi <- (p1_hat + p2_hat) / 2 |
266 | 1x |
u <- (1 / n1 + 1 / n2) / 4 |
267 | 1x |
v <- (1 / n1 - 1 / n2) / 4 |
268 | 1x |
z <- kappa |
269 | 1x |
theta <- ((p1_hat - p2_hat) + z^2 * v * (1 - 2 * |
270 | 1x |
psi)) / (1 + z^2 * u) |
271 | 1x |
w <- z / (1 + z^2 * u) * sqrt(u * (4 * psi * (1 - psi) - |
272 | 1x |
(p1_hat - p2_hat)^2) + 2 * v * (1 - 2 * psi) * |
273 | 1x |
(p1_hat - p2_hat) + 4 * z^2 * u^2 * (1 - psi) * |
274 | 1x |
psi + z^2 * v^2 * (1 - 2 * psi)^2) |
275 | 1x |
c(theta + w, theta - w) |
276 | 1x |
ci_lwr <- max(-1, theta - w) |
277 | 1x |
ci_upr <- min(1, theta + w) |
278 |
}, |
|
279 | 18x |
jp = { |
280 | 1x |
psi <- 0.5 * ((x1 + 0.5) / (n1 + 1) + (x2 + 0.5) / (n2 + |
281 | 1x |
1)) |
282 | 1x |
u <- (1 / n1 + 1 / n2) / 4 |
283 | 1x |
v <- (1 / n1 - 1 / n2) / 4 |
284 | 1x |
z <- kappa |
285 | 1x |
theta <- ((p1_hat - p2_hat) + z^2 * v * (1 - 2 * |
286 | 1x |
psi)) / (1 + z^2 * u) |
287 | 1x |
w <- z / (1 + z^2 * u) * sqrt(u * (4 * psi * (1 - psi) - |
288 | 1x |
(p1_hat - p2_hat)^2) + 2 * v * (1 - 2 * psi) * |
289 | 1x |
(p1_hat - p2_hat) + 4 * z^2 * u^2 * (1 - psi) * |
290 | 1x |
psi + z^2 * v^2 * (1 - 2 * psi)^2) |
291 | 1x |
c(theta + w, theta - w) |
292 | 1x |
ci_lwr <- max(-1, theta - w) |
293 | 1x |
ci_upr <- min(1, theta + w) |
294 |
}, |
|
295 |
) |
|
296 | 18x |
ci <- c( |
297 | 18x |
est = est, lwr.ci = min(ci_lwr, ci_upr), |
298 | 18x |
upr.ci = max(ci_lwr, ci_upr) |
299 |
) |
|
300 | 18x |
if (sides == "left") { |
301 | ! |
ci[3] <- 1 |
302 | 18x |
} else if (sides == "right") { |
303 | ! |
ci[2] <- -1 |
304 |
} |
|
305 | 18x |
return(ci) |
306 |
} |
|
307 | 18x |
method <- match.arg(arg = method, several.ok = TRUE) |
308 | 18x |
sides <- match.arg(arg = sides, several.ok = TRUE) |
309 | 18x |
lst <- h_recycle( |
310 | 18x |
x1 = x1, n1 = n1, x2 = x2, n2 = n2, conf.level = conf.level, |
311 | 18x |
sides = sides, method = method |
312 |
) |
|
313 | 18x |
res <- t(sapply(1:attr(lst, "maxdim"), function(i) { |
314 | 18x |
iBinomDiffCI( |
315 | 18x |
x1 = lst$x1[i], |
316 | 18x |
n1 = lst$n1[i], x2 = lst$x2[i], n2 = lst$n2[i], conf.level = lst$conf.level[i], |
317 | 18x |
sides = lst$sides[i], method = lst$method[i] |
318 |
) |
|
319 |
})) |
|
320 | 18x |
lgn <- h_recycle(x1 = if (is.null(names(x1))) { |
321 | 18x |
paste("x1", seq_along(x1), sep = ".") |
322 |
} else { |
|
323 | ! |
names(x1) |
324 | 18x |
}, n1 = if (is.null(names(n1))) { |
325 | 18x |
paste("n1", seq_along(n1), sep = ".") |
326 |
} else { |
|
327 | ! |
names(n1) |
328 | 18x |
}, x2 = if (is.null(names(x2))) { |
329 | 18x |
paste("x2", seq_along(x2), sep = ".") |
330 |
} else { |
|
331 | ! |
names(x2) |
332 | 18x |
}, n2 = if (is.null(names(n2))) { |
333 | 18x |
paste("n2", seq_along(n2), sep = ".") |
334 |
} else { |
|
335 | ! |
names(n2) |
336 | 18x |
}, conf.level = conf.level, sides = sides, method = method) |
337 | 18x |
xn <- apply(as.data.frame(lgn[sapply(lgn, function(x) { |
338 | 126x |
length(unique(x)) != |
339 | 126x |
1 |
340 | 18x |
})]), 1, paste, collapse = ":") |
341 | 18x |
rownames(res) <- xn |
342 | 18x |
return(res) |
343 |
} |
|
344 | ||
345 |
#' @describeIn desctools_binom Compute confidence intervals for binomial proportions. |
|
346 |
#' |
|
347 |
#' @param x (`count`)\cr number of successes |
|
348 |
#' @param n (`count`)\cr number of trials |
|
349 |
#' @param conf.level (`proportion`)\cr confidence level, defaults to 0.95. |
|
350 |
#' @param sides (`character`)\cr side of the confidence interval to compute. Must be one of "two-sided" (default), |
|
351 |
#' "left", or "right". |
|
352 |
#' @param method (`character`)\cr method to use. Can be one out of: "wald", "wilson", "wilsoncc", "agresti-coull", |
|
353 |
#' "jeffreys", "modified wilson", "modified jeffreys", "clopper-pearson", "arcsine", "logit", "witting", "pratt", |
|
354 |
#' "midp", "lik", and "blaker". |
|
355 |
#' |
|
356 |
#' @return A `matrix` with 3 columns containing: |
|
357 |
#' * `est`: estimate of proportion difference. |
|
358 |
#' * `lwr.ci`: lower end of the confidence interval. |
|
359 |
#' * `upr.ci`: upper end of the confidence interval. |
|
360 |
#' |
|
361 |
#' @keywords internal |
|
362 |
desctools_binomci <- function(x, |
|
363 |
n, |
|
364 |
conf.level = 0.95, # nolint |
|
365 |
sides = c("two.sided", "left", "right"), |
|
366 |
method = c( |
|
367 |
"wilson", "wald", "waldcc", "agresti-coull", |
|
368 |
"jeffreys", "modified wilson", "wilsoncc", "modified jeffreys", |
|
369 |
"clopper-pearson", "arcsine", "logit", "witting", "pratt", |
|
370 |
"midp", "lik", "blaker" |
|
371 |
), |
|
372 |
rand = 123, |
|
373 |
tol = 1e-05) { |
|
374 | 24x |
if (missing(method)) { |
375 | 1x |
method <- "wilson" |
376 |
} |
|
377 | 24x |
if (missing(sides)) { |
378 | 23x |
sides <- "two.sided" |
379 |
} |
|
380 | 24x |
iBinomCI <- function(x, n, conf.level = 0.95, sides = c( # nolint |
381 | 24x |
"two.sided", |
382 | 24x |
"left", "right" |
383 | 24x |
), method = c( |
384 | 24x |
"wilson", "wilsoncc", "wald", |
385 | 24x |
"waldcc", "agresti-coull", "jeffreys", "modified wilson", |
386 | 24x |
"modified jeffreys", "clopper-pearson", "arcsine", "logit", |
387 | 24x |
"witting", "pratt", "midp", "lik", "blaker" |
388 | 24x |
), rand = 123, |
389 | 24x |
tol = 1e-05) { |
390 | 24x |
if (length(x) != 1) { |
391 | ! |
stop("'x' has to be of length 1 (number of successes)") |
392 |
} |
|
393 | 24x |
if (length(n) != 1) { |
394 | ! |
stop("'n' has to be of length 1 (number of trials)") |
395 |
} |
|
396 | 24x |
if (length(conf.level) != 1) { |
397 | ! |
stop("'conf.level' has to be of length 1 (confidence level)") |
398 |
} |
|
399 | 24x |
if (conf.level < 0.5 || conf.level > 1) { |
400 | ! |
stop("'conf.level' has to be in [0.5, 1]") |
401 |
} |
|
402 | 24x |
sides <- match.arg(sides, choices = c( |
403 | 24x |
"two.sided", "left", |
404 | 24x |
"right" |
405 | 24x |
), several.ok = FALSE) |
406 | 24x |
if (sides != "two.sided") { |
407 | 1x |
conf.level <- 1 - 2 * (1 - conf.level) # nolint |
408 |
} |
|
409 | 24x |
alpha <- 1 - conf.level |
410 | 24x |
kappa <- stats::qnorm(1 - alpha / 2) |
411 | 24x |
p_hat <- x / n |
412 | 24x |
q_hat <- 1 - p_hat |
413 | 24x |
est <- p_hat |
414 | 24x |
switch(match.arg(arg = method, choices = c( |
415 | 24x |
"wilson", |
416 | 24x |
"wald", "waldcc", "wilsoncc", "agresti-coull", "jeffreys", |
417 | 24x |
"modified wilson", "modified jeffreys", "clopper-pearson", |
418 | 24x |
"arcsine", "logit", "witting", "pratt", "midp", "lik", |
419 | 24x |
"blaker" |
420 |
)), |
|
421 | 24x |
wald = { |
422 | 1x |
term2 <- kappa * sqrt(p_hat * q_hat) / sqrt(n) |
423 | 1x |
ci_lwr <- max(0, p_hat - term2) |
424 | 1x |
ci_upr <- min(1, p_hat + term2) |
425 |
}, |
|
426 | 24x |
waldcc = { |
427 | 1x |
term2 <- kappa * sqrt(p_hat * q_hat) / sqrt(n) |
428 | 1x |
term2 <- term2 + 1 / (2 * n) |
429 | 1x |
ci_lwr <- max(0, p_hat - term2) |
430 | 1x |
ci_upr <- min(1, p_hat + term2) |
431 |
}, |
|
432 | 24x |
wilson = { |
433 | 6x |
term1 <- (x + kappa^2 / 2) / (n + kappa^2) |
434 | 6x |
term2 <- kappa * sqrt(n) / (n + kappa^2) * sqrt(p_hat * |
435 | 6x |
q_hat + kappa^2 / (4 * n)) |
436 | 6x |
ci_lwr <- max(0, term1 - term2) |
437 | 6x |
ci_upr <- min(1, term1 + term2) |
438 |
}, |
|
439 | 24x |
wilsoncc = { |
440 | 3x |
lci <- (2 * x + kappa^2 - 1 - kappa * sqrt(kappa^2 - |
441 | 3x |
2 - 1 / n + 4 * p_hat * (n * q_hat + 1))) / (2 * |
442 | 3x |
(n + kappa^2)) |
443 | 3x |
uci <- (2 * x + kappa^2 + 1 + kappa * sqrt(kappa^2 + |
444 | 3x |
2 - 1 / n + 4 * p_hat * (n * q_hat - 1))) / (2 * |
445 | 3x |
(n + kappa^2)) |
446 | 3x |
ci_lwr <- max(0, ifelse(p_hat == 0, 0, lci)) |
447 | 3x |
ci_upr <- min(1, ifelse(p_hat == 1, 1, uci)) |
448 |
}, |
|
449 | 24x |
`agresti-coull` = { |
450 | 1x |
x_tilde <- x + kappa^2 / 2 |
451 | 1x |
n_tilde <- n + kappa^2 |
452 | 1x |
p_tilde <- x_tilde / n_tilde |
453 | 1x |
q_tilde <- 1 - p_tilde |
454 | 1x |
est <- p_tilde |
455 | 1x |
term2 <- kappa * sqrt(p_tilde * q_tilde) / sqrt(n_tilde) |
456 | 1x |
ci_lwr <- max(0, p_tilde - term2) |
457 | 1x |
ci_upr <- min(1, p_tilde + term2) |
458 |
}, |
|
459 | 24x |
jeffreys = { |
460 | 1x |
if (x == 0) { |
461 | ! |
ci_lwr <- 0 |
462 |
} else { |
|
463 | 1x |
ci_lwr <- stats::qbeta( |
464 | 1x |
alpha / 2, |
465 | 1x |
x + 0.5, n - x + 0.5 |
466 |
) |
|
467 |
} |
|
468 | 1x |
if (x == n) { |
469 | ! |
ci_upr <- 1 |
470 |
} else { |
|
471 | 1x |
ci_upr <- stats::qbeta(1 - |
472 | 1x |
alpha / 2, x + 0.5, n - x + 0.5) |
473 |
} |
|
474 |
}, |
|
475 | 24x |
`modified wilson` = { |
476 | 1x |
term1 <- (x + kappa^2 / 2) / (n + kappa^2) |
477 | 1x |
term2 <- kappa * sqrt(n) / (n + kappa^2) * sqrt(p_hat * |
478 | 1x |
q_hat + kappa^2 / (4 * n)) |
479 | 1x |
if ((n <= 50 & x %in% c(1, 2)) | (n >= 51 & x %in% |
480 | 1x |
c(1:3))) { |
481 | ! |
ci_lwr <- 0.5 * stats::qchisq(alpha, 2 * |
482 | ! |
x) / n |
483 |
} else { |
|
484 | 1x |
ci_lwr <- max(0, term1 - term2) |
485 |
} |
|
486 | 1x |
if ((n <= 50 & x %in% c(n - 1, n - 2)) | (n >= 51 & |
487 | 1x |
x %in% c(n - (1:3)))) { |
488 | ! |
ci_upr <- 1 - 0.5 * stats::qchisq( |
489 | ! |
alpha, |
490 | ! |
2 * (n - x) |
491 | ! |
) / n |
492 |
} else { |
|
493 | 1x |
ci_upr <- min(1, term1 + |
494 | 1x |
term2) |
495 |
} |
|
496 |
}, |
|
497 | 24x |
`modified jeffreys` = { |
498 | 1x |
if (x == n) { |
499 | ! |
ci_lwr <- (alpha / 2)^(1 / n) |
500 |
} else { |
|
501 | 1x |
if (x <= 1) { |
502 | ! |
ci_lwr <- 0 |
503 |
} else { |
|
504 | 1x |
ci_lwr <- stats::qbeta( |
505 | 1x |
alpha / 2, |
506 | 1x |
x + 0.5, n - x + 0.5 |
507 |
) |
|
508 |
} |
|
509 |
} |
|
510 | 1x |
if (x == 0) { |
511 | ! |
ci_upr <- 1 - (alpha / 2)^(1 / n) |
512 |
} else { |
|
513 | 1x |
if (x >= n - 1) { |
514 | ! |
ci_upr <- 1 |
515 |
} else { |
|
516 | 1x |
ci_upr <- stats::qbeta(1 - |
517 | 1x |
alpha / 2, x + 0.5, n - x + 0.5) |
518 |
} |
|
519 |
} |
|
520 |
}, |
|
521 | 24x |
`clopper-pearson` = { |
522 | 1x |
ci_lwr <- stats::qbeta(alpha / 2, x, n - x + 1) |
523 | 1x |
ci_upr <- stats::qbeta(1 - alpha / 2, x + 1, n - x) |
524 |
}, |
|
525 | 24x |
arcsine = { |
526 | 1x |
p_tilde <- (x + 0.375) / (n + 0.75) |
527 | 1x |
est <- p_tilde |
528 | 1x |
ci_lwr <- sin(asin(sqrt(p_tilde)) - 0.5 * kappa / sqrt(n))^2 |
529 | 1x |
ci_upr <- sin(asin(sqrt(p_tilde)) + 0.5 * kappa / sqrt(n))^2 |
530 |
}, |
|
531 | 24x |
logit = { |
532 | 1x |
lambda_hat <- log(x / (n - x)) |
533 | 1x |
V_hat <- n / (x * (n - x)) # nolint |
534 | 1x |
lambda_lower <- lambda_hat - kappa * sqrt(V_hat) |
535 | 1x |
lambda_upper <- lambda_hat + kappa * sqrt(V_hat) |
536 | 1x |
ci_lwr <- exp(lambda_lower) / (1 + exp(lambda_lower)) |
537 | 1x |
ci_upr <- exp(lambda_upper) / (1 + exp(lambda_upper)) |
538 |
}, |
|
539 | 24x |
witting = { |
540 | 1x |
set.seed(rand) |
541 | 1x |
x_tilde <- x + stats::runif(1, min = 0, max = 1) |
542 | 1x |
pbinom_abscont <- function(q, size, prob) { |
543 | 22x |
v <- trunc(q) |
544 | 22x |
term1 <- stats::pbinom(v - 1, size = size, prob = prob) |
545 | 22x |
term2 <- (q - v) * stats::dbinom(v, size = size, prob = prob) |
546 | 22x |
return(term1 + term2) |
547 |
} |
|
548 | 1x |
qbinom_abscont <- function(p, size, x) { |
549 | 2x |
fun <- function(prob, size, x, p) { |
550 | 22x |
pbinom_abscont(x, size, prob) - p |
551 |
} |
|
552 | 2x |
stats::uniroot(fun, |
553 | 2x |
interval = c(0, 1), size = size, |
554 | 2x |
x = x, p = p |
555 | 2x |
)$root |
556 |
} |
|
557 | 1x |
ci_lwr <- qbinom_abscont(1 - alpha, size = n, x = x_tilde) |
558 | 1x |
ci_upr <- qbinom_abscont(alpha, size = n, x = x_tilde) |
559 |
}, |
|
560 | 24x |
pratt = { |
561 | 1x |
if (x == 0) { |
562 | ! |
ci_lwr <- 0 |
563 | ! |
ci_upr <- 1 - alpha^(1 / n) |
564 | 1x |
} else if (x == 1) { |
565 | ! |
ci_lwr <- 1 - (1 - alpha / 2)^(1 / n) |
566 | ! |
ci_upr <- 1 - (alpha / 2)^(1 / n) |
567 | 1x |
} else if (x == (n - 1)) { |
568 | ! |
ci_lwr <- (alpha / 2)^(1 / n) |
569 | ! |
ci_upr <- (1 - alpha / 2)^(1 / n) |
570 | 1x |
} else if (x == n) { |
571 | ! |
ci_lwr <- alpha^(1 / n) |
572 | ! |
ci_upr <- 1 |
573 |
} else { |
|
574 | 1x |
z <- stats::qnorm(1 - alpha / 2) |
575 | 1x |
A <- ((x + 1) / (n - x))^2 # nolint |
576 | 1x |
B <- 81 * (x + 1) * (n - x) - 9 * n - 8 # nolint |
577 | 1x |
C <- (0 - 3) * z * sqrt(9 * (x + 1) * (n - x) * (9 * n + 5 - z^2) + n + 1) # nolint |
578 | 1x |
D <- 81 * (x + 1)^2 - 9 * (x + 1) * (2 + z^2) + 1 # nolint |
579 | 1x |
E <- 1 + A * ((B + C) / D)^3 # nolint |
580 | 1x |
ci_upr <- 1 / E |
581 | 1x |
A <- (x / (n - x - 1))^2 # nolint |
582 | 1x |
B <- 81 * x * (n - x - 1) - 9 * n - 8 # nolint |
583 | 1x |
C <- 3 * z * sqrt(9 * x * (n - x - 1) * (9 * n + 5 - z^2) + n + 1) # nolint |
584 | 1x |
D <- 81 * x^2 - 9 * x * (2 + z^2) + 1 # nolint |
585 | 1x |
E <- 1 + A * ((B + C) / D)^3 # nolint |
586 | 1x |
ci_lwr <- 1 / E |
587 |
} |
|
588 |
}, |
|
589 | 24x |
midp = { |
590 | 1x |
f_low <- function(pi, x, n) { |
591 | 12x |
1 / 2 * stats::dbinom(x, size = n, prob = pi) + stats::pbinom(x, |
592 | 12x |
size = n, prob = pi, lower.tail = FALSE |
593 |
) - |
|
594 | 12x |
(1 - conf.level) / 2 |
595 |
} |
|
596 | 1x |
f_up <- function(pi, x, n) { |
597 | 12x |
1 / 2 * stats::dbinom(x, size = n, prob = pi) + stats::pbinom(x - |
598 | 12x |
1, size = n, prob = pi) - (1 - conf.level) / 2 |
599 |
} |
|
600 | 1x |
ci_lwr <- 0 |
601 | 1x |
ci_upr <- 1 |
602 | 1x |
if (x != 0) { |
603 | 1x |
ci_lwr <- stats::uniroot(f_low, |
604 | 1x |
interval = c(0, p_hat), |
605 | 1x |
x = x, n = n |
606 | 1x |
)$root |
607 |
} |
|
608 | 1x |
if (x != n) { |
609 | 1x |
ci_upr <- stats::uniroot(f_up, interval = c( |
610 | 1x |
p_hat, |
611 | 1x |
1 |
612 | 1x |
), x = x, n = n)$root |
613 |
} |
|
614 |
}, |
|
615 | 24x |
lik = { |
616 | 2x |
ci_lwr <- 0 |
617 | 2x |
ci_upr <- 1 |
618 | 2x |
z <- stats::qnorm(1 - alpha * 0.5) |
619 | 2x |
tol <- .Machine$double.eps^0.5 |
620 | 2x |
BinDev <- function(y, x, mu, wt, bound = 0, tol = .Machine$double.eps^0.5, # nolint |
621 |
...) { |
|
622 | 40x |
ll_y <- ifelse(y %in% c(0, 1), 0, stats::dbinom(x, wt, |
623 | 40x |
y, |
624 | 40x |
log = TRUE |
625 |
)) |
|
626 | 40x |
ll_mu <- ifelse(mu %in% c(0, 1), 0, stats::dbinom(x, |
627 | 40x |
wt, mu, |
628 | 40x |
log = TRUE |
629 |
)) |
|
630 | 40x |
res <- ifelse(abs(y - mu) < tol, 0, sign(y - |
631 | 40x |
mu) * sqrt(-2 * (ll_y - ll_mu))) |
632 | 40x |
return(res - bound) |
633 |
} |
|
634 | 2x |
if (x != 0 && tol < p_hat) { |
635 | 2x |
ci_lwr <- if (BinDev( |
636 | 2x |
tol, x, p_hat, n, -z, |
637 | 2x |
tol |
638 | 2x |
) <= 0) { |
639 | 2x |
stats::uniroot( |
640 | 2x |
f = BinDev, interval = c(tol, if (p_hat < |
641 | 2x |
tol || p_hat == 1) { |
642 | ! |
1 - tol |
643 |
} else { |
|
644 | 2x |
p_hat |
645 | 2x |
}), bound = -z, |
646 | 2x |
x = x, mu = p_hat, wt = n |
647 | 2x |
)$root |
648 |
} |
|
649 |
} |
|
650 | 2x |
if (x != n && p_hat < (1 - tol)) { |
651 | 2x |
ci_upr <- if (BinDev(y = 1 - tol, x = x, mu = ifelse(p_hat > |
652 | 2x |
1 - tol, tol, p_hat), wt = n, bound = z, tol = tol) < |
653 | 2x |
0) { |
654 | ! |
ci_lwr <- if (BinDev( |
655 | ! |
tol, x, if (p_hat < |
656 | ! |
tol || p_hat == 1) { |
657 | ! |
1 - tol |
658 |
} else { |
|
659 | ! |
p_hat |
660 | ! |
}, n, |
661 | ! |
-z, tol |
662 | ! |
) <= 0) { |
663 | ! |
stats::uniroot( |
664 | ! |
f = BinDev, interval = c(tol, p_hat), |
665 | ! |
bound = -z, x = x, mu = p_hat, wt = n |
666 | ! |
)$root |
667 |
} |
|
668 |
} else { |
|
669 | 2x |
stats::uniroot( |
670 | 2x |
f = BinDev, interval = c(if (p_hat > |
671 | 2x |
1 - tol) { |
672 | ! |
tol |
673 |
} else { |
|
674 | 2x |
p_hat |
675 | 2x |
}, 1 - tol), bound = z, |
676 | 2x |
x = x, mu = p_hat, wt = n |
677 | 2x |
)$root |
678 |
} |
|
679 |
} |
|
680 |
}, |
|
681 | 24x |
blaker = { |
682 | 1x |
acceptbin <- function(x, n, p) { |
683 | 3954x |
p1 <- 1 - stats::pbinom(x - 1, n, p) |
684 | 3954x |
p2 <- stats::pbinom(x, n, p) |
685 | 3954x |
a1 <- p1 + stats::pbinom(stats::qbinom(p1, n, p) - 1, n, p) |
686 | 3954x |
a2 <- p2 + 1 - stats::pbinom( |
687 | 3954x |
stats::qbinom(1 - p2, n, p), n, |
688 | 3954x |
p |
689 |
) |
|
690 | 3954x |
return(min(a1, a2)) |
691 |
} |
|
692 | 1x |
ci_lwr <- 0 |
693 | 1x |
ci_upr <- 1 |
694 | 1x |
if (x != 0) { |
695 | 1x |
ci_lwr <- stats::qbeta((1 - conf.level) / 2, x, n - |
696 | 1x |
x + 1) |
697 | 1x |
while (acceptbin(x, n, ci_lwr + tol) < (1 - |
698 | 1x |
conf.level)) { |
699 | 1976x |
ci_lwr <- ci_lwr + tol |
700 |
} |
|
701 |
} |
|
702 | 1x |
if (x != n) { |
703 | 1x |
ci_upr <- stats::qbeta(1 - (1 - conf.level) / 2, x + |
704 | 1x |
1, n - x) |
705 | 1x |
while (acceptbin(x, n, ci_upr - tol) < (1 - |
706 | 1x |
conf.level)) { |
707 | 1976x |
ci_upr <- ci_upr - tol |
708 |
} |
|
709 |
} |
|
710 |
} |
|
711 |
) |
|
712 | 24x |
ci <- c(est = est, lwr.ci = max(0, ci_lwr), upr.ci = min( |
713 | 24x |
1, |
714 | 24x |
ci_upr |
715 |
)) |
|
716 | 24x |
if (sides == "left") { |
717 | 1x |
ci[3] <- 1 |
718 | 23x |
} else if (sides == "right") { |
719 | ! |
ci[2] <- 0 |
720 |
} |
|
721 | 24x |
return(ci) |
722 |
} |
|
723 | 24x |
lst <- list( |
724 | 24x |
x = x, n = n, conf.level = conf.level, sides = sides, |
725 | 24x |
method = method, rand = rand |
726 |
) |
|
727 | 24x |
maxdim <- max(unlist(lapply(lst, length))) |
728 | 24x |
lgp <- lapply(lst, rep, length.out = maxdim) |
729 | 24x |
lgn <- h_recycle(x = if (is.null(names(x))) { |
730 | 24x |
paste("x", seq_along(x), sep = ".") |
731 |
} else { |
|
732 | ! |
names(x) |
733 | 24x |
}, n = if (is.null(names(n))) { |
734 | 24x |
paste("n", seq_along(n), sep = ".") |
735 |
} else { |
|
736 | ! |
names(n) |
737 | 24x |
}, conf.level = conf.level, sides = sides, method = method) |
738 | 24x |
xn <- apply(as.data.frame(lgn[sapply(lgn, function(x) { |
739 | 120x |
length(unique(x)) != |
740 | 120x |
1 |
741 | 24x |
})]), 1, paste, collapse = ":") |
742 | 24x |
res <- t(sapply(1:maxdim, function(i) { |
743 | 24x |
iBinomCI( |
744 | 24x |
x = lgp$x[i], |
745 | 24x |
n = lgp$n[i], conf.level = lgp$conf.level[i], sides = lgp$sides[i], |
746 | 24x |
method = lgp$method[i], rand = lgp$rand[i] |
747 |
) |
|
748 |
})) |
|
749 | 24x |
colnames(res)[1] <- c("est") |
750 | 24x |
rownames(res) <- xn |
751 | 24x |
return(res) |
752 |
} |
1 |
#' Missing Data |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Substitute missing data with a string or factor level. |
|
6 |
#' |
|
7 |
#' @param x (`factor` or `character` vector)\cr values for which any missing values should be substituted. |
|
8 |
#' @param label (`character`)\cr string that missing data should be replaced with. |
|
9 |
#' |
|
10 |
#' @return `x` with any `NA` values substituted by `label`. |
|
11 |
#' |
|
12 |
#' @examples |
|
13 |
#' explicit_na(c(NA, "a", "b")) |
|
14 |
#' is.na(explicit_na(c(NA, "a", "b"))) |
|
15 |
#' |
|
16 |
#' explicit_na(factor(c(NA, "a", "b"))) |
|
17 |
#' is.na(explicit_na(factor(c(NA, "a", "b")))) |
|
18 |
#' |
|
19 |
#' explicit_na(sas_na(c("a", ""))) |
|
20 |
#' |
|
21 |
#' @export |
|
22 |
explicit_na <- function(x, label = "<Missing>") { |
|
23 | 409x |
checkmate::assert_string(label) |
24 | ||
25 | 409x |
if (is.factor(x)) { |
26 | 307x |
x <- forcats::fct_na_value_to_level(x, label) |
27 | 307x |
forcats::fct_drop(x, only = label) |
28 | 102x |
} else if (is.character(x)) { |
29 | 102x |
x[is.na(x)] <- label |
30 | 102x |
x |
31 |
} else { |
|
32 | ! |
stop("only factors and character vectors allowed") |
33 |
} |
|
34 |
} |
|
35 | ||
36 |
#' Convert Strings to `NA` |
|
37 |
#' |
|
38 |
#' @description `r lifecycle::badge("stable")` |
|
39 |
#' |
|
40 |
#' SAS imports missing data as empty strings or strings with whitespaces only. This helper function can be used to |
|
41 |
#' convert these values to `NA`s. |
|
42 |
#' |
|
43 |
#' @inheritParams explicit_na |
|
44 |
#' @param empty (`logical`)\cr if `TRUE` empty strings get replaced by `NA`. |
|
45 |
#' @param whitespaces (`logical`)\cr if `TRUE` then strings made from whitespaces only get replaced with `NA`. |
|
46 |
#' |
|
47 |
#' @return `x` with `""` and/or whitespace-only values substituted by `NA`, depending on the values of |
|
48 |
#' `empty` and `whitespaces`. |
|
49 |
#' |
|
50 |
#' @examples |
|
51 |
#' sas_na(c("1", "", " ", " ", "b")) |
|
52 |
#' sas_na(factor(c("", " ", "b"))) |
|
53 |
#' |
|
54 |
#' is.na(sas_na(c("1", "", " ", " ", "b"))) |
|
55 |
#' |
|
56 |
#' @export |
|
57 |
sas_na <- function(x, empty = TRUE, whitespaces = TRUE) { |
|
58 | 406x |
checkmate::assert_flag(empty) |
59 | 406x |
checkmate::assert_flag(whitespaces) |
60 | ||
61 | 406x |
if (is.factor(x)) { |
62 | 300x |
empty_levels <- levels(x) == "" |
63 | 11x |
if (empty && any(empty_levels)) levels(x)[empty_levels] <- NA |
64 | ||
65 | 300x |
ws_levels <- grepl("^\\s+$", levels(x)) |
66 | ! |
if (whitespaces && any(ws_levels)) levels(x)[ws_levels] <- NA |
67 | ||
68 | 300x |
x |
69 | 106x |
} else if (is.character(x)) { |
70 | 106x |
if (empty) x[x == ""] <- NA_character_ |
71 | ||
72 | 106x |
if (whitespaces) x[grepl("^\\s+$", x)] <- NA_character_ |
73 | ||
74 | 106x |
x |
75 |
} else { |
|
76 | ! |
stop("only factors and character vectors allowed") |
77 |
} |
|
78 |
} |
1 |
#' Compare Variables Between Groups |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Comparison with a reference group for different `x` objects. |
|
6 |
#' |
|
7 |
#' @inheritParams argument_convention |
|
8 |
#' |
|
9 |
#' @note |
|
10 |
#' * For factor variables, `denom` for factor proportions can only be `n` since the purpose is to compare proportions |
|
11 |
#' between columns, therefore a row-based proportion would not make sense. Proportion based on `N_col` would |
|
12 |
#' be difficult since we use counts for the chi-squared test statistic, therefore missing values should be accounted |
|
13 |
#' for as explicit factor levels. |
|
14 |
#' * If factor variables contain `NA`, these `NA` values are excluded by default. To include `NA` values |
|
15 |
#' set `na.rm = FALSE` and missing values will be displayed as an `NA` level. Alternatively, an explicit |
|
16 |
#' factor level can be defined for `NA` values during pre-processing via [df_explicit_na()] - the |
|
17 |
#' default `na_level` (`"<Missing>"`) will also be excluded when `na.rm` is set to `TRUE`. |
|
18 |
#' * For character variables, automatic conversion to factor does not guarantee that the table |
|
19 |
#' will be generated correctly. In particular for sparse tables this very likely can fail. |
|
20 |
#' Therefore it is always better to manually convert character variables to factors during pre-processing. |
|
21 |
#' * For `compare_vars()`, the column split must define a reference group via `ref_group` so that the comparison |
|
22 |
#' is well defined. |
|
23 |
#' |
|
24 |
#' @seealso Relevant constructor function [create_afun_compare()], and [s_summary()] which is used internally |
|
25 |
#' to compute a summary within `s_compare()`. |
|
26 |
#' |
|
27 |
#' @name compare_variables |
|
28 |
#' @include summarize_variables.R |
|
29 |
NULL |
|
30 | ||
31 |
#' @describeIn compare_variables S3 generic function to produce a comparison summary. |
|
32 |
#' |
|
33 |
#' @return |
|
34 |
#' * `s_compare()` returns output of [s_summary()] and comparisons versus the reference group in the form of p-values. |
|
35 |
#' |
|
36 |
#' @export |
|
37 |
s_compare <- function(x, |
|
38 |
.ref_group, |
|
39 |
.in_ref_col, |
|
40 |
...) { |
|
41 | 9x |
UseMethod("s_compare", x) |
42 |
} |
|
43 | ||
44 |
#' @describeIn compare_variables Method for `numeric` class. This uses the standard t-test |
|
45 |
#' to calculate the p-value. |
|
46 |
#' |
|
47 |
#' @method s_compare numeric |
|
48 |
#' |
|
49 |
#' @examples |
|
50 |
#' # `s_compare.numeric` |
|
51 |
#' |
|
52 |
#' ## Usual case where both this and the reference group vector have more than 1 value. |
|
53 |
#' s_compare(rnorm(10, 5, 1), .ref_group = rnorm(5, -5, 1), .in_ref_col = FALSE) |
|
54 |
#' |
|
55 |
#' ## If one group has not more than 1 value, then p-value is not calculated. |
|
56 |
#' s_compare(rnorm(10, 5, 1), .ref_group = 1, .in_ref_col = FALSE) |
|
57 |
#' |
|
58 |
#' ## Empty numeric does not fail, it returns NA-filled items and no p-value. |
|
59 |
#' s_compare(numeric(), .ref_group = numeric(), .in_ref_col = FALSE) |
|
60 |
#' |
|
61 |
#' @export |
|
62 |
s_compare.numeric <- function(x, |
|
63 |
.ref_group, |
|
64 |
.in_ref_col, |
|
65 |
...) { |
|
66 | 2x |
checkmate::assert_numeric(x) |
67 | 2x |
checkmate::assert_numeric(.ref_group) |
68 | 2x |
checkmate::assert_flag(.in_ref_col) |
69 | ||
70 | 2x |
y <- s_summary.numeric(x = x, ...) |
71 | ||
72 | 2x |
y$pval <- if (!.in_ref_col && n_available(x) > 1 && n_available(.ref_group) > 1) { |
73 | 1x |
stats::t.test(x, .ref_group)$p.value |
74 |
} else { |
|
75 | 1x |
character() |
76 |
} |
|
77 | ||
78 | 2x |
y |
79 |
} |
|
80 | ||
81 |
#' @describeIn compare_variables Method for `factor` class. This uses the chi-squared test |
|
82 |
#' to calculate the p-value. |
|
83 |
#' |
|
84 |
#' @param denom (`string`)\cr choice of denominator for factor proportions, |
|
85 |
#' can only be `n` (number of values in this row and column intersection). |
|
86 |
#' |
|
87 |
#' @method s_compare factor |
|
88 |
#' |
|
89 |
#' @examples |
|
90 |
#' # `s_compare.factor` |
|
91 |
#' |
|
92 |
#' ## Basic usage: |
|
93 |
#' x <- factor(c("a", "a", "b", "c", "a")) |
|
94 |
#' y <- factor(c("a", "b", "c")) |
|
95 |
#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE) |
|
96 |
#' |
|
97 |
#' ## Management of NA values. |
|
98 |
#' x <- explicit_na(factor(c("a", "a", "b", "c", "a", NA, NA))) |
|
99 |
#' y <- explicit_na(factor(c("a", "b", "c", NA))) |
|
100 |
#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE, na.rm = TRUE) |
|
101 |
#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE, na.rm = FALSE) |
|
102 |
#' |
|
103 |
#' @export |
|
104 |
s_compare.factor <- function(x, |
|
105 |
.ref_group, |
|
106 |
.in_ref_col, |
|
107 |
denom = "n", |
|
108 |
na.rm = TRUE, # nolint |
|
109 |
...) { |
|
110 | 3x |
checkmate::assert_flag(.in_ref_col) |
111 | 3x |
assert_valid_factor(x) |
112 | 3x |
assert_valid_factor(.ref_group) |
113 | 3x |
denom <- match.arg(denom) |
114 | ||
115 | 3x |
y <- s_summary.factor( |
116 | 3x |
x = x, |
117 | 3x |
denom = denom, |
118 | 3x |
na.rm = na.rm, |
119 |
... |
|
120 |
) |
|
121 | ||
122 | 3x |
if (na.rm) { |
123 | 3x |
x <- x[!is.na(x)] %>% fct_discard("<Missing>") |
124 | 3x |
.ref_group <- .ref_group[!is.na(.ref_group)] %>% fct_discard("<Missing>") |
125 |
} else { |
|
126 | ! |
x <- x %>% explicit_na(label = "NA") |
127 | ! |
.ref_group <- .ref_group %>% explicit_na(label = "NA") |
128 |
} |
|
129 | ||
130 | 3x |
checkmate::assert_factor(x, levels = levels(.ref_group), min.levels = 2) |
131 | ||
132 | 3x |
y$pval <- if (!.in_ref_col && length(x) > 0 && length(.ref_group) > 0) { |
133 | 3x |
tab <- rbind(table(x), table(.ref_group)) |
134 | 3x |
res <- suppressWarnings(stats::chisq.test(tab)) |
135 | 3x |
res$p.value |
136 |
} else { |
|
137 | ! |
character() |
138 |
} |
|
139 | ||
140 | 3x |
y |
141 |
} |
|
142 | ||
143 |
#' @describeIn compare_variables Method for `character` class. This makes an automatic |
|
144 |
#' conversion to `factor` (with a warning) and then forwards to the method for factors. |
|
145 |
#' |
|
146 |
#' @param verbose (`logical`)\cr Whether warnings and messages should be printed. Mainly used |
|
147 |
#' to print out information about factor casting. Defaults to `TRUE`. |
|
148 |
#' |
|
149 |
#' @method s_compare character |
|
150 |
#' |
|
151 |
#' @examples |
|
152 |
#' # `s_compare.character` |
|
153 |
#' |
|
154 |
#' ## Basic usage: |
|
155 |
#' x <- c("a", "a", "b", "c", "a") |
|
156 |
#' y <- c("a", "b", "c") |
|
157 |
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, .var = "x", verbose = FALSE) |
|
158 |
#' |
|
159 |
#' ## Note that missing values handling can make a large difference: |
|
160 |
#' x <- c("a", "a", "b", "c", "a", NA) |
|
161 |
#' y <- c("a", "b", "c", rep(NA, 20)) |
|
162 |
#' s_compare(x, |
|
163 |
#' .ref_group = y, .in_ref_col = FALSE, |
|
164 |
#' .var = "x", verbose = FALSE |
|
165 |
#' ) |
|
166 |
#' s_compare(x, |
|
167 |
#' .ref_group = y, .in_ref_col = FALSE, .var = "x", |
|
168 |
#' na.rm = FALSE, verbose = FALSE |
|
169 |
#' ) |
|
170 |
#' |
|
171 |
#' @export |
|
172 |
s_compare.character <- function(x, |
|
173 |
.ref_group, |
|
174 |
.in_ref_col, |
|
175 |
denom = "n", |
|
176 |
na.rm = TRUE, # nolint |
|
177 |
.var, |
|
178 |
verbose = TRUE, |
|
179 |
...) { |
|
180 | 1x |
x <- as_factor_keep_attributes(x, x_name = .var, verbose = verbose) |
181 | 1x |
.ref_group <- as_factor_keep_attributes(.ref_group, x_name = .var, verbose = verbose) |
182 | 1x |
s_compare( |
183 | 1x |
x = x, |
184 | 1x |
.ref_group = .ref_group, |
185 | 1x |
.in_ref_col = .in_ref_col, |
186 | 1x |
denom = denom, |
187 | 1x |
na.rm = na.rm, |
188 |
... |
|
189 |
) |
|
190 |
} |
|
191 | ||
192 |
#' @describeIn compare_variables Method for `logical` class. A chi-squared test |
|
193 |
#' is used. If missing values are not removed, then they are counted as `FALSE`. |
|
194 |
#' |
|
195 |
#' @method s_compare logical |
|
196 |
#' |
|
197 |
#' @examples |
|
198 |
#' # `s_compare.logical` |
|
199 |
#' |
|
200 |
#' ## Basic usage: |
|
201 |
#' x <- c(TRUE, FALSE, TRUE, TRUE) |
|
202 |
#' y <- c(FALSE, FALSE, TRUE) |
|
203 |
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE) |
|
204 |
#' |
|
205 |
#' ## Management of NA values. |
|
206 |
#' x <- c(NA, TRUE, FALSE) |
|
207 |
#' y <- c(NA, NA, NA, NA, FALSE) |
|
208 |
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = TRUE) |
|
209 |
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = FALSE) |
|
210 |
#' |
|
211 |
#' @export |
|
212 |
s_compare.logical <- function(x, |
|
213 |
.ref_group, |
|
214 |
.in_ref_col, |
|
215 |
na.rm = TRUE, # nolint |
|
216 |
denom = "n", |
|
217 |
...) { |
|
218 | 3x |
denom <- match.arg(denom) |
219 | ||
220 | 3x |
y <- s_summary.logical( |
221 | 3x |
x = x, |
222 | 3x |
na.rm = na.rm, |
223 | 3x |
denom = denom, |
224 |
... |
|
225 |
) |
|
226 | ||
227 | 3x |
if (na.rm) { |
228 | 2x |
x <- stats::na.omit(x) |
229 | 2x |
.ref_group <- stats::na.omit(.ref_group) |
230 |
} else { |
|
231 | 1x |
x[is.na(x)] <- FALSE |
232 | 1x |
.ref_group[is.na(.ref_group)] <- FALSE |
233 |
} |
|
234 | ||
235 | 3x |
y$pval <- if (!.in_ref_col && length(x) > 0 && length(.ref_group) > 0) { |
236 | 3x |
x <- factor(x, levels = c(TRUE, FALSE)) |
237 | 3x |
.ref_group <- factor(.ref_group, levels = c(TRUE, FALSE)) |
238 | 3x |
tbl <- rbind(table(x), table(.ref_group)) |
239 | 3x |
suppressWarnings(prop_chisq(tbl)) |
240 |
} else { |
|
241 | ! |
character() |
242 |
} |
|
243 | ||
244 | 3x |
y |
245 |
} |
|
246 | ||
247 |
#' @describeIn compare_variables Formatted analysis function which is used as `afun` |
|
248 |
#' in `compare_vars()`. |
|
249 |
#' |
|
250 |
#' @return |
|
251 |
#' * `a_compare()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
252 |
#' |
|
253 |
#' @export |
|
254 |
a_compare <- function(x, |
|
255 |
.ref_group, |
|
256 |
.in_ref_col, |
|
257 |
..., |
|
258 |
.var) { |
|
259 | ! |
UseMethod("a_compare", x) |
260 |
} |
|
261 | ||
262 |
#' @describeIn compare_variables Formatted analysis function method for `numeric` class. |
|
263 |
#' |
|
264 |
#' @examples |
|
265 |
#' # `a_compare.numeric` |
|
266 |
#' a_compare( |
|
267 |
#' rnorm(10, 5, 1), |
|
268 |
#' .ref_group = rnorm(20, -5, 1), |
|
269 |
#' .in_ref_col = FALSE, |
|
270 |
#' .var = "bla" |
|
271 |
#' ) |
|
272 |
#' |
|
273 |
#' @export |
|
274 |
a_compare.numeric <- make_afun( |
|
275 |
s_compare.numeric, |
|
276 |
.formats = c( |
|
277 |
.a_summary_numeric_formats, |
|
278 |
pval = "x.xxxx | (<0.0001)" |
|
279 |
), |
|
280 |
.labels = c( |
|
281 |
.a_summary_numeric_labels, |
|
282 |
pval = "p-value (t-test)" |
|
283 |
), |
|
284 |
.null_ref_cells = FALSE |
|
285 |
) |
|
286 | ||
287 |
.a_compare_counts_formats <- c( |
|
288 |
.a_summary_counts_formats, |
|
289 |
pval = "x.xxxx | (<0.0001)" |
|
290 |
) |
|
291 | ||
292 |
.a_compare_counts_labels <- c( |
|
293 |
pval = "p-value (chi-squared test)" |
|
294 |
) |
|
295 | ||
296 |
#' @describeIn compare_variables Formatted analysis function method for `factor` class. |
|
297 |
#' |
|
298 |
#' @examples |
|
299 |
#' # `a_compare.factor` |
|
300 |
#' # We need to ungroup `count` and `count_fraction` first so that the `rtables` formatting |
|
301 |
#' # functions can be applied correctly. |
|
302 |
#' afun <- make_afun( |
|
303 |
#' getS3method("a_compare", "factor"), |
|
304 |
#' .ungroup_stats = c("count", "count_fraction") |
|
305 |
#' ) |
|
306 |
#' x <- factor(c("a", "a", "b", "c", "a")) |
|
307 |
#' y <- factor(c("a", "a", "b", "c")) |
|
308 |
#' afun(x, .ref_group = y, .in_ref_col = FALSE) |
|
309 |
#' |
|
310 |
#' @export |
|
311 |
a_compare.factor <- make_afun( |
|
312 |
s_compare.factor, |
|
313 |
.formats = .a_compare_counts_formats, |
|
314 |
.labels = .a_compare_counts_labels, |
|
315 |
.null_ref_cells = FALSE |
|
316 |
) |
|
317 | ||
318 |
#' @describeIn compare_variables Formatted analysis function method for `character` class. |
|
319 |
#' |
|
320 |
#' @examples |
|
321 |
#' # `a_compare.character` |
|
322 |
#' afun <- make_afun( |
|
323 |
#' getS3method("a_compare", "character"), |
|
324 |
#' .ungroup_stats = c("count", "count_fraction") |
|
325 |
#' ) |
|
326 |
#' x <- c("A", "B", "A", "C") |
|
327 |
#' y <- c("B", "A", "C") |
|
328 |
#' afun(x, .ref_group = y, .in_ref_col = FALSE, .var = "x", verbose = FALSE) |
|
329 |
#' |
|
330 |
#' @export |
|
331 |
a_compare.character <- make_afun( |
|
332 |
s_compare.character, |
|
333 |
.formats = .a_compare_counts_formats, |
|
334 |
.labels = .a_compare_counts_labels, |
|
335 |
.null_ref_cells = FALSE |
|
336 |
) |
|
337 | ||
338 |
#' @describeIn compare_variables Formatted analysis function method for `logical` class. |
|
339 |
#' |
|
340 |
#' @examples |
|
341 |
#' # `a_compare.logical` |
|
342 |
#' afun <- make_afun( |
|
343 |
#' getS3method("a_compare", "logical") |
|
344 |
#' ) |
|
345 |
#' x <- c(TRUE, FALSE, FALSE, TRUE, TRUE) |
|
346 |
#' y <- c(TRUE, FALSE) |
|
347 |
#' afun(x, .ref_group = y, .in_ref_col = FALSE) |
|
348 |
#' |
|
349 |
#' @export |
|
350 |
a_compare.logical <- make_afun( |
|
351 |
s_compare.logical, |
|
352 |
.formats = .a_compare_counts_formats, |
|
353 |
.labels = .a_compare_counts_labels, |
|
354 |
.null_ref_cells = FALSE |
|
355 |
) |
|
356 | ||
357 |
#' Constructor Function for [compare_vars()] |
|
358 |
#' |
|
359 |
#' @description `r lifecycle::badge("stable")` |
|
360 |
#' |
|
361 |
#' Constructor function which creates a combined formatted analysis function. |
|
362 |
#' |
|
363 |
#' @inheritParams argument_convention |
|
364 |
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector |
|
365 |
#' should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation |
|
366 |
#' for that statistic's row label. |
|
367 |
#' |
|
368 |
#' @return Combined formatted analysis function for use in [compare_vars()]. |
|
369 |
#' |
|
370 |
#' @note Since [a_compare()] is generic and we want customization of the formatting arguments |
|
371 |
#' via [rtables::make_afun()], we need to create another temporary generic function, with |
|
372 |
#' corresponding customized methods. Then in order for the methods to be found, |
|
373 |
#' we need to wrap them in a combined `afun`. Since this is required by two layout creating |
|
374 |
#' functions (and possibly others in the future), we provide a constructor that does this: |
|
375 |
#' [create_afun_compare()]. |
|
376 |
#' |
|
377 |
#' @seealso [compare_vars()] |
|
378 |
#' |
|
379 |
#' @examples |
|
380 |
#' # `create_afun_compare()` to create combined `afun` |
|
381 |
#' |
|
382 |
#' afun <- create_afun_compare( |
|
383 |
#' .stats = c("n", "count_fraction", "mean_sd", "pval"), |
|
384 |
#' .indent_mods = c(pval = 1L) |
|
385 |
#' ) |
|
386 |
#' |
|
387 |
#' lyt <- basic_table() %>% |
|
388 |
#' split_cols_by("ARMCD", ref_group = "ARM A") %>% |
|
389 |
#' analyze( |
|
390 |
#' "AGE", |
|
391 |
#' afun = afun, |
|
392 |
#' show_labels = "visible" |
|
393 |
#' ) |
|
394 |
#' build_table(lyt, df = tern_ex_adsl) |
|
395 |
#' |
|
396 |
#' lyt <- basic_table() %>% |
|
397 |
#' split_cols_by("ARMCD", ref_group = "ARM A") %>% |
|
398 |
#' analyze( |
|
399 |
#' "SEX", |
|
400 |
#' afun = afun, |
|
401 |
#' show_labels = "visible" |
|
402 |
#' ) |
|
403 |
#' build_table(lyt, df = tern_ex_adsl) |
|
404 |
#' |
|
405 |
#' @export |
|
406 |
create_afun_compare <- function(.stats = NULL, |
|
407 |
.formats = NULL, |
|
408 |
.labels = NULL, |
|
409 |
.indent_mods = NULL) { |
|
410 | 3x |
function(x, |
411 | 3x |
.ref_group, |
412 | 3x |
.in_ref_col, |
413 |
..., |
|
414 | 3x |
.var) { |
415 | 15x |
afun <- function(x, ...) { |
416 | 15x |
UseMethod("afun", x) |
417 |
} |
|
418 | ||
419 | 15x |
numeric_stats <- afun_selected_stats( |
420 | 15x |
.stats, |
421 | 15x |
all_stats = c(names(.a_summary_numeric_formats), "pval") |
422 |
) |
|
423 | 15x |
afun.numeric <- make_afun( # nolint |
424 | 15x |
a_compare.numeric, |
425 | 15x |
.stats = numeric_stats, |
426 | 15x |
.formats = extract_by_name(.formats, numeric_stats), |
427 | 15x |
.labels = extract_by_name(.labels, numeric_stats), |
428 | 15x |
.indent_mods = extract_by_name(.indent_mods, numeric_stats), |
429 | 15x |
.null_ref_cells = FALSE |
430 |
) |
|
431 | ||
432 | 15x |
factor_stats <- afun_selected_stats( |
433 | 15x |
.stats, |
434 | 15x |
all_stats = names(.a_compare_counts_formats) |
435 |
) |
|
436 | 15x |
ungroup_stats <- afun_selected_stats(.stats, c("count", "count_fraction")) |
437 | 15x |
afun.factor <- make_afun( # nolint |
438 | 15x |
a_compare.factor, |
439 | 15x |
.stats = factor_stats, |
440 | 15x |
.formats = extract_by_name(.formats, factor_stats), |
441 | 15x |
.labels = extract_by_name(.labels, factor_stats), |
442 | 15x |
.indent_mods = extract_by_name(.indent_mods, factor_stats), |
443 | 15x |
.ungroup_stats = ungroup_stats, |
444 | 15x |
.null_ref_cells = FALSE |
445 |
) |
|
446 | ||
447 | 15x |
afun.character <- make_afun( # nolint |
448 | 15x |
a_compare.character, |
449 | 15x |
.stats = factor_stats, |
450 | 15x |
.formats = extract_by_name(.formats, factor_stats), |
451 | 15x |
.labels = extract_by_name(.labels, factor_stats), |
452 | 15x |
.indent_mods = extract_by_name(.indent_mods, factor_stats), |
453 | 15x |
.ungroup_stats = ungroup_stats, |
454 | 15x |
.null_ref_cells = FALSE |
455 |
) |
|
456 | ||
457 | 15x |
afun.logical <- make_afun( # nolint |
458 | 15x |
a_compare.logical, |
459 | 15x |
.stats = factor_stats, |
460 | 15x |
.formats = extract_by_name(.formats, factor_stats), |
461 | 15x |
.labels = extract_by_name(.labels, factor_stats), |
462 | 15x |
.indent_mods = extract_by_name(.indent_mods, factor_stats), |
463 | 15x |
.null_ref_cells = FALSE |
464 |
) |
|
465 | ||
466 | 15x |
afun( |
467 | 15x |
x = x, |
468 | 15x |
.ref_group = .ref_group, |
469 | 15x |
.in_ref_col = .in_ref_col, |
470 |
..., |
|
471 | 15x |
.var = .var |
472 |
) |
|
473 |
} |
|
474 |
} |
|
475 | ||
476 |
#' @describeIn compare_variables Layout-creating function which can take statistics function arguments |
|
477 |
#' and additional format arguments. This function is a wrapper for [rtables::analyze()]. |
|
478 |
#' |
|
479 |
#' @param ... arguments passed to `s_compare()`. |
|
480 |
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector |
|
481 |
#' should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation |
|
482 |
#' for that statistic's row label. |
|
483 |
#' |
|
484 |
#' @return |
|
485 |
#' * `compare_vars()` returns a layout object suitable for passing to further layouting functions, |
|
486 |
#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing |
|
487 |
#' the statistics from `s_compare()` to the table layout. |
|
488 |
#' |
|
489 |
#' @examples |
|
490 |
#' # `compare_vars()` in `rtables` pipelines |
|
491 |
#' |
|
492 |
#' ## Default output within a `rtables` pipeline. |
|
493 |
#' lyt <- basic_table() %>% |
|
494 |
#' split_cols_by("ARMCD", ref_group = "ARM B") %>% |
|
495 |
#' compare_vars(c("AGE", "SEX")) |
|
496 |
#' build_table(lyt, tern_ex_adsl) |
|
497 |
#' |
|
498 |
#' ## Select and format statistics output. |
|
499 |
#' lyt <- basic_table() %>% |
|
500 |
#' split_cols_by("ARMCD", ref_group = "ARM C") %>% |
|
501 |
#' compare_vars( |
|
502 |
#' vars = "AGE", |
|
503 |
#' .stats = c("mean_sd", "pval"), |
|
504 |
#' .formats = c(mean_sd = "xx.x, xx.x"), |
|
505 |
#' .labels = c(mean_sd = "Mean, SD") |
|
506 |
#' ) |
|
507 |
#' build_table(lyt, df = tern_ex_adsl) |
|
508 |
#' |
|
509 |
#' @export |
|
510 |
compare_vars <- function(lyt, |
|
511 |
vars, |
|
512 |
var_labels = vars, |
|
513 |
nested = TRUE, |
|
514 |
..., |
|
515 |
na_level = NA_character_, |
|
516 |
show_labels = "default", |
|
517 |
table_names = vars, |
|
518 |
.stats = c("n", "mean_sd", "count_fraction", "pval"), |
|
519 |
.formats = NULL, |
|
520 |
.labels = NULL, |
|
521 |
.indent_mods = NULL) { |
|
522 | 3x |
afun <- create_afun_compare(.stats, .formats, .labels, .indent_mods) |
523 | ||
524 | 3x |
analyze( |
525 | 3x |
lyt = lyt, |
526 | 3x |
vars = vars, |
527 | 3x |
var_labels = var_labels, |
528 | 3x |
afun = afun, |
529 | 3x |
nested = nested, |
530 | 3x |
extra_args = list(...), |
531 | 3x |
na_str = na_level, |
532 | 3x |
inclNAs = TRUE, |
533 | 3x |
show_labels = show_labels, |
534 | 3x |
table_names = table_names |
535 |
) |
|
536 |
} |
1 |
#' Summary numeric variables in columns |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("experimental")` |
|
4 |
#' |
|
5 |
#' Layout-creating function which can be used for creating column-wise summary tables. |
|
6 |
#' This function sets the analysis methods as column labels and is a wrapper for |
|
7 |
#' [rtables::analyze_colvars()]. It was designed principally for PK tables. |
|
8 |
#' |
|
9 |
#' @inheritParams argument_convention |
|
10 |
#' @inheritParams rtables::analyze_colvars |
|
11 |
#' @param row_labels (`character`)\cr as this function works in columns space, usual `.labels` |
|
12 |
#' character vector applies on the column space. You can change the row labels by defining this |
|
13 |
#' parameter to a named character vector with names corresponding to the split values. It defaults |
|
14 |
#' to `NULL` and if it contains only one `string`, it will duplicate that as a row label. |
|
15 |
#' @param do_summarize_row_groups (`flag`)\cr defaults to `FALSE` and applies the analysis to the current |
|
16 |
#' label rows. This is a wrapper of [rtables::summarize_row_groups()] and it can accept `labelstr` |
|
17 |
#' to define row labels. This behavior is not supported as we never need to overload row labels. |
|
18 |
#' @param split_col_vars (`flag`)\cr defaults to `TRUE` and puts the analysis results onto the columns. |
|
19 |
#' This option allows you to add multiple instances of this functions, also in a nested fashion, |
|
20 |
#' without adding more splits. This split must happen only one time on a single layout. |
|
21 |
#' |
|
22 |
#' @return |
|
23 |
#' A layout object suitable for passing to further layouting functions, or to [rtables::build_table()]. |
|
24 |
#' Adding this function to an `rtable` layout will summarize the given variables, arrange the output |
|
25 |
#' in columns, and add it to the table layout. |
|
26 |
#' |
|
27 |
#' @note This is an experimental implementation of [rtables::summarize_row_groups()] and |
|
28 |
#' [rtables::analyze_colvars()] that may be subjected to changes as `rtables` extends its |
|
29 |
#' support to more complex analysis pipelines on the column space. For the same reasons, |
|
30 |
#' we encourage to read the examples carefully and file issues for cases that differ from |
|
31 |
#' them. |
|
32 |
#' |
|
33 |
#' Here `labelstr` behaves differently than usual. If it is not defined (default as `NULL`), |
|
34 |
#' row labels are assigned automatically to the split values in case of `rtables::analyze_colvars` |
|
35 |
#' (`do_summarize_row_groups = FALSE`, the default), and to the group label for |
|
36 |
#' `do_summarize_row_groups = TRUE`. |
|
37 |
#' |
|
38 |
#' @seealso [summarize_vars()], [rtables::analyze_colvars()]. |
|
39 |
#' |
|
40 |
#' @examples |
|
41 |
#' library(dplyr) |
|
42 |
#' |
|
43 |
#' # Data preparation |
|
44 |
#' adpp <- tern_ex_adpp %>% h_pkparam_sort() |
|
45 |
#' |
|
46 |
#' lyt <- basic_table() %>% |
|
47 |
#' split_rows_by(var = "STRATA1", label_pos = "topleft") %>% |
|
48 |
#' split_rows_by( |
|
49 |
#' var = "SEX", |
|
50 |
#' label_pos = "topleft", |
|
51 |
#' child_label = "hidden" |
|
52 |
#' ) %>% # Removes duplicated labels |
|
53 |
#' analyze_vars_in_cols(vars = "AGE") |
|
54 |
#' result <- build_table(lyt = lyt, df = adpp) |
|
55 |
#' result |
|
56 |
#' |
|
57 |
#' # By selecting just some statistics and ad-hoc labels |
|
58 |
#' lyt <- basic_table() %>% |
|
59 |
#' split_rows_by(var = "ARM", label_pos = "topleft") %>% |
|
60 |
#' split_rows_by( |
|
61 |
#' var = "SEX", |
|
62 |
#' label_pos = "topleft", |
|
63 |
#' child_labels = "hidden", |
|
64 |
#' split_fun = drop_split_levels |
|
65 |
#' ) %>% |
|
66 |
#' analyze_vars_in_cols( |
|
67 |
#' vars = "AGE", |
|
68 |
#' .stats = c("n", "cv", "geom_mean"), |
|
69 |
#' .labels = c( |
|
70 |
#' n = "aN", |
|
71 |
#' cv = "aCV", |
|
72 |
#' geom_mean = "aGeomMean" |
|
73 |
#' ) |
|
74 |
#' ) |
|
75 |
#' result <- build_table(lyt = lyt, df = adpp) |
|
76 |
#' result |
|
77 |
#' |
|
78 |
#' # Changing row labels |
|
79 |
#' lyt <- basic_table() %>% |
|
80 |
#' analyze_vars_in_cols( |
|
81 |
#' vars = "AGE", |
|
82 |
#' row_labels = "some custom label" |
|
83 |
#' ) |
|
84 |
#' result <- build_table(lyt, df = adpp) |
|
85 |
#' result |
|
86 |
#' |
|
87 |
#' # Pharmacokinetic parameters |
|
88 |
#' lyt <- basic_table() %>% |
|
89 |
#' split_rows_by( |
|
90 |
#' var = "TLG_DISPLAY", |
|
91 |
#' split_label = "PK Parameter", |
|
92 |
#' label_pos = "topleft", |
|
93 |
#' child_label = "hidden" |
|
94 |
#' ) %>% |
|
95 |
#' analyze_vars_in_cols( |
|
96 |
#' vars = "AVAL" |
|
97 |
#' ) |
|
98 |
#' result <- build_table(lyt, df = adpp) |
|
99 |
#' result |
|
100 |
#' |
|
101 |
#' # Multiple calls (summarize label and analyze underneath) |
|
102 |
#' lyt <- basic_table() %>% |
|
103 |
#' split_rows_by( |
|
104 |
#' var = "TLG_DISPLAY", |
|
105 |
#' split_label = "PK Parameter", |
|
106 |
#' label_pos = "topleft" |
|
107 |
#' ) %>% |
|
108 |
#' analyze_vars_in_cols( |
|
109 |
#' vars = "AVAL", |
|
110 |
#' do_summarize_row_groups = TRUE # does a summarize level |
|
111 |
#' ) %>% |
|
112 |
#' split_rows_by("SEX", |
|
113 |
#' child_label = "hidden", |
|
114 |
#' label_pos = "topleft" |
|
115 |
#' ) %>% |
|
116 |
#' analyze_vars_in_cols( |
|
117 |
#' vars = "AVAL", |
|
118 |
#' split_col_vars = FALSE # avoids re-splitting the columns |
|
119 |
#' ) |
|
120 |
#' result <- build_table(lyt, df = adpp) |
|
121 |
#' result |
|
122 |
#' |
|
123 |
#' @export |
|
124 |
analyze_vars_in_cols <- function(lyt, |
|
125 |
vars, |
|
126 |
..., |
|
127 |
.stats = c( |
|
128 |
"n", |
|
129 |
"mean", |
|
130 |
"sd", |
|
131 |
"se", |
|
132 |
"cv", |
|
133 |
"geom_cv" |
|
134 |
), |
|
135 |
.labels = c( |
|
136 |
n = "n", |
|
137 |
mean = "Mean", |
|
138 |
sd = "SD", |
|
139 |
se = "SE", |
|
140 |
cv = "CV (%)", |
|
141 |
geom_cv = "CV % Geometric Mean" |
|
142 |
), |
|
143 |
row_labels = NULL, |
|
144 |
do_summarize_row_groups = FALSE, |
|
145 |
split_col_vars = TRUE, |
|
146 |
.indent_mods = NULL, |
|
147 |
nested = TRUE, |
|
148 |
na_level = NULL, |
|
149 |
.formats = NULL) { |
|
150 | 6x |
checkmate::assert_string(na_level, null.ok = TRUE) |
151 | 6x |
checkmate::assert_character(row_labels, null.ok = TRUE) |
152 | 6x |
checkmate::assert_int(.indent_mods, null.ok = TRUE) |
153 | 6x |
checkmate::assert_flag(nested) |
154 | 6x |
checkmate::assert_flag(split_col_vars) |
155 | 6x |
checkmate::assert_flag(do_summarize_row_groups) |
156 | ||
157 |
# Automatic assignment of formats |
|
158 | 6x |
if (is.null(.formats)) { |
159 |
# General values |
|
160 | 6x |
sf_numeric <- summary_formats("numeric") |
161 | 6x |
sf_counts <- summary_formats("counts")[-1] |
162 | 6x |
formats_v <- c(sf_numeric, sf_counts) |
163 |
} else { |
|
164 | ! |
formats_v <- .formats |
165 |
} |
|
166 | ||
167 |
# Check for vars in the case that one or more are used |
|
168 | 6x |
if (length(vars) == 1) { |
169 | 5x |
vars <- rep(vars, length(.stats)) |
170 | 1x |
} else if (length(vars) != length(.stats)) { |
171 | 1x |
stop( |
172 | 1x |
"Analyzed variables (vars) does not have the same ", |
173 | 1x |
"number of elements of specified statistics (.stats)." |
174 |
) |
|
175 |
} |
|
176 | ||
177 | 5x |
if (split_col_vars) { |
178 |
# Checking there is not a previous identical column split |
|
179 | 4x |
clyt <- tail(clayout(lyt), 1)[[1]] |
180 | ||
181 | 4x |
dummy_lyt <- split_cols_by_multivar( |
182 | 4x |
lyt = basic_table(), |
183 | 4x |
vars = vars, |
184 | 4x |
varlabels = .labels[.stats] |
185 |
) |
|
186 | ||
187 | 4x |
if (any(sapply(clyt, identical, y = get_last_col_split(dummy_lyt)))) { |
188 | ! |
stop( |
189 | ! |
"Column split called again with the same values. ", |
190 | ! |
"This can create many unwanted columns. Please consider adding ", |
191 | ! |
"split_col_vars = FALSE to the last call of ", |
192 | ! |
deparse(sys.calls()[[sys.nframe() - 1]]), "." |
193 |
) |
|
194 |
} |
|
195 | ||
196 |
# Main col split |
|
197 | 4x |
lyt <- split_cols_by_multivar( |
198 | 4x |
lyt = lyt, |
199 | 4x |
vars = vars, |
200 | 4x |
varlabels = .labels[.stats] |
201 |
) |
|
202 |
} |
|
203 | ||
204 | 5x |
if (do_summarize_row_groups) { |
205 | 2x |
if (length(unique(vars)) > 1) { |
206 | ! |
stop("When using do_summarize_row_groups only one label level var should be inserted.") |
207 |
} |
|
208 | ||
209 |
# Function list for do_summarize_row_groups. Slightly different handling of labels |
|
210 | 2x |
cfun_list <- Map( |
211 | 2x |
function(stat) { |
212 | 12x |
function(u, .spl_context, labelstr, ...) { |
213 |
# Statistic |
|
214 | 24x |
res <- s_summary(u, ...)[[stat]] |
215 | ||
216 |
# Label check and replacement |
|
217 | 24x |
if (length(row_labels) > 1) { |
218 | 12x |
if (!(labelstr %in% names(row_labels))) { |
219 | ! |
stop( |
220 | ! |
"Replacing the labels in do_summarize_row_groups needs a named vector", |
221 | ! |
"that contains the split values. In the current split variable ", |
222 | ! |
.spl_context$split[nrow(.spl_context)], |
223 | ! |
" the labelstr value (split value by default) ", labelstr, " is not in", |
224 | ! |
" row_labels names: ", names(row_labels) |
225 |
) |
|
226 |
} |
|
227 | 12x |
lbl <- unlist(row_labels[labelstr]) |
228 |
} else { |
|
229 | 12x |
lbl <- labelstr |
230 |
} |
|
231 | ||
232 |
# Cell creation |
|
233 | 24x |
rcell(res, |
234 | 24x |
label = lbl, |
235 | 24x |
format = formats_v[names(formats_v) == stat][[1]], |
236 | 24x |
format_na_str = na_level, |
237 | 24x |
indent_mod = ifelse(is.null(.indent_mods), 0L, .indent_mods) |
238 |
) |
|
239 |
} |
|
240 |
}, |
|
241 | 2x |
stat = .stats |
242 |
) |
|
243 | ||
244 |
# Main call to rtables |
|
245 | 2x |
summarize_row_groups( |
246 | 2x |
lyt = lyt, |
247 | 2x |
var = unique(vars), |
248 | 2x |
cfun = cfun_list, |
249 | 2x |
extra_args = list(...) |
250 |
) |
|
251 |
} else { |
|
252 |
# Function list for analyze_colvars |
|
253 | 3x |
afun_list <- Map( |
254 | 3x |
function(stat) { |
255 | 15x |
function(u, .spl_context, ...) { |
256 |
# Main statistics |
|
257 | 78x |
res <- s_summary(u, ...)[[stat]] |
258 | ||
259 |
# Label from context |
|
260 | 78x |
label_from_context <- .spl_context$value[nrow(.spl_context)] |
261 | ||
262 |
# Label switcher |
|
263 | 78x |
if (is.null(row_labels)) { |
264 | 18x |
lbl <- label_from_context |
265 |
} else { |
|
266 | 60x |
if (length(row_labels) > 1) { |
267 | 48x |
if (!(label_from_context %in% names(row_labels))) { |
268 | ! |
stop( |
269 | ! |
"Replacing the labels in do_summarize_row_groups needs a named vector", |
270 | ! |
"that contains the split values. In the current split variable ", |
271 | ! |
.spl_context$split[nrow(.spl_context)], |
272 | ! |
" the split value ", label_from_context, " is not in", |
273 | ! |
" row_labels names: ", names(row_labels) |
274 |
) |
|
275 |
} |
|
276 | 48x |
lbl <- unlist(row_labels[label_from_context]) |
277 |
} else { |
|
278 | 12x |
lbl <- row_labels |
279 |
} |
|
280 |
} |
|
281 | ||
282 |
# Cell creation |
|
283 | 78x |
rcell(res, |
284 | 78x |
label = lbl, |
285 | 78x |
format = formats_v[names(formats_v) == stat][[1]], |
286 | 78x |
format_na_str = na_level, |
287 | 78x |
indent_mod = ifelse(is.null(.indent_mods), 0L, .indent_mods) |
288 |
) |
|
289 |
} |
|
290 |
}, |
|
291 | 3x |
stat = .stats |
292 |
) |
|
293 | ||
294 |
# Main call to rtables |
|
295 | 3x |
analyze_colvars(lyt, |
296 | 3x |
afun = afun_list, |
297 | 3x |
nested = nested, |
298 | 3x |
extra_args = list(...) |
299 |
) |
|
300 |
} |
|
301 |
} |
|
302 | ||
303 |
# Help function |
|
304 |
get_last_col_split <- function(lyt) { |
|
305 | ! |
tail(tail(clayout(lyt), 1)[[1]], 1)[[1]] |
306 |
} |
1 |
#' Controls for Cox Regression |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Sets a list of parameters for Cox regression fit. Used internally. |
|
6 |
#' |
|
7 |
#' @inheritParams argument_convention |
|
8 |
#' @param pval_method (`string`)\cr the method used for estimation of p.values; `wald` (default) or `likelihood`. |
|
9 |
#' @param interaction (`flag`)\cr if `TRUE`, the model includes the interaction between the studied |
|
10 |
#' treatment and candidate covariate. Note that for univariate models without treatment arm, and |
|
11 |
#' multivariate models, no interaction can be used so that this needs to be `FALSE`. |
|
12 |
#' @param ties (`string`)\cr among `exact` (equivalent to `DISCRETE` in SAS), `efron` and `breslow`, |
|
13 |
#' see [survival::coxph()]. Note: there is no equivalent of SAS `EXACT` method in R. |
|
14 |
#' |
|
15 |
#' @return A `list` of items with names corresponding to the arguments. |
|
16 |
#' |
|
17 |
#' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()]. |
|
18 |
#' |
|
19 |
#' @examples |
|
20 |
#' control_coxreg() |
|
21 |
#' |
|
22 |
#' @export |
|
23 |
control_coxreg <- function(pval_method = c("wald", "likelihood"), |
|
24 |
ties = c("exact", "efron", "breslow"), |
|
25 |
conf_level = 0.95, |
|
26 |
interaction = FALSE) { |
|
27 | 40x |
pval_method <- match.arg(pval_method) |
28 | 40x |
ties <- match.arg(ties) |
29 | 40x |
checkmate::assert_flag(interaction) |
30 | 40x |
assert_proportion_value(conf_level) |
31 | 40x |
list( |
32 | 40x |
pval_method = pval_method, |
33 | 40x |
ties = ties, |
34 | 40x |
conf_level = conf_level, |
35 | 40x |
interaction = interaction |
36 |
) |
|
37 |
} |
|
38 | ||
39 |
#' Custom Tidy Methods for Cox Regression |
|
40 |
#' |
|
41 |
#' @description `r lifecycle::badge("stable")` |
|
42 |
#' |
|
43 |
#' @inheritParams argument_convention |
|
44 |
#' @param x (`list`)\cr Result of the Cox regression model fitted by [fit_coxreg_univar()] (for univariate models) |
|
45 |
#' or [fit_coxreg_multivar()] (for multivariate models). |
|
46 |
#' |
|
47 |
#' @return [tidy()] returns: |
|
48 |
#' * For `summary.coxph` objects, a `data.frame` with columns: `Pr(>|z|)`, `exp(coef)`, `exp(-coef)`, `lower .95`, |
|
49 |
#' `upper .95`, `level`, and `n`. |
|
50 |
#' * For `coxreg.univar` objects, a `data.frame` with columns: `effect`, `term`, `term_label`, `level`, `n`, `hr`, |
|
51 |
#' `lcl`, `ucl`, `pval`, and `ci`. |
|
52 |
#' * For `coxreg.multivar` objects, a `data.frame` with columns: `term`, `pval`, `term_label`, `hr`, `lcl`, `ucl`, |
|
53 |
#' `level`, and `ci`. |
|
54 |
#' |
|
55 |
#' @seealso [cox_regression] |
|
56 |
#' |
|
57 |
#' @name tidy_coxreg |
|
58 |
NULL |
|
59 | ||
60 |
#' @describeIn tidy_coxreg Custom tidy method for [survival::coxph()] summary results. |
|
61 |
#' |
|
62 |
#' Tidy the [survival::coxph()] results into a `data.frame` to extract model results. |
|
63 |
#' |
|
64 |
#' @method tidy summary.coxph |
|
65 |
#' |
|
66 |
#' @examples |
|
67 |
#' library(survival) |
|
68 |
#' library(broom) |
|
69 |
#' |
|
70 |
#' set.seed(1, kind = "Mersenne-Twister") |
|
71 |
#' |
|
72 |
#' dta_bladder <- with( |
|
73 |
#' data = bladder[bladder$enum < 5, ], |
|
74 |
#' data.frame( |
|
75 |
#' time = stop, |
|
76 |
#' status = event, |
|
77 |
#' armcd = as.factor(rx), |
|
78 |
#' covar1 = as.factor(enum), |
|
79 |
#' covar2 = factor( |
|
80 |
#' sample(as.factor(enum)), |
|
81 |
#' levels = 1:4, labels = c("F", "F", "M", "M") |
|
82 |
#' ) |
|
83 |
#' ) |
|
84 |
#' ) |
|
85 |
#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)") |
|
86 |
#' formatters::var_labels(dta_bladder)[names(labels)] <- labels |
|
87 |
#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE) |
|
88 |
#' |
|
89 |
#' formula <- "survival::Surv(time, status) ~ armcd + covar1" |
|
90 |
#' msum <- summary(coxph(stats::as.formula(formula), data = dta_bladder)) |
|
91 |
#' tidy(msum) |
|
92 |
#' |
|
93 |
#' @export |
|
94 |
tidy.summary.coxph <- function(x, # nolint |
|
95 |
...) { |
|
96 | 120x |
checkmate::assert_class(x, "summary.coxph") |
97 | 120x |
pval <- x$coefficients |
98 | 120x |
confint <- x$conf.int |
99 | 120x |
levels <- rownames(pval) |
100 | ||
101 | 120x |
pval <- tibble::as_tibble(pval) |
102 | 120x |
confint <- tibble::as_tibble(confint) |
103 | ||
104 | 120x |
ret <- cbind(pval[, grepl("Pr", names(pval))], confint) |
105 | 120x |
ret$level <- levels |
106 | 120x |
ret$n <- x[["n"]] |
107 | 120x |
ret |
108 |
} |
|
109 | ||
110 |
#' @describeIn tidy_coxreg Custom tidy method for a univariate Cox regression. |
|
111 |
#' |
|
112 |
#' Tidy up the result of a Cox regression model fitted by [fit_coxreg_univar()]. |
|
113 |
#' |
|
114 |
#' @method tidy coxreg.univar |
|
115 |
#' |
|
116 |
#' @examples |
|
117 |
#' ## Cox regression: arm + 1 covariate. |
|
118 |
#' mod1 <- fit_coxreg_univar( |
|
119 |
#' variables = list( |
|
120 |
#' time = "time", event = "status", arm = "armcd", |
|
121 |
#' covariates = "covar1" |
|
122 |
#' ), |
|
123 |
#' data = dta_bladder, |
|
124 |
#' control = control_coxreg(conf_level = 0.91) |
|
125 |
#' ) |
|
126 |
#' |
|
127 |
#' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates. |
|
128 |
#' mod2 <- fit_coxreg_univar( |
|
129 |
#' variables = list( |
|
130 |
#' time = "time", event = "status", arm = "armcd", |
|
131 |
#' covariates = c("covar1", "covar2") |
|
132 |
#' ), |
|
133 |
#' data = dta_bladder, |
|
134 |
#' control = control_coxreg(conf_level = 0.91, interaction = TRUE) |
|
135 |
#' ) |
|
136 |
#' |
|
137 |
#' tidy(mod1) |
|
138 |
#' tidy(mod2) |
|
139 |
#' |
|
140 |
#' @export |
|
141 |
tidy.coxreg.univar <- function(x, # nolint |
|
142 |
...) { |
|
143 | 26x |
checkmate::assert_class(x, "coxreg.univar") |
144 | 26x |
mod <- x$mod |
145 | 26x |
vars <- c(x$vars$arm, x$vars$covariates) |
146 | 26x |
has_arm <- "arm" %in% names(x$vars) |
147 | ||
148 | 26x |
result <- if (!has_arm) { |
149 | 5x |
Map( |
150 | 5x |
mod = mod, vars = vars, |
151 | 5x |
f = function(mod, vars) { |
152 | 6x |
h_coxreg_multivar_extract( |
153 | 6x |
var = vars, |
154 | 6x |
data = x$data, |
155 | 6x |
mod = mod, |
156 | 6x |
control = x$control |
157 |
) |
|
158 |
} |
|
159 |
) |
|
160 | 26x |
} else if (x$control$interaction) { |
161 | 8x |
Map( |
162 | 8x |
mod = mod, covar = vars, |
163 | 8x |
f = function(mod, covar) { |
164 | 17x |
h_coxreg_extract_interaction( |
165 | 17x |
effect = x$vars$arm, covar = covar, mod = mod, data = x$data, |
166 | 17x |
at = x$at, control = x$control |
167 |
) |
|
168 |
} |
|
169 |
) |
|
170 |
} else { |
|
171 | 13x |
Map( |
172 | 13x |
mod = mod, vars = vars, |
173 | 13x |
f = function(mod, vars) { |
174 | 34x |
h_coxreg_univar_extract( |
175 | 34x |
effect = x$vars$arm, covar = vars, data = x$data, mod = mod, |
176 | 34x |
control = x$control |
177 |
) |
|
178 |
} |
|
179 |
) |
|
180 |
} |
|
181 | 26x |
result <- do.call(rbind, result) |
182 | ||
183 | 26x |
result$ci <- Map(lcl = result$lcl, ucl = result$ucl, f = function(lcl, ucl) c(lcl, ucl)) |
184 | 26x |
result$n <- lapply(result$n, empty_vector_if_na) |
185 | 26x |
result$ci <- lapply(result$ci, empty_vector_if_na) |
186 | 26x |
result$hr <- lapply(result$hr, empty_vector_if_na) |
187 | 26x |
if (x$control$interaction) { |
188 | 8x |
result$pval_inter <- lapply(result$pval_inter, empty_vector_if_na) |
189 |
# Remove interaction p-values due to change in specifications. |
|
190 | 8x |
result$pval[result$effect != "Treatment:"] <- NA |
191 |
} |
|
192 | 26x |
result$pval <- lapply(result$pval, empty_vector_if_na) |
193 | 26x |
attr(result, "conf_level") <- x$control$conf_level |
194 | 26x |
result |
195 |
} |
|
196 | ||
197 |
#' @describeIn tidy_coxreg Custom tidy method for a multivariate Cox regression. |
|
198 |
#' |
|
199 |
#' Tidy up the result of a Cox regression model fitted by [fit_coxreg_multivar()]. |
|
200 |
#' |
|
201 |
#' @method tidy coxreg.multivar |
|
202 |
#' |
|
203 |
#' @examples |
|
204 |
#' multivar_model <- fit_coxreg_multivar( |
|
205 |
#' variables = list( |
|
206 |
#' time = "time", event = "status", arm = "armcd", |
|
207 |
#' covariates = c("covar1", "covar2") |
|
208 |
#' ), |
|
209 |
#' data = dta_bladder |
|
210 |
#' ) |
|
211 |
#' broom::tidy(multivar_model) |
|
212 |
#' |
|
213 |
#' @export |
|
214 |
tidy.coxreg.multivar <- function(x, # nolint |
|
215 |
...) { |
|
216 | 8x |
checkmate::assert_class(x, "coxreg.multivar") |
217 | 8x |
vars <- c(x$vars$arm, x$vars$covariates) |
218 | ||
219 |
# Convert the model summaries to data. |
|
220 | 8x |
result <- Map( |
221 | 8x |
vars = vars, |
222 | 8x |
f = function(vars) { |
223 | 28x |
h_coxreg_multivar_extract( |
224 | 28x |
var = vars, data = x$data, |
225 | 28x |
mod = x$mod, control = x$control |
226 |
) |
|
227 |
} |
|
228 |
) |
|
229 | 8x |
result <- do.call(rbind, result) |
230 | ||
231 | 8x |
result$ci <- Map(lcl = result$lcl, ucl = result$ucl, f = function(lcl, ucl) c(lcl, ucl)) |
232 | 8x |
result$ci <- lapply(result$ci, empty_vector_if_na) |
233 | 8x |
result$hr <- lapply(result$hr, empty_vector_if_na) |
234 | 8x |
result$pval <- lapply(result$pval, empty_vector_if_na) |
235 | 8x |
result <- result[, names(result) != "n"] |
236 | 8x |
attr(result, "conf_level") <- x$control$conf_level |
237 | ||
238 | 8x |
result |
239 |
} |
|
240 | ||
241 |
#' Fits for Cox Proportional Hazards Regression |
|
242 |
#' |
|
243 |
#' @description `r lifecycle::badge("stable")` |
|
244 |
#' |
|
245 |
#' Fitting functions for univariate and multivariate Cox regression models. |
|
246 |
#' |
|
247 |
#' @param variables (`list`)\cr a named list corresponds to the names of variables found in `data`, passed as a named |
|
248 |
#' list and corresponding to `time`, `event`, `arm`, `strata`, and `covariates` terms. If `arm` is missing from |
|
249 |
#' `variables`, then only Cox model(s) including the `covariates` will be fitted and the corresponding effect |
|
250 |
#' estimates will be tabulated later. |
|
251 |
#' @param data (`data.frame`)\cr the dataset containing the variables to fit the models. |
|
252 |
#' @param at (`list` of `numeric`)\cr when the candidate covariate is a `numeric`, use `at` to specify |
|
253 |
#' the value of the covariate at which the effect should be estimated. |
|
254 |
#' @param control (`list`)\cr a list of parameters as returned by the helper function [control_coxreg()]. |
|
255 |
#' |
|
256 |
#' @seealso [h_cox_regression] for relevant helper functions, [cox_regression]. |
|
257 |
#' |
|
258 |
#' @examples |
|
259 |
#' library(survival) |
|
260 |
#' |
|
261 |
#' set.seed(1, kind = "Mersenne-Twister") |
|
262 |
#' |
|
263 |
#' # Testing dataset [survival::bladder]. |
|
264 |
#' dta_bladder <- with( |
|
265 |
#' data = bladder[bladder$enum < 5, ], |
|
266 |
#' data.frame( |
|
267 |
#' time = stop, |
|
268 |
#' status = event, |
|
269 |
#' armcd = as.factor(rx), |
|
270 |
#' covar1 = as.factor(enum), |
|
271 |
#' covar2 = factor( |
|
272 |
#' sample(as.factor(enum)), |
|
273 |
#' levels = 1:4, labels = c("F", "F", "M", "M") |
|
274 |
#' ) |
|
275 |
#' ) |
|
276 |
#' ) |
|
277 |
#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)") |
|
278 |
#' formatters::var_labels(dta_bladder)[names(labels)] <- labels |
|
279 |
#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE) |
|
280 |
#' |
|
281 |
#' plot( |
|
282 |
#' survfit(Surv(time, status) ~ armcd + covar1, data = dta_bladder), |
|
283 |
#' lty = 2:4, |
|
284 |
#' xlab = "Months", |
|
285 |
#' col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4") |
|
286 |
#' ) |
|
287 |
#' |
|
288 |
#' @name fit_coxreg |
|
289 |
NULL |
|
290 | ||
291 |
#' @describeIn fit_coxreg Fit a series of univariate Cox regression models given the inputs. |
|
292 |
#' |
|
293 |
#' @return |
|
294 |
#' * `fit_coxreg_univar()` returns a `coxreg.univar` class object which is a named `list` |
|
295 |
#' with 5 elements: |
|
296 |
#' * `mod`: Cox regression models fitted by [survival::coxph()]. |
|
297 |
#' * `data`: The original data frame input. |
|
298 |
#' * `control`: The original control input. |
|
299 |
#' * `vars`: The variables used in the model. |
|
300 |
#' * `at`: Value of the covariate at which the effect should be estimated. |
|
301 |
#' |
|
302 |
#' @note When using `fit_coxreg_univar` there should be two study arms. |
|
303 |
#' |
|
304 |
#' @examples |
|
305 |
#' # fit_coxreg_univar |
|
306 |
#' |
|
307 |
#' ## Cox regression: arm + 1 covariate. |
|
308 |
#' mod1 <- fit_coxreg_univar( |
|
309 |
#' variables = list( |
|
310 |
#' time = "time", event = "status", arm = "armcd", |
|
311 |
#' covariates = "covar1" |
|
312 |
#' ), |
|
313 |
#' data = dta_bladder, |
|
314 |
#' control = control_coxreg(conf_level = 0.91) |
|
315 |
#' ) |
|
316 |
#' |
|
317 |
#' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates. |
|
318 |
#' mod2 <- fit_coxreg_univar( |
|
319 |
#' variables = list( |
|
320 |
#' time = "time", event = "status", arm = "armcd", |
|
321 |
#' covariates = c("covar1", "covar2") |
|
322 |
#' ), |
|
323 |
#' data = dta_bladder, |
|
324 |
#' control = control_coxreg(conf_level = 0.91, interaction = TRUE) |
|
325 |
#' ) |
|
326 |
#' |
|
327 |
#' ## Cox regression: arm + 1 covariate, stratified analysis. |
|
328 |
#' mod3 <- fit_coxreg_univar( |
|
329 |
#' variables = list( |
|
330 |
#' time = "time", event = "status", arm = "armcd", strata = "covar2", |
|
331 |
#' covariates = c("covar1") |
|
332 |
#' ), |
|
333 |
#' data = dta_bladder, |
|
334 |
#' control = control_coxreg(conf_level = 0.91) |
|
335 |
#' ) |
|
336 |
#' |
|
337 |
#' ## Cox regression: no arm, only covariates. |
|
338 |
#' mod4 <- fit_coxreg_univar( |
|
339 |
#' variables = list( |
|
340 |
#' time = "time", event = "status", |
|
341 |
#' covariates = c("covar1", "covar2") |
|
342 |
#' ), |
|
343 |
#' data = dta_bladder |
|
344 |
#' ) |
|
345 |
#' |
|
346 |
#' @export |
|
347 |
fit_coxreg_univar <- function(variables, |
|
348 |
data, |
|
349 |
at = list(), |
|
350 |
control = control_coxreg()) { |
|
351 | 31x |
checkmate::assert_list(variables, names = "named") |
352 | 31x |
has_arm <- "arm" %in% names(variables) |
353 | 31x |
arm_name <- if (has_arm) "arm" else NULL |
354 | ||
355 | 31x |
checkmate::assert_character(variables$covariates, null.ok = TRUE) |
356 | ||
357 | 31x |
assert_df_with_variables(data, variables) |
358 | 31x |
assert_list_of_variables(variables[c(arm_name, "event", "time")]) |
359 | ||
360 | 31x |
if (!is.null(variables$strata)) { |
361 | 4x |
checkmate::assert_disjunct(control$pval_method, "likelihood") |
362 |
} |
|
363 | 30x |
if (has_arm) { |
364 | 24x |
assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2) |
365 |
} |
|
366 | 29x |
vars <- unlist(variables[c(arm_name, "covariates", "strata")], use.names = FALSE) |
367 | 29x |
for (i in vars) { |
368 | 66x |
if (is.factor(data[[i]])) { |
369 | 58x |
attr(data[[i]], "levels") <- levels(droplevels(data[[i]])) |
370 |
} |
|
371 |
} |
|
372 | 29x |
forms <- h_coxreg_univar_formulas(variables, interaction = control$interaction) |
373 | 29x |
mod <- lapply( |
374 | 29x |
forms, function(x) { |
375 | 62x |
survival::coxph(formula = stats::as.formula(x), data = data, ties = control$ties) |
376 |
} |
|
377 |
) |
|
378 | 29x |
structure( |
379 | 29x |
list( |
380 | 29x |
mod = mod, |
381 | 29x |
data = data, |
382 | 29x |
control = control, |
383 | 29x |
vars = variables, |
384 | 29x |
at = at |
385 |
), |
|
386 | 29x |
class = "coxreg.univar" |
387 |
) |
|
388 |
} |
|
389 | ||
390 |
#' @describeIn fit_coxreg Fit a multivariate Cox regression model. |
|
391 |
#' |
|
392 |
#' @return |
|
393 |
#' * `fit_coxreg_multivar()` returns a `coxreg.multivar` class object which is a named list |
|
394 |
#' with 4 elements: |
|
395 |
#' * `mod`: Cox regression model fitted by [survival::coxph()]. |
|
396 |
#' * `data`: The original data frame input. |
|
397 |
#' * `control`: The original control input. |
|
398 |
#' * `vars`: The variables used in the model. |
|
399 |
#' |
|
400 |
#' @examples |
|
401 |
#' # fit_coxreg_multivar |
|
402 |
#' |
|
403 |
#' ## Cox regression: multivariate Cox regression. |
|
404 |
#' multivar_model <- fit_coxreg_multivar( |
|
405 |
#' variables = list( |
|
406 |
#' time = "time", event = "status", arm = "armcd", |
|
407 |
#' covariates = c("covar1", "covar2") |
|
408 |
#' ), |
|
409 |
#' data = dta_bladder |
|
410 |
#' ) |
|
411 |
#' |
|
412 |
#' # Example without treatment arm. |
|
413 |
#' multivar_covs_model <- fit_coxreg_multivar( |
|
414 |
#' variables = list( |
|
415 |
#' time = "time", event = "status", |
|
416 |
#' covariates = c("covar1", "covar2") |
|
417 |
#' ), |
|
418 |
#' data = dta_bladder |
|
419 |
#' ) |
|
420 |
#' |
|
421 |
#' @export |
|
422 |
fit_coxreg_multivar <- function(variables, |
|
423 |
data, |
|
424 |
control = control_coxreg()) { |
|
425 | 51x |
checkmate::assert_list(variables, names = "named") |
426 | 51x |
has_arm <- "arm" %in% names(variables) |
427 | 51x |
arm_name <- if (has_arm) "arm" else NULL |
428 | ||
429 | 51x |
if (!is.null(variables$covariates)) { |
430 | 13x |
checkmate::assert_character(variables$covariates) |
431 |
} |
|
432 | ||
433 | 51x |
checkmate::assert_false(control$interaction) |
434 | 51x |
assert_df_with_variables(data, variables) |
435 | 51x |
assert_list_of_variables(variables[c(arm_name, "event", "time")]) |
436 | ||
437 | 51x |
if (!is.null(variables$strata)) { |
438 | 3x |
checkmate::assert_disjunct(control$pval_method, "likelihood") |
439 |
} |
|
440 | ||
441 | 50x |
form <- h_coxreg_multivar_formula(variables) |
442 | 50x |
mod <- survival::coxph( |
443 | 50x |
formula = stats::as.formula(form), |
444 | 50x |
data = data, |
445 | 50x |
ties = control$ties |
446 |
) |
|
447 | 50x |
structure( |
448 | 50x |
list( |
449 | 50x |
mod = mod, |
450 | 50x |
data = data, |
451 | 50x |
control = control, |
452 | 50x |
vars = variables |
453 |
), |
|
454 | 50x |
class = "coxreg.multivar" |
455 |
) |
|
456 |
} |
|
457 | ||
458 |
#' Muffled `car::Anova` |
|
459 |
#' |
|
460 |
#' Applied on survival models, [car::Anova()] signal that the `strata` terms is dropped from the model formula when |
|
461 |
#' present, this function deliberately muffles this message. |
|
462 |
#' |
|
463 |
#' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()]. |
|
464 |
#' @param test_statistic (`string`)\cr the method used for estimation of p.values; `wald` (default) or `likelihood`. |
|
465 |
#' |
|
466 |
#' @return Returns the output of [car::Anova()], with convergence message muffled. |
|
467 |
#' |
|
468 |
#' @keywords internal |
|
469 |
muffled_car_anova <- function(mod, test_statistic) { |
|
470 | 134x |
tryCatch( |
471 | 134x |
withCallingHandlers( |
472 | 134x |
expr = { |
473 | 134x |
car::Anova( |
474 | 134x |
mod, |
475 | 134x |
test.statistic = test_statistic, |
476 | 134x |
type = "III" |
477 |
) |
|
478 |
}, |
|
479 | 134x |
message = function(m) invokeRestart("muffleMessage"), |
480 | 134x |
error = function(e) { |
481 | 1x |
stop(paste( |
482 | 1x |
"the model seems to have convergence problems, please try to change", |
483 | 1x |
"the configuration of covariates or strata variables, e.g.", |
484 | 1x |
"- original error:", e |
485 |
)) |
|
486 |
} |
|
487 |
) |
|
488 |
) |
|
489 |
} |
1 |
#' Cox Regression Helper: Interactions |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Test and estimate the effect of a treatment in interaction with a covariate. |
|
6 |
#' The effect is estimated as the HR of the tested treatment for a given level |
|
7 |
#' of the covariate, in comparison to the treatment control. |
|
8 |
#' |
|
9 |
#' @inheritParams argument_convention |
|
10 |
#' @param x (`numeric` or `factor`)\cr the values of the effect to be tested. |
|
11 |
#' @param effect (`string`)\cr the name of the effect to be tested and estimated. |
|
12 |
#' @param covar (`string`)\cr the name of the covariate in the model. |
|
13 |
#' @param mod (`coxph`)\cr the Cox regression model. |
|
14 |
#' @param label (`string`)\cr the label to be returned as `term_label`. |
|
15 |
#' @param control (`list`)\cr a list of controls as returned by [control_coxreg()]. |
|
16 |
#' @param ... see methods. |
|
17 |
#' |
|
18 |
#' @examples |
|
19 |
#' library(survival) |
|
20 |
#' |
|
21 |
#' set.seed(1, kind = "Mersenne-Twister") |
|
22 |
#' |
|
23 |
#' # Testing dataset [survival::bladder]. |
|
24 |
#' dta_bladder <- with( |
|
25 |
#' data = bladder[bladder$enum < 5, ], |
|
26 |
#' data.frame( |
|
27 |
#' time = stop, |
|
28 |
#' status = event, |
|
29 |
#' armcd = as.factor(rx), |
|
30 |
#' covar1 = as.factor(enum), |
|
31 |
#' covar2 = factor( |
|
32 |
#' sample(as.factor(enum)), |
|
33 |
#' levels = 1:4, |
|
34 |
#' labels = c("F", "F", "M", "M") |
|
35 |
#' ) |
|
36 |
#' ) |
|
37 |
#' ) |
|
38 |
#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)") |
|
39 |
#' formatters::var_labels(dta_bladder)[names(labels)] <- labels |
|
40 |
#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE) |
|
41 |
#' |
|
42 |
#' plot( |
|
43 |
#' survfit(Surv(time, status) ~ armcd + covar1, data = dta_bladder), |
|
44 |
#' lty = 2:4, |
|
45 |
#' xlab = "Months", |
|
46 |
#' col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4") |
|
47 |
#' ) |
|
48 |
#' |
|
49 |
#' @name cox_regression_inter |
|
50 |
NULL |
|
51 | ||
52 |
#' @describeIn cox_regression_inter S3 generic helper function to determine interaction effect. |
|
53 |
#' |
|
54 |
#' @return |
|
55 |
#' * `h_coxreg_inter_effect()` returns a `data.frame` of covariate interaction effects consisting of the following |
|
56 |
#' variables: `effect`, `term`, `term_label`, `level`, `n`, `hr`, `lcl`, `ucl`, `pval`, and `pval_inter`. |
|
57 |
#' |
|
58 |
#' @export |
|
59 |
h_coxreg_inter_effect <- function(x, |
|
60 |
effect, |
|
61 |
covar, |
|
62 |
mod, |
|
63 |
label, |
|
64 |
control, |
|
65 |
...) { |
|
66 | 16x |
UseMethod("h_coxreg_inter_effect", x) |
67 |
} |
|
68 | ||
69 |
#' @describeIn cox_regression_inter Estimate the interaction with a `numeric` covariate. |
|
70 |
#' |
|
71 |
#' @param at (`list`)\cr a list with items named after the covariate, every |
|
72 |
#' item is a vector of levels at which the interaction should be estimated. |
|
73 |
#' |
|
74 |
#' @export |
|
75 |
h_coxreg_inter_effect.numeric <- function(x, |
|
76 |
effect, |
|
77 |
covar, |
|
78 |
mod, |
|
79 |
label, |
|
80 |
control, |
|
81 |
at, |
|
82 |
...) { |
|
83 | 7x |
betas <- stats::coef(mod) |
84 | 7x |
attrs <- attr(stats::terms(mod), "term.labels") |
85 | 7x |
term_indices <- grep( |
86 | 7x |
pattern = effect, |
87 | 7x |
x = attrs[!grepl("strata\\(", attrs)] |
88 |
) |
|
89 | 7x |
checkmate::assert_vector(term_indices, len = 2) |
90 | 7x |
betas <- betas[term_indices] |
91 | 7x |
betas_var <- diag(stats::vcov(mod))[term_indices] |
92 | 7x |
betas_cov <- stats::vcov(mod)[term_indices[1], term_indices[2]] |
93 | 7x |
xval <- if (is.null(at[[covar]])) { |
94 | 6x |
stats::median(x) |
95 |
} else { |
|
96 | 1x |
at[[covar]] |
97 |
} |
|
98 | 7x |
effect_index <- !grepl(covar, names(betas)) |
99 | 7x |
coef_hat <- betas[effect_index] + xval * betas[!effect_index] |
100 | 7x |
coef_se <- sqrt( |
101 | 7x |
betas_var[effect_index] + |
102 | 7x |
xval ^ 2 * betas_var[!effect_index] + # styler: off |
103 | 7x |
2 * xval * betas_cov |
104 |
) |
|
105 | 7x |
q_norm <- stats::qnorm((1 + control$conf_level) / 2) |
106 | 7x |
data.frame( |
107 | 7x |
effect = "Covariate:", |
108 | 7x |
term = rep(covar, length(xval)), |
109 | 7x |
term_label = paste0(" ", xval), |
110 | 7x |
level = as.character(xval), |
111 | 7x |
n = NA, |
112 | 7x |
hr = exp(coef_hat), |
113 | 7x |
lcl = exp(coef_hat - q_norm * coef_se), |
114 | 7x |
ucl = exp(coef_hat + q_norm * coef_se), |
115 | 7x |
pval = NA, |
116 | 7x |
pval_inter = NA, |
117 | 7x |
stringsAsFactors = FALSE |
118 |
) |
|
119 |
} |
|
120 | ||
121 |
#' @describeIn cox_regression_inter Estimate the interaction with a `factor` covariate. |
|
122 |
#' |
|
123 |
#' @param data (`data.frame`)\cr the data frame on which the model was fit. |
|
124 |
#' |
|
125 |
#' @export |
|
126 |
h_coxreg_inter_effect.factor <- function(x, |
|
127 |
effect, |
|
128 |
covar, |
|
129 |
mod, |
|
130 |
label, |
|
131 |
control, |
|
132 |
data, |
|
133 |
...) { |
|
134 | 9x |
y <- h_coxreg_inter_estimations( |
135 | 9x |
variable = effect, given = covar, |
136 | 9x |
lvl_var = levels(data[[effect]]), |
137 | 9x |
lvl_given = levels(data[[covar]]), |
138 | 9x |
mod = mod, |
139 | 9x |
conf_level = 0.95 |
140 | 9x |
)[[1]] |
141 | ||
142 | 9x |
data.frame( |
143 | 9x |
effect = "Covariate:", |
144 | 9x |
term = rep(covar, nrow(y)), |
145 | 9x |
term_label = as.character(paste0(" ", levels(data[[covar]]))), |
146 | 9x |
level = as.character(levels(data[[covar]])), |
147 | 9x |
n = NA, |
148 | 9x |
hr = y[, "hr"], |
149 | 9x |
lcl = y[, "lcl"], |
150 | 9x |
ucl = y[, "ucl"], |
151 | 9x |
pval = NA, |
152 | 9x |
pval_inter = NA, |
153 | 9x |
stringsAsFactors = FALSE |
154 |
) |
|
155 |
} |
|
156 | ||
157 |
#' @describeIn cox_regression_inter A higher level function to get |
|
158 |
#' the results of the interaction test and the estimated values. |
|
159 |
#' |
|
160 |
#' @return |
|
161 |
#' * `h_coxreg_extract_interaction()` returns the result of an interaction test and the estimated values. If |
|
162 |
#' no interaction, [h_coxreg_univar_extract()] is applied instead. |
|
163 |
#' |
|
164 |
#' @examples |
|
165 |
#' mod <- coxph(Surv(time, status) ~ armcd * covar1, data = dta_bladder) |
|
166 |
#' h_coxreg_extract_interaction( |
|
167 |
#' mod = mod, effect = "armcd", covar = "covar1", data = dta_bladder, |
|
168 |
#' control = control_coxreg() |
|
169 |
#' ) |
|
170 |
#' |
|
171 |
#' @export |
|
172 |
h_coxreg_extract_interaction <- function(effect, |
|
173 |
covar, |
|
174 |
mod, |
|
175 |
data, |
|
176 |
at, |
|
177 |
control) { |
|
178 | 21x |
if (!any(attr(stats::terms(mod), "order") == 2)) { |
179 | 8x |
y <- h_coxreg_univar_extract( |
180 | 8x |
effect = effect, covar = covar, mod = mod, data = data, control = control |
181 |
) |
|
182 | 8x |
y$pval_inter <- NA |
183 | 8x |
y |
184 |
} else { |
|
185 | 13x |
test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method] |
186 | ||
187 |
# Test the main treatment effect. |
|
188 | 13x |
mod_aov <- muffled_car_anova(mod, test_statistic) |
189 | 13x |
sum_anova <- broom::tidy(mod_aov) |
190 | 13x |
pval <- sum_anova[sum_anova$term == effect, ][["p.value"]] |
191 | ||
192 |
# Test the interaction effect. |
|
193 | 13x |
pval_inter <- sum_anova[grep(":", sum_anova$term), ][["p.value"]] |
194 | 13x |
covar_test <- data.frame( |
195 | 13x |
effect = "Covariate:", |
196 | 13x |
term = covar, |
197 | 13x |
term_label = unname(labels_or_names(data[covar])), |
198 | 13x |
level = "", |
199 | 13x |
n = mod$n, hr = NA, lcl = NA, ucl = NA, pval = pval, |
200 | 13x |
pval_inter = pval_inter, |
201 | 13x |
stringsAsFactors = FALSE |
202 |
) |
|
203 |
# Estimate the interaction. |
|
204 | 13x |
y <- h_coxreg_inter_effect( |
205 | 13x |
data[[covar]], |
206 | 13x |
covar = covar, |
207 | 13x |
effect = effect, |
208 | 13x |
mod = mod, |
209 | 13x |
label = unname(labels_or_names(data[covar])), |
210 | 13x |
at = at, |
211 | 13x |
control = control, |
212 | 13x |
data = data |
213 |
) |
|
214 | 13x |
rbind(covar_test, y) |
215 |
} |
|
216 |
} |
|
217 | ||
218 |
#' @describeIn cox_regression_inter Hazard ratio estimation in interactions. |
|
219 |
#' |
|
220 |
#' @param variable,given (`string`)\cr the name of variables in interaction. We seek the estimation |
|
221 |
#' of the levels of `variable` given the levels of `given`. |
|
222 |
#' @param lvl_var,lvl_given (`character`)\cr corresponding levels has given by [levels()]. |
|
223 |
#' @param mod (`coxph`)\cr a fitted Cox regression model (see [survival::coxph()]). |
|
224 |
#' |
|
225 |
#' @details Given the cox regression investigating the effect of Arm (A, B, C; reference A) |
|
226 |
#' and Sex (F, M; reference Female) and the model being abbreviated: y ~ Arm + Sex + Arm:Sex. |
|
227 |
#' The cox regression estimates the coefficients along with a variance-covariance matrix for: |
|
228 |
#' |
|
229 |
#' - b1 (arm b), b2 (arm c) |
|
230 |
#' - b3 (sex m) |
|
231 |
#' - b4 (arm b: sex m), b5 (arm c: sex m) |
|
232 |
#' |
|
233 |
#' The estimation of the Hazard Ratio for arm C/sex M is given in reference |
|
234 |
#' to arm A/Sex M by exp(b2 + b3 + b5)/ exp(b3) = exp(b2 + b5). |
|
235 |
#' The interaction coefficient is deduced by b2 + b5 while the standard error |
|
236 |
#' is obtained as $sqrt(Var b2 + Var b5 + 2 * covariance (b2,b5))$. |
|
237 |
#' |
|
238 |
#' @return |
|
239 |
#' * `h_coxreg_inter_estimations()` returns a list of matrices (one per level of variable) with rows corresponding |
|
240 |
#' to the combinations of `variable` and `given`, with columns: |
|
241 |
#' * `coef_hat`: Estimation of the coefficient. |
|
242 |
#' * `coef_se`: Standard error of the estimation. |
|
243 |
#' * `hr`: Hazard ratio. |
|
244 |
#' * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio. |
|
245 |
#' |
|
246 |
#' @examples |
|
247 |
#' mod <- coxph(Surv(time, status) ~ armcd * covar1, data = dta_bladder) |
|
248 |
#' result <- h_coxreg_inter_estimations( |
|
249 |
#' variable = "armcd", given = "covar1", |
|
250 |
#' lvl_var = levels(dta_bladder$armcd), |
|
251 |
#' lvl_given = levels(dta_bladder$covar1), |
|
252 |
#' mod = mod, conf_level = .95 |
|
253 |
#' ) |
|
254 |
#' result |
|
255 |
#' |
|
256 |
#' @export |
|
257 |
h_coxreg_inter_estimations <- function(variable, |
|
258 |
given, |
|
259 |
lvl_var, |
|
260 |
lvl_given, |
|
261 |
mod, |
|
262 |
conf_level = 0.95) { |
|
263 | 10x |
var_lvl <- paste0(variable, lvl_var[-1]) # [-1]: reference level |
264 | 10x |
giv_lvl <- paste0(given, lvl_given) |
265 | 10x |
design_mat <- expand.grid(variable = var_lvl, given = giv_lvl) |
266 | 10x |
design_mat <- design_mat[order(design_mat$variable, design_mat$given), ] |
267 | 10x |
design_mat <- within( |
268 | 10x |
data = design_mat, |
269 | 10x |
expr = { |
270 | 10x |
inter <- paste0(variable, ":", given) |
271 | 10x |
rev_inter <- paste0(given, ":", variable) |
272 |
} |
|
273 |
) |
|
274 | 10x |
split_by_variable <- design_mat$variable |
275 | 10x |
interaction_names <- paste(design_mat$variable, design_mat$given, sep = "/") |
276 | ||
277 | 10x |
mmat <- stats::model.matrix(mod)[1, ] |
278 | 10x |
mmat[!mmat == 0] <- 0 |
279 | ||
280 | 10x |
design_mat <- apply( |
281 | 10x |
X = design_mat, MARGIN = 1, FUN = function(x) { |
282 | 32x |
mmat[names(mmat) %in% x[-which(names(x) == "given")]] <- 1 |
283 | 32x |
mmat |
284 |
} |
|
285 |
) |
|
286 | 10x |
colnames(design_mat) <- interaction_names |
287 | ||
288 | 10x |
coef <- stats::coef(mod) |
289 | 10x |
vcov <- stats::vcov(mod) |
290 | 10x |
betas <- as.matrix(coef) |
291 | 10x |
coef_hat <- t(design_mat) %*% betas |
292 | 10x |
dimnames(coef_hat)[2] <- "coef" |
293 | 10x |
coef_se <- apply( |
294 | 10x |
design_mat, 2, |
295 | 10x |
function(x) { |
296 | 32x |
vcov_el <- as.logical(x) |
297 | 32x |
y <- vcov[vcov_el, vcov_el] |
298 | 32x |
y <- sum(y) |
299 | 32x |
y <- sqrt(y) |
300 | 32x |
return(y) |
301 |
} |
|
302 |
) |
|
303 | 10x |
q_norm <- stats::qnorm((1 + conf_level) / 2) |
304 | 10x |
y <- cbind(coef_hat, `se(coef)` = coef_se) |
305 | 10x |
y <- apply(y, 1, function(x) { |
306 | 32x |
x["hr"] <- exp(x["coef"]) |
307 | 32x |
x["lcl"] <- exp(x["coef"] - q_norm * x["se(coef)"]) |
308 | 32x |
x["ucl"] <- exp(x["coef"] + q_norm * x["se(coef)"]) |
309 | 32x |
x |
310 |
}) |
|
311 | 10x |
y <- t(y) |
312 | 10x |
y <- by(y, split_by_variable, identity) |
313 | 10x |
y <- lapply(y, as.matrix) |
314 | 10x |
attr(y, "details") <- paste0( |
315 | 10x |
"Estimations of ", variable, |
316 | 10x |
" hazard ratio given the level of ", given, " compared to ", |
317 | 10x |
variable, " level ", lvl_var[1], "." |
318 |
) |
|
319 | 10x |
y |
320 |
} |
1 |
#' Encode Categorical Missing Values in a Data Frame |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' This is a helper function to encode missing entries across groups of categorical |
|
6 |
#' variables in a data frame. |
|
7 |
#' |
|
8 |
#' @details Missing entries are those with `NA` or empty strings and will |
|
9 |
#' be replaced with a specified value. If factor variables include missing |
|
10 |
#' values, the missing value will be inserted as the last level. |
|
11 |
#' Similarly, in case character or logical variables should be converted to factors |
|
12 |
#' with the `char_as_factor` or `logical_as_factor` options, the missing values will |
|
13 |
#' be set as the last level. |
|
14 |
#' |
|
15 |
#' @param data (`data.frame`)\cr data set. |
|
16 |
#' @param omit_columns (`character`)\cr names of variables from `data` that should |
|
17 |
#' not be modified by this function. |
|
18 |
#' @param char_as_factor (`flag`)\cr whether to convert character variables |
|
19 |
#' in `data` to factors. |
|
20 |
#' @param logical_as_factor (`flag`)\cr whether to convert logical variables |
|
21 |
#' in `data` to factors. |
|
22 |
#' @param na_level (`string`)\cr used to replace all `NA` or empty |
|
23 |
#' values inside non-`omit_columns` columns. |
|
24 |
#' |
|
25 |
#' @return A `data.frame` with the chosen modifications applied. |
|
26 |
#' |
|
27 |
#' @seealso [sas_na()] and [explicit_na()] for other missing data helper functions. |
|
28 |
#' |
|
29 |
#' @examples |
|
30 |
#' my_data <- data.frame( |
|
31 |
#' u = c(TRUE, FALSE, NA, TRUE), |
|
32 |
#' v = factor(c("A", NA, NA, NA), levels = c("Z", "A")), |
|
33 |
#' w = c("A", "B", NA, "C"), |
|
34 |
#' x = c("D", "E", "F", NA), |
|
35 |
#' y = c("G", "H", "I", ""), |
|
36 |
#' z = c(1, 2, 3, 4), |
|
37 |
#' stringsAsFactors = FALSE |
|
38 |
#' ) |
|
39 |
#' |
|
40 |
#' # Example 1 |
|
41 |
#' # Encode missing values in all character or factor columns. |
|
42 |
#' df_explicit_na(my_data) |
|
43 |
#' # Also convert logical columns to factor columns. |
|
44 |
#' df_explicit_na(my_data, logical_as_factor = TRUE) |
|
45 |
#' # Encode missing values in a subset of columns. |
|
46 |
#' df_explicit_na(my_data, omit_columns = c("x", "y")) |
|
47 |
#' |
|
48 |
#' # Example 2 |
|
49 |
#' # Here we purposefully convert all `M` values to `NA` in the `SEX` variable. |
|
50 |
#' # After running `df_explicit_na` the `NA` values are encoded as `<Missing>` but they are not |
|
51 |
#' # included when generating `rtables`. |
|
52 |
#' adsl <- tern_ex_adsl |
|
53 |
#' adsl$SEX[adsl$SEX == "M"] <- NA |
|
54 |
#' adsl <- df_explicit_na(adsl) |
|
55 |
#' |
|
56 |
#' # If you want the `Na` values to be displayed in the table use the `na_level` argument. |
|
57 |
#' adsl <- tern_ex_adsl |
|
58 |
#' adsl$SEX[adsl$SEX == "M"] <- NA |
|
59 |
#' adsl <- df_explicit_na(adsl, na_level = "Missing Values") |
|
60 |
#' |
|
61 |
#' # Example 3 |
|
62 |
#' # Numeric variables that have missing values are not altered. This means that any `NA` value in |
|
63 |
#' # a numeric variable will not be included in the summary statistics, nor will they be included |
|
64 |
#' # in the denominator value for calculating the percent values. |
|
65 |
#' adsl <- tern_ex_adsl |
|
66 |
#' adsl$AGE[adsl$AGE < 30] <- NA |
|
67 |
#' adsl <- df_explicit_na(adsl) |
|
68 |
#' |
|
69 |
#' @export |
|
70 |
df_explicit_na <- function(data, |
|
71 |
omit_columns = NULL, |
|
72 |
char_as_factor = TRUE, |
|
73 |
logical_as_factor = FALSE, |
|
74 |
na_level = "<Missing>") { |
|
75 | 27x |
checkmate::assert_character(omit_columns, null.ok = TRUE, min.len = 1, any.missing = FALSE) |
76 | 26x |
checkmate::assert_data_frame(data) |
77 | 25x |
checkmate::assert_flag(char_as_factor) |
78 | 24x |
checkmate::assert_flag(logical_as_factor) |
79 | 24x |
checkmate::assert_string(na_level) |
80 | ||
81 | 22x |
target_vars <- if (is.null(omit_columns)) { |
82 | 20x |
names(data) |
83 |
} else { |
|
84 | 2x |
setdiff(names(data), omit_columns) # May have duplicates. |
85 |
} |
|
86 | 22x |
if (length(target_vars) == 0) { |
87 | 1x |
return(data) |
88 |
} |
|
89 | ||
90 | 21x |
l_target_vars <- split(target_vars, target_vars) |
91 | ||
92 |
# Makes sure target_vars exist in data and names are not duplicated. |
|
93 | 21x |
assert_df_with_variables(data, l_target_vars) |
94 | ||
95 | 21x |
for (x in target_vars) { |
96 | 514x |
xi <- data[[x]] |
97 | 514x |
xi_label <- obj_label(xi) |
98 | ||
99 |
# Determine whether to convert character or logical input. |
|
100 | 514x |
do_char_conversion <- is.character(xi) && char_as_factor |
101 | 514x |
do_logical_conversion <- is.logical(xi) && logical_as_factor |
102 | ||
103 |
# Pre-convert logical to character to deal correctly with replacing NA |
|
104 |
# values below. |
|
105 | 514x |
if (do_logical_conversion) { |
106 | 2x |
xi <- as.character(xi) |
107 |
} |
|
108 | ||
109 | 514x |
if (is.factor(xi) || is.character(xi)) { |
110 |
# Handle empty strings and NA values. |
|
111 | 387x |
xi <- explicit_na(sas_na(xi), label = na_level) |
112 | ||
113 |
# Convert to factors if requested for the original type, |
|
114 |
# set na_level as the last value. |
|
115 | 387x |
if (do_char_conversion || do_logical_conversion) { |
116 | 81x |
levels_xi <- setdiff(sort(unique(xi)), na_level) |
117 | 81x |
if (na_level %in% unique(xi)) { |
118 | 21x |
levels_xi <- c(levels_xi, na_level) |
119 |
} |
|
120 | ||
121 | 81x |
xi <- factor(xi, levels = levels_xi) |
122 |
} |
|
123 | ||
124 | 387x |
data[, x] <- formatters::with_label(xi, label = xi_label) |
125 |
} |
|
126 |
} |
|
127 | 21x |
return(data) |
128 |
} |
1 |
#' `rtables` Access Helper Functions |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' These are a couple of functions that help with accessing the data in `rtables` objects. |
|
6 |
#' Currently these work for occurrence tables, which are defined as having a count as the first |
|
7 |
#' element and a fraction as the second element in each cell. |
|
8 |
#' |
|
9 |
#' @seealso [prune_occurrences] for usage of these functions. |
|
10 |
#' |
|
11 |
#' @name rtables_access |
|
12 |
NULL |
|
13 | ||
14 |
#' @describeIn rtables_access Helper function to extract the first values from each content |
|
15 |
#' cell and from specified columns in a `TableRow`. Defaults to all columns. |
|
16 |
#' |
|
17 |
#' @param table_row (`TableRow`)\cr an analysis row in a occurrence table. |
|
18 |
#' @param col_names (`character`)\cr the names of the columns to extract from. |
|
19 |
#' @param col_indices (`integer`)\cr the indices of the columns to extract from. If `col_names` are provided, |
|
20 |
#' then these are inferred from the names of `table_row`. Note that this currently only works well with a single |
|
21 |
#' column split. |
|
22 |
#' |
|
23 |
#' @return |
|
24 |
#' * `h_row_first_values()` returns a `vector` of numeric values. |
|
25 |
#' |
|
26 |
#' @examples |
|
27 |
#' tbl <- basic_table() %>% |
|
28 |
#' split_cols_by("ARM") %>% |
|
29 |
#' split_rows_by("RACE") %>% |
|
30 |
#' analyze("AGE", function(x) { |
|
31 |
#' list( |
|
32 |
#' "mean (sd)" = rcell(c(mean(x), sd(x)), format = "xx.x (xx.x)"), |
|
33 |
#' "n" = length(x), |
|
34 |
#' "frac" = rcell(c(0.1, 0.1), format = "xx (xx)") |
|
35 |
#' ) |
|
36 |
#' }) %>% |
|
37 |
#' build_table(tern_ex_adsl) %>% |
|
38 |
#' prune_table() |
|
39 |
#' tree_row_elem <- collect_leaves(tbl[2, ])[[1]] |
|
40 |
#' result <- max(h_row_first_values(tree_row_elem)) |
|
41 |
#' result |
|
42 |
#' |
|
43 |
#' @export |
|
44 |
h_row_first_values <- function(table_row, |
|
45 |
col_names = NULL, |
|
46 |
col_indices = NULL) { |
|
47 | 727x |
col_indices <- check_names_indices(table_row, col_names, col_indices) |
48 | 727x |
checkmate::assert_integerish(col_indices) |
49 | 727x |
checkmate::assert_subset(col_indices, seq_len(ncol(table_row))) |
50 | ||
51 |
# Main values are extracted |
|
52 | 727x |
row_vals <- row_values(table_row)[col_indices] |
53 | ||
54 |
# Main return |
|
55 | 727x |
vapply(row_vals, function(rv) { |
56 | 2066x |
if (is.null(rv)) { |
57 | 727x |
NA_real_ |
58 |
} else { |
|
59 | 2063x |
rv[1L] |
60 |
} |
|
61 | 727x |
}, FUN.VALUE = numeric(1)) |
62 |
} |
|
63 | ||
64 |
#' @describeIn rtables_access Helper function that extracts row values and checks if they are |
|
65 |
#' convertible to integers (`integerish` values). |
|
66 |
#' |
|
67 |
#' @return |
|
68 |
#' * `h_row_counts()` returns a `vector` of numeric values. |
|
69 |
#' |
|
70 |
#' @examples |
|
71 |
#' # Row counts (integer values) |
|
72 |
#' \dontrun{ |
|
73 |
#' h_row_counts(tree_row_elem) # Fails because there are no integers |
|
74 |
#' } |
|
75 |
#' # Using values with integers |
|
76 |
#' tree_row_elem <- collect_leaves(tbl[3, ])[[1]] |
|
77 |
#' result <- h_row_counts(tree_row_elem) |
|
78 |
#' # result |
|
79 |
#' |
|
80 |
#' @export |
|
81 |
h_row_counts <- function(table_row, |
|
82 |
col_names = NULL, |
|
83 |
col_indices = NULL) { |
|
84 | 727x |
counts <- h_row_first_values(table_row, col_names, col_indices) |
85 | 727x |
checkmate::assert_integerish(counts) |
86 | 727x |
counts |
87 |
} |
|
88 | ||
89 |
#' @describeIn rtables_access helper function to extract fractions from specified columns in a `TableRow`. |
|
90 |
#' More specifically it extracts the second values from each content cell and checks it is a fraction. |
|
91 |
#' |
|
92 |
#' @return |
|
93 |
#' * `h_row_fractions()` returns a `vector` of proportions. |
|
94 |
#' |
|
95 |
#' @examples |
|
96 |
#' # Row fractions |
|
97 |
#' tree_row_elem <- collect_leaves(tbl[4, ])[[1]] |
|
98 |
#' h_row_fractions(tree_row_elem) |
|
99 |
#' |
|
100 |
#' @export |
|
101 |
h_row_fractions <- function(table_row, |
|
102 |
col_names = NULL, |
|
103 |
col_indices = NULL) { |
|
104 | 243x |
col_indices <- check_names_indices(table_row, col_names, col_indices) |
105 | 243x |
row_vals <- row_values(table_row)[col_indices] |
106 | 243x |
fractions <- sapply(row_vals, "[", 2L) |
107 | 243x |
checkmate::assert_numeric(fractions, lower = 0, upper = 1) |
108 | 243x |
fractions |
109 |
} |
|
110 | ||
111 |
#' @describeIn rtables_access Helper function to extract column counts from specified columns in a table. |
|
112 |
#' |
|
113 |
#' @param table (`VTableNodeInfo`)\cr an occurrence table or row. |
|
114 |
#' |
|
115 |
#' @return |
|
116 |
#' * `h_col_counts()` returns a `vector` of column counts. |
|
117 |
#' |
|
118 |
#' @export |
|
119 |
h_col_counts <- function(table, |
|
120 |
col_names = NULL, |
|
121 |
col_indices = NULL) { |
|
122 | 304x |
col_indices <- check_names_indices(table, col_names, col_indices) |
123 | 304x |
counts <- col_counts(table)[col_indices] |
124 | 304x |
stats::setNames(counts, col_names) |
125 |
} |
|
126 | ||
127 |
#' @describeIn rtables_access Helper function to get first row of content table of current table. |
|
128 |
#' |
|
129 |
#' @return |
|
130 |
#' * `h_content_first_row()` returns a row from an `rtables` table. |
|
131 |
#' |
|
132 |
#' @export |
|
133 |
h_content_first_row <- function(table) { |
|
134 | 27x |
ct <- content_table(table) |
135 | 27x |
tree_children(ct)[[1]] |
136 |
} |
|
137 | ||
138 |
#' @describeIn rtables_access Helper function which says whether current table is a leaf in the tree. |
|
139 |
#' |
|
140 |
#' @return |
|
141 |
#' * `is_leaf_table()` returns a `logical` value indicating whether current table is a leaf. |
|
142 |
#' |
|
143 |
#' @keywords internal |
|
144 |
is_leaf_table <- function(table) { |
|
145 | 168x |
children <- tree_children(table) |
146 | 168x |
child_classes <- unique(sapply(children, class)) |
147 | 168x |
identical(child_classes, "ElementaryTable") |
148 |
} |
|
149 | ||
150 |
#' @describeIn rtables_access Internal helper function that tests standard inputs for column indices. |
|
151 |
#' |
|
152 |
#' @return |
|
153 |
#' * `check_names_indices` returns column indices. |
|
154 |
#' |
|
155 |
#' @keywords internal |
|
156 |
check_names_indices <- function(table_row, |
|
157 |
col_names = NULL, |
|
158 |
col_indices = NULL) { |
|
159 | 1274x |
if (!is.null(col_names)) { |
160 | 1231x |
if (!is.null(col_indices)) { |
161 | ! |
stop( |
162 | ! |
"Inserted both col_names and col_indices when selecting row values. ", |
163 | ! |
"Please choose one." |
164 |
) |
|
165 |
} |
|
166 | 1231x |
col_indices <- h_col_indices(table_row, col_names) |
167 |
} |
|
168 | 1274x |
if (is.null(col_indices)) { |
169 | 37x |
ll <- ifelse(is.null(ncol(table_row)), length(table_row), ncol(table_row)) |
170 | 37x |
col_indices <- seq_len(ll) |
171 |
} |
|
172 | ||
173 | 1274x |
return(col_indices) |
174 |
} |
1 |
#' Helper Functions for Multivariate Logistic Regression |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Helper functions used in calculations for logistic regression. |
|
6 |
#' |
|
7 |
#' @inheritParams argument_convention |
|
8 |
#' @param fit_glm (`glm`)\cr logistic regression model fitted by [stats::glm()] with "binomial" family. |
|
9 |
#' Limited functionality is also available for conditional logistic regression models fitted by |
|
10 |
#' [survival::clogit()], currently this is used only by [extract_rsp_biomarkers()]. |
|
11 |
#' @param x (`string` or `character`)\cr a variable or interaction term in `fit_glm` (depending on the |
|
12 |
#' helper function). |
|
13 |
#' |
|
14 |
#' @examples |
|
15 |
#' library(dplyr) |
|
16 |
#' library(broom) |
|
17 |
#' |
|
18 |
#' adrs_f <- tern_ex_adrs %>% |
|
19 |
#' filter(PARAMCD == "BESRSPI") %>% |
|
20 |
#' filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>% |
|
21 |
#' mutate( |
|
22 |
#' Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0), |
|
23 |
#' RACE = factor(RACE), |
|
24 |
#' SEX = factor(SEX) |
|
25 |
#' ) |
|
26 |
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response") |
|
27 |
#' mod1 <- fit_logistic( |
|
28 |
#' data = adrs_f, |
|
29 |
#' variables = list( |
|
30 |
#' response = "Response", |
|
31 |
#' arm = "ARMCD", |
|
32 |
#' covariates = c("AGE", "RACE") |
|
33 |
#' ) |
|
34 |
#' ) |
|
35 |
#' mod2 <- fit_logistic( |
|
36 |
#' data = adrs_f, |
|
37 |
#' variables = list( |
|
38 |
#' response = "Response", |
|
39 |
#' arm = "ARMCD", |
|
40 |
#' covariates = c("AGE", "RACE"), |
|
41 |
#' interaction = "AGE" |
|
42 |
#' ) |
|
43 |
#' ) |
|
44 |
#' |
|
45 |
#' @name h_logistic_regression |
|
46 |
NULL |
|
47 | ||
48 |
#' @describeIn h_logistic_regression Helper function to extract interaction variable names from a fitted |
|
49 |
#' model assuming only one interaction term. |
|
50 |
#' |
|
51 |
#' @return Vector of names of interaction variables. |
|
52 |
#' |
|
53 |
#' @export |
|
54 |
h_get_interaction_vars <- function(fit_glm) { |
|
55 | 27x |
checkmate::assert_class(fit_glm, "glm") |
56 | 27x |
terms_name <- attr(stats::terms(fit_glm), "term.labels") |
57 | 27x |
terms_order <- attr(stats::terms(fit_glm), "order") |
58 | 27x |
interaction_term <- terms_name[terms_order == 2] |
59 | 27x |
checkmate::assert_string(interaction_term) |
60 | 27x |
strsplit(interaction_term, split = ":")[[1]] |
61 |
} |
|
62 | ||
63 |
#' @describeIn h_logistic_regression Helper function to get the right coefficient name from the |
|
64 |
#' interaction variable names and the given levels. The main value here is that the order |
|
65 |
#' of first and second variable is checked in the `interaction_vars` input. |
|
66 |
#' |
|
67 |
#' @param interaction_vars (`character` of length 2)\cr interaction variable names. |
|
68 |
#' @param first_var_with_level (`character` of length 2)\cr the first variable name with |
|
69 |
#' the interaction level. |
|
70 |
#' @param second_var_with_level (`character` of length 2)\cr the second variable name with |
|
71 |
#' the interaction level. |
|
72 |
#' |
|
73 |
#' @return Name of coefficient. |
|
74 |
#' |
|
75 |
#' @export |
|
76 |
h_interaction_coef_name <- function(interaction_vars, |
|
77 |
first_var_with_level, |
|
78 |
second_var_with_level) { |
|
79 | 45x |
checkmate::assert_character(interaction_vars, len = 2, any.missing = FALSE) |
80 | 45x |
checkmate::assert_character(first_var_with_level, len = 2, any.missing = FALSE) |
81 | 45x |
checkmate::assert_character(second_var_with_level, len = 2, any.missing = FALSE) |
82 | 45x |
checkmate::assert_subset(c(first_var_with_level[1], second_var_with_level[1]), interaction_vars) |
83 | ||
84 | 45x |
first_name <- paste(first_var_with_level, collapse = "") |
85 | 45x |
second_name <- paste(second_var_with_level, collapse = "") |
86 | 45x |
if (first_var_with_level[1] == interaction_vars[1]) { |
87 | 34x |
paste(first_name, second_name, sep = ":") |
88 | 11x |
} else if (second_var_with_level[1] == interaction_vars[1]) { |
89 | 11x |
paste(second_name, first_name, sep = ":") |
90 |
} |
|
91 |
} |
|
92 | ||
93 |
#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates |
|
94 |
#' for the case when both the odds ratio and the interaction variable are categorical. |
|
95 |
#' |
|
96 |
#' @param odds_ratio_var (`string`)\cr the odds ratio variable. |
|
97 |
#' @param interaction_var (`string`)\cr the interaction variable. |
|
98 |
#' |
|
99 |
#' @return Odds ratio. |
|
100 |
#' |
|
101 |
#' @export |
|
102 |
h_or_cat_interaction <- function(odds_ratio_var, |
|
103 |
interaction_var, |
|
104 |
fit_glm, |
|
105 |
conf_level = 0.95) { |
|
106 | 7x |
interaction_vars <- h_get_interaction_vars(fit_glm) |
107 | 7x |
checkmate::assert_string(odds_ratio_var) |
108 | 7x |
checkmate::assert_string(interaction_var) |
109 | 7x |
checkmate::assert_subset(c(odds_ratio_var, interaction_var), interaction_vars) |
110 | 7x |
checkmate::assert_vector(interaction_vars, len = 2) |
111 | ||
112 | 7x |
xs_level <- fit_glm$xlevels |
113 | 7x |
xs_coef <- stats::coef(fit_glm) |
114 | 7x |
xs_vcov <- stats::vcov(fit_glm) |
115 | 7x |
y <- list() |
116 | 7x |
for (var_level in xs_level[[odds_ratio_var]][-1]) { |
117 | 12x |
x <- list() |
118 | 12x |
for (ref_level in xs_level[[interaction_var]]) { |
119 | 32x |
coef_names <- paste0(odds_ratio_var, var_level) |
120 | 32x |
if (ref_level != xs_level[[interaction_var]][1]) { |
121 | 20x |
interaction_coef_name <- h_interaction_coef_name( |
122 | 20x |
interaction_vars, |
123 | 20x |
c(odds_ratio_var, var_level), |
124 | 20x |
c(interaction_var, ref_level) |
125 |
) |
|
126 | 20x |
coef_names <- c( |
127 | 20x |
coef_names, |
128 | 20x |
interaction_coef_name |
129 |
) |
|
130 |
} |
|
131 | 32x |
if (length(coef_names) > 1) { |
132 | 20x |
ones <- t(c(1, 1)) |
133 | 20x |
est <- as.numeric(ones %*% xs_coef[coef_names]) |
134 | 20x |
se <- sqrt(as.numeric(ones %*% xs_vcov[coef_names, coef_names] %*% t(ones))) |
135 |
} else { |
|
136 | 12x |
est <- xs_coef[coef_names] |
137 | 12x |
se <- sqrt(as.numeric(xs_vcov[coef_names, coef_names])) |
138 |
} |
|
139 | 32x |
or <- exp(est) |
140 | 32x |
ci <- exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se) |
141 | 32x |
x[[ref_level]] <- list(or = or, ci = ci) |
142 |
} |
|
143 | 12x |
y[[var_level]] <- x |
144 |
} |
|
145 | 7x |
y |
146 |
} |
|
147 | ||
148 |
#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates |
|
149 |
#' for the case when either the odds ratio or the interaction variable is continuous. |
|
150 |
#' |
|
151 |
#' @param at (`NULL` or `numeric`)\cr optional values for the interaction variable. Otherwise |
|
152 |
#' the median is used. |
|
153 |
#' |
|
154 |
#' @return Odds ratio. |
|
155 |
#' |
|
156 |
#' @note We don't provide a function for the case when both variables are continuous because |
|
157 |
#' this does not arise in this table, as the treatment arm variable will always be involved |
|
158 |
#' and categorical. |
|
159 |
#' |
|
160 |
#' @export |
|
161 |
h_or_cont_interaction <- function(odds_ratio_var, |
|
162 |
interaction_var, |
|
163 |
fit_glm, |
|
164 |
at = NULL, |
|
165 |
conf_level = 0.95) { |
|
166 | 9x |
interaction_vars <- h_get_interaction_vars(fit_glm) |
167 | 9x |
checkmate::assert_string(odds_ratio_var) |
168 | 9x |
checkmate::assert_string(interaction_var) |
169 | 9x |
checkmate::assert_subset(c(odds_ratio_var, interaction_var), interaction_vars) |
170 | 9x |
checkmate::assert_vector(interaction_vars, len = 2) |
171 | 9x |
checkmate::assert_numeric(at, min.len = 1, null.ok = TRUE, any.missing = FALSE) |
172 | 9x |
xs_level <- fit_glm$xlevels |
173 | 9x |
xs_coef <- stats::coef(fit_glm) |
174 | 9x |
xs_vcov <- stats::vcov(fit_glm) |
175 | 9x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
176 | 9x |
model_data <- fit_glm$model |
177 | 9x |
if (!is.null(at)) { |
178 | 2x |
checkmate::assert_set_equal(xs_class[interaction_var], "numeric") |
179 |
} |
|
180 | 9x |
y <- list() |
181 | 9x |
if (xs_class[interaction_var] == "numeric") { |
182 | 6x |
if (is.null(at)) { |
183 | 4x |
at <- ceiling(stats::median(model_data[[interaction_var]])) |
184 |
} |
|
185 | ||
186 | 6x |
for (var_level in xs_level[[odds_ratio_var]][-1]) { |
187 | 12x |
x <- list() |
188 | 12x |
for (increment in at) { |
189 | 18x |
coef_names <- paste0(odds_ratio_var, var_level) |
190 | 18x |
if (increment != 0) { |
191 | 18x |
interaction_coef_name <- h_interaction_coef_name( |
192 | 18x |
interaction_vars, |
193 | 18x |
c(odds_ratio_var, var_level), |
194 | 18x |
c(interaction_var, "") |
195 |
) |
|
196 | 18x |
coef_names <- c( |
197 | 18x |
coef_names, |
198 | 18x |
interaction_coef_name |
199 |
) |
|
200 |
} |
|
201 | 18x |
if (length(coef_names) > 1) { |
202 | 18x |
xvec <- t(c(1, increment)) |
203 | 18x |
est <- as.numeric(xvec %*% xs_coef[coef_names]) |
204 | 18x |
se <- sqrt(as.numeric(xvec %*% xs_vcov[coef_names, coef_names] %*% t(xvec))) |
205 |
} else { |
|
206 | ! |
est <- xs_coef[coef_names] |
207 | ! |
se <- sqrt(as.numeric(xs_vcov[coef_names, coef_names])) |
208 |
} |
|
209 | 18x |
or <- exp(est) |
210 | 18x |
ci <- exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se) |
211 | 18x |
x[[as.character(increment)]] <- list(or = or, ci = ci) |
212 |
} |
|
213 | 12x |
y[[var_level]] <- x |
214 |
} |
|
215 |
} else { |
|
216 | 3x |
checkmate::assert_set_equal(xs_class[odds_ratio_var], "numeric") |
217 | 3x |
checkmate::assert_set_equal(xs_class[interaction_var], "factor") |
218 | 3x |
for (var_level in xs_level[[interaction_var]]) { |
219 | 9x |
coef_names <- odds_ratio_var |
220 | 9x |
if (var_level != xs_level[[interaction_var]][1]) { |
221 | 6x |
interaction_coef_name <- h_interaction_coef_name( |
222 | 6x |
interaction_vars, |
223 | 6x |
c(odds_ratio_var, ""), |
224 | 6x |
c(interaction_var, var_level) |
225 |
) |
|
226 | 6x |
coef_names <- c( |
227 | 6x |
coef_names, |
228 | 6x |
interaction_coef_name |
229 |
) |
|
230 |
} |
|
231 | 9x |
if (length(coef_names) > 1) { |
232 | 6x |
xvec <- t(c(1, 1)) |
233 | 6x |
est <- as.numeric(xvec %*% xs_coef[coef_names]) |
234 | 6x |
se <- sqrt(as.numeric(xvec %*% xs_vcov[coef_names, coef_names] %*% t(xvec))) |
235 |
} else { |
|
236 | 3x |
est <- xs_coef[coef_names] |
237 | 3x |
se <- sqrt(as.numeric(xs_vcov[coef_names, coef_names])) |
238 |
} |
|
239 | 9x |
or <- exp(est) |
240 | 9x |
ci <- exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se) |
241 | 9x |
y[[var_level]] <- list(or = or, ci = ci) |
242 |
} |
|
243 |
} |
|
244 | 9x |
y |
245 |
} |
|
246 | ||
247 |
#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates |
|
248 |
#' in case of an interaction. This is a wrapper for [h_or_cont_interaction()] and |
|
249 |
#' [h_or_cat_interaction()]. |
|
250 |
#' |
|
251 |
#' @return Odds ratio. |
|
252 |
#' |
|
253 |
#' @export |
|
254 |
h_or_interaction <- function(odds_ratio_var, |
|
255 |
interaction_var, |
|
256 |
fit_glm, |
|
257 |
at = NULL, |
|
258 |
conf_level = 0.95) { |
|
259 | 13x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
260 | 13x |
if (any(xs_class[c(odds_ratio_var, interaction_var)] == "numeric")) { |
261 | 7x |
h_or_cont_interaction( |
262 | 7x |
odds_ratio_var, |
263 | 7x |
interaction_var, |
264 | 7x |
fit_glm, |
265 | 7x |
at = at, |
266 | 7x |
conf_level = conf_level |
267 |
) |
|
268 | 6x |
} else if (all(xs_class[c(odds_ratio_var, interaction_var)] == "factor")) { |
269 | 6x |
h_or_cat_interaction( |
270 | 6x |
odds_ratio_var, |
271 | 6x |
interaction_var, |
272 | 6x |
fit_glm, |
273 | 6x |
conf_level = conf_level |
274 |
) |
|
275 |
} else { |
|
276 | ! |
stop("wrong interaction variable class, the interaction variable is not a numeric nor a factor") |
277 |
} |
|
278 |
} |
|
279 | ||
280 |
#' @describeIn h_logistic_regression Helper function to construct term labels from simple terms and the table |
|
281 |
#' of numbers of patients. |
|
282 |
#' |
|
283 |
#' @param terms (`character`)\cr simple terms. |
|
284 |
#' @param table (`table`)\cr table containing numbers for terms. |
|
285 |
#' |
|
286 |
#' @return Term labels containing numbers of patients. |
|
287 |
#' |
|
288 |
#' @export |
|
289 |
h_simple_term_labels <- function(terms, |
|
290 |
table) { |
|
291 | 45x |
checkmate::assert_true(is.table(table)) |
292 | 45x |
checkmate::assert_multi_class(terms, classes = c("factor", "character")) |
293 | 45x |
terms <- as.character(terms) |
294 | 45x |
term_n <- table[terms] |
295 | 45x |
paste0(terms, ", n = ", term_n) |
296 |
} |
|
297 | ||
298 |
#' @describeIn h_logistic_regression Helper function to construct term labels from interaction terms and the table |
|
299 |
#' of numbers of patients. |
|
300 |
#' |
|
301 |
#' @param terms1 (`character`)\cr terms for first dimension (rows). |
|
302 |
#' @param terms2 (`character`)\cr terms for second dimension (rows). |
|
303 |
#' @param any (`flag`)\cr whether any of `term1` and `term2` can be fulfilled to count the |
|
304 |
#' number of patients. In that case they can only be scalar (strings). |
|
305 |
#' |
|
306 |
#' @return Term labels containing numbers of patients. |
|
307 |
#' |
|
308 |
#' @export |
|
309 |
h_interaction_term_labels <- function(terms1, |
|
310 |
terms2, |
|
311 |
table, |
|
312 |
any = FALSE) { |
|
313 | 8x |
checkmate::assert_true(is.table(table)) |
314 | 8x |
checkmate::assert_flag(any) |
315 | 8x |
checkmate::assert_multi_class(terms1, classes = c("factor", "character")) |
316 | 8x |
checkmate::assert_multi_class(terms2, classes = c("factor", "character")) |
317 | 8x |
terms1 <- as.character(terms1) |
318 | 8x |
terms2 <- as.character(terms2) |
319 | 8x |
if (any) { |
320 | 4x |
checkmate::assert_scalar(terms1) |
321 | 4x |
checkmate::assert_scalar(terms2) |
322 | 4x |
paste0( |
323 | 4x |
terms1, " or ", terms2, ", n = ", |
324 |
# Note that we double count in the initial sum the cell [terms1, terms2], therefore subtract. |
|
325 | 4x |
sum(c(table[terms1, ], table[, terms2])) - table[terms1, terms2] |
326 |
) |
|
327 |
} else { |
|
328 | 4x |
term_n <- table[cbind(terms1, terms2)] |
329 | 4x |
paste0(terms1, " * ", terms2, ", n = ", term_n) |
330 |
} |
|
331 |
} |
|
332 | ||
333 |
#' @describeIn h_logistic_regression Helper function to tabulate the main effect |
|
334 |
#' results of a (conditional) logistic regression model. |
|
335 |
#' |
|
336 |
#' @return Tabulated main effect results from a logistic regression model. |
|
337 |
#' |
|
338 |
#' @examples |
|
339 |
#' h_glm_simple_term_extract("AGE", mod1) |
|
340 |
#' h_glm_simple_term_extract("ARMCD", mod1) |
|
341 |
#' |
|
342 |
#' @export |
|
343 |
h_glm_simple_term_extract <- function(x, fit_glm) { |
|
344 | 61x |
checkmate::assert_multi_class(fit_glm, c("glm", "clogit")) |
345 | 61x |
checkmate::assert_string(x) |
346 | ||
347 | 61x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
348 | 61x |
xs_level <- fit_glm$xlevels |
349 | 61x |
xs_coef <- summary(fit_glm)$coefficients |
350 | 61x |
stats <- if (inherits(fit_glm, "glm")) { |
351 | 49x |
c("estimate" = "Estimate", "std_error" = "Std. Error", "pvalue" = "Pr(>|z|)") |
352 |
} else { |
|
353 | 12x |
c("estimate" = "coef", "std_error" = "se(coef)", "pvalue" = "Pr(>|z|)") |
354 |
} |
|
355 |
# Make sure x is not an interaction term. |
|
356 | 61x |
checkmate::assert_subset(x, names(xs_class)) |
357 | 61x |
x_sel <- if (xs_class[x] == "numeric") x else paste0(x, xs_level[[x]][-1]) |
358 | 61x |
x_stats <- as.data.frame(xs_coef[x_sel, stats, drop = FALSE], stringsAsFactors = FALSE) |
359 | 61x |
colnames(x_stats) <- names(stats) |
360 | 61x |
x_stats$estimate <- as.list(x_stats$estimate) |
361 | 61x |
x_stats$std_error <- as.list(x_stats$std_error) |
362 | 61x |
x_stats$pvalue <- as.list(x_stats$pvalue) |
363 | 61x |
x_stats$df <- as.list(1) |
364 | 61x |
if (xs_class[x] == "numeric") { |
365 | 46x |
x_stats$term <- x |
366 | 46x |
x_stats$term_label <- if (inherits(fit_glm, "glm")) { |
367 | 34x |
formatters::var_labels(fit_glm$data[x], fill = TRUE) |
368 |
} else { |
|
369 |
# We just fill in here with the `term` itself as we don't have the data available. |
|
370 | 12x |
x |
371 |
} |
|
372 | 46x |
x_stats$is_variable_summary <- FALSE |
373 | 46x |
x_stats$is_term_summary <- TRUE |
374 |
} else { |
|
375 | 15x |
checkmate::assert_class(fit_glm, "glm") |
376 |
# The reason is that we don't have the original data set in the `clogit` object |
|
377 |
# and therefore cannot determine the `x_numbers` here. |
|
378 | 15x |
x_numbers <- table(fit_glm$data[[x]]) |
379 | 15x |
x_stats$term <- xs_level[[x]][-1] |
380 | 15x |
x_stats$term_label <- h_simple_term_labels(x_stats$term, x_numbers) |
381 | 15x |
x_stats$is_variable_summary <- FALSE |
382 | 15x |
x_stats$is_term_summary <- TRUE |
383 | 15x |
main_effects <- car::Anova(fit_glm, type = 3, test.statistic = "Wald") |
384 | 15x |
x_main <- data.frame( |
385 | 15x |
pvalue = main_effects[x, "Pr(>Chisq)", drop = TRUE], |
386 | 15x |
term = xs_level[[x]][1], |
387 | 15x |
term_label = paste("Reference", h_simple_term_labels(xs_level[[x]][1], x_numbers)), |
388 | 15x |
df = main_effects[x, "Df", drop = TRUE], |
389 | 15x |
stringsAsFactors = FALSE |
390 |
) |
|
391 | 15x |
x_main$pvalue <- as.list(x_main$pvalue) |
392 | 15x |
x_main$df <- as.list(x_main$df) |
393 | 15x |
x_main$estimate <- list(numeric(0)) |
394 | 15x |
x_main$std_error <- list(numeric(0)) |
395 | 15x |
if (length(xs_level[[x]][-1]) == 1) { |
396 | 6x |
x_main$pvalue <- list(numeric(0)) |
397 | 6x |
x_main$df <- list(numeric(0)) |
398 |
} |
|
399 | 15x |
x_main$is_variable_summary <- TRUE |
400 | 15x |
x_main$is_term_summary <- FALSE |
401 | 15x |
x_stats <- rbind(x_main, x_stats) |
402 |
} |
|
403 | 61x |
x_stats$variable <- x |
404 | 61x |
x_stats$variable_label <- if (inherits(fit_glm, "glm")) { |
405 | 49x |
formatters::var_labels(fit_glm$data[x], fill = TRUE) |
406 |
} else { |
|
407 | 12x |
x |
408 |
} |
|
409 | 61x |
x_stats$interaction <- "" |
410 | 61x |
x_stats$interaction_label <- "" |
411 | 61x |
x_stats$reference <- "" |
412 | 61x |
x_stats$reference_label <- "" |
413 | 61x |
rownames(x_stats) <- NULL |
414 | 61x |
x_stats[c( |
415 | 61x |
"variable", |
416 | 61x |
"variable_label", |
417 | 61x |
"term", |
418 | 61x |
"term_label", |
419 | 61x |
"interaction", |
420 | 61x |
"interaction_label", |
421 | 61x |
"reference", |
422 | 61x |
"reference_label", |
423 | 61x |
"estimate", |
424 | 61x |
"std_error", |
425 | 61x |
"df", |
426 | 61x |
"pvalue", |
427 | 61x |
"is_variable_summary", |
428 | 61x |
"is_term_summary" |
429 |
)] |
|
430 |
} |
|
431 | ||
432 |
#' @describeIn h_logistic_regression Helper function to tabulate the interaction term |
|
433 |
#' results of a logistic regression model. |
|
434 |
#' |
|
435 |
#' @return Tabulated interaction term results from a logistic regression model. |
|
436 |
#' |
|
437 |
#' @examples |
|
438 |
#' h_glm_interaction_extract("ARMCD:AGE", mod2) |
|
439 |
#' |
|
440 |
#' @export |
|
441 |
h_glm_interaction_extract <- function(x, fit_glm) { |
|
442 | 6x |
vars <- h_get_interaction_vars(fit_glm) |
443 | 6x |
xs_class <- attr(fit_glm$terms, "dataClasses") |
444 | ||
445 | 6x |
checkmate::assert_string(x) |
446 | ||
447 |
# Only take two-way interaction |
|
448 | 6x |
checkmate::assert_vector(vars, len = 2) |
449 | ||
450 |
# Only consider simple case: first variable in interaction is arm, a categorical variable |
|
451 | 6x |
checkmate::assert_disjunct(xs_class[vars[1]], "numeric") |
452 | ||
453 | 6x |
xs_level <- fit_glm$xlevels |
454 | 6x |
xs_coef <- summary(fit_glm)$coefficients |
455 | 6x |
main_effects <- car::Anova(fit_glm, type = 3, test.statistic = "Wald") |
456 | 6x |
stats <- c("estimate" = "Estimate", "std_error" = "Std. Error", "pvalue" = "Pr(>|z|)") |
457 | 6x |
v1_comp <- xs_level[[vars[1]]][-1] |
458 | 6x |
if (xs_class[vars[2]] == "numeric") { |
459 | 3x |
x_stats <- as.data.frame( |
460 | 3x |
xs_coef[paste0(vars[1], v1_comp, ":", vars[2]), stats, drop = FALSE], |
461 | 3x |
stringsAsFactors = FALSE |
462 |
) |
|
463 | 3x |
colnames(x_stats) <- names(stats) |
464 | 3x |
x_stats$term <- v1_comp |
465 | 3x |
x_numbers <- table(fit_glm$data[[vars[1]]]) |
466 | 3x |
x_stats$term_label <- h_simple_term_labels(v1_comp, x_numbers) |
467 | 3x |
v1_ref <- xs_level[[vars[1]]][1] |
468 | 3x |
term_main <- v1_ref |
469 | 3x |
ref_label <- h_simple_term_labels(v1_ref, x_numbers) |
470 | 3x |
} else if (xs_class[vars[2]] != "numeric") { |
471 | 3x |
v2_comp <- xs_level[[vars[2]]][-1] |
472 | 3x |
v1_v2_grid <- expand.grid(v1 = v1_comp, v2 = v2_comp) |
473 | 3x |
x_sel <- paste( |
474 | 3x |
paste0(vars[1], v1_v2_grid$v1), |
475 | 3x |
paste0(vars[2], v1_v2_grid$v2), |
476 | 3x |
sep = ":" |
477 |
) |
|
478 | 3x |
x_stats <- as.data.frame(xs_coef[x_sel, stats, drop = FALSE], stringsAsFactors = FALSE) |
479 | 3x |
colnames(x_stats) <- names(stats) |
480 | 3x |
x_stats$term <- paste(v1_v2_grid$v1, "*", v1_v2_grid$v2) |
481 | 3x |
x_numbers <- table(fit_glm$data[[vars[1]]], fit_glm$data[[vars[2]]]) |
482 | 3x |
x_stats$term_label <- h_interaction_term_labels(v1_v2_grid$v1, v1_v2_grid$v2, x_numbers) |
483 | 3x |
v1_ref <- xs_level[[vars[1]]][1] |
484 | 3x |
v2_ref <- xs_level[[vars[2]]][1] |
485 | 3x |
term_main <- paste(vars[1], vars[2], sep = " * ") |
486 | 3x |
ref_label <- h_interaction_term_labels(v1_ref, v2_ref, x_numbers, any = TRUE) |
487 |
} |
|
488 | 6x |
x_stats$df <- as.list(1) |
489 | 6x |
x_stats$pvalue <- as.list(x_stats$pvalue) |
490 | 6x |
x_stats$is_variable_summary <- FALSE |
491 | 6x |
x_stats$is_term_summary <- TRUE |
492 | 6x |
x_main <- data.frame( |
493 | 6x |
pvalue = main_effects[x, "Pr(>Chisq)", drop = TRUE], |
494 | 6x |
term = term_main, |
495 | 6x |
term_label = paste("Reference", ref_label), |
496 | 6x |
df = main_effects[x, "Df", drop = TRUE], |
497 | 6x |
stringsAsFactors = FALSE |
498 |
) |
|
499 | 6x |
x_main$pvalue <- as.list(x_main$pvalue) |
500 | 6x |
x_main$df <- as.list(x_main$df) |
501 | 6x |
x_main$estimate <- list(numeric(0)) |
502 | 6x |
x_main$std_error <- list(numeric(0)) |
503 | 6x |
x_main$is_variable_summary <- TRUE |
504 | 6x |
x_main$is_term_summary <- FALSE |
505 | ||
506 | 6x |
x_stats <- rbind(x_main, x_stats) |
507 | 6x |
x_stats$variable <- x |
508 | 6x |
x_stats$variable_label <- paste( |
509 | 6x |
"Interaction of", |
510 | 6x |
formatters::var_labels(fit_glm$data[vars[1]], fill = TRUE), |
511 |
"*", |
|
512 | 6x |
formatters::var_labels(fit_glm$data[vars[2]], fill = TRUE) |
513 |
) |
|
514 | 6x |
x_stats$interaction <- "" |
515 | 6x |
x_stats$interaction_label <- "" |
516 | 6x |
x_stats$reference <- "" |
517 | 6x |
x_stats$reference_label <- "" |
518 | 6x |
rownames(x_stats) <- NULL |
519 | 6x |
x_stats[c( |
520 | 6x |
"variable", |
521 | 6x |
"variable_label", |
522 | 6x |
"term", |
523 | 6x |
"term_label", |
524 | 6x |
"interaction", |
525 | 6x |
"interaction_label", |
526 | 6x |
"reference", |
527 | 6x |