1 |
#' Adverse Event Analysis Dataset (ADAE) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating random Adverse Event Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details One record per each record in the corresponding SDTM domain. |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `ASTDTM`, `AETERM`, `AESEQ` |
|
11 |
#' |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @param max_n_aes (`integer`)\cr Maximum number of AEs per patient. Defaults to 10. |
|
14 |
#' @template param_cached |
|
15 |
#' @templateVar data adae |
|
16 |
#' |
|
17 |
#' @return `data.frame` |
|
18 |
#' @export |
|
19 |
#' |
|
20 |
#' @examples |
|
21 |
#' library(random.cdisc.data) |
|
22 |
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1) |
|
23 |
#' |
|
24 |
#' adae <- radae(adsl, seed = 2) |
|
25 |
#' adae |
|
26 |
#' |
|
27 |
#' # Add metadata. |
|
28 |
#' aag <- utils::read.table( |
|
29 |
#' sep = ",", header = TRUE, |
|
30 |
#' text = paste( |
|
31 |
#' "NAMVAR,SRCVAR,GRPTYPE,REFNAME,REFTERM,SCOPE", |
|
32 |
#' "CQ01NAM,AEDECOD,CUSTOM,D.2.1.5.3/A.1.1.1.1 AESI,dcd D.2.1.5.3,", |
|
33 |
#' "CQ01NAM,AEDECOD,CUSTOM,D.2.1.5.3/A.1.1.1.1 AESI,dcd A.1.1.1.1,", |
|
34 |
#' "SMQ01NAM,AEDECOD,SMQ,C.1.1.1.3/B.2.2.3.1 AESI,dcd C.1.1.1.3,BROAD", |
|
35 |
#' "SMQ01NAM,AEDECOD,SMQ,C.1.1.1.3/B.2.2.3.1 AESI,dcd B.2.2.3.1,BROAD", |
|
36 |
#' "SMQ02NAM,AEDECOD,SMQ,Y.9.9.9.9/Z.9.9.9.9 AESI,dcd Y.9.9.9.9,NARROW", |
|
37 |
#' "SMQ02NAM,AEDECOD,SMQ,Y.9.9.9.9/Z.9.9.9.9 AESI,dcd Z.9.9.9.9,NARROW", |
|
38 |
#' sep = "\n" |
|
39 |
#' ), stringsAsFactors = FALSE |
|
40 |
#' ) |
|
41 |
#' |
|
42 |
#' adae <- radae(adsl, lookup_aag = aag) |
|
43 |
#' |
|
44 |
#' with( |
|
45 |
#' adae, |
|
46 |
#' cbind( |
|
47 |
#' table(AEDECOD, SMQ01NAM), |
|
48 |
#' table(AEDECOD, CQ01NAM) |
|
49 |
#' ) |
|
50 |
#' ) |
|
51 |
radae <- function(adsl, |
|
52 |
max_n_aes = 10L, |
|
53 |
lookup = NULL, |
|
54 |
lookup_aag = NULL, |
|
55 |
seed = NULL, |
|
56 |
na_percentage = 0, |
|
57 |
na_vars = list( |
|
58 |
AEBODSYS = c(NA, 0.1), |
|
59 |
AEDECOD = c(1234, 0.1), |
|
60 |
AETOXGR = c(1234, 0.1) |
|
61 |
), |
|
62 |
cached = FALSE) { |
|
63 | 4x |
checkmate::assert_flag(cached) |
64 | 4x |
if (cached) { |
65 | 1x |
return(get_cached_data("cadae")) |
66 |
} |
|
67 | ||
68 | 3x |
checkmate::assert_data_frame(adsl) |
69 | 3x |
checkmate::assert_integer(max_n_aes, len = 1, any.missing = FALSE) |
70 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
71 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
72 | 3x |
checkmate::assert_true(na_percentage < 1) |
73 | ||
74 |
# check lookup parameters |
|
75 | 3x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
76 | 3x |
lookup_ae <- if (!is.null(lookup)) { |
77 | ! |
lookup |
78 |
} else { |
|
79 | 3x |
tibble::tribble( |
80 | 3x |
~AEBODSYS, ~AELLT, ~AEDECOD, ~AEHLT, ~AEHLGT, ~AETOXGR, ~AESOC, ~AESER, ~AEREL, |
81 | 3x |
"cl A.1", "llt A.1.1.1.1", "dcd A.1.1.1.1", "hlt A.1.1.1", "hlgt A.1.1", "1", "cl A", "N", "N", |
82 | 3x |
"cl A.1", "llt A.1.1.1.2", "dcd A.1.1.1.2", "hlt A.1.1.1", "hlgt A.1.1", "2", "cl A", "Y", "N", |
83 | 3x |
"cl B.1", "llt B.1.1.1.1", "dcd B.1.1.1.1", "hlt B.1.1.1", "hlgt B.1.1", "5", "cl B", "Y", "Y", |
84 | 3x |
"cl B.2", "llt B.2.1.2.1", "dcd B.2.1.2.1", "hlt B.2.1.2", "hlgt B.2.1", "3", "cl B", "N", "N", |
85 | 3x |
"cl B.2", "llt B.2.2.3.1", "dcd B.2.2.3.1", "hlt B.2.2.3", "hlgt B.2.2", "1", "cl B", "Y", "N", |
86 | 3x |
"cl C.1", "llt C.1.1.1.3", "dcd C.1.1.1.3", "hlt C.1.1.1", "hlgt C.1.1", "4", "cl C", "N", "Y", |
87 | 3x |
"cl C.2", "llt C.2.1.2.1", "dcd C.2.1.2.1", "hlt C.2.1.2", "hlgt C.2.1", "2", "cl C", "N", "Y", |
88 | 3x |
"cl D.1", "llt D.1.1.1.1", "dcd D.1.1.1.1", "hlt D.1.1.1", "hlgt D.1.1", "5", "cl D", "Y", "Y", |
89 | 3x |
"cl D.1", "llt D.1.1.4.2", "dcd D.1.1.4.2", "hlt D.1.1.4", "hlgt D.1.1", "3", "cl D", "N", "N", |
90 | 3x |
"cl D.2", "llt D.2.1.5.3", "dcd D.2.1.5.3", "hlt D.2.1.5", "hlgt D.2.1", "1", "cl D", "N", "Y" |
91 |
) |
|
92 |
} |
|
93 | ||
94 | 3x |
checkmate::assert_data_frame(lookup_aag, null.ok = TRUE) |
95 | 3x |
aag <- if (!is.null(lookup_aag)) { |
96 | ! |
lookup_aag |
97 |
} else { |
|
98 | 3x |
aag <- utils::read.table( |
99 | 3x |
sep = ",", header = TRUE, |
100 | 3x |
text = paste( |
101 | 3x |
"NAMVAR,SRCVAR,GRPTYPE,REFNAME,REFTERM,SCOPE", |
102 | 3x |
"CQ01NAM,AEDECOD,CUSTOM,D.2.1.5.3/A.1.1.1.1 AESI,dcd D.2.1.5.3,", |
103 | 3x |
"CQ01NAM,AEDECOD,CUSTOM,D.2.1.5.3/A.1.1.1.1 AESI,dcd A.1.1.1.1,", |
104 | 3x |
"SMQ01NAM,AEDECOD,SMQ,C.1.1.1.3/B.2.2.3.1 AESI,dcd C.1.1.1.3,BROAD", |
105 | 3x |
"SMQ01NAM,AEDECOD,SMQ,C.1.1.1.3/B.2.2.3.1 AESI,dcd B.2.2.3.1,BROAD", |
106 | 3x |
"SMQ02NAM,AEDECOD,SMQ,Y.9.9.9.9/Z.9.9.9.9 AESI,dcd Y.9.9.9.9,NARROW", |
107 | 3x |
"SMQ02NAM,AEDECOD,SMQ,Y.9.9.9.9/Z.9.9.9.9 AESI,dcd Z.9.9.9.9,NARROW", |
108 | 3x |
sep = "\n" |
109 | 3x |
), stringsAsFactors = FALSE |
110 |
) |
|
111 |
} |
|
112 | ||
113 | 3x |
if (!is.null(seed)) set.seed(seed) |
114 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
115 | ||
116 | 3x |
adae <- Map( |
117 | 3x |
function(id, sid) { |
118 | 30x |
n_aes <- sample(c(0, seq_len(max_n_aes)), 1) |
119 | 30x |
i <- sample(seq_len(nrow(lookup_ae)), n_aes, TRUE) |
120 | 30x |
dplyr::mutate( |
121 | 30x |
lookup_ae[i, ], |
122 | 30x |
USUBJID = id, |
123 | 30x |
STUDYID = sid |
124 |
) |
|
125 |
}, |
|
126 | 3x |
adsl$USUBJID, |
127 | 3x |
adsl$STUDYID |
128 |
) %>% |
|
129 | 3x |
Reduce(rbind, .) %>% |
130 | 3x |
`[`(c(10, 11, 1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% |
131 | 3x |
dplyr::mutate(AETERM = gsub("dcd", "trm", AEDECOD)) %>% |
132 | 3x |
dplyr::mutate(AESEV = dplyr::case_when( |
133 | 3x |
AETOXGR == 1 ~ "MILD", |
134 | 3x |
AETOXGR %in% c(2, 3) ~ "MODERATE", |
135 | 3x |
AETOXGR %in% c(4, 5) ~ "SEVERE" |
136 |
)) |
|
137 | ||
138 | 3x |
adae <- var_relabel( |
139 | 3x |
adae, |
140 | 3x |
STUDYID = "Study Identifier", |
141 | 3x |
USUBJID = "Unique Subject Identifier" |
142 |
) |
|
143 | ||
144 |
# merge adsl to be able to add AE date and study day variables |
|
145 | 3x |
adae <- dplyr::inner_join(adae, adsl, by = c("STUDYID", "USUBJID")) %>% |
146 | 3x |
dplyr::rowwise() %>% |
147 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
148 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
149 | 3x |
TRUE ~ TRTEDTM |
150 |
))) %>% |
|
151 | 3x |
dplyr::mutate(ASTDTM = sample( |
152 | 3x |
seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"), |
153 | 3x |
size = 1 |
154 |
)) %>% |
|
155 | 3x |
dplyr::mutate(ASTDY = ceiling(difftime(ASTDTM, TRTSDTM, units = "days"))) %>% |
156 |
# add 1 to end of range incase both values passed to sample() are the same |
|
157 | 3x |
dplyr::mutate(AENDTM = sample( |
158 | 3x |
seq(lubridate::as_datetime(ASTDTM), lubridate::as_datetime(TRTENDT + 1), by = "day"), |
159 | 3x |
size = 1 |
160 |
)) %>% |
|
161 | 3x |
dplyr::mutate(AENDY = ceiling(difftime(AENDTM, TRTSDTM, units = "days"))) %>% |
162 | 3x |
dplyr::mutate(LDOSEDTM = dplyr::case_when( |
163 | 3x |
TRTSDTM < ASTDTM ~ lubridate::as_datetime(stats::runif(1, TRTSDTM, ASTDTM)), |
164 | 3x |
TRUE ~ ASTDTM |
165 |
)) %>% |
|
166 | 3x |
dplyr::mutate(LDRELTM = as.numeric(difftime(ASTDTM, LDOSEDTM, units = "mins"))) %>% |
167 | 3x |
dplyr::select(-TRTENDT) %>% |
168 | 3x |
dplyr::ungroup() %>% |
169 | 3x |
dplyr::arrange(STUDYID, USUBJID, ASTDTM, AETERM) |
170 | ||
171 | 3x |
adae <- adae %>% |
172 | 3x |
dplyr::group_by(USUBJID) %>% |
173 | 3x |
dplyr::mutate(AESEQ = seq_len(dplyr::n())) %>% |
174 | 3x |
dplyr::mutate(ASEQ = AESEQ) %>% |
175 | 3x |
dplyr::ungroup() %>% |
176 | 3x |
dplyr::arrange( |
177 | 3x |
STUDYID, |
178 | 3x |
USUBJID, |
179 | 3x |
ASTDTM, |
180 | 3x |
AETERM, |
181 | 3x |
AESEQ |
182 |
) |
|
183 | ||
184 | 3x |
outcomes <- c( |
185 | 3x |
"UNKNOWN", |
186 | 3x |
"NOT RECOVERED/NOT RESOLVED", |
187 | 3x |
"RECOVERED/RESOLVED WITH SEQUELAE", |
188 | 3x |
"RECOVERING/RESOLVING", |
189 | 3x |
"RECOVERED/RESOLVED" |
190 |
) |
|
191 | ||
192 | 3x |
actions <- c( |
193 | 3x |
"DOSE RATE REDUCED", |
194 | 3x |
"UNKNOWN", |
195 | 3x |
"NOT APPLICABLE", |
196 | 3x |
"DRUG INTERRUPTED", |
197 | 3x |
"DRUG WITHDRAWN", |
198 | 3x |
"DOSE INCREASED", |
199 | 3x |
"DOSE NOT CHANGED", |
200 | 3x |
"DOSE REDUCED", |
201 | 3x |
"NOT EVALUABLE" |
202 |
) |
|
203 | ||
204 | 3x |
adae <- adae %>% |
205 | 3x |
dplyr::mutate(AEOUT = factor(ifelse( |
206 | 3x |
AETOXGR == "5", |
207 | 3x |
"FATAL", |
208 | 3x |
as.character(sample_fct(outcomes, nrow(adae), prob = c(0.1, 0.2, 0.1, 0.3, 0.3))) |
209 |
))) %>% |
|
210 | 3x |
dplyr::mutate(AEACN = factor(ifelse( |
211 | 3x |
AETOXGR == "5", |
212 | 3x |
"NOT EVALUABLE", |
213 | 3x |
as.character(sample_fct(actions, nrow(adae), prob = c(0.05, 0.05, 0.05, 0.01, 0.05, 0.1, 0.45, 0.1, 0.05))) |
214 |
))) %>% |
|
215 | 3x |
dplyr::mutate(AESDTH = dplyr::case_when( |
216 | 3x |
AEOUT == "FATAL" ~ "Y", |
217 | 3x |
TRUE ~ "N" |
218 |
)) %>% |
|
219 | 3x |
dplyr::mutate(TRTEMFL = ifelse(ASTDTM >= TRTSDTM, "Y", "")) %>% |
220 | 3x |
dplyr::mutate(AECONTRT = sample(c("Y", "N"), prob = c(0.4, 0.6), size = dplyr::n(), replace = TRUE)) %>% |
221 | 3x |
dplyr::mutate( |
222 | 3x |
ANL01FL = ifelse(TRTEMFL == "Y" & ASTDTM <= TRTEDTM + lubridate::month(1), "Y", "") |
223 |
) %>% |
|
224 | 3x |
dplyr::mutate(ANL01FL = ifelse(is.na(ANL01FL), "", ANL01FL)) |
225 | ||
226 | 3x |
adae <- adae %>% |
227 | 3x |
dplyr::mutate(AERELNST = sample(c("Y", "N"), prob = c(0.4, 0.6), size = dplyr::n(), replace = TRUE)) %>% |
228 | 3x |
dplyr::mutate(AEACNOTH = sample( |
229 | 3x |
x = c("MEDICATION", "PROCEDURE/SURGERY", "SUBJECT DISCONTINUED FROM STUDY", "NONE"), |
230 | 3x |
prob = c(0.2, 0.4, 0.2, 0.2), |
231 | 3x |
size = dplyr::n(), |
232 | 3x |
replace = TRUE |
233 |
)) |
|
234 | ||
235 |
# Split metadata for AEs of special interest (AESI). |
|
236 | 3x |
l_aag <- split(aag, interaction(aag$NAMVAR, aag$SRCVAR, aag$GRPTYPE, drop = TRUE)) |
237 | ||
238 |
# Create AESI flags |
|
239 | 3x |
l_aesi <- lapply(l_aag, function(d_adag, d_adae) { |
240 | 9x |
names(d_adag)[names(d_adag) == "REFTERM"] <- d_adag$SRCVAR[1] |
241 | 9x |
names(d_adag)[names(d_adag) == "REFNAME"] <- d_adag$NAMVAR[1] |
242 | ||
243 | 9x |
if (d_adag$GRPTYPE[1] == "CUSTOM") { |
244 | 3x |
d_adag <- d_adag[-which(names(d_adag) == "SCOPE")] |
245 | 6x |
} else if (d_adag$GRPTYPE[1] == "SMQ") { |
246 | 6x |
names(d_adag)[names(d_adag) == "SCOPE"] <- paste0(substr(d_adag$NAMVAR[1], 1, 5), "SC") |
247 |
} |
|
248 | ||
249 | 9x |
d_adag <- d_adag[-which(names(d_adag) %in% c("NAMVAR", "SRCVAR", "GRPTYPE"))] |
250 | 9x |
d_new <- dplyr::left_join(x = d_adae, y = d_adag, by = intersect(names(d_adae), names(d_adag))) |
251 | 9x |
d_new[, dplyr::setdiff(names(d_new), names(d_adae)), drop = FALSE] |
252 | 3x |
}, adae) |
253 | ||
254 | 3x |
adae <- dplyr::bind_cols(adae, l_aesi) |
255 | ||
256 | 3x |
adae <- dplyr::mutate(adae, AERELNST = sample( |
257 | 3x |
x = c("CONCURRENT ILLNESS", "OTHER", "DISEASE UNDER STUDY", "NONE"), |
258 | 3x |
prob = c(0.3, 0.3, 0.3, 0.1), |
259 | 3x |
size = dplyr::n(), |
260 | 3x |
replace = TRUE |
261 |
)) |
|
262 | ||
263 | ||
264 | 3x |
adae <- adae %>% |
265 | 3x |
dplyr::mutate(AES_FLAG = sample( |
266 | 3x |
x = c("AESLIFE", "AESHOSP", "AESDISAB", "AESCONG", "AESMIE"), |
267 | 3x |
prob = c(0.1, 0.2, 0.2, 0.2, 0.3), |
268 | 3x |
size = dplyr::n(), |
269 | 3x |
replace = TRUE |
270 |
)) %>% |
|
271 | 3x |
dplyr::mutate(AES_FLAG = dplyr::case_when( |
272 | 3x |
AESDTH == "Y" ~ "AESDTH", |
273 | 3x |
TRUE ~ AES_FLAG |
274 |
)) %>% |
|
275 | 3x |
dplyr::mutate( |
276 | 3x |
AESCONG = ifelse(AES_FLAG == "AESCONG", "Y", "N"), |
277 | 3x |
AESDISAB = ifelse(AES_FLAG == "AESDISAB", "Y", "N"), |
278 | 3x |
AESHOSP = ifelse(AES_FLAG == "AESHOSP", "Y", "N"), |
279 | 3x |
AESLIFE = ifelse(AES_FLAG == "AESLIFE", "Y", "N"), |
280 | 3x |
AESMIE = ifelse(AES_FLAG == "AESMIE", "Y", "N") |
281 |
) %>% |
|
282 | 3x |
dplyr::select(-"AES_FLAG") |
283 | ||
284 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) { |
285 | ! |
adae <- mutate_na(ds = adae, na_vars = na_vars, na_percentage = na_percentage) |
286 |
} |
|
287 | ||
288 |
# apply metadata |
|
289 | 3x |
adae <- apply_metadata(adae, "metadata/ADAE.yml") |
290 | ||
291 | 3x |
return(adae) |
292 |
} |
1 |
#' Load Cached Data |
|
2 |
#' |
|
3 |
#' Return data attached to package. |
|
4 |
#' |
|
5 |
#' @keywords internal |
|
6 |
#' @noRd |
|
7 |
get_cached_data <- function(dataname) { |
|
8 | 22x |
checkmate::assert_string(dataname) |
9 | 22x |
if (!("package:random.cdisc.data" %in% search())) { |
10 | 1x |
stop("cached data can only be loaded if the random.cdisc.data package is attached.", |
11 | 1x |
"Please run library(random.cdisc.data) before loading cached data.", |
12 | 1x |
call. = FALSE |
13 |
) |
|
14 |
} else { |
|
15 | 21x |
get(dataname, envir = asNamespace("random.cdisc.data")) |
16 |
} |
|
17 |
} |
|
18 | ||
19 |
#' Create a Factor with Random Elements of x |
|
20 |
#' |
|
21 |
#' Sample elements from `x` with replacement to build a factor. |
|
22 |
#' |
|
23 |
#' @param x (`character vector` or `factor`)\cr If character vector then it is also used |
|
24 |
#' as levels of the returned factor. If factor then the levels are used as the new levels. |
|
25 |
#' @param N (`numeric`)\cr Number of items to choose. |
|
26 |
#' @param ... Additional arguments to be passed to `sample`. |
|
27 |
#' |
|
28 |
#' @return A factor of length `N`. |
|
29 |
#' @keywords internal |
|
30 |
#' |
|
31 |
#' @examples |
|
32 |
#' random.cdisc.data:::sample_fct(letters[1:3], 10) |
|
33 |
#' random.cdisc.data:::sample_fct(iris$Species, 10) |
|
34 |
sample_fct <- function(x, N, ...) { # nolint |
|
35 | 296x |
checkmate::assert_number(N) |
36 | ||
37 | 296x |
factor(sample(x, N, replace = TRUE, ...), levels = if (is.factor(x)) levels(x) else x) |
38 |
} |
|
39 | ||
40 |
#' Related Variables: Initialize |
|
41 |
#' |
|
42 |
#' Verify and initialize related variable values. |
|
43 |
#' |
|
44 |
#' @param relvar1 (`list` of `character`)\cr List of n elements. |
|
45 |
#' @param relvar2 (`list` of `character`)\cr List of n elements. |
|
46 |
#' |
|
47 |
#' @return A vector of n elements. |
|
48 |
#' |
|
49 |
#' @keywords internal |
|
50 |
#' |
|
51 |
#' @examples |
|
52 |
#' random.cdisc.data:::relvar_init("Alanine Aminotransferase Measurement", "ALT") |
|
53 |
#' random.cdisc.data:::relvar_init("Alanine Aminotransferase Measurement", "U/L") |
|
54 |
relvar_init <- function(relvar1, relvar2) { |
|
55 | 64x |
checkmate::assert_character(relvar1, min.len = 1, any.missing = FALSE) |
56 | 64x |
checkmate::assert_character(relvar2, min.len = 1, any.missing = FALSE) |
57 | ||
58 | 64x |
if (length(relvar1) != length(relvar2)) { |
59 | 1x |
message(simpleError( |
60 | 1x |
"The argument value length of relvar1 and relvar2 differ. They must contain the same number of elements." |
61 |
)) |
|
62 | ! |
return(NA) |
63 |
} |
|
64 | 63x |
return(list("relvar1" = relvar1, "relvar2" = relvar2)) |
65 |
} |
|
66 | ||
67 |
#' Related Variables: Assign |
|
68 |
#' |
|
69 |
#' Assign values to a related variable within a domain. |
|
70 |
#' |
|
71 |
#' @param df (`data.frame`)\cr Data frame containing the related variables. |
|
72 |
#' @param var_name (`character`)\cr Name of variable related to `rel_var` to add to `df`. |
|
73 |
#' @param var_values (`any`)\cr Vector of values related to values of `related_var`. |
|
74 |
#' @param related_var (`character`)\cr Name of variable within `df` with values to which values |
|
75 |
#' of `var_name` must relate. |
|
76 |
#' |
|
77 |
#' @return `df` with added factor variable `var_name` containing `var_values` corresponding to `related_var`. |
|
78 |
#' @keywords internal |
|
79 |
#' |
|
80 |
#' @examples |
|
81 |
#' # Example with data.frame. |
|
82 |
#' params <- c("Level A", "Level B", "Level C") |
|
83 |
#' adlb_df <- data.frame( |
|
84 |
#' ID = 1:9, |
|
85 |
#' PARAM = factor( |
|
86 |
#' rep(c("Level A", "Level B", "Level C"), 3), |
|
87 |
#' levels = params |
|
88 |
#' ) |
|
89 |
#' ) |
|
90 |
#' random.cdisc.data:::rel_var( |
|
91 |
#' df = adlb_df, |
|
92 |
#' var_name = "PARAMCD", |
|
93 |
#' var_values = c("A", "B", "C"), |
|
94 |
#' related_var = "PARAM" |
|
95 |
#' ) |
|
96 |
#' |
|
97 |
#' # Example with tibble. |
|
98 |
#' adlb_tbl <- tibble::tibble( |
|
99 |
#' ID = 1:9, |
|
100 |
#' PARAM = factor( |
|
101 |
#' rep(c("Level A", "Level B", "Level C"), 3), |
|
102 |
#' levels = params |
|
103 |
#' ) |
|
104 |
#' ) |
|
105 |
#' random.cdisc.data:::rel_var( |
|
106 |
#' df = adlb_tbl, |
|
107 |
#' var_name = "PARAMCD", |
|
108 |
#' var_values = c("A", "B", "C"), |
|
109 |
#' related_var = "PARAM" |
|
110 |
#' ) |
|
111 |
rel_var <- function(df, var_name, related_var, var_values = NULL) { |
|
112 | 64x |
checkmate::assert_data_frame(df) |
113 | 64x |
checkmate::assert_string(var_name) |
114 | 64x |
checkmate::assert_string(related_var) |
115 | 64x |
n_relvar1 <- length(unique(df[, related_var, drop = TRUE])) |
116 | 64x |
checkmate::assert_vector(var_values, null.ok = TRUE, len = n_relvar1, any.missing = FALSE) |
117 | 1x |
if (is.null(var_values)) var_values <- rep(NA, n_relvar1) |
118 | ||
119 | 64x |
relvar1 <- unique(df[, related_var, drop = TRUE]) |
120 | 64x |
relvar2_values <- rep(NA, nrow(df)) |
121 | 64x |
for (r in seq_len(n_relvar1)) { |
122 | 538x |
matched <- which(df[, related_var, drop = TRUE] == relvar1[r]) |
123 | 538x |
relvar2_values[matched] <- var_values[r] |
124 |
} |
|
125 | 64x |
df[[var_name]] <- factor(relvar2_values) |
126 | 64x |
return(df) |
127 |
} |
|
128 | ||
129 |
#' Create Visit Schedule |
|
130 |
#' |
|
131 |
#' Create a visit schedule as a factor. |
|
132 |
#' |
|
133 |
#' X number of visits, or X number of cycles and Y number of days. |
|
134 |
#' |
|
135 |
#' @inheritParams argument_convention |
|
136 |
#' |
|
137 |
#' @return A factor of length `n_assessments`. |
|
138 |
#' @keywords internal |
|
139 |
#' |
|
140 |
#' @examples |
|
141 |
#' random.cdisc.data:::visit_schedule(visit_format = "WEeK", n_assessments = 10L) |
|
142 |
#' random.cdisc.data:::visit_schedule(visit_format = "CyCLE", n_assessments = 5L, n_days = 2L) |
|
143 |
visit_schedule <- function(visit_format = "WEEK", |
|
144 |
n_assessments = 10L, |
|
145 |
n_days = 5L) { |
|
146 | 56x |
checkmate::assert_string(visit_format, pattern = "^WEEK$|^CYCLE$", ignore.case = TRUE) |
147 | 56x |
checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE) |
148 | 56x |
checkmate::assert_integer(n_days, len = 1, any.missing = FALSE) |
149 | ||
150 | 56x |
if (toupper(visit_format) == "WEEK") { |
151 |
# numeric vector of n assessments/cycles/days |
|
152 | 49x |
assessments <- 1:n_assessments |
153 |
# numeric vector for ordering including screening (-1) and baseline (0) place holders |
|
154 | 49x |
assessments_ord <- -1:n_assessments |
155 |
# character vector of nominal visit values |
|
156 | 49x |
visit_values <- c("SCREENING", "BASELINE", paste(toupper(visit_format), assessments, "DAY", (assessments * 7) + 1)) |
157 | 7x |
} else if (toupper(visit_format) == "CYCLE") { |
158 | 7x |
cycles <- sort(rep(1:n_assessments, times = 1, each = n_days)) |
159 | 7x |
days <- rep(seq(1:n_days), times = n_assessments, each = 1) |
160 | 7x |
assessments_ord <- 0:(n_assessments * n_days) |
161 | 7x |
visit_values <- c("SCREENING", paste(toupper(visit_format), cycles, "DAY", days)) |
162 |
} |
|
163 | ||
164 |
# create and order factor variable to return from function |
|
165 | 56x |
visit_values <- stats::reorder(factor(visit_values), assessments_ord) |
166 |
} |
|
167 | ||
168 |
#' Primary Keys: Retain Values |
|
169 |
#' |
|
170 |
#' Retain values within primary keys. |
|
171 |
#' |
|
172 |
#' @param df (`data.frame`)\cr Data frame in which to apply the retain. |
|
173 |
#' @param value_var (`any`)\cr Variable in `df` containing the value to be retained. |
|
174 |
#' @param event (`expression`)\cr Expression returning a logical value to trigger the retain. |
|
175 |
#' @param outside (`any`)\cr Additional value to retain. Defaults to `NA`. |
|
176 |
#' |
|
177 |
#' @keywords internal |
|
178 |
#' |
|
179 |
#' @examples |
|
180 |
#' adlb <- radlb(radsl(N = 10, na_percentage = 0), na_vars = list()) |
|
181 |
#' adlb$BASE2 <- random.cdisc.data:::retain( |
|
182 |
#' df = adlb, value_var = adlb$AVAL, |
|
183 |
#' event = adlb$ABLFL2 == "Y" |
|
184 |
#' ) |
|
185 |
retain <- function(df, value_var, event, outside = NA) { |
|
186 | 31x |
indices <- c(1, which(event == TRUE), nrow(df) + 1) |
187 | 31x |
values <- c(outside, value_var[event == TRUE]) |
188 | 31x |
rep(values, diff(indices)) |
189 |
} |
|
190 | ||
191 |
#' Primary Keys: Labels |
|
192 |
#' |
|
193 |
#' Relabel a subset of variables in a data set. |
|
194 |
#' |
|
195 |
#' @param x (`data.frame`)\cr Data frame containing variables to which labels are applied. |
|
196 |
#' @param ... (`named character`)\cr Name-Value pairs, where name corresponds to a variable |
|
197 |
#' name in `x` and the value to the new variable label. |
|
198 |
#' |
|
199 |
#' @keywords internal |
|
200 |
#' |
|
201 |
#' @examples |
|
202 |
#' adsl <- radsl() |
|
203 |
#' random.cdisc.data:::var_relabel(adsl, |
|
204 |
#' STUDYID = "Study Identifier", |
|
205 |
#' USUBJID = "Unique Subject Identifier" |
|
206 |
#' ) |
|
207 |
var_relabel <- function(x, ...) { |
|
208 | 82x |
dots <- list(...) |
209 | 82x |
varnames <- names(dots) |
210 | 82x |
if (is.null(varnames)) { |
211 | 1x |
stop("missing variable declarations") |
212 |
} |
|
213 | 81x |
map_varnames <- match(varnames, names(x)) |
214 | 81x |
for (i in seq_along(map_varnames)) { |
215 | 161x |
attr(x[[map_varnames[[i]]]], "label") <- dots[[i]] |
216 |
} |
|
217 | 81x |
x |
218 |
} |
|
219 | ||
220 |
#' Apply Metadata |
|
221 |
#' |
|
222 |
#' Apply label and variable ordering attributes to domains. |
|
223 |
#' |
|
224 |
#' @param df (`data.frame`)\cr Data frame to which metadata is applied. |
|
225 |
#' @param filename (`yaml`)\cr File containing domain metadata. |
|
226 |
#' @param add_adsl (`logical`)\cr Should ADSL data be merged to domain. |
|
227 |
#' @param adsl_filename (`yaml`)\cr File containing ADSL metadata. |
|
228 |
#' |
|
229 |
#' @keywords internal |
|
230 |
#' |
|
231 |
#' @examples |
|
232 |
#' seed <- 1 |
|
233 |
#' adsl <- radsl(seed = seed) |
|
234 |
#' adsub <- radsub(adsl, seed = seed) |
|
235 |
#' yaml_path <- file.path(path.package("random.cdisc.data"), "inst", "metadata") |
|
236 |
#' adsl <- random.cdisc.data:::apply_metadata(adsl, file.path(yaml_path, "ADSL.yml"), FALSE) |
|
237 |
#' adsub <- random.cdisc.data:::apply_metadata( |
|
238 |
#' adsub, file.path(yaml_path, "ADSUB.yml"), TRUE, |
|
239 |
#' file.path(yaml_path, "ADSL.yml") |
|
240 |
#' ) |
|
241 |
apply_metadata <- function(df, filename, add_adsl = TRUE, adsl_filename = "metadata/ADSL.yml") { |
|
242 | 90x |
checkmate::assert_data_frame(df) |
243 | 90x |
checkmate::assert_string(filename) |
244 | 90x |
checkmate::assert_flag(add_adsl) |
245 | 90x |
checkmate::assert_string(adsl_filename) |
246 | ||
247 | 90x |
apply_type <- function(df, var, type) { |
248 | 5986x |
if (is.null(type)) { |
249 | ! |
return() |
250 |
} |
|
251 | ||
252 | 5986x |
if (type == "character" && !is.character(df[[var]])) { |
253 | 12x |
df[[var]] <<- as.character(df[[var]]) |
254 | 5974x |
} else if (type == "factor" && !is.factor(df[[var]])) { |
255 | 730x |
df[[var]] <<- as.factor(df[[var]]) |
256 | 5244x |
} else if (type == "integer" && !is.integer(df[[var]])) { |
257 | 225x |
df[[var]] <<- as.integer(df[[var]]) |
258 | 5019x |
} else if (type == "numeric" && !is.numeric(df[[var]])) { |
259 | 3x |
df[[var]] <<- as.numeric(df[[var]]) |
260 | 5016x |
} else if (type == "logical" && !is.logical(df[[var]])) { |
261 | ! |
df[[var]] <<- as.logical(df[[var]]) |
262 | 5016x |
} else if (type == "datetime" && !lubridate::is.POSIXct(df[[var]])) { |
263 | 9x |
df[[var]] <<- as.POSIXct(df[[var]]) |
264 | 5007x |
} else if (type == "date" && !lubridate::is.Date(df[[var]])) { |
265 | ! |
df[[var]] <<- as.Date(df[[var]]) |
266 |
} |
|
267 |
} |
|
268 | ||
269 |
# remove existing attributes |
|
270 | 90x |
for (i in base::setdiff(names(attributes(df)), names(attributes(data.frame())))) { |
271 | 3x |
attr(df, i) <- NULL |
272 |
} |
|
273 | ||
274 |
# get metadata |
|
275 | 90x |
metadata <- yaml::yaml.load_file(system.file(filename, package = "random.cdisc.data")) |
276 | 90x |
adsl_metadata <- if (add_adsl) { |
277 | 64x |
yaml::yaml.load_file(system.file(adsl_filename, package = "random.cdisc.data")) |
278 |
} else { |
|
279 | 26x |
NULL |
280 |
} |
|
281 | 90x |
metadata_variables <- append(adsl_metadata$variables, metadata$variables) |
282 | 90x |
metadata_varnames <- names(metadata_variables) |
283 | ||
284 |
# find variables that does not have labels and are not it metadata |
|
285 | 90x |
missing_vars_map <- vapply( |
286 | 90x |
names(df), |
287 | 90x |
function(x) { |
288 | 5986x |
!(x %in% c("STUDYID", "USUBJID", metadata_varnames)) && is.null(attr(df[[x]], "label")) |
289 |
}, |
|
290 | 90x |
logical(1) |
291 |
) |
|
292 | 90x |
missing_vars <- names(df)[missing_vars_map] |
293 | 90x |
if (length(missing_vars) > 0) { |
294 | ! |
msg <- paste0( |
295 | ! |
"Following variables does not have label or are not found in ", |
296 | ! |
filename, |
297 |
": ", |
|
298 | ! |
paste0(missing_vars, collapse = ", ") |
299 |
) |
|
300 | ! |
warning(msg) |
301 |
} |
|
302 | ||
303 | 90x |
if (!all(metadata_varnames %in% names(df))) { |
304 | 6x |
metadata_varnames <- metadata_varnames[metadata_varnames %in% names(df)] |
305 |
} |
|
306 | ||
307 |
# assign labels to variables |
|
308 | 90x |
for (var in metadata_varnames) { |
309 | 5986x |
apply_type(df, var, metadata_variables[[var]]$type) |
310 | 5986x |
attr(df[[var]], "label") <- metadata_variables[[var]]$label |
311 |
} |
|
312 | ||
313 |
# reorder data frame columns to expected BDS order |
|
314 | 90x |
df <- df[, unique(c("STUDYID", "USUBJID", metadata_varnames, names(df)))] |
315 | ||
316 |
# assign label to data frame |
|
317 | 90x |
attr(df, "label") <- metadata$domain$label |
318 | ||
319 | 90x |
df |
320 |
} |
|
321 | ||
322 |
#' Replace Values in a Vector by NA |
|
323 |
#' |
|
324 |
#' @description `r lifecycle::badge("stable")` |
|
325 |
#' |
|
326 |
#' Randomized replacement of values by `NA`. |
|
327 |
#' |
|
328 |
#' @inheritParams argument_convention |
|
329 |
#' @param v (`any`)\cr Vector of any type. |
|
330 |
#' @param percentage (`proportion`)\cr Value between 0 and 1 defining |
|
331 |
#' how much of the vector shall be replaced by `NA`. This number |
|
332 |
#' is randomized by +/- 5% to have full randomization. |
|
333 |
#' |
|
334 |
#' @return The input vector `v` where a certain number of values are replaced by `NA`. |
|
335 |
#' |
|
336 |
#' @export |
|
337 |
replace_na <- function(v, percentage = 0.05, seed = NULL) { |
|
338 | 9x |
checkmate::assert_number(percentage, lower = 0, upper = 1) |
339 | ||
340 | 9x |
if (percentage == 0) { |
341 | 1x |
return(v) |
342 |
} |
|
343 | ||
344 | 8x |
if (!is.null(seed) && !is.na(seed)) { |
345 | 8x |
set.seed(seed) |
346 |
} |
|
347 | ||
348 |
# randomize the percentage |
|
349 | 8x |
ind <- sample(seq_along(v), round(length(v) * percentage)) |
350 | ||
351 | 8x |
v[ind] <- NA |
352 | ||
353 | 8x |
return(v) |
354 |
} |
|
355 | ||
356 |
#' Replace Values with NA |
|
357 |
#' |
|
358 |
#' @description `r lifecycle::badge("stable")` |
|
359 |
#' |
|
360 |
#' Replace column values with `NA`s. |
|
361 |
#' |
|
362 |
#' @inheritParams argument_convention |
|
363 |
#' @param ds (`data.frame`)\cr Any data set. |
|
364 |
#' |
|
365 |
#' @export |
|
366 |
mutate_na <- function(ds, na_vars = NULL, na_percentage = 0.05) { |
|
367 | 5x |
if (!is.null(na_vars)) { |
368 | 4x |
stopifnot(is.list(na_vars)) # any list is OK; as values can be left NA |
369 | 4x |
stopifnot(length(names(na_vars)) == length(na_vars)) # names for all elements |
370 |
} else { |
|
371 | 1x |
na_vars <- names(ds) |
372 |
} |
|
373 | ||
374 | 5x |
stopifnot(is.numeric(na_percentage)) |
375 | 5x |
stopifnot(na_percentage >= 0 && na_percentage < 1) |
376 | ||
377 | 5x |
for (na_var in names(na_vars)) { |
378 | 8x |
if (!is.na(na_var)) { |
379 | 8x |
if (!na_var %in% names(ds)) { |
380 | 1x |
warning(paste(na_var, "not in column names")) |
381 |
} else { |
|
382 | 7x |
ds <- ds %>% |
383 | 7x |
ungroup_rowwise_df() %>% |
384 | 7x |
dplyr::mutate( |
385 | 7x |
!!na_var := ds[[na_var]] %>% |
386 | 7x |
replace_na( |
387 | 7x |
percentage = ifelse(is.na(na_vars[[na_var]][2]), na_percentage, na_vars[[na_var]][2]), |
388 | 7x |
seed = na_vars[[na_var]][1] |
389 |
) |
|
390 |
) |
|
391 |
} |
|
392 |
} |
|
393 |
} |
|
394 | 5x |
return(ds) |
395 |
} |
|
396 | ||
397 |
ungroup_rowwise_df <- function(x) { |
|
398 | 7x |
class(x) <- c("tbl", "tbl_df", "data.frame") |
399 | 7x |
return(x) |
400 |
} |
|
401 | ||
402 |
#' Zero-Truncated Poisson Distribution |
|
403 |
#' |
|
404 |
#' @description `r lifecycle::badge("stable")` |
|
405 |
#' |
|
406 |
#' This generates random numbers from a zero-truncated Poisson distribution, |
|
407 |
#' i.e. from `X | X > 0` when `X ~ Poisson(lambda)`. The advantage here is that |
|
408 |
#' we guarantee to return exactly `n` numbers and without using a loop internally. |
|
409 |
#' This solution was provided in a post by |
|
410 |
#' [Peter Dalgaard](https://stat.ethz.ch/pipermail/r-help/2005-May/070680.html). |
|
411 |
#' |
|
412 |
#' @param n (`numeric`)\cr Number of random numbers. |
|
413 |
#' @param lambda (`numeric`)\cr Non-negative mean(s). |
|
414 |
#' |
|
415 |
#' @return The random numbers. |
|
416 |
#' @export |
|
417 |
#' |
|
418 |
#' @examples |
|
419 |
#' x <- rpois(1e6, lambda = 5) |
|
420 |
#' x <- x[x > 0] |
|
421 |
#' hist(x) |
|
422 |
#' |
|
423 |
#' y <- rtpois(1e6, lambda = 5) |
|
424 |
#' hist(y) |
|
425 |
rtpois <- function(n, lambda) { |
|
426 | 121x |
stats::qpois(stats::runif(n, stats::dpois(0, lambda), 1), lambda) |
427 |
} |
|
428 | ||
429 |
#' Truncated Exponential Distribution |
|
430 |
#' |
|
431 |
#' @description `r lifecycle::badge("stable")` |
|
432 |
#' |
|
433 |
#' This generates random numbers from a truncated Exponential distribution, |
|
434 |
#' i.e. from `X | X > l` or `X | X < r` when `X ~ Exp(rate)`. The advantage here is that |
|
435 |
#' we guarantee to return exactly `n` numbers and without using a loop internally. |
|
436 |
#' This can be derived from the quantile functions of the left- and right-truncated |
|
437 |
#' Exponential distributions. |
|
438 |
#' |
|
439 |
#' @param n (`numeric`)\cr Number of random numbers. |
|
440 |
#' @param rate (`numeric`)\cr Non-negative rate. |
|
441 |
#' @param l (`numeric`)\cr Positive left-hand truncation parameter. |
|
442 |
#' @param r (`numeric`)\cr Positive right-hand truncation parameter. |
|
443 |
#' |
|
444 |
#' @return The random numbers. If neither `l` nor `r` are provided then the usual Exponential |
|
445 |
#' distribution is used. |
|
446 |
#' @export |
|
447 |
#' |
|
448 |
#' @examples |
|
449 |
#' x <- stats::rexp(1e6, rate = 5) |
|
450 |
#' x <- x[x > 0.5] |
|
451 |
#' hist(x) |
|
452 |
#' |
|
453 |
#' y <- rtexp(1e6, rate = 5, l = 0.5) |
|
454 |
#' hist(y) |
|
455 |
#' |
|
456 |
#' z <- rtexp(1e6, rate = 5, r = 0.5) |
|
457 |
#' hist(z) |
|
458 |
rtexp <- function(n, rate, l = NULL, r = NULL) { |
|
459 | 123x |
if (!is.null(l)) { |
460 | 1x |
l - log(1 - stats::runif(n)) / rate |
461 | 122x |
} else if (!is.null(r)) { |
462 | 121x |
-log(1 - stats::runif(n) * (1 - exp(-r * rate))) / rate |
463 |
} else { |
|
464 | 1x |
stats::rexp(n, rate) |
465 |
} |
|
466 |
} |
1 |
#' Time to Adverse Event Analysis Dataset (ADAETTE) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function to generate random Time-to-AE Dataset for a |
|
6 |
#' given Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD` |
|
11 |
#' |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @param event_descr (`character vector`)\cr Descriptions of events. Defaults to `NULL`. |
|
14 |
#' @param censor_descr (`character vector`)\cr Descriptions of censors. Defaults to `NULL`. |
|
15 |
#' @template param_cached |
|
16 |
#' @templateVar data adaette |
|
17 |
#' |
|
18 |
#' @return `data.frame` |
|
19 |
#' @export |
|
20 |
#' |
|
21 |
#' @author Xiuting Mi |
|
22 |
#' |
|
23 |
#' @examples |
|
24 |
#' library(random.cdisc.data) |
|
25 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
26 |
#' |
|
27 |
#' adaette <- radaette(adsl, seed = 2) |
|
28 |
#' adaette |
|
29 |
radaette <- function(adsl, |
|
30 |
event_descr = NULL, |
|
31 |
censor_descr = NULL, |
|
32 |
lookup = NULL, |
|
33 |
seed = NULL, |
|
34 |
na_percentage = 0, |
|
35 |
na_vars = list(CNSR = c(NA, 0.1), AVAL = c(1234, 0.1)), |
|
36 |
cached = FALSE) { |
|
37 | 6x |
checkmate::assert_flag(cached) |
38 | 6x |
if (cached) { |
39 | 1x |
return(get_cached_data("cadaette")) |
40 |
} |
|
41 | ||
42 | 5x |
checkmate::assert_data_frame(adsl) |
43 | 5x |
checkmate::assert_character(censor_descr, null.ok = TRUE, min.len = 1, any.missing = FALSE) |
44 | 5x |
checkmate::assert_character(event_descr, null.ok = TRUE, min.len = 1, any.missing = FALSE) |
45 | 5x |
checkmate::assert_number(seed, null.ok = TRUE) |
46 | 5x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
47 | 5x |
checkmate::assert_true(na_percentage < 1) |
48 | ||
49 | 5x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
50 | 5x |
lookup_adaette <- if (!is.null(lookup)) { |
51 | ! |
lookup |
52 |
} else { |
|
53 | 5x |
tibble::tribble( |
54 | 5x |
~ARM, ~CATCD, ~CAT, ~LAMBDA, ~CNSR_P, |
55 | 5x |
"ARM A", "1", "any adverse event", 1 / 80, 0.4, |
56 | 5x |
"ARM B", "1", "any adverse event", 1 / 100, 0.2, |
57 | 5x |
"ARM C", "1", "any adverse event", 1 / 60, 0.42, |
58 | 5x |
"ARM A", "2", "any serious adverse event", 1 / 100, 0.3, |
59 | 5x |
"ARM B", "2", "any serious adverse event", 1 / 150, 0.1, |
60 | 5x |
"ARM C", "2", "any serious adverse event", 1 / 80, 0.32, |
61 | 5x |
"ARM A", "3", "a grade 3-5 adverse event", 1 / 80, 0.2, |
62 | 5x |
"ARM B", "3", "a grade 3-5 adverse event", 1 / 100, 0.08, |
63 | 5x |
"ARM C", "3", "a grade 3-5 adverse event", 1 / 60, 0.23 |
64 |
) |
|
65 |
} |
|
66 | ||
67 | 5x |
if (!is.null(seed)) { |
68 | 5x |
set.seed(seed) |
69 |
} |
|
70 | 5x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
71 | ||
72 | 5x |
evntdescr_sel <- if (!is.null(event_descr)) { |
73 | ! |
event_descr |
74 |
} else { |
|
75 | 5x |
"Preferred Term" |
76 |
} |
|
77 | ||
78 | 5x |
cnsdtdscr_sel <- if (!is.null(censor_descr)) { |
79 | ! |
censor_descr |
80 |
} else { |
|
81 | 5x |
c( |
82 | 5x |
"Clinical Cut Off", |
83 | 5x |
"Completion or Discontinuation", |
84 | 5x |
"End of AE Reporting Period" |
85 |
) |
|
86 |
} |
|
87 | ||
88 | 5x |
random_patient_data <- function(patient_info) { |
89 | 50x |
startdt <- lubridate::date(patient_info$TRTSDTM) |
90 | 50x |
trtedtm <- lubridate::floor_date(dplyr::case_when( |
91 | 50x |
is.na(patient_info$TRTEDTM) ~ lubridate::date(patient_info$TRTSDTM) + study_duration_secs, |
92 | 50x |
TRUE ~ lubridate::date(patient_info$TRTEDTM) |
93 | 50x |
), unit = "day") |
94 | 50x |
enddts <- c(patient_info$EOSDT, lubridate::date(trtedtm)) |
95 | 50x |
enddts_min_index <- which.min(enddts) |
96 | 50x |
adt <- enddts[enddts_min_index] |
97 | 50x |
adtm <- lubridate::as_datetime(adt) |
98 | 50x |
ady <- as.numeric(adt - startdt + 1) |
99 | 50x |
data.frame( |
100 | 50x |
ARM = patient_info$ARM, |
101 | 50x |
STUDYID = patient_info$STUDYID, |
102 | 50x |
SITEID = patient_info$SITEID, |
103 | 50x |
USUBJID = patient_info$USUBJID, |
104 | 50x |
PARAMCD = "AEREPTTE", |
105 | 50x |
PARAM = "Time to end of AE reporting period", |
106 | 50x |
CNSR = 0, |
107 | 50x |
AVAL = lubridate::days(ady) / lubridate::years(1), |
108 | 50x |
AVALU = "YEARS", |
109 | 50x |
EVNTDESC = ifelse(enddts_min_index == 1, "Completion or Discontinuation", "End of AE Reporting Period"), |
110 | 50x |
CNSDTDSC = NA, |
111 | 50x |
ADTM = adtm, |
112 | 50x |
ADY = ady, |
113 | 50x |
stringsAsFactors = FALSE |
114 |
) |
|
115 |
} |
|
116 | ||
117 |
# validate and initialize related variables for Hy's law |
|
118 | 5x |
paramcd_hy <- c("HYSTTEUL", "HYSTTEBL") |
119 | 5x |
param_hy <- c("Time to Hy's Law Elevation in relation to ULN", "Time to Hy's Law Elevation in relation to Baseline") |
120 | 5x |
param_init_list <- relvar_init(param_hy, paramcd_hy) |
121 | 5x |
adsl_hy <- dplyr::select(adsl, "STUDYID", "USUBJID", "TRTSDTM", "SITEID", "ARM") |
122 | ||
123 |
# create all combinations of unique values in STUDYID, USUBJID, PARAM, AVISIT |
|
124 | 5x |
adaette_hy <- expand.grid( |
125 | 5x |
STUDYID = unique(adsl$STUDYID), |
126 | 5x |
USUBJID = adsl$USUBJID, |
127 | 5x |
PARAM = as.factor(param_init_list$relvar1), |
128 | 5x |
stringsAsFactors = FALSE |
129 |
) |
|
130 | ||
131 |
# Add other variables to adaette_hy |
|
132 | 5x |
adaette_hy <- dplyr::left_join(adaette_hy, adsl_hy, by = c("STUDYID", "USUBJID")) %>% |
133 | 5x |
rel_var( |
134 | 5x |
var_name = "PARAMCD", |
135 | 5x |
related_var = "PARAM", |
136 | 5x |
var_values = param_init_list$relvar2 |
137 |
) %>% |
|
138 | 5x |
dplyr::mutate( |
139 | 5x |
CNSR = sample(c(0, 1), prob = c(0.1, 0.9), size = dplyr::n(), replace = TRUE), |
140 | 5x |
EVNTDESC = dplyr::if_else( |
141 | 5x |
CNSR == 0, |
142 | 5x |
"First Post-Baseline Raised ALT or AST Elevation Result", |
143 | 5x |
NA_character_ |
144 |
), |
|
145 | 5x |
CNSDTDSC = dplyr::if_else(CNSR == 0, NA_character_, |
146 | 5x |
sample(c("Last Post-Baseline ALT or AST Result", "Treatment Start"), |
147 | 5x |
prob = c(0.9, 0.1), |
148 | 5x |
size = dplyr::n(), replace = TRUE |
149 |
) |
|
150 |
) |
|
151 |
) %>% |
|
152 | 5x |
dplyr::rowwise() %>% |
153 | 5x |
dplyr::mutate(ADTM = dplyr::case_when( |
154 | 5x |
CNSDTDSC == "Treatment Start" ~ TRTSDTM, |
155 | 5x |
TRUE ~ TRTSDTM + sample(seq(0, study_duration_secs), size = dplyr::n(), replace = TRUE) |
156 |
)) %>% |
|
157 | 5x |
dplyr::mutate( |
158 | 5x |
ADY_int = lubridate::date(ADTM) - lubridate::date(TRTSDTM) + 1, |
159 | 5x |
ADY = as.numeric(ADY_int), |
160 | 5x |
AVAL = lubridate::days(ADY_int) / lubridate::weeks(1), |
161 | 5x |
AVALU = "WEEKS" |
162 |
) %>% |
|
163 | 5x |
dplyr::select(-TRTSDTM, -ADY_int) |
164 | ||
165 | 5x |
random_ae_data <- function(lookup_info, patient_info, patient_data) { |
166 | 150x |
cnsr <- sample(c(0, 1), 1, prob = c(1 - lookup_info$CNSR_P, lookup_info$CNSR_P)) |
167 | 150x |
ae_rep_tte <- patient_data$AVAL[patient_data$PARAMCD == "AEREPTTE"] |
168 | 150x |
data.frame( |
169 | 150x |
ARM = rep(patient_data$ARM, 2), |
170 | 150x |
STUDYID = rep(patient_data$STUDYID, 2), |
171 | 150x |
SITEID = rep(patient_data$SITEID, 2), |
172 | 150x |
USUBJID = rep(patient_data$USUBJID, 2), |
173 | 150x |
PARAMCD = c( |
174 | 150x |
paste0("AETTE", lookup_info$CATCD), |
175 | 150x |
paste0("AETOT", lookup_info$CATCD) |
176 |
), |
|
177 | 150x |
PARAM = c( |
178 | 150x |
paste("Time to first occurrence of", lookup_info$CAT), |
179 | 150x |
paste("Number of occurrences of", lookup_info$CAT) |
180 |
), |
|
181 | 150x |
CNSR = c( |
182 | 150x |
cnsr, |
183 | 150x |
NA |
184 |
), |
|
185 | 150x |
AVAL = c( |
186 |
# We generate these values conditional on the censoring information. |
|
187 |
# If this time to event is censored, then there were no AEs reported and the time is set |
|
188 |
# to the AE reporting period time. Otherwise we draw from truncated distributions to make |
|
189 |
# sure that we are within the AE reporting time and above 0 AEs. |
|
190 | 150x |
ifelse(cnsr == 1, ae_rep_tte, rtexp(1, lookup_info$LAMBDA * 365.25, r = ae_rep_tte)), |
191 | 150x |
ifelse(cnsr == 1, 0, rtpois(1, lookup_info$LAMBDA * 365.25)) |
192 |
), |
|
193 | 150x |
AVALU = c( |
194 | 150x |
"YEARS", |
195 | 150x |
NA |
196 |
), |
|
197 | 150x |
EVNTDESC = c( |
198 | 150x |
ifelse(cnsr == 0, sample(evntdescr_sel, 1), ""), |
199 | 150x |
NA |
200 |
), |
|
201 | 150x |
CNSDTDSC = c( |
202 | 150x |
ifelse(cnsr == 1, sample(cnsdtdscr_sel, 1), ""), |
203 | 150x |
NA |
204 |
), |
|
205 | 150x |
stringsAsFactors = FALSE |
206 | 150x |
) %>% dplyr::mutate( |
207 | 150x |
ADY = dplyr::if_else(is.na(AVALU), NA_real_, ceiling(as.numeric(lubridate::dyears(AVAL), "days"))), |
208 | 150x |
ADTM = dplyr::if_else( |
209 | 150x |
is.na(AVALU), |
210 | 150x |
lubridate::as_datetime(NA), |
211 | 150x |
patient_info$TRTSDTM + lubridate::days(ADY) |
212 |
) |
|
213 |
) |
|
214 |
} |
|
215 | ||
216 | 5x |
adaette <- split(adsl, adsl$USUBJID) %>% |
217 | 5x |
lapply(function(patient_info) { |
218 | 50x |
patient_data <- random_patient_data(patient_info) |
219 | 50x |
lookup_arm <- lookup_adaette %>% |
220 | 50x |
dplyr::filter(ARM == as.character(patient_info$ARMCD)) |
221 | 50x |
ae_data <- split(lookup_arm, lookup_arm$CATCD) %>% |
222 | 50x |
lapply(random_ae_data, patient_data = patient_data, patient_info = patient_info) %>% |
223 | 50x |
Reduce(rbind, .) |
224 | 50x |
dplyr::bind_rows(patient_data, ae_data) |
225 |
}) %>% |
|
226 | 5x |
Reduce(rbind, .) %>% |
227 | 5x |
var_relabel( |
228 | 5x |
STUDYID = "Study Identifier", |
229 | 5x |
USUBJID = "Unique Subject Identifier" |
230 |
) |
|
231 | ||
232 | 5x |
adaette <- var_relabel( |
233 | 5x |
adaette, |
234 | 5x |
STUDYID = "Study Identifier", |
235 | 5x |
USUBJID = "Unique Subject Identifier" |
236 |
) |
|
237 | ||
238 | 5x |
adaette <- rbind(adaette, adaette_hy) |
239 | ||
240 | 5x |
adaette <- dplyr::inner_join( |
241 | 5x |
dplyr::select(adaette, -"SITEID", -"ARM"), |
242 | 5x |
adsl, |
243 | 5x |
by = c("STUDYID", "USUBJID") |
244 |
) %>% |
|
245 | 5x |
dplyr::group_by(USUBJID) %>% |
246 | 5x |
dplyr::arrange(ADTM) %>% |
247 | 5x |
dplyr::mutate(TTESEQ = seq_len(dplyr::n())) %>% |
248 | 5x |
dplyr::mutate(ASEQ = TTESEQ) %>% |
249 | 5x |
dplyr::mutate(PARAM = as.factor(PARAM)) %>% |
250 | 5x |
dplyr::mutate(PARAMCD = as.factor(PARAMCD)) %>% |
251 | 5x |
dplyr::ungroup() %>% |
252 | 5x |
dplyr::arrange( |
253 | 5x |
STUDYID, |
254 | 5x |
USUBJID, |
255 | 5x |
PARAMCD, |
256 | 5x |
ADTM, |
257 | 5x |
TTESEQ |
258 |
) |
|
259 | ||
260 | 5x |
if (length(na_vars) > 0 && na_percentage > 0) { |
261 | ! |
adaette <- dplyr::mutate(ds = adaette, na_vars = na_vars, na_percentage = na_percentage) |
262 |
} |
|
263 | ||
264 |
# apply metadata |
|
265 | 5x |
adaette <- apply_metadata(adaette, "metadata/ADAETTE.yml") |
266 | ||
267 | 5x |
return(adaette) |
268 |
} |
1 |
#' Protocol Deviations Analysis Dataset (ADDV) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating random Protocol Deviations Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details One record per each record in the corresponding SDTM domain. |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `ASTDT`, `DVTERM`, `DVSEQ` |
|
11 |
#' |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @param max_n_dv (`integer`)\cr Maximum number of deviations per patient. Defaults to 3. |
|
14 |
#' @param p_dv (`proportion`)\cr Probability of a patient having protocol deviations. |
|
15 |
#' @template param_cached |
|
16 |
#' @templateVar data addv |
|
17 |
#' |
|
18 |
#' @return `data.frame` |
|
19 |
#' @export |
|
20 |
#' |
|
21 |
#' @examples |
|
22 |
#' library(random.cdisc.data) |
|
23 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
24 |
#' |
|
25 |
#' addv <- raddv(adsl, seed = 2) |
|
26 |
#' addv |
|
27 |
raddv <- function(adsl, |
|
28 |
max_n_dv = 3L, |
|
29 |
p_dv = 0.15, |
|
30 |
lookup = NULL, |
|
31 |
seed = NULL, |
|
32 |
na_percentage = 0, |
|
33 |
na_vars = list( |
|
34 |
"ASTDT" = c(seed = 1234, percentage = 0.1), |
|
35 |
"DVCAT" = c(seed = 1234, percentage = 0.1) |
|
36 |
), |
|
37 |
cached = FALSE) { |
|
38 | 4x |
checkmate::assert_flag(cached) |
39 | 4x |
if (cached) { |
40 | 1x |
return(get_cached_data("caddv")) |
41 |
} |
|
42 | ||
43 | 3x |
checkmate::assert_data_frame(adsl) |
44 | 3x |
checkmate::assert_integer(max_n_dv, len = 1, lower = 1, any.missing = FALSE) |
45 | 3x |
checkmate::assert_number(p_dv, lower = .Machine$double.xmin, upper = 1) |
46 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
47 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
48 | 3x |
checkmate::assert_true(na_percentage < 1) |
49 | ||
50 | 3x |
if (!is.null(seed)) set.seed(seed) |
51 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
52 | ||
53 | 3x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
54 | 3x |
lookup_dv <- if (!is.null(lookup)) { |
55 | ! |
lookup |
56 |
} else { |
|
57 | 3x |
tibble::tribble( |
58 | 3x |
~DOMAIN, ~DVCAT, ~DVDECOD, ~DVTERM, ~DVREAS, ~DVEPRELI, |
59 | 3x |
"DV", "MAJOR", "EXCLUSION CRITERIA", "Received prior prohibited therapy or medication", "", "N", |
60 | 3x |
"DV", "MAJOR", "EXCLUSION CRITERIA", "Active or untreated or other excluded cns metastases", "", "N", |
61 | 3x |
"DV", "MAJOR", "EXCLUSION CRITERIA", "History of other malignancies within the last 5 years", "", "N", |
62 | 3x |
"DV", "MAJOR", "EXCLUSION CRITERIA", "Uncontrolled concurrent condition", "", "N", |
63 | 3x |
"DV", "MAJOR", "EXCLUSION CRITERIA", "Other exclusion criteria", "", "N", |
64 | 3x |
"DV", "MAJOR", "EXCLUSION CRITERIA", "Pregnancy criteria", "", "N", |
65 | 3x |
"DV", "MAJOR", "INCLUSION CRITERIA", "Does not meet prior therapy requirements", "", "N", |
66 | 3x |
"DV", "MAJOR", "INCLUSION CRITERIA", "Inclusion lab values outside allowed limits", "", "N", |
67 | 3x |
"DV", "MAJOR", "INCLUSION CRITERIA", "No signed ICF at study entry", "", "N", |
68 | 3x |
"DV", "MAJOR", "INCLUSION CRITERIA", "Inclusion-related test not done/out of window", "", "N", |
69 | 3x |
"DV", "MAJOR", "INCLUSION CRITERIA", "Ineligible cancer type or current cancer stage", "", "N", |
70 | 3x |
"DV", "MAJOR", "MEDICATION", "Dose missed or significantly out of window", |
71 | 3x |
"Site action due to epidemic/pandemic", "Y", |
72 | 3x |
"DV", "MAJOR", "MEDICATION", "Received incorrect study medication", "", "N", |
73 | 3x |
"DV", "MAJOR", "MEDICATION", "Received prohibited concomitant medication", "", "N", |
74 | 3x |
"DV", "MAJOR", "MEDICATION", "Discontinued study drug for unspecified reason", "", "N", |
75 | 3x |
"DV", "MAJOR", "MEDICATION", "Significant deviation from planned dose", |
76 | 3x |
"Site action due to epidemic/pandemic", "Y", |
77 | 3x |
"DV", "MAJOR", "PROCEDURAL", "Missed assessment affecting safety/study outcomes", "", "N", |
78 | 3x |
"DV", "MAJOR", "PROCEDURAL", "Eligibility-related test not done/out of window", "", "N", |
79 | 3x |
"DV", "MAJOR", "PROCEDURAL", "Failure to sign updated ICF within two visits", |
80 | 3x |
"Site action due to epidemic/pandemic", "Y", |
81 | 3x |
"DV", "MAJOR", "PROCEDURAL", "Omission of complete lab panel required by protocol", "", "N", |
82 | 3x |
"DV", "MAJOR", "PROCEDURAL", "Omission of screening tumor assessment", "", "N", |
83 | 3x |
"DV", "MAJOR", "PROCEDURAL", "Missed 2 or more efficacy assessments", |
84 | 3x |
"Site action due to epidemic/pandemic", "Y" |
85 |
) |
|
86 |
} |
|
87 | ||
88 | ||
89 | 3x |
addv <- Map( |
90 | 3x |
function(id, sid) { |
91 | 30x |
n_dv <- stats::rbinom(1, 1, p_dv) * sample(c(1, seq_len(max_n_dv)), 1) |
92 | 30x |
i <- sample(seq_len(nrow(lookup_dv)), n_dv, TRUE) |
93 | 30x |
dplyr::mutate( |
94 | 30x |
lookup_dv[i, ], |
95 | 30x |
USUBJID = id, |
96 | 30x |
STUDYID = sid |
97 |
) |
|
98 |
}, |
|
99 | 3x |
adsl$USUBJID, |
100 | 3x |
adsl$STUDYID |
101 |
) %>% |
|
102 | 3x |
Reduce(rbind, .) %>% |
103 | 3x |
dplyr::mutate(DVSCAT = DVCAT) |
104 | ||
105 | 3x |
addv <- var_relabel( |
106 | 3x |
addv, |
107 | 3x |
STUDYID = "Study Identifier", |
108 | 3x |
USUBJID = "Unique Subject Identifier" |
109 |
) |
|
110 | ||
111 |
# merge ADSL to be able to add deviation date and study day variables |
|
112 | 3x |
addv <- dplyr::inner_join(addv, adsl, by = c("STUDYID", "USUBJID")) %>% |
113 | 3x |
dplyr::rowwise() %>% |
114 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
115 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
116 | 3x |
TRUE ~ TRTEDTM |
117 |
))) %>% |
|
118 | 3x |
dplyr::mutate(ASTDTM = sample( |
119 | 3x |
seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"), |
120 | 3x |
size = 1 |
121 |
)) %>% |
|
122 | 3x |
dplyr::mutate(ASTDT = lubridate::date(ASTDTM)) %>% |
123 | 3x |
dplyr::mutate(ASTDY = ceiling(difftime(ASTDTM, TRTSDTM, units = "days"))) %>% |
124 | 3x |
dplyr::select(-TRTENDT, -ASTDTM) %>% |
125 | 3x |
dplyr::ungroup() %>% |
126 | 3x |
dplyr::arrange(STUDYID, USUBJID, ASTDT, DVTERM) |
127 | ||
128 | 3x |
addv <- addv %>% |
129 | 3x |
dplyr::group_by(USUBJID) %>% |
130 | 3x |
dplyr::mutate(DVSEQ = seq_len(dplyr::n())) %>% |
131 | 3x |
dplyr::ungroup() %>% |
132 | 3x |
dplyr::arrange(STUDYID, USUBJID, ASTDT, DVTERM, DVSEQ) |
133 | ||
134 | 3x |
addv <- addv %>% |
135 | 3x |
dplyr::mutate(AEPRELFL = ifelse(DVEPRELI == "Y", DVEPRELI, "")) |
136 | ||
137 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) { |
138 | ! |
addv <- mutate_na(ds = addv, na_vars = na_vars, na_percentage = na_percentage) |
139 |
} |
|
140 | ||
141 |
# apply metadata |
|
142 | 3x |
addv <- apply_metadata(addv, "metadata/ADDV.yml") |
143 | ||
144 | 3x |
return(addv) |
145 |
} |
1 |
#' Questionnaires Analysis Dataset (ADQS) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating a random Questionnaires Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details One record per subject per parameter per analysis visit per analysis date. |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `AVISITN` |
|
11 |
#' |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @template param_cached |
|
14 |
#' @templateVar data adqs |
|
15 |
#' |
|
16 |
#' @return `data.frame` |
|
17 |
#' @export |
|
18 |
#' |
|
19 |
#' @author npaszty |
|
20 |
#' |
|
21 |
#' @examples |
|
22 |
#' library(random.cdisc.data) |
|
23 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
24 |
#' |
|
25 |
#' adqs <- radqs(adsl, visit_format = "WEEK", n_assessments = 7L, seed = 2) |
|
26 |
#' adqs |
|
27 |
#' |
|
28 |
#' adqs <- radqs(adsl, visit_format = "CYCLE", n_assessments = 3L, seed = 2) |
|
29 |
#' adqs |
|
30 |
radqs <- function(adsl, |
|
31 |
param = c( |
|
32 |
"BFI All Questions", |
|
33 |
"Fatigue Interference", |
|
34 |
"Function/Well-Being (GF1,GF3,GF7)", |
|
35 |
"Treatment Side Effects (GP2,C5,GP5)", |
|
36 |
"FKSI-19 All Questions" |
|
37 |
), |
|
38 |
paramcd = c("BFIALL", "FATIGI", "FKSI-FWB", "FKSI-TSE", "FKSIALL"), |
|
39 |
visit_format = "WEEK", |
|
40 |
n_assessments = 5L, |
|
41 |
n_days = 5L, |
|
42 |
seed = NULL, |
|
43 |
na_percentage = 0, |
|
44 |
na_vars = list( |
|
45 |
LOQFL = c(NA, 0.1), ABLFL2 = c(1234, 0.1), ABLFL = c(1235, 0.1), |
|
46 |
CHG2 = c(1235, 0.1), PCHG2 = c(1235, 0.1), CHG = c(1234, 0.1), PCHG = c(1234, 0.1) |
|
47 |
), |
|
48 |
cached = FALSE) { |
|
49 | 4x |
checkmate::assert_flag(cached) |
50 | 4x |
if (cached) { |
51 | 1x |
return(get_cached_data("cadqs")) |
52 |
} |
|
53 | ||
54 | 3x |
checkmate::assert_data_frame(adsl) |
55 | 3x |
checkmate::assert_character(param, min.len = 1, any.missing = FALSE) |
56 | 3x |
checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE) |
57 | 3x |
checkmate::assert_string(visit_format) |
58 | 3x |
checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE) |
59 | 3x |
checkmate::assert_integer(n_days, len = 1, any.missing = FALSE) |
60 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
61 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
62 | 3x |
checkmate::assert_true(na_percentage < 1) |
63 | ||
64 |
# validate and initialize param vectors |
|
65 | 3x |
param_init_list <- relvar_init(param, paramcd) |
66 | ||
67 | 3x |
if (!is.null(seed)) { |
68 | 3x |
set.seed(seed) |
69 |
} |
|
70 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
71 | ||
72 | 3x |
adqs <- expand.grid( |
73 | 3x |
STUDYID = unique(adsl$STUDYID), |
74 | 3x |
USUBJID = adsl$USUBJID, |
75 | 3x |
PARAM = param_init_list$relvar1, |
76 | 3x |
AVISIT = visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days), |
77 | 3x |
stringsAsFactors = FALSE |
78 |
) |
|
79 | ||
80 | 3x |
adqs <- dplyr::mutate( |
81 | 3x |
adqs, |
82 | 3x |
AVISITN = dplyr::case_when( |
83 | 3x |
AVISIT == "SCREENING" ~ -1, |
84 | 3x |
AVISIT == "BASELINE" ~ 0, |
85 | 3x |
(grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2, |
86 | 3x |
TRUE ~ NA_real_ |
87 |
) |
|
88 |
) |
|
89 | ||
90 |
# assign related variable values: PARAMxPARAMCD are related |
|
91 | 3x |
adqs <- adqs %>% rel_var( |
92 | 3x |
var_name = "PARAMCD", |
93 | 3x |
related_var = "PARAM", |
94 | 3x |
var_values = param_init_list$relvar2 |
95 |
) |
|
96 | ||
97 | 3x |
adqs$AVAL <- stats::rnorm(nrow(adqs), mean = 50, sd = 8) + adqs$AVISITN * stats::rnorm(nrow(adqs), mean = 5, sd = 2) |
98 | ||
99 |
# order to prepare for change from screening and baseline values |
|
100 | 3x |
adqs <- adqs[order(adqs$STUDYID, adqs$USUBJID, adqs$PARAMCD, adqs$AVISITN), ] |
101 | ||
102 | 3x |
adqs <- Reduce( |
103 | 3x |
rbind, |
104 | 3x |
lapply( |
105 | 3x |
split(adqs, adqs$USUBJID), |
106 | 3x |
function(x) { |
107 | 30x |
x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])] |
108 | 30x |
x$ABLFL2 <- ifelse(x$AVISIT == "SCREENING", "Y", "") |
109 | 30x |
x$ABLFL <- ifelse( |
110 | 30x |
toupper(visit_format) == "WEEK" & x$AVISIT == "BASELINE", |
111 | 30x |
"Y", |
112 | 30x |
ifelse( |
113 | 30x |
toupper(visit_format) == "CYCLE" & x$AVISIT == "CYCLE 1 DAY 1", |
114 | 30x |
"Y", |
115 |
"" |
|
116 |
) |
|
117 |
) |
|
118 | 30x |
x$LOQFL <- ifelse(x$AVAL < 32, "Y", "N") |
119 | 30x |
x |
120 |
} |
|
121 |
) |
|
122 |
) |
|
123 | ||
124 | 3x |
adqs$BASE2 <- retain(adqs, adqs$AVAL, adqs$ABLFL2 == "Y") |
125 | 3x |
adqs$BASE <- ifelse(adqs$ABLFL2 != "Y", retain(adqs, adqs$AVAL, adqs$ABLFL == "Y"), NA) |
126 | ||
127 | 3x |
adqs <- adqs %>% |
128 | 3x |
dplyr::mutate(CHG2 = AVAL - BASE2) %>% |
129 | 3x |
dplyr::mutate(PCHG2 = 100 * (CHG2 / BASE2)) %>% |
130 | 3x |
dplyr::mutate(CHG = AVAL - BASE) %>% |
131 | 3x |
dplyr::mutate(PCHG = 100 * (CHG / BASE)) %>% |
132 | 3x |
var_relabel( |
133 | 3x |
STUDYID = attr(adsl$STUDYID, "label"), |
134 | 3x |
USUBJID = attr(adsl$USUBJID, "label") |
135 |
) |
|
136 | ||
137 | 3x |
adqs <- var_relabel( |
138 | 3x |
adqs, |
139 | 3x |
STUDYID = "Study Identifier", |
140 | 3x |
USUBJID = "Unique Subject Identifier" |
141 |
) |
|
142 | ||
143 |
# merge ADSL to be able to add QS date and study day variables |
|
144 | 3x |
adqs <- dplyr::inner_join( |
145 | 3x |
adqs, |
146 | 3x |
adsl, |
147 | 3x |
by = c("STUDYID", "USUBJID") |
148 |
) %>% |
|
149 | 3x |
dplyr::rowwise() %>% |
150 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
151 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
152 | 3x |
TRUE ~ TRTEDTM |
153 |
))) %>% |
|
154 | 3x |
ungroup() |
155 | ||
156 | 3x |
adqs <- adqs %>% |
157 | 3x |
group_by(USUBJID) %>% |
158 | 3x |
arrange(USUBJID, AVISITN) %>% |
159 | 3x |
dplyr::mutate(ADTM = rep( |
160 | 3x |
sort(sample( |
161 | 3x |
seq(lubridate::as_datetime(TRTSDTM[1]), lubridate::as_datetime(TRTENDT[1]), by = "day"), |
162 | 3x |
size = nlevels(AVISIT) |
163 |
)), |
|
164 | 3x |
each = n() / nlevels(AVISIT) |
165 |
)) %>% |
|
166 | 3x |
dplyr::ungroup() %>% |
167 | 3x |
dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>% |
168 | 3x |
dplyr::select(-TRTENDT) %>% |
169 | 3x |
dplyr::arrange(STUDYID, USUBJID, ADTM) |
170 | ||
171 | 3x |
adqs <- adqs %>% |
172 | 3x |
dplyr::group_by(USUBJID) %>% |
173 | 3x |
dplyr::mutate(QSSEQ = seq_len(dplyr::n())) %>% |
174 | 3x |
dplyr::mutate(ASEQ = QSSEQ) %>% |
175 | 3x |
dplyr::ungroup() %>% |
176 | 3x |
dplyr::arrange( |
177 | 3x |
STUDYID, |
178 | 3x |
USUBJID, |
179 | 3x |
PARAMCD, |
180 | 3x |
AVISITN, |
181 | 3x |
ADTM, |
182 | 3x |
QSSEQ |
183 |
) |
|
184 | ||
185 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) { |
186 | ! |
adqs <- mutate_na(ds = adqs, na_vars = na_vars, na_percentage = na_percentage) |
187 |
} |
|
188 | ||
189 |
# apply metadata |
|
190 | 3x |
adqs <- apply_metadata(adqs, "metadata/ADQS.yml") |
191 | ||
192 | 3x |
return(adqs) |
193 |
} |
1 |
#' Subject-Level Analysis Dataset (ADSL) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' The Subject-Level Analysis Dataset (ADSL) is used to provide the variables |
|
6 |
#' that describe attributes of a subject. ADSL is a source for subject-level |
|
7 |
#' variables used in other analysis data sets, such as population flags and |
|
8 |
#' treatment variables. There is only one ADSL per study. ADSL and its related |
|
9 |
#' metadata are required in a CDISC-based submission of data from a clinical |
|
10 |
#' trial even if no other analysis data sets are submitted. |
|
11 |
#' |
|
12 |
#' @details One record per subject. |
|
13 |
#' |
|
14 |
#' Keys: `STUDYID`, `USUBJID` |
|
15 |
#' |
|
16 |
#' @inheritParams argument_convention |
|
17 |
#' @param N (`numeric`)\cr Number of patients. |
|
18 |
#' @param study_duration (`numeric`)\cr Duration of study in years. |
|
19 |
#' @param with_trt02 (`logical`)\cr Should period 2 be added. |
|
20 |
#' @param ae_withdrawal_prob (`proportion`)\cr Probability that there is at least one |
|
21 |
#' Adverse Event leading to the withdrawal of a study drug. |
|
22 |
#' @template param_cached |
|
23 |
#' @templateVar data adsl |
|
24 |
#' |
|
25 |
#' @return `data.frame` |
|
26 |
#' @export |
|
27 |
# |
|
28 |
#' @examples |
|
29 |
#' library(random.cdisc.data) |
|
30 |
#' |
|
31 |
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1) |
|
32 |
#' adsl |
|
33 |
#' |
|
34 |
#' adsl <- radsl( |
|
35 |
#' N = 10, seed = 1, |
|
36 |
#' na_percentage = 0.1, |
|
37 |
#' na_vars = list( |
|
38 |
#' DTHDT = c(seed = 1234, percentage = 0.1), |
|
39 |
#' LSTALVDT = c(seed = 1234, percentage = 0.1) |
|
40 |
#' ) |
|
41 |
#' ) |
|
42 |
#' adsl |
|
43 |
#' |
|
44 |
#' adsl <- radsl(N = 10, seed = 1, na_percentage = .1) |
|
45 |
#' adsl |
|
46 |
radsl <- function(N = 400, # nolint |
|
47 |
study_duration = 2, |
|
48 |
seed = NULL, |
|
49 |
with_trt02 = TRUE, |
|
50 |
na_percentage = 0, |
|
51 |
na_vars = list( |
|
52 |
"AGE" = NA, "SEX" = NA, "RACE" = NA, "STRATA1" = NA, "STRATA2" = NA, |
|
53 |
"BMRKR1" = c(seed = 1234, percentage = 0.1), "BMRKR2" = c(1234, 0.1), "BEP01FL" = NA |
|
54 |
), |
|
55 |
ae_withdrawal_prob = 0.05, |
|
56 |
cached = FALSE) { |
|
57 | 28x |
checkmate::assert_flag(cached) |
58 | 28x |
if (cached) { |
59 | 2x |
return(get_cached_data("cadsl")) |
60 |
} |
|
61 | ||
62 | 26x |
checkmate::assert_number(N) |
63 | 26x |
checkmate::assert_number(seed, null.ok = TRUE) |
64 | 26x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1, na.ok = TRUE) |
65 | 26x |
checkmate::assert_number(study_duration, lower = 1) |
66 | 26x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
67 | 26x |
checkmate::assert_true(na_percentage < 1) |
68 | ||
69 | 26x |
if (!is.null(seed)) { |
70 | 26x |
set.seed(seed) |
71 |
} |
|
72 | ||
73 | 26x |
study_duration_secs <- lubridate::seconds(lubridate::years(study_duration)) |
74 | 26x |
sys_dtm <- lubridate::fast_strptime("20/2/2019 11:16:16.683", "%d/%m/%Y %H:%M:%OS") |
75 | 26x |
discons <- max(1, floor((N * .3))) |
76 | 26x |
country_site_prob <- c(.5, .121, .077, .077, .075, .052, .046, .025, .014, .003) |
77 | ||
78 | 26x |
adsl <- tibble::tibble( |
79 | 26x |
STUDYID = rep("AB12345", N), |
80 | 26x |
COUNTRY = sample_fct( |
81 | 26x |
c("CHN", "USA", "BRA", "PAK", "NGA", "RUS", "JPN", "GBR", "CAN", "CHE"), |
82 | 26x |
N, |
83 | 26x |
prob = country_site_prob |
84 |
), |
|
85 | 26x |
SITEID = sample_fct(1:20, N, prob = rep(country_site_prob, times = 2)), |
86 | 26x |
SUBJID = paste("id", seq_len(N), sep = "-"), |
87 | 26x |
AGE = sapply(stats::rchisq(N, df = 5, ncp = 10), max, 0) + 20, |
88 | 26x |
AGEU = "YEARS", |
89 | 26x |
SEX = c("F", "M") %>% sample_fct(N, prob = c(.52, .48)), |
90 | 26x |
ARMCD = c("ARM A", "ARM B", "ARM C") %>% sample_fct(N), |
91 | 26x |
RACE = c( |
92 | 26x |
"ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE", "AMERICAN INDIAN OR ALASKA NATIVE", |
93 | 26x |
"MULTIPLE", "NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER", "OTHER", "UNKNOWN" |
94 |
) %>% |
|
95 | 26x |
sample_fct(N, prob = c(.55, .23, .16, .05, .004, .003, .002, .002)), |
96 | 26x |
TRTSDTM = sys_dtm + sample(seq(0, study_duration_secs), size = N, replace = TRUE), |
97 | 26x |
RANDDT = lubridate::date(TRTSDTM - lubridate::days(floor(stats::runif(N, min = 0, max = 5)))), |
98 | 26x |
TRTEDTM = TRTSDTM + study_duration_secs, |
99 | 26x |
STRATA1 = c("A", "B", "C") %>% sample_fct(N), |
100 | 26x |
STRATA2 = c("S1", "S2") %>% sample_fct(N), |
101 | 26x |
BMRKR1 = stats::rchisq(N, 6), |
102 | 26x |
BMRKR2 = sample_fct(c("LOW", "MEDIUM", "HIGH"), N), |
103 | 26x |
BMEASIFL = sample_fct(c("Y", "N"), N), |
104 | 26x |
BEP01FL = sample_fct(c("Y", "N"), N), |
105 | 26x |
AEWITHFL = sample_fct(c("Y", "N"), N, prob = c(ae_withdrawal_prob, 1 - ae_withdrawal_prob)) |
106 |
) %>% |
|
107 | 26x |
dplyr::mutate(ARM = dplyr::recode( |
108 | 26x |
ARMCD, |
109 | 26x |
"ARM A" = "A: Drug X", "ARM B" = "B: Placebo", "ARM C" = "C: Combination" |
110 |
)) %>% |
|
111 | 26x |
dplyr::mutate(ACTARM = ARM) %>% |
112 | 26x |
dplyr::mutate(ACTARMCD = ARMCD) %>% |
113 | 26x |
dplyr::mutate(TRT01P = ARM) %>% |
114 | 26x |
dplyr::mutate(TRT01A = ACTARM) %>% |
115 | 26x |
dplyr::mutate(ITTFL = factor("Y")) %>% |
116 | 26x |
dplyr::mutate(SAFFL = factor("Y")) %>% |
117 | 26x |
dplyr::arrange(TRTSDTM) |
118 | ||
119 | 26x |
adds <- adsl[sample(nrow(adsl), discons), ] %>% |
120 | 26x |
dplyr::mutate(TRTEDTM_discon = sample( |
121 | 26x |
seq(from = max(TRTSDTM), to = sys_dtm + study_duration_secs, by = 1), |
122 | 26x |
size = discons, |
123 | 26x |
replace = TRUE |
124 |
)) %>% |
|
125 | 26x |
dplyr::select(SUBJID, TRTSDTM, TRTEDTM_discon) %>% |
126 | 26x |
dplyr::arrange(TRTSDTM) |
127 | ||
128 | 26x |
adsl <- dplyr::left_join(adsl, adds, by = c("SUBJID", "TRTSDTM")) %>% |
129 | 26x |
dplyr::mutate(TRTEDTM = dplyr::case_when( |
130 | 26x |
!is.na(TRTEDTM_discon) ~ TRTEDTM_discon, |
131 | 26x |
TRTSDTM >= quantile(TRTSDTM)[2] & TRTSDTM <= quantile(TRTSDTM)[3] ~ lubridate::as_datetime(NA), |
132 | 26x |
TRUE ~ TRTEDTM |
133 |
)) %>% |
|
134 | 26x |
dplyr::select(-"TRTEDTM_discon") |
135 | ||
136 |
# add period 2 if needed |
|
137 | 26x |
if (with_trt02) { |
138 | 26x |
with_trt02 <- lubridate::seconds(lubridate::years(1)) |
139 | 26x |
adsl <- adsl %>% |
140 | 26x |
dplyr::mutate(TRT02P = sample(ARM)) %>% |
141 | 26x |
dplyr::mutate(TRT02A = sample(ACTARM)) %>% |
142 | 26x |
dplyr::mutate( |
143 | 26x |
TRT01SDTM = TRTSDTM, |
144 | 26x |
AP01SDTM = TRT01SDTM, |
145 | 26x |
TRT01EDTM = TRTEDTM, |
146 | 26x |
AP01EDTM = TRT01EDTM, |
147 | 26x |
TRT02SDTM = TRTEDTM, |
148 | 26x |
AP02SDTM = TRT02SDTM, |
149 | 26x |
TRT02EDTM = TRT01EDTM + with_trt02, |
150 | 26x |
AP02EDTM = TRT02EDTM, |
151 | 26x |
TRTEDTM = TRT02EDTM |
152 |
) |
|
153 |
} |
|
154 | ||
155 | 26x |
adsl <- adsl %>% |
156 | 26x |
dplyr::mutate(EOSDT = lubridate::date(TRTEDTM)) %>% |
157 | 26x |
dplyr::mutate(EOSDY = ceiling(difftime(TRTEDTM, TRTSDTM))) %>% |
158 | 26x |
dplyr::mutate(EOSSTT = dplyr::case_when( |
159 | 26x |
EOSDY == max(EOSDY, na.rm = TRUE) ~ "COMPLETED", |
160 | 26x |
EOSDY < max(EOSDY, na.rm = TRUE) ~ "DISCONTINUED", |
161 | 26x |
is.na(TRTEDTM) ~ "ONGOING" |
162 |
)) %>% |
|
163 | 26x |
dplyr::mutate(EOTSTT = EOSSTT) |
164 | ||
165 |
# disposition related variables |
|
166 |
# using probability of 1 for the "DEATH" level to ensure at least one death record exists |
|
167 | 26x |
l_dcsreas <- list( |
168 | 26x |
choices = c( |
169 | 26x |
"ADVERSE EVENT", "DEATH", "LACK OF EFFICACY", "PHYSICIAN DECISION", |
170 | 26x |
"PROTOCOL VIOLATION", "WITHDRAWAL BY PARENT/GUARDIAN", "WITHDRAWAL BY SUBJECT" |
171 |
), |
|
172 | 26x |
prob = c(.2, 1, .1, .1, .2, .1, .1) |
173 |
) |
|
174 | 26x |
l_dthcat_other <- list( |
175 | 26x |
choices = c( |
176 | 26x |
"Post-study reporting of death", "LOST TO FOLLOW UP", "MISSING", "SUICIDE", "UNKNOWN" |
177 |
), |
|
178 | 26x |
prob = c(.1, .3, .3, .2, .1) |
179 |
) |
|
180 | ||
181 | 26x |
adsl <- adsl %>% |
182 | 26x |
dplyr::mutate( |
183 | 26x |
DCSREAS = ifelse( |
184 | 26x |
EOSSTT == "DISCONTINUED", |
185 | 26x |
sample(x = l_dcsreas$choices, size = N, replace = TRUE, prob = l_dcsreas$prob), |
186 | 26x |
as.character(NA) |
187 |
) |
|
188 |
) %>% |
|
189 | 26x |
dplyr::mutate(DTHFL = dplyr::case_when( |
190 | 26x |
DCSREAS == "DEATH" ~ "Y", |
191 | 26x |
TRUE ~ "N" |
192 |
)) %>% |
|
193 | 26x |
dplyr::mutate( |
194 | 26x |
DTHCAT = ifelse( |
195 | 26x |
DCSREAS == "DEATH", |
196 | 26x |
sample(x = c("ADVERSE EVENT", "PROGRESSIVE DISEASE", "OTHER"), size = N, replace = TRUE), |
197 | 26x |
as.character(NA) |
198 |
) |
|
199 |
) %>% |
|
200 | 26x |
dplyr::mutate(DTHCAUS = dplyr::case_when( |
201 | 26x |
DTHCAT == "ADVERSE EVENT" ~ "ADVERSE EVENT", |
202 | 26x |
DTHCAT == "PROGRESSIVE DISEASE" ~ "DISEASE PROGRESSION", |
203 | 26x |
DTHCAT == "OTHER" ~ sample(x = l_dthcat_other$choices, size = N, replace = TRUE, prob = l_dthcat_other$prob), |
204 | 26x |
TRUE ~ as.character(NA) |
205 |
)) %>% |
|
206 | 26x |
dplyr::mutate(ADTHAUT = dplyr::case_when( |
207 | 26x |
DTHCAUS %in% c("ADVERSE EVENT", "DISEASE PROGRESSION") ~ "Yes", |
208 | 26x |
DTHCAUS %in% c("UNKNOWN", "SUICIDE", "Post-study reporting of death") ~ sample( |
209 | 26x |
x = c("Yes", "No"), size = N, replace = TRUE, prob = c(0.25, 0.75) |
210 |
), |
|
211 | 26x |
TRUE ~ as.character(NA) |
212 |
)) %>% |
|
213 |
# adding some random number of days post last treatment date so that death days from last trt admin |
|
214 |
# supports the LDDTHGR1 derivation below |
|
215 | 26x |
dplyr::mutate(DTHDT = dplyr::case_when( |
216 | 26x |
DCSREAS == "DEATH" ~ lubridate::date(TRTEDTM + lubridate::days(sample(0:50, size = N, replace = TRUE))), |
217 | 26x |
TRUE ~ NA |
218 |
)) %>% |
|
219 | 26x |
dplyr::mutate(LDDTHELD = difftime(DTHDT, lubridate::date(TRTEDTM), units = "days")) %>% |
220 | 26x |
dplyr::mutate(LDDTHGR1 = dplyr::case_when( |
221 | 26x |
LDDTHELD <= 30 ~ "<=30", |
222 | 26x |
LDDTHELD > 30 ~ ">30", |
223 | 26x |
TRUE ~ as.character(NA) |
224 |
)) %>% |
|
225 | 26x |
dplyr::mutate(LSTALVDT = dplyr::case_when( |
226 | 26x |
DCSREAS == "DEATH" ~ DTHDT, |
227 | 26x |
TRUE ~ lubridate::date(TRTEDTM) + lubridate::days(floor(stats::runif(N, min = 10, max = 30))) |
228 |
)) |
|
229 | ||
230 |
# add random ETHNIC (Ethnicity) |
|
231 | 26x |
adsl <- adsl %>% |
232 | 26x |
dplyr::mutate(ETHNIC = sample( |
233 | 26x |
x = c("HISPANIC OR LATINO", "NOT HISPANIC OR LATINO", " NOT REPORTED", "UNKNOWN"), |
234 | 26x |
size = N, replace = TRUE, prob = c(.1, .8, .06, .04) |
235 |
)) |
|
236 | ||
237 |
# associate DTHADY (Relative Day of Death) with Death date |
|
238 |
# Date of Death [adsl.DTHDT] - date part of Date of First Exposure to Treatment [adsl.TRTSDTM] |
|
239 | ||
240 | 26x |
adsl <- adsl %>% |
241 | 26x |
dplyr::mutate(DTHADY = difftime(DTHDT, TRTSDTM, units = "days")) |
242 | ||
243 | ||
244 |
# associate sites with countries and regions |
|
245 | 26x |
adsl <- adsl %>% |
246 | 26x |
dplyr::mutate(SITEID = paste0(COUNTRY, "-", SITEID)) %>% |
247 | 26x |
dplyr::mutate(REGION1 = dplyr::case_when( |
248 | 26x |
COUNTRY %in% c("NGA") ~ "Africa", |
249 | 26x |
COUNTRY %in% c("CHN", "JPN", "PAK") ~ "Asia", |
250 | 26x |
COUNTRY %in% c("RUS") ~ "Eurasia", |
251 | 26x |
COUNTRY %in% c("GBR") ~ "Europe", |
252 | 26x |
COUNTRY %in% c("CAN", "USA") ~ "North America", |
253 | 26x |
COUNTRY %in% c("BRA") ~ "South America", |
254 | 26x |
TRUE ~ as.character(NA) |
255 |
)) %>% |
|
256 | 26x |
dplyr::mutate(INVID = paste("INV ID", SITEID)) %>% |
257 | 26x |
dplyr::mutate(INVNAM = paste("Dr.", SITEID, "Doe")) %>% |
258 | 26x |
dplyr::mutate(USUBJID = paste(STUDYID, SITEID, SUBJID, sep = "-")) |
259 | ||
260 | ||
261 | 26x |
if (length(na_vars) > 0 && na_percentage > 0) { |
262 | ! |
adsl <- mutate_na(ds = adsl, na_vars = na_vars, na_percentage = na_percentage) |
263 |
} |
|
264 | ||
265 |
# apply metadata |
|
266 | 26x |
adsl <- apply_metadata(adsl, "metadata/ADSL.yml", FALSE) |
267 | ||
268 | 26x |
attr(adsl, "study_duration_secs") <- as.numeric(study_duration_secs) |
269 | 26x |
return(adsl) |
270 |
} |
1 |
#' EORTC QLQ-C30 V3 Analysis Dataset (ADQLQC) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating a random EORTC QLQ-C30 V3 Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `PARCAT1N`, `PARAMCD`, `BASETYPE`, `AVISITN`, `ATPTN`, `ADTM`, `QSSEQ` |
|
11 |
#' |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @param percent (`numeric`)\cr Completion - Completed at least y percent of questions, 1 record per visit |
|
14 |
#' @param number (`numeric`)\cr Completion - Completed at least x question(s), 1 record per visit |
|
15 |
#' @template param_cached |
|
16 |
#' @templateVar data adqlqc |
|
17 |
#' |
|
18 |
#' @return `data.frame` |
|
19 |
#' @export |
|
20 |
#' |
|
21 |
#' @examples |
|
22 |
#' library(random.cdisc.data) |
|
23 |
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1) |
|
24 |
#' |
|
25 |
#' adqlqc <- radqlqc(adsl, seed = 1, percent = 80, number = 2) |
|
26 |
#' adqlqc |
|
27 |
radqlqc <- function(adsl, |
|
28 |
percent, |
|
29 |
number, |
|
30 |
seed = NULL, |
|
31 |
cached = FALSE) { |
|
32 | 4x |
checkmate::assert_flag(cached) |
33 | 4x |
if (cached) { |
34 | 1x |
return(get_cached_data("cadqlqc")) |
35 |
} |
|
36 | ||
37 | 3x |
checkmate::assert_data_frame(adsl) |
38 | 3x |
checkmate::assert_number(percent, lower = 1, upper = 100) |
39 | 3x |
checkmate::assert_number(number, lower = 1) |
40 | ||
41 | 3x |
if (!is.null(seed)) { |
42 | 3x |
set.seed(seed) |
43 |
} |
|
44 | ||
45 |
# ADQLQC data ------------------------------------------------------------- |
|
46 | 3x |
qs <- get_qs_data(adsl, n_assessments = 5L, seed = seed, na_percentage = 0.1) |
47 |
# prepare ADaM ADQLQC data |
|
48 | 3x |
adqlqc1 <- prep_adqlqc(df = qs) |
49 |
# derive AVAL and AVALC |
|
50 | 3x |
adqlqc1 <- mutate( |
51 | 3x |
adqlqc1, |
52 | 3x |
AVAL = as.numeric(QSSTRESC), |
53 | 3x |
AVALC = case_when( |
54 | 3x |
QSTESTCD == "QSALL" ~ QSREASND, |
55 | 3x |
TRUE ~ QSORRES |
56 |
), |
|
57 | 3x |
AVISIT = VISIT, |
58 | 3x |
AVISITN = VISITNUM, |
59 | 3x |
ADTM = QSDTC |
60 |
) |
|
61 |
# include scale calculation |
|
62 | 3x |
adqlqc_tmp <- calc_scales(adqlqc1) |
63 |
# order to prepare for change from screening and baseline values |
|
64 | 3x |
adqlqc_tmp <- adqlqc_tmp[order(adqlqc_tmp$STUDYID, adqlqc_tmp$USUBJID, adqlqc_tmp$PARAMCD, adqlqc_tmp$AVISITN), ] |
65 | ||
66 | 3x |
adqlqc_tmp <- Reduce( |
67 | 3x |
rbind, |
68 | 3x |
lapply( |
69 | 3x |
split(adqlqc_tmp, adqlqc_tmp$USUBJID), |
70 | 3x |
function(x) { |
71 | 30x |
x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])] |
72 | 30x |
x$ABLFL2 <- ifelse(x$AVISIT == "SCREENING", "Y", "") |
73 | 30x |
x$ABLFL <- ifelse( |
74 | 30x |
x$AVISIT == "BASELINE" & |
75 | 30x |
x$PARAMCD != "EX028", |
76 | 30x |
"Y", |
77 | 30x |
ifelse( |
78 | 30x |
x$AVISIT == "CYCLE 1 DAY 1" & |
79 | 30x |
x$PARAMCD != "EX028", |
80 | 30x |
"Y", |
81 |
"" |
|
82 |
) |
|
83 |
) |
|
84 | 30x |
x |
85 |
} |
|
86 |
) |
|
87 |
) |
|
88 | ||
89 | 3x |
adqlqc_tmp$BASE2 <- ifelse( |
90 | 3x |
str_detect(adqlqc_tmp$PARCAT2, "Completion", negate = TRUE), |
91 | 3x |
retain( |
92 | 3x |
df = adqlqc_tmp, |
93 | 3x |
value_var = adqlqc_tmp$AVAL, |
94 | 3x |
event = adqlqc_tmp$ABLFL2 == "Y" |
95 |
), |
|
96 | 3x |
NA |
97 |
) |
|
98 | ||
99 | 3x |
adqlqc_tmp$BASE <- ifelse( |
100 | 3x |
adqlqc_tmp$ABLFL2 != "Y" & |
101 | 3x |
str_detect(adqlqc_tmp$PARCAT2, "Completion", negate = TRUE), |
102 | 3x |
retain( |
103 | 3x |
adqlqc_tmp, |
104 | 3x |
adqlqc_tmp$AVAL, |
105 | 3x |
adqlqc_tmp$ABLFL == "Y" |
106 |
), |
|
107 | 3x |
NA |
108 |
) |
|
109 | ||
110 | 3x |
adqlqc_tmp <- adqlqc_tmp %>% |
111 | 3x |
dplyr::mutate(CHG2 = AVAL - BASE2) %>% |
112 | 3x |
dplyr::mutate(PCHG2 = 100 * (CHG2 / BASE2)) %>% |
113 | 3x |
dplyr::mutate(CHG = AVAL - BASE) %>% |
114 | 3x |
dplyr::mutate(PCHG = 100 * (CHG / BASE)) %>% |
115 | 3x |
var_relabel( |
116 | 3x |
STUDYID = attr(adsl$STUDYID, "label"), |
117 | 3x |
USUBJID = attr(adsl$USUBJID, "label") |
118 |
) |
|
119 |
# derive CHGCAT1 ---------------------------------------------------------- |
|
120 | 3x |
adqlqc_tmp <- derv_chgcat1(dataset = adqlqc_tmp) |
121 | ||
122 | 3x |
adqlqc_tmp <- var_relabel( |
123 | 3x |
adqlqc_tmp, |
124 | 3x |
STUDYID = "Study Identifier", |
125 | 3x |
USUBJID = "Unique Subject Identifier" |
126 |
) |
|
127 | ||
128 | 3x |
adqlqc_tmp <- arrange( |
129 | 3x |
adqlqc_tmp, |
130 | 3x |
USUBJID, |
131 | 3x |
AVISITN |
132 |
) |
|
133 |
# Merge ADSL -------------------------------------------------------------- |
|
134 |
# ADSL variables needed for ADQLQC |
|
135 | 3x |
adsl_vars <- c( |
136 | 3x |
"STUDYID", "USUBJID", "SUBJID", "SITEID", "REGION1", "COUNTRY", "ETHNIC", "AGE", |
137 | 3x |
"AGEU", "AAGE", "AAGEU", "AGEGR1", "AGEGR2", "AGEGR3", "STRATwNM", "STRATw", "STRATwV", |
138 | 3x |
"SEX", "RACE", "ITTFL", "SAFFL", "PPROTFL", "TRT01P", "TRT01A", |
139 | 3x |
"TRTSEQP", "TRTSEQA", "TRTSDTM", "TRTSDT", "TRTEDTM", "TRTEDT", "DCUTDT" |
140 |
) |
|
141 | 3x |
adsl <- select( |
142 | 3x |
adsl, |
143 | 3x |
any_of(adsl_vars) |
144 |
) |
|
145 | 3x |
adqlqc <- dplyr::inner_join( |
146 | 3x |
adqlqc_tmp, |
147 | 3x |
adsl, |
148 | 3x |
by = c("STUDYID", "USUBJID") |
149 |
) %>% |
|
150 | 3x |
dplyr::mutate( |
151 | 3x |
ADY_der = ceiling(difftime(ADTM, TRTSDTM, units = "days")), |
152 | 3x |
ADY = case_when( |
153 | 3x |
ADY_der >= 0 ~ ADY_der + 1, |
154 | 3x |
TRUE ~ ADY_der |
155 |
) |
|
156 |
) %>% |
|
157 | 3x |
select(-ADY_der) |
158 | ||
159 |
# get compliance data --------------------------------------------------- |
|
160 | 3x |
compliance_data <- comp_derv( |
161 | 3x |
dataset = adqlqc, |
162 | 3x |
percent = percent, |
163 | 3x |
number = number |
164 |
) |
|
165 |
# add ADSL variables |
|
166 | 3x |
compliance_data <- left_join( |
167 | 3x |
compliance_data, |
168 | 3x |
adsl, |
169 | 3x |
by = c("STUDYID", "USUBJID") |
170 |
) |
|
171 |
# add completion to ADQLQC |
|
172 | 3x |
adqlqc <- bind_rows( |
173 | 3x |
adqlqc, |
174 | 3x |
compliance_data |
175 |
) %>% |
|
176 | 3x |
arrange( |
177 | 3x |
USUBJID, |
178 | 3x |
AVISITN, |
179 | 3x |
QSTESTCD |
180 |
) |
|
181 |
# find first set of questionnaire observations |
|
182 | 3x |
adqlqc_x <- arrange( |
183 | 3x |
adqlqc, |
184 | 3x |
USUBJID, |
185 | 3x |
ADTM |
186 |
) %>% |
|
187 | 3x |
filter( |
188 | 3x |
PARAMCD != "QSALL" & |
189 | 3x |
!str_detect(AVISIT, "SCREENING|UNSCHEDULED") |
190 |
) %>% |
|
191 | 3x |
group_by( |
192 | 3x |
USUBJID, |
193 | 3x |
ADTM |
194 |
) %>% |
|
195 | 3x |
summarise(first_date = first(ADTM), .groups = "drop") |
196 | ||
197 | 3x |
adqlqc <- left_join( |
198 | 3x |
adqlqc, |
199 | 3x |
adqlqc_x, |
200 | 3x |
by = c("USUBJID", "ADTM") |
201 |
) %>% |
|
202 | 3x |
mutate( |
203 | 3x |
ANL01FL = case_when( |
204 | 3x |
PARAMCD != "QSALL" & ABLFL == "Y" ~ "Y", |
205 | 3x |
PARAMCD != "QSALL" & |
206 | 3x |
!str_detect(AVISIT, "UNSCHEDULED") & |
207 | 3x |
!is.na(first_date) ~ "Y" |
208 |
) |
|
209 |
) %>% |
|
210 | 3x |
select(-first_date) |
211 | ||
212 |
# final dataset ----------------------------------------------------------- |
|
213 | 3x |
adqlqc_final <- adqlqc %>% |
214 | 3x |
dplyr::group_by(USUBJID) %>% |
215 | 3x |
dplyr::mutate(ASEQ = row_number()) %>% |
216 | 3x |
dplyr::ungroup() %>% |
217 | 3x |
dplyr::arrange( |
218 | 3x |
STUDYID, |
219 | 3x |
USUBJID, |
220 | 3x |
AVISITN |
221 |
) %>% |
|
222 | 3x |
select( |
223 | 3x |
-c("BASE2", "CHG2", "PCHG2", "ABLFL2") |
224 |
) %>% |
|
225 | 3x |
ungroup() |
226 | ||
227 | 3x |
adam_vars <- c( |
228 | 3x |
adsl_vars, "QSSEQ", "QSCAT", "QSSCAT", "QSDTC", "QSSPID", "QSSTAT", "QSSTRESN", |
229 | 3x |
"QSSTRESC", "QSSTRESU", "QSORRES", "QSORRESU", "QSTEST", "QSTESTCD", "QSTPT", |
230 | 3x |
"QSTPTNUM", "QSTPTREF", "QSDY", "QSREASND", "QSTSTDTL", "QSEVAL", "VISIT", "VISITNUM", |
231 | 3x |
"PARAM", "PARAMCD", "PARCAT1", "PARCAT1N", "PARCAT2", "AVAL", "AVALC", "AREASND", |
232 | 3x |
"BASE", "BASETYPE", "ABLFL", "CHG", "PCHG", "CHGCAT1", "CRIT1", "CRIT1FL", "DTYPE", |
233 | 3x |
"ADTM", "ADT", "ADY", "ADTF", "ATMF", "ATPT", "ATPTN", "AVISIT", "AVISITN", "APHASE", |
234 | 3x |
"APHASEN", "APERIOD", "APERIODC", "APERIODC", "ASPER", "ASPERC", "PERADY", "TRTP", |
235 | 3x |
"TRTA", "ONTRTFL", "LAST02FL", "FIRS02FL", "ANL01FL", "ANL02FL", "ANL03FL", |
236 | 3x |
"ANL04FL", "CGCAT1NX" |
237 |
) |
|
238 |
# order variables in mapped qs by variables in adam_vars |
|
239 | 3x |
adqlqc_name_ordered <- names(adqlqc_final)[order(match(names(adqlqc_final), adam_vars))] |
240 |
# adqlqc with variables ordered per gdsr |
|
241 | 3x |
adqlqc_final <- adqlqc_final %>% |
242 | 3x |
select( |
243 | 3x |
any_of(adqlqc_name_ordered) |
244 |
) |
|
245 | ||
246 | 3x |
adqlqc_final <- relocate(adqlqc_final, "QSEVLINT", .after = "QSTESTCD") %>% |
247 | 3x |
arrange( |
248 | 3x |
USUBJID, |
249 | 3x |
AVISITN, |
250 | 3x |
ASEQ, |
251 | 3x |
QSTESTCD |
252 |
) |
|
253 |
# apply metadata |
|
254 | 3x |
adqlqc_final <- apply_metadata(adqlqc_final, "metadata/ADQLQC.yml") |
255 | 3x |
return(adqlqc_final) |
256 |
} |
|
257 | ||
258 |
#' Helper Functions for Constructing ADQLQC |
|
259 |
#' |
|
260 |
#' Internal functions used by `radqlqc`. |
|
261 |
#' |
|
262 |
#' @inheritParams argument_convention |
|
263 |
#' @inheritParams radqlqc |
|
264 |
#' |
|
265 |
#' @examples |
|
266 |
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1) |
|
267 |
#' adqlqc <- radqlqc(adsl, seed = 1, percent = 80, number = 2) |
|
268 |
#' |
|
269 |
#' @name h_adqlqc |
|
270 |
NULL |
|
271 | ||
272 |
#' @describeIn h_adqlqc Questionnaires EORTC QLQ-C30 V3.0 SDTM (QS) |
|
273 |
#' |
|
274 |
#' Function for generating random Questionnaires SDTM domain |
|
275 |
#' |
|
276 |
#' @return a dataframe with SDTM questionnaire data |
|
277 |
#' @keywords internal |
|
278 |
#' |
|
279 |
#' @examples |
|
280 |
#' \dontrun{ |
|
281 |
#' qs <- random.cdisc.data:::get_qs_data(adsl, n_assessments = 5L, seed = 1, na_percentage = 0.1) |
|
282 |
#' qs |
|
283 |
#' } |
|
284 |
get_qs_data <- function(adsl, |
|
285 |
visit_format = "CYCLE", |
|
286 |
n_assessments = 5L, |
|
287 |
n_days = 1L, |
|
288 |
lookup = NULL, |
|
289 |
seed = NULL, |
|
290 |
na_percentage = 0, |
|
291 |
na_vars = list( |
|
292 |
QSORRES = c(1234, 0.2), |
|
293 |
QSSTRESC = c(1234, 0.2) |
|
294 |
)) { |
|
295 | 3x |
load(system.file("sysdata.rda", package = "random.cdisc.data")) |
296 | 3x |
checkmate::assert_string(visit_format) |
297 | 3x |
checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE) |
298 | 3x |
checkmate::assert_integer(n_days, len = 1, any.missing = FALSE) |
299 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
300 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1, na.ok = TRUE) |
301 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
302 | 3x |
checkmate::assert_true(na_percentage < 1) |
303 | ||
304 |
# get subjects for QS data from ADSL |
|
305 |
# get studyid, subject for QS generation |
|
306 | 3x |
qs <- select( |
307 | 3x |
adsl, |
308 | 3x |
STUDYID, |
309 | 3x |
USUBJID |
310 |
) %>% |
|
311 | 3x |
mutate( |
312 | 3x |
DOMAIN = "QS" |
313 |
) |
|
314 | ||
315 |
# QS prep ----------------------------------------------------------------- |
|
316 |
# get questionnaire function for QS |
|
317 |
# QSTESTCD: EOR0101 to EOR0130 |
|
318 | 3x |
eortc_qlq_c30_sub <- filter( |
319 | 3x |
eortc_qlq_c30, |
320 | 3x |
as.numeric(str_extract(QSTESTCD, "\\d+$")) >= 101 & |
321 | 3x |
as.numeric(str_extract(QSTESTCD, "\\d+$")) <= 130 |
322 |
) %>% |
|
323 | 3x |
select(-publication_name) |
324 | ||
325 |
# validate and initialize QSTEST vectors |
|
326 | 3x |
qstest_init_list <- relvar_init( |
327 | 3x |
unique(eortc_qlq_c30_sub$QSTEST), |
328 | 3x |
unique(eortc_qlq_c30_sub$QSTESTCD) |
329 |
) |
|
330 | ||
331 | 3x |
if (!is.null(seed)) { |
332 | 3x |
set.seed(seed) |
333 |
} |
|
334 | ||
335 | 3x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
336 | ||
337 | 3x |
lookup_qs <- if (!is.null(lookup)) { |
338 | ! |
lookup |
339 |
} else { |
|
340 | 3x |
expand.grid( |
341 | 3x |
STUDYID = unique(qs$STUDYID), |
342 | 3x |
USUBJID = qs$USUBJID, |
343 | 3x |
QSTEST = qstest_init_list$relvar1, |
344 | 3x |
VISIT = visit_schedule( |
345 | 3x |
visit_format = visit_format, |
346 | 3x |
n_assessments = n_assessments, |
347 | 3x |
n_days = n_days |
348 |
), |
|
349 | 3x |
stringsAsFactors = FALSE |
350 |
) |
|
351 |
} |
|
352 | ||
353 |
# assign related variable values: QSTESTxQSTESTCD are related |
|
354 | 3x |
lookup_qs <- lookup_qs %>% rel_var( |
355 | 3x |
var_name = "QSTESTCD", |
356 | 3x |
related_var = "QSTEST", |
357 | 3x |
var_values = qstest_init_list$relvar2 |
358 |
) |
|
359 | ||
360 | 3x |
lookup_qs <- left_join( |
361 | 3x |
lookup_qs, |
362 | 3x |
eortc_qlq_c30_sub, |
363 | 3x |
by = c( |
364 | 3x |
"QSTEST", |
365 | 3x |
"QSTESTCD" |
366 |
), |
|
367 | 3x |
multiple = "all", |
368 | 3x |
relationship = "many-to-many" |
369 |
) |
|
370 | ||
371 | 3x |
lookup_qs <- dplyr::mutate( |
372 | 3x |
lookup_qs, |
373 | 3x |
VISITNUM = dplyr::case_when( |
374 | 3x |
VISIT == "SCREENING" ~ -1, |
375 | 3x |
VISIT == "BASELINE" ~ 0, |
376 | 3x |
(grepl("^WEEK", VISIT) | grepl("^CYCLE", VISIT)) ~ as.numeric(VISIT) - 2, |
377 | 3x |
TRUE ~ NA_real_ |
378 |
) |
|
379 | 3x |
) %>% arrange(USUBJID) |
380 | ||
381 |
# # prep QSALL -------------------------------------------------------------- |
|
382 |
# get last subject and visit for QSALL |
|
383 | 3x |
last_subj_vis <- select(lookup_qs, USUBJID, VISIT) %>% |
384 | 3x |
distinct() %>% |
385 | 3x |
slice(n()) |
386 | 3x |
last_subj_vis_full <- filter( |
387 | 3x |
lookup_qs, |
388 | 3x |
USUBJID == last_subj_vis$USUBJID, |
389 | 3x |
VISIT == last_subj_vis$VISIT |
390 |
) |
|
391 | ||
392 | 3x |
qsall_data1 <- tibble::tibble( |
393 | 3x |
STUDYID = unique(last_subj_vis_full$STUDYID), |
394 | 3x |
USUBJID = unique(last_subj_vis_full$USUBJID), |
395 | 3x |
VISIT = unique(last_subj_vis_full$VISIT), |
396 | 3x |
VISITNUM = unique(last_subj_vis_full$VISITNUM), |
397 | 3x |
QSTESTCD = "QSALL", |
398 | 3x |
QSTEST = "Questionnaires", |
399 | 3x |
QSSTAT = "NOT DONE", |
400 | 3x |
QSREASND = "SUBJECT REFUSED" |
401 |
) |
|
402 | ||
403 |
# remove last subject and visit from main data |
|
404 | 3x |
lookup_qs_sub <- anti_join( |
405 | 3x |
lookup_qs, |
406 | 3x |
last_subj_vis_full, |
407 | 3x |
by = c("USUBJID", "VISIT") |
408 |
) |
|
409 | ||
410 | 3x |
set.seed(seed) |
411 | 3x |
lookup_qs_sub_x <- lookup_qs_sub %>% |
412 | 3x |
group_by( |
413 | 3x |
USUBJID, |
414 | 3x |
QSTESTCD, |
415 | 3x |
VISIT |
416 |
) %>% |
|
417 | 3x |
slice_sample(n = 1) %>% |
418 | 3x |
ungroup() %>% |
419 | 3x |
as.data.frame() |
420 | ||
421 | 3x |
lookup_qs_sub_x <- arrange( |
422 | 3x |
lookup_qs_sub_x, |
423 | 3x |
USUBJID, |
424 | 3x |
VISITNUM |
425 |
) |
|
426 | ||
427 |
# add date: QSDTC --------------------------------------------------------- |
|
428 |
# get treatment dates from ADSL |
|
429 | 3x |
adsl_trt <- select( |
430 | 3x |
adsl, |
431 | 3x |
USUBJID, |
432 | 3x |
TRTSDTM, |
433 | 3x |
TRTEDTM |
434 |
) |
|
435 |
# use to derive QSDTC |
|
436 |
# if no treatment end date, create an arbituary one |
|
437 | 3x |
trt_end_date <- max(adsl_trt$TRTEDTM, na.rm = TRUE) |
438 | ||
439 | 3x |
lookup_qs_sub_x <- left_join( |
440 | 3x |
lookup_qs_sub_x, |
441 | 3x |
adsl_trt, |
442 | 3x |
by = "USUBJID" |
443 |
) %>% |
|
444 | 3x |
group_by( |
445 | 3x |
USUBJID |
446 |
) %>% |
|
447 | 3x |
mutate(QSDTC = get_random_dates_between( |
448 | 3x |
from = TRTSDTM, |
449 | 3x |
to = ifelse( |
450 | 3x |
is.na(TRTEDTM), |
451 | 3x |
trt_end_date, |
452 | 3x |
TRTEDTM |
453 |
), |
|
454 | 3x |
visit_id = VISITNUM |
455 |
)) %>% |
|
456 | 3x |
select(-c("TRTSDTM", "TRTEDTM")) |
457 | ||
458 |
# filter out subjects with missing dates |
|
459 | 3x |
lookup_qs_sub_x1 <- filter( |
460 | 3x |
lookup_qs_sub_x, |
461 | 3x |
!is.na(QSDTC) |
462 |
) |
|
463 | ||
464 |
# subjects with missing dates |
|
465 | 3x |
lookup_qs_sub_x2 <- filter( |
466 | 3x |
lookup_qs_sub_x, |
467 | 3x |
is.na(QSDTC) |
468 |
) %>% |
|
469 | 3x |
select( |
470 | 3x |
STUDYID, |
471 | 3x |
USUBJID, |
472 | 3x |
VISIT, |
473 | 3x |
VISITNUM |
474 |
) %>% |
|
475 | 3x |
distinct() |
476 | ||
477 |
# generate QSALL for subjects with missing dates |
|
478 | 3x |
qsall_data2 <- mutate( |
479 | 3x |
lookup_qs_sub_x2, |
480 | 3x |
QSTESTCD = "QSALL", |
481 | 3x |
QSTEST = "Questionnaires", |
482 | 3x |
QSSTAT = "NOT DONE", |
483 | 3x |
QSREASND = "SUBJECT REFUSED" |
484 |
) |
|
485 | ||
486 |
# add qsall data to original item data |
|
487 | 3x |
lookup_qs_sub_all <- bind_rows( |
488 | 3x |
lookup_qs_sub_x1, |
489 | 3x |
qsall_data1, |
490 | 3x |
qsall_data2 |
491 |
) |
|
492 | ||
493 | 3x |
qs_all <- lookup_qs_sub_all %>% |
494 | 3x |
arrange( |
495 | 3x |
STUDYID, |
496 | 3x |
USUBJID, |
497 | 3x |
VISITNUM |
498 |
) %>% |
|
499 | 3x |
dplyr::group_by(USUBJID) %>% |
500 | 3x |
dplyr::ungroup() |
501 | ||
502 |
# get first and second subject ids |
|
503 | 3x |
first_second_subj <- select(qs_all, USUBJID) %>% |
504 | 3x |
distinct() %>% |
505 | 3x |
slice(1:2) |
506 | ||
507 | 3x |
qs1 <- filter( |
508 | 3x |
qs_all, |
509 | 3x |
USUBJID %in% first_second_subj$USUBJID |
510 |
) |
|
511 | ||
512 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) { |
513 | 3x |
qs1 <- mutate_na(ds = qs1, na_vars = na_vars, na_percentage = na_percentage) |
514 |
} |
|
515 | ||
516 |
# QSSTAT = NOT DONE |
|
517 | 3x |
qs1 <- mutate( |
518 | 3x |
qs1, |
519 | 3x |
QSSTAT = case_when( |
520 | 3x |
is.na(QSORRES) & is.na(QSSTRESC) ~ "NOT DONE" |
521 |
) |
|
522 |
) |
|
523 | ||
524 |
# remove first and second subjects from main data |
|
525 | 3x |
qs2 <- anti_join( |
526 | 3x |
qs_all, |
527 | 3x |
qs1, |
528 | 3x |
by = c("USUBJID") |
529 |
) |
|
530 | ||
531 | 3x |
final_qs <- rbind( |
532 | 3x |
qs1, |
533 | 3x |
qs2 |
534 |
) %>% |
|
535 | 3x |
group_by(USUBJID) %>% |
536 | 3x |
dplyr::mutate(QSSEQ = row_number()) %>% |
537 | 3x |
arrange( |
538 | 3x |
STUDYID, |
539 | 3x |
USUBJID, |
540 | 3x |
VISITNUM |
541 |
) %>% |
|
542 | 3x |
ungroup() |
543 | ||
544 |
# ordered variables as per gdsr |
|
545 | 3x |
final_qs <- select( |
546 | 3x |
final_qs, |
547 | 3x |
STUDYID, |
548 | 3x |
USUBJID, |
549 | 3x |
QSSEQ, |
550 | 3x |
QSTESTCD, |
551 | 3x |
QSTEST, |
552 | 3x |
QSCAT, |
553 | 3x |
QSSCAT, |
554 | 3x |
QSORRES, |
555 | 3x |
QSORRESU, |
556 | 3x |
QSSTRESC, |
557 | 3x |
QSSTRESU, |
558 | 3x |
QSSTAT, |
559 | 3x |
QSREASND, |
560 | 3x |
VISITNUM, |
561 | 3x |
VISIT, |
562 | 3x |
QSDTC, |
563 | 3x |
QSEVLINT |
564 |
) |
|
565 | 3x |
return(final_qs) |
566 |
} |
|
567 | ||
568 |
#' @describeIn h_adqlqc Function for generating random dates between 2 dates |
|
569 |
#' |
|
570 |
#' @param from (`datetime vector`)\cr Start date/times. |
|
571 |
#' @param to (`datetime vector`)\cr End date/times. |
|
572 |
#' @param visit_id (`vector`)\cr Visit identifiers. |
|
573 |
#' |
|
574 |
#' @return Data frame with new randomly generated dates variable. |
|
575 |
#' @keywords internal |
|
576 |
#' |
|
577 |
#' @examples |
|
578 |
#' \dontrun{ |
|
579 |
#' df <- dplyr::left_join( |
|
580 |
#' adsl, |
|
581 |
#' qs, |
|
582 |
#' by = c("STUDYID", "USUBJID"), |
|
583 |
#' multiple = "all" |
|
584 |
#' ) |> |
|
585 |
#' dplyr::mutate( |
|
586 |
#' AVISIT = VISIT, |
|
587 |
#' PARAMCD = QSTESTCD, |
|
588 |
#' AVISITN = VISITNUM |
|
589 |
#' ) |> |
|
590 |
#' dplyr::mutate(ADTM = random.cdisc.data:::get_random_dates_between(TRTSDTM, TRTEDTM, AVISITN)) |
|
591 |
#' df |
|
592 |
#' } |
|
593 |
get_random_dates_between <- function(from, to, visit_id) { |
|
594 | 30x |
min_date <- min(lubridate::as_datetime(from), na.rm = TRUE) |
595 | 30x |
max_date <- max(lubridate::as_datetime(to), na.rm = TRUE) |
596 | 30x |
date_seq <- seq(from = min_date + lubridate::days(1), to = max_date, by = "28 days") |
597 | ||
598 | 30x |
visit_ids <- unique(visit_id) |
599 | 30x |
out <- sapply(visit_ids, simplify = TRUE, USE.NAMES = TRUE, FUN = function(x) { |
600 | 177x |
if (x == -1) { |
601 | 30x |
random_days_to_subtract <- lubridate::days(sample(1:10, size = 1)) |
602 | 30x |
min_date - random_days_to_subtract |
603 | 147x |
} else if (x == 0) { |
604 | 30x |
min_date |
605 | 117x |
} else if (x > 0) { |
606 | 117x |
if (x %in% seq_along(date_seq)) { |
607 | 117x |
date_seq[[x]] |
608 |
} else { |
|
609 | 30x |
NA |
610 |
} |
|
611 |
} |
|
612 |
}) |
|
613 | 30x |
lubridate::as_datetime(out[match(visit_id, visit_ids)]) |
614 |
} |
|
615 | ||
616 |
#' @describeIn h_adqlqc Prepare ADaM ADQLQC data, adding PARAMCD to SDTM QS data |
|
617 |
#' |
|
618 |
#' @param df (`data.frame`)\cr SDTM QS dataset. |
|
619 |
#' |
|
620 |
#' @return `data.frame` |
|
621 |
#' @keywords internal |
|
622 |
#' |
|
623 |
#' @examples |
|
624 |
#' \dontrun{ |
|
625 |
#' adqlqc1 <- random.cdisc.data:::prep_adqlqc(df = qs) |
|
626 |
#' adqlqc1 |
|
627 |
#' } |
|
628 |
prep_adqlqc <- function(df) { |
|
629 |
# create PARAMCD from QSTESTCD |
|
630 | 3x |
adqlqc <- dplyr::mutate( |
631 | 3x |
df, |
632 | 3x |
PARAMCD = case_when( |
633 | 3x |
QSTESTCD == "EOR0101" ~ "QS02801", |
634 | 3x |
QSTESTCD == "EOR0102" ~ "QS02802", |
635 | 3x |
QSTESTCD == "EOR0103" ~ "QS02803", |
636 | 3x |
QSTESTCD == "EOR0104" ~ "QS02804", |
637 | 3x |
QSTESTCD == "EOR0105" ~ "QS02805", |
638 | 3x |
QSTESTCD == "EOR0106" ~ "QS02806", |
639 | 3x |
QSTESTCD == "EOR0107" ~ "QS02807", |
640 | 3x |
QSTESTCD == "EOR0108" ~ "QS02808", |
641 | 3x |
QSTESTCD == "EOR0109" ~ "QS02809", |
642 | 3x |
QSTESTCD == "EOR0110" ~ "QS02810", |
643 | 3x |
QSTESTCD == "EOR0111" ~ "QS02811", |
644 | 3x |
QSTESTCD == "EOR0112" ~ "QS02812", |
645 | 3x |
QSTESTCD == "EOR0113" ~ "QS02813", |
646 | 3x |
QSTESTCD == "EOR0114" ~ "QS02814", |
647 | 3x |
QSTESTCD == "EOR0115" ~ "QS02815", |
648 | 3x |
QSTESTCD == "EOR0116" ~ "QS02816", |
649 | 3x |
QSTESTCD == "EOR0117" ~ "QS02817", |
650 | 3x |
QSTESTCD == "EOR0118" ~ "QS02818", |
651 | 3x |
QSTESTCD == "EOR0119" ~ "QS02819", |
652 | 3x |
QSTESTCD == "EOR0120" ~ "QS02820", |
653 | 3x |
QSTESTCD == "EOR0121" ~ "QS02821", |
654 | 3x |
QSTESTCD == "EOR0122" ~ "QS02822", |
655 | 3x |
QSTESTCD == "EOR0123" ~ "QS02823", |
656 | 3x |
QSTESTCD == "EOR0124" ~ "QS02824", |
657 | 3x |
QSTESTCD == "EOR0125" ~ "QS02825", |
658 | 3x |
QSTESTCD == "EOR0126" ~ "QS02826", |
659 | 3x |
QSTESTCD == "EOR0127" ~ "QS02827", |
660 | 3x |
QSTESTCD == "EOR0128" ~ "QS02828", |
661 | 3x |
QSTESTCD == "EOR0129" ~ "QS02829", |
662 | 3x |
QSTESTCD == "EOR0130" ~ "QS02830", |
663 | 3x |
TRUE ~ QSTESTCD |
664 |
) |
|
665 |
) |
|
666 | 3x |
load(system.file("sysdata.rda", package = "random.cdisc.data")) |
667 | 3x |
adqlqc1 <- dplyr::left_join( |
668 | 3x |
adqlqc, |
669 | 3x |
gdsr_param_adqlqc, |
670 | 3x |
by = "PARAMCD" |
671 |
) |
|
672 | 3x |
return(adqlqc1) |
673 |
} |
|
674 | ||
675 |
#' @describeIn h_adqlqc Scale calculation for ADQLQC data |
|
676 |
#' |
|
677 |
#' @param adqlqc1 (`data.frame`)\cr Prepared data generated from the [prep_adqlqc()] function. |
|
678 |
#' |
|
679 |
#' @return `data.frame` |
|
680 |
#' @keywords internal |
|
681 |
#' |
|
682 |
#' @examples |
|
683 |
#' \dontrun{ |
|
684 |
#' df_scales <- random.cdisc.data:::calc_scales(df) |
|
685 |
#' df_scales |
|
686 |
#' } |
|
687 |
calc_scales <- function(adqlqc1) { |
|
688 |
# Prep scale data --------------------------------------------------------- |
|
689 |
# parcat2 = scales or global health status |
|
690 |
# global health status/scales data |
|
691 |
# QSTESTCD: EOR0131 to EOR0145 (global health status and scales) |
|
692 | 3x |
load(system.file("sysdata.rda", package = "random.cdisc.data")) |
693 | 3x |
eortc_qlq_c30_sub <- filter( |
694 | 3x |
eortc_qlq_c30, |
695 | 3x |
!(as.numeric(str_extract(QSTESTCD, "\\d+$")) >= 101 & |
696 | 3x |
as.numeric(str_extract(QSTESTCD, "\\d+$")) <= 130) |
697 |
) %>% |
|
698 | 3x |
mutate( |
699 | 3x |
PARAMCD = case_when( |
700 | 3x |
QSTESTCD == "EOR0131" ~ "QS028QL2", |
701 | 3x |
QSTESTCD == "EOR0132" ~ "QS028PF2", |
702 | 3x |
QSTESTCD == "EOR0133" ~ "QS028RF2", |
703 | 3x |
QSTESTCD == "EOR0134" ~ "QS028EF", |
704 | 3x |
QSTESTCD == "EOR0135" ~ "QS028CF", |
705 | 3x |
QSTESTCD == "EOR0136" ~ "QS028SF", |
706 | 3x |
QSTESTCD == "EOR0137" ~ "QS028FA", |
707 | 3x |
QSTESTCD == "EOR0138" ~ "QS028NV", |
708 | 3x |
QSTESTCD == "EOR0139" ~ "QS028PA", |
709 | 3x |
QSTESTCD == "EOR0140" ~ "QS028DY", |
710 | 3x |
QSTESTCD == "EOR0141" ~ "QS028SL", |
711 | 3x |
QSTESTCD == "EOR0142" ~ "QS028AP", |
712 | 3x |
QSTESTCD == "EOR0143" ~ "QS028CO", |
713 | 3x |
QSTESTCD == "EOR0144" ~ "QS028DI", |
714 | 3x |
QSTESTCD == "EOR0145" ~ "QS028FI", |
715 | 3x |
TRUE ~ QSTESTCD |
716 |
) |
|
717 |
) %>% |
|
718 | 3x |
select(-publication_name) |
719 | ||
720 |
# ADaM global health status and scales from gdsr |
|
721 | 3x |
gdsr_param_adqlqc <- gdsr_param_adqlqc %>% |
722 | 3x |
filter( |
723 | 3x |
!str_detect(PARCAT2, "Original Items|Completion") |
724 |
) |
|
725 | ||
726 | 3x |
ghs_scales <- left_join( |
727 | 3x |
eortc_qlq_c30_sub, |
728 | 3x |
gdsr_param_adqlqc, |
729 | 3x |
by = "PARAMCD" |
730 |
) |
|
731 |
# scale data |
|
732 | 3x |
df <- data.frame(index = seq_len(nrow(ghs_scales))) |
733 | 3x |
df$previous <- list( |
734 | 3x |
c("QS02826", "QS02827"), |
735 | 3x |
c("QS02811"), |
736 | 3x |
c("QS02810", "QS02812", "QS02818"), |
737 | 3x |
c("QS02806", "QS02807"), |
738 | 3x |
c("QS02814", "QS02815"), |
739 | 3x |
c("QS02808"), |
740 | 3x |
c("QS02817"), |
741 | 3x |
c("QS02816"), |
742 | 3x |
c("QS02821", "QS02822", "QS02823", "QS02824"), |
743 | 3x |
c("QS02829", "QS02830"), |
744 | 3x |
c("QS02813"), |
745 | 3x |
c("QS02801", "QS02802", "QS02803", "QS02804", "QS02805"), |
746 | 3x |
c("QS02809", "QS02819"), |
747 | 3x |
c("QS02820", "QS02825"), |
748 | 3x |
c("QS02828") |
749 |
) |
|
750 | 3x |
df$newName <- list( |
751 | 3x |
"QS028SF", |
752 | 3x |
"QS028SL", |
753 | 3x |
"QS028FA", |
754 | 3x |
"QS028RF2", |
755 | 3x |
"QS028NV", |
756 | 3x |
"QS028DY", |
757 | 3x |
"QS028DI", |
758 | 3x |
"QS028CO", |
759 | 3x |
"QS028EF", |
760 | 3x |
"QS028QL2", |
761 | 3x |
"QS028AP", |
762 | 3x |
"QS028PF2", |
763 | 3x |
"QS028PA", |
764 | 3x |
"QS028CF", |
765 | 3x |
"QS028FI" |
766 |
) |
|
767 | 3x |
df$newNamelabel <- list( |
768 | 3x |
"EORTC QLQ-C30: Social functioning", |
769 | 3x |
"EORTC QLQ-C30: Insomnia", |
770 | 3x |
"EORTC QLQ-C30: Fatigue", |
771 | 3x |
"EORTC QLQ-C30: Role functioning (revised)", |
772 | 3x |
"EORTC QLQ-C30: Nausea and vomiting", |
773 | 3x |
"EORTC QLQ-C30: Dyspnoea", |
774 | 3x |
"EORTC QLQ-C30: Diarrhoea", |
775 | 3x |
"EORTC QLQ-C30: Constipation", |
776 | 3x |
"EORTC QLQ-C30: Emotional functioning", |
777 | 3x |
"EORTC QLQ-C30: Global health status/QoL (revised)", |
778 | 3x |
"EORTC QLQ-C30: Appetite loss", |
779 | 3x |
"EORTC QLQ-C30: Physical functioning (revised)", |
780 | 3x |
"EORTC QLQ-C30: Pain", |
781 | 3x |
"EORTC QLQ-C30: Cognitive functioning", |
782 | 3x |
"EORTC QLQ-C30: Financial difficulties" |
783 |
) |
|
784 | 3x |
df$newNameCategory <- list( |
785 | 3x |
"Functional Scales", |
786 | 3x |
"Symptom Scales", |
787 | 3x |
"Symptom Scales", |
788 | 3x |
"Functional Scales", |
789 | 3x |
"Symptom Scales", |
790 | 3x |
"Symptom Scales", |
791 | 3x |
"Symptom Scales", |
792 | 3x |
"Symptom Scales", |
793 | 3x |
"Functional Scales", |
794 | 3x |
"Global Health Status", |
795 | 3x |
"Symptom Scales", |
796 | 3x |
"Functional Scales", |
797 | 3x |
"Symptom Scales", |
798 | 3x |
"Functional Scales", |
799 | 3x |
"Symptom Scales" |
800 |
) |
|
801 | 3x |
df$num_param <- list( |
802 | 3x |
"1", |
803 | 3x |
"1", |
804 | 3x |
"2", |
805 | 3x |
"1", |
806 | 3x |
"1", |
807 | 3x |
"1", |
808 | 3x |
"1", |
809 | 3x |
"1", |
810 | 3x |
"2", |
811 | 3x |
"1", |
812 | 3x |
"1", |
813 | 3x |
"3", |
814 | 3x |
"1", |
815 | 3x |
"1", |
816 | 3x |
"1" |
817 |
) |
|
818 | 3x |
df$equation <- list( |
819 | 3x |
"new_value = (1 - ((temp_val/var_length)-1)/3)*100.0", |
820 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
821 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
822 | 3x |
"new_value = (1 - ((temp_val/var_length)-1)/3)*100.0", |
823 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
824 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
825 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
826 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
827 | 3x |
"new_value = (1 - ((temp_val/var_length)-1)/3)*100.0", |
828 | 3x |
"new_value = ((temp_val/var_length-1)/6)*100.0", |
829 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
830 | 3x |
"new_value = (1 - ((temp_val/var_length)-1)/3)*100.0", |
831 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
832 | 3x |
"new_value = (1 - ((temp_val/var_length)-1)/3)*100.0", |
833 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0" |
834 |
) |
|
835 | ||
836 | 3x |
expect_data <- data.frame( |
837 | 3x |
PARAM = expect$PARAM, |
838 | 3x |
PARAMCD = expect$PARAMCD, |
839 | 3x |
PARCAT2 = expect$PARCAT2, |
840 | 3x |
PARCAT1N = expect$PARCAT1N, |
841 | 3x |
AVAL = c(0, 1), |
842 | 3x |
AVALC = c( |
843 | 3x |
"Not expected to complete questionnaire", |
844 | 3x |
"Expected to complete questionnaire" |
845 |
) |
|
846 |
) |
|
847 | ||
848 | 3x |
df_saved <- data.frame() |
849 | ||
850 | 3x |
unique_id <- unique(adqlqc1$USUBJID) |
851 | ||
852 | 3x |
for (id in unique_id) { |
853 | 30x |
id_data <- adqlqc1[adqlqc1$USUBJID == id, ] |
854 | 30x |
unique_avisit <- unique(id_data$AVISIT) |
855 | 30x |
for (visit in unique_avisit) { |
856 | 180x |
if (is.na(visit)) { |
857 | ! |
next |
858 |
} |
|
859 | 180x |
id_data_at_visit <- id_data[id_data$AVISIT == visit, ] |
860 | ||
861 | 180x |
if (any(id_data_at_visit$PARAMCD != "QSALL")) { |
862 | 177x |
for (idx in seq_along(df$index)) { |
863 | 2655x |
previous_names <- df$previous[idx] |
864 | 2655x |
current_name <- df$newName[idx] |
865 | 2655x |
current_name_label <- df$newNamelabel[idx] |
866 | 2655x |
current_name_category <- df$newNameCategory[idx] |
867 | 2655x |
eqn <- df$equation[idx] |
868 | 2655x |
temp_val <- 0 |
869 | 2655x |
var_length <- 0 |
870 | 2655x |
for (param_name in previous_names[[1]]) { |
871 | 5310x |
if (param_name %in% id_data_at_visit$PARAMCD) { #### |
872 | 5310x |
current_val <- as.numeric(as.character(id_data_at_visit$AVAL[id_data_at_visit$PARAMCD == param_name])) |
873 | 5310x |
if (!is.na(current_val)) { |
874 | 5094x |
temp_val <- temp_val + current_val ### |
875 | 5094x |
var_length <- var_length + 1 |
876 |
} |
|
877 |
} # if |
|
878 |
} # param_name |
|
879 |
# eval |
|
880 | 2655x |
if (var_length >= as.numeric(df$num_param[idx])) { |
881 | 2604x |
eval(parse(text = eqn)) ##### |
882 |
} else { |
|
883 | 51x |
new_value <- NA |
884 |
} |
|
885 | ||
886 | 2655x |
new_data_row <- data.frame( |
887 | 2655x |
study = str_extract(id, "[A-Z]+[0-9]+"), |
888 | 2655x |
id, |
889 | 2655x |
visit, |
890 | 2655x |
id_data_at_visit$AVISITN[1], |
891 | 2655x |
id_data_at_visit$QSDTC[1], |
892 | 2655x |
current_name_category, |
893 | 2655x |
current_name_label, |
894 | 2655x |
current_name, |
895 | 2655x |
new_value, |
896 | 2655x |
NA, |
897 | 2655x |
stringsAsFactors = FALSE |
898 |
) |
|
899 | 2655x |
colnames(new_data_row) <- c( |
900 | 2655x |
"STUDYID", "USUBJID", "AVISIT", "AVISITN", |
901 | 2655x |
"ADTM", "PARCAT2", "PARAM", "PARAMCD", |
902 | 2655x |
"AVAL", "AVALC" |
903 |
) ### |
|
904 | 2655x |
df_saved <- rbind(df_saved, new_data_row) ##### |
905 |
} # idx |
|
906 |
} |
|
907 |
# add expect data |
|
908 | 180x |
expect_value <- sample(expect_data$AVAL, 1, prob = c(0.10, 0.90)) |
909 | 180x |
expect_valuec <- expect_data$AVALC[expect_data$AVAL == expect_value] |
910 | ||
911 | 180x |
new_data_row <- data.frame( |
912 | 180x |
study = str_extract(id, "[A-Z]+[0-9]+"), |
913 | 180x |
id, |
914 | 180x |
visit, |
915 | 180x |
id_data_at_visit$AVISITN[1], |
916 | 180x |
datetime = NA, |
917 | 180x |
expect_data$PARCAT2[1], |
918 | 180x |
expect_data$PARAM[1], |
919 | 180x |
expect_data$PARAMCD[1], |
920 | 180x |
expect_value, |
921 | 180x |
expect_valuec, |
922 | 180x |
stringsAsFactors = FALSE |
923 |
) |
|
924 | 180x |
colnames(new_data_row) <- c( |
925 | 180x |
"STUDYID", "USUBJID", "AVISIT", "AVISITN", |
926 | 180x |
"ADTM", "PARCAT2", "PARAM", "PARAMCD", "AVAL", |
927 | 180x |
"AVALC" |
928 |
) ### |
|
929 | 180x |
df_saved <- rbind(df_saved, new_data_row) |
930 |
} # visit |
|
931 |
} # id |
|
932 | ||
933 | 3x |
df_saved1 <- left_join( |
934 | 3x |
df_saved, |
935 | 3x |
ghs_scales, |
936 | 3x |
by = c( |
937 | 3x |
"PARAM", |
938 | 3x |
"PARAMCD", |
939 | 3x |
"PARCAT2" |
940 |
) |
|
941 |
) %>% |
|
942 | 3x |
mutate( |
943 | 3x |
AVALC = ifelse(is.na(AVALC), as.character(AVAL), AVALC), |
944 | 3x |
PARCAT1 = ifelse(PARAMCD == "EX028", expect$PARCAT1, PARCAT1), |
945 | 3x |
PARCAT1N = ifelse(PARAMCD == "EX028", expect$PARCAT1N, PARCAT1N) |
946 |
) |
|
947 | ||
948 | 3x |
adqlqc_tmp <- bind_rows(adqlqc1, df_saved1) %>% |
949 | 3x |
arrange( |
950 | 3x |
USUBJID, |
951 | 3x |
AVISITN, |
952 | 3x |
QSTESTCD |
953 |
) |
|
954 | 3x |
return(adqlqc_tmp) |
955 |
} |
|
956 | ||
957 |
#' @describeIn h_adqlqc Calculate Change from Baseline Category 1 |
|
958 |
#' |
|
959 |
#' @param dataset (`data.frame`)\cr ADaM dataset. |
|
960 |
#' |
|
961 |
#' @return `data.frame` |
|
962 |
#' @keywords internal |
|
963 |
#' |
|
964 |
#' @examples |
|
965 |
#' \dontrun{ |
|
966 |
#' adqlqc <- random.cdisc.data:::derv_chgcat1(dataset = adqlqc |> dplyr::select(-CHGCAT1)) |
|
967 |
#' adqlqc |
|
968 |
#' } |
|
969 |
derv_chgcat1 <- function(dataset) { |
|
970 |
# derivation of CHGCAT1 |
|
971 | 3x |
check_vars <- c("PARCAT2", "CHG") |
972 | ||
973 | 3x |
if (all(check_vars %in% names(dataset))) { |
974 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARCAT2 == "Symptom Scales" & !is.na(dataset$CHG) & dataset$CHG <= -10, |
975 | 3x |
"Improved", "" |
976 |
) |
|
977 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARCAT2 == "Symptom Scales" & !is.na(dataset$CHG) & dataset$CHG >= 10, |
978 | 3x |
"Worsened", dataset$CHGCAT1 |
979 |
) |
|
980 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARCAT2 == "Symptom Scales" & |
981 | 3x |
!is.na(dataset$CHG) & dataset$CHG > -10 & |
982 | 3x |
dataset$CHG < 10, |
983 | 3x |
"No change", dataset$CHGCAT1 |
984 |
) |
|
985 | ||
986 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARCAT2 %in% c("Functional Scales", "Global Health Status") & |
987 | 3x |
!is.na(dataset$CHG) & dataset$CHG >= 10, |
988 | 3x |
"Improved", dataset$CHGCAT1 |
989 |
) |
|
990 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARCAT2 %in% c("Functional Scales", "Global Health Status") & |
991 | 3x |
!is.na(dataset$CHG) & dataset$CHG <= -10, |
992 | 3x |
"Worsened", dataset$CHGCAT1 |
993 |
) |
|
994 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARCAT2 %in% c("Functional Scales", "Global Health Status") & |
995 | 3x |
!is.na(dataset$CHG) & |
996 | 3x |
dataset$CHG > -10 & dataset$CHG < 10, |
997 | 3x |
"No change", dataset$CHGCAT1 |
998 |
) |
|
999 | ||
1000 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 6, |
1001 | 3x |
"Improved by six levels", dataset$CHGCAT1 |
1002 |
) |
|
1003 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 5, |
1004 | 3x |
"Improved by five levels", dataset$CHGCAT1 |
1005 |
) |
|
1006 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 4, |
1007 | 3x |
"Improved by four levels", dataset$CHGCAT1 |
1008 |
) |
|
1009 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 3, |
1010 | 3x |
"Improved by three levels", dataset$CHGCAT1 |
1011 |
) |
|
1012 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 2, |
1013 | 3x |
"Improved by two levels", dataset$CHGCAT1 |
1014 |
) |
|
1015 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 1, |
1016 | 3x |
"Improved by one level", dataset$CHGCAT1 |
1017 |
) |
|
1018 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 0, |
1019 | 3x |
"No change", dataset$CHGCAT1 |
1020 |
) |
|
1021 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -1, |
1022 | 3x |
"Worsened by one level", dataset$CHGCAT1 |
1023 |
) |
|
1024 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -2, |
1025 | 3x |
"Worsened by two levels", dataset$CHGCAT1 |
1026 |
) |
|
1027 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -3, |
1028 | 3x |
"Worsened by three levels", dataset$CHGCAT1 |
1029 |
) |
|
1030 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -4, |
1031 | 3x |
"Worsened by four levels", dataset$CHGCAT1 |
1032 |
) |
|
1033 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -5, |
1034 | 3x |
"Worsened by five levels", dataset$CHGCAT1 |
1035 |
) |
|
1036 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -6, |
1037 | 3x |
"Worsened by six levels", dataset$CHGCAT1 |
1038 |
) |
|
1039 | ||
1040 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == -3, |
1041 | 3x |
"Improved by three levels", dataset$CHGCAT1 |
1042 |
) |
|
1043 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == -2, |
1044 | 3x |
"Improved by two levels", dataset$CHGCAT1 |
1045 |
) |
|
1046 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == -1, |
1047 | 3x |
"Improved by one level", dataset$CHGCAT1 |
1048 |
) |
|
1049 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == 0, |
1050 | 3x |
"No change", dataset$CHGCAT1 |
1051 |
) |
|
1052 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == 1, |
1053 | 3x |
"Worsened by one level", dataset$CHGCAT1 |
1054 |
) |
|
1055 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == 2, |
1056 | 3x |
"Worsened by two levels", dataset$CHGCAT1 |
1057 |
) |
|
1058 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == 3, |
1059 | 3x |
"Worsened by three levels", dataset$CHGCAT1 |
1060 |
) |
|
1061 | ||
1062 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD == "QS02801" & dataset$CHG == -3, |
1063 | 3x |
"Improved by three levels", dataset$CHGCAT1 |
1064 |
) |
|
1065 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD == "QS02801" & dataset$CHG == -2, |
1066 | 3x |
"Improved by two levels", dataset$CHGCAT1 |
1067 |
) |
|
1068 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD == "QS02801" & dataset$CHG == -1, |
1069 | 3x |
"Improved by one level", dataset$CHGCAT1 |
1070 |
) |
|
1071 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD == "QS02801" & dataset$CHG == 0, |
1072 | 3x |
"No changed", dataset$CHGCAT1 |
1073 |
) |
|
1074 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD == "QS02801" & dataset$CHG == 1, |
1075 | 3x |
"Worsened by one level", dataset$CHGCAT1 |
1076 |
) |
|
1077 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD == "QS02801" & dataset$CHG == 2, |
1078 | 3x |
"Worsened by two levels", dataset$CHGCAT1 |
1079 |
) |
|
1080 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD == "QS02801" & dataset$CHG == 3, |
1081 | 3x |
"Worsened by three levels", dataset$CHGCAT1 |
1082 |
) |
|
1083 | ||
1084 | 3x |
paramcd_vec <- c( |
1085 | 3x |
"QS02803", "QS02804", "QS02805", "QS02807", "QS02808", "QS02809", "QS02810", |
1086 | 3x |
"QS02811", "QS02812", "QS02813", "QS02814", "QS02815", "QS02816", "QS02817", |
1087 | 3x |
"QS02818", "QS02819", "QS02820", "QS02821", "QS02822", "QS02823", "QS02824", |
1088 | 3x |
"QS02825", "QS02826", "QS02827", "QS02828" |
1089 |
) |
|
1090 | ||
1091 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% paramcd_vec & dataset$CHG == -3, |
1092 | 3x |
"Improved by three levels", dataset$CHGCAT1 |
1093 |
) |
|
1094 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% paramcd_vec & dataset$CHG == -2, |
1095 | 3x |
"Improved by two levels", dataset$CHGCAT1 |
1096 |
) |
|
1097 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% paramcd_vec & dataset$CHG == -1, |
1098 | 3x |
"Improved by one level", dataset$CHGCAT1 |
1099 |
) |
|
1100 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% paramcd_vec & dataset$CHG == 0, |
1101 | 3x |
"No change", dataset$CHGCAT1 |
1102 |
) |
|
1103 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% paramcd_vec & dataset$CHG == 1, |
1104 | 3x |
"Worsened by one level", dataset$CHGCAT1 |
1105 |
) |
|
1106 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% paramcd_vec & dataset$CHG == 2, |
1107 | 3x |
"Worsened by two levels", dataset$CHGCAT1 |
1108 |
) |
|
1109 | 3x |
dataset$CHGCAT1 <- ifelse(dataset$PARAMCD %in% paramcd_vec & dataset$CHG == 3, |
1110 | 3x |
"Worsened by three levels", dataset$CHGCAT1 |
1111 |
) |
|
1112 | ||
1113 | 3x |
return(dataset) |
1114 |
} else { |
|
1115 | ! |
collapse_vars <- paste(check_vars, collapse = ", ") |
1116 | ! |
stop(sprintf( |
1117 | ! |
"%s: one or both variables is/are missing, needed for derivation", |
1118 | ! |
collapse_vars |
1119 |
)) |
|
1120 |
} |
|
1121 |
} |
|
1122 | ||
1123 |
#' @describeIn h_adqlqc Completion/Compliance Data Calculation |
|
1124 |
#' |
|
1125 |
#' @param dataset (`data.frame`)\cr Dataset. |
|
1126 |
#' |
|
1127 |
#' @return `data.frame` |
|
1128 |
#' @keywords internal |
|
1129 |
#' |
|
1130 |
#' @examples |
|
1131 |
#' \dontrun{ |
|
1132 |
#' compliance_data <- random.cdisc.data:::comp_derv(adqlqc, 80, 2) |
|
1133 |
#' compliance_data |
|
1134 |
#' } |
|
1135 |
comp_derv <- function(dataset, percent, number) { |
|
1136 |
# original items data |
|
1137 | 3x |
orig_data <- filter( |
1138 | 3x |
dataset, |
1139 | 3x |
PARCAT2 == "Original Items" |
1140 |
) |
|
1141 |
# total number of questionnaires |
|
1142 | 3x |
comp_count_all <- select( |
1143 | 3x |
orig_data, |
1144 | 3x |
PARAMCD |
1145 |
) %>% |
|
1146 | 3x |
distinct() %>% |
1147 | 3x |
count() |
1148 | 3x |
comp_count_all <- comp_count_all$n |
1149 |
# original items data count of questions answered |
|
1150 | 3x |
orig_data_summ <- group_by( |
1151 | 3x |
orig_data, |
1152 | 3x |
STUDYID, |
1153 | 3x |
USUBJID, |
1154 | 3x |
PARCAT1, |
1155 | 3x |
AVISIT, |
1156 | 3x |
AVISITN, |
1157 | 3x |
ADTM, |
1158 | 3x |
ADY |
1159 |
) %>% |
|
1160 | 3x |
summarise( |
1161 | 3x |
comp_count = sum(!is.na(AVAL)), |
1162 | 3x |
comp_count_all = comp_count_all, |
1163 | 3x |
.groups = "drop" |
1164 |
) %>% |
|
1165 | 3x |
mutate( |
1166 | 3x |
per_comp = trunc((comp_count / comp_count_all) * 100) |
1167 |
) |
|
1168 |
# expected data |
|
1169 | 3x |
ex028_data <- filter( |
1170 | 3x |
dataset, |
1171 | 3x |
PARAMCD == "EX028", |
1172 | 3x |
AVAL == 1 |
1173 |
) %>% |
|
1174 | 3x |
select( |
1175 | 3x |
STUDYID, |
1176 | 3x |
USUBJID, |
1177 | 3x |
PARCAT1, |
1178 | 3x |
AVISIT, |
1179 | 3x |
AVISITN, |
1180 | 3x |
ADTM, |
1181 | 3x |
ADY, |
1182 | 3x |
AVAL_ex028 = AVAL |
1183 |
) %>% |
|
1184 | 3x |
mutate( |
1185 | 3x |
comp_count_all = comp_count_all |
1186 |
) |
|
1187 | ||
1188 | 3x |
joined <- left_join( |
1189 | 3x |
ex028_data, |
1190 | 3x |
orig_data_summ, |
1191 | 3x |
by = c( |
1192 | 3x |
"STUDYID", |
1193 | 3x |
"USUBJID", |
1194 | 3x |
"PARCAT1", |
1195 | 3x |
"AVISIT", |
1196 | 3x |
"AVISITN", |
1197 | 3x |
"comp_count_all" |
1198 |
) |
|
1199 |
) %>% |
|
1200 | 3x |
select(-c("ADTM.x", "ADY.x")) |
1201 | ||
1202 | 3x |
joined <- rename( |
1203 | 3x |
joined, |
1204 | 3x |
ADTM = ADTM.y, |
1205 | 3x |
ADY = ADY.y |
1206 |
) |
|
1207 |
# CO028ALL |
|
1208 | 3x |
co028all <- mutate( |
1209 | 3x |
joined, |
1210 | 3x |
PARAMCD = "CO028ALL", |
1211 | 3x |
PARAM = "EORTC QLQ-C30: Completion - Completed all questions", |
1212 | 3x |
PARCAT2 = "Completion", |
1213 | 3x |
AVAL = case_when( |
1214 | 3x |
AVAL_ex028 == 1 & comp_count == comp_count_all ~ 1, |
1215 | 3x |
AVAL_ex028 == 1 & (is.na(comp_count) | comp_count < comp_count_all) ~ 0 |
1216 |
), |
|
1217 | 3x |
AVALC = case_when( |
1218 | 3x |
AVAL == 1 ~ "Completed all questions", |
1219 | 3x |
AVAL == 0 ~ "Did not complete all questions" |
1220 |
) |
|
1221 |
) |
|
1222 |
# CO028<y>P |
|
1223 | 3x |
co028p <- mutate( |
1224 | 3x |
joined, |
1225 | 3x |
PARAMCD = paste0("CO028", as.character(percent), "P"), |
1226 | 3x |
PARAM = sprintf( |
1227 | 3x |
"EORTC QLQ-C30: Completion - Completed at least %s%% of questions", |
1228 | 3x |
as.character(percent) |
1229 |
), |
|
1230 | 3x |
PARCAT2 = "Completion", |
1231 | 3x |
AVAL = case_when( |
1232 | 3x |
AVAL_ex028 == 1 & per_comp >= percent ~ 1, |
1233 | 3x |
AVAL_ex028 == 1 & (is.na(per_comp) | per_comp < percent) ~ 0 |
1234 |
), |
|
1235 | 3x |
AVALC = case_when( |
1236 | 3x |
AVAL == 1 ~ sprintf( |
1237 | 3x |
"Completed at least %s%% of questions", |
1238 | 3x |
as.character(percent) |
1239 |
), |
|
1240 | 3x |
AVAL == 0 ~ sprintf( |
1241 | 3x |
"Did not complete at least %s%% of questions", |
1242 | 3x |
as.character(percent) |
1243 |
) |
|
1244 |
) |
|
1245 |
) |
|
1246 |
# CO028<x>Q |
|
1247 | 3x |
co028q <- mutate( |
1248 | 3x |
joined, |
1249 | 3x |
PARAMCD = paste0("CO028", as.character(number), "Q"), |
1250 | 3x |
PARAM = sprintf( |
1251 | 3x |
"EORTC QLQ-C30: Completion - Completed at least %s question(s)", |
1252 | 3x |
as.character(number) |
1253 |
), |
|
1254 | 3x |
PARCAT2 = "Completion", |
1255 | 3x |
AVAL = case_when( |
1256 | 3x |
AVAL_ex028 == 1 & comp_count >= number ~ 1, |
1257 | 3x |
AVAL_ex028 == 1 & (comp_count < number | is.na(comp_count)) ~ 0 |
1258 |
), |
|
1259 | 3x |
AVALC = case_when( |
1260 | 3x |
AVAL == 1 ~ sprintf( |
1261 | 3x |
"Completed at least %s questions", |
1262 | 3x |
as.character(number) |
1263 |
), |
|
1264 | 3x |
AVAL == 0 ~ sprintf( |
1265 | 3x |
"Did not complete at least %s question(s)", |
1266 | 3x |
as.character(number) |
1267 |
) |
|
1268 |
) |
|
1269 |
) |
|
1270 | ||
1271 | 3x |
co028_bind <- rbind( |
1272 | 3x |
co028all, |
1273 | 3x |
co028p, |
1274 | 3x |
co028q |
1275 |
) %>% |
|
1276 | 3x |
select( |
1277 | 3x |
-c("AVAL_ex028", "comp_count", "comp_count_all", "per_comp") |
1278 |
) |
|
1279 | 3x |
return(co028_bind) |
1280 |
} |
1 |
#' Tumor Response Analysis Dataset (ADRS) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating a random Tumor Response Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details |
|
9 |
#' One record per subject per parameter per analysis visit per analysis date. |
|
10 |
#' SDTM variables are populated on new records coming from other single records. |
|
11 |
#' Otherwise, SDTM variables are left blank. |
|
12 |
#' |
|
13 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `AVISITN`, `ADT`, `RSSEQ` |
|
14 |
#' |
|
15 |
#' @inheritParams argument_convention |
|
16 |
#' @param avalc (`character vector`)\cr Analysis value categories. |
|
17 |
#' @template param_cached |
|
18 |
#' @templateVar data adrs |
|
19 |
#' |
|
20 |
#' @return `data.frame` |
|
21 |
#' @export |
|
22 |
#' |
|
23 |
#' @examples |
|
24 |
#' library(random.cdisc.data) |
|
25 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
26 |
#' |
|
27 |
#' adrs <- radrs(adsl, seed = 2) |
|
28 |
#' adrs |
|
29 |
radrs <- function(adsl, |
|
30 |
avalc = NULL, |
|
31 |
lookup = NULL, |
|
32 |
seed = NULL, |
|
33 |
na_percentage = 0, |
|
34 |
na_vars = list(AVISIT = c(NA, 0.1), AVAL = c(1234, 0.1), AVALC = c(1234, 0.1)), |
|
35 |
cached = FALSE) { |
|
36 | 7x |
checkmate::assert_flag(cached) |
37 | 7x |
if (cached) { |
38 | 1x |
return(get_cached_data("cadrs")) |
39 |
} |
|
40 | ||
41 | 6x |
checkmate::assert_data_frame(adsl) |
42 | 6x |
checkmate::assert_vector(avalc, null.ok = TRUE) |
43 | 6x |
checkmate::assert_number(seed, null.ok = TRUE) |
44 | 6x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
45 | 6x |
checkmate::assert_true(na_percentage < 1) |
46 | ||
47 | 6x |
param_codes <- if (!is.null(avalc)) { |
48 | ! |
avalc |
49 |
} else { |
|
50 | 6x |
stats::setNames(1:5, c("CR", "PR", "SD", "PD", "NE")) |
51 |
} |
|
52 | ||
53 | 6x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
54 | 6x |
lookup_ars <- if (!is.null(lookup)) { |
55 | ! |
lookup |
56 |
} else { |
|
57 | 6x |
expand.grid( |
58 | 6x |
ARM = c("A: Drug X", "B: Placebo", "C: Combination"), |
59 | 6x |
AVALC = names(param_codes) |
60 | 6x |
) %>% dplyr::mutate( |
61 | 6x |
AVAL = param_codes[AVALC], |
62 | 6x |
p_scr = c(rep(0, 3), rep(0, 3), c(1, 1, 1), c(0, 0, 0), c(0, 0, 0)), |
63 | 6x |
p_bsl = c(rep(0, 3), rep(0, 3), c(1, 1, 1), c(0, 0, 0), c(0, 0, 0)), |
64 | 6x |
p_cycle = c(c(.4, .3, .5), c(.35, .25, .25), c(.1, .2, .08), c(.14, 0.15, 0.15), c(.01, 0.1, 0.02)), |
65 | 6x |
p_eoi = c(c(.4, .3, .5), c(.35, .25, .25), c(.1, .2, .08), c(.14, 0.15, 0.15), c(.01, 0.1, 0.02)), |
66 | 6x |
p_fu = c(c(.3, .2, .4), c(.2, .1, .3), c(.2, .2, .2), c(.3, .5, 0.1), rep(0, 3)) |
67 |
) |
|
68 |
} |
|
69 | ||
70 | 6x |
if (!is.null(seed)) { |
71 | 6x |
set.seed(seed) |
72 |
} |
|
73 | 6x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
74 | ||
75 | 6x |
adrs <- split(adsl, adsl$USUBJID) %>% |
76 | 6x |
lapply(function(pinfo) { |
77 | 60x |
probs <- dplyr::filter(lookup_ars, ARM == as.character(pinfo$ACTARM)) |
78 | ||
79 |
# screening |
|
80 | 60x |
rsp_screen <- sample(probs$AVALC, 1, prob = probs$p_scr) %>% as.character() |
81 | ||
82 |
# baseline |
|
83 | 60x |
rsp_bsl <- sample(probs$AVALC, 1, prob = probs$p_bsl) %>% as.character() |
84 | ||
85 |
# cycle |
|
86 | 60x |
rsp_c2d1 <- sample(probs$AVALC, 1, prob = probs$p_cycle) %>% as.character() |
87 | 60x |
rsp_c4d1 <- sample(probs$AVALC, 1, prob = probs$p_cycle) %>% as.character() |
88 | ||
89 |
# end of induction |
|
90 | 60x |
rsp_eoi <- sample(probs$AVALC, 1, prob = probs$p_eoi) %>% as.character() |
91 | ||
92 |
# follow up |
|
93 | 60x |
rsp_fu <- sample(probs$AVALC, 1, prob = probs$p_fu) %>% as.character() |
94 | ||
95 | 60x |
best_rsp <- min(param_codes[c(rsp_screen, rsp_bsl, rsp_eoi, rsp_fu, rsp_c2d1, rsp_c4d1)]) |
96 | 60x |
best_rsp_i <- which.min(param_codes[c(rsp_screen, rsp_bsl, rsp_eoi, rsp_fu, rsp_c2d1, rsp_c4d1)]) |
97 | ||
98 | 60x |
avisit <- c("SCREENING", "BASELINE", "CYCLE 2 DAY 1", "CYCLE 4 DAY 1", "END OF INDUCTION", "FOLLOW UP") |
99 | ||
100 |
# meaningful date information |
|
101 | 60x |
trtstdt <- lubridate::date(pinfo$TRTSDTM) |
102 | 60x |
trtendt <- lubridate::date(dplyr::if_else( |
103 | 60x |
!is.na(pinfo$TRTEDTM), pinfo$TRTEDTM, |
104 | 60x |
lubridate::floor_date(trtstdt + study_duration_secs, unit = "day") |
105 |
)) |
|
106 | 60x |
scr_date <- trtstdt - lubridate::days(100) |
107 | 60x |
bs_date <- trtstdt |
108 | 60x |
flu_date <- sample(seq(lubridate::as_datetime(trtstdt), lubridate::as_datetime(trtendt), by = "day"), size = 1) |
109 | 60x |
eoi_date <- sample(seq(lubridate::as_datetime(trtstdt), lubridate::as_datetime(trtendt), by = "day"), size = 1) |
110 | 60x |
c2d1_date <- sample(seq(lubridate::as_datetime(trtstdt), lubridate::as_datetime(trtendt), by = "day"), size = 1) |
111 | 60x |
c4d1_date <- min(lubridate::date(c2d1_date + lubridate::days(60)), trtendt) |
112 | ||
113 | 60x |
tibble::tibble( |
114 | 60x |
STUDYID = pinfo$STUDYID, |
115 | 60x |
SITEID = pinfo$SITEID, |
116 | 60x |
USUBJID = pinfo$USUBJID, |
117 | 60x |
PARAMCD = as.factor(c(rep("OVRINV", 6), "BESRSPI", "INVET")), |
118 | 60x |
PARAM = as.factor(dplyr::recode( |
119 | 60x |
PARAMCD, |
120 | 60x |
OVRINV = "Overall Response by Investigator - by visit", |
121 | 60x |
OVRSPI = "Best Overall Response by Investigator (no confirmation required)", |
122 | 60x |
BESRSPI = "Best Confirmed Overall Response by Investigator", |
123 | 60x |
INVET = "Investigator End Of Induction Response" |
124 |
)), |
|
125 | 60x |
AVALC = c( |
126 | 60x |
rsp_screen, rsp_bsl, rsp_c2d1, rsp_c4d1, rsp_eoi, rsp_fu, |
127 | 60x |
names(param_codes)[best_rsp], |
128 | 60x |
rsp_eoi |
129 |
), |
|
130 | 60x |
AVAL = param_codes[AVALC], |
131 | 60x |
AVISIT = factor(c(avisit, avisit[best_rsp_i], avisit[5]), levels = avisit) |
132 |
) %>% |
|
133 | 60x |
merge( |
134 | 60x |
tibble::tibble( |
135 | 60x |
AVISIT = avisit, |
136 | 60x |
ADTM = c(scr_date, bs_date, c2d1_date, c4d1_date, eoi_date, flu_date), |
137 | 60x |
AVISITN = c(-1, 0, 2, 4, 999, 999), |
138 | 60x |
TRTSDTM = pinfo$TRTSDTM |
139 |
) %>% |
|
140 | 60x |
dplyr::mutate( |
141 | 60x |
ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days")) |
142 |
) %>% |
|
143 | 60x |
dplyr::select(-"TRTSDTM"), |
144 | 60x |
by = "AVISIT" |
145 |
) |
|
146 |
}) %>% |
|
147 | 6x |
Reduce(rbind, .) %>% |
148 | 6x |
dplyr::mutate(AVALC = factor(AVALC, levels = names(param_codes))) %>% |
149 | 6x |
var_relabel( |
150 | 6x |
STUDYID = "Study Identifier", |
151 | 6x |
USUBJID = "Unique Subject Identifier" |
152 |
) |
|
153 | ||
154 | 6x |
adrs <- var_relabel( |
155 | 6x |
adrs, |
156 | 6x |
STUDYID = "Study Identifier", |
157 | 6x |
USUBJID = "Unique Subject Identifier" |
158 |
) |
|
159 | ||
160 |
# merge ADSL to be able to add RS date and study day variables |
|
161 | ||
162 | ||
163 | 6x |
adrs <- dplyr::inner_join( |
164 | 6x |
dplyr::select(adrs, -"SITEID"), |
165 | 6x |
adsl, |
166 | 6x |
by = c("STUDYID", "USUBJID") |
167 |
) |
|
168 | ||
169 | 6x |
adrs <- adrs %>% |
170 | 6x |
dplyr::group_by(USUBJID) %>% |
171 | 6x |
dplyr::mutate(RSSEQ = seq_len(dplyr::n())) %>% |
172 | 6x |
dplyr::mutate(ASEQ = RSSEQ) %>% |
173 | 6x |
dplyr::ungroup() %>% |
174 | 6x |
dplyr::arrange( |
175 | 6x |
STUDYID, |
176 | 6x |
USUBJID, |
177 | 6x |
PARAMCD, |
178 | 6x |
AVISITN, |
179 | 6x |
ADTM, |
180 | 6x |
RSSEQ |
181 |
) |
|
182 | ||
183 | 6x |
if (length(na_vars) > 0 && na_percentage > 0) { |
184 | ! |
adrs <- mutate_na(ds = adrs, na_vars = na_vars, na_percentage = na_percentage) |
185 |
} |
|
186 | ||
187 |
# apply metadata |
|
188 | 6x |
adrs <- apply_metadata(adrs, "metadata/ADRS.yml") |
189 | ||
190 | 6x |
return(adrs) |
191 |
} |
1 |
#' Anti-Drug Antibody Analysis Dataset (ADAB) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating a random Anti-Drug Antibody Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset and Pharmacokinetics Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @inheritParams argument_convention |
|
9 |
#' @inheritParams radpc |
|
10 |
#' @param adpc (`data.frame`)\cr Pharmacokinetics Analysis Dataset. |
|
11 |
#' @template param_cached |
|
12 |
#' @templateVar data adab |
|
13 |
#' |
|
14 |
#' @return `data.frame` |
|
15 |
#' @export |
|
16 |
#' |
|
17 |
#' @details One record per study per subject per parameter per time point: "R1800000", "RESULT1", "R1800001", "RESULT2". |
|
18 |
#' |
|
19 |
#' @examples |
|
20 |
#' library(random.cdisc.data) |
|
21 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
22 |
#' adpc <- radpc(adsl, seed = 2, duration = 9 * 7) |
|
23 |
#' |
|
24 |
#' adab <- radab(adsl, adpc, seed = 2) |
|
25 |
#' adab |
|
26 |
radab <- function(adsl, |
|
27 |
adpc, |
|
28 |
constants = c(D = 100, ka = 0.8, ke = 1), |
|
29 |
paramcd = c( |
|
30 |
"R1800000", "RESULT1", "R1800001", "RESULT2", "ADASTAT1", "INDUCD1", "ENHANC1", |
|
31 |
"TRUNAFF1", "EMERNEG1", "EMERPOS1", "PERSADA1", "TRANADA1", "BFLAG1", "TIMADA1", |
|
32 |
"ADADUR1", "ADASTAT2", "INDUCD2", "ENHANC2", "EMERNEG2", "EMERPOS2", "BFLAG2", |
|
33 |
"TRUNAFF2" |
|
34 |
), |
|
35 |
param = c( |
|
36 |
"Antibody titer units", "ADA interpreted per sample result", |
|
37 |
"Neutralizing Antibody titer units", "NAB interpreted per sample result", |
|
38 |
"ADA Status of a patient", "Treatment induced ADA", "Treatment enhanced ADA", |
|
39 |
"Treatment unaffected", "Treatment Emergent - Negative", |
|
40 |
"Treatment Emergent - Positive", "Persistent ADA", "Transient ADA", "Baseline", |
|
41 |
"Time to onset of ADA", "ADA Duration", "NAB Status of a patient", |
|
42 |
"Treatment induced ADA, Neutralizing Antibody", |
|
43 |
"Treatment enhanced ADA, Neutralizing Antibody", |
|
44 |
"Treatment Emergent - Negative, Neutralizing Antibody", |
|
45 |
"Treatment Emergent - Positive, Neutralizing Antibody", |
|
46 |
"Baseline, Neutralizing Antibody", |
|
47 |
"Treatment unaffected, Neutralizing Antibody" |
|
48 |
), |
|
49 |
avalu = c( |
|
50 |
"titer", "", "titer", "", "", "", "", "", "", "", "", "", "", "weeks", "weeks", |
|
51 |
"", "", "", "", "", "", "" |
|
52 |
), |
|
53 |
seed = NULL, |
|
54 |
na_percentage = 0, |
|
55 |
na_vars = list( |
|
56 |
AVAL = c(NA, 0.1) |
|
57 |
), |
|
58 |
cached = FALSE) { |
|
59 | 4x |
checkmate::assert_flag(cached) |
60 | 4x |
if (cached) { |
61 | 1x |
return(get_cached_data("cadpc")) |
62 |
} |
|
63 | ||
64 | 3x |
checkmate::assert_data_frame(adpc) |
65 | 3x |
checkmate::assert_subset(names(constants), c("D", "ka", "ke")) |
66 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
67 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1, na.ok = TRUE) |
68 | 3x |
checkmate::assert_list(na_vars) |
69 | 3x |
checkmate::assert_character(paramcd) |
70 | 3x |
checkmate::assert_character(param, len = length(paramcd)) |
71 | 3x |
checkmate::assert_character(avalu, len = length(paramcd)) |
72 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
73 | 3x |
checkmate::assert_true(na_percentage < 1) |
74 | ||
75 | 3x |
if (!is.null(seed)) { |
76 | 3x |
set.seed(seed) |
77 |
} |
|
78 | ||
79 |
# validate and initialize related variables |
|
80 | 3x |
param_init_list <- relvar_init(param, paramcd) |
81 | 3x |
unit_init_list <- relvar_init(param, avalu) |
82 | ||
83 | 3x |
adpc <- adpc %>% dplyr::filter(ASMED == "PLASMA") |
84 | 3x |
adab0 <- expand.grid( |
85 | 3x |
STUDYID = unique(adsl$STUDYID), |
86 | 3x |
USUBJID = unique(adsl$USUBJID), |
87 | 3x |
VISIT = unique(adpc$VISIT), |
88 | 3x |
PARAM = as.factor(param_init_list$relvar1[c(1:4)]), |
89 | 3x |
PARCAT1 = "A: Drug X Antibody", |
90 | 3x |
stringsAsFactors = FALSE |
91 |
) |
|
92 |
# Set random values for observations |
|
93 | 3x |
visit_lvl_params <- c( |
94 | 3x |
"Antibody titer units", "Neutralizing Antibody titer units", |
95 | 3x |
"ADA interpreted per sample result", "NAB interpreted per sample result" |
96 |
) |
|
97 | 3x |
aval_random <- stats::rnorm(nrow(unique(adab0 %>% dplyr::select(USUBJID, VISIT))), mean = 1, sd = 0.2) |
98 | 3x |
aval_random <- cbind(unique(adab0 %>% dplyr::select(USUBJID, VISIT)), AVAL1 = aval_random) |
99 | ||
100 | 3x |
adab_visit <- adab0 %>% dplyr::left_join(aval_random, by = c("USUBJID", "VISIT")) |
101 | 3x |
adab_visit <- adab_visit %>% |
102 | 3x |
dplyr::mutate( |
103 | 3x |
AVAL2 = ifelse(AVAL1 >= 1, AVAL1, NA), |
104 | 3x |
AVALC = dplyr::case_when( |
105 | 3x |
!is.na(AVAL2) ~ "POSITIVE", |
106 | 3x |
is.na(AVAL2) ~ "NEGATIVE" |
107 |
), |
|
108 | 3x |
AVAL = dplyr::case_when( |
109 | 3x |
(PARAM %in% visit_lvl_params[3:4] & !is.na(AVAL2)) ~ 1, |
110 | 3x |
(PARAM %in% visit_lvl_params[3:4] & is.na(AVAL2)) ~ 0, |
111 | 3x |
(PARAM %in% visit_lvl_params[1:2] & !is.na(AVAL2)) ~ AVAL2, |
112 | 3x |
TRUE ~ as.numeric(NA) |
113 |
) |
|
114 |
) %>% |
|
115 | 3x |
dplyr::select(-c(AVAL1, AVAL2)) |
116 | ||
117 |
# retrieve other variables from adpc |
|
118 | 3x |
adab_visit <- adab_visit %>% |
119 | 3x |
dplyr::inner_join( |
120 | 3x |
adpc %>% |
121 | 3x |
dplyr::filter(PCTPT %in% c("Predose", "24H")) %>% |
122 | 3x |
dplyr::select( |
123 | 3x |
STUDYID, |
124 | 3x |
USUBJID, |
125 | 3x |
VISIT, |
126 | 3x |
PCTPT, |
127 | 3x |
ARM, |
128 | 3x |
ACTARM, |
129 | 3x |
VISITDY, |
130 | 3x |
AFRLT, |
131 | 3x |
NFRLT, |
132 | 3x |
ARRLT, |
133 | 3x |
NRRLT, |
134 | 3x |
RELTMU |
135 |
) %>% |
|
136 | 3x |
unique(), |
137 | 3x |
by = c("STUDYID", "USUBJID", "VISIT") |
138 |
) %>% |
|
139 | 3x |
rename(ISTPT = PCTPT) |
140 | ||
141 |
# mutate time from dose variables from adpc to convert into Days |
|
142 | 3x |
adab_visit <- adab_visit %>% dplyr::mutate_at(c("AFRLT", "NFRLT", "ARRLT", "NRRLT"), ~ . / 24) |
143 | ||
144 | ||
145 | ||
146 |
# Set random values for subject level paramaters (Y/N) |
|
147 | ||
148 | 3x |
adab1 <- expand.grid( |
149 | 3x |
STUDYID = unique(adsl$STUDYID), |
150 | 3x |
USUBJID = unique(adpc$USUBJID), |
151 | 3x |
VISIT = NA, |
152 | 3x |
PARAM = as.factor(param_init_list$relvar1[c(5:13, 16:22)]), |
153 | 3x |
PARCAT1 = "A: Drug X Antibody", |
154 | 3x |
stringsAsFactors = FALSE |
155 |
) |
|
156 | ||
157 | 3x |
sub_lvl_params <- c( |
158 | 3x |
"ADA Status of a patient", "Treatment induced ADA", "Treatment enhanced ADA", |
159 | 3x |
"Treatment unaffected", "Treatment Emergent - Negative", |
160 | 3x |
"Treatment Emergent - Positive", "Persistent ADA", "Transient ADA", "Baseline", |
161 |
# "Time to onset of ADA", "ADA Duration", |
|
162 | 3x |
"NAB Status of a patient", |
163 | 3x |
"Treatment induced ADA, Neutralizing Antibody", |
164 | 3x |
"Treatment enhanced ADA, Neutralizing Antibody", |
165 | 3x |
"Treatment Emergent - Negative, Neutralizing Antibody", |
166 | 3x |
"Treatment Emergent - Positive, Neutralizing Antibody", |
167 | 3x |
"Baseline, Neutralizing Antibody", |
168 | 3x |
"Treatment unaffected, Neutralizing Antibody" |
169 |
) |
|
170 | ||
171 | 3x |
aval_random_sub <- stats::rbinom(nrow(unique(adab1 %>% dplyr::select(USUBJID))), 1, 0.5) |
172 | 3x |
aval_random_sub <- cbind(unique(adab1 %>% dplyr::select(USUBJID)), AVAL1 = aval_random_sub) |
173 | ||
174 | 3x |
adab_sub <- adab1 %>% dplyr::left_join(aval_random_sub, by = c("USUBJID")) |
175 | 3x |
adab_sub <- adab_sub %>% |
176 | 3x |
dplyr::mutate( |
177 | 3x |
AVAL = AVAL1, |
178 | 3x |
AVALC = dplyr::case_when( |
179 | 3x |
PARAM %in% c("ADA Status of a patient", "NAB Status of a patient") & AVAL1 == 1 ~ "POSITIVE", |
180 | 3x |
PARAM %in% c("ADA Status of a patient", "NAB Status of a patient") & AVAL1 == 0 ~ "NEGATIVE", |
181 | 3x |
!(PARAM %in% c("ADA Status of a patient", "NAB Status of a patient")) & AVAL1 == 1 ~ "Y", |
182 | 3x |
!(PARAM %in% c("ADA Status of a patient", "NAB Status of a patient")) & AVAL1 == 0 ~ "N" |
183 |
) |
|
184 |
) %>% |
|
185 | 3x |
dplyr::select(-c(AVAL1)) |
186 | ||
187 |
# Set random values for subject level paramaters (numeric) |
|
188 | ||
189 | 3x |
adab2 <- expand.grid( |
190 | 3x |
STUDYID = unique(adsl$STUDYID), |
191 | 3x |
USUBJID = unique(adpc$USUBJID), |
192 | 3x |
VISIT = NA, |
193 | 3x |
PARAM = as.factor(param_init_list$relvar1[c(14, 15)]), |
194 | 3x |
PARCAT1 = "A: Drug X Antibody", |
195 | 3x |
stringsAsFactors = FALSE |
196 |
) |
|
197 | ||
198 | 3x |
sub_lvl_params_num <- c("Time to onset of ADA", "ADA Duration") |
199 | ||
200 | 3x |
aval_random_sub_num <- stats::rnorm(nrow(unique(adab2 %>% dplyr::select(USUBJID))), mean = 1, sd = 1) |
201 | 3x |
aval_random_sub_num <- cbind(unique(adab2 %>% dplyr::select(USUBJID)), AVAL1 = aval_random_sub_num) |
202 | ||
203 | 3x |
adab_sub_num <- adab2 %>% dplyr::left_join(aval_random_sub_num, by = c("USUBJID")) |
204 | 3x |
adab_sub_num <- adab_sub_num %>% |
205 | 3x |
dplyr::mutate( |
206 | 3x |
AVAL = ifelse(AVAL1 >= 1, round(AVAL1, 2), NA), |