1 |
#' Laboratory Data Analysis Dataset (ADLB) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating a random Laboratory Data Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details One record per subject per parameter per analysis visit per analysis date. |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `BASETYPE`, `AVISITN`, `ATPTN`, `DTYPE`, `ADTM`, `LBSEQ`, `ASPID` |
|
11 |
# |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @param lbcat (`character vector`)\cr LB category values. |
|
14 |
#' @param max_n_lbs (`integer`)\cr Maximum number of labs per patient. Defaults to 10. |
|
15 |
#' @template param_cached |
|
16 |
#' @templateVar data adlb |
|
17 |
#' |
|
18 |
#' @return `data.frame` |
|
19 |
#' @export |
|
20 |
#' |
|
21 |
#' @author tomlinsj, npaszty, Xuefeng Hou |
|
22 |
#' |
|
23 |
#' @examples |
|
24 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
25 |
#' |
|
26 |
#' adlb <- radlb(adsl, visit_format = "WEEK", n_assessments = 7L, seed = 2) |
|
27 |
#' adlb |
|
28 |
#' |
|
29 |
#' adlb <- radlb(adsl, visit_format = "CYCLE", n_assessments = 2L, seed = 2) |
|
30 |
#' adlb |
|
31 |
radlb <- function(adsl, |
|
32 |
lbcat = c("CHEMISTRY", "CHEMISTRY", "IMMUNOLOGY"), |
|
33 |
param = c( |
|
34 |
"Alanine Aminotransferase Measurement", |
|
35 |
"C-Reactive Protein Measurement", |
|
36 |
"Immunoglobulin A Measurement" |
|
37 |
), |
|
38 |
paramcd = c("ALT", "CRP", "IGA"), |
|
39 |
paramu = c("U/L", "mg/L", "g/L"), |
|
40 |
aval_mean = c(18, 9, 2.9), |
|
41 |
visit_format = "WEEK", |
|
42 |
n_assessments = 5L, |
|
43 |
n_days = 5L, |
|
44 |
max_n_lbs = 10L, |
|
45 |
lookup = NULL, |
|
46 |
seed = NULL, |
|
47 |
na_percentage = 0, |
|
48 |
na_vars = list( |
|
49 |
LOQFL = c(NA, 0.1), ABLFL2 = c(1234, 0.1), ABLFL = c(1235, 0.1), |
|
50 |
BASE2 = c(NA, 0.1), BASE = c(NA, 0.1), |
|
51 |
CHG2 = c(1235, 0.1), PCHG2 = c(1235, 0.1), CHG = c(1234, 0.1), PCHG = c(1234, 0.1) |
|
52 |
), |
|
53 |
cached = FALSE) { |
|
54 | 4x |
checkmate::assert_flag(cached) |
55 | 4x |
if (cached) { |
56 | 1x |
return(get_cached_data("cadlb")) |
57 |
} |
|
58 | ||
59 | 3x |
checkmate::assert_data_frame(adsl) |
60 | 3x |
checkmate::assert_character(param, min.len = 1, any.missing = FALSE) |
61 | 3x |
checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE) |
62 | 3x |
checkmate::assert_character(paramu, min.len = 1, any.missing = FALSE) |
63 | 3x |
checkmate::assert_character(lbcat, min.len = 1, any.missing = FALSE) |
64 | 3x |
checkmate::assert_string(visit_format) |
65 | 3x |
checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE) |
66 | 3x |
checkmate::assert_integer(n_days, len = 1, any.missing = FALSE) |
67 | 3x |
checkmate::assert_integer(max_n_lbs, len = 1, any.missing = FALSE) |
68 | 3x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
69 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
70 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
71 | 3x |
checkmate::assert_true(na_percentage < 1) |
72 | ||
73 |
# validate and initialize related variables |
|
74 | 3x |
lbcat_init_list <- relvar_init(param, lbcat) |
75 | 3x |
param_init_list <- relvar_init(param, paramcd) |
76 | 3x |
unit_init_list <- relvar_init(param, paramu) |
77 | ||
78 | 3x |
if (!is.null(seed)) { |
79 | 3x |
set.seed(seed) |
80 |
} |
|
81 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
82 | ||
83 | 3x |
adlb <- expand.grid( |
84 | 3x |
STUDYID = unique(adsl$STUDYID), |
85 | 3x |
USUBJID = adsl$USUBJID, |
86 | 3x |
PARAM = as.factor(param_init_list$relvar1), |
87 | 3x |
AVISIT = visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days), |
88 | 3x |
stringsAsFactors = FALSE |
89 |
) |
|
90 | ||
91 |
# assign AVAL based on different tests |
|
92 | 3x |
adlb <- adlb %>% mutate(AVAL = case_when( |
93 | 3x |
PARAM == param[1] ~ abs(stats::rnorm(nrow(adlb), mean = aval_mean[1], sd = 10)), |
94 | 3x |
PARAM == param[2] ~ abs(stats::rnorm(nrow(adlb), mean = aval_mean[2], sd = 1)), |
95 | 3x |
PARAM == param[3] ~ abs(stats::rnorm(nrow(adlb), mean = aval_mean[3], sd = 0.1)) |
96 |
)) |
|
97 | ||
98 |
# assign related variable values: PARAMxLBCAT are related |
|
99 | 3x |
adlb <- adlb %>% rel_var( |
100 | 3x |
var_name = "LBCAT", |
101 | 3x |
related_var = "PARAM", |
102 | 3x |
var_values = lbcat_init_list$relvar2 |
103 |
) |
|
104 | ||
105 |
# assign related variable values: PARAMxPARAMCD are related |
|
106 | 3x |
adlb <- adlb %>% rel_var( |
107 | 3x |
var_name = "PARAMCD", |
108 | 3x |
related_var = "PARAM", |
109 | 3x |
var_values = param_init_list$relvar2 |
110 |
) |
|
111 | ||
112 | 3x |
adlb <- adlb %>% |
113 | 3x |
dplyr::mutate(LBTESTCD = PARAMCD) %>% |
114 | 3x |
dplyr::mutate(LBTEST = PARAM) |
115 | ||
116 | 3x |
adlb <- adlb %>% dplyr::mutate(AVISITN = dplyr::case_when( |
117 | 3x |
AVISIT == "SCREENING" ~ -1, |
118 | 3x |
AVISIT == "BASELINE" ~ 0, |
119 | 3x |
(grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2, |
120 | 3x |
TRUE ~ NA_real_ |
121 |
)) |
|
122 | ||
123 | 3x |
adlb <- adlb %>% rel_var( |
124 | 3x |
var_name = "AVALU", |
125 | 3x |
related_var = "PARAM", |
126 | 3x |
var_values = unit_init_list$relvar2 |
127 |
) |
|
128 | ||
129 | 3x |
adlb <- adlb %>% |
130 | 3x |
dplyr::mutate(AVISITN = dplyr::case_when( |
131 | 3x |
AVISIT == "SCREENING" ~ -1, |
132 | 3x |
AVISIT == "BASELINE" ~ 0, |
133 | 3x |
(grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2, |
134 | 3x |
TRUE ~ NA_real_ |
135 |
)) |
|
136 | ||
137 |
# order to prepare for change from screening and baseline values |
|
138 | 3x |
adlb <- adlb[order(adlb$STUDYID, adlb$USUBJID, adlb$PARAMCD, adlb$AVISITN), ] |
139 | ||
140 | 3x |
adlb <- Reduce(rbind, lapply(split(adlb, adlb$USUBJID), function(x) { |
141 | 30x |
x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])] |
142 | 30x |
x$ABLFL2 <- ifelse(x$AVISIT == "SCREENING", "Y", "") |
143 | 30x |
x$ABLFL <- ifelse(toupper(visit_format) == "WEEK" & x$AVISIT == "BASELINE", |
144 | 30x |
"Y", |
145 | 30x |
ifelse(toupper(visit_format) == "CYCLE" & x$AVISIT == "CYCLE 1 DAY 1", "Y", "") |
146 |
) |
|
147 | 30x |
x |
148 |
})) |
|
149 | ||
150 | 3x |
adlb$BASE2 <- retain(adlb, adlb$AVAL, adlb$ABLFL2 == "Y") |
151 | 3x |
adlb$BASE <- ifelse(adlb$ABLFL2 != "Y", retain(adlb, adlb$AVAL, adlb$ABLFL == "Y"), NA) |
152 | ||
153 | 3x |
adlb <- adlb %>% |
154 | 3x |
dplyr::mutate(CHG2 = AVAL - BASE2) %>% |
155 | 3x |
dplyr::mutate(PCHG2 = 100 * (CHG2 / BASE2)) %>% |
156 | 3x |
dplyr::mutate(CHG = AVAL - BASE) %>% |
157 | 3x |
dplyr::mutate(PCHG = 100 * (CHG / BASE)) %>% |
158 | 3x |
dplyr::mutate(BASETYPE = "LAST") %>% |
159 | 3x |
dplyr::mutate(ANRLO = dplyr::case_when( |
160 | 3x |
PARAMCD == "ALT" ~ 7, |
161 | 3x |
PARAMCD == "CRP" ~ 8, |
162 | 3x |
PARAMCD == "IGA" ~ 0.8 |
163 |
)) %>% |
|
164 | 3x |
dplyr::mutate(ANRHI = dplyr::case_when( |
165 | 3x |
PARAMCD == "ALT" ~ 55, |
166 | 3x |
PARAMCD == "CRP" ~ 10, |
167 | 3x |
PARAMCD == "IGA" ~ 3 |
168 |
)) %>% |
|
169 | 3x |
dplyr::mutate(ANRIND = factor(dplyr::case_when( |
170 | 3x |
AVAL < ANRLO ~ "LOW", |
171 | 3x |
AVAL > ANRHI ~ "HIGH", |
172 | 3x |
TRUE ~ "NORMAL" |
173 |
))) %>% |
|
174 | 3x |
dplyr::mutate(LBSTRESC = factor(dplyr::case_when( |
175 | 3x |
PARAMCD == "ALT" ~ "<7", |
176 | 3x |
PARAMCD == "CRP" ~ "<8", |
177 | 3x |
PARAMCD == "IGA" ~ ">3" |
178 |
))) %>% |
|
179 | 3x |
dplyr::rowwise() %>% |
180 | 3x |
dplyr::mutate(LOQFL = factor( |
181 | 3x |
ifelse(eval(parse(text = paste(AVAL, LBSTRESC))), "Y", "N") |
182 |
)) %>% |
|
183 | 3x |
dplyr::ungroup() %>% |
184 | 3x |
dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>% |
185 | 3x |
dplyr::mutate(BNRIND = ANRIND[ABLFL == "Y"]) %>% |
186 | 3x |
dplyr::ungroup() %>% |
187 | 3x |
dplyr::mutate(SHIFT1 = factor(ifelse( |
188 | 3x |
AVISITN > 0, |
189 | 3x |
paste( |
190 | 3x |
retain( |
191 | 3x |
adlb, as.character(BNRIND), |
192 | 3x |
AVISITN == 0 |
193 |
), |
|
194 | 3x |
ANRIND, |
195 | 3x |
sep = " to " |
196 |
), |
|
197 |
"" |
|
198 |
))) %>% |
|
199 | 3x |
dplyr::mutate(ATOXGR = factor(dplyr::case_when( |
200 | 3x |
ANRIND == "LOW" ~ sample( |
201 | 3x |
c("-1", "-2", "-3", "-4", "-5"), |
202 | 3x |
nrow(adlb), |
203 | 3x |
replace = TRUE, |
204 | 3x |
prob = c(0.30, 0.25, 0.20, 0.15, 0) |
205 |
), |
|
206 | 3x |
ANRIND == "HIGH" ~ sample( |
207 | 3x |
c("1", "2", "3", "4", "5"), |
208 | 3x |
nrow(adlb), |
209 | 3x |
replace = TRUE, |
210 | 3x |
prob = c(0.30, 0.25, 0.20, 0.15, 0) |
211 |
), |
|
212 | 3x |
ANRIND == "NORMAL" ~ "0" |
213 |
))) %>% |
|
214 | 3x |
dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>% |
215 | 3x |
dplyr::mutate(BTOXGR = ATOXGR[ABLFL == "Y"]) %>% |
216 | 3x |
dplyr::ungroup() %>% |
217 | 3x |
dplyr::mutate(ATPTN = 1) %>% |
218 | 3x |
dplyr::mutate(DTYPE = NA) %>% |
219 | 3x |
dplyr::mutate(BTOXGRL = factor(dplyr::case_when( |
220 | 3x |
BTOXGR == "0" ~ "0", |
221 | 3x |
BTOXGR == "-1" ~ "1", |
222 | 3x |
BTOXGR == "-2" ~ "2", |
223 | 3x |
BTOXGR == "-3" ~ "3", |
224 | 3x |
BTOXGR == "-4" ~ "4", |
225 | 3x |
BTOXGR == "1" ~ "<Missing>", |
226 | 3x |
BTOXGR == "2" ~ "<Missing>", |
227 | 3x |
BTOXGR == "3" ~ "<Missing>", |
228 | 3x |
BTOXGR == "4" ~ "<Missing>" |
229 |
))) %>% |
|
230 | 3x |
dplyr::mutate(BTOXGRH = factor(dplyr::case_when( |
231 | 3x |
BTOXGR == "0" ~ "0", |
232 | 3x |
BTOXGR == "1" ~ "1", |
233 | 3x |
BTOXGR == "2" ~ "2", |
234 | 3x |
BTOXGR == "3" ~ "3", |
235 | 3x |
BTOXGR == "4" ~ "4", |
236 | 3x |
BTOXGR == "-1" ~ "<Missing>", |
237 | 3x |
BTOXGR == "-2" ~ "<Missing>", |
238 | 3x |
BTOXGR == "-3" ~ "<Missing>", |
239 | 3x |
BTOXGR == "-4" ~ "<Missing>", |
240 |
))) %>% |
|
241 | 3x |
dplyr::mutate(ATOXGRL = factor(dplyr::case_when( |
242 | 3x |
ATOXGR == "0" ~ "0", |
243 | 3x |
ATOXGR == "-1" ~ "1", |
244 | 3x |
ATOXGR == "-2" ~ "2", |
245 | 3x |
ATOXGR == "-3" ~ "3", |
246 | 3x |
ATOXGR == "-4" ~ "4", |
247 | 3x |
ATOXGR == "1" ~ "<Missing>", |
248 | 3x |
ATOXGR == "2" ~ "<Missing>", |
249 | 3x |
ATOXGR == "3" ~ "<Missing>", |
250 | 3x |
ATOXGR == "4" ~ "<Missing>", |
251 |
))) %>% |
|
252 | 3x |
dplyr::mutate(ATOXGRH = factor(dplyr::case_when( |
253 | 3x |
ATOXGR == "0" ~ "0", |
254 | 3x |
ATOXGR == "1" ~ "1", |
255 | 3x |
ATOXGR == "2" ~ "2", |
256 | 3x |
ATOXGR == "3" ~ "3", |
257 | 3x |
ATOXGR == "4" ~ "4", |
258 | 3x |
ATOXGR == "-1" ~ "<Missing>", |
259 | 3x |
ATOXGR == "-2" ~ "<Missing>", |
260 | 3x |
ATOXGR == "-3" ~ "<Missing>", |
261 | 3x |
ATOXGR == "-4" ~ "<Missing>", |
262 |
))) %>% |
|
263 | 3x |
var_relabel( |
264 | 3x |
STUDYID = attr(adsl$STUDYID, "label"), |
265 | 3x |
USUBJID = attr(adsl$USUBJID, "label") |
266 |
) |
|
267 | ||
268 |
# High and low descriptions of the different PARAMCD values |
|
269 |
# This is currently hard coded as the GDSR does not have these descriptions yet |
|
270 | 3x |
grade_lookup <- tibble::tribble( |
271 | 3x |
~PARAMCD, ~ATOXDSCL, ~ATOXDSCH, |
272 | 3x |
"ALB", "Hypoalbuminemia", NA_character_, |
273 | 3x |
"ALKPH", NA_character_, "Alkaline phosphatase increased", |
274 | 3x |
"ALT", NA_character_, "Alanine aminotransferase increased", |
275 | 3x |
"AST", NA_character_, "Aspartate aminotransferase increased", |
276 | 3x |
"BILI", NA_character_, "Blood bilirubin increased", |
277 | 3x |
"CA", "Hypocalcemia", "Hypercalcemia", |
278 | 3x |
"CHOLES", NA_character_, "Cholesterol high", |
279 | 3x |
"CK", NA_character_, "CPK increased", |
280 | 3x |
"CREAT", NA_character_, "Creatinine increased", |
281 | 3x |
"CRP", NA_character_, "C reactive protein increased", |
282 | 3x |
"GGT", NA_character_, "GGT increased", |
283 | 3x |
"GLUC", "Hypoglycemia", "Hyperglycemia", |
284 | 3x |
"HGB", "Anemia", "Hemoglobin increased", |
285 | 3x |
"IGA", NA_character_, "Immunoglobulin A increased", |
286 | 3x |
"POTAS", "Hypokalemia", "Hyperkalemia", |
287 | 3x |
"LYMPH", "CD4 lymphocytes decreased", NA_character_, |
288 | 3x |
"PHOS", "Hypophosphatemia", NA_character_, |
289 | 3x |
"PLAT", "Platelet count decreased", NA_character_, |
290 | 3x |
"SODIUM", "Hyponatremia", "Hypernatremia", |
291 | 3x |
"WBC", "White blood cell decreased", "Leukocytosis", |
292 |
) |
|
293 | ||
294 |
# merge grade_lookup onto adlb |
|
295 | 3x |
adlb <- dplyr::left_join(adlb, grade_lookup, by = "PARAMCD") |
296 | ||
297 | 3x |
adlb <- var_relabel( |
298 | 3x |
adlb, |
299 | 3x |
STUDYID = "Study Identifier", |
300 | 3x |
USUBJID = "Unique Subject Identifier" |
301 |
) |
|
302 | ||
303 |
# merge ADSL to be able to add LB date and study day variables |
|
304 | 3x |
adlb <- dplyr::inner_join( |
305 | 3x |
adlb, |
306 | 3x |
adsl, |
307 | 3x |
by = c("STUDYID", "USUBJID") |
308 |
) %>% |
|
309 | 3x |
dplyr::rowwise() %>% |
310 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
311 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
312 | 3x |
TRUE ~ TRTEDTM |
313 |
))) %>% |
|
314 | 3x |
dplyr::ungroup() |
315 | ||
316 | 3x |
adlb <- adlb %>% |
317 | 3x |
dplyr::group_by(USUBJID) %>% |
318 | 3x |
dplyr::arrange(USUBJID, AVISITN) %>% |
319 | 3x |
dplyr::mutate(ADTM = rep( |
320 | 3x |
sort(sample( |
321 | 3x |
seq(lubridate::as_datetime(TRTSDTM[1]), lubridate::as_datetime(TRTENDT[1]), by = "day"), |
322 | 3x |
size = nlevels(AVISIT) |
323 |
)), |
|
324 | 3x |
each = n() / nlevels(AVISIT) |
325 |
)) %>% |
|
326 | 3x |
dplyr::ungroup() %>% |
327 | 3x |
dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>% |
328 | 3x |
dplyr::select(-TRTENDT) %>% |
329 | 3x |
dplyr::arrange(STUDYID, USUBJID, ADTM) |
330 | ||
331 | 3x |
adlb <- adlb %>% |
332 | 3x |
dplyr::mutate(ASPID = sample(seq_len(dplyr::n()))) %>% |
333 | 3x |
dplyr::group_by(USUBJID) %>% |
334 | 3x |
dplyr::mutate(LBSEQ = seq_len(dplyr::n())) %>% |
335 | 3x |
dplyr::mutate(ASEQ = LBSEQ) %>% |
336 | 3x |
dplyr::ungroup() %>% |
337 | 3x |
dplyr::arrange( |
338 | 3x |
STUDYID, |
339 | 3x |
USUBJID, |
340 | 3x |
PARAMCD, |
341 | 3x |
BASETYPE, |
342 | 3x |
AVISITN, |
343 | 3x |
ATPTN, |
344 | 3x |
DTYPE, |
345 | 3x |
ADTM, |
346 | 3x |
LBSEQ, |
347 | 3x |
ASPID |
348 |
) |
|
349 | ||
350 | 3x |
adlb <- adlb %>% dplyr::mutate(ONTRTFL = factor(dplyr::case_when( |
351 | 3x |
!AVISIT %in% c("SCREENING", "BASELINE") ~ "Y", |
352 | 3x |
TRUE ~ "" |
353 |
))) |
|
354 | ||
355 | 3x |
flag_variables <- function(data, |
356 | 3x |
apply_grouping, |
357 | 3x |
apply_filter, |
358 | 3x |
apply_mutate) { |
359 | 15x |
data_compare <- data %>% |
360 | 15x |
dplyr::mutate(row_check = seq_len(nrow(data))) |
361 | ||
362 | 15x |
data <- data_compare %>% |
363 |
{ |
|
364 | 15x |
if (apply_grouping == TRUE) { |
365 | 9x |
dplyr::group_by(., USUBJID, PARAMCD, BASETYPE, AVISIT) |
366 |
} else { |
|
367 | 6x |
dplyr::group_by(., USUBJID, PARAMCD, BASETYPE) |
368 |
} |
|
369 |
} %>% |
|
370 | 15x |
dplyr::arrange(ADTM, ASPID, LBSEQ) %>% |
371 |
{ |
|
372 | 15x |
if (apply_filter == TRUE) { |
373 | 6x |
dplyr::filter( |
374 |
., |
|
375 | 6x |
(AVISIT != "BASELINE" & AVISIT != "SCREENING") & |
376 | 6x |
(ONTRTFL == "Y" | ADTM <= TRTSDTM) |
377 |
) %>% |
|
378 | 6x |
dplyr::filter(ATOXGR == max(as.numeric(as.character(ATOXGR)))) |
379 | 9x |
} else if (apply_filter == FALSE) { |
380 | 6x |
dplyr::filter( |
381 |
., |
|
382 | 6x |
(AVISIT != "BASELINE" & AVISIT != "SCREENING") & |
383 | 6x |
(ONTRTFL == "Y" | ADTM <= TRTSDTM) |
384 |
) %>% |
|
385 | 6x |
dplyr::filter(ATOXGR == min(as.numeric(as.character(ATOXGR)))) |
386 |
} else { |
|
387 | 3x |
dplyr::filter( |
388 |
., |
|
389 | 3x |
AVAL == min(AVAL) & |
390 | 3x |
(AVISIT != "BASELINE" & AVISIT != "SCREENING") & |
391 | 3x |
(ONTRTFL == "Y" | ADTM <= TRTSDTM) |
392 |
) |
|
393 |
} |
|
394 |
} %>% |
|
395 | 15x |
dplyr::slice(1) %>% |
396 |
{ |
|
397 | 15x |
if (apply_mutate == TRUE) { |
398 | 12x |
dplyr::mutate(., new_var = ifelse(is.na(DTYPE), "Y", "")) |
399 |
} else { |
|
400 | 3x |
dplyr::mutate(., new_var = ifelse(is.na(AVAL) == FALSE & is.na(DTYPE), "Y", "")) |
401 |
} |
|
402 |
} %>% |
|
403 | 15x |
dplyr::ungroup() |
404 | ||
405 | 15x |
data_compare$new_var <- ifelse(data_compare$row_check %in% data$row_check, "Y", "") |
406 | ||
407 | 15x |
data_compare <- data_compare[, -which(names(data_compare) %in% c("row_check"))] |
408 | ||
409 | 15x |
return(data_compare) |
410 |
} |
|
411 | ||
412 | 3x |
adlb <- flag_variables(adlb, TRUE, "ELSE", FALSE) %>% dplyr::rename(WORS01FL = "new_var") |
413 | 3x |
adlb <- flag_variables(adlb, FALSE, TRUE, TRUE) %>% dplyr::rename(WGRHIFL = "new_var") |
414 | 3x |
adlb <- flag_variables(adlb, FALSE, FALSE, TRUE) %>% dplyr::rename(WGRLOFL = "new_var") |
415 | 3x |
adlb <- flag_variables(adlb, TRUE, TRUE, TRUE) %>% dplyr::rename(WGRHIVFL = "new_var") |
416 | 3x |
adlb <- flag_variables(adlb, TRUE, FALSE, TRUE) %>% dplyr::rename(WGRLOVFL = "new_var") |
417 | ||
418 | 3x |
adlb <- adlb %>% dplyr::mutate(ANL01FL = ifelse( |
419 | 3x |
(ABLFL == "Y" | (WORS01FL == "Y" & is.na(DTYPE))) & |
420 | 3x |
(AVISIT != "SCREENING"), |
421 | 3x |
"Y", |
422 |
"" |
|
423 |
)) |
|
424 | ||
425 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) { |
426 | ! |
adlb <- mutate_na(ds = adlb, na_vars = na_vars, na_percentage = na_percentage) |
427 |
} |
|
428 | ||
429 |
# apply metadata |
|
430 | ||
431 | 3x |
adlb <- apply_metadata(adlb, "metadata/ADLB.yml") |
432 | ||
433 | 3x |
return(adlb) |
434 |
} |
1 |
#' EORTC QLQ-C30 V3 Analysis Dataset (ADQLQC) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating a random EORTC QLQ-C30 V3 Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `PARCAT1N`, `PARAMCD`, `BASETYPE`, `AVISITN`, `ATPTN`, `ADTM`, `QSSEQ` |
|
11 |
#' |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @param percent (`numeric`)\cr Completion - Completed at least y percent of questions, 1 record per visit |
|
14 |
#' @param number (`numeric`)\cr Completion - Completed at least x question(s), 1 record per visit |
|
15 |
#' @template param_cached |
|
16 |
#' @templateVar data adqlqc |
|
17 |
#' |
|
18 |
#' @return `data.frame` |
|
19 |
#' @export |
|
20 |
#' |
|
21 |
#' @examples |
|
22 |
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1) |
|
23 |
#' |
|
24 |
#' adqlqc <- radqlqc(adsl, seed = 1, percent = 80, number = 2) |
|
25 |
#' adqlqc |
|
26 |
radqlqc <- function(adsl, |
|
27 |
percent, |
|
28 |
number, |
|
29 |
seed = NULL, |
|
30 |
cached = FALSE) { |
|
31 | 4x |
checkmate::assert_flag(cached) |
32 | 4x |
if (cached) { |
33 | 1x |
return(get_cached_data("cadqlqc")) |
34 |
} |
|
35 | ||
36 | 3x |
checkmate::assert_data_frame(adsl) |
37 | 3x |
checkmate::assert_number(percent, lower = 1, upper = 100) |
38 | 3x |
checkmate::assert_number(number, lower = 1) |
39 | ||
40 | 3x |
if (!is.null(seed)) { |
41 | 3x |
set.seed(seed) |
42 |
} |
|
43 | ||
44 |
# ADQLQC data ------------------------------------------------------------- |
|
45 | 3x |
qs <- get_qs_data(adsl, n_assessments = 5L, seed = seed, na_percentage = 0.1) |
46 |
# prepare ADaM ADQLQC data |
|
47 | 3x |
adqlqc1 <- prep_adqlqc(df = qs) |
48 |
# derive AVAL and AVALC |
|
49 | 3x |
adqlqc1 <- mutate( |
50 | 3x |
adqlqc1, |
51 | 3x |
AVAL = as.numeric(QSSTRESC), |
52 | 3x |
AVALC = case_when( |
53 | 3x |
QSTESTCD == "QSALL" ~ QSREASND, |
54 | 3x |
TRUE ~ QSORRES |
55 |
), |
|
56 | 3x |
AVISIT = VISIT, |
57 | 3x |
AVISITN = VISITNUM, |
58 | 3x |
ADTM = QSDTC |
59 |
) |
|
60 |
# include scale calculation |
|
61 | 3x |
adqlqc_tmp <- calc_scales(adqlqc1) |
62 |
# order to prepare for change from screening and baseline values |
|
63 | 3x |
adqlqc_tmp <- adqlqc_tmp[order(adqlqc_tmp$STUDYID, adqlqc_tmp$USUBJID, adqlqc_tmp$PARAMCD, adqlqc_tmp$AVISITN), ] |
64 | ||
65 | 3x |
adqlqc_tmp <- Reduce( |
66 | 3x |
rbind, |
67 | 3x |
lapply( |
68 | 3x |
split(adqlqc_tmp, adqlqc_tmp$USUBJID), |
69 | 3x |
function(x) { |
70 | 30x |
x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])] |
71 | 30x |
x$ABLFL2 <- ifelse(x$AVISIT == "SCREENING", "Y", "") |
72 | 30x |
x$ABLFL <- ifelse( |
73 | 30x |
x$AVISIT == "BASELINE" & |
74 | 30x |
x$PARAMCD != "EX028", |
75 | 30x |
"Y", |
76 | 30x |
ifelse( |
77 | 30x |
x$AVISIT == "CYCLE 1 DAY 1" & |
78 | 30x |
x$PARAMCD != "EX028", |
79 | 30x |
"Y", |
80 |
"" |
|
81 |
) |
|
82 |
) |
|
83 | 30x |
x |
84 |
} |
|
85 |
) |
|
86 |
) |
|
87 | ||
88 | 3x |
adqlqc_tmp$BASE2 <- ifelse( |
89 | 3x |
str_detect(adqlqc_tmp$PARCAT2, "Completion", negate = TRUE), |
90 | 3x |
retain( |
91 | 3x |
df = adqlqc_tmp, |
92 | 3x |
value_var = adqlqc_tmp$AVAL, |
93 | 3x |
event = adqlqc_tmp$ABLFL2 == "Y" |
94 |
), |
|
95 | 3x |
NA |
96 |
) |
|
97 | ||
98 | 3x |
adqlqc_tmp$BASE <- ifelse( |
99 | 3x |
adqlqc_tmp$ABLFL2 != "Y" & |
100 | 3x |
str_detect(adqlqc_tmp$PARCAT2, "Completion", negate = TRUE), |
101 | 3x |
retain( |
102 | 3x |
adqlqc_tmp, |
103 | 3x |
adqlqc_tmp$AVAL, |
104 | 3x |
adqlqc_tmp$ABLFL == "Y" |
105 |
), |
|
106 | 3x |
NA |
107 |
) |
|
108 | ||
109 | 3x |
adqlqc_tmp <- adqlqc_tmp %>% |
110 | 3x |
dplyr::mutate(CHG2 = AVAL - BASE2) %>% |
111 | 3x |
dplyr::mutate(PCHG2 = 100 * (CHG2 / BASE2)) %>% |
112 | 3x |
dplyr::mutate(CHG = AVAL - BASE) %>% |
113 | 3x |
dplyr::mutate(PCHG = 100 * (CHG / BASE)) %>% |
114 | 3x |
var_relabel( |
115 | 3x |
STUDYID = attr(adsl$STUDYID, "label"), |
116 | 3x |
USUBJID = attr(adsl$USUBJID, "label") |
117 |
) |
|
118 |
# derive CHGCAT1 ---------------------------------------------------------- |
|
119 | 3x |
adqlqc_tmp <- derv_chgcat1(dataset = adqlqc_tmp) |
120 | ||
121 | 3x |
adqlqc_tmp <- var_relabel( |
122 | 3x |
adqlqc_tmp, |
123 | 3x |
STUDYID = "Study Identifier", |
124 | 3x |
USUBJID = "Unique Subject Identifier" |
125 |
) |
|
126 | ||
127 | 3x |
adqlqc_tmp <- arrange( |
128 | 3x |
adqlqc_tmp, |
129 | 3x |
USUBJID, |
130 | 3x |
AVISITN |
131 |
) |
|
132 |
# Merge ADSL -------------------------------------------------------------- |
|
133 |
# ADSL variables needed for ADQLQC |
|
134 | 3x |
adsl_vars <- c( |
135 | 3x |
"STUDYID", "USUBJID", "SUBJID", "SITEID", "REGION1", "COUNTRY", "ETHNIC", "AGE", |
136 | 3x |
"AGEU", "AAGE", "AAGEU", "AGEGR1", "AGEGR2", "AGEGR3", "STRATwNM", "STRATw", "STRATwV", |
137 | 3x |
"SEX", "RACE", "ITTFL", "SAFFL", "PPROTFL", "TRT01P", "TRT01A", |
138 | 3x |
"TRTSEQP", "TRTSEQA", "TRTSDTM", "TRTSDT", "TRTEDTM", "TRTEDT", "DCUTDT" |
139 |
) |
|
140 | 3x |
adsl <- select( |
141 | 3x |
adsl, |
142 | 3x |
any_of(adsl_vars) |
143 |
) |
|
144 | 3x |
adqlqc <- dplyr::inner_join( |
145 | 3x |
adqlqc_tmp, |
146 | 3x |
adsl, |
147 | 3x |
by = c("STUDYID", "USUBJID") |
148 |
) %>% |
|
149 | 3x |
dplyr::mutate( |
150 | 3x |
ADY_der = ceiling(difftime(ADTM, TRTSDTM, units = "days")), |
151 | 3x |
ADY = case_when( |
152 | 3x |
ADY_der >= 0 ~ ADY_der + 1, |
153 | 3x |
TRUE ~ ADY_der |
154 |
) |
|
155 |
) %>% |
|
156 | 3x |
select(-ADY_der) |
157 | ||
158 |
# get compliance data --------------------------------------------------- |
|
159 | 3x |
compliance_data <- comp_derv( |
160 | 3x |
dataset = adqlqc, |
161 | 3x |
percent = percent, |
162 | 3x |
number = number |
163 |
) |
|
164 |
# add ADSL variables |
|
165 | 3x |
compliance_data <- left_join( |
166 | 3x |
compliance_data, |
167 | 3x |
adsl, |
168 | 3x |
by = c("STUDYID", "USUBJID") |
169 |
) |
|
170 |
# add completion to ADQLQC |
|
171 | 3x |
adqlqc <- bind_rows( |
172 | 3x |
adqlqc, |
173 | 3x |
compliance_data |
174 |
) %>% |
|
175 | 3x |
arrange( |
176 | 3x |
USUBJID, |
177 | 3x |
AVISITN, |
178 | 3x |
QSTESTCD |
179 |
) |
|
180 |
# find first set of questionnaire observations |
|
181 | 3x |
adqlqc_x <- arrange( |
182 | 3x |
adqlqc, |
183 | 3x |
USUBJID, |
184 | 3x |
ADTM |
185 |
) %>% |
|
186 | 3x |
filter( |
187 | 3x |
PARAMCD != "QSALL" & |
188 | 3x |
!str_detect(AVISIT, "SCREENING|UNSCHEDULED") |
189 |
) %>% |
|
190 | 3x |
group_by( |
191 | 3x |
USUBJID, |
192 | 3x |
ADTM |
193 |
) %>% |
|
194 | 3x |
summarise(first_date = first(ADTM), .groups = "drop") |
195 | ||
196 | 3x |
adqlqc <- left_join( |
197 | 3x |
adqlqc, |
198 | 3x |
adqlqc_x, |
199 | 3x |
by = c("USUBJID", "ADTM") |
200 |
) %>% |
|
201 | 3x |
mutate( |
202 | 3x |
ANL01FL = case_when( |
203 | 3x |
PARAMCD != "QSALL" & ABLFL == "Y" ~ "Y", |
204 | 3x |
PARAMCD != "QSALL" & |
205 | 3x |
!str_detect(AVISIT, "UNSCHEDULED") & |
206 | 3x |
!is.na(first_date) ~ "Y" |
207 |
) |
|
208 |
) %>% |
|
209 | 3x |
select(-first_date) |
210 | ||
211 |
# final dataset ----------------------------------------------------------- |
|
212 | 3x |
adqlqc_final <- adqlqc %>% |
213 | 3x |
dplyr::group_by(USUBJID) %>% |
214 | 3x |
dplyr::mutate(ASEQ = row_number()) %>% |
215 | 3x |
dplyr::ungroup() %>% |
216 | 3x |
dplyr::arrange( |
217 | 3x |
STUDYID, |
218 | 3x |
USUBJID, |
219 | 3x |
AVISITN |
220 |
) %>% |
|
221 | 3x |
select( |
222 | 3x |
-c("BASE2", "CHG2", "PCHG2", "ABLFL2") |
223 |
) %>% |
|
224 | 3x |
ungroup() |
225 | ||
226 | 3x |
adam_vars <- c( |
227 | 3x |
adsl_vars, "QSSEQ", "QSCAT", "QSSCAT", "QSDTC", "QSSPID", "QSSTAT", "QSSTRESN", |
228 | 3x |
"QSSTRESC", "QSSTRESU", "QSORRES", "QSORRESU", "QSTEST", "QSTESTCD", "QSTPT", |
229 | 3x |
"QSTPTNUM", "QSTPTREF", "QSDY", "QSREASND", "QSTSTDTL", "QSEVAL", "VISIT", "VISITNUM", |
230 | 3x |
"PARAM", "PARAMCD", "PARCAT1", "PARCAT1N", "PARCAT2", "AVAL", "AVALC", "AREASND", |
231 | 3x |
"BASE", "BASETYPE", "ABLFL", "CHG", "PCHG", "CHGCAT1", "CRIT1", "CRIT1FL", "DTYPE", |
232 | 3x |
"ADTM", "ADT", "ADY", "ADTF", "ATMF", "ATPT", "ATPTN", "AVISIT", "AVISITN", "APHASE", |
233 | 3x |
"APHASEN", "APERIOD", "APERIODC", "APERIODC", "ASPER", "ASPERC", "PERADY", "TRTP", |
234 | 3x |
"TRTA", "ONTRTFL", "LAST02FL", "FIRS02FL", "ANL01FL", "ANL02FL", "ANL03FL", |
235 | 3x |
"ANL04FL", "CGCAT1NX" |
236 |
) |
|
237 |
# order variables in mapped qs by variables in adam_vars |
|
238 | 3x |
adqlqc_name_ordered <- names(adqlqc_final)[order(match(names(adqlqc_final), adam_vars))] |
239 |
# adqlqc with variables ordered per gdsr |
|
240 | 3x |
adqlqc_final <- adqlqc_final %>% |
241 | 3x |
select( |
242 | 3x |
any_of(adqlqc_name_ordered) |
243 |
) |
|
244 | ||
245 | 3x |
adqlqc_final <- relocate(adqlqc_final, "QSEVLINT", .after = "QSTESTCD") %>% |
246 | 3x |
arrange( |
247 | 3x |
USUBJID, |
248 | 3x |
AVISITN, |
249 | 3x |
ASEQ, |
250 | 3x |
QSTESTCD |
251 |
) |
|
252 |
# apply metadata |
|
253 | 3x |
adqlqc_final <- apply_metadata(adqlqc_final, "metadata/ADQLQC.yml") |
254 | 3x |
return(adqlqc_final) |
255 |
} |
|
256 | ||
257 |
#' Helper Functions for Constructing ADQLQC |
|
258 |
#' |
|
259 |
#' Internal functions used by `radqlqc`. |
|
260 |
#' |
|
261 |
#' @inheritParams argument_convention |
|
262 |
#' @inheritParams radqlqc |
|
263 |
#' |
|
264 |
#' @examples |
|
265 |
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1) |
|
266 |
#' adqlqc <- radqlqc(adsl, seed = 1, percent = 80, number = 2) |
|
267 |
#' |
|
268 |
#' @name h_adqlqc |
|
269 |
NULL |
|
270 | ||
271 |
#' @describeIn h_adqlqc Questionnaires EORTC QLQ-C30 V3.0 SDTM (QS) |
|
272 |
#' |
|
273 |
#' Function for generating random Questionnaires SDTM domain |
|
274 |
#' |
|
275 |
#' @return a dataframe with SDTM questionnaire data |
|
276 |
#' @keywords internal |
|
277 |
get_qs_data <- function(adsl, |
|
278 |
visit_format = "CYCLE", |
|
279 |
n_assessments = 5L, |
|
280 |
n_days = 1L, |
|
281 |
lookup = NULL, |
|
282 |
seed = NULL, |
|
283 |
na_percentage = 0, |
|
284 |
na_vars = list( |
|
285 |
QSORRES = c(1234, 0.2), |
|
286 |
QSSTRESC = c(1234, 0.2) |
|
287 |
)) { |
|
288 | 3x |
load(system.file("sysdata.rda", package = "random.cdisc.data")) |
289 | 3x |
checkmate::assert_string(visit_format) |
290 | 3x |
checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE) |
291 | 3x |
checkmate::assert_integer(n_days, len = 1, any.missing = FALSE) |
292 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
293 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1, na.ok = TRUE) |
294 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
295 | 3x |
checkmate::assert_true(na_percentage < 1) |
296 | ||
297 |
# get subjects for QS data from ADSL |
|
298 |
# get studyid, subject for QS generation |
|
299 | 3x |
qs <- select( |
300 | 3x |
adsl, |
301 | 3x |
STUDYID, |
302 | 3x |
USUBJID |
303 |
) %>% |
|
304 | 3x |
mutate( |
305 | 3x |
DOMAIN = "QS" |
306 |
) |
|
307 | ||
308 |
# QS prep ----------------------------------------------------------------- |
|
309 |
# get questionnaire function for QS |
|
310 |
# QSTESTCD: EOR0101 to EOR0130 |
|
311 | 3x |
eortc_qlq_c30_sub <- filter( |
312 | 3x |
eortc_qlq_c30, |
313 | 3x |
as.numeric(str_extract(QSTESTCD, "\\d+$")) >= 101 & |
314 | 3x |
as.numeric(str_extract(QSTESTCD, "\\d+$")) <= 130 |
315 |
) %>% |
|
316 | 3x |
select(-publication_name) |
317 | ||
318 |
# validate and initialize QSTEST vectors |
|
319 | 3x |
qstest_init_list <- relvar_init( |
320 | 3x |
unique(eortc_qlq_c30_sub$QSTEST), |
321 | 3x |
unique(eortc_qlq_c30_sub$QSTESTCD) |
322 |
) |
|
323 | ||
324 | 3x |
if (!is.null(seed)) { |
325 | 3x |
set.seed(seed) |
326 |
} |
|
327 | ||
328 | 3x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
329 | ||
330 | 3x |
lookup_qs <- if (!is.null(lookup)) { |
331 | ! |
lookup |
332 |
} else { |
|
333 | 3x |
expand.grid( |
334 | 3x |
STUDYID = unique(qs$STUDYID), |
335 | 3x |
USUBJID = qs$USUBJID, |
336 | 3x |
QSTEST = qstest_init_list$relvar1, |
337 | 3x |
VISIT = visit_schedule( |
338 | 3x |
visit_format = visit_format, |
339 | 3x |
n_assessments = n_assessments, |
340 | 3x |
n_days = n_days |
341 |
), |
|
342 | 3x |
stringsAsFactors = FALSE |
343 |
) |
|
344 |
} |
|
345 | ||
346 |
# assign related variable values: QSTESTxQSTESTCD are related |
|
347 | 3x |
lookup_qs <- lookup_qs %>% rel_var( |
348 | 3x |
var_name = "QSTESTCD", |
349 | 3x |
related_var = "QSTEST", |
350 | 3x |
var_values = qstest_init_list$relvar2 |
351 |
) |
|
352 | ||
353 | 3x |
lookup_qs <- left_join( |
354 | 3x |
lookup_qs, |
355 | 3x |
eortc_qlq_c30_sub, |
356 | 3x |
by = c( |
357 | 3x |
"QSTEST", |
358 | 3x |
"QSTESTCD" |
359 |
), |
|
360 | 3x |
multiple = "all", |
361 | 3x |
relationship = "many-to-many" |
362 |
) |
|
363 | ||
364 | 3x |
lookup_qs <- dplyr::mutate( |
365 | 3x |
lookup_qs, |
366 | 3x |
VISITNUM = dplyr::case_when( |
367 | 3x |
VISIT == "SCREENING" ~ -1, |
368 | 3x |
VISIT == "BASELINE" ~ 0, |
369 | 3x |
(grepl("^WEEK", VISIT) | grepl("^CYCLE", VISIT)) ~ as.numeric(VISIT) - 2, |
370 | 3x |
TRUE ~ NA_real_ |
371 |
) |
|
372 | 3x |
) %>% arrange(USUBJID) |
373 | ||
374 |
# # prep QSALL -------------------------------------------------------------- |
|
375 |
# get last subject and visit for QSALL |
|
376 | 3x |
last_subj_vis <- select(lookup_qs, USUBJID, VISIT) %>% |
377 | 3x |
distinct() %>% |
378 | 3x |
slice(n()) |
379 | 3x |
last_subj_vis_full <- filter( |
380 | 3x |
lookup_qs, |
381 | 3x |
USUBJID == last_subj_vis$USUBJID, |
382 | 3x |
VISIT == last_subj_vis$VISIT |
383 |
) |
|
384 | ||
385 | 3x |
qsall_data1 <- tibble::tibble( |
386 | 3x |
STUDYID = unique(last_subj_vis_full$STUDYID), |
387 | 3x |
USUBJID = unique(last_subj_vis_full$USUBJID), |
388 | 3x |
VISIT = unique(last_subj_vis_full$VISIT), |
389 | 3x |
VISITNUM = unique(last_subj_vis_full$VISITNUM), |
390 | 3x |
QSTESTCD = "QSALL", |
391 | 3x |
QSTEST = "Questionnaires", |
392 | 3x |
QSSTAT = "NOT DONE", |
393 | 3x |
QSREASND = "SUBJECT REFUSED" |
394 |
) |
|
395 | ||
396 |
# remove last subject and visit from main data |
|
397 | 3x |
lookup_qs_sub <- anti_join( |
398 | 3x |
lookup_qs, |
399 | 3x |
last_subj_vis_full, |
400 | 3x |
by = c("USUBJID", "VISIT") |
401 |
) |
|
402 | ||
403 | 3x |
set.seed(seed) |
404 | 3x |
lookup_qs_sub_x <- lookup_qs_sub %>% |
405 | 3x |
group_by( |
406 | 3x |
USUBJID, |
407 | 3x |
QSTESTCD, |
408 | 3x |
VISIT |
409 |
) %>% |
|
410 | 3x |
slice_sample(n = 1) %>% |
411 | 3x |
ungroup() %>% |
412 | 3x |
as.data.frame() |
413 | ||
414 | 3x |
lookup_qs_sub_x <- arrange( |
415 | 3x |
lookup_qs_sub_x, |
416 | 3x |
USUBJID, |
417 | 3x |
VISITNUM |
418 |
) |
|
419 | ||
420 |
# add date: QSDTC --------------------------------------------------------- |
|
421 |
# get treatment dates from ADSL |
|
422 | 3x |
adsl_trt <- select( |
423 | 3x |
adsl, |
424 | 3x |
USUBJID, |
425 | 3x |
TRTSDTM, |
426 | 3x |
TRTEDTM |
427 |
) |
|
428 |
# use to derive QSDTC |
|
429 |
# if no treatment end date, create an arbituary one |
|
430 | 3x |
trt_end_date <- max(adsl_trt$TRTEDTM, na.rm = TRUE) |
431 | ||
432 | 3x |
lookup_qs_sub_x <- left_join( |
433 | 3x |
lookup_qs_sub_x, |
434 | 3x |
adsl_trt, |
435 | 3x |
by = "USUBJID" |
436 |
) %>% |
|
437 | 3x |
group_by( |
438 | 3x |
USUBJID |
439 |
) %>% |
|
440 | 3x |
mutate(QSDTC = get_random_dates_between( |
441 | 3x |
from = TRTSDTM, |
442 | 3x |
to = ifelse( |
443 | 3x |
is.na(TRTEDTM), |
444 | 3x |
trt_end_date, |
445 | 3x |
TRTEDTM |
446 |
), |
|
447 | 3x |
visit_id = VISITNUM |
448 |
)) %>% |
|
449 | 3x |
select(-c("TRTSDTM", "TRTEDTM")) |
450 | ||
451 |
# filter out subjects with missing dates |
|
452 | 3x |
lookup_qs_sub_x1 <- filter( |
453 | 3x |
lookup_qs_sub_x, |
454 | 3x |
!is.na(QSDTC) |
455 |
) |
|
456 | ||
457 |
# subjects with missing dates |
|
458 | 3x |
lookup_qs_sub_x2 <- filter( |
459 | 3x |
lookup_qs_sub_x, |
460 | 3x |
is.na(QSDTC) |
461 |
) %>% |
|
462 | 3x |
select( |
463 | 3x |
STUDYID, |
464 | 3x |
USUBJID, |
465 | 3x |
VISIT, |
466 | 3x |
VISITNUM |
467 |
) %>% |
|
468 | 3x |
distinct() |
469 | ||
470 |
# generate QSALL for subjects with missing dates |
|
471 | 3x |
qsall_data2 <- mutate( |
472 | 3x |
lookup_qs_sub_x2, |
473 | 3x |
QSTESTCD = "QSALL", |
474 | 3x |
QSTEST = "Questionnaires", |
475 | 3x |
QSSTAT = "NOT DONE", |
476 | 3x |
QSREASND = "SUBJECT REFUSED" |
477 |
) |
|
478 | ||
479 |
# add qsall data to original item data |
|
480 | 3x |
lookup_qs_sub_all <- bind_rows( |
481 | 3x |
lookup_qs_sub_x1, |
482 | 3x |
qsall_data1, |
483 | 3x |
qsall_data2 |
484 |
) |
|
485 | ||
486 | 3x |
qs_all <- lookup_qs_sub_all %>% |
487 | 3x |
arrange( |
488 | 3x |
STUDYID, |
489 | 3x |
USUBJID, |
490 | 3x |
VISITNUM |
491 |
) %>% |
|
492 | 3x |
dplyr::group_by(USUBJID) %>% |
493 | 3x |
dplyr::ungroup() |
494 | ||
495 |
# get first and second subject ids |
|
496 | 3x |
first_second_subj <- select(qs_all, USUBJID) %>% |
497 | 3x |
distinct() %>% |
498 | 3x |
slice(1:2) |
499 | ||
500 | 3x |
qs1 <- filter( |
501 | 3x |
qs_all, |
502 | 3x |
USUBJID %in% first_second_subj$USUBJID |
503 |
) |
|
504 | ||
505 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) { |
506 | 3x |
qs1 <- mutate_na(ds = qs1, na_vars = na_vars, na_percentage = na_percentage) |
507 |
} |
|
508 | ||
509 |
# QSSTAT = NOT DONE |
|
510 | 3x |
qs1 <- mutate( |
511 | 3x |
qs1, |
512 | 3x |
QSSTAT = case_when( |
513 | 3x |
is.na(QSORRES) & is.na(QSSTRESC) ~ "NOT DONE" |
514 |
) |
|
515 |
) |
|
516 | ||
517 |
# remove first and second subjects from main data |
|
518 | 3x |
qs2 <- anti_join( |
519 | 3x |
qs_all, |
520 | 3x |
qs1, |
521 | 3x |
by = c("USUBJID") |
522 |
) |
|
523 | ||
524 | 3x |
final_qs <- rbind( |
525 | 3x |
qs1, |
526 | 3x |
qs2 |
527 |
) %>% |
|
528 | 3x |
group_by(USUBJID) %>% |
529 | 3x |
dplyr::mutate(QSSEQ = row_number()) %>% |
530 | 3x |
arrange( |
531 | 3x |
STUDYID, |
532 | 3x |
USUBJID, |
533 | 3x |
VISITNUM |
534 |
) %>% |
|
535 | 3x |
ungroup() |
536 | ||
537 |
# ordered variables as per gdsr |
|
538 | 3x |
final_qs <- select( |
539 | 3x |
final_qs, |
540 | 3x |
STUDYID, |
541 | 3x |
USUBJID, |
542 | 3x |
QSSEQ, |
543 | 3x |
QSTESTCD, |
544 | 3x |
QSTEST, |
545 | 3x |
QSCAT, |
546 | 3x |
QSSCAT, |
547 | 3x |
QSORRES, |
548 | 3x |
QSORRESU, |
549 | 3x |
QSSTRESC, |
550 | 3x |
QSSTRESU, |
551 | 3x |
QSSTAT, |
552 | 3x |
QSREASND, |
553 | 3x |
VISITNUM, |
554 | 3x |
VISIT, |
555 | 3x |
QSDTC, |
556 | 3x |
QSEVLINT |
557 |
) |
|
558 | 3x |
return(final_qs) |
559 |
} |
|
560 | ||
561 |
#' @describeIn h_adqlqc Function for generating random dates between 2 dates |
|
562 |
#' |
|
563 |
#' @param from (`datetime vector`)\cr Start date/times. |
|
564 |
#' @param to (`datetime vector`)\cr End date/times. |
|
565 |
#' @param visit_id (`vector`)\cr Visit identifiers. |
|
566 |
#' |
|
567 |
#' @return Data frame with new randomly generated dates variable. |
|
568 |
#' @keywords internal |
|
569 |
get_random_dates_between <- function(from, to, visit_id) { |
|
570 | 30x |
min_date <- min(lubridate::as_datetime(from), na.rm = TRUE) |
571 | 30x |
max_date <- max(lubridate::as_datetime(to), na.rm = TRUE) |
572 | 30x |
date_seq <- seq(from = min_date + lubridate::days(1), to = max_date, by = "28 days") |
573 | ||
574 | 30x |
visit_ids <- unique(visit_id) |
575 | 30x |
out <- sapply(visit_ids, simplify = TRUE, USE.NAMES = TRUE, FUN = function(x) { |
576 | 177x |
if (x == -1) { |
577 | 30x |
random_days_to_subtract <- lubridate::days(sample(1:10, size = 1)) |
578 | 30x |
min_date - random_days_to_subtract |
579 | 147x |
} else if (x == 0) { |
580 | 30x |
min_date |
581 | 117x |
} else if (x > 0) { |
582 | 117x |
if (x %in% seq_along(date_seq)) { |
583 | 117x |
date_seq[[x]] |
584 |
} else { |
|
585 | 30x |
NA |
586 |
} |
|
587 |
} |
|
588 |
}) |
|
589 | 30x |
lubridate::as_datetime(out[match(visit_id, visit_ids)]) |
590 |
} |
|
591 | ||
592 |
#' @describeIn h_adqlqc Prepare ADaM ADQLQC data, adding PARAMCD to SDTM QS data |
|
593 |
#' |
|
594 |
#' @param df (`data.frame`)\cr SDTM QS dataset. |
|
595 |
#' |
|
596 |
#' @return `data.frame` |
|
597 |
#' @keywords internal |
|
598 |
prep_adqlqc <- function(df) { |
|
599 |
# create PARAMCD from QSTESTCD |
|
600 | 3x |
adqlqc <- dplyr::mutate( |
601 | 3x |
df, |
602 | 3x |
PARAMCD = case_when( |
603 | 3x |
QSTESTCD == "EOR0101" ~ "QS02801", |
604 | 3x |
QSTESTCD == "EOR0102" ~ "QS02802", |
605 | 3x |
QSTESTCD == "EOR0103" ~ "QS02803", |
606 | 3x |
QSTESTCD == "EOR0104" ~ "QS02804", |
607 | 3x |
QSTESTCD == "EOR0105" ~ "QS02805", |
608 | 3x |
QSTESTCD == "EOR0106" ~ "QS02806", |
609 | 3x |
QSTESTCD == "EOR0107" ~ "QS02807", |
610 | 3x |
QSTESTCD == "EOR0108" ~ "QS02808", |
611 | 3x |
QSTESTCD == "EOR0109" ~ "QS02809", |
612 | 3x |
QSTESTCD == "EOR0110" ~ "QS02810", |
613 | 3x |
QSTESTCD == "EOR0111" ~ "QS02811", |
614 | 3x |
QSTESTCD == "EOR0112" ~ "QS02812", |
615 | 3x |
QSTESTCD == "EOR0113" ~ "QS02813", |
616 | 3x |
QSTESTCD == "EOR0114" ~ "QS02814", |
617 | 3x |
QSTESTCD == "EOR0115" ~ "QS02815", |
618 | 3x |
QSTESTCD == "EOR0116" ~ "QS02816", |
619 | 3x |
QSTESTCD == "EOR0117" ~ "QS02817", |
620 | 3x |
QSTESTCD == "EOR0118" ~ "QS02818", |
621 | 3x |
QSTESTCD == "EOR0119" ~ "QS02819", |
622 | 3x |
QSTESTCD == "EOR0120" ~ "QS02820", |
623 | 3x |
QSTESTCD == "EOR0121" ~ "QS02821", |
624 | 3x |
QSTESTCD == "EOR0122" ~ "QS02822", |
625 | 3x |
QSTESTCD == "EOR0123" ~ "QS02823", |
626 | 3x |
QSTESTCD == "EOR0124" ~ "QS02824", |
627 | 3x |
QSTESTCD == "EOR0125" ~ "QS02825", |
628 | 3x |
QSTESTCD == "EOR0126" ~ "QS02826", |
629 | 3x |
QSTESTCD == "EOR0127" ~ "QS02827", |
630 | 3x |
QSTESTCD == "EOR0128" ~ "QS02828", |
631 | 3x |
QSTESTCD == "EOR0129" ~ "QS02829", |
632 | 3x |
QSTESTCD == "EOR0130" ~ "QS02830", |
633 | 3x |
TRUE ~ QSTESTCD |
634 |
) |
|
635 |
) |
|
636 | 3x |
load(system.file("sysdata.rda", package = "random.cdisc.data")) |
637 | 3x |
adqlqc1 <- dplyr::left_join( |
638 | 3x |
adqlqc, |
639 | 3x |
gdsr_param_adqlqc, |
640 | 3x |
by = "PARAMCD" |
641 |
) |
|
642 | 3x |
return(adqlqc1) |
643 |
} |
|
644 | ||
645 |
#' @describeIn h_adqlqc Scale calculation for ADQLQC data |
|
646 |
#' |
|
647 |
#' @param adqlqc1 (`data.frame`)\cr Prepared data generated from the [prep_adqlqc()] function. |
|
648 |
#' |
|
649 |
#' @return `data.frame` |
|
650 |
#' @keywords internal |
|
651 |
calc_scales <- function(adqlqc1) { |
|
652 |
# Prep scale data --------------------------------------------------------- |
|
653 |
# parcat2 = scales or global health status |
|
654 |
# global health status/scales data |
|
655 |
# QSTESTCD: EOR0131 to EOR0145 (global health status and scales) |
|
656 | 3x |
load(system.file("sysdata.rda", package = "random.cdisc.data")) |
657 | 3x |
eortc_qlq_c30_sub <- filter( |
658 | 3x |
eortc_qlq_c30, |
659 | 3x |
!(as.numeric(str_extract(QSTESTCD, "\\d+$")) >= 101 & as.numeric(str_extract(QSTESTCD, "\\d+$")) <= 130) |
660 |
) %>% |
|
661 | 3x |
mutate( |
662 | 3x |
PARAMCD = case_when( |
663 | 3x |
QSTESTCD == "EOR0131" ~ "QS028QL2", |
664 | 3x |
QSTESTCD == "EOR0132" ~ "QS028PF2", |
665 | 3x |
QSTESTCD == "EOR0133" ~ "QS028RF2", |
666 | 3x |
QSTESTCD == "EOR0134" ~ "QS028EF", |
667 | 3x |
QSTESTCD == "EOR0135" ~ "QS028CF", |
668 | 3x |
QSTESTCD == "EOR0136" ~ "QS028SF", |
669 | 3x |
QSTESTCD == "EOR0137" ~ "QS028FA", |
670 | 3x |
QSTESTCD == "EOR0138" ~ "QS028NV", |
671 | 3x |
QSTESTCD == "EOR0139" ~ "QS028PA", |
672 | 3x |
QSTESTCD == "EOR0140" ~ "QS028DY", |
673 | 3x |
QSTESTCD == "EOR0141" ~ "QS028SL", |
674 | 3x |
QSTESTCD == "EOR0142" ~ "QS028AP", |
675 | 3x |
QSTESTCD == "EOR0143" ~ "QS028CO", |
676 | 3x |
QSTESTCD == "EOR0144" ~ "QS028DI", |
677 | 3x |
QSTESTCD == "EOR0145" ~ "QS028FI", |
678 | 3x |
TRUE ~ QSTESTCD |
679 |
) |
|
680 |
) %>% |
|
681 | 3x |
select(-publication_name) |
682 | ||
683 |
# ADaM global health status and scales from gdsr |
|
684 | 3x |
gdsr_param_adqlqc <- gdsr_param_adqlqc %>% |
685 | 3x |
filter( |
686 | 3x |
!str_detect(PARCAT2, "Original Items|Completion") |
687 |
) |
|
688 | ||
689 | 3x |
ghs_scales <- left_join( |
690 | 3x |
eortc_qlq_c30_sub, |
691 | 3x |
gdsr_param_adqlqc, |
692 | 3x |
by = "PARAMCD" |
693 |
) |
|
694 |
# scale data |
|
695 | 3x |
df <- data.frame(index = seq_len(nrow(ghs_scales))) |
696 | 3x |
df$previous <- list( |
697 | 3x |
c("QS02826", "QS02827"), |
698 | 3x |
c("QS02811"), |
699 | 3x |
c("QS02810", "QS02812", "QS02818"), |
700 | 3x |
c("QS02806", "QS02807"), |
701 | 3x |
c("QS02814", "QS02815"), |
702 | 3x |
c("QS02808"), |
703 | 3x |
c("QS02817"), |
704 | 3x |
c("QS02816"), |
705 | 3x |
c("QS02821", "QS02822", "QS02823", "QS02824"), |
706 | 3x |
c("QS02829", "QS02830"), |
707 | 3x |
c("QS02813"), |
708 | 3x |
c("QS02801", "QS02802", "QS02803", "QS02804", "QS02805"), |
709 | 3x |
c("QS02809", "QS02819"), |
710 | 3x |
c("QS02820", "QS02825"), |
711 | 3x |
c("QS02828") |
712 |
) |
|
713 | 3x |
df$newName <- list( |
714 | 3x |
"QS028SF", |
715 | 3x |
"QS028SL", |
716 | 3x |
"QS028FA", |
717 | 3x |
"QS028RF2", |
718 | 3x |
"QS028NV", |
719 | 3x |
"QS028DY", |
720 | 3x |
"QS028DI", |
721 | 3x |
"QS028CO", |
722 | 3x |
"QS028EF", |
723 | 3x |
"QS028QL2", |
724 | 3x |
"QS028AP", |
725 | 3x |
"QS028PF2", |
726 | 3x |
"QS028PA", |
727 | 3x |
"QS028CF", |
728 | 3x |
"QS028FI" |
729 |
) |
|
730 | 3x |
df$newNamelabel <- list( |
731 | 3x |
"EORTC QLQ-C30: Social functioning", |
732 | 3x |
"EORTC QLQ-C30: Insomnia", |
733 | 3x |
"EORTC QLQ-C30: Fatigue", |
734 | 3x |
"EORTC QLQ-C30: Role functioning (revised)", |
735 | 3x |
"EORTC QLQ-C30: Nausea and vomiting", |
736 | 3x |
"EORTC QLQ-C30: Dyspnoea", |
737 | 3x |
"EORTC QLQ-C30: Diarrhoea", |
738 | 3x |
"EORTC QLQ-C30: Constipation", |
739 | 3x |
"EORTC QLQ-C30: Emotional functioning", |
740 | 3x |
"EORTC QLQ-C30: Global health status/QoL (revised)", |
741 | 3x |
"EORTC QLQ-C30: Appetite loss", |
742 | 3x |
"EORTC QLQ-C30: Physical functioning (revised)", |
743 | 3x |
"EORTC QLQ-C30: Pain", |
744 | 3x |
"EORTC QLQ-C30: Cognitive functioning", |
745 | 3x |
"EORTC QLQ-C30: Financial difficulties" |
746 |
) |
|
747 | 3x |
df$newNameCategory <- list( |
748 | 3x |
"Functional Scales", |
749 | 3x |
"Symptom Scales", |
750 | 3x |
"Symptom Scales", |
751 | 3x |
"Functional Scales", |
752 | 3x |
"Symptom Scales", |
753 | 3x |
"Symptom Scales", |
754 | 3x |
"Symptom Scales", |
755 | 3x |
"Symptom Scales", |
756 | 3x |
"Functional Scales", |
757 | 3x |
"Global Health Status", |
758 | 3x |
"Symptom Scales", |
759 | 3x |
"Functional Scales", |
760 | 3x |
"Symptom Scales", |
761 | 3x |
"Functional Scales", |
762 | 3x |
"Symptom Scales" |
763 |
) |
|
764 | 3x |
df$num_param <- list( |
765 | 3x |
"1", |
766 | 3x |
"1", |
767 | 3x |
"2", |
768 | 3x |
"1", |
769 | 3x |
"1", |
770 | 3x |
"1", |
771 | 3x |
"1", |
772 | 3x |
"1", |
773 | 3x |
"2", |
774 | 3x |
"1", |
775 | 3x |
"1", |
776 | 3x |
"3", |
777 | 3x |
"1", |
778 | 3x |
"1", |
779 | 3x |
"1" |
780 |
) |
|
781 | 3x |
df$equation <- list( |
782 | 3x |
"new_value = (1 - ((temp_val/var_length)-1)/3)*100.0", |
783 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
784 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
785 | 3x |
"new_value = (1 - ((temp_val/var_length)-1)/3)*100.0", |
786 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
787 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
788 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
789 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
790 | 3x |
"new_value = (1 - ((temp_val/var_length)-1)/3)*100.0", |
791 | 3x |
"new_value = ((temp_val/var_length-1)/6)*100.0", |
792 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
793 | 3x |
"new_value = (1 - ((temp_val/var_length)-1)/3)*100.0", |
794 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
795 | 3x |
"new_value = (1 - ((temp_val/var_length)-1)/3)*100.0", |
796 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0" |
797 |
) |
|
798 | ||
799 | 3x |
expect_data <- data.frame( |
800 | 3x |
PARAM = expect$PARAM, |
801 | 3x |
PARAMCD = expect$PARAMCD, |
802 | 3x |
PARCAT2 = expect$PARCAT2, |
803 | 3x |
PARCAT1N = expect$PARCAT1N, |
804 | 3x |
AVAL = c(0, 1), |
805 | 3x |
AVALC = c( |
806 | 3x |
"Not expected to complete questionnaire", |
807 | 3x |
"Expected to complete questionnaire" |
808 |
) |
|
809 |
) |
|
810 | ||
811 | 3x |
df_saved <- data.frame() |
812 | ||
813 | 3x |
unique_id <- unique(adqlqc1$USUBJID) |
814 | ||
815 | 3x |
for (id in unique_id) { |
816 | 30x |
id_data <- adqlqc1[adqlqc1$USUBJID == id, ] |
817 | 30x |
unique_avisit <- unique(id_data$AVISIT) |
818 | 30x |
for (visit in unique_avisit) { |
819 | 180x |
if (is.na(visit)) { |
820 | ! |
next |
821 |
} |
|
822 | 180x |
id_data_at_visit <- id_data[id_data$AVISIT == visit, ] |
823 | ||
824 | 180x |
if (any(id_data_at_visit$PARAMCD != "QSALL")) { |
825 | 177x |
for (idx in seq_along(df$index)) { |
826 | 2655x |
previous_names <- df$previous[idx] |
827 | 2655x |
current_name <- df$newName[idx] |
828 | 2655x |
current_name_label <- df$newNamelabel[idx] |
829 | 2655x |
current_name_category <- df$newNameCategory[idx] |
830 | 2655x |
eqn <- df$equation[idx] |
831 | 2655x |
temp_val <- 0 |
832 | 2655x |
var_length <- 0 |
833 | 2655x |
for (param_name in previous_names[[1]]) { |
834 | 5310x |
if (param_name %in% id_data_at_visit$PARAMCD) { #### |
835 | 5310x |
current_val <- as.numeric(as.character(id_data_at_visit$AVAL[id_data_at_visit$PARAMCD == param_name])) |
836 | 5310x |
if (!is.na(current_val)) { |
837 | 5094x |
temp_val <- temp_val + current_val ### |
838 | 5094x |
var_length <- var_length + 1 |
839 |
} |
|
840 |
} # if |
|
841 |
} # param_name |
|
842 |
# eval |
|
843 | 2655x |
if (var_length >= as.numeric(df$num_param[idx])) { |
844 | 2604x |
eval(parse(text = eqn)) ##### |
845 |
} else { |
|
846 | 51x |
new_value <- NA |
847 |
} |
|
848 | ||
849 | 2655x |
new_data_row <- data.frame( |
850 | 2655x |
study = str_extract(id, "[A-Z]+[0-9]+"), |
851 | 2655x |
id, |
852 | 2655x |
visit, |
853 | 2655x |
id_data_at_visit$AVISITN[1], |
854 | 2655x |
id_data_at_visit$QSDTC[1], |
855 | 2655x |
current_name_category, |
856 | 2655x |
current_name_label, |
857 | 2655x |
current_name, |
858 | 2655x |
new_value, |
859 | 2655x |
NA, |
860 | 2655x |
stringsAsFactors = FALSE |
861 |
) |
|
862 | 2655x |
colnames(new_data_row) <- c( |
863 | 2655x |
"STUDYID", "USUBJID", "AVISIT", "AVISITN", |
864 | 2655x |
"ADTM", "PARCAT2", "PARAM", "PARAMCD", |
865 | 2655x |
"AVAL", "AVALC" |
866 |
) ### |
|
867 | 2655x |
df_saved <- rbind(df_saved, new_data_row) ##### |
868 |
} # idx |
|
869 |
} |
|
870 |
# add expect data |
|
871 | 180x |
expect_value <- sample(expect_data$AVAL, 1, prob = c(0.10, 0.90)) |
872 | 180x |
expect_valuec <- expect_data$AVALC[expect_data$AVAL == expect_value] |
873 | ||
874 | 180x |
new_data_row <- data.frame( |
875 | 180x |
study = str_extract(id, "[A-Z]+[0-9]+"), |
876 | 180x |
id, |
877 | 180x |
visit, |
878 | 180x |
id_data_at_visit$AVISITN[1], |
879 | 180x |
datetime = NA, |
880 | 180x |
expect_data$PARCAT2[1], |
881 | 180x |
expect_data$PARAM[1], |
882 | 180x |
expect_data$PARAMCD[1], |
883 | 180x |
expect_value, |
884 | 180x |
expect_valuec, |
885 | 180x |
stringsAsFactors = FALSE |
886 |
) |
|
887 | 180x |
colnames(new_data_row) <- c( |
888 | 180x |
"STUDYID", "USUBJID", "AVISIT", "AVISITN", |
889 | 180x |
"ADTM", "PARCAT2", "PARAM", "PARAMCD", "AVAL", |
890 | 180x |
"AVALC" |
891 |
) ### |
|
892 | 180x |
df_saved <- rbind(df_saved, new_data_row) |
893 |
} # visit |
|
894 |
} # id |
|
895 | ||
896 | 3x |
df_saved1 <- left_join( |
897 | 3x |
df_saved, |
898 | 3x |
ghs_scales, |
899 | 3x |
by = c( |
900 | 3x |
"PARAM", |
901 | 3x |
"PARAMCD", |
902 | 3x |
"PARCAT2" |
903 |
) |
|
904 |
) %>% |
|
905 | 3x |
mutate( |
906 | 3x |
AVALC = ifelse(is.na(AVALC), as.character(AVAL), AVALC), |
907 | 3x |
PARCAT1 = ifelse(PARAMCD == "EX028", expect$PARCAT1, PARCAT1), |
908 | 3x |
PARCAT1N = ifelse(PARAMCD == "EX028", expect$PARCAT1N, PARCAT1N) |
909 |
) |
|
910 | ||
911 | 3x |
adqlqc_tmp <- bind_rows(adqlqc1, df_saved1) %>% |
912 | 3x |
arrange( |
913 | 3x |
USUBJID, |
914 | 3x |
AVISITN, |
915 | 3x |
QSTESTCD |
916 |
) |
|
917 | 3x |
return(adqlqc_tmp) |
918 |
} |
|
919 | ||
920 |
#' @describeIn h_adqlqc Calculate Change from Baseline Category 1 |
|
921 |
#' |
|
922 |
#' @param dataset (`data.frame`)\cr ADaM dataset. |
|
923 |
#' |
|
924 |
#' @return `data.frame` |
|
925 |
#' @keywords internal |
|
926 |
derv_chgcat1 <- function(dataset) { |
|
927 |
# derivation of CHGCAT1 |
|
928 | 3x |
check_vars <- c("PARCAT2", "CHG") |
929 | ||
930 | 3x |
if (all(check_vars %in% names(dataset))) { |
931 | 3x |
dataset$CHGCAT1 <- ifelse( |
932 | 3x |
dataset$PARCAT2 == "Symptom Scales" & !is.na(dataset$CHG) & dataset$CHG <= -10, |
933 | 3x |
"Improved", "" |
934 |
) |
|
935 | 3x |
dataset$CHGCAT1 <- ifelse( |
936 | 3x |
dataset$PARCAT2 == "Symptom Scales" & !is.na(dataset$CHG) & dataset$CHG >= 10, |
937 | 3x |
"Worsened", dataset$CHGCAT1 |
938 |
) |
|
939 | 3x |
dataset$CHGCAT1 <- ifelse( |
940 | 3x |
dataset$PARCAT2 == "Symptom Scales" & |
941 | 3x |
!is.na(dataset$CHG) & dataset$CHG > -10 & |
942 | 3x |
dataset$CHG < 10, |
943 | 3x |
"No change", dataset$CHGCAT1 |
944 |
) |
|
945 | ||
946 | 3x |
dataset$CHGCAT1 <- ifelse( |
947 | 3x |
dataset$PARCAT2 %in% c("Functional Scales", "Global Health Status") & |
948 | 3x |
!is.na(dataset$CHG) & dataset$CHG >= 10, |
949 | 3x |
"Improved", dataset$CHGCAT1 |
950 |
) |
|
951 | 3x |
dataset$CHGCAT1 <- ifelse( |
952 | 3x |
dataset$PARCAT2 %in% c("Functional Scales", "Global Health Status") & |
953 | 3x |
!is.na(dataset$CHG) & dataset$CHG <= -10, |
954 | 3x |
"Worsened", dataset$CHGCAT1 |
955 |
) |
|
956 | 3x |
dataset$CHGCAT1 <- ifelse( |
957 | 3x |
dataset$PARCAT2 %in% c("Functional Scales", "Global Health Status") & |
958 | 3x |
!is.na(dataset$CHG) & |
959 | 3x |
dataset$CHG > -10 & dataset$CHG < 10, |
960 | 3x |
"No change", dataset$CHGCAT1 |
961 |
) |
|
962 | ||
963 | 3x |
dataset$CHGCAT1 <- ifelse( |
964 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 6, |
965 | 3x |
"Improved by six levels", dataset$CHGCAT1 |
966 |
) |
|
967 | 3x |
dataset$CHGCAT1 <- ifelse( |
968 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 5, |
969 | 3x |
"Improved by five levels", dataset$CHGCAT1 |
970 |
) |
|
971 | 3x |
dataset$CHGCAT1 <- ifelse( |
972 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 4, |
973 | 3x |
"Improved by four levels", dataset$CHGCAT1 |
974 |
) |
|
975 | 3x |
dataset$CHGCAT1 <- ifelse( |
976 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 3, |
977 | 3x |
"Improved by three levels", dataset$CHGCAT1 |
978 |
) |
|
979 | 3x |
dataset$CHGCAT1 <- ifelse( |
980 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 2, |
981 | 3x |
"Improved by two levels", dataset$CHGCAT1 |
982 |
) |
|
983 | 3x |
dataset$CHGCAT1 <- ifelse( |
984 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 1, |
985 | 3x |
"Improved by one level", dataset$CHGCAT1 |
986 |
) |
|
987 | 3x |
dataset$CHGCAT1 <- ifelse( |
988 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 0, |
989 | 3x |
"No change", dataset$CHGCAT1 |
990 |
) |
|
991 | 3x |
dataset$CHGCAT1 <- ifelse( |
992 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -1, |
993 | 3x |
"Worsened by one level", dataset$CHGCAT1 |
994 |
) |
|
995 | 3x |
dataset$CHGCAT1 <- ifelse( |
996 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -2, |
997 | 3x |
"Worsened by two levels", dataset$CHGCAT1 |
998 |
) |
|
999 | 3x |
dataset$CHGCAT1 <- ifelse( |
1000 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -3, |
1001 | 3x |
"Worsened by three levels", dataset$CHGCAT1 |
1002 |
) |
|
1003 | 3x |
dataset$CHGCAT1 <- ifelse( |
1004 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -4, |
1005 | 3x |
"Worsened by four levels", dataset$CHGCAT1 |
1006 |
) |
|
1007 | 3x |
dataset$CHGCAT1 <- ifelse( |
1008 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -5, |
1009 | 3x |
"Worsened by five levels", dataset$CHGCAT1 |
1010 |
) |
|
1011 | 3x |
dataset$CHGCAT1 <- ifelse( |
1012 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -6, |
1013 | 3x |
"Worsened by six levels", dataset$CHGCAT1 |
1014 |
) |
|
1015 | ||
1016 | 3x |
dataset$CHGCAT1 <- ifelse( |
1017 | 3x |
dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == -3, |
1018 | 3x |
"Improved by three levels", dataset$CHGCAT1 |
1019 |
) |
|
1020 | 3x |
dataset$CHGCAT1 <- ifelse( |
1021 | 3x |
dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == -2, |
1022 | 3x |
"Improved by two levels", dataset$CHGCAT1 |
1023 |
) |
|
1024 | 3x |
dataset$CHGCAT1 <- ifelse( |
1025 | 3x |
dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == -1, |
1026 | 3x |
"Improved by one level", dataset$CHGCAT1 |
1027 |
) |
|
1028 | 3x |
dataset$CHGCAT1 <- ifelse( |
1029 | 3x |
dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == 0, |
1030 | 3x |
"No change", dataset$CHGCAT1 |
1031 |
) |
|
1032 | 3x |
dataset$CHGCAT1 <- ifelse( |
1033 | 3x |
dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == 1, |
1034 | 3x |
"Worsened by one level", dataset$CHGCAT1 |
1035 |
) |
|
1036 | 3x |
dataset$CHGCAT1 <- ifelse( |
1037 | 3x |
dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == 2, |
1038 | 3x |
"Worsened by two levels", dataset$CHGCAT1 |
1039 |
) |
|
1040 | 3x |
dataset$CHGCAT1 <- ifelse( |
1041 | 3x |
dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == 3, |
1042 | 3x |
"Worsened by three levels", dataset$CHGCAT1 |
1043 |
) |
|
1044 | ||
1045 | 3x |
dataset$CHGCAT1 <- ifelse( |
1046 | 3x |
dataset$PARAMCD == "QS02801" & dataset$CHG == -3, |
1047 | 3x |
"Improved by three levels", dataset$CHGCAT1 |
1048 |
) |
|
1049 | 3x |
dataset$CHGCAT1 <- ifelse( |
1050 | 3x |
dataset$PARAMCD == "QS02801" & dataset$CHG == -2, |
1051 | 3x |
"Improved by two levels", dataset$CHGCAT1 |
1052 |
) |
|
1053 | 3x |
dataset$CHGCAT1 <- ifelse( |
1054 | 3x |
dataset$PARAMCD == "QS02801" & dataset$CHG == -1, |
1055 | 3x |
"Improved by one level", dataset$CHGCAT1 |
1056 |
) |
|
1057 | 3x |
dataset$CHGCAT1 <- ifelse( |
1058 | 3x |
dataset$PARAMCD == "QS02801" & dataset$CHG == 0, |
1059 | 3x |
"No changed", dataset$CHGCAT1 |
1060 |
) |
|
1061 | 3x |
dataset$CHGCAT1 <- ifelse( |
1062 | 3x |
dataset$PARAMCD == "QS02801" & dataset$CHG == 1, |
1063 | 3x |
"Worsened by one level", dataset$CHGCAT1 |
1064 |
) |
|
1065 | 3x |
dataset$CHGCAT1 <- ifelse( |
1066 | 3x |
dataset$PARAMCD == "QS02801" & dataset$CHG == 2, |
1067 | 3x |
"Worsened by two levels", dataset$CHGCAT1 |
1068 |
) |
|
1069 | 3x |
dataset$CHGCAT1 <- ifelse( |
1070 | 3x |
dataset$PARAMCD == "QS02801" & dataset$CHG == 3, |
1071 | 3x |
"Worsened by three levels", dataset$CHGCAT1 |
1072 |
) |
|
1073 | ||
1074 | 3x |
paramcd_vec <- c( |
1075 | 3x |
"QS02803", "QS02804", "QS02805", "QS02807", "QS02808", "QS02809", "QS02810", |
1076 | 3x |
"QS02811", "QS02812", "QS02813", "QS02814", "QS02815", "QS02816", "QS02817", |
1077 | 3x |
"QS02818", "QS02819", "QS02820", "QS02821", "QS02822", "QS02823", "QS02824", |
1078 | 3x |
"QS02825", "QS02826", "QS02827", "QS02828" |
1079 |
) |
|
1080 | ||
1081 | 3x |
dataset$CHGCAT1 <- ifelse( |
1082 | 3x |
dataset$PARAMCD %in% paramcd_vec & dataset$CHG == -3, |
1083 | 3x |
"Improved by three levels", dataset$CHGCAT1 |
1084 |
) |
|
1085 | 3x |
dataset$CHGCAT1 <- ifelse( |
1086 | 3x |
dataset$PARAMCD %in% paramcd_vec & dataset$CHG == -2, |
1087 | 3x |
"Improved by two levels", dataset$CHGCAT1 |
1088 |
) |
|
1089 | 3x |
dataset$CHGCAT1 <- ifelse( |
1090 | 3x |
dataset$PARAMCD %in% paramcd_vec & dataset$CHG == -1, |
1091 | 3x |
"Improved by one level", dataset$CHGCAT1 |
1092 |
) |
|
1093 | 3x |
dataset$CHGCAT1 <- ifelse( |
1094 | 3x |
dataset$PARAMCD %in% paramcd_vec & dataset$CHG == 0, |
1095 | 3x |
"No change", dataset$CHGCAT1 |
1096 |
) |
|
1097 | 3x |
dataset$CHGCAT1 <- ifelse( |
1098 | 3x |
dataset$PARAMCD %in% paramcd_vec & dataset$CHG == 1, |
1099 | 3x |
"Worsened by one level", dataset$CHGCAT1 |
1100 |
) |
|
1101 | 3x |
dataset$CHGCAT1 <- ifelse( |
1102 | 3x |
dataset$PARAMCD %in% paramcd_vec & dataset$CHG == 2, |
1103 | 3x |
"Worsened by two levels", dataset$CHGCAT1 |
1104 |
) |
|
1105 | 3x |
dataset$CHGCAT1 <- ifelse( |
1106 | 3x |
dataset$PARAMCD %in% paramcd_vec & dataset$CHG == 3, |
1107 | 3x |
"Worsened by three levels", dataset$CHGCAT1 |
1108 |
) |
|
1109 | ||
1110 | 3x |
return(dataset) |
1111 |
} else { |
|
1112 | ! |
collapse_vars <- paste(check_vars, collapse = ", ") |
1113 | ! |
stop(sprintf( |
1114 | ! |
"%s: one or both variables is/are missing, needed for derivation", |
1115 | ! |
collapse_vars |
1116 |
)) |
|
1117 |
} |
|
1118 |
} |
|
1119 | ||
1120 |
#' @describeIn h_adqlqc Completion/Compliance Data Calculation |
|
1121 |
#' |
|
1122 |
#' @param dataset (`data.frame`)\cr Dataset. |
|
1123 |
#' |
|
1124 |
#' @return `data.frame` |
|
1125 |
#' @keywords internal |
|
1126 |
comp_derv <- function(dataset, percent, number) { |
|
1127 |
# original items data |
|
1128 | 3x |
orig_data <- filter( |
1129 | 3x |
dataset, |
1130 | 3x |
PARCAT2 == "Original Items" |
1131 |
) |
|
1132 |
# total number of questionnaires |
|
1133 | 3x |
comp_count_all <- select( |
1134 | 3x |
orig_data, |
1135 | 3x |
PARAMCD |
1136 |
) %>% |
|
1137 | 3x |
distinct() %>% |
1138 | 3x |
count() |
1139 | 3x |
comp_count_all <- comp_count_all$n |
1140 |
# original items data count of questions answered |
|
1141 | 3x |
orig_data_summ <- group_by( |
1142 | 3x |
orig_data, |
1143 | 3x |
STUDYID, |
1144 | 3x |
USUBJID, |
1145 | 3x |
PARCAT1, |
1146 | 3x |
AVISIT, |
1147 | 3x |
AVISITN, |
1148 | 3x |
ADTM, |
1149 | 3x |
ADY |
1150 |
) %>% |
|
1151 | 3x |
summarise( |
1152 | 3x |
comp_count = sum(!is.na(AVAL)), |
1153 | 3x |
comp_count_all = comp_count_all, |
1154 | 3x |
.groups = "drop" |
1155 |
) %>% |
|
1156 | 3x |
mutate( |
1157 | 3x |
per_comp = trunc((comp_count / comp_count_all) * 100) |
1158 |
) |
|
1159 |
# expected data |
|
1160 | 3x |
ex028_data <- filter( |
1161 | 3x |
dataset, |
1162 | 3x |
PARAMCD == "EX028", |
1163 | 3x |
AVAL == 1 |
1164 |
) %>% |
|
1165 | 3x |
select( |
1166 | 3x |
STUDYID, |
1167 | 3x |
USUBJID, |
1168 | 3x |
PARCAT1, |
1169 | 3x |
AVISIT, |
1170 | 3x |
AVISITN, |
1171 | 3x |
ADTM, |
1172 | 3x |
ADY, |
1173 | 3x |
AVAL_ex028 = AVAL |
1174 |
) %>% |
|
1175 | 3x |
mutate( |
1176 | 3x |
comp_count_all = comp_count_all |
1177 |
) |
|
1178 | ||
1179 | 3x |
joined <- left_join( |
1180 | 3x |
ex028_data, |
1181 | 3x |
orig_data_summ, |
1182 | 3x |
by = c( |
1183 | 3x |
"STUDYID", |
1184 | 3x |
"USUBJID", |
1185 | 3x |
"PARCAT1", |
1186 | 3x |
"AVISIT", |
1187 | 3x |
"AVISITN", |
1188 | 3x |
"comp_count_all" |
1189 |
) |
|
1190 |
) %>% |
|
1191 | 3x |
select(-c("ADTM.x", "ADY.x")) |
1192 | ||
1193 | 3x |
joined <- rename( |
1194 | 3x |
joined, |
1195 | 3x |
ADTM = ADTM.y, |
1196 | 3x |
ADY = ADY.y |
1197 |
) |
|
1198 |
# CO028ALL |
|
1199 | 3x |
co028all <- mutate( |
1200 | 3x |
joined, |
1201 | 3x |
PARAMCD = "CO028ALL", |
1202 | 3x |
PARAM = "EORTC QLQ-C30: Completion - Completed all questions", |
1203 | 3x |
PARCAT2 = "Completion", |
1204 | 3x |
AVAL = case_when( |
1205 | 3x |
AVAL_ex028 == 1 & comp_count == comp_count_all ~ 1, |
1206 | 3x |
AVAL_ex028 == 1 & (is.na(comp_count) | comp_count < comp_count_all) ~ 0 |
1207 |
), |
|
1208 | 3x |
AVALC = case_when( |
1209 | 3x |
AVAL == 1 ~ "Completed all questions", |
1210 | 3x |
AVAL == 0 ~ "Did not complete all questions" |
1211 |
) |
|
1212 |
) |
|
1213 |
# CO028<y>P |
|
1214 | 3x |
co028p <- mutate( |
1215 | 3x |
joined, |
1216 | 3x |
PARAMCD = paste0("CO028", as.character(percent), "P"), |
1217 | 3x |
PARAM = sprintf( |
1218 | 3x |
"EORTC QLQ-C30: Completion - Completed at least %s%% of questions", |
1219 | 3x |
as.character(percent) |
1220 |
), |
|
1221 | 3x |
PARCAT2 = "Completion", |
1222 | 3x |
AVAL = case_when( |
1223 | 3x |
AVAL_ex028 == 1 & per_comp >= percent ~ 1, |
1224 | 3x |
AVAL_ex028 == 1 & (is.na(per_comp) | per_comp < percent) ~ 0 |
1225 |
), |
|
1226 | 3x |
AVALC = case_when( |
1227 | 3x |
AVAL == 1 ~ sprintf( |
1228 | 3x |
"Completed at least %s%% of questions", |
1229 | 3x |
as.character(percent) |
1230 |
), |
|
1231 | 3x |
AVAL == 0 ~ sprintf( |
1232 | 3x |
"Did not complete at least %s%% of questions", |
1233 | 3x |
as.character(percent) |
1234 |
) |
|
1235 |
) |
|
1236 |
) |
|
1237 |
# CO028<x>Q |
|
1238 | 3x |
co028q <- mutate( |
1239 | 3x |
joined, |
1240 | 3x |
PARAMCD = paste0("CO028", as.character(number), "Q"), |
1241 | 3x |
PARAM = sprintf( |
1242 | 3x |
"EORTC QLQ-C30: Completion - Completed at least %s question(s)", |
1243 | 3x |
as.character(number) |
1244 |
), |
|
1245 | 3x |
PARCAT2 = "Completion", |
1246 | 3x |
AVAL = case_when( |
1247 | 3x |
AVAL_ex028 == 1 & comp_count >= number ~ 1, |
1248 | 3x |
AVAL_ex028 == 1 & (comp_count < number | is.na(comp_count)) ~ 0 |
1249 |
), |
|
1250 | 3x |
AVALC = case_when( |
1251 | 3x |
AVAL == 1 ~ sprintf( |
1252 | 3x |
"Completed at least %s questions", |
1253 | 3x |
as.character(number) |
1254 |
), |
|
1255 | 3x |
AVAL == 0 ~ sprintf( |
1256 | 3x |
"Did not complete at least %s question(s)", |
1257 | 3x |
as.character(number) |
1258 |
) |
|
1259 |
) |
|
1260 |
) |
|
1261 | ||
1262 | 3x |
co028_bind <- rbind( |
1263 | 3x |
co028all, |
1264 | 3x |
co028p, |
1265 | 3x |
co028q |
1266 |
) %>% |
|
1267 | 3x |
select( |
1268 | 3x |
-c("AVAL_ex028", "comp_count", "comp_count_all", "per_comp") |
1269 |
) |
|
1270 | 3x |
return(co028_bind) |
1271 |
} |
1 |
#' Exposure Analysis Dataset (ADEX) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating random Exposure Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details One record per each record in the corresponding SDTM domain. |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `EXSEQ`, `PARAMCD`, `PARCAT1`, `ASTDTM`, `AENDTM`, `ASTDY`, `AENDY`, |
|
11 |
#' `AVISITN`, `EXDOSFRQ`, `EXROUTE`, `VISIT`, `VISITDY`, `EXSTDTC`, `EXENDTC`, `EXSTDY`, `EXENDY` |
|
12 |
#' |
|
13 |
#' @inheritParams argument_convention |
|
14 |
#' @param parcat1 (`character vector`)\cr Dose amount categories. Defaults to "Individual" and "Overall". |
|
15 |
#' @param parcat2 (`character vector`)\cr Types of drug received. Defaults to "Drug A" and "Drug B". |
|
16 |
#' @param max_n_exs (`integer`)\cr Maximum number of exposures per patient. Defaults to 6. |
|
17 |
#' @template param_cached |
|
18 |
#' @templateVar data adex |
|
19 |
#' |
|
20 |
#' @return `data.frame` |
|
21 |
#' @export |
|
22 |
#' |
|
23 |
#' @examples |
|
24 |
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1) |
|
25 |
#' |
|
26 |
#' adex <- radex(adsl, seed = 2) |
|
27 |
#' adex |
|
28 |
radex <- function(adsl, |
|
29 |
param = c( |
|
30 |
"Dose administered during constant dosing interval", |
|
31 |
"Number of doses administered during constant dosing interval", |
|
32 |
"Total dose administered", |
|
33 |
"Total number of doses administered" |
|
34 |
), |
|
35 |
paramcd = c("DOSE", "NDOSE", "TDOSE", "TNDOSE"), |
|
36 |
paramu = c("mg", " ", "mg", " "), |
|
37 |
parcat1 = c("INDIVIDUAL", "OVERALL"), |
|
38 |
parcat2 = c("Drug A", "Drug B"), |
|
39 |
visit_format = "WEEK", |
|
40 |
n_assessments = 5L, |
|
41 |
n_days = 5L, |
|
42 |
max_n_exs = 6L, |
|
43 |
lookup = NULL, |
|
44 |
seed = NULL, |
|
45 |
na_percentage = 0, |
|
46 |
na_vars = list(AVAL = c(NA, 0.1), AVALU = c(NA), 0.1), |
|
47 |
cached = FALSE) { |
|
48 | 4x |
checkmate::assert_flag(cached) |
49 | 4x |
if (cached) { |
50 | 1x |
return(get_cached_data("cadex")) |
51 |
} |
|
52 | ||
53 | 3x |
checkmate::assert_data_frame(adsl) |
54 | 3x |
checkmate::assert_character(param, min.len = 1, any.missing = FALSE) |
55 | 3x |
checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE) |
56 | 3x |
checkmate::assert_character(parcat1, min.len = 1, any.missing = FALSE) |
57 | 3x |
checkmate::assert_character(parcat2, min.len = 1, any.missing = FALSE) |
58 | 3x |
checkmate::assert_string(visit_format) |
59 | 3x |
checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE) |
60 | 3x |
checkmate::assert_integer(n_days, len = 1, any.missing = FALSE) |
61 | 3x |
checkmate::assert_integer(max_n_exs, len = 1, any.missing = FALSE) |
62 | 3x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
63 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
64 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
65 | 3x |
checkmate::assert_true(na_percentage < 1) |
66 | ||
67 |
# validate and initialize related variables |
|
68 | 3x |
param_init_list <- relvar_init(param, paramcd) |
69 | 3x |
unit_init_list <- relvar_init(param, paramu) |
70 | ||
71 | 3x |
if (!is.null(seed)) { |
72 | 3x |
set.seed(seed) |
73 |
} |
|
74 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
75 | ||
76 | 3x |
adex <- expand.grid( |
77 | 3x |
STUDYID = unique(adsl$STUDYID), |
78 | 3x |
USUBJID = adsl$USUBJID, |
79 | 3x |
PARAM = c( |
80 | 3x |
rep( |
81 | 3x |
param_init_list$relvar1[1], |
82 | 3x |
length(levels(visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days))) |
83 |
), |
|
84 | 3x |
rep( |
85 | 3x |
param_init_list$relvar1[2], |
86 | 3x |
length(levels(visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days))) |
87 |
), |
|
88 | 3x |
param_init_list$relvar1[3:4] |
89 |
), |
|
90 | 3x |
stringsAsFactors = FALSE |
91 |
) |
|
92 | ||
93 |
# assign related variable values: PARAMxPARAMCD are related |
|
94 | 3x |
adex <- adex %>% rel_var( |
95 | 3x |
var_name = "PARAMCD", |
96 | 3x |
related_var = "PARAM", |
97 | 3x |
var_values = param_init_list$relvar2 |
98 |
) |
|
99 | ||
100 |
# assign related variable values: AVALUxPARAM are related |
|
101 | 3x |
adex <- adex %>% rel_var( |
102 | 3x |
var_name = "AVALU", |
103 | 3x |
related_var = "PARAM", |
104 | 3x |
var_values = unit_init_list$relvar2 |
105 |
) |
|
106 | ||
107 | 3x |
adex <- adex %>% |
108 | 3x |
dplyr::group_by(USUBJID) %>% |
109 | 3x |
dplyr::mutate(PARCAT_ind = sample(c(1, 2), size = 1)) %>% |
110 | 3x |
dplyr::mutate(PARCAT2 = ifelse(PARCAT_ind == 1, parcat2[1], parcat2[2])) %>% |
111 | 3x |
dplyr::select(-"PARCAT_ind") |
112 | ||
113 |
# Add in PARCAT1 |
|
114 | 3x |
adex <- adex %>% dplyr::mutate(PARCAT1 = dplyr::case_when( |
115 | 3x |
(PARAMCD == "TNDOSE" | PARAMCD == "TDOSE") ~ "OVERALL", |
116 | 3x |
PARAMCD == "DOSE" | PARAMCD == "NDOSE" ~ "INDIVIDUAL" |
117 |
)) |
|
118 | ||
119 | 3x |
adex_visit <- adex %>% |
120 | 3x |
dplyr::filter(PARAMCD == "DOSE" | PARAMCD == "NDOSE") %>% |
121 | 3x |
dplyr::mutate( |
122 | 3x |
AVISIT = rep(visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days), 2) |
123 |
) |
|
124 | ||
125 | 3x |
adex <- dplyr::left_join( |
126 | 3x |
adex %>% |
127 | 3x |
dplyr::group_by( |
128 | 3x |
USUBJID, |
129 | 3x |
STUDYID, |
130 | 3x |
PARAM, |
131 | 3x |
PARAMCD, |
132 | 3x |
AVALU, |
133 | 3x |
PARCAT1, |
134 | 3x |
PARCAT2 |
135 |
) %>% |
|
136 | 3x |
dplyr::mutate(id = dplyr::row_number()), |
137 | 3x |
adex_visit %>% |
138 | 3x |
dplyr::group_by( |
139 | 3x |
USUBJID, |
140 | 3x |
STUDYID, |
141 | 3x |
PARAM, |
142 | 3x |
PARAMCD, |
143 | 3x |
AVALU, |
144 | 3x |
PARCAT1, |
145 | 3x |
PARCAT2 |
146 |
) %>% |
|
147 | 3x |
dplyr::mutate(id = dplyr::row_number()), |
148 | 3x |
by = c("USUBJID", "STUDYID", "PARCAT1", "PARCAT2", "id", "PARAMCD", "PARAM", "AVALU") |
149 |
) %>% |
|
150 | 3x |
dplyr::select(-"id") |
151 | ||
152 |
# Visit numbers |
|
153 | 3x |
adex <- adex %>% dplyr::mutate(AVISITN = dplyr::case_when( |
154 | 3x |
AVISIT == "SCREENING" ~ -1, |
155 | 3x |
AVISIT == "BASELINE" ~ 0, |
156 | 3x |
(grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2, |
157 | 3x |
TRUE ~ 999000 |
158 |
)) |
|
159 | ||
160 | ||
161 | 3x |
adex2 <- split(adex, adex$USUBJID) %>% |
162 | 3x |
lapply(function(pinfo) { |
163 | 30x |
pinfo %>% |
164 | 30x |
dplyr::filter(PARAMCD == "DOSE") %>% |
165 | 30x |
dplyr::group_by(USUBJID, PARCAT2, AVISIT) %>% |
166 | 30x |
dplyr::mutate(changeind = dplyr::case_when( |
167 | 30x |
AVISIT == "SCREENING" ~ 0, |
168 | 30x |
AVISIT != "SCREENING" ~ sample(c(-1, 0, 1), |
169 | 30x |
size = 1, |
170 | 30x |
prob = c(0.25, 0.5, 0.25), |
171 | 30x |
replace = TRUE |
172 |
) |
|
173 |
)) %>% |
|
174 | 30x |
dplyr::ungroup() %>% |
175 | 30x |
dplyr::group_by(USUBJID, PARCAT2) %>% |
176 | 30x |
dplyr::mutate( |
177 | 30x |
csum = cumsum(changeind), |
178 | 30x |
changeind = dplyr::case_when( |
179 | 30x |
csum <= -3 ~ sample(c(0, 1), size = 1, prob = c(0.5, 0.5)), |
180 | 30x |
csum >= 3 ~ sample(c(0, -1), size = 1, prob = c(0.5, 0.5)), |
181 | 30x |
TRUE ~ changeind |
182 |
) |
|
183 |
) %>% |
|
184 | 30x |
dplyr::mutate(csum = cumsum(changeind)) %>% |
185 | 30x |
dplyr::ungroup() %>% |
186 | 30x |
dplyr::group_by(USUBJID, PARCAT2, AVISIT) %>% |
187 | 30x |
dplyr::mutate(AVAL = dplyr::case_when( |
188 | 30x |
csum == -2 ~ 480, |
189 | 30x |
csum == -1 ~ 720, |
190 | 30x |
csum == 0 ~ 960, |
191 | 30x |
csum == 1 ~ 1200, |
192 | 30x |
csum == 2 ~ 1440 |
193 |
)) %>% |
|
194 | 30x |
dplyr::select(-c("csum", "changeind")) %>% |
195 | 30x |
dplyr::ungroup() |
196 |
}) %>% |
|
197 | 3x |
Reduce(rbind, .) |
198 | ||
199 | 3x |
adex_tmp <- dplyr::full_join(adex2, adex, by = names(adex)) |
200 | 3x |
adex <- adex_tmp %>% |
201 | 3x |
dplyr::group_by(USUBJID) %>% |
202 | 3x |
dplyr::mutate(AVAL = ifelse(PARAMCD == "NDOSE", 1, AVAL)) %>% |
203 | 3x |
dplyr::mutate(AVAL = ifelse( |
204 | 3x |
PARAMCD == "TNDOSE", |
205 | 3x |
sum(AVAL[PARAMCD == "NDOSE"]), |
206 | 3x |
AVAL |
207 |
)) %>% |
|
208 | 3x |
dplyr::ungroup() %>% |
209 | 3x |
dplyr::group_by(USUBJID, STUDYID, PARCAT2) %>% |
210 | 3x |
dplyr::mutate(AVAL = ifelse( |
211 | 3x |
PARAMCD == "TDOSE", |
212 | 3x |
sum(AVAL[PARAMCD == "DOSE"]), |
213 | 3x |
AVAL |
214 |
)) |
|
215 | ||
216 | 3x |
adex <- var_relabel( |
217 | 3x |
adex, |
218 | 3x |
STUDYID = "Study Identifier", |
219 | 3x |
USUBJID = "Unique Subject Identifier" |
220 |
) |
|
221 | ||
222 |
# merge ADSL to be able to add ADEX date and study day variables |
|
223 | 3x |
adex <- dplyr::inner_join(adex, adsl, by = c("STUDYID", "USUBJID")) %>% |
224 | 3x |
dplyr::rowwise() %>% |
225 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
226 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
227 | 3x |
TRUE ~ TRTEDTM |
228 |
))) %>% |
|
229 | 3x |
dplyr::mutate(ASTDTM = sample( |
230 | 3x |
seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"), |
231 | 3x |
size = 1 |
232 |
)) %>% |
|
233 |
# add 1 to end of range incase both values passed to sample() are the same |
|
234 | 3x |
dplyr::mutate(AENDTM = sample( |
235 | 3x |
seq(lubridate::as_datetime(ASTDTM), lubridate::as_datetime(TRTENDT + 1), by = "day"), |
236 | 3x |
size = 1 |
237 |
)) %>% |
|
238 | 3x |
dplyr::select(-TRTENDT) %>% |
239 | 3x |
dplyr::ungroup() %>% |
240 | 3x |
dplyr::arrange(STUDYID, USUBJID, ASTDTM) |
241 | ||
242 | ||
243 | 3x |
adex <- adex %>% |
244 | 3x |
dplyr::group_by(USUBJID) %>% |
245 | 3x |
dplyr::mutate(EXSEQ = seq_len(dplyr::n())) %>% |
246 | 3x |
dplyr::mutate(ASEQ = EXSEQ) %>% |
247 | 3x |
dplyr::ungroup() %>% |
248 | 3x |
dplyr::arrange( |
249 | 3x |
STUDYID, |
250 | 3x |
USUBJID, |
251 | 3x |
PARAMCD, |
252 | 3x |
ASTDTM, |
253 | 3x |
AVISITN, |
254 | 3x |
EXSEQ |
255 |
) |
|
256 | ||
257 |
# Adding EXDOSFRQ |
|
258 | 3x |
adex <- adex %>% |
259 | 3x |
dplyr::mutate(EXDOSFRQ = dplyr::case_when( |
260 | 3x |
PARCAT1 == "INDIVIDUAL" ~ "ONCE", |
261 | 3x |
TRUE ~ "" |
262 |
)) |
|
263 | ||
264 |
# Adding EXROUTE |
|
265 | 3x |
adex <- adex %>% |
266 | 3x |
dplyr::mutate(EXROUTE = dplyr::case_when( |
267 | 3x |
PARCAT1 == "INDIVIDUAL" ~ sample(c("INTRAVENOUS", "SUBCUTANEOUS"), |
268 | 3x |
nrow(adex), |
269 | 3x |
replace = TRUE, |
270 | 3x |
prob = c(0.9, 0.1) |
271 |
), |
|
272 | 3x |
TRUE ~ "" |
273 |
)) |
|
274 | ||
275 |
# Fix VISIT according to AVISIT |
|
276 | 3x |
adex <- adex %>% |
277 | 3x |
dplyr::mutate(VISIT = AVISIT) |
278 | ||
279 |
# Hack for VISITDY - to fix in ADSL |
|
280 | 3x |
visit_levels <- str_extract(levels(adex$VISIT), pattern = "[0-9]+") |
281 | 3x |
vl_extracted <- vapply(visit_levels, function(x) as.numeric(x[2]), numeric(1)) |
282 | 3x |
vl_extracted <- c(-1, 1, vl_extracted[!is.na(vl_extracted)]) |
283 | ||
284 |
# Adding VISITDY |
|
285 | 3x |
adex <- adex %>% |
286 | 3x |
dplyr::mutate(VISITDY = as.numeric(as.character(factor(VISIT, labels = vl_extracted)))) |
287 | ||
288 |
# Exposure time stamps |
|
289 | 3x |
adex <- adex %>% |
290 | 3x |
dplyr::mutate( |
291 | 3x |
EXSTDTC = TRTSDTM + lubridate::days(VISITDY), |
292 | 3x |
EXENDTC = EXSTDTC + lubridate::hours(1), |
293 | 3x |
EXSTDY = VISITDY, |
294 | 3x |
EXENDY = VISITDY |
295 |
) |
|
296 | ||
297 |
# Correcting last exposure to treatment |
|
298 | 3x |
adex <- adex %>% |
299 | 3x |
dplyr::group_by(SUBJID) %>% |
300 | 3x |
dplyr::mutate(TRTEDTM = lubridate::as_datetime(max(EXENDTC, na.rm = TRUE))) %>% |
301 | 3x |
dplyr::ungroup() |
302 | ||
303 |
# Fixing Date - to add into ADSL |
|
304 | 3x |
adex <- adex %>% |
305 | 3x |
dplyr::mutate( |
306 | 3x |
TRTSDT = lubridate::date(TRTSDTM), |
307 | 3x |
TRTEDT = lubridate::date(TRTEDTM) |
308 |
) |
|
309 | ||
310 |
# Fixing analysis time stamps |
|
311 | 3x |
adex <- adex %>% |
312 | 3x |
dplyr::mutate( |
313 | 3x |
ASTDY = EXSTDY, |
314 | 3x |
AENDY = EXENDY, |
315 | 3x |
ASTDTM = EXSTDTC, |
316 | 3x |
AENDTM = EXENDTC |
317 |
) |
|
318 | ||
319 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) { |
320 | ! |
adex <- mutate_na(ds = adex, na_vars = na_vars, na_percentage = na_percentage) |
321 |
} |
|
322 | ||
323 |
# apply metadata |
|
324 | 3x |
adex <- apply_metadata(adex, "metadata/ADEX.yml") |
325 |
} |
|
326 | ||
327 |
# Equivalent of stringr::str_extract_all() |
|
328 |
str_extract <- function(string, pattern) { |
|
329 | 2850x |
regmatches(string, gregexpr(pattern, string)) |
330 |
} |
1 |
#' Medical History Analysis Dataset (ADMH) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating a random Medical History Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details One record per each record in the corresponding SDTM domain. |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `ASTDTM`, `MHSEQ` |
|
11 |
#' |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @param max_n_mhs (`integer`)\cr Maximum number of MHs per patient. Defaults to 10. |
|
14 |
#' @template param_cached |
|
15 |
#' @templateVar data admh |
|
16 |
#' |
|
17 |
#' @return `data.frame` |
|
18 |
#' @export |
|
19 |
#' |
|
20 |
#' @examples |
|
21 |
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1) |
|
22 |
#' |
|
23 |
#' admh <- radmh(adsl, seed = 2) |
|
24 |
#' admh |
|
25 |
radmh <- function(adsl, |
|
26 |
max_n_mhs = 10L, |
|
27 |
lookup = NULL, |
|
28 |
seed = NULL, |
|
29 |
na_percentage = 0, |
|
30 |
na_vars = list(MHBODSYS = c(NA, 0.1), MHDECOD = c(1234, 0.1)), |
|
31 |
cached = FALSE) { |
|
32 | 4x |
checkmate::assert_flag(cached) |
33 | 4x |
if (cached) { |
34 | 1x |
return(get_cached_data("cadmh")) |
35 |
} |
|
36 | ||
37 | 3x |
checkmate::assert_data_frame(adsl) |
38 | 3x |
checkmate::assert_integer(max_n_mhs, len = 1, any.missing = FALSE) |
39 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
40 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
41 | 3x |
checkmate::assert_true(na_percentage < 1) |
42 | ||
43 | 3x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
44 | 3x |
lookup_mh <- if (!is.null(lookup)) { |
45 | ! |
lookup |
46 |
} else { |
|
47 | 3x |
tibble::tribble( |
48 | 3x |
~MHBODSYS, ~MHDECOD, ~MHSOC, |
49 | 3x |
"cl A", "trm A_1/2", "cl A", |
50 | 3x |
"cl A", "trm A_2/2", "cl A", |
51 | 3x |
"cl B", "trm B_1/3", "cl B", |
52 | 3x |
"cl B", "trm B_2/3", "cl B", |
53 | 3x |
"cl B", "trm B_3/3", "cl B", |
54 | 3x |
"cl C", "trm C_1/2", "cl C", |
55 | 3x |
"cl C", "trm C_2/2", "cl C", |
56 | 3x |
"cl D", "trm D_1/3", "cl D", |
57 | 3x |
"cl D", "trm D_2/3", "cl D", |
58 | 3x |
"cl D", "trm D_3/3", "cl D" |
59 |
) |
|
60 |
} |
|
61 | ||
62 | 3x |
if (!is.null(seed)) { |
63 | 3x |
set.seed(seed) |
64 |
} |
|
65 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
66 | ||
67 | 3x |
admh <- Map( |
68 | 3x |
function(id, sid) { |
69 | 30x |
n_mhs <- sample(0:max_n_mhs, 1) |
70 | 30x |
i <- sample(seq_len(nrow(lookup_mh)), n_mhs, TRUE) |
71 | 30x |
dplyr::mutate( |
72 | 30x |
lookup_mh[i, ], |
73 | 30x |
USUBJID = id, |
74 | 30x |
STUDYID = sid |
75 |
) |
|
76 |
}, |
|
77 | 3x |
adsl$USUBJID, |
78 | 3x |
adsl$STUDYID |
79 |
) %>% |
|
80 | 3x |
Reduce(rbind, .) %>% |
81 | 3x |
`[`(c(4, 5, 1, 2, 3)) %>% |
82 | 3x |
dplyr::mutate(MHTERM = MHDECOD) |
83 | ||
84 | 3x |
admh <- var_relabel( |
85 | 3x |
admh, |
86 | 3x |
STUDYID = "Study Identifier", |
87 | 3x |
USUBJID = "Unique Subject Identifier" |
88 |
) |
|
89 | ||
90 |
# merge ADSL to be able to add MH date and study day variables |
|
91 | 3x |
admh <- dplyr::inner_join( |
92 | 3x |
admh, |
93 | 3x |
adsl, |
94 | 3x |
by = c("STUDYID", "USUBJID") |
95 |
) %>% |
|
96 | 3x |
dplyr::rowwise() %>% |
97 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
98 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
99 | 3x |
TRUE ~ TRTEDTM |
100 |
))) %>% |
|
101 | 3x |
dplyr::mutate(ASTDTM = sample( |
102 | 3x |
seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"), |
103 | 3x |
size = 1 |
104 |
)) %>% |
|
105 | 3x |
dplyr::mutate(ASTDY = ceiling(difftime(ASTDTM, TRTSDTM, units = "days"))) %>% |
106 |
# add 1 to end of range incase both values passed to sample() are the same |
|
107 | 3x |
dplyr::mutate(AENDTM = sample( |
108 | 3x |
seq(lubridate::as_datetime(ASTDTM), lubridate::as_datetime(TRTENDT + 1), by = "day"), |
109 | 3x |
size = 1 |
110 |
)) %>% |
|
111 | 3x |
dplyr::mutate(AENDY = ceiling(difftime(AENDTM, TRTSDTM, units = "days"))) %>% |
112 | 3x |
select(-TRTENDT) %>% |
113 | 3x |
dplyr::ungroup() %>% |
114 | 3x |
dplyr::arrange(STUDYID, USUBJID, ASTDTM, MHTERM) %>% |
115 | 3x |
dplyr::mutate(MHDISTAT = sample( |
116 | 3x |
x = c("Resolved", "Ongoing with treatment", "Ongoing without treatment"), |
117 | 3x |
prob = c(0.6, 0.2, 0.2), |
118 | 3x |
size = dplyr::n(), |
119 | 3x |
replace = TRUE |
120 |
)) %>% |
|
121 | 3x |
dplyr::mutate(ATIREL = dplyr::case_when( |
122 | 3x |
(AENDTM < TRTSDTM | (is.na(AENDTM) & MHDISTAT == "Resolved")) ~ "PRIOR", |
123 | 3x |
(AENDTM >= TRTSDTM | (is.na(AENDTM) & grepl("Ongoing", MHDISTAT))) ~ "PRIOR_CONCOMITANT" |
124 |
)) |
|
125 | ||
126 | 3x |
admh <- admh %>% |
127 | 3x |
dplyr::group_by(USUBJID) %>% |
128 | 3x |
dplyr::mutate(MHSEQ = seq_len(dplyr::n())) %>% |
129 | 3x |
dplyr::mutate(ASEQ = MHSEQ) %>% |
130 | 3x |
dplyr::ungroup() %>% |
131 | 3x |
dplyr::arrange(STUDYID, USUBJID, ASTDTM, MHSEQ) |
132 | ||
133 | 3x |
if (length(na_vars) > 0 && na_percentage > 0 && na_percentage <= 1) { |
134 | ! |
admh <- mutate_na(ds = admh, na_vars = na_vars, na_percentage = na_percentage) |
135 |
} |
|
136 | ||
137 |
# apply metadata |
|
138 | 3x |
admh <- apply_metadata(admh, "metadata/ADMH.yml") |
139 | ||
140 | 3x |
return(admh) |
141 |
} |
1 |
#' Adverse Event Analysis Dataset (ADAE) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating random Adverse Event Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details One record per each record in the corresponding SDTM domain. |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `ASTDTM`, `AETERM`, `AESEQ` |
|
11 |
#' |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @param max_n_aes (`integer`)\cr Maximum number of AEs per patient. Defaults to 10. |
|
14 |
#' @template param_cached |
|
15 |
#' @templateVar data adae |
|
16 |
#' |
|
17 |
#' @return `data.frame` |
|
18 |
#' @export |
|
19 |
#' |
|
20 |
#' @examples |
|
21 |
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1) |
|
22 |
#' |
|
23 |
#' adae <- radae(adsl, seed = 2) |
|
24 |
#' adae |
|
25 |
#' |
|
26 |
#' # Add metadata. |
|
27 |
#' aag <- utils::read.table( |
|
28 |
#' sep = ",", header = TRUE, |
|
29 |
#' text = paste( |
|
30 |
#' "NAMVAR,SRCVAR,GRPTYPE,REFNAME,REFTERM,SCOPE", |
|
31 |
#' "CQ01NAM,AEDECOD,CUSTOM,D.2.1.5.3/A.1.1.1.1 AESI,dcd D.2.1.5.3,", |
|
32 |
#' "CQ01NAM,AEDECOD,CUSTOM,D.2.1.5.3/A.1.1.1.1 AESI,dcd A.1.1.1.1,", |
|
33 |
#' "SMQ01NAM,AEDECOD,SMQ,C.1.1.1.3/B.2.2.3.1 AESI,dcd C.1.1.1.3,BROAD", |
|
34 |
#' "SMQ01NAM,AEDECOD,SMQ,C.1.1.1.3/B.2.2.3.1 AESI,dcd B.2.2.3.1,BROAD", |
|
35 |
#' "SMQ02NAM,AEDECOD,SMQ,Y.9.9.9.9/Z.9.9.9.9 AESI,dcd Y.9.9.9.9,NARROW", |
|
36 |
#' "SMQ02NAM,AEDECOD,SMQ,Y.9.9.9.9/Z.9.9.9.9 AESI,dcd Z.9.9.9.9,NARROW", |
|
37 |
#' sep = "\n" |
|
38 |
#' ), stringsAsFactors = FALSE |
|
39 |
#' ) |
|
40 |
#' |
|
41 |
#' adae <- radae(adsl, lookup_aag = aag) |
|
42 |
#' |
|
43 |
#' with( |
|
44 |
#' adae, |
|
45 |
#' cbind( |
|
46 |
#' table(AEDECOD, SMQ01NAM), |
|
47 |
#' table(AEDECOD, CQ01NAM) |
|
48 |
#' ) |
|
49 |
#' ) |
|
50 |
radae <- function(adsl, |
|
51 |
max_n_aes = 10L, |
|
52 |
lookup = NULL, |
|
53 |
lookup_aag = NULL, |
|
54 |
seed = NULL, |
|
55 |
na_percentage = 0, |
|
56 |
na_vars = list( |
|
57 |
AEBODSYS = c(NA, 0.1), |
|
58 |
AEDECOD = c(1234, 0.1), |
|
59 |
AETOXGR = c(1234, 0.1) |
|
60 |
), |
|
61 |
cached = FALSE) { |
|
62 | 4x |
checkmate::assert_flag(cached) |
63 | 4x |
if (cached) { |
64 | 1x |
return(get_cached_data("cadae")) |
65 |
} |
|
66 | ||
67 | 3x |
checkmate::assert_data_frame(adsl) |
68 | 3x |
checkmate::assert_integer(max_n_aes, len = 1, any.missing = FALSE) |
69 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
70 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
71 | 3x |
checkmate::assert_true(na_percentage < 1) |
72 | ||
73 |
# check lookup parameters |
|
74 | 3x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
75 | 3x |
lookup_ae <- if (!is.null(lookup)) { |
76 | ! |
lookup |
77 |
} else { |
|
78 | 3x |
tibble::tribble( |
79 | 3x |
~AEBODSYS, ~AELLT, ~AEDECOD, ~AEHLT, ~AEHLGT, ~AETOXGR, ~AESOC, ~AESER, ~AEREL, |
80 | 3x |
"cl A.1", "llt A.1.1.1.1", "dcd A.1.1.1.1", "hlt A.1.1.1", "hlgt A.1.1", "1", "cl A", "N", "N", |
81 | 3x |
"cl A.1", "llt A.1.1.1.2", "dcd A.1.1.1.2", "hlt A.1.1.1", "hlgt A.1.1", "2", "cl A", "Y", "N", |
82 | 3x |
"cl B.1", "llt B.1.1.1.1", "dcd B.1.1.1.1", "hlt B.1.1.1", "hlgt B.1.1", "5", "cl B", "Y", "Y", |
83 | 3x |
"cl B.2", "llt B.2.1.2.1", "dcd B.2.1.2.1", "hlt B.2.1.2", "hlgt B.2.1", "3", "cl B", "N", "N", |
84 | 3x |
"cl B.2", "llt B.2.2.3.1", "dcd B.2.2.3.1", "hlt B.2.2.3", "hlgt B.2.2", "1", "cl B", "Y", "N", |
85 | 3x |
"cl C.1", "llt C.1.1.1.3", "dcd C.1.1.1.3", "hlt C.1.1.1", "hlgt C.1.1", "4", "cl C", "N", "Y", |
86 | 3x |
"cl C.2", "llt C.2.1.2.1", "dcd C.2.1.2.1", "hlt C.2.1.2", "hlgt C.2.1", "2", "cl C", "N", "Y", |
87 | 3x |
"cl D.1", "llt D.1.1.1.1", "dcd D.1.1.1.1", "hlt D.1.1.1", "hlgt D.1.1", "5", "cl D", "Y", "Y", |
88 | 3x |
"cl D.1", "llt D.1.1.4.2", "dcd D.1.1.4.2", "hlt D.1.1.4", "hlgt D.1.1", "3", "cl D", "N", "N", |
89 | 3x |
"cl D.2", "llt D.2.1.5.3", "dcd D.2.1.5.3", "hlt D.2.1.5", "hlgt D.2.1", "1", "cl D", "N", "Y" |
90 |
) |
|
91 |
} |
|
92 | ||
93 | 3x |
checkmate::assert_data_frame(lookup_aag, null.ok = TRUE) |
94 | 3x |
aag <- if (!is.null(lookup_aag)) { |
95 | ! |
lookup_aag |
96 |
} else { |
|
97 | 3x |
aag <- utils::read.table( |
98 | 3x |
sep = ",", header = TRUE, |
99 | 3x |
text = paste( |
100 | 3x |
"NAMVAR,SRCVAR,GRPTYPE,REFNAME,REFTERM,SCOPE", |
101 | 3x |
"CQ01NAM,AEDECOD,CUSTOM,D.2.1.5.3/A.1.1.1.1 AESI,dcd D.2.1.5.3,", |
102 | 3x |
"CQ01NAM,AEDECOD,CUSTOM,D.2.1.5.3/A.1.1.1.1 AESI,dcd A.1.1.1.1,", |
103 | 3x |
"SMQ01NAM,AEDECOD,SMQ,C.1.1.1.3/B.2.2.3.1 AESI,dcd C.1.1.1.3,BROAD", |
104 | 3x |
"SMQ01NAM,AEDECOD,SMQ,C.1.1.1.3/B.2.2.3.1 AESI,dcd B.2.2.3.1,BROAD", |
105 | 3x |
"SMQ02NAM,AEDECOD,SMQ,Y.9.9.9.9/Z.9.9.9.9 AESI,dcd Y.9.9.9.9,NARROW", |
106 | 3x |
"SMQ02NAM,AEDECOD,SMQ,Y.9.9.9.9/Z.9.9.9.9 AESI,dcd Z.9.9.9.9,NARROW", |
107 | 3x |
sep = "\n" |
108 | 3x |
), stringsAsFactors = FALSE |
109 |
) |
|
110 |
} |
|
111 | ||
112 | 3x |
if (!is.null(seed)) set.seed(seed) |
113 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
114 | ||
115 | 3x |
adae <- Map( |
116 | 3x |
function(id, sid) { |
117 | 30x |
n_aes <- sample(c(0, seq_len(max_n_aes)), 1) |
118 | 30x |
i <- sample(seq_len(nrow(lookup_ae)), n_aes, TRUE) |
119 | 30x |
dplyr::mutate( |
120 | 30x |
lookup_ae[i, ], |
121 | 30x |
USUBJID = id, |
122 | 30x |
STUDYID = sid |
123 |
) |
|
124 |
}, |
|
125 | 3x |
adsl$USUBJID, |
126 | 3x |
adsl$STUDYID |
127 |
) %>% |
|
128 | 3x |
Reduce(rbind, .) %>% |
129 | 3x |
`[`(c(10, 11, 1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% |
130 | 3x |
dplyr::mutate(AETERM = gsub("dcd", "trm", AEDECOD)) %>% |
131 | 3x |
dplyr::mutate(AESEV = dplyr::case_when( |
132 | 3x |
AETOXGR == 1 ~ "MILD", |
133 | 3x |
AETOXGR %in% c(2, 3) ~ "MODERATE", |
134 | 3x |
AETOXGR %in% c(4, 5) ~ "SEVERE" |
135 |
)) |
|
136 | ||
137 | 3x |
adae <- var_relabel( |
138 | 3x |
adae, |
139 | 3x |
STUDYID = "Study Identifier", |
140 | 3x |
USUBJID = "Unique Subject Identifier" |
141 |
) |
|
142 | ||
143 |
# merge adsl to be able to add AE date and study day variables |
|
144 | 3x |
adae <- dplyr::inner_join(adae, adsl, by = c("STUDYID", "USUBJID")) %>% |
145 | 3x |
dplyr::rowwise() %>% |
146 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
147 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
148 | 3x |
TRUE ~ TRTEDTM |
149 |
))) %>% |
|
150 | 3x |
dplyr::mutate(ASTDTM = sample( |
151 | 3x |
seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"), |
152 | 3x |
size = 1 |
153 |
)) %>% |
|
154 | 3x |
dplyr::mutate(ASTDY = ceiling(difftime(ASTDTM, TRTSDTM, units = "days"))) %>% |
155 |
# add 1 to end of range incase both values passed to sample() are the same |
|
156 | 3x |
dplyr::mutate(AENDTM = sample( |
157 | 3x |
seq(lubridate::as_datetime(ASTDTM), lubridate::as_datetime(TRTENDT + 1), by = "day"), |
158 | 3x |
size = 1 |
159 |
)) %>% |
|
160 | 3x |
dplyr::mutate(AENDY = ceiling(difftime(AENDTM, TRTSDTM, units = "days"))) %>% |
161 | 3x |
dplyr::mutate(LDOSEDTM = dplyr::case_when( |
162 | 3x |
TRTSDTM < ASTDTM ~ lubridate::as_datetime(stats::runif(1, TRTSDTM, ASTDTM)), |
163 | 3x |
TRUE ~ ASTDTM |
164 |
)) %>% |
|
165 | 3x |
dplyr::mutate(LDRELTM = as.numeric(difftime(ASTDTM, LDOSEDTM, units = "mins"))) %>% |
166 | 3x |
dplyr::select(-TRTENDT) %>% |
167 | 3x |
dplyr::ungroup() %>% |
168 | 3x |
dplyr::arrange(STUDYID, USUBJID, ASTDTM, AETERM) |
169 | ||
170 | 3x |
adae <- adae %>% |
171 | 3x |
dplyr::group_by(USUBJID) %>% |
172 | 3x |
dplyr::mutate(AESEQ = seq_len(dplyr::n())) %>% |
173 | 3x |
dplyr::mutate(ASEQ = AESEQ) %>% |
174 | 3x |
dplyr::ungroup() %>% |
175 | 3x |
dplyr::arrange( |
176 | 3x |
STUDYID, |
177 | 3x |
USUBJID, |
178 | 3x |
ASTDTM, |
179 | 3x |
AETERM, |
180 | 3x |
AESEQ |
181 |
) |
|
182 | ||
183 | 3x |
outcomes <- c( |
184 | 3x |
"UNKNOWN", |
185 | 3x |
"NOT RECOVERED/NOT RESOLVED", |
186 | 3x |
"RECOVERED/RESOLVED WITH SEQUELAE", |
187 | 3x |
"RECOVERING/RESOLVING", |
188 | 3x |
"RECOVERED/RESOLVED" |
189 |
) |
|
190 | ||
191 | 3x |
actions <- c( |
192 | 3x |
"DOSE RATE REDUCED", |
193 | 3x |
"UNKNOWN", |
194 | 3x |
"NOT APPLICABLE", |
195 | 3x |
"DRUG INTERRUPTED", |
196 | 3x |
"DRUG WITHDRAWN", |
197 | 3x |
"DOSE INCREASED", |
198 | 3x |
"DOSE NOT CHANGED", |
199 | 3x |
"DOSE REDUCED", |
200 | 3x |
"NOT EVALUABLE" |
201 |
) |
|
202 | ||
203 | 3x |
adae <- adae %>% |
204 | 3x |
dplyr::mutate(AEOUT = factor(ifelse( |
205 | 3x |
AETOXGR == "5", |
206 | 3x |
"FATAL", |
207 | 3x |
as.character(sample_fct(outcomes, nrow(adae), prob = c(0.1, 0.2, 0.1, 0.3, 0.3))) |
208 |
))) %>% |
|
209 | 3x |
dplyr::mutate(AEACN = factor(ifelse( |
210 | 3x |
AETOXGR == "5", |
211 | 3x |
"NOT EVALUABLE", |
212 | 3x |
as.character(sample_fct(actions, nrow(adae), prob = c(0.05, 0.05, 0.05, 0.01, 0.05, 0.1, 0.45, 0.1, 0.05))) |
213 |
))) %>% |
|
214 | 3x |
dplyr::mutate(AESDTH = dplyr::case_when( |
215 | 3x |
AEOUT == "FATAL" ~ "Y", |
216 | 3x |
TRUE ~ "N" |
217 |
)) %>% |
|
218 | 3x |
dplyr::mutate(TRTEMFL = ifelse(ASTDTM >= TRTSDTM, "Y", "")) %>% |
219 | 3x |
dplyr::mutate(AECONTRT = sample(c("Y", "N"), prob = c(0.4, 0.6), size = dplyr::n(), replace = TRUE)) %>% |
220 | 3x |
dplyr::mutate( |
221 | 3x |
ANL01FL = ifelse(TRTEMFL == "Y" & ASTDTM <= TRTEDTM + lubridate::month(1), "Y", "") |
222 |
) %>% |
|
223 | 3x |
dplyr::mutate(ANL01FL = ifelse(is.na(ANL01FL), "", ANL01FL)) |
224 | ||
225 | 3x |
adae <- adae %>% |
226 | 3x |
dplyr::mutate(AERELNST = sample(c("Y", "N"), prob = c(0.4, 0.6), size = dplyr::n(), replace = TRUE)) %>% |
227 | 3x |
dplyr::mutate(AEACNOTH = sample( |
228 | 3x |
x = c("MEDICATION", "PROCEDURE/SURGERY", "SUBJECT DISCONTINUED FROM STUDY", "NONE"), |
229 | 3x |
prob = c(0.2, 0.4, 0.2, 0.2), |
230 | 3x |
size = dplyr::n(), |
231 | 3x |
replace = TRUE |
232 |
)) |
|
233 | ||
234 |
# Split metadata for AEs of special interest (AESI). |
|
235 | 3x |
l_aag <- split(aag, interaction(aag$NAMVAR, aag$SRCVAR, aag$GRPTYPE, drop = TRUE)) |
236 | ||
237 |
# Create AESI flags |
|
238 | 3x |
l_aesi <- lapply(l_aag, function(d_adag, d_adae) { |
239 | 9x |
names(d_adag)[names(d_adag) == "REFTERM"] <- d_adag$SRCVAR[1] |
240 | 9x |
names(d_adag)[names(d_adag) == "REFNAME"] <- d_adag$NAMVAR[1] |
241 | ||
242 | 9x |
if (d_adag$GRPTYPE[1] == "CUSTOM") { |
243 | 3x |
d_adag <- d_adag[-which(names(d_adag) == "SCOPE")] |
244 | 6x |
} else if (d_adag$GRPTYPE[1] == "SMQ") { |
245 | 6x |
names(d_adag)[names(d_adag) == "SCOPE"] <- paste0(substr(d_adag$NAMVAR[1], 1, 5), "SC") |
246 |
} |
|
247 | ||
248 | 9x |
d_adag <- d_adag[-which(names(d_adag) %in% c("NAMVAR", "SRCVAR", "GRPTYPE"))] |
249 | 9x |
d_new <- dplyr::left_join(x = d_adae, y = d_adag, by = intersect(names(d_adae), names(d_adag))) |
250 | 9x |
d_new[, dplyr::setdiff(names(d_new), names(d_adae)), drop = FALSE] |
251 | 3x |
}, adae) |
252 | ||
253 | 3x |
adae <- dplyr::bind_cols(adae, l_aesi) |
254 | ||
255 | 3x |
adae <- dplyr::mutate(adae, AERELNST = sample( |
256 | 3x |
x = c("CONCURRENT ILLNESS", "OTHER", "DISEASE UNDER STUDY", "NONE"), |
257 | 3x |
prob = c(0.3, 0.3, 0.3, 0.1), |
258 | 3x |
size = dplyr::n(), |
259 | 3x |
replace = TRUE |
260 |
)) |
|
261 | ||
262 | ||
263 | 3x |
adae <- adae %>% |
264 | 3x |
dplyr::mutate(AES_FLAG = sample( |
265 | 3x |
x = c("AESLIFE", "AESHOSP", "AESDISAB", "AESCONG", "AESMIE"), |
266 | 3x |
prob = c(0.1, 0.2, 0.2, 0.2, 0.3), |
267 | 3x |
size = dplyr::n(), |
268 | 3x |
replace = TRUE |
269 |
)) %>% |
|
270 | 3x |
dplyr::mutate(AES_FLAG = dplyr::case_when( |
271 | 3x |
AESDTH == "Y" ~ "AESDTH", |
272 | 3x |
TRUE ~ AES_FLAG |
273 |
)) %>% |
|
274 | 3x |
dplyr::mutate( |
275 | 3x |
AESCONG = ifelse(AES_FLAG == "AESCONG", "Y", "N"), |
276 | 3x |
AESDISAB = ifelse(AES_FLAG == "AESDISAB", "Y", "N"), |
277 | 3x |
AESHOSP = ifelse(AES_FLAG == "AESHOSP", "Y", "N"), |
278 | 3x |
AESLIFE = ifelse(AES_FLAG == "AESLIFE", "Y", "N"), |
279 | 3x |
AESMIE = ifelse(AES_FLAG == "AESMIE", "Y", "N") |
280 |
) %>% |
|
281 | 3x |
dplyr::select(-"AES_FLAG") |
282 | ||
283 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) { |
284 | ! |
adae <- mutate_na(ds = adae, na_vars = na_vars, na_percentage = na_percentage) |
285 |
} |
|
286 | ||
287 |
# apply metadata |
|
288 | 3x |
adae <- apply_metadata(adae, "metadata/ADAE.yml") |
289 | ||
290 | 3x |
return(adae) |
291 |
} |
1 |
#' ECG Analysis Dataset (ADEG) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating random dataset from ECG Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details One record per subject per parameter per analysis visit per analysis date. |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `BASETYPE`, `AVISITN`, `ATPTN`, `DTYPE`, `ADTM`, `EGSEQ`, `ASPID` |
|
11 |
#' |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @param egcat (`character vector`)\cr EG category values. |
|
14 |
#' @param max_n_eg (`integer`)\cr Maximum number of EG results per patient. Defaults to 10. |
|
15 |
#' @template param_cached |
|
16 |
#' @templateVar data adeg |
|
17 |
#' |
|
18 |
#' @return `data.frame` |
|
19 |
#' @export |
|
20 |
#' |
|
21 |
#' @author tomlinsj, npaszty, Xuefeng Hou, dipietrc |
|
22 |
#' |
|
23 |
#' @examples |
|
24 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
25 |
#' |
|
26 |
#' adeg <- radeg(adsl, visit_format = "WEEK", n_assessments = 7L, seed = 2) |
|
27 |
#' adeg |
|
28 |
#' |
|
29 |
#' adeg <- radeg(adsl, visit_format = "CYCLE", n_assessments = 2L, seed = 2) |
|
30 |
#' adeg |
|
31 |
radeg <- function(adsl, |
|
32 |
egcat = c("INTERVAL", "INTERVAL", "MEASUREMENT", "FINDING"), |
|
33 |
param = c( |
|
34 |
"QT Duration", |
|
35 |
"RR Duration", |
|
36 |
"Heart Rate", |
|
37 |
"ECG Interpretation" |
|
38 |
), |
|
39 |
paramcd = c("QT", "RR", "HR", "ECGINTP"), |
|
40 |
paramu = c("msec", "msec", "beats/min", ""), |
|
41 |
visit_format = "WEEK", |
|
42 |
n_assessments = 5L, |
|
43 |
n_days = 5L, |
|
44 |
max_n_eg = 10L, |
|
45 |
lookup = NULL, |
|
46 |
seed = NULL, |
|
47 |
na_percentage = 0, |
|
48 |
na_vars = list( |
|
49 |
ABLFL = c(1235, 0.1), BASE = c(NA, 0.1), BASEC = c(NA, 0.1), |
|
50 |
CHG = c(1234, 0.1), PCHG = c(1234, 0.1) |
|
51 |
), |
|
52 |
cached = FALSE) { |
|
53 | 4x |
checkmate::assert_flag(cached) |
54 | 4x |
if (cached) { |
55 | 1x |
return(get_cached_data("cadeg")) |
56 |
} |
|
57 | ||
58 | 3x |
checkmate::assert_data_frame(adsl) |
59 | 3x |
checkmate::assert_character(egcat, min.len = 1, any.missing = FALSE) |
60 | 3x |
checkmate::assert_character(param, min.len = 1, any.missing = FALSE) |
61 | 3x |
checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE) |
62 | 3x |
checkmate::assert_character(paramu, min.len = 1, any.missing = FALSE) |
63 | 3x |
checkmate::assert_string(visit_format) |
64 | 3x |
checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE) |
65 | 3x |
checkmate::assert_integer(n_days, len = 1, any.missing = FALSE) |
66 | 3x |
checkmate::assert_integer(max_n_eg, len = 1, any.missing = FALSE) |
67 | 3x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
68 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
69 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
70 | 3x |
checkmate::assert_true(na_percentage < 1) |
71 | ||
72 |
# validate and initialize related variables |
|
73 | 3x |
egcat_init_list <- relvar_init(param, egcat) |
74 | 3x |
param_init_list <- relvar_init(param, paramcd) |
75 | 3x |
unit_init_list <- relvar_init(param, paramu) |
76 | ||
77 | 3x |
if (!is.null(seed)) { |
78 | 3x |
set.seed(seed) |
79 |
} |
|
80 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
81 | ||
82 | 3x |
adeg <- expand.grid( |
83 | 3x |
STUDYID = unique(adsl$STUDYID), |
84 | 3x |
USUBJID = adsl$USUBJID, |
85 | 3x |
PARAM = as.factor(param_init_list$relvar1), |
86 | 3x |
AVISIT = visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days), |
87 | 3x |
stringsAsFactors = FALSE |
88 |
) |
|
89 | ||
90 |
# assign related variable values: PARAMxEGCAT are related |
|
91 | 3x |
adeg <- adeg %>% rel_var( |
92 | 3x |
var_name = "EGCAT", |
93 | 3x |
related_var = "PARAM", |
94 | 3x |
var_values = egcat_init_list$relvar2 |
95 |
) |
|
96 | ||
97 |
# assign related variable values: PARAMxPARAMCD are related |
|
98 | 3x |
adeg <- adeg %>% rel_var( |
99 | 3x |
var_name = "PARAMCD", |
100 | 3x |
related_var = "PARAM", |
101 | 3x |
var_values = param_init_list$relvar2 |
102 |
) |
|
103 | ||
104 | 3x |
adeg <- adeg %>% dplyr::mutate(AVAL = dplyr::case_when( |
105 | 3x |
PARAMCD == "QT" ~ stats::rnorm(nrow(adeg), mean = 350, sd = 100), |
106 | 3x |
PARAMCD == "RR" ~ stats::rnorm(nrow(adeg), mean = 1050, sd = 300), |
107 | 3x |
PARAMCD == "HR" ~ stats::rnorm(nrow(adeg), mean = 70, sd = 20), |
108 | 3x |
PARAMCD == "ECGINTP" ~ NA_real_ |
109 |
)) |
|
110 | ||
111 | 3x |
adeg <- adeg %>% |
112 | 3x |
dplyr::mutate(EGTESTCD = PARAMCD) %>% |
113 | 3x |
dplyr::mutate(EGTEST = PARAM) |
114 | ||
115 | 3x |
adeg <- adeg %>% dplyr::mutate(AVISITN = dplyr::case_when( |
116 | 3x |
AVISIT == "SCREENING" ~ -1, |
117 | 3x |
AVISIT == "BASELINE" ~ 0, |
118 | 3x |
(grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2, |
119 | 3x |
TRUE ~ NA_real_ |
120 |
)) |
|
121 | ||
122 | 3x |
adeg <- adeg %>% rel_var( |
123 | 3x |
var_name = "AVALU", |
124 | 3x |
related_var = "PARAM", |
125 | 3x |
var_values = unit_init_list$relvar2 |
126 |
) |
|
127 | ||
128 |
# order to prepare for change from screening and baseline values |
|
129 | 3x |
adeg <- adeg[order(adeg$STUDYID, adeg$USUBJID, adeg$PARAMCD, adeg$AVISITN), ] |
130 | ||
131 | 3x |
adeg <- Reduce(rbind, lapply(split(adeg, adeg$USUBJID), function(x) { |
132 | 30x |
x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])] |
133 | 30x |
x$ABLFL <- ifelse(toupper(visit_format) == "WEEK" & x$AVISIT == "BASELINE", |
134 | 30x |
"Y", |
135 | 30x |
ifelse(toupper(visit_format) == "CYCLE" & x$AVISIT == "CYCLE 1 DAY 1", "Y", "") |
136 |
) |
|
137 | 30x |
x |
138 |
})) |
|
139 | ||
140 | 3x |
adeg$BASE <- ifelse(adeg$AVISITN >= 0, retain(adeg, adeg$AVAL, adeg$ABLFL == "Y"), adeg$AVAL) |
141 | ||
142 | 3x |
adeg <- adeg %>% dplyr::mutate(ANRLO = dplyr::case_when( |
143 | 3x |
PARAMCD == "QT" ~ 200, |
144 | 3x |
PARAMCD == "RR" ~ 600, |
145 | 3x |
PARAMCD == "HR" ~ 40, |
146 | 3x |
PARAMCD == "ECGINTP" ~ NA_real_ |
147 |
)) |
|
148 | ||
149 | 3x |
adeg <- adeg %>% dplyr::mutate(ANRHI = dplyr::case_when( |
150 | 3x |
PARAMCD == "QT" ~ 500, |
151 | 3x |
PARAMCD == "RR" ~ 1500, |
152 | 3x |
PARAMCD == "HR" ~ 100, |
153 | 3x |
PARAMCD == "ECGINTP" ~ NA_real_ |
154 |
)) |
|
155 | ||
156 | 3x |
adeg <- adeg %>% dplyr::mutate(ANRIND = factor(dplyr::case_when( |
157 | 3x |
AVAL < ANRLO ~ "LOW", |
158 | 3x |
AVAL >= ANRLO & AVAL <= ANRHI ~ "NORMAL", |
159 | 3x |
AVAL > ANRHI ~ "HIGH" |
160 |
))) |
|
161 | ||
162 | 3x |
adeg <- adeg %>% |
163 | 3x |
dplyr::mutate(CHG = ifelse(AVISITN > 0, AVAL - BASE, NA)) %>% |
164 | 3x |
dplyr::mutate(PCHG = ifelse(AVISITN > 0, 100 * (CHG / BASE), NA)) %>% |
165 | 3x |
dplyr::mutate(BASETYPE = "LAST") %>% |
166 | 3x |
dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>% |
167 | 3x |
dplyr::mutate(BNRIND = ANRIND[ABLFL == "Y"]) %>% |
168 | 3x |
dplyr::ungroup() %>% |
169 | 3x |
dplyr::mutate(ATPTN = 1) %>% |
170 | 3x |
dplyr::mutate(DTYPE = NA) %>% |
171 | 3x |
var_relabel( |
172 | 3x |
STUDYID = attr(adeg$STUDYID, "label"), |
173 | 3x |
USUBJID = attr(adeg$USUBJID, "label") |
174 |
) |
|
175 | ||
176 | 3x |
adeg$ANRIND <- factor(adeg$ANRIND, levels = c("LOW", "NORMAL", "HIGH")) |
177 | 3x |
adeg$BNRIND <- factor(adeg$BNRIND, levels = c("LOW", "NORMAL", "HIGH")) |
178 | ||
179 | 3x |
adeg <- var_relabel( |
180 | 3x |
adeg, |
181 | 3x |
STUDYID = "Study Identifier", |
182 | 3x |
USUBJID = "Unique Subject Identifier" |
183 |
) |
|
184 | ||
185 |
# merge ADSL to be able to add EG date and study day variables |
|
186 | 3x |
adeg <- dplyr::inner_join( |
187 | 3x |
adeg, |
188 | 3x |
adsl, |
189 | 3x |
by = c("STUDYID", "USUBJID") |
190 |
) %>% |
|
191 | 3x |
dplyr::rowwise() %>% |
192 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
193 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
194 | 3x |
TRUE ~ TRTEDTM |
195 |
))) %>% |
|
196 | 3x |
dplyr::ungroup() |
197 | ||
198 | 3x |
adeg <- adeg %>% |
199 | 3x |
dplyr::group_by(USUBJID) %>% |
200 | 3x |
dplyr::arrange(USUBJID, AVISITN) %>% |
201 | 3x |
dplyr::mutate(ADTM = rep( |
202 | 3x |
sort(sample( |
203 | 3x |
seq(lubridate::as_datetime(TRTSDTM[1]), lubridate::as_datetime(TRTENDT[1]), by = "day"), |
204 | 3x |
size = nlevels(AVISIT) |
205 |
)), |
|
206 | 3x |
each = n() / nlevels(AVISIT) |
207 |
)) %>% |
|
208 | 3x |
dplyr::ungroup() %>% |
209 | 3x |
dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>% |
210 | 3x |
dplyr::select(-TRTENDT) %>% |
211 | 3x |
dplyr::arrange(STUDYID, USUBJID, ADTM) |
212 | ||
213 | 3x |
adeg <- adeg %>% |
214 | 3x |
dplyr::mutate(ASPID = sample(seq_len(dplyr::n()))) %>% |
215 | 3x |
dplyr::group_by(USUBJID) %>% |
216 | 3x |
dplyr::mutate(EGSEQ = seq_len(dplyr::n())) %>% |
217 | 3x |
dplyr::mutate(ASEQ = EGSEQ) %>% |
218 | 3x |
dplyr::ungroup() %>% |
219 | 3x |
dplyr::arrange( |
220 | 3x |
STUDYID, |
221 | 3x |
USUBJID, |
222 | 3x |
PARAMCD, |
223 | 3x |
BASETYPE, |
224 | 3x |
AVISITN, |
225 | 3x |
ATPTN, |
226 | 3x |
DTYPE, |
227 | 3x |
ADTM, |
228 | 3x |
EGSEQ, |
229 | 3x |
ASPID |
230 |
) |
|
231 | ||
232 | 3x |
adeg <- adeg %>% dplyr::mutate(ONTRTFL = factor(dplyr::case_when( |
233 | 3x |
!AVISIT %in% c("SCREENING", "BASELINE") ~ "Y", |
234 | 3x |
TRUE ~ "" |
235 |
))) |
|
236 | ||
237 | 3x |
adeg <- adeg %>% dplyr::mutate(AVALC = ifelse( |
238 | 3x |
PARAMCD == "ECGINTP", |
239 | 3x |
as.character(sample_fct(c("ABNORMAL", "NORMAL"), nrow(adeg), prob = c(0.25, 0.75))), |
240 | 3x |
as.character(AVAL) |
241 |
)) |
|
242 | ||
243 |
# Temporarily creating a row_check column to easily match newly created |
|
244 |
# observations with their row correct arrangement. |
|
245 | 3x |
adeg <- adeg %>% |
246 | 3x |
dplyr::mutate(row_check = seq_len(nrow(adeg))) |
247 | ||
248 |
# Created function to add in new observations for DTYPE, "MINIMUM" & "MAXIMUM" in this case. |
|
249 | 3x |
get_groups <- function(data, |
250 | 3x |
minimum) { |
251 | 6x |
data <- data %>% |
252 | 6x |
dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>% |
253 | 6x |
dplyr::arrange(ADTM, ASPID, EGSEQ) %>% |
254 | 6x |
dplyr::filter( |
255 | 6x |
(AVISIT != "BASELINE" & AVISIT != "SCREENING") & |
256 | 6x |
(ONTRTFL == "Y" | ADTM <= TRTSDTM) |
257 |
) %>% |
|
258 |
{ |
|
259 | 6x |
if (minimum == TRUE) { |
260 | 3x |
dplyr::filter(., AVAL == min(AVAL)) %>% |
261 | 3x |
dplyr::mutate(., DTYPE = "MINIMUM", AVISIT = "POST-BASELINE MINIMUM") |
262 |
} else { |
|
263 | 3x |
dplyr::filter(., AVAL == max(AVAL)) %>% |
264 | 3x |
dplyr::mutate(., DTYPE = "MAXIMUM", AVISIT = "POST-BASELINE MAXIMUM") |
265 |
} |
|
266 |
} %>% |
|
267 | 6x |
dplyr::slice(1) %>% |
268 | 6x |
dplyr::ungroup() |
269 | ||
270 | 6x |
return(data) |
271 |
} |
|
272 | ||
273 |
# Binding the new observations to the dataset from the function above and rearranging in the correct order. |
|
274 | 3x |
adeg <- rbind(adeg, get_groups(adeg, TRUE), get_groups(adeg, FALSE)) %>% |
275 | 3x |
dplyr::arrange(row_check) %>% |
276 | 3x |
dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>% |
277 | 3x |
dplyr::arrange(AVISIT, .by_group = TRUE) %>% |
278 | 3x |
dplyr::ungroup() |
279 | ||
280 |
# Dropping the "row_check" column created above. |
|
281 | 3x |
adeg <- adeg[, -which(names(adeg) %in% c("row_check"))] |
282 | ||
283 |
# Created function to easily match rows which comply to ONTRTFL derivation |
|
284 | 3x |
flag_variables <- function(data, worst_obs) { |
285 | 6x |
data_compare <- data %>% |
286 | 6x |
dplyr::mutate(row_check = seq_len(nrow(data))) |
287 | ||
288 | 6x |
data <- data_compare %>% |
289 |
{ |
|
290 | 6x |
if (worst_obs == FALSE) { |
291 | 3x |
dplyr::group_by(., USUBJID, PARAMCD, BASETYPE, AVISIT) %>% |
292 | 3x |
dplyr::arrange(., ADTM, ASPID, EGSEQ) |
293 |
} else { |
|
294 | 3x |
dplyr::group_by(., USUBJID, PARAMCD, BASETYPE) |
295 |
} |
|
296 |
} %>% |
|
297 | 6x |
dplyr::filter( |
298 | 6x |
AVISITN > 0 & (ONTRTFL == "Y" | ADTM <= TRTSDTM) & |
299 | 6x |
is.na(DTYPE) |
300 |
) %>% |
|
301 |
{ |
|
302 | 6x |
if (worst_obs == TRUE) { |
303 | 3x |
dplyr::arrange(., AVALC) %>% dplyr::filter(., ifelse( |
304 | 3x |
PARAMCD == "ECGINTP", |
305 | 3x |
ifelse(AVALC == "ABNORMAL", AVALC == "ABNORMAL", AVALC == "NORMAL"), |
306 | 3x |
AVAL == min(AVAL) |
307 |
)) |
|
308 |
} else { |
|
309 | 3x |
dplyr::filter(., ifelse( |
310 | 3x |
PARAMCD == "ECGINTP", |
311 | 3x |
AVALC == "ABNORMAL" | AVALC == "NORMAL", |
312 | 3x |
AVAL == min(AVAL) |
313 |
)) |
|
314 |
} |
|
315 |
} %>% |
|
316 | 6x |
dplyr::slice(1) %>% |
317 |
{ |
|
318 | 6x |
if (worst_obs == TRUE) { |
319 | 3x |
dplyr::mutate(., new_var = dplyr::case_when( |
320 | 3x |
(AVALC == "ABNORMAL" | AVALC == "NORMAL") ~ "Y", |
321 | 3x |
(!is.na(AVAL) & is.na(DTYPE)) ~ "Y", |
322 | 3x |
TRUE ~ "" |
323 |
)) |
|
324 |
} else { |
|
325 | 3x |
dplyr::mutate(., new_var = dplyr::case_when( |
326 | 3x |
(AVALC == "ABNORMAL" | AVALC == "NORMAL") ~ "Y", |
327 | 3x |
(!is.na(AVAL) & is.na(DTYPE)) ~ "Y", |
328 | 3x |
TRUE ~ "" |
329 |
)) |
|
330 |
} |
|
331 |
} %>% |
|
332 | 6x |
dplyr::ungroup() |
333 | ||
334 | 6x |
data_compare$new_var <- ifelse(data_compare$row_check %in% data$row_check, "Y", "") |
335 | 6x |
data_compare <- data_compare[, -which(names(data_compare) %in% c("row_check"))] |
336 | ||
337 | 6x |
return(data_compare) |
338 |
} |
|
339 | ||
340 | 3x |
adeg <- flag_variables(adeg, FALSE) %>% dplyr::rename(WORS01FL = "new_var") |
341 | 3x |
adeg <- flag_variables(adeg, TRUE) %>% dplyr::rename(WORS02FL = "new_var") |
342 | ||
343 | 3x |
adeg <- adeg %>% dplyr::mutate(ANL01FL = factor(ifelse( |
344 | 3x |
(ABLFL == "Y" | (is.na(DTYPE) & WORS01FL == "Y")) & |
345 | 3x |
(AVISIT != "SCREENING"), |
346 | 3x |
"Y", |
347 |
"" |
|
348 |
))) |
|
349 | ||
350 | 3x |
adeg <- adeg %>% |
351 | 3x |
dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>% |
352 | 3x |
dplyr::mutate(BASEC = ifelse( |
353 | 3x |
PARAMCD == "ECGINTP", |
354 | 3x |
AVALC[AVISIT == "BASELINE"], |
355 | 3x |
as.character(BASE) |
356 |
)) %>% |
|
357 | 3x |
dplyr::mutate(ANL03FL = dplyr::case_when( |
358 | 3x |
DTYPE == "MINIMUM" ~ "Y", |
359 | 3x |
ABLFL == "Y" & PARAMCD != "ECGINTP" ~ "Y", |
360 | 3x |
TRUE ~ "" |
361 |
)) %>% |
|
362 | 3x |
dplyr::mutate(ANL04FL = dplyr::case_when( |
363 | 3x |
DTYPE == "MAXIMUM" ~ "Y", |
364 | 3x |
ABLFL == "Y" & PARAMCD != "ECGINTP" ~ "Y", |
365 | 3x |
TRUE ~ "" |
366 |
)) %>% |
|
367 | 3x |
dplyr::ungroup() |
368 | ||
369 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) { |
370 | ! |
adeg <- mutate_na(ds = adeg, na_vars = na_vars, na_percentage = na_percentage) |
371 |
} |
|
372 | ||
373 |
# apply metadata |
|
374 | 3x |
adeg <- apply_metadata(adeg, "metadata/ADEG.yml") |
375 | ||
376 | 3x |
return(adeg) |
377 |
} |
1 |
#' Time to Adverse Event Analysis Dataset (ADAETTE) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function to generate random Time-to-AE Dataset for a |
|
6 |
#' given Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD` |
|
11 |
#' |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @param event_descr (`character vector`)\cr Descriptions of events. Defaults to `NULL`. |
|
14 |
#' @param censor_descr (`character vector`)\cr Descriptions of censors. Defaults to `NULL`. |
|
15 |
#' @template param_cached |
|
16 |
#' @templateVar data adaette |
|
17 |
#' |
|
18 |
#' @return `data.frame` |
|
19 |
#' @export |
|
20 |
#' |
|
21 |
#' @author Xiuting Mi |
|
22 |
#' |
|
23 |
#' @examples |
|
24 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
25 |
#' |
|
26 |
#' adaette <- radaette(adsl, seed = 2) |
|
27 |
#' adaette |
|
28 |
radaette <- function(adsl, |
|
29 |
event_descr = NULL, |
|
30 |
censor_descr = NULL, |
|
31 |
lookup = NULL, |
|
32 |
seed = NULL, |
|
33 |
na_percentage = 0, |
|
34 |
na_vars = list(CNSR = c(NA, 0.1), AVAL = c(1234, 0.1)), |
|
35 |
cached = FALSE) { |
|
36 | 6x |
checkmate::assert_flag(cached) |
37 | 6x |
if (cached) { |
38 | 1x |
return(get_cached_data("cadaette")) |
39 |
} |
|
40 | ||
41 | 5x |
checkmate::assert_data_frame(adsl) |
42 | 5x |
checkmate::assert_character(censor_descr, null.ok = TRUE, min.len = 1, any.missing = FALSE) |
43 | 5x |
checkmate::assert_character(event_descr, null.ok = TRUE, min.len = 1, any.missing = FALSE) |
44 | 5x |
checkmate::assert_number(seed, null.ok = TRUE) |
45 | 5x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
46 | 5x |
checkmate::assert_true(na_percentage < 1) |
47 | ||
48 | 5x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
49 | 5x |
lookup_adaette <- if (!is.null(lookup)) { |
50 | ! |
lookup |
51 |
} else { |
|
52 | 5x |
tibble::tribble( |
53 | 5x |
~ARM, ~CATCD, ~CAT, ~LAMBDA, ~CNSR_P, |
54 | 5x |
"ARM A", "1", "any adverse event", 1 / 80, 0.4, |
55 | 5x |
"ARM B", "1", "any adverse event", 1 / 100, 0.2, |
56 | 5x |
"ARM C", "1", "any adverse event", 1 / 60, 0.42, |
57 | 5x |
"ARM A", "2", "any serious adverse event", 1 / 100, 0.3, |
58 | 5x |
"ARM B", "2", "any serious adverse event", 1 / 150, 0.1, |
59 | 5x |
"ARM C", "2", "any serious adverse event", 1 / 80, 0.32, |
60 | 5x |
"ARM A", "3", "a grade 3-5 adverse event", 1 / 80, 0.2, |
61 | 5x |
"ARM B", "3", "a grade 3-5 adverse event", 1 / 100, 0.08, |
62 | 5x |
"ARM C", "3", "a grade 3-5 adverse event", 1 / 60, 0.23 |
63 |
) |
|
64 |
} |
|
65 | ||
66 | 5x |
if (!is.null(seed)) { |
67 | 5x |
set.seed(seed) |
68 |
} |
|
69 | 5x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
70 | ||
71 | 5x |
evntdescr_sel <- if (!is.null(event_descr)) { |
72 | ! |
event_descr |
73 |
} else { |
|
74 | 5x |
"Preferred Term" |
75 |
} |
|
76 | ||
77 | 5x |
cnsdtdscr_sel <- if (!is.null(censor_descr)) { |
78 | ! |
censor_descr |
79 |
} else { |
|
80 | 5x |
c( |
81 | 5x |
"Clinical Cut Off", |
82 | 5x |
"Completion or Discontinuation", |
83 | 5x |
"End of AE Reporting Period" |
84 |
) |
|
85 |
} |
|
86 | ||
87 | 5x |
random_patient_data <- function(patient_info) { |
88 | 50x |
startdt <- lubridate::date(patient_info$TRTSDTM) |
89 | 50x |
trtedtm <- lubridate::floor_date(dplyr::case_when( |
90 | 50x |
is.na(patient_info$TRTEDTM) ~ lubridate::date(patient_info$TRTSDTM) + study_duration_secs, |
91 | 50x |
TRUE ~ lubridate::date(patient_info$TRTEDTM) |
92 | 50x |
), unit = "day") |
93 | 50x |
enddts <- c(patient_info$EOSDT, lubridate::date(trtedtm)) |
94 | 50x |
enddts_min_index <- which.min(enddts) |
95 | 50x |
adt <- enddts[enddts_min_index] |
96 | 50x |
adtm <- lubridate::as_datetime(adt) |
97 | 50x |
ady <- as.numeric(adt - startdt + 1) |
98 | 50x |
data.frame( |
99 | 50x |
ARM = patient_info$ARM, |
100 | 50x |
STUDYID = patient_info$STUDYID, |
101 | 50x |
SITEID = patient_info$SITEID, |
102 | 50x |
USUBJID = patient_info$USUBJID, |
103 | 50x |
PARAMCD = "AEREPTTE", |
104 | 50x |
PARAM = "Time to end of AE reporting period", |
105 | 50x |
CNSR = 0, |
106 | 50x |
AVAL = lubridate::days(ady) / lubridate::years(1), |
107 | 50x |
AVALU = "YEARS", |
108 | 50x |
EVNTDESC = ifelse(enddts_min_index == 1, "Completion or Discontinuation", "End of AE Reporting Period"), |
109 | 50x |
CNSDTDSC = NA, |
110 | 50x |
ADTM = adtm, |
111 | 50x |
ADY = ady, |
112 | 50x |
stringsAsFactors = FALSE |
113 |
) |
|
114 |
} |
|
115 | ||
116 |
# validate and initialize related variables for Hy's law |
|
117 | 5x |
paramcd_hy <- c("HYSTTEUL", "HYSTTEBL") |
118 | 5x |
param_hy <- c("Time to Hy's Law Elevation in relation to ULN", "Time to Hy's Law Elevation in relation to Baseline") |
119 | 5x |
param_init_list <- relvar_init(param_hy, paramcd_hy) |
120 | 5x |
adsl_hy <- dplyr::select(adsl, "STUDYID", "USUBJID", "TRTSDTM", "SITEID", "ARM") |
121 | ||
122 |
# create all combinations of unique values in STUDYID, USUBJID, PARAM, AVISIT |
|
123 | 5x |
adaette_hy <- expand.grid( |
124 | 5x |
STUDYID = unique(adsl$STUDYID), |
125 | 5x |
USUBJID = adsl$USUBJID, |
126 | 5x |
PARAM = as.factor(param_init_list$relvar1), |
127 | 5x |
stringsAsFactors = FALSE |
128 |
) |
|
129 | ||
130 |
# Add other variables to adaette_hy |
|
131 | 5x |
adaette_hy <- dplyr::left_join(adaette_hy, adsl_hy, by = c("STUDYID", "USUBJID")) %>% |
132 | 5x |
rel_var( |
133 | 5x |
var_name = "PARAMCD", |
134 | 5x |
related_var = "PARAM", |
135 | 5x |
var_values = param_init_list$relvar2 |
136 |
) %>% |
|
137 | 5x |
dplyr::mutate( |
138 | 5x |
CNSR = sample(c(0, 1), prob = c(0.1, 0.9), size = dplyr::n(), replace = TRUE), |
139 | 5x |
EVNTDESC = dplyr::if_else( |
140 | 5x |
CNSR == 0, |
141 | 5x |
"First Post-Baseline Raised ALT or AST Elevation Result", |
142 | 5x |
NA_character_ |
143 |
), |
|
144 | 5x |
CNSDTDSC = dplyr::if_else(CNSR == 0, NA_character_, |
145 | 5x |
sample(c("Last Post-Baseline ALT or AST Result", "Treatment Start"), |
146 | 5x |
prob = c(0.9, 0.1), |
147 | 5x |
size = dplyr::n(), replace = TRUE |
148 |
) |
|
149 |
) |
|
150 |
) %>% |
|
151 | 5x |
dplyr::rowwise() %>% |
152 | 5x |
dplyr::mutate(ADTM = dplyr::case_when( |
153 | 5x |
CNSDTDSC == "Treatment Start" ~ TRTSDTM, |
154 | 5x |
TRUE ~ TRTSDTM + sample(seq(0, study_duration_secs), size = dplyr::n(), replace = TRUE) |
155 |
)) %>% |
|
156 | 5x |
dplyr::mutate( |
157 | 5x |
ADY_int = lubridate::date(ADTM) - lubridate::date(TRTSDTM) + 1, |
158 | 5x |
ADY = as.numeric(ADY_int), |
159 | 5x |
AVAL = lubridate::days(ADY_int) / lubridate::weeks(1), |
160 | 5x |
AVALU = "WEEKS" |
161 |
) %>% |
|
162 | 5x |
dplyr::select(-TRTSDTM, -ADY_int) |
163 | ||
164 | 5x |
random_ae_data <- function(lookup_info, patient_info, patient_data) { |
165 | 150x |
cnsr <- sample(c(0, 1), 1, prob = c(1 - lookup_info$CNSR_P, lookup_info$CNSR_P)) |
166 | 150x |
ae_rep_tte <- patient_data$AVAL[patient_data$PARAMCD == "AEREPTTE"] |
167 | 150x |
data.frame( |
168 | 150x |
ARM = rep(patient_data$ARM, 2), |
169 | 150x |
STUDYID = rep(patient_data$STUDYID, 2), |
170 | 150x |
SITEID = rep(patient_data$SITEID, 2), |
171 | 150x |
USUBJID = rep(patient_data$USUBJID, 2), |
172 | 150x |
PARAMCD = c( |
173 | 150x |
paste0("AETTE", lookup_info$CATCD), |
174 | 150x |
paste0("AETOT", lookup_info$CATCD) |
175 |
), |
|
176 | 150x |
PARAM = c( |
177 | 150x |
paste("Time to first occurrence of", lookup_info$CAT), |
178 | 150x |
paste("Number of occurrences of", lookup_info$CAT) |
179 |
), |
|
180 | 150x |
CNSR = c( |
181 | 150x |
cnsr, |
182 | 150x |
NA |
183 |
), |
|
184 | 150x |
AVAL = c( |
185 |
# We generate these values conditional on the censoring information. |
|
186 |
# If this time to event is censored, then there were no AEs reported and the time is set |
|
187 |
# to the AE reporting period time. Otherwise we draw from truncated distributions to make |
|
188 |
# sure that we are within the AE reporting time and above 0 AEs. |
|
189 | 150x |
ifelse(cnsr == 1, ae_rep_tte, rtexp(1, lookup_info$LAMBDA * 365.25, r = ae_rep_tte)), |
190 | 150x |
ifelse(cnsr == 1, 0, rtpois(1, lookup_info$LAMBDA * 365.25)) |
191 |
), |
|
192 | 150x |
AVALU = c( |
193 | 150x |
"YEARS", |
194 | 150x |
NA |
195 |
), |
|
196 | 150x |
EVNTDESC = c( |
197 | 150x |
ifelse(cnsr == 0, sample(evntdescr_sel, 1), ""), |
198 | 150x |
NA |
199 |
), |
|
200 | 150x |
CNSDTDSC = c( |
201 | 150x |
ifelse(cnsr == 1, sample(cnsdtdscr_sel, 1), ""), |
202 | 150x |
NA |
203 |
), |
|
204 | 150x |
stringsAsFactors = FALSE |
205 | 150x |
) %>% dplyr::mutate( |
206 | 150x |
ADY = dplyr::if_else(is.na(AVALU), NA_real_, ceiling(as.numeric(lubridate::dyears(AVAL), "days"))), |
207 | 150x |
ADTM = dplyr::if_else( |
208 | 150x |
is.na(AVALU), |
209 | 150x |
lubridate::as_datetime(NA), |
210 | 150x |
patient_info$TRTSDTM + lubridate::days(ADY) |
211 |
) |
|
212 |
) |
|
213 |
} |
|
214 | ||
215 | 5x |
adaette <- split(adsl, adsl$USUBJID) %>% |
216 | 5x |
lapply(function(patient_info) { |
217 | 50x |
patient_data <- random_patient_data(patient_info) |
218 | 50x |
lookup_arm <- lookup_adaette %>% |
219 | 50x |
dplyr::filter(ARM == as.character(patient_info$ARMCD)) |
220 | 50x |
ae_data <- split(lookup_arm, lookup_arm$CATCD) %>% |
221 | 50x |
lapply(random_ae_data, patient_data = patient_data, patient_info = patient_info) %>% |
222 | 50x |
Reduce(rbind, .) |
223 | 50x |
dplyr::bind_rows(patient_data, ae_data) |
224 |
}) %>% |
|
225 | 5x |
Reduce(rbind, .) %>% |
226 | 5x |
var_relabel( |
227 | 5x |
STUDYID = "Study Identifier", |
228 | 5x |
USUBJID = "Unique Subject Identifier" |
229 |
) |
|
230 | ||
231 | 5x |
adaette <- var_relabel( |
232 | 5x |
adaette, |
233 | 5x |
STUDYID = "Study Identifier", |
234 | 5x |
USUBJID = "Unique Subject Identifier" |
235 |
) |
|
236 | ||
237 | 5x |
adaette <- rbind(adaette, adaette_hy) |
238 | ||
239 | 5x |
adaette <- dplyr::inner_join( |
240 | 5x |
dplyr::select(adaette, -"SITEID", -"ARM"), |
241 | 5x |
adsl, |
242 | 5x |
by = c("STUDYID", "USUBJID") |
243 |
) %>% |
|
244 | 5x |
dplyr::group_by(USUBJID) %>% |
245 | 5x |
dplyr::arrange(ADTM) %>% |
246 | 5x |
dplyr::mutate(TTESEQ = seq_len(dplyr::n())) %>% |
247 | 5x |
dplyr::mutate(ASEQ = TTESEQ) %>% |
248 | 5x |
dplyr::mutate(PARAM = as.factor(PARAM)) %>% |
249 | 5x |
dplyr::mutate(PARAMCD = as.factor(PARAMCD)) %>% |
250 | 5x |
dplyr::ungroup() %>% |
251 | 5x |
dplyr::arrange( |
252 | 5x |
STUDYID, |
253 | 5x |
USUBJID, |
254 | 5x |
PARAMCD, |
255 | 5x |
ADTM, |
256 | 5x |
TTESEQ |
257 |
) |
|
258 | ||
259 | 5x |
if (length(na_vars) > 0 && na_percentage > 0) { |
260 | ! |
adaette <- dplyr::mutate(ds = adaette, na_vars = na_vars, na_percentage = na_percentage) |
261 |
} |
|
262 | ||
263 |
# apply metadata |
|
264 | 5x |
adaette <- apply_metadata(adaette, "metadata/ADAETTE.yml") |
265 | ||
266 | 5x |
return(adaette) |
267 |
} |
1 |
#' Load Cached Data |
|
2 |
#' |
|
3 |
#' Return data attached to package. |
|
4 |
#' |
|
5 |
#' @keywords internal |
|
6 |
#' @noRd |
|
7 |
get_cached_data <- function(dataname) { |
|
8 | 22x |
checkmate::assert_string(dataname) |
9 | 22x |
if (!("package:random.cdisc.data" %in% search())) { |
10 | 1x |
stop("cached data can only be loaded if the random.cdisc.data package is attached.", |
11 | 1x |
"Please run library(random.cdisc.data) before loading cached data.", |
12 | 1x |
call. = FALSE |
13 |
) |
|
14 |
} else { |
|
15 | 21x |
get(dataname, envir = asNamespace("random.cdisc.data")) |
16 |
} |
|
17 |
} |
|
18 | ||
19 |
#' Create a Factor with Random Elements of x |
|
20 |
#' |
|
21 |
#' Sample elements from `x` with replacement to build a factor. |
|
22 |
#' |
|
23 |
#' @param x (`character vector` or `factor`)\cr If character vector then it is also used |
|
24 |
#' as levels of the returned factor. If factor then the levels are used as the new levels. |
|
25 |
#' @param N (`numeric`)\cr Number of items to choose. |
|
26 |
#' @param ... Additional arguments to be passed to `sample`. |
|
27 |
#' |
|
28 |
#' @return A factor of length `N`. |
|
29 |
#' @export |
|
30 |
#' |
|
31 |
#' @examples |
|
32 |
#' sample_fct(letters[1:3], 10) |
|
33 |
#' sample_fct(iris$Species, 10) |
|
34 |
sample_fct <- function(x, N, ...) { # nolint |
|
35 | 296x |
checkmate::assert_number(N) |
36 | ||
37 | 296x |
factor(sample(x, N, replace = TRUE, ...), levels = if (is.factor(x)) levels(x) else x) |
38 |
} |
|
39 | ||
40 |
#' Related Variables: Initialize |
|
41 |
#' |
|
42 |
#' Verify and initialize related variable values. |
|
43 |
#' For example, `relvar_init("Alanine Aminotransferase Measurement", "ALT")`. |
|
44 |
#' |
|
45 |
#' @param relvar1 (`list` of `character`)\cr List of n elements. |
|
46 |
#' @param relvar2 (`list` of `character`)\cr List of n elements. |
|
47 |
#' |
|
48 |
#' @return A vector of n elements. |
|
49 |
#' |
|
50 |
#' @keywords internal |
|
51 |
relvar_init <- function(relvar1, relvar2) { |
|
52 | 64x |
checkmate::assert_character(relvar1, min.len = 1, any.missing = FALSE) |
53 | 64x |
checkmate::assert_character(relvar2, min.len = 1, any.missing = FALSE) |
54 | ||
55 | 64x |
if (length(relvar1) != length(relvar2)) { |
56 | 1x |
message(simpleError( |
57 | 1x |
"The argument value length of relvar1 and relvar2 differ. They must contain the same number of elements." |
58 |
)) |
|
59 | ! |
return(NA) |
60 |
} |
|
61 | 63x |
return(list("relvar1" = relvar1, "relvar2" = relvar2)) |
62 |
} |
|
63 | ||
64 |
#' Related Variables: Assign |
|
65 |
#' |
|
66 |
#' Assign values to a related variable within a domain. |
|
67 |
#' |
|
68 |
#' @param df (`data.frame`)\cr Data frame containing the related variables. |
|
69 |
#' @param var_name (`character`)\cr Name of variable related to `rel_var` to add to `df`. |
|
70 |
#' @param var_values (`any`)\cr Vector of values related to values of `related_var`. |
|
71 |
#' @param related_var (`character`)\cr Name of variable within `df` with values to which values |
|
72 |
#' of `var_name` must relate. |
|
73 |
#' |
|
74 |
#' @return `df` with added factor variable `var_name` containing `var_values` corresponding to `related_var`. |
|
75 |
#' @export |
|
76 |
#' |
|
77 |
#' @examples |
|
78 |
#' # Example with data.frame. |
|
79 |
#' params <- c("Level A", "Level B", "Level C") |
|
80 |
#' adlb_df <- data.frame( |
|
81 |
#' ID = 1:9, |
|
82 |
#' PARAM = factor( |
|
83 |
#' rep(c("Level A", "Level B", "Level C"), 3), |
|
84 |
#' levels = params |
|
85 |
#' ) |
|
86 |
#' ) |
|
87 |
#' rel_var( |
|
88 |
#' df = adlb_df, |
|
89 |
#' var_name = "PARAMCD", |
|
90 |
#' var_values = c("A", "B", "C"), |
|
91 |
#' related_var = "PARAM" |
|
92 |
#' ) |
|
93 |
#' |
|
94 |
#' # Example with tibble. |
|
95 |
#' adlb_tbl <- tibble::tibble( |
|
96 |
#' ID = 1:9, |
|
97 |
#' PARAM = factor( |
|
98 |
#' rep(c("Level A", "Level B", "Level C"), 3), |
|
99 |
#' levels = params |
|
100 |
#' ) |
|
101 |
#' ) |
|
102 |
#' rel_var( |
|
103 |
#' df = adlb_tbl, |
|
104 |
#' var_name = "PARAMCD", |
|
105 |
#' var_values = c("A", "B", "C"), |
|
106 |
#' related_var = "PARAM" |
|
107 |
#' ) |
|
108 |
rel_var <- function(df, var_name, related_var, var_values = NULL) { |
|
109 | 64x |
checkmate::assert_data_frame(df) |
110 | 64x |
checkmate::assert_string(var_name) |
111 | 64x |
checkmate::assert_string(related_var) |
112 | 64x |
n_relvar1 <- length(unique(df[, related_var, drop = TRUE])) |
113 | 64x |
checkmate::assert_vector(var_values, null.ok = TRUE, len = n_relvar1, any.missing = FALSE) |
114 | 1x |
if (is.null(var_values)) var_values <- rep(NA, n_relvar1) |
115 | ||
116 | 64x |
relvar1 <- unique(df[, related_var, drop = TRUE]) |
117 | 64x |
relvar2_values <- rep(NA, nrow(df)) |
118 | 64x |
for (r in seq_len(n_relvar1)) { |
119 | 538x |
matched <- which(df[, related_var, drop = TRUE] == relvar1[r]) |
120 | 538x |
relvar2_values[matched] <- var_values[r] |
121 |
} |
|
122 | 64x |
df[[var_name]] <- factor(relvar2_values) |
123 | 64x |
return(df) |
124 |
} |
|
125 | ||
126 |
#' Create Visit Schedule |
|
127 |
#' |
|
128 |
#' Create a visit schedule as a factor. |
|
129 |
#' |
|
130 |
#' X number of visits, or X number of cycles and Y number of days. |
|
131 |
#' |
|
132 |
#' @inheritParams argument_convention |
|
133 |
#' |
|
134 |
#' @return A factor of length `n_assessments`. |
|
135 |
#' @export |
|
136 |
#' |
|
137 |
#' @examples |
|
138 |
#' visit_schedule(visit_format = "WEeK", n_assessments = 10L) |
|
139 |
#' visit_schedule(visit_format = "CyCLE", n_assessments = 5L, n_days = 2L) |
|
140 |
visit_schedule <- function(visit_format = "WEEK", |
|
141 |
n_assessments = 10L, |
|
142 |
n_days = 5L) { |
|
143 | 56x |
checkmate::assert_string(visit_format, pattern = "^WEEK$|^CYCLE$", ignore.case = TRUE) |
144 | 56x |
checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE) |
145 | 56x |
checkmate::assert_integer(n_days, len = 1, any.missing = FALSE) |
146 | ||
147 | 56x |
if (toupper(visit_format) == "WEEK") { |
148 |
# numeric vector of n assessments/cycles/days |
|
149 | 49x |
assessments <- 1:n_assessments |
150 |
# numeric vector for ordering including screening (-1) and baseline (0) place holders |
|
151 | 49x |
assessments_ord <- -1:n_assessments |
152 |
# character vector of nominal visit values |
|
153 | 49x |
visit_values <- c("SCREENING", "BASELINE", paste(toupper(visit_format), assessments, "DAY", (assessments * 7) + 1)) |
154 | 7x |
} else if (toupper(visit_format) == "CYCLE") { |
155 | 7x |
cycles <- sort(rep(1:n_assessments, times = 1, each = n_days)) |
156 | 7x |
days <- rep(seq(1:n_days), times = n_assessments, each = 1) |
157 | 7x |
assessments_ord <- 0:(n_assessments * n_days) |
158 | 7x |
visit_values <- c("SCREENING", paste(toupper(visit_format), cycles, "DAY", days)) |
159 |
} |
|
160 | ||
161 |
# create and order factor variable to return from function |
|
162 | 56x |
visit_values <- stats::reorder(factor(visit_values), assessments_ord) |
163 |
} |
|
164 | ||
165 |
#' Primary Keys: Retain Values |
|
166 |
#' |
|
167 |
#' Retain values within primary keys. |
|
168 |
#' |
|
169 |
#' @param df (`data.frame`)\cr Data frame in which to apply the retain. |
|
170 |
#' @param value_var (`any`)\cr Variable in `df` containing the value to be retained. |
|
171 |
#' @param event (`expression`)\cr Expression returning a logical value to trigger the retain. |
|
172 |
#' @param outside (`any`)\cr Additional value to retain. Defaults to `NA`. |
|
173 |
#' @return A vector of values where expression is true. |
|
174 |
#' @keywords internal |
|
175 |
retain <- function(df, value_var, event, outside = NA) { |
|
176 | 31x |
indices <- c(1, which(event == TRUE), nrow(df) + 1) |
177 | 31x |
values <- c(outside, value_var[event == TRUE]) |
178 | 31x |
rep(values, diff(indices)) |
179 |
} |
|
180 | ||
181 |
#' Primary Keys: Labels |
|
182 |
#' |
|
183 |
#' Relabel a subset of variables in a data set. |
|
184 |
#' |
|
185 |
#' @param x (`data.frame`)\cr Data frame containing variables to which labels are applied. |
|
186 |
#' @param ... (`named character`)\cr Name-Value pairs, where name corresponds to a variable |
|
187 |
#' name in `x` and the value to the new variable label. |
|
188 |
#' @return x (`data.frame`)\cr Data frame with labels applied. |
|
189 |
#' |
|
190 |
#' @export |
|
191 |
#' |
|
192 |
#' @examples |
|
193 |
#' adsl <- radsl() |
|
194 |
#' var_relabel(adsl, |
|
195 |
#' STUDYID = "Study Identifier", |
|
196 |
#' USUBJID = "Unique Subject Identifier" |
|
197 |
#' ) |
|
198 |
var_relabel <- function(x, ...) { |
|
199 | 82x |
dots <- list(...) |
200 | 82x |
varnames <- names(dots) |
201 | 82x |
if (is.null(varnames)) { |
202 | 1x |
stop("missing variable declarations") |
203 |
} |
|
204 | 81x |
map_varnames <- match(varnames, names(x)) |
205 | 81x |
for (i in seq_along(map_varnames)) { |
206 | 161x |
attr(x[[map_varnames[[i]]]], "label") <- dots[[i]] |
207 |
} |
|
208 | 81x |
x |
209 |
} |
|
210 | ||
211 |
#' Apply Metadata |
|
212 |
#' |
|
213 |
#' Apply label and variable ordering attributes to domains. |
|
214 |
#' |
|
215 |
#' @param df (`data.frame`)\cr Data frame to which metadata is applied. |
|
216 |
#' @param filename (`yaml`)\cr File containing domain metadata. |
|
217 |
#' @param add_adsl (`logical`)\cr Should ADSL data be merged to domain. |
|
218 |
#' @param adsl_filename (`yaml`)\cr File containing ADSL metadata. |
|
219 |
#' @return Data frame with metadata applied. |
|
220 |
#' |
|
221 |
#' @export |
|
222 |
#' @examples |
|
223 |
#' seed <- 1 |
|
224 |
#' adsl <- radsl(seed = seed) |
|
225 |
#' adsub <- radsub(adsl, seed = seed) |
|
226 |
#' yaml_path <- file.path(path.package("random.cdisc.data"), "inst", "metadata") |
|
227 |
#' adsl <- apply_metadata(adsl, file.path(yaml_path, "ADSL.yml"), FALSE) |
|
228 |
#' adsub <- apply_metadata( |
|
229 |
#' adsub, file.path(yaml_path, "ADSUB.yml"), TRUE, |
|
230 |
#' file.path(yaml_path, "ADSL.yml") |
|
231 |
#' ) |
|
232 |
apply_metadata <- function(df, filename, add_adsl = TRUE, adsl_filename = "metadata/ADSL.yml") { |
|
233 | 90x |
checkmate::assert_data_frame(df) |
234 | 90x |
checkmate::assert_string(filename) |
235 | 90x |
checkmate::assert_flag(add_adsl) |
236 | 90x |
checkmate::assert_string(adsl_filename) |
237 | ||
238 | 90x |
apply_type <- function(df, var, type) { |
239 | 5986x |
if (is.null(type)) { |
240 | ! |
return() |
241 |
} |
|
242 | ||
243 | 5986x |
if (type == "character" && !is.character(df[[var]])) { |
244 | 12x |
df[[var]] <- as.character(df[[var]]) |
245 | 5974x |
} else if (type == "factor" && !is.factor(df[[var]])) { |
246 | 730x |
df[[var]] <- as.factor(df[[var]]) |
247 | 5244x |
} else if (type == "integer" && !is.integer(df[[var]])) { |
248 | 225x |
df[[var]] <- as.integer(df[[var]]) |
249 | 5019x |
} else if (type == "numeric" && !is.numeric(df[[var]])) { |
250 | 3x |
df[[var]] <- as.numeric(df[[var]]) |
251 | 5016x |
} else if (type == "logical" && !is.logical(df[[var]])) { |
252 | ! |
df[[var]] <- as.logical(df[[var]]) |
253 | 5016x |
} else if (type == "datetime" && !lubridate::is.POSIXct(df[[var]])) { |
254 | 9x |
df[[var]] <- as.POSIXct(df[[var]]) |
255 | 5007x |
} else if (type == "date" && !lubridate::is.Date(df[[var]])) { |
256 | ! |
df[[var]] <- as.Date(df[[var]]) |
257 |
} |
|
258 | 5986x |
return(df) |
259 |
} |
|
260 | ||
261 |
# remove existing attributes |
|
262 | 90x |
for (i in base::setdiff(names(attributes(df)), names(attributes(data.frame())))) { |
263 | 3x |
attr(df, i) <- NULL |
264 |
} |
|
265 | ||
266 |
# get metadata |
|
267 | 90x |
metadata <- yaml::yaml.load_file(system.file(filename, package = "random.cdisc.data")) |
268 | 90x |
adsl_metadata <- if (add_adsl) { |
269 | 64x |
yaml::yaml.load_file(system.file(adsl_filename, package = "random.cdisc.data")) |
270 |
} else { |
|
271 | 26x |
NULL |
272 |
} |
|
273 | 90x |
metadata_variables <- append(adsl_metadata$variables, metadata$variables) |
274 | 90x |
metadata_varnames <- names(metadata_variables) |
275 | ||
276 |
# find variables that does not have labels and are not it metadata |
|
277 | 90x |
missing_vars_map <- vapply( |
278 | 90x |
names(df), |
279 | 90x |
function(x) { |
280 | 5986x |
!(x %in% c("STUDYID", "USUBJID", metadata_varnames)) && is.null(attr(df[[x]], "label")) |
281 |
}, |
|
282 | 90x |
logical(1) |
283 |
) |
|
284 | 90x |
missing_vars <- names(df)[missing_vars_map] |
285 | 90x |
if (length(missing_vars) > 0) { |
286 | ! |
msg <- paste0( |
287 | ! |
"Following variables does not have label or are not found in ", |
288 | ! |
filename, |
289 |
": ", |
|
290 | ! |
paste0(missing_vars, collapse = ", ") |
291 |
) |
|
292 | ! |
warning(msg) |
293 |
} |
|
294 | ||
295 | 90x |
if (!all(metadata_varnames %in% names(df))) { |
296 | 6x |
metadata_varnames <- metadata_varnames[metadata_varnames %in% names(df)] |
297 |
} |
|
298 | ||
299 |
# assign labels to variables |
|
300 | 90x |
for (var in metadata_varnames) { |
301 | 5986x |
df <- apply_type(df, var, metadata_variables[[var]]$type) |
302 | 5986x |
attr(df[[var]], "label") <- metadata_variables[[var]]$label |
303 |
} |
|
304 | ||
305 |
# reorder data frame columns to expected BDS order |
|
306 | 90x |
df <- df[, unique(c("STUDYID", "USUBJID", metadata_varnames, names(df)))] |
307 | ||
308 |
# assign label to data frame |
|
309 | 90x |
attr(df, "label") <- metadata$domain$label |
310 | ||
311 | 90x |
df |
312 |
} |
|
313 | ||
314 |
#' Replace Values in a Vector by NA |
|
315 |
#' |
|
316 |
#' @description `r lifecycle::badge("stable")` |
|
317 |
#' |
|
318 |
#' Randomized replacement of values by `NA`. |
|
319 |
#' |
|
320 |
#' @inheritParams argument_convention |
|
321 |
#' @param v (`any`)\cr Vector of any type. |
|
322 |
#' @param percentage (`proportion`)\cr Value between 0 and 1 defining |
|
323 |
#' how much of the vector shall be replaced by `NA`. This number |
|
324 |
#' is randomized by +/- 5% to have full randomization. |
|
325 |
#' |
|
326 |
#' @return The input vector `v` where a certain number of values are replaced by `NA`. |
|
327 |
#' |
|
328 |
#' @export |
|
329 |
replace_na <- function(v, percentage = 0.05, seed = NULL) { |
|
330 | 9x |
checkmate::assert_number(percentage, lower = 0, upper = 1) |
331 | ||
332 | 9x |
if (percentage == 0) { |
333 | 1x |
return(v) |
334 |
} |
|
335 | ||
336 | 8x |
if (!is.null(seed) && !is.na(seed)) { |
337 | 8x |
set.seed(seed) |
338 |
} |
|
339 | ||
340 |
# randomize the percentage |
|
341 | 8x |
ind <- sample(seq_along(v), round(length(v) * percentage)) |
342 | ||
343 | 8x |
v[ind] <- NA |
344 | ||
345 | 8x |
return(v) |
346 |
} |
|
347 | ||
348 |
#' Replace Values with NA |
|
349 |
#' |
|
350 |
#' @description `r lifecycle::badge("stable")` |
|
351 |
#' |
|
352 |
#' Replace column values with `NA`s. |
|
353 |
#' |
|
354 |
#' @inheritParams argument_convention |
|
355 |
#' @param ds (`data.frame`)\cr Any data set. |
|
356 |
#' |
|
357 |
#' @return dataframe without `NA` values. |
|
358 |
#' |
|
359 |
#' @export |
|
360 |
mutate_na <- function(ds, na_vars = NULL, na_percentage = 0.05) { |
|
361 | 5x |
if (!is.null(na_vars)) { |
362 | 4x |
stopifnot(is.list(na_vars)) # any list is OK; as values can be left NA |
363 | 4x |
stopifnot(length(names(na_vars)) == length(na_vars)) # names for all elements |
364 |
} else { |
|
365 | 1x |
na_vars <- names(ds) |
366 |
} |
|
367 | ||
368 | 5x |
stopifnot(is.numeric(na_percentage)) |
369 | 5x |
stopifnot(na_percentage >= 0 && na_percentage < 1) |
370 | ||
371 | 5x |
for (na_var in names(na_vars)) { |
372 | 8x |
if (!is.na(na_var)) { |
373 | 8x |
if (!na_var %in% names(ds)) { |
374 | 1x |
warning(paste(na_var, "not in column names")) |
375 |
} else { |
|
376 | 7x |
ds <- ds %>% |
377 | 7x |
ungroup_rowwise_df() %>% |
378 | 7x |
dplyr::mutate( |
379 | 7x |
!!na_var := ds[[na_var]] %>% |
380 | 7x |
replace_na( |
381 | 7x |
percentage = ifelse(is.na(na_vars[[na_var]][2]), na_percentage, na_vars[[na_var]][2]), |
382 | 7x |
seed = na_vars[[na_var]][1] |
383 |
) |
|
384 |
) |
|
385 |
} |
|
386 |
} |
|
387 |
} |
|
388 | 5x |
return(ds) |
389 |
} |
|
390 | ||
391 |
ungroup_rowwise_df <- function(x) { |
|
392 | 7x |
class(x) <- c("tbl", "tbl_df", "data.frame") |
393 | 7x |
return(x) |
394 |
} |
|
395 | ||
396 |
#' Zero-Truncated Poisson Distribution |
|
397 |
#' |
|
398 |
#' @description `r lifecycle::badge("stable")` |
|
399 |
#' |
|
400 |
#' This generates random numbers from a zero-truncated Poisson distribution, |
|
401 |
#' i.e. from `X | X > 0` when `X ~ Poisson(lambda)`. The advantage here is that |
|
402 |
#' we guarantee to return exactly `n` numbers and without using a loop internally. |
|
403 |
#' This solution was provided in a post by |
|
404 |
#' [Peter Dalgaard](https://stat.ethz.ch/pipermail/r-help/2005-May/070680.html). |
|
405 |
#' |
|
406 |
#' @param n (`numeric`)\cr Number of random numbers. |
|
407 |
#' @param lambda (`numeric`)\cr Non-negative mean(s). |
|
408 |
#' |
|
409 |
#' @return The random numbers. |
|
410 |
#' @export |
|
411 |
#' |
|
412 |
#' @examples |
|
413 |
#' x <- rpois(1e6, lambda = 5) |
|
414 |
#' x <- x[x > 0] |
|
415 |
#' hist(x) |
|
416 |
#' |
|
417 |
#' y <- rtpois(1e6, lambda = 5) |
|
418 |
#' hist(y) |
|
419 |
rtpois <- function(n, lambda) { |
|
420 | 121x |
stats::qpois(stats::runif(n, stats::dpois(0, lambda), 1), lambda) |
421 |
} |
|
422 | ||
423 |
#' Truncated Exponential Distribution |
|
424 |
#' |
|
425 |
#' @description `r lifecycle::badge("stable")` |
|
426 |
#' |
|
427 |
#' This generates random numbers from a truncated Exponential distribution, |
|
428 |
#' i.e. from `X | X > l` or `X | X < r` when `X ~ Exp(rate)`. The advantage here is that |
|
429 |
#' we guarantee to return exactly `n` numbers and without using a loop internally. |
|
430 |
#' This can be derived from the quantile functions of the left- and right-truncated |
|
431 |
#' Exponential distributions. |
|
432 |
#' |
|
433 |
#' @param n (`numeric`)\cr Number of random numbers. |
|
434 |
#' @param rate (`numeric`)\cr Non-negative rate. |
|
435 |
#' @param l (`numeric`)\cr Positive left-hand truncation parameter. |
|
436 |
#' @param r (`numeric`)\cr Positive right-hand truncation parameter. |
|
437 |
#' |
|
438 |
#' @return The random numbers. If neither `l` nor `r` are provided then the usual Exponential |
|
439 |
#' distribution is used. |
|
440 |
#' @export |
|
441 |
#' |
|
442 |
#' @examples |
|
443 |
#' x <- stats::rexp(1e6, rate = 5) |
|
444 |
#' x <- x[x > 0.5] |
|
445 |
#' hist(x) |
|
446 |
#' |
|
447 |
#' y <- rtexp(1e6, rate = 5, l = 0.5) |
|
448 |
#' hist(y) |
|
449 |
#' |
|
450 |
#' z <- rtexp(1e6, rate = 5, r = 0.5) |
|
451 |
#' hist(z) |
|
452 |
rtexp <- function(n, rate, l = NULL, r = NULL) { |
|
453 | 123x |
if (!is.null(l)) { |
454 | 1x |
l - log(1 - stats::runif(n)) / rate |
455 | 122x |
} else if (!is.null(r)) { |
456 | 121x |
-log(1 - stats::runif(n) * (1 - exp(-r * rate))) / rate |
457 |
} else { |
|
458 | 1x |
stats::rexp(n, rate) |
459 |
} |
|
460 |
} |
1 |
#' Pharmacokinetics Analysis Dataset (ADPC) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating a random Pharmacokinetics Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details One record per study, subject, parameter, and time point. |
|
9 |
#' |
|
10 |
#' @inheritParams argument_convention |
|
11 |
#' @param avalu (`character`)\cr Analysis value units. |
|
12 |
#' @param constants (`character vector`)\cr Constant parameters to be used in formulas for creating analysis values. |
|
13 |
#' @param duration (`numeric`)\cr Duration in number of days. |
|
14 |
#' @template param_cached |
|
15 |
#' @templateVar data adpc |
|
16 |
#' |
|
17 |
#' @return `data.frame` |
|
18 |
#' @export |
|
19 |
#' |
|
20 |
#' @examples |
|
21 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
22 |
#' |
|
23 |
#' adpc <- radpc(adsl, seed = 2) |
|
24 |
#' adpc |
|
25 |
#' |
|
26 |
#' adpc <- radpc(adsl, seed = 2, duration = 3) |
|
27 |
#' adpc |
|
28 |
radpc <- function(adsl, |
|
29 |
avalu = "ug/mL", |
|
30 |
constants = c(D = 100, ka = 0.8, ke = 1), |
|
31 |
duration = 2, |
|
32 |
seed = NULL, |
|
33 |
na_percentage = 0, |
|
34 |
na_vars = list( |
|
35 |
AVAL = c(NA, 0.1) |
|
36 |
), |
|
37 |
cached = FALSE) { |
|
38 | 5x |
checkmate::assert_flag(cached) |
39 | 5x |
if (cached) { |
40 | 1x |
return(get_cached_data("cadpc")) |
41 |
} |
|
42 | ||
43 | 4x |
checkmate::assert_data_frame(adsl) |
44 | 4x |
checkmate::assert_character(avalu, len = 1, any.missing = FALSE) |
45 | 4x |
checkmate::assert_subset(names(constants), c("D", "ka", "ke")) |
46 | 4x |
checkmate::assert_numeric(x = duration, max.len = 1) |
47 | 4x |
checkmate::assert_number(seed, null.ok = TRUE) |
48 | 4x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
49 | 4x |
checkmate::assert_true(na_percentage < 1) |
50 | 4x |
checkmate::assert_list(na_vars) |
51 | ||
52 | 4x |
if (!is.null(seed)) { |
53 | 4x |
set.seed(seed) |
54 |
} |
|
55 | ||
56 | 4x |
radpc_core <- function(day) { |
57 | 8x |
adpc_day <- tidyr::expand_grid( |
58 | 8x |
data.frame( |
59 | 8x |
STUDYID = adsl$STUDYID, |
60 | 8x |
USUBJID = adsl$USUBJID, |
61 | 8x |
ARMCD = adsl$ARMCD, |
62 | 8x |
A0 = unname(constants["D"]), |
63 | 8x |
ka = unname(constants["ka"]) - stats::runif(length(adsl$USUBJID), -0.2, 0.2), |
64 | 8x |
ke = unname(constants["ke"]) - stats::runif(length(adsl$USUBJID), -0.2, 0.2) |
65 |
), |
|
66 | 8x |
PCTPTNUM = if (day == 1) c(0, 0.5, 1, 1.5, 2, 3, 4, 8, 12) else 24 * (day - 1), |
67 | 8x |
PARAM = factor(c("Plasma Drug X", "Urine Drug X", "Plasma Drug Y", "Urine Drug Y")) |
68 |
) |
|
69 | 8x |
adpc_day <- adpc_day[!(grepl("Urine", adpc_day$PARAM) & adpc_day$PCTPTNUM %in% c(0.5, 1, 1.5, 2, 3)), ] %>% |
70 | 8x |
dplyr::arrange(USUBJID, PARAM) %>% |
71 | 8x |
dplyr::mutate( |
72 | 8x |
VISITDY = day, |
73 | 8x |
VISIT = ifelse(day <= 7, paste("Day", VISITDY), paste("Week", (VISITDY - 1) / 7)), |
74 | 8x |
PCVOLU = ifelse(grepl("Urine", PARAM), "mL", ""), |
75 | 8x |
ASMED = ifelse(grepl("Urine", PARAM), "URINE", "PLASMA"), |
76 | 8x |
PCTPT = factor(dplyr::case_when( |
77 | 8x |
PCTPTNUM == 0 ~ "Predose", |
78 | 8x |
(day == 1 & grepl("Urine", PARAM)) ~ |
79 | 8x |
paste0(lag(PCTPTNUM), "H - ", PCTPTNUM, "H"), |
80 | 8x |
(day != 1 & grepl("Urine", PARAM)) ~ |
81 | 8x |
paste0(as.numeric(PCTPTNUM) - 24, "H - ", PCTPTNUM, "H"), |
82 | 8x |
TRUE ~ paste0(PCTPTNUM, "H") |
83 |
)), |
|
84 | 8x |
ARELTM1 = PCTPTNUM, |
85 | 8x |
NRELTM1 = PCTPTNUM, |
86 | 8x |
ARELTM2 = ARELTM1 - (24 * (day - 1)), |
87 | 8x |
NRELTM2 = NRELTM1 - (24 * (day - 1)), |
88 | 8x |
A0 = ifelse(PARAM == "Plasma Drug Y", A0, A0 / 2), |
89 | 8x |
AVAL = round( |
90 | 8x |
(A0 * ka * ( |
91 | 8x |
exp(-ka * ARELTM1) - exp(-ke * ARELTM1) |
92 |
)) |
|
93 | 8x |
/ (ke - ka), |
94 | 8x |
digits = 3 |
95 |
) |
|
96 |
) %>% |
|
97 | 8x |
dplyr::mutate( |
98 | 8x |
PCVOL = ifelse( |
99 | 8x |
ASMED == "URINE", |
100 | 8x |
round(abs(((PCTPTNUM - 1) %% 24) * A0 * ka * exp(PCTPTNUM %% 1.8 / 10)), 2), |
101 | 8x |
NA |
102 |
), |
|
103 |
# PK Equation |
|
104 | 8x |
AVALC = ifelse(AVAL == 0, "BLQ", as.character(AVAL)), |
105 | 8x |
AVALU = avalu, |
106 | 8x |
RELTMU = "hr" |
107 |
) %>% |
|
108 | 8x |
dplyr::select(-c("A0", "ka", "ke")) |
109 | ||
110 | 8x |
return(adpc_day) |
111 |
} |
|
112 | ||
113 | 4x |
adpc <- list() |
114 | ||
115 | 4x |
for (day in seq(duration)[seq(duration) <= 7 | ((seq(duration) - 1) %% 7 == 0)]) { |
116 | 8x |
adpc[[day]] <- radpc_core(day = day) |
117 |
} |
|
118 | ||
119 | 4x |
adpc <- do.call(rbind, adpc) |
120 | ||
121 | 4x |
adpc <- dplyr::inner_join(adpc, adsl, by = c("STUDYID", "USUBJID", "ARMCD")) %>% |
122 | 4x |
dplyr::filter(ACTARM != "B: Placebo", !(ACTARM == "A: Drug X" & PARAM == "Plasma Drug Y")) |
123 | ||
124 | 4x |
if (length(na_vars) > 0 && na_percentage > 0) { |
125 | ! |
adpc <- mutate_na(ds = adpc, na_vars = na_vars, na_percentage = na_percentage) |
126 |
} |
|
127 | ||
128 | 4x |
adpc <- adpc %>% |
129 | 4x |
rename( |
130 | 4x |
AVALCAT1 = AVALC, |
131 | 4x |
NFRLT = NRELTM1, |
132 | 4x |
AFRLT = ARELTM1, |
133 | 4x |
NRRLT = NRELTM2, |
134 | 4x |
ARRLT = ARELTM2 |
135 |
) %>% |
|
136 | 4x |
mutate(ANL02FL = "Y") |
137 | ||
138 | 4x |
adpc <- apply_metadata(adpc, "metadata/ADPC.yml") |
139 |
} |
1 |
#' Generate Anthropometric Measurements for Males and Females. |
|
2 |
#' |
|
3 |
#' Anthropometric measurements are randomly generated using normal approximation. |
|
4 |
#' The default mean and standard deviation values used are based on US National Health |
|
5 |
#' Statistics for adults aged 20 years or over. The measurements are generated in same units |
|
6 |
#' as provided to the function. |
|
7 |
#' |
|
8 |
#' @details One record per subject. |
|
9 |
#' |
|
10 |
#' @inheritParams argument_convention |
|
11 |
#' @param df (`data.frame`)\cr Analysis dataset. |
|
12 |
#' @param id_var (`character`)\cr Patient identifier variable name. |
|
13 |
#' @param sex_var (`character`)\cr Name of variable representing sex of patient. |
|
14 |
#' @param sex_var_level_male (`character`)\cr Level of `sex_var` representing males. |
|
15 |
#' @param male_weight_in_kg (named `list`)\cr List of means and SDs of male weights in kilograms. |
|
16 |
#' @param female_weight_in_kg (named `list`)\cr List of means and SDs of female weights in kilograms. |
|
17 |
#' @param male_height_in_m (named `list`)\cr List of means and SDs of male heights in metres. |
|
18 |
#' @param female_height_in_m (named `list`)\cr list of means and SDs of female heights in metres. |
|
19 |
#' |
|
20 |
#' @return a dataframe with anthropometric measurements for each subject in analysis dataset. |
|
21 |
#' @keywords internal |
|
22 |
h_anthropometrics_by_sex <- function(df, |
|
23 |
seed = 1, |
|
24 |
id_var = "USUBJID", |
|
25 |
sex_var = "SEX", |
|
26 |
sex_var_level_male = "M", |
|
27 |
male_weight_in_kg = list(mean = 90.6, sd = 44.9), |
|
28 |
female_weight_in_kg = list(mean = 77.5, sd = 46.2), |
|
29 |
male_height_in_m = list(mean = 1.75, sd = 0.14), |
|
30 |
female_height_in_m = list(mean = 1.61, sd = 0.24)) { |
|
31 | 3x |
checkmate::assert_data_frame(df) |
32 | 3x |
checkmate::assert_string(id_var) |
33 | 3x |
checkmate::assert_string(sex_var) |
34 | 3x |
checkmate::assert_string(sex_var_level_male) |
35 | 3x |
checkmate::assert_list(male_weight_in_kg, types = "numeric") |
36 | 3x |
checkmate::assert_subset(names(male_weight_in_kg), choices = c("mean", "sd")) |
37 | 3x |
checkmate::assert_list(female_weight_in_kg, types = "numeric") |
38 | 3x |
checkmate::assert_subset(names(female_weight_in_kg), choices = c("mean", "sd")) |
39 | 3x |
checkmate::assert_list(male_height_in_m, types = "numeric") |
40 | 3x |
checkmate::assert_subset(names(male_height_in_m), choices = c("mean", "sd")) |
41 | 3x |
checkmate::assert_list(female_height_in_m, types = "numeric") |
42 | 3x |
checkmate::assert_subset(names(female_height_in_m), choices = c("mean", "sd")) |
43 | ||
44 | ||
45 | 3x |
n <- length(unique(df[[id_var]])) |
46 | 3x |
set.seed(seed) |
47 | ||
48 | 3x |
df_by_sex <- unique(subset(df, select = c(id_var, sex_var))) |
49 | ||
50 | 3x |
df_with_measurements <- df_by_sex %>% |
51 | 3x |
dplyr::mutate( |
52 | 3x |
WEIGHT = ifelse( |
53 | 3x |
.data[[sex_var]] == sex_var_level_male, |
54 | 3x |
stats::rnorm(n = n, mean = male_weight_in_kg$mean, sd = male_weight_in_kg$sd), |
55 | 3x |
stats::rnorm(n = n, mean = female_weight_in_kg$mean, sd = female_weight_in_kg$sd) |
56 |
) |
|
57 |
) %>% |
|
58 | 3x |
dplyr::mutate( |
59 | 3x |
HEIGHT = ifelse( |
60 | 3x |
.data[[sex_var]] == sex_var_level_male, |
61 | 3x |
stats::rnorm(n = n, mean = male_height_in_m$mean, sd = male_height_in_m$sd), |
62 | 3x |
stats::rnorm(n = n, mean = female_height_in_m$mean, sd = female_height_in_m$sd) |
63 |
) |
|
64 |
) %>% |
|
65 | 3x |
dplyr::mutate( |
66 | 3x |
BMI = WEIGHT / ((HEIGHT)^2) |
67 |
) |
|
68 | ||
69 | 3x |
return(df_with_measurements) |
70 |
} |
|
71 | ||
72 |
#' Subcategory Analysis Dataset (ADSUB) |
|
73 |
#' |
|
74 |
#' @description `r lifecycle::badge("stable")` |
|
75 |
#' |
|
76 |
#' Function for generating a random Subcategory Analysis Dataset for a given |
|
77 |
#' Subject-Level Analysis Dataset. |
|
78 |
#' |
|
79 |
#' @details One record per subject. |
|
80 |
#' |
|
81 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `AVISITN`, `ADTM`, `SRCSEQ` |
|
82 |
#' |
|
83 |
#' @inheritParams argument_convention |
|
84 |
#' @template param_cached |
|
85 |
#' @templateVar data adsub |
|
86 |
#' |
|
87 |
#' @return `data.frame` |
|
88 |
#' @export |
|
89 |
#' |
|
90 |
#' @author tomlinsj, npaszty, Xuefeng Hou, dipietrc |
|
91 |
#' |
|
92 |
#' @examples |
|
93 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
94 |
#' |
|
95 |
#' adsub <- radsub(adsl, seed = 2) |
|
96 |
#' adsub |
|
97 |
radsub <- function(adsl, |
|
98 |
param = c( |
|
99 |
"Baseline Weight", |
|
100 |
"Baseline Height", |
|
101 |
"Baseline BMI", |
|
102 |
"Baseline ECOG", |
|
103 |
"Baseline Biomarker Mutation" |
|
104 |
), |
|
105 |
paramcd = c("BWGHTSI", "BHGHTSI", "BBMISI", "BECOG", "BBMRKR1"), |
|
106 |
seed = NULL, |
|
107 |
na_percentage = 0, |
|
108 |
na_vars = list(), |
|
109 |
cached = FALSE) { |
|
110 | 4x |
checkmate::assert_flag(cached) |
111 | 4x |
if (cached) { |
112 | 1x |
return(get_cached_data("cadsub")) |
113 |
} |
|
114 | ||
115 | 3x |
checkmate::assert_data_frame(adsl) |
116 | 3x |
checkmate::assert_character(param, min.len = 1, any.missing = FALSE) |
117 | 3x |
checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE) |
118 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
119 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
120 | 3x |
checkmate::assert_true(na_percentage < 1) |
121 | ||
122 |
# Validate and initialize related variables. |
|
123 | 3x |
param_init_list <- relvar_init(param, paramcd) |
124 | ||
125 | 3x |
if (!is.null(seed)) { |
126 | 3x |
set.seed(seed) |
127 |
} |
|
128 | ||
129 | 3x |
adsub <- expand.grid( |
130 | 3x |
STUDYID = unique(adsl$STUDYID), |
131 | 3x |
USUBJID = adsl$USUBJID, |
132 | 3x |
PARAM = as.factor(param_init_list$relvar1), |
133 | 3x |
AVISIT = "BASELINE", |
134 | 3x |
stringsAsFactors = FALSE |
135 |
) |
|
136 | ||
137 |
# Assign related variable values: PARAM and PARAMCD are related. |
|
138 | 3x |
adsub <- adsub %>% rel_var( |
139 | 3x |
var_name = "PARAMCD", |
140 | 3x |
related_var = "PARAM", |
141 | 3x |
var_values = param_init_list$relvar2 |
142 |
) |
|
143 | ||
144 | 3x |
adsub <- adsub[order(adsub$STUDYID, adsub$USUBJID, adsub$PARAMCD), ] |
145 | ||
146 | 3x |
adsub <- var_relabel( |
147 | 3x |
adsub, |
148 | 3x |
STUDYID = "Study Identifier", |
149 | 3x |
USUBJID = "Unique Subject Identifier" |
150 |
) |
|
151 | ||
152 |
# Merge ADSL to be able to add EG date and study day variables. |
|
153 |
# Sample ADTM to be a few days before TRTSDTM. |
|
154 | 3x |
adsub <- dplyr::inner_join( |
155 | 3x |
adsub, |
156 | 3x |
adsl, |
157 | 3x |
by = c("STUDYID", "USUBJID") |
158 |
) %>% |
|
159 | 3x |
dplyr::group_by(USUBJID) %>% |
160 | 3x |
dplyr::mutate(ADTM = rep( |
161 | 3x |
lubridate::date(TRTSDTM)[1] - lubridate::days(sample(1:10, size = 1)), |
162 | 3x |
each = n() |
163 |
)) %>% |
|
164 | 3x |
dplyr::ungroup() %>% |
165 | 3x |
dplyr::arrange(STUDYID, USUBJID, ADTM) |
166 | ||
167 |
# Generate a dataset with height, weight and BMI measurements for each subject. |
|
168 | 3x |
if (!is.null(seed)) { |
169 | 3x |
df_with_measurements <- h_anthropometrics_by_sex(adsub, seed = seed) |
170 |
} else { |
|
171 | ! |
df_with_measurements <- h_anthropometrics_by_sex(adsub) |
172 |
} |
|
173 | ||
174 |
# Add this to adsub and create other measurements. |
|
175 | 3x |
adsub <- adsub %>% |
176 | 3x |
dplyr::group_by(USUBJID) %>% |
177 | 3x |
dplyr::mutate( |
178 | 3x |
AVAL = dplyr::case_when( |
179 | 3x |
PARAMCD == |
180 | 3x |
"BWGHTSI" ~ df_with_measurements$WEIGHT[df_with_measurements$USUBJID == USUBJID], |
181 | 3x |
PARAMCD == |
182 | 3x |
"BHGHTSI" ~ df_with_measurements$HEIGHT[df_with_measurements$USUBJID == USUBJID], |
183 | 3x |
PARAMCD == |
184 | 3x |
"BBMISI" ~ df_with_measurements$BMI[df_with_measurements$USUBJID == USUBJID], |
185 | 3x |
PARAMCD == "BECOG" ~ sample(c(0, 1, 2, 3, 4, 5), 1), |
186 | 3x |
PARAMCD == "BBMRKR1" ~ sample(c(1, 2), prob = c(0.5, 0.5), 1) |
187 |
) |
|
188 |
) %>% |
|
189 | 3x |
dplyr::arrange(PARAMCD) %>% |
190 | 3x |
dplyr::ungroup() %>% |
191 | 3x |
dplyr::mutate(AVAL = dplyr::case_when( |
192 | 3x |
PARAMCD != "BBMRKR1" | PARAMCD != "BECOG" ~ round(AVAL, 1), |
193 | 3x |
TRUE ~ round(AVAL) |
194 |
)) |
|
195 | ||
196 | 3x |
adsub <- adsub %>% |
197 | 3x |
dplyr::mutate( |
198 | 3x |
AVALC = dplyr::case_when( |
199 | 3x |
PARAMCD == "BBMRKR1" ~ dplyr::case_when( |
200 | 3x |
AVAL == "1" ~ "WILD TYPE", |
201 | 3x |
AVAL == "2" ~ "MUTANT", |
202 | 3x |
TRUE ~ "" |
203 |
), |
|
204 | 3x |
TRUE ~ as.character(AVAL) |
205 |
), |
|
206 | 3x |
AVALU = dplyr::case_when( |
207 | 3x |
PARAMCD == "BWGHTSI" ~ "kg", |
208 | 3x |
PARAMCD == "BHGHTSI" ~ "m", |
209 | 3x |
PARAMCD == "BBMISI" ~ "kg/m2", |
210 | 3x |
TRUE ~ "" |
211 |
), |
|
212 | 3x |
AVALCAT1 = dplyr::case_when( |
213 | 3x |
PARAMCD == "BBMISI" ~ dplyr::case_when( |
214 | 3x |
AVAL < 18.5 ~ "<18.5", |
215 | 3x |
AVAL >= 18.5 & AVAL < 25 ~ "18.5 - 24.9", |
216 | 3x |
AVAL >= 25 & AVAL < 30 ~ "25 - 29.9", |
217 | 3x |
TRUE ~ ">30" |
218 |
), |
|
219 | 3x |
PARAMCD == "BECOG" ~ dplyr::case_when( |
220 | 3x |
AVAL <= 1 ~ "0-1", |
221 | 3x |
AVAL > 1 & AVAL <= 3 ~ "2-3", |
222 | 3x |
TRUE ~ "4-5" |
223 |
), |
|
224 | 3x |
TRUE ~ "" |
225 |
), |
|
226 | 3x |
AVISITN = "0", |
227 | 3x |
SRCSEQ = "1" |
228 |
) %>% |
|
229 | 3x |
dplyr::arrange( |
230 | 3x |
USUBJID, |
231 | 3x |
factor(PARAMCD, levels = c("BWGHTSI", "BHGHTSI", "BBMISI", "BECOG", "BBMRKR1")) |
232 |
) |
|
233 | ||
234 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) { |
235 | ! |
adsub <- mutate_na(ds = adsub, na_vars = na_vars, na_percentage = na_percentage) |
236 |
} |
|
237 | ||
238 |
# Apply metadata. |
|
239 | 3x |
adsub <- apply_metadata(adsub, "metadata/ADSUB.yml") |
240 | ||
241 | 3x |
return(adsub) |
242 |
} |
1 |
#' Vital Signs Analysis Dataset (ADVS) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating a random Vital Signs Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details One record per subject per parameter per analysis visit per analysis date. |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `BASETYPE`, `AVISITN`, `ATPTN`, `DTYPE`, `ADTM`, `VSSEQ`, `ASPID` |
|
11 |
#' |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @template param_cached |
|
14 |
#' @templateVar data advs |
|
15 |
#' |
|
16 |
#' @return `data.frame` |
|
17 |
#' @export |
|
18 |
#' |
|
19 |
#' @author npaszty |
|
20 |
#' |
|
21 |
#' @examples |
|
22 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
23 |
#' |
|
24 |
#' advs <- radvs(adsl, visit_format = "WEEK", n_assessments = 7L, seed = 2) |
|
25 |
#' advs |
|
26 |
#' |
|
27 |
#' advs <- radvs(adsl, visit_format = "CYCLE", n_assessments = 3L, seed = 2) |
|
28 |
#' advs |
|
29 |
radvs <- function(adsl, |
|
30 |
param = c( |
|
31 |
"Diastolic Blood Pressure", |
|
32 |
"Pulse Rate", |
|
33 |
"Respiratory Rate", |
|
34 |
"Systolic Blood Pressure", |
|
35 |
"Temperature", "Weight" |
|
36 |
), |
|
37 |
paramcd = c("DIABP", "PULSE", "RESP", "SYSBP", "TEMP", "WEIGHT"), |
|
38 |
paramu = c("Pa", "beats/min", "breaths/min", "Pa", "C", "Kg"), |
|
39 |
visit_format = "WEEK", |
|
40 |
n_assessments = 5L, |
|
41 |
n_days = 5L, |
|
42 |
seed = NULL, |
|
43 |
na_percentage = 0, |
|
44 |
na_vars = list( |
|
45 |
CHG2 = c(1235, 0.1), PCHG2 = c(1235, 0.1), CHG = c(1234, 0.1), PCHG = c(1234, 0.1), |
|
46 |
AVAL = c(123, 0.1), AVALU = c(123, 0.1) |
|
47 |
), |
|
48 |
cached = FALSE) { |
|
49 | 4x |
checkmate::assert_flag(cached) |
50 | 4x |
if (cached) { |
51 | 1x |
return(get_cached_data("cadvs")) |
52 |
} |
|
53 | ||
54 | 3x |
checkmate::assert_data_frame(adsl) |
55 | 3x |
checkmate::assert_character(param, min.len = 1, any.missing = FALSE) |
56 | 3x |
checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE) |
57 | 3x |
checkmate::assert_character(paramu, min.len = 1, any.missing = FALSE) |
58 | 3x |
checkmate::assert_string(visit_format) |
59 | 3x |
checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE) |
60 | 3x |
checkmate::assert_integer(n_days, len = 1, any.missing = FALSE) |
61 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
62 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
63 | 3x |
checkmate::assert_true(na_percentage < 1) |
64 | ||
65 |
# validate and initialize param vectors |
|
66 | 3x |
param_init_list <- relvar_init(param, paramcd) |
67 | 3x |
unit_init_list <- relvar_init(param, paramu) |
68 | ||
69 | 3x |
if (!is.null(seed)) { |
70 | 3x |
set.seed(seed) |
71 |
} |
|
72 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
73 | ||
74 | 3x |
advs <- expand.grid( |
75 | 3x |
STUDYID = unique(adsl$STUDYID), |
76 | 3x |
USUBJID = adsl$USUBJID, |
77 | 3x |
PARAM = as.factor(param_init_list$relvar1), |
78 | 3x |
AVISIT = visit_schedule(visit_format = visit_format, n_assessments = n_assessments), |
79 | 3x |
stringsAsFactors = FALSE |
80 |
) |
|
81 | ||
82 | 3x |
advs <- dplyr::mutate( |
83 | 3x |
advs, |
84 | 3x |
AVISITN = dplyr::case_when( |
85 | 3x |
AVISIT == "SCREENING" ~ -1, |
86 | 3x |
AVISIT == "BASELINE" ~ 0, |
87 | 3x |
(grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2, |
88 | 3x |
TRUE ~ NA_real_ |
89 |
) |
|
90 |
) |
|
91 | ||
92 | 3x |
advs$VSCAT <- "VITAL SIGNS" |
93 | ||
94 |
# assign related variable values: PARAMxPARAMCD are related |
|
95 | 3x |
advs <- advs %>% rel_var( |
96 | 3x |
var_name = "PARAMCD", |
97 | 3x |
related_var = "PARAM", |
98 | 3x |
var_values = param_init_list$relvar2 |
99 |
) |
|
100 | ||
101 |
# assign related variable values: PARAMxAVALU are related |
|
102 | 3x |
advs <- advs %>% rel_var( |
103 | 3x |
var_name = "AVALU", |
104 | 3x |
related_var = "PARAM", |
105 | 3x |
var_values = unit_init_list$relvar2 |
106 |
) |
|
107 | ||
108 | 3x |
advs <- advs %>% |
109 | 3x |
dplyr::mutate(VSTESTCD = PARAMCD) %>% |
110 | 3x |
dplyr::mutate(VSTEST = PARAM) |
111 | ||
112 | 3x |
advs <- advs %>% dplyr::mutate(AVAL = dplyr::case_when( |
113 | 3x |
PARAMCD == paramcd[1] ~ stats::rnorm(nrow(advs), mean = 100, sd = 20), |
114 | 3x |
PARAMCD == paramcd[2] ~ stats::rnorm(nrow(advs), mean = 80, sd = 15), |
115 | 3x |
PARAMCD == paramcd[3] ~ stats::rnorm(nrow(advs), mean = 16, sd = 5), |
116 | 3x |
PARAMCD == paramcd[4] ~ stats::rnorm(nrow(advs), mean = 150, sd = 30), |
117 | 3x |
PARAMCD == paramcd[5] ~ stats::rnorm(nrow(advs), mean = 36.65, sd = 1), |
118 | 3x |
PARAMCD == paramcd[6] ~ stats::rnorm(nrow(advs), mean = 70, sd = 20) |
119 |
)) |
|
120 | ||
121 |
# order to prepare for change from screening and baseline values |
|
122 | 3x |
advs <- advs[order(advs$STUDYID, advs$USUBJID, advs$PARAMCD, advs$AVISITN), ] |
123 | ||
124 | 3x |
advs <- Reduce(rbind, lapply(split(advs, advs$USUBJID), function(x) { |
125 | 30x |
x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])] |
126 | 30x |
x$ABLFL2 <- ifelse(x$AVISIT == "SCREENING", "Y", "") |
127 | 30x |
x$ABLFL <- ifelse( |
128 | 30x |
toupper(visit_format) == "WEEK" & x$AVISIT == "BASELINE", |
129 | 30x |
"Y", |
130 | 30x |
ifelse( |
131 | 30x |
toupper(visit_format) == "CYCLE" & x$AVISIT == "CYCLE 1 DAY 1", |
132 | 30x |
"Y", |
133 |
"" |
|
134 |
) |
|
135 |
) |
|
136 | 30x |
x |
137 |
})) |
|
138 | ||
139 | 3x |
advs$BASE2 <- retain(advs, advs$AVAL, advs$ABLFL2 == "Y") |
140 | 3x |
advs$BASE <- ifelse(advs$ABLFL2 != "Y", retain(advs, advs$AVAL, advs$ABLFL == "Y"), NA) |
141 | ||
142 | 3x |
advs <- advs %>% |
143 | 3x |
dplyr::mutate(CHG2 = AVAL - BASE2) %>% |
144 | 3x |
dplyr::mutate(PCHG2 = 100 * (CHG2 / BASE2)) %>% |
145 | 3x |
dplyr::mutate(CHG = AVAL - BASE) %>% |
146 | 3x |
dplyr::mutate(PCHG = 100 * (CHG / BASE)) %>% |
147 | 3x |
dplyr::mutate(ANRLO = dplyr::case_when( |
148 | 3x |
PARAMCD == "DIABP" ~ 80, |
149 | 3x |
PARAMCD == "PULSE" ~ 60, |
150 | 3x |
PARAMCD == "RESP" ~ 12, |
151 | 3x |
PARAMCD == "SYSBP" ~ 120, |
152 | 3x |
PARAMCD == "TEMP" ~ 36.1, |
153 | 3x |
PARAMCD == "WEIGHT" ~ 40 |
154 |
)) %>% |
|
155 | 3x |
dplyr::mutate(ANRHI = dplyr::case_when( |
156 | 3x |
PARAMCD == "DIABP" ~ 120, |
157 | 3x |
PARAMCD == "PULSE" ~ 100, |
158 | 3x |
PARAMCD == "RESP" ~ 20, |
159 | 3x |
PARAMCD == "SYSBP" ~ 180, |
160 | 3x |
PARAMCD == "TEMP" ~ 37.2, |
161 | 3x |
PARAMCD == "WEIGHT" ~ 100 |
162 |
)) %>% |
|
163 | 3x |
dplyr::mutate(ANRIND = factor(dplyr::case_when( |
164 | 3x |
AVAL < ANRLO ~ "LOW", |
165 | 3x |
AVAL > ANRHI ~ "HIGH", |
166 | 3x |
TRUE ~ "NORMAL" |
167 |
))) %>% |
|
168 | 3x |
dplyr::mutate(VSSTRESC = dplyr::case_when( |
169 | 3x |
PARAMCD == "DIABP" ~ "<80", |
170 | 3x |
PARAMCD == "PULSE" ~ "<60", |
171 | 3x |
PARAMCD == "RESP" ~ ">20", |
172 | 3x |
PARAMCD == "SYSBP" ~ ">180", |
173 | 3x |
PARAMCD == "TEMP" ~ "<36.1", |
174 | 3x |
PARAMCD == "WEIGHT" ~ "<40" |
175 |
)) %>% |
|
176 | 3x |
dplyr::rowwise() %>% |
177 | 3x |
dplyr::mutate(LOQFL = factor( |
178 | 3x |
ifelse(eval(parse(text = paste(AVAL, VSSTRESC))), "Y", "N") |
179 |
)) %>% |
|
180 | 3x |
dplyr::ungroup() %>% |
181 | 3x |
dplyr::mutate(BASETYPE = "LAST") %>% |
182 | 3x |
dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>% |
183 | 3x |
dplyr::mutate(BNRIND = ANRIND[ABLFL == "Y"]) %>% |
184 | 3x |
dplyr::ungroup() %>% |
185 | 3x |
dplyr::mutate(ATPTN = 1) %>% |
186 | 3x |
dplyr::mutate(DTYPE = NA) %>% |
187 | 3x |
var_relabel( |
188 | 3x |
USUBJID = attr(adsl$USUBJID, "label"), |
189 | 3x |
STUDYID = attr(adsl$STUDYID, "label") |
190 |
) |
|
191 | ||
192 | 3x |
advs <- var_relabel( |
193 | 3x |
advs, |
194 | 3x |
STUDYID = "Study Identifier", |
195 | 3x |
USUBJID = "Unique Subject Identifier" |
196 |
) |
|
197 | ||
198 |
# merge ADSL to be able to add LB date and study day variables |
|
199 | 3x |
advs <- dplyr::inner_join( |
200 | 3x |
advs, |
201 | 3x |
adsl, |
202 | 3x |
by = c("STUDYID", "USUBJID") |
203 |
) %>% |
|
204 | 3x |
dplyr::rowwise() %>% |
205 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
206 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
207 | 3x |
TRUE ~ TRTEDTM |
208 |
))) %>% |
|
209 | 3x |
dplyr::ungroup() |
210 | ||
211 | 3x |
advs <- advs %>% |
212 | 3x |
dplyr::group_by(USUBJID) %>% |
213 | 3x |
dplyr::arrange(USUBJID, AVISITN) %>% |
214 | 3x |
dplyr::mutate(ADTM = rep( |
215 | 3x |
sort(sample( |
216 | 3x |
seq(lubridate::as_datetime(TRTSDTM[1]), lubridate::as_datetime(TRTENDT[1]), by = "day"), |
217 | 3x |
size = nlevels(AVISIT) |
218 |
)), |
|
219 | 3x |
each = n() / nlevels(AVISIT) |
220 |
)) %>% |
|
221 | 3x |
dplyr::ungroup() %>% |
222 | 3x |
dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>% |
223 | 3x |
dplyr::select(-TRTENDT) %>% |
224 | 3x |
dplyr::arrange(STUDYID, USUBJID, ADTM) |
225 | ||
226 | 3x |
advs <- advs %>% dplyr::mutate(ONTRTFL = factor(dplyr::case_when( |
227 | 3x |
!AVISIT %in% c("SCREENING", "BASELINE") ~ "Y", |
228 | 3x |
TRUE ~ "" |
229 |
))) |
|
230 | ||
231 | 3x |
advs <- advs %>% |
232 | 3x |
dplyr::mutate(ASPID = sample(seq_len(dplyr::n()))) %>% |
233 | 3x |
dplyr::group_by(USUBJID) %>% |
234 | 3x |
dplyr::mutate(VSSEQ = seq_len(dplyr::n())) %>% |
235 | 3x |
dplyr::mutate(ASEQ = VSSEQ) %>% |
236 | 3x |
dplyr::ungroup() %>% |
237 | 3x |
dplyr::arrange( |
238 | 3x |
STUDYID, |
239 | 3x |
USUBJID, |
240 | 3x |
PARAMCD, |
241 | 3x |
BASETYPE, |
242 | 3x |
AVISITN, |
243 | 3x |
ATPTN, |
244 | 3x |
DTYPE, |
245 | 3x |
ADTM, |
246 | 3x |
VSSEQ, |
247 | 3x |
ASPID |
248 |
) |
|
249 | ||
250 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) { |
251 | ! |
advs <- mutate_na(ds = advs, na_vars = na_vars, na_percentage = na_percentage) |
252 |
} |
|
253 | ||
254 |
# apply metadata |
|
255 | 3x |
advs <- apply_metadata(advs, "metadata/ADVS.yml") |
256 | ||
257 | 3x |
return(advs) |
258 |
} |
1 |
#' Hy's Law Analysis Dataset (ADHY) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating a random Hy's Law Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details One record per subject per parameter per analysis visit per analysis date. |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `AVISITN`, `ADTM`, `SRCSEQ` |
|
11 |
# |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @template param_cached |
|
14 |
#' @templateVar data adhy |
|
15 |
#' |
|
16 |
#' @return `data.frame` |
|
17 |
#' @export |
|
18 |
#' |
|
19 |
#' @author wojciakw |
|
20 |
#' |
|
21 |
#' @examples |
|
22 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
23 |
#' |
|
24 |
#' adhy <- radhy(adsl, seed = 2) |
|
25 |
#' adhy |
|
26 |
radhy <- function(adsl, |
|
27 |
param = c( |
|
28 |
"TBILI <= 2 times ULN and ALT value category", |
|
29 |
"TBILI > 2 times ULN and AST value category", |
|
30 |
"TBILI > 2 times ULN and ALT value category", |
|
31 |
"TBILI <= 2 times ULN and AST value category", |
|
32 |
"TBILI > 2 times ULN and ALKPH <= 2 times ULN and ALT value category", |
|
33 |
"TBILI > 2 times ULN and ALKPH <= 2 times ULN and AST value category", |
|
34 |
"TBILI > 2 times ULN and ALKPH <= 5 times ULN and ALT value category", |
|
35 |
"TBILI > 2 times ULN and ALKPH <= 5 times ULN and AST value category", |
|
36 |
"TBILI <= 2 times ULN and two consecutive elevations of ALT in relation to ULN", |
|
37 |
"TBILI > 2 times ULN and two consecutive elevations of AST in relation to ULN", |
|
38 |
"TBILI <= 2 times ULN and two consecutive elevations of AST in relation to ULN", |
|
39 |
"TBILI > 2 times ULN and two consecutive elevations of ALT in relation to ULN", |
|
40 |
"TBILI > 2 times ULN and two consecutive elevations of ALT in relation to Baseline", |
|
41 |
"TBILI <= 2 times ULN and two consecutive elevations of ALT in relation to Baseline", |
|
42 |
"TBILI > 2 times ULN and two consecutive elevations of AST in relation to Baseline", |
|
43 |
"TBILI <= 2 times ULN and two consecutive elevations of AST in relation to Baseline", |
|
44 |
"ALT > 3 times ULN by Period", |
|
45 |
"AST > 3 times ULN by Period", |
|
46 |
"ALT or AST > 3 times ULN by Period", |
|
47 |
"ALT > 3 times Baseline by Period", |
|
48 |
"AST > 3 times Baseline by Period", |
|
49 |
"ALT or AST > 3 times Baseline by Period" |
|
50 |
), |
|
51 |
paramcd = c( |
|
52 |
"BLAL", |
|
53 |
"BGAS", |
|
54 |
"BGAL", |
|
55 |
"BLAS", |
|
56 |
"BA2AL", |
|
57 |
"BA2AS", |
|
58 |
"BA5AL", |
|
59 |
"BA5AS", |
|
60 |
"BL2AL2CU", |
|
61 |
"BG2AS2CU", |
|
62 |
"BL2AS2CU", |
|
63 |
"BG2AL2CU", |
|
64 |
"BG2AL2CB", |
|
65 |
"BL2AL2CB", |
|
66 |
"BG2AS2CB", |
|
67 |
"BL2AS2CB", |
|
68 |
"ALTPULN", |
|
69 |
"ASTPULN", |
|
70 |
"ALTASTPU", |
|
71 |
"ALTPBASE", |
|
72 |
"ASTPBASE", |
|
73 |
"ALTASTPB" |
|
74 |
), |
|
75 |
seed = NULL, |
|
76 |
cached = FALSE) { |
|
77 | 4x |
checkmate::assert_flag(cached) |
78 | ||
79 | 4x |
if (cached) { |
80 | 1x |
return(get_cached_data("cadhy")) |
81 |
} |
|
82 | ||
83 | 3x |
checkmate::assert_data_frame(adsl) |
84 | 3x |
checkmate::assert_character(param, min.len = 1, any.missing = FALSE) |
85 | 3x |
checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE) |
86 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
87 | ||
88 |
# validate and initialize related variables |
|
89 | 3x |
param_init_list <- relvar_init(param, paramcd) |
90 | ||
91 | 3x |
if (!is.null(seed)) { |
92 | 3x |
set.seed(seed) |
93 |
} |
|
94 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
95 | ||
96 |
# create all combinations of unique values in STUDYID, USUBJID, PARAM, AVISIT |
|
97 | 3x |
adhy <- expand.grid( |
98 | 3x |
STUDYID = unique(adsl$STUDYID), |
99 | 3x |
USUBJID = adsl$USUBJID, |
100 | 3x |
PARAM = as.factor(param_init_list$relvar1), |
101 | 3x |
AVISIT = as.factor(c("BASELINE", "POST-BASELINE")), |
102 | 3x |
APERIODC = as.factor(c("PERIOD 1", "PERIOD 2")), |
103 | 3x |
stringsAsFactors = FALSE |
104 |
) |
|
105 | ||
106 |
# remove records that are not needed and were created as a side product of expand.grid above |
|
107 | 3x |
adhy <- dplyr::filter(adhy, !(AVISIT == "BASELINE" & APERIODC == "PERIOD 2")) |
108 | ||
109 |
# define TBILI ALT/AST params, period dependent parameters and the parameters that will be assigned values "Y" or "N" |
|
110 | 3x |
paramcd_tbilialtast <- c("BLAL", "BGAS", "BGAL", "BLAS", "BA2AL", "BA2AS", "BA5AL", "BA5AS") |
111 | 3x |
paramcd_by_period <- c("ALTPULN", "ASTPULN", "ALTASTPU", "ALTPBASE", "ASTPBASE", "ALTASTPB") |
112 | 3x |
paramcd_yn <- c( |
113 | 3x |
"BL2AL2CU", "BG2AS2CU", "BL2AS2CU", "BG2AL2CU", "BG2AL2CB", "BL2AL2CB", "BG2AS2CB", "BL2AS2CB", |
114 | 3x |
paramcd_by_period |
115 |
) |
|
116 | ||
117 |
# add other variables to adhy |
|
118 | 3x |
adhy <- adhy %>% |
119 | 3x |
rel_var( |
120 | 3x |
var_name = "PARAMCD", |
121 | 3x |
related_var = "PARAM", |
122 | 3x |
var_values = param_init_list$relvar2 |
123 |
) %>% |
|
124 | 3x |
dplyr::mutate( |
125 | 3x |
AVALC = dplyr::case_when( |
126 | 3x |
PARAMCD %in% paramcd_tbilialtast ~ sample( |
127 | 3x |
x = c(">3-5ULN", ">5-10ULN", ">10-20ULN", ">20ULN", "Criteria not met"), size = dplyr::n(), replace = TRUE |
128 |
), |
|
129 | 3x |
PARAMCD %in% paramcd_yn ~ sample( |
130 | 3x |
x = c("Y", "N"), prob = c(0.1, 0.9), size = dplyr::n(), replace = TRUE |
131 |
) |
|
132 |
), |
|
133 | 3x |
AVAL = dplyr::case_when( |
134 | 3x |
AVALC == ">3-5ULN" ~ 1, |
135 | 3x |
AVALC == ">5-10ULN" ~ 2, |
136 | 3x |
AVALC == ">10-20ULN" ~ 3, |
137 | 3x |
AVALC == ">20ULN" ~ 4, |
138 | 3x |
AVALC == "Y" ~ 1, |
139 | 3x |
AVALC == "N" ~ 0, |
140 | 3x |
AVALC == "Criteria not met" ~ 0 |
141 |
), |
|
142 | 3x |
AVISITN = dplyr::case_when( |
143 | 3x |
AVISIT == "BASELINE" ~ 0L, |
144 | 3x |
AVISIT == "POST-BASELINE" ~ 9995L, |
145 | 3x |
TRUE ~ NA_integer_ |
146 |
), |
|
147 | 3x |
APERIOD = dplyr::case_when( |
148 | 3x |
APERIODC == "PERIOD 1" ~ 1L, |
149 | 3x |
APERIODC == "PERIOD 2" ~ 2L, |
150 | 3x |
TRUE ~ NA_integer_ |
151 |
), |
|
152 | 3x |
ABLFL = dplyr::if_else(AVISIT == "BASELINE", "Y", NA_character_), |
153 | 3x |
ONTRTFL = dplyr::if_else(AVISIT == "POST-BASELINE", "Y", NA_character_), |
154 | 3x |
ANL01FL = "Y", |
155 | 3x |
SRCSEQ = NA_integer_ |
156 |
) |
|
157 | ||
158 |
# remove records for parameters with period 2 and not in paramcd_by_period |
|
159 | 3x |
adhy <- dplyr::filter(adhy, PARAMCD %in% paramcd_by_period | APERIODC == "PERIOD 1") |
160 | ||
161 |
# add baseline variables |
|
162 | 3x |
adhy <- adhy %>% |
163 | 3x |
dplyr::group_by(USUBJID, PARAMCD) %>% |
164 | 3x |
dplyr::mutate( |
165 | 3x |
BASEC = AVALC[AVISIT == "BASELINE"], |
166 | 3x |
BASE = AVAL[AVISIT == "BASELINE"] |
167 |
) %>% |
|
168 | 3x |
dplyr::ungroup() |
169 | ||
170 | 3x |
adhy <- adhy %>% |
171 | 3x |
var_relabel( |
172 | 3x |
STUDYID = attr(adsl$STUDYID, "label"), |
173 | 3x |
USUBJID = attr(adsl$USUBJID, "label") |
174 |
) |
|
175 | ||
176 |
# merge ADSL to be able to add analysis datetime and analysis relative day variables |
|
177 | 3x |
adhy <- dplyr::inner_join(adhy, adsl, by = c("STUDYID", "USUBJID")) |
178 | ||
179 |
# define a simple helper function to create ADY variable |
|
180 | 3x |
add_ady <- function(x, avisit) { |
181 | 6x |
if (avisit == "BASELINE") { |
182 | 3x |
dplyr::mutate( |
183 | 3x |
x, |
184 | 3x |
ADY = sample(x = -(1:14), size = dplyr::n(), replace = TRUE) |
185 |
) |
|
186 | 3x |
} else if (avisit == "POST-BASELINE") { |
187 | 3x |
dplyr::rowwise(x) %>% |
188 | 3x |
dplyr::mutate(ADY = as.integer(sample( |
189 | 3x |
dplyr::if_else( |
190 | 3x |
!is.na(TRTEDTM), |
191 | 3x |
as.numeric(difftime(TRTEDTM, TRTSDTM, units = "days")), |
192 | 3x |
as.numeric(study_duration_secs, "days") |
193 |
), |
|
194 | 3x |
size = 1, |
195 | 3x |
replace = TRUE |
196 |
))) |
|
197 |
} else { |
|
198 | ! |
dplyr::mutate(x, ADY = NA_integer_) |
199 |
} |
|
200 |
} |
|
201 | ||
202 |
# add ADY and ADTM variables |
|
203 | 3x |
adhy <- adhy %>% |
204 | 3x |
dplyr::group_by(AVISIT, .add = FALSE) %>% |
205 | 3x |
dplyr::group_modify(~ add_ady(.x, .y$AVISIT)) %>% |
206 | 3x |
dplyr::ungroup() %>% |
207 | 3x |
dplyr::mutate(ADTM = TRTSDTM + lubridate::days(ADY)) |
208 | ||
209 |
# order columns and arrange rows; column order follows ADaM_1.1 specification |
|
210 | 3x |
adhy <- |
211 | 3x |
adhy[, c( |
212 | 3x |
colnames(adsl), |
213 | 3x |
"PARAM", |
214 | 3x |
"PARAMCD", |
215 | 3x |
"AVAL", |
216 | 3x |
"AVALC", |
217 | 3x |
"BASE", |
218 | 3x |
"BASEC", |
219 | 3x |
"ABLFL", |
220 | 3x |
"ADTM", |
221 | 3x |
"ADY", |
222 | 3x |
"AVISIT", |
223 | 3x |
"AVISITN", |
224 | 3x |
"APERIOD", |
225 | 3x |
"APERIODC", |
226 | 3x |
"ONTRTFL", |
227 | 3x |
"SRCSEQ", |
228 | 3x |
"ANL01FL" |
229 |
)] |
|
230 | ||
231 | 3x |
adhy <- adhy %>% |
232 | 3x |
dplyr::arrange( |
233 | 3x |
STUDYID, |
234 | 3x |
USUBJID, |
235 | 3x |
PARAMCD, |
236 | 3x |
AVISITN, |
237 | 3x |
ADTM, |
238 | 3x |
SRCSEQ |
239 |
) |
|
240 | ||
241 |
# apply metadata |
|
242 | 3x |
adhy <- apply_metadata(adhy, "metadata/ADHY.yml") |
243 | ||
244 | 3x |
return(adhy) |
245 |
} |
1 |
#' Tumor Response Analysis Dataset (ADTR) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating a random Tumor Response Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details One record per subject per parameter per analysis visit per analysis date. |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `BASETYPE`, `AVISITN`, `DTYPE` |
|
11 |
#' |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @param ... Additional arguments to be passed to `radrs`. |
|
14 |
#' @template param_cached |
|
15 |
#' @templateVar data adtr |
|
16 |
#' |
|
17 |
#' @return `data.frame` |
|
18 |
#' @export |
|
19 |
#' |
|
20 |
#' @author tomlinsj, npaszty, Xuefeng Hou, dipietrc |
|
21 |
#' |
|
22 |
#' @examples |
|
23 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
24 |
#' |
|
25 |
#' adtr <- radtr(adsl, seed = 2) |
|
26 |
#' adtr |
|
27 |
radtr <- function(adsl, |
|
28 |
param = c("Sum of Longest Diameter by Investigator"), |
|
29 |
paramcd = c("SLDINV"), |
|
30 |
seed = NULL, |
|
31 |
cached = FALSE, |
|
32 |
...) { |
|
33 | 4x |
checkmate::assert_flag(cached) |
34 | 4x |
if (cached) { |
35 | 1x |
return(get_cached_data("cadtr")) |
36 |
} |
|
37 | 3x |
checkmate::assert_data_frame(adsl) |
38 | 3x |
checkmate::assert_character(param, min.len = 1, any.missing = FALSE) |
39 | 3x |
checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE) |
40 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
41 | 3x |
stopifnot(length(param) == length(paramcd)) |
42 |
# validate and initialize related variables |
|
43 | ||
44 | 3x |
if (!is.null(seed)) { |
45 | 3x |
set.seed(seed) |
46 |
} |
|
47 | ||
48 |
# Make times consistent with ADRS at ADY and ADTM. |
|
49 | 3x |
adrs <- radrs(adsl, seed = seed, ...) %>% |
50 | 3x |
dplyr::filter(PARAMCD == "OVRINV") %>% |
51 | 3x |
dplyr::select( |
52 | 3x |
"STUDYID", |
53 | 3x |
"USUBJID", |
54 | 3x |
"AVISIT", |
55 | 3x |
"AVISITN", |
56 | 3x |
"ADTM", |
57 | 3x |
"ADY" |
58 |
) |
|
59 | ||
60 | 3x |
adtr <- Map(function(parcd, par) { |
61 | 3x |
df <- adrs |
62 | 3x |
df$AVAL <- stats::rnorm(nrow(df), mean = 150, sd = 30) |
63 | 3x |
df$PARAMCD <- parcd |
64 | 3x |
df$PARAM <- par |
65 | 3x |
df |
66 | 3x |
}, paramcd, param) %>% |
67 | 3x |
Reduce(rbind, .) |
68 | ||
69 | 3x |
adtr_base <- adtr %>% |
70 | 3x |
dplyr::filter(AVISITN == 0) %>% |
71 | 3x |
dplyr::group_by(USUBJID, PARAMCD) %>% |
72 | 3x |
dplyr::mutate(BASE = AVAL) %>% |
73 | 3x |
dplyr::select("STUDYID", "USUBJID", "BASE", "PARAMCD") |
74 | ||
75 | 3x |
adtr_postbase <- adtr %>% |
76 | 3x |
dplyr::filter(AVISITN > 0) %>% |
77 | 3x |
dplyr::filter(!is.na(AVAL)) %>% |
78 | 3x |
dplyr::group_by(USUBJID, PARAMCD) %>% |
79 | 3x |
dplyr::filter(AVAL == min(AVAL)) %>% |
80 | 3x |
dplyr::slice(1) %>% |
81 | 3x |
dplyr::mutate(AVISIT = "POST-BASELINE MINIMUM") %>% |
82 | 3x |
dplyr::mutate(DTYPE = "MINIMUM") %>% |
83 | 3x |
dplyr::ungroup() |
84 | ||
85 | 3x |
adtr_lastobs <- adtr %>% |
86 | 3x |
dplyr::filter(AVISITN > 0) %>% |
87 | 3x |
dplyr::filter(!is.na(AVAL)) %>% |
88 | 3x |
dplyr::group_by(USUBJID, PARAMCD) %>% |
89 | 3x |
dplyr::filter(ADTM == max(ADTM, na.rm = TRUE)) %>% |
90 | 3x |
dplyr::slice(1) %>% |
91 | 3x |
dplyr::mutate(LAST_VISIT = AVISIT) %>% |
92 | 3x |
dplyr::ungroup() %>% |
93 | 3x |
dplyr::select( |
94 | 3x |
"STUDYID", |
95 | 3x |
"USUBJID", |
96 | 3x |
"PARAMCD", |
97 | 3x |
"LAST_VISIT" |
98 |
) |
|
99 | ||
100 | 3x |
adtr <- rbind(adtr %>% dplyr::mutate(DTYPE = ""), adtr_postbase) |
101 | ||
102 | 3x |
adtr <- merge(adtr, adtr_base, by = c("STUDYID", "USUBJID", "PARAMCD")) %>% |
103 | 3x |
dplyr::mutate( |
104 | 3x |
ABLFL = dplyr::case_when(AVISIT == "BASELINE" ~ "Y", TRUE ~ ""), |
105 | 3x |
AVAL = dplyr::case_when(AVISIT == "BASELINE" ~ NA_real_, TRUE ~ AVAL), |
106 | 3x |
CHG = dplyr::case_when(AVISITN > 0 ~ AVAL - BASE, TRUE ~ NA_real_), |
107 | 3x |
PCHG = dplyr::case_when(AVISITN > 0 ~ CHG / BASE * 100, TRUE ~ NA_real_), |
108 | 3x |
AVALC = as.character(AVAL), |
109 | 3x |
AVALU = "mm" |
110 |
) |
|
111 | ||
112 |
# ensure PCHG does not exceed 200%, nor go below -100% (double in size, or complete remission of tumor). |
|
113 | 3x |
adtr <- adtr %>% |
114 | 3x |
dplyr::mutate( |
115 | 3x |
PCHG_DUM = PCHG, |
116 | 3x |
PCHG = dplyr::case_when( |
117 | 3x |
PCHG_DUM > 200 ~ 200, |
118 | 3x |
PCHG_DUM < -100 ~ -100, |
119 | 3x |
TRUE ~ PCHG |
120 |
), |
|
121 | 3x |
AVAL = dplyr::case_when( |
122 | 3x |
PCHG_DUM > 200 ~ 3 * BASE, |
123 | 3x |
PCHG_DUM < -100 ~ 0, |
124 | 3x |
TRUE ~ AVAL |
125 |
), |
|
126 | 3x |
CHG = dplyr::case_when( |
127 | 3x |
PCHG_DUM > 200 ~ 2 * BASE, |
128 | 3x |
PCHG_DUM < -100 ~ -BASE, |
129 | 3x |
TRUE ~ CHG |
130 |
) |
|
131 |
) %>% |
|
132 | 3x |
dplyr::select(-"PCHG_DUM") |
133 | ||
134 | 3x |
adtr <- merge(adsl, adtr, by = c("STUDYID", "USUBJID")) %>% |
135 | 3x |
dplyr::group_by(USUBJID, PARAMCD) %>% |
136 | 3x |
dplyr::mutate( |
137 | 3x |
ONTRTFL = factor(dplyr::case_when( |
138 | 3x |
!AVISIT %in% c("SCREENING", "BASELINE", "FOLLOW UP") ~ "Y", |
139 | 3x |
TRUE ~ "" |
140 |
)), |
|
141 | 3x |
ANL01FL = dplyr::case_when( |
142 | 3x |
DTYPE == "" & AVISITN > 0 ~ "Y", |
143 | 3x |
TRUE ~ "" |
144 |
), |
|
145 | 3x |
ANL03FL = dplyr::case_when( |
146 | 3x |
DTYPE == "MINIMUM" ~ "Y", |
147 | 3x |
ABLFL == "Y" ~ "Y", |
148 | 3x |
TRUE ~ "" |
149 |
) |
|
150 |
) |
|
151 | 3x |
adtr <- merge(adtr, adtr_lastobs, by = c("STUDYID", "USUBJID", "PARAMCD")) %>% |
152 | 3x |
dplyr::mutate( |
153 | 3x |
ANL02FL = dplyr::case_when( |
154 | 3x |
as.character(AVISIT) == as.character(LAST_VISIT) ~ "Y", |
155 | 3x |
ABLFL == "Y" ~ "Y", |
156 | 3x |
TRUE ~ "" |
157 |
) |
|
158 |
) %>% |
|
159 | 3x |
dplyr::select(-"LAST_VISIT") |
160 |
# Adding variables that are in ADTR osprey but not RCD. |
|
161 | 3x |
adtr <- adtr %>% |
162 | 3x |
dplyr::mutate( |
163 | 3x |
DCSREAS_GRP = ifelse(DCSREAS == "ADVERSE EVENT", "Safety", "Non-Safety"), |
164 | 3x |
TRTDURD = ifelse( |
165 | 3x |
is.na(TRTSDTM) | is.na(TRTEDTM), |
166 | 3x |
NA, |
167 | 3x |
TRTEDTM - (TRTSDTM + lubridate::days(1)) |
168 |
), |
|
169 | 3x |
AGEGR1 = ifelse(AGE < 65, "<65", ">=65") |
170 |
) |
|
171 | ||
172 |
# apply metadata |
|
173 | 3x |
adtr <- apply_metadata(adtr, "metadata/ADTR.yml") |
174 | 3x |
return(adtr) |
175 |
} |
1 |
#' Protocol Deviations Analysis Dataset (ADDV) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating random Protocol Deviations Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details One record per each record in the corresponding SDTM domain. |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `ASTDT`, `DVTERM`, `DVSEQ` |
|
11 |
#' |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @param max_n_dv (`integer`)\cr Maximum number of deviations per patient. Defaults to 3. |
|
14 |
#' @param p_dv (`proportion`)\cr Probability of a patient having protocol deviations. |
|
15 |
#' @template param_cached |
|
16 |
#' @templateVar data addv |
|
17 |
#' |
|
18 |
#' @return `data.frame` |
|
19 |
#' @export |
|
20 |
#' |
|
21 |
#' @examples |
|
22 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
23 |
#' |
|
24 |
#' addv <- raddv(adsl, seed = 2) |
|
25 |
#' addv |
|
26 |
raddv <- function(adsl, |
|
27 |
max_n_dv = 3L, |
|
28 |
p_dv = 0.15, |
|
29 |
lookup = NULL, |
|
30 |
seed = NULL, |
|
31 |
na_percentage = 0, |
|
32 |
na_vars = list( |
|
33 |
"ASTDT" = c(seed = 1234, percentage = 0.1), |
|
34 |
"DVCAT" = c(seed = 1234, percentage = 0.1) |
|
35 |
), |
|
36 |
cached = FALSE) { |
|
37 | 4x |
checkmate::assert_flag(cached) |
38 | 4x |
if (cached) { |
39 | 1x |
return(get_cached_data("caddv")) |
40 |
} |
|
41 | ||
42 | 3x |
checkmate::assert_data_frame(adsl) |
43 | 3x |
checkmate::assert_integer(max_n_dv, len = 1, lower = 1, any.missing = FALSE) |
44 | 3x |
checkmate::assert_number(p_dv, lower = .Machine$double.xmin, upper = 1) |
45 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
46 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
47 | 3x |
checkmate::assert_true(na_percentage < 1) |
48 | ||
49 | 3x |
if (!is.null(seed)) set.seed(seed) |
50 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
51 | ||
52 | 3x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
53 | 3x |
lookup_dv <- if (!is.null(lookup)) { |
54 | ! |
lookup |
55 |
} else { |
|
56 | 3x |
tibble::tribble( |
57 | 3x |
~DOMAIN, ~DVCAT, ~DVDECOD, ~DVTERM, ~DVREAS, ~DVEPRELI, |
58 | 3x |
"DV", "MAJOR", "EXCLUSION CRITERIA", "Received prior prohibited therapy or medication", "", "N", |
59 | 3x |
"DV", "MAJOR", "EXCLUSION CRITERIA", "Active or untreated or other excluded cns metastases", "", "N", |
60 | 3x |
"DV", "MAJOR", "EXCLUSION CRITERIA", "History of other malignancies within the last 5 years", "", "N", |
61 | 3x |
"DV", "MAJOR", "EXCLUSION CRITERIA", "Uncontrolled concurrent condition", "", "N", |
62 | 3x |
"DV", "MAJOR", "EXCLUSION CRITERIA", "Other exclusion criteria", "", "N", |
63 | 3x |
"DV", "MAJOR", "EXCLUSION CRITERIA", "Pregnancy criteria", "", "N", |
64 | 3x |
"DV", "MAJOR", "INCLUSION CRITERIA", "Does not meet prior therapy requirements", "", "N", |
65 | 3x |
"DV", "MAJOR", "INCLUSION CRITERIA", "Inclusion lab values outside allowed limits", "", "N", |
66 | 3x |
"DV", "MAJOR", "INCLUSION CRITERIA", "No signed ICF at study entry", "", "N", |
67 | 3x |
"DV", "MAJOR", "INCLUSION CRITERIA", "Inclusion-related test not done/out of window", "", "N", |
68 | 3x |
"DV", "MAJOR", "INCLUSION CRITERIA", "Ineligible cancer type or current cancer stage", "", "N", |
69 | 3x |
"DV", "MAJOR", "MEDICATION", "Dose missed or significantly out of window", |
70 | 3x |
"Site action due to epidemic/pandemic", "Y", |
71 | 3x |
"DV", "MAJOR", "MEDICATION", "Received incorrect study medication", "", "N", |
72 | 3x |
"DV", "MAJOR", "MEDICATION", "Received prohibited concomitant medication", "", "N", |
73 | 3x |
"DV", "MAJOR", "MEDICATION", "Discontinued study drug for unspecified reason", "", "N", |
74 | 3x |
"DV", "MAJOR", "MEDICATION", "Significant deviation from planned dose", |
75 | 3x |
"Site action due to epidemic/pandemic", "Y", |
76 | 3x |
"DV", "MAJOR", "PROCEDURAL", "Missed assessment affecting safety/study outcomes", "", "N", |
77 | 3x |
"DV", "MAJOR", "PROCEDURAL", "Eligibility-related test not done/out of window", "", "N", |
78 | 3x |
"DV", "MAJOR", "PROCEDURAL", "Failure to sign updated ICF within two visits", |
79 | 3x |
"Site action due to epidemic/pandemic", "Y", |
80 | 3x |
"DV", "MAJOR", "PROCEDURAL", "Omission of complete lab panel required by protocol", "", "N", |
81 | 3x |
"DV", "MAJOR", "PROCEDURAL", "Omission of screening tumor assessment", "", "N", |
82 | 3x |
"DV", "MAJOR", "PROCEDURAL", "Missed 2 or more efficacy assessments", |
83 | 3x |
"Site action due to epidemic/pandemic", "Y" |
84 |
) |
|
85 |
} |
|
86 | ||
87 | ||
88 | 3x |
addv <- Map( |
89 | 3x |
function(id, sid) { |
90 | 30x |
n_dv <- stats::rbinom(1, 1, p_dv) * sample(c(1, seq_len(max_n_dv)), 1) |
91 | 30x |
i <- sample(seq_len(nrow(lookup_dv)), n_dv, TRUE) |
92 | 30x |
dplyr::mutate( |
93 | 30x |
lookup_dv[i, ], |
94 | 30x |
USUBJID = id, |
95 | 30x |
STUDYID = sid |
96 |
) |
|
97 |
}, |
|
98 | 3x |
adsl$USUBJID, |
99 | 3x |
adsl$STUDYID |
100 |
) %>% |
|
101 | 3x |
Reduce(rbind, .) %>% |
102 | 3x |
dplyr::mutate(DVSCAT = DVCAT) |
103 | ||
104 | 3x |
addv <- var_relabel( |
105 | 3x |
addv, |
106 | 3x |
STUDYID = "Study Identifier", |
107 | 3x |
USUBJID = "Unique Subject Identifier" |
108 |
) |
|
109 | ||
110 |
# merge ADSL to be able to add deviation date and study day variables |
|
111 | 3x |
addv <- dplyr::inner_join(addv, adsl, by = c("STUDYID", "USUBJID")) %>% |
112 | 3x |
dplyr::rowwise() %>% |
113 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
114 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
115 | 3x |
TRUE ~ TRTEDTM |
116 |
))) %>% |
|
117 | 3x |
dplyr::mutate(ASTDTM = sample( |
118 | 3x |
seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"), |
119 | 3x |
size = 1 |
120 |
)) %>% |
|
121 | 3x |
dplyr::mutate(ASTDT = lubridate::date(ASTDTM)) %>% |
122 | 3x |
dplyr::mutate(ASTDY = ceiling(difftime(ASTDTM, TRTSDTM, units = "days"))) %>% |
123 | 3x |
dplyr::select(-TRTENDT, -ASTDTM) %>% |
124 | 3x |
dplyr::ungroup() %>% |
125 | 3x |
dplyr::arrange(STUDYID, USUBJID, ASTDT, DVTERM) |
126 | ||
127 | 3x |
addv <- addv %>% |
128 | 3x |
dplyr::group_by(USUBJID) %>% |
129 | 3x |
dplyr::mutate(DVSEQ = seq_len(dplyr::n())) %>% |
130 | 3x |
dplyr::ungroup() %>% |
131 | 3x |
dplyr::arrange(STUDYID, USUBJID, ASTDT, DVTERM, DVSEQ) |
132 | ||
133 | 3x |
addv <- addv %>% |
134 | 3x |
dplyr::mutate(AEPRELFL = ifelse(DVEPRELI == "Y", DVEPRELI, "")) |
135 | ||
136 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) { |
137 | ! |
addv <- mutate_na(ds = addv, na_vars = na_vars, na_percentage = na_percentage) |
138 |
} |
|
139 | ||
140 |
# apply metadata |
|
141 | 3x |
addv <- apply_metadata(addv, "metadata/ADDV.yml") |
142 | ||
143 | 3x |
return(addv) |
144 |
} |
1 |
#' Time-to-Event Analysis Dataset (ADTTE) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating a random Time-to-Event Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD` |
|
11 |
#' |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @inheritParams radaette |
|
14 |
#' @template param_cached |
|
15 |
#' @templateVar data adtte |
|
16 |
#' |
|
17 |
#' @return `data.frame` |
|
18 |
#' @export |
|
19 |
#' |
|
20 |
#' @examples |
|
21 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
22 |
#' |
|
23 |
#' adtte <- radtte(adsl, seed = 2) |
|
24 |
#' adtte |
|
25 |
radtte <- function(adsl, |
|
26 |
event_descr = NULL, |
|
27 |
censor_descr = NULL, |
|
28 |
lookup = NULL, |
|
29 |
seed = NULL, |
|
30 |
na_percentage = 0, |
|
31 |
na_vars = list(CNSR = c(NA, 0.1), AVAL = c(1234, 0.1), AVALU = c(1234, 0.1)), |
|
32 |
cached = FALSE) { |
|
33 | 4x |
checkmate::assert_flag(cached) |
34 | 4x |
if (cached) { |
35 | 1x |
return(get_cached_data("cadtte")) |
36 |
} |
|
37 | ||
38 | 3x |
checkmate::assert_data_frame(adsl) |
39 | 3x |
checkmate::assert_character(censor_descr, null.ok = TRUE, min.len = 1, any.missing = FALSE) |
40 | 3x |
checkmate::assert_character(event_descr, null.ok = TRUE, min.len = 1, any.missing = FALSE) |
41 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
42 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
43 | 3x |
checkmate::assert_true(na_percentage < 1) |
44 | ||
45 | 3x |
if (!is.null(seed)) { |
46 | 3x |
set.seed(seed) |
47 |
} |
|
48 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
49 | ||
50 | 3x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
51 | 3x |
lookup_tte <- if (!is.null(lookup)) { |
52 | ! |
lookup |
53 |
} else { |
|
54 | 3x |
tibble::tribble( |
55 | 3x |
~ARM, ~PARAMCD, ~PARAM, ~LAMBDA, ~CNSR_P, |
56 | 3x |
"ARM A", "EFS", "Event Free Survival", log(2) / 365, 0.4, |
57 | 3x |
"ARM B", "EFS", "Event Free Survival", log(2) / 305, 0.3, |
58 | 3x |
"ARM C", "EFS", "Event Free Survival", log(2) / 243, 0.2, |
59 | 3x |
"ARM A", "CRSD", "Duration of Confirmed Response", log(2) / 305, 0.4, |
60 | 3x |
"ARM B", "CRSD", "Duration of Confirmed Response", log(2) / 243, 0.3, |
61 | 3x |
"ARM C", "CRSD", "Duration of Confirmed Response", log(2) / 182, 0.2, |
62 | 3x |
"ARM A", "PFS", "Progression Free Survival", log(2) / 365, 0.4, |
63 | 3x |
"ARM B", "PFS", "Progression Free Survival", log(2) / 305, 0.3, |
64 | 3x |
"ARM C", "PFS", "Progression Free Survival", log(2) / 243, 0.2, |
65 | 3x |
"ARM A", "OS", "Overall Survival", log(2) / 610, 0.4, |
66 | 3x |
"ARM B", "OS", "Overall Survival", log(2) / 490, 0.3, |
67 | 3x |
"ARM C", "OS", "Overall Survival", log(2) / 365, 0.2, |
68 |
) |
|
69 |
} |
|
70 | ||
71 | 3x |
evntdescr_sel <- if (!is.null(event_descr)) { |
72 | ! |
event_descr |
73 |
} else { |
|
74 | 3x |
c( |
75 | 3x |
"Death", |
76 | 3x |
"Disease Progression", |
77 | 3x |
"Last Tumor Assessment", |
78 | 3x |
"Adverse Event", |
79 | 3x |
"Alive" |
80 |
) |
|
81 |
} |
|
82 | ||
83 | 3x |
cnsdtdscr_sel <- if (!is.null(censor_descr)) { |
84 | ! |
censor_descr |
85 |
} else { |
|
86 | 3x |
c( |
87 | 3x |
"Preferred Term", |
88 | 3x |
"Clinical Cut Off", |
89 | 3x |
"Completion or Discontinuation", |
90 | 3x |
"End of AE Reporting Period" |
91 |
) |
|
92 |
} |
|
93 | ||
94 | 3x |
adtte <- split(adsl, adsl$USUBJID) %>% |
95 | 3x |
lapply(FUN = function(pinfo) { |
96 | 30x |
lookup_tte %>% |
97 | 30x |
dplyr::filter(ARM == as.character(pinfo$ACTARMCD)) %>% |
98 | 30x |
dplyr::rowwise() %>% |
99 | 30x |
dplyr::mutate( |
100 | 30x |
STUDYID = pinfo$STUDYID, |
101 | 30x |
SITEID = pinfo$SITEID, |
102 | 30x |
USUBJID = pinfo$USUBJID, |
103 | 30x |
AVALU = "DAYS" |
104 |
) %>% |
|
105 | 30x |
dplyr::select(-"LAMBDA", -"CNSR_P") |
106 |
}) %>% |
|
107 | 3x |
Reduce(rbind, .) %>% |
108 | 3x |
var_relabel( |
109 | 3x |
STUDYID = "Study Identifier", |
110 | 3x |
USUBJID = "Unique Subject Identifier" # ) |
111 |
) |
|
112 | ||
113 |
# Loop through each patient and randomly assign a value for EVNTDESC |
|
114 | 3x |
adtte_split <- split(adtte, adtte$USUBJID) |
115 | ||
116 |
# Add EVNTDESC column |
|
117 | 3x |
adtte_lst <- lapply(adtte_split, function(split_df) { |
118 |
# First create an empty EVNTDESC variable to populate |
|
119 | 30x |
split_df$EVNTDESC <- NA |
120 | 30x |
for (i in 1:nrow(split_df)) { # nolint |
121 |
# If this is the first row then create a random value from evntdescr_sel for EVNTDESC |
|
122 | 120x |
if (i == 1) { |
123 | 30x |
split_df$EVNTDESC[i] <- sample(evntdescr_sel[c(1:4)], 1, prob = c(0.1, 0.3, 0.4, 0.2)) |
124 | 90x |
} else if (i != 1 & i != nrow(split_df)) { |
125 |
# First check to see if "Death" has been entered in as a previous value |
|
126 |
# If so we need to make the rest of the EVNTDESC values "Death" to make sense |
|
127 |
# The patient cannot die and then come back to life |
|
128 | 60x |
if (any(grepl("Death", split_df$EVNTDESC))) { # If previous value has "Death" the following need to be "Death" |
129 | 21x |
split_df$EVNTDESC[i] <- "Death" |
130 | 3x |
} else { # If there are no "Death" values randomly select another value |
131 | 39x |
split_df$EVNTDESC[i] <- sample(evntdescr_sel[c(1:4)], 1) |
132 |
} |
|
133 | 3x |
} else { # This is for processing OS as this can only be "Death" or "Alive" |
134 | 30x |
if (any(grepl("Death", split_df$EVNTDESC))) { # If previous value has "Death" the following need to be "Death" |
135 | 21x |
split_df$EVNTDESC[i] <- "Death" |
136 | 3x |
} else { # If there are no "Death" values randomly select another value |
137 | 9x |
split_df$EVNTDESC[i] <- "Alive" |
138 |
} |
|
139 |
} |
|
140 |
} |
|
141 | 30x |
split_df |
142 |
}) |
|
143 | ||
144 |
# Add CNSR column |
|
145 | 3x |
adtte_lst <- lapply(adtte_lst, function(split_df) { |
146 |
# First create an empty CNSR variable to populate |
|
147 | 30x |
split_df$CNSR <- NA |
148 | 30x |
for (i in 1:nrow(split_df)) { # nolint |
149 |
# If this is the first row then create a random value from evntdescr_sel for EVNTDESC |
|
150 | 120x |
if (split_df$EVNTDESC[i] == "Death" | split_df$EVNTDESC[i] == "Disease Progression") { |
151 | 81x |
split_df$CNSR[i] <- 0 |
152 |
} else { |
|
153 | 39x |
split_df$CNSR[i] <- 1 |
154 |
} |
|
155 |
} |
|
156 | 30x |
split_df |
157 |
}) |
|
158 | ||
159 |
# Add AVAL column |
|
160 | 3x |
adtte_lst <- lapply(adtte_lst, function(split_df) { |
161 |
# First create an empty CNSR variable to populate |
|
162 | 30x |
split_df$AVAL <- NA |
163 | 30x |
for (i in 1:nrow(split_df)) { # nolint |
164 | 120x |
if (i == 1) { |
165 | 30x |
split_df$AVAL[i] <- stats::runif(1, 15, 100) |
166 | 90x |
} else if (i != 1 & any(grepl("Death", split_df[1:i - 1, "EVNTDESC"]))) { |
167 |
# Check if there are any death values before the current row |
|
168 |
# Set the AVAL to the value of the row that has the "Death" value |
|
169 |
# as the patient cannot live longer than this value |
|
170 | 42x |
death_position <- match("Death", split_df[1:i - 1, "EVNTDESC"][[1]]) |
171 | 42x |
split_df$AVAL[i] <- split_df$AVAL[death_position] |
172 | 48x |
} else if (i == 2) { |
173 | 24x |
split_df$AVAL[i] <- stats::runif(1, 100, 200) |
174 | 24x |
} else if (i == 3) { |
175 | 15x |
split_df$AVAL[i] <- stats::runif(1, 200, 300) |
176 | 9x |
} else if (i == 4) { |
177 | 9x |
split_df$AVAL[i] <- stats::runif(1, 300, 500) |
178 |
} |
|
179 |
} |
|
180 | 30x |
split_df |
181 |
}) |
|
182 | ||
183 |
# Add CNSDTDSC column |
|
184 | 3x |
adtte_lst <- lapply(adtte_lst, function(split_df) { |
185 |
# First create an empty CNSDTDSC variable to populate |
|
186 | 30x |
split_df$CNSDTDSC <- NA |
187 | 30x |
for (i in 1:nrow(split_df)) { # nolint |
188 | 120x |
if (split_df$CNSR[i] == 1 & split_df$EVNTDESC[i] == "Last Tumor Assessment") { |
189 | 27x |
split_df$CNSDTDSC[i] <- "Completion or Discontinuation" |
190 | 93x |
} else if (split_df$CNSR[i] == 1 & split_df$EVNTDESC[i] == "Adverse Event") { |
191 | 3x |
split_df$CNSDTDSC[i] <- "Preferred Term" |
192 | 90x |
} else if (split_df$CNSR[i] == 1 & split_df$EVNTDESC[i] == "Alive") { |
193 | 9x |
split_df$CNSDTDSC[i] <- "Alive During Study" |
194 |
} else { |
|
195 | 81x |
split_df$CNSDTDSC[i] <- "" |
196 |
} |
|
197 |
} |
|
198 | 30x |
split_df |
199 |
}) |
|
200 | ||
201 |
# Take the split df and combine them back together |
|
202 | 3x |
adtte <- do.call("rbind", adtte_lst) |
203 | 3x |
rownames(adtte) <- NULL |
204 | ||
205 | 3x |
adtte <- var_relabel( |
206 | 3x |
adtte, |
207 | 3x |
STUDYID = "Study Identifier", |
208 | 3x |
USUBJID = "Unique Subject Identifier" |
209 |
) |
|
210 | ||
211 |
# merge ADSL to be able to add TTE date and study day variables |
|
212 | 3x |
adtte <- dplyr::inner_join( |
213 | 3x |
dplyr::select(adtte, -"SITEID", -"ARM"), |
214 | 3x |
adsl, |
215 | 3x |
by = c("STUDYID", "USUBJID") |
216 |
) %>% |
|
217 | 3x |
dplyr::rowwise() %>% |
218 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
219 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
220 | 3x |
TRUE ~ TRTEDTM |
221 |
))) %>% |
|
222 | 3x |
dplyr::mutate(ADTM = sample( |
223 | 3x |
seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"), |
224 | 3x |
size = 1 |
225 |
)) %>% |
|
226 | 3x |
dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>% |
227 | 3x |
dplyr::select(-TRTENDT) %>% |
228 | 3x |
dplyr::ungroup() %>% |
229 | 3x |
dplyr::arrange(STUDYID, USUBJID, ADTM) |
230 | ||
231 | 3x |
adtte <- adtte %>% |
232 | 3x |
dplyr::group_by(USUBJID) %>% |
233 | 3x |
dplyr::mutate(TTESEQ = seq_len(dplyr::n())) %>% |
234 | 3x |
dplyr::mutate(ASEQ = TTESEQ) %>% |
235 | 3x |
dplyr::mutate(PARAM = as.factor(PARAM)) %>% |
236 | 3x |
dplyr::mutate(PARAMCD = as.factor(PARAMCD)) %>% |
237 | 3x |
dplyr::ungroup() %>% |
238 | 3x |
dplyr::arrange( |
239 | 3x |
STUDYID, |
240 | 3x |
USUBJID, |
241 | 3x |
PARAMCD, |
242 | 3x |
ADTM, |
243 | 3x |
TTESEQ |
244 |
) |
|
245 | ||
246 | 3x |
mod_before_adtte <- adtte |
247 | ||
248 |
# adding adverse event counts and log follow-up time |
|
249 | 3x |
adtte <- dplyr::bind_rows( |
250 | 3x |
adtte, |
251 | 3x |
data.frame( |
252 | 3x |
adtte %>% |
253 | 3x |
dplyr::group_by(USUBJID) %>% |
254 | 3x |
dplyr::slice_head(n = 1) %>% |
255 | 3x |
dplyr::mutate( |
256 | 3x |
PARAMCD = "TNE", |
257 | 3x |
PARAM = "Total Number of Exacerbations", |
258 | 3x |
AVAL = stats::rpois(1, 3), |
259 | 3x |
AVALU = "COUNT", |
260 | 3x |
lgTMATRSK = log(stats::rexp(1, rate = 3)), |
261 | 3x |
dplyr::across( |
262 | 3x |
c("ASEQ", "TTESEQ", "ADY", "ADTM", "EVNTDESC"), |
263 | 3x |
~NA |
264 |
) |
|
265 |
) |
|
266 |
) |
|
267 |
) %>% |
|
268 | 3x |
dplyr::arrange( |
269 | 3x |
STUDYID, |
270 | 3x |
USUBJID, |
271 | 3x |
PARAMCD, |
272 | 3x |
ADTM, |
273 | 3x |
TTESEQ |
274 |
) |
|
275 | ||
276 | 3x |
mod_after_adtte <- adtte |
277 | ||
278 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) { |
279 | ! |
adtte <- mutate_na(ds = adtte, na_vars = na_vars, na_percentage = na_percentage) |
280 |
} |
|
281 | ||
282 |
# apply metadata |
|
283 | 3x |
adtte <- apply_metadata(adtte, "metadata/ADTTE.yml") |
284 | ||
285 | 3x |
return(adtte) |
286 |
} |
1 |
#' Subject-Level Analysis Dataset (ADSL) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' The Subject-Level Analysis Dataset (ADSL) is used to provide the variables |
|
6 |
#' that describe attributes of a subject. ADSL is a source for subject-level |
|
7 |
#' variables used in other analysis data sets, such as population flags and |
|
8 |
#' treatment variables. There is only one ADSL per study. ADSL and its related |
|
9 |
#' metadata are required in a CDISC-based submission of data from a clinical |
|
10 |
#' trial even if no other analysis data sets are submitted. |
|
11 |
#' |
|
12 |
#' @details One record per subject. |
|
13 |
#' |
|
14 |
#' Keys: `STUDYID`, `USUBJID` |
|
15 |
#' |
|
16 |
#' @inheritParams argument_convention |
|
17 |
#' @param N (`numeric`)\cr Number of patients. |
|
18 |
#' @param study_duration (`numeric`)\cr Duration of study in years. |
|
19 |
#' @param with_trt02 (`logical`)\cr Should period 2 be added. |
|
20 |
#' @param ae_withdrawal_prob (`proportion`)\cr Probability that there is at least one |
|
21 |
#' Adverse Event leading to the withdrawal of a study drug. |
|
22 |
#' @template param_cached |
|
23 |
#' @templateVar data adsl |
|
24 |
#' |
|
25 |
#' @return `data.frame` |
|
26 |
#' @export |
|
27 |
# |
|
28 |
#' @examples |
|
29 |
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1) |
|
30 |
#' adsl |
|
31 |
#' |
|
32 |
#' adsl <- radsl( |
|
33 |
#' N = 10, seed = 1, |
|
34 |
#' na_percentage = 0.1, |
|
35 |
#' na_vars = list( |
|
36 |
#' DTHDT = c(seed = 1234, percentage = 0.1), |
|
37 |
#' LSTALVDT = c(seed = 1234, percentage = 0.1) |
|
38 |
#' ) |
|
39 |
#' ) |
|
40 |
#' adsl |
|
41 |
#' |
|
42 |
#' adsl <- radsl(N = 10, seed = 1, na_percentage = .1) |
|
43 |
#' adsl |
|
44 |
radsl <- function(N = 400, # nolint |
|
45 |
study_duration = 2, |
|
46 |
seed = NULL, |
|
47 |
with_trt02 = TRUE, |
|
48 |
na_percentage = 0, |
|
49 |
na_vars = list( |
|
50 |
"AGE" = NA, "SEX" = NA, "RACE" = NA, "STRATA1" = NA, "STRATA2" = NA, |
|
51 |
"BMRKR1" = c(seed = 1234, percentage = 0.1), "BMRKR2" = c(1234, 0.1), "BEP01FL" = NA |
|
52 |
), |
|
53 |
ae_withdrawal_prob = 0.05, |
|
54 |
cached = FALSE) { |
|
55 | 28x |
checkmate::assert_flag(cached) |
56 | 28x |
if (cached) { |
57 | 2x |
return(get_cached_data("cadsl")) |
58 |
} |
|
59 | ||
60 | 26x |
checkmate::assert_number(N) |
61 | 26x |
checkmate::assert_number(seed, null.ok = TRUE) |
62 | 26x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1, na.ok = TRUE) |
63 | 26x |
checkmate::assert_number(study_duration, lower = 1) |
64 | 26x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
65 | 26x |
checkmate::assert_true(na_percentage < 1) |
66 | ||
67 | 26x |
if (!is.null(seed)) { |
68 | 26x |
set.seed(seed) |
69 |
} |
|
70 | ||
71 | 26x |
study_duration_secs <- lubridate::seconds(lubridate::years(study_duration)) |
72 | 26x |
sys_dtm <- lubridate::fast_strptime("20/2/2019 11:16:16.683", "%d/%m/%Y %H:%M:%OS") |
73 | 26x |
discons <- max(1, floor((N * .3))) |
74 | 26x |
country_site_prob <- c(.5, .121, .077, .077, .075, .052, .046, .025, .014, .003) |
75 | ||
76 | 26x |
adsl <- tibble::tibble( |
77 | 26x |
STUDYID = rep("AB12345", N), |
78 | 26x |
COUNTRY = sample_fct( |
79 | 26x |
c("CHN", "USA", "BRA", "PAK", "NGA", "RUS", "JPN", "GBR", "CAN", "CHE"), |
80 | 26x |
N, |
81 | 26x |
prob = country_site_prob |
82 |
), |
|
83 | 26x |
SITEID = sample_fct(1:20, N, prob = rep(country_site_prob, times = 2)), |
84 | 26x |
SUBJID = paste("id", seq_len(N), sep = "-"), |
85 | 26x |
AGE = sapply(stats::rchisq(N, df = 5, ncp = 10), max, 0) + 20, |
86 | 26x |
AGEU = "YEARS", |
87 | 26x |
SEX = c("F", "M") %>% sample_fct(N, prob = c(.52, .48)), |
88 | 26x |
ARMCD = c("ARM A", "ARM B", "ARM C") %>% sample_fct(N), |
89 | 26x |
RACE = c( |
90 | 26x |
"ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE", "AMERICAN INDIAN OR ALASKA NATIVE", |
91 | 26x |
"MULTIPLE", "NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER", "OTHER", "UNKNOWN" |
92 |
) %>% |
|
93 | 26x |
sample_fct(N, prob = c(.55, .23, .16, .05, .004, .003, .002, .002)), |
94 | 26x |
TRTSDTM = sys_dtm + sample(seq(0, study_duration_secs), size = N, replace = TRUE), |
95 | 26x |
RANDDT = lubridate::date(TRTSDTM - lubridate::days(floor(stats::runif(N, min = 0, max = 5)))), |
96 | 26x |
TRTEDTM = TRTSDTM + study_duration_secs, |
97 | 26x |
STRATA1 = c("A", "B", "C") %>% sample_fct(N), |
98 | 26x |
STRATA2 = c("S1", "S2") %>% sample_fct(N), |
99 | 26x |
BMRKR1 = stats::rchisq(N, 6), |
100 | 26x |
BMRKR2 = sample_fct(c("LOW", "MEDIUM", "HIGH"), N), |
101 | 26x |
BMEASIFL = sample_fct(c("Y", "N"), N), |
102 | 26x |
BEP01FL = sample_fct(c("Y", "N"), N), |
103 | 26x |
AEWITHFL = sample_fct(c("Y", "N"), N, prob = c(ae_withdrawal_prob, 1 - ae_withdrawal_prob)) |
104 |
) %>% |
|
105 | 26x |
dplyr::mutate(ARM = dplyr::recode( |
106 | 26x |
ARMCD, |
107 | 26x |
"ARM A" = "A: Drug X", "ARM B" = "B: Placebo", "ARM C" = "C: Combination" |
108 |
)) %>% |
|
109 | 26x |
dplyr::mutate(ACTARM = ARM) %>% |
110 | 26x |
dplyr::mutate(ACTARMCD = ARMCD) %>% |
111 | 26x |
dplyr::mutate(TRT01P = ARM) %>% |
112 | 26x |
dplyr::mutate(TRT01A = ACTARM) %>% |
113 | 26x |
dplyr::mutate(ITTFL = factor("Y")) %>% |
114 | 26x |
dplyr::mutate(SAFFL = factor("Y")) %>% |
115 | 26x |
dplyr::arrange(TRTSDTM) |
116 | ||
117 | 26x |
adds <- adsl[sample(nrow(adsl), discons), ] %>% |
118 | 26x |
dplyr::mutate(TRTEDTM_discon = sample( |
119 | 26x |
seq(from = max(TRTSDTM), to = sys_dtm + study_duration_secs, by = 1), |
120 | 26x |
size = discons, |
121 | 26x |
replace = TRUE |
122 |
)) %>% |
|
123 | 26x |
dplyr::select(SUBJID, TRTSDTM, TRTEDTM_discon) %>% |
124 | 26x |
dplyr::arrange(TRTSDTM) |
125 | ||
126 | 26x |
adsl <- dplyr::left_join(adsl, adds, by = c("SUBJID", "TRTSDTM")) %>% |
127 | 26x |
dplyr::mutate(TRTEDTM = dplyr::case_when( |
128 | 26x |
!is.na(TRTEDTM_discon) ~ TRTEDTM_discon, |
129 | 26x |
TRTSDTM >= quantile(TRTSDTM)[2] & TRTSDTM <= quantile(TRTSDTM)[3] ~ lubridate::as_datetime(NA), |
130 | 26x |
TRUE ~ TRTEDTM |
131 |
)) %>% |
|
132 | 26x |
dplyr::select(-"TRTEDTM_discon") |
133 | ||
134 |
# add period 2 if needed |
|
135 | 26x |
if (with_trt02) { |
136 | 26x |
with_trt02 <- lubridate::seconds(lubridate::years(1)) |
137 | 26x |
adsl <- adsl %>% |
138 | 26x |
dplyr::mutate(TRT02P = sample(ARM)) %>% |
139 | 26x |
dplyr::mutate(TRT02A = sample(ACTARM)) %>% |
140 | 26x |
dplyr::mutate( |
141 | 26x |
TRT01SDTM = TRTSDTM, |
142 | 26x |
AP01SDTM = TRT01SDTM, |
143 | 26x |
TRT01EDTM = TRTEDTM, |
144 | 26x |
AP01EDTM = TRT01EDTM, |
145 | 26x |
TRT02SDTM = TRTEDTM, |
146 | 26x |
AP02SDTM = TRT02SDTM, |
147 | 26x |
TRT02EDTM = TRT01EDTM + with_trt02, |
148 | 26x |
AP02EDTM = TRT02EDTM, |
149 | 26x |
TRTEDTM = TRT02EDTM |
150 |
) |
|
151 |
} |
|
152 | ||
153 | 26x |
adsl <- adsl %>% |
154 | 26x |
dplyr::mutate(EOSDT = lubridate::date(TRTEDTM)) %>% |
155 | 26x |
dplyr::mutate(EOSDY = ceiling(difftime(TRTEDTM, TRTSDTM))) %>% |
156 | 26x |
dplyr::mutate(EOSSTT = dplyr::case_when( |
157 | 26x |
EOSDY == max(EOSDY, na.rm = TRUE) ~ "COMPLETED", |
158 | 26x |
EOSDY < max(EOSDY, na.rm = TRUE) ~ "DISCONTINUED", |
159 | 26x |
is.na(TRTEDTM) ~ "ONGOING" |
160 |
)) %>% |
|
161 | 26x |
dplyr::mutate(EOTSTT = EOSSTT) |
162 | ||
163 |
# disposition related variables |
|
164 |
# using probability of 1 for the "DEATH" level to ensure at least one death record exists |
|
165 | 26x |
l_dcsreas <- list( |
166 | 26x |
choices = c( |
167 | 26x |
"ADVERSE EVENT", "DEATH", "LACK OF EFFICACY", "PHYSICIAN DECISION", |
168 | 26x |
"PROTOCOL VIOLATION", "WITHDRAWAL BY PARENT/GUARDIAN", "WITHDRAWAL BY SUBJECT" |
169 |
), |
|
170 | 26x |
prob = c(.2, 1, .1, .1, .2, .1, .1) |
171 |
) |
|
172 | 26x |
l_dthcat_other <- list( |
173 | 26x |
choices = c( |
174 | 26x |
"Post-study reporting of death", "LOST TO FOLLOW UP", "MISSING", "SUICIDE", "UNKNOWN" |
175 |
), |
|
176 | 26x |
prob = c(.1, .3, .3, .2, .1) |
177 |
) |
|
178 | ||
179 | 26x |
adsl <- adsl %>% |
180 | 26x |
dplyr::mutate( |
181 | 26x |
DCSREAS = ifelse( |
182 | 26x |
EOSSTT == "DISCONTINUED", |
183 | 26x |
sample(x = l_dcsreas$choices, size = N, replace = TRUE, prob = l_dcsreas$prob), |
184 | 26x |
as.character(NA) |
185 |
) |
|
186 |
) %>% |
|
187 | 26x |
dplyr::mutate(DTHFL = dplyr::case_when( |
188 | 26x |
DCSREAS == "DEATH" ~ "Y", |
189 | 26x |
TRUE ~ "N" |
190 |
)) %>% |
|
191 | 26x |
dplyr::mutate( |
192 | 26x |
DTHCAT = ifelse( |
193 | 26x |
DCSREAS == "DEATH", |
194 | 26x |
sample(x = c("ADVERSE EVENT", "PROGRESSIVE DISEASE", "OTHER"), size = N, replace = TRUE), |
195 | 26x |
as.character(NA) |
196 |
) |
|
197 |
) %>% |
|
198 | 26x |
dplyr::mutate(DTHCAUS = dplyr::case_when( |
199 | 26x |
DTHCAT == "ADVERSE EVENT" ~ "ADVERSE EVENT", |
200 | 26x |
DTHCAT == "PROGRESSIVE DISEASE" ~ "DISEASE PROGRESSION", |
201 | 26x |
DTHCAT == "OTHER" ~ sample(x = l_dthcat_other$choices, size = N, replace = TRUE, prob = l_dthcat_other$prob), |
202 | 26x |
TRUE ~ as.character(NA) |
203 |
)) %>% |
|
204 | 26x |
dplyr::mutate(ADTHAUT = dplyr::case_when( |
205 | 26x |
DTHCAUS %in% c("ADVERSE EVENT", "DISEASE PROGRESSION") ~ "Yes", |
206 | 26x |
DTHCAUS %in% c("UNKNOWN", "SUICIDE", "Post-study reporting of death") ~ sample( |
207 | 26x |
x = c("Yes", "No"), size = N, replace = TRUE, prob = c(0.25, 0.75) |
208 |
), |
|
209 | 26x |
TRUE ~ as.character(NA) |
210 |
)) %>% |
|
211 |
# adding some random number of days post last treatment date so that death days from last trt admin |
|
212 |
# supports the LDDTHGR1 derivation below |
|
213 | 26x |
dplyr::mutate(DTHDT = dplyr::case_when( |
214 | 26x |
DCSREAS == "DEATH" ~ lubridate::date(TRTEDTM + lubridate::days(sample(0:50, size = N, replace = TRUE))), |
215 | 26x |
TRUE ~ NA |
216 |
)) %>% |
|
217 | 26x |
dplyr::mutate(LDDTHELD = difftime(DTHDT, lubridate::date(TRTEDTM), units = "days")) %>% |
218 | 26x |
dplyr::mutate(LDDTHGR1 = dplyr::case_when( |
219 | 26x |
LDDTHELD <= 30 ~ "<=30", |
220 | 26x |
LDDTHELD > 30 ~ ">30", |
221 | 26x |
TRUE ~ as.character(NA) |
222 |
)) %>% |
|
223 | 26x |
dplyr::mutate(LSTALVDT = dplyr::case_when( |
224 | 26x |
DCSREAS == "DEATH" ~ DTHDT, |
225 | 26x |
TRUE ~ lubridate::date(TRTEDTM) + lubridate::days(floor(stats::runif(N, min = 10, max = 30))) |
226 |
)) |
|
227 | ||
228 |
# add random ETHNIC (Ethnicity) |
|
229 | 26x |
adsl <- adsl %>% |
230 | 26x |
dplyr::mutate(ETHNIC = sample( |
231 | 26x |
x = c("HISPANIC OR LATINO", "NOT HISPANIC OR LATINO", "NOT REPORTED", "UNKNOWN"), |
232 | 26x |
size = N, replace = TRUE, prob = c(.1, .8, .06, .04) |
233 |
)) |
|
234 | ||
235 |
# associate DTHADY (Relative Day of Death) with Death date |
|
236 |
# Date of Death [adsl.DTHDT] - date part of Date of First Exposure to Treatment [adsl.TRTSDTM] |
|
237 | ||
238 | 26x |
adsl <- adsl %>% |
239 | 26x |
dplyr::mutate(DTHADY = difftime(DTHDT, TRTSDTM, units = "days")) |
240 | ||
241 | ||
242 |
# associate sites with countries and regions |
|
243 | 26x |
adsl <- adsl %>% |
244 | 26x |
dplyr::mutate(SITEID = paste0(COUNTRY, "-", SITEID)) %>% |
245 | 26x |
dplyr::mutate(REGION1 = dplyr::case_when( |
246 | 26x |
COUNTRY %in% c("NGA") ~ "Africa", |
247 | 26x |
COUNTRY %in% c("CHN", "JPN", "PAK") ~ "Asia", |
248 | 26x |
COUNTRY %in% c("RUS") ~ "Eurasia", |
249 | 26x |
COUNTRY %in% c("GBR") ~ "Europe", |
250 | 26x |
COUNTRY %in% c("CAN", "USA") ~ "North America", |
251 | 26x |
COUNTRY %in% c("BRA") ~ "South America", |
252 | 26x |
TRUE ~ as.character(NA) |
253 |
)) %>% |
|
254 | 26x |
dplyr::mutate(INVID = paste("INV ID", SITEID)) %>% |
255 | 26x |
dplyr::mutate(INVNAM = paste("Dr.", SITEID, "Doe")) %>% |
256 | 26x |
dplyr::mutate(USUBJID = paste(STUDYID, SITEID, SUBJID, sep = "-")) |
257 | ||
258 | ||
259 | 26x |
if (length(na_vars) > 0 && na_percentage > 0) { |
260 | ! |
adsl <- mutate_na(ds = adsl, na_vars = na_vars, na_percentage = na_percentage) |
261 |
} |
|
262 | ||
263 |
# apply metadata |
|
264 | 26x |
adsl <- apply_metadata(adsl, "metadata/ADSL.yml", FALSE) |
265 | ||
266 | 26x |
attr(adsl, "study_duration_secs") <- as.numeric(study_duration_secs) |
267 | 26x |
return(adsl) |
268 |
} |
1 |
#' Tumor Response Analysis Dataset (ADRS) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating a random Tumor Response Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details |
|
9 |
#' One record per subject per parameter per analysis visit per analysis date. |
|
10 |
#' SDTM variables are populated on new records coming from other single records. |
|
11 |
#' Otherwise, SDTM variables are left blank. |
|
12 |
#' |
|
13 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `AVISITN`, `ADT`, `RSSEQ` |
|
14 |
#' |
|
15 |
#' @inheritParams argument_convention |
|
16 |
#' @param avalc (`character vector`)\cr Analysis value categories. |
|
17 |
#' @template param_cached |
|
18 |
#' @templateVar data adrs |
|
19 |
#' |
|
20 |
#' @return `data.frame` |
|
21 |
#' @export |
|
22 |
#' |
|
23 |
#' @examples |
|
24 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
25 |
#' |
|
26 |
#' adrs <- radrs(adsl, seed = 2) |
|
27 |
#' adrs |
|
28 |
radrs <- function(adsl, |
|
29 |
avalc = NULL, |
|
30 |
lookup = NULL, |
|
31 |
seed = NULL, |
|
32 |
na_percentage = 0, |
|
33 |
na_vars = list(AVISIT = c(NA, 0.1), AVAL = c(1234, 0.1), AVALC = c(1234, 0.1)), |
|
34 |
cached = FALSE) { |
|
35 | 7x |
checkmate::assert_flag(cached) |
36 | 7x |
if (cached) { |
37 | 1x |
return(get_cached_data("cadrs")) |
38 |
} |
|
39 | ||
40 | 6x |
checkmate::assert_data_frame(adsl) |
41 | 6x |
checkmate::assert_vector(avalc, null.ok = TRUE) |
42 | 6x |
checkmate::assert_number(seed, null.ok = TRUE) |
43 | 6x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
44 | 6x |
checkmate::assert_true(na_percentage < 1) |
45 | ||
46 | 6x |
param_codes <- if (!is.null(avalc)) { |
47 | ! |
avalc |
48 |
} else { |
|
49 | 6x |
stats::setNames(1:5, c("CR", "PR", "SD", "PD", "NE")) |
50 |
} |
|
51 | ||
52 | 6x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
53 | 6x |
lookup_ars <- if (!is.null(lookup)) { |
54 | ! |
lookup |
55 |
} else { |
|
56 | 6x |
expand.grid( |
57 | 6x |
ARM = c("A: Drug X", "B: Placebo", "C: Combination"), |
58 | 6x |
AVALC = names(param_codes) |
59 | 6x |
) %>% dplyr::mutate( |
60 | 6x |
AVAL = param_codes[AVALC], |
61 | 6x |
p_scr = c(rep(0, 3), rep(0, 3), c(1, 1, 1), c(0, 0, 0), c(0, 0, 0)), |
62 | 6x |
p_bsl = c(rep(0, 3), rep(0, 3), c(1, 1, 1), c(0, 0, 0), c(0, 0, 0)), |
63 | 6x |
p_cycle = c(c(.4, .3, .5), c(.35, .25, .25), c(.1, .2, .08), c(.14, 0.15, 0.15), c(.01, 0.1, 0.02)), |
64 | 6x |
p_eoi = c(c(.4, .3, .5), c(.35, .25, .25), c(.1, .2, .08), c(.14, 0.15, 0.15), c(.01, 0.1, 0.02)), |
65 | 6x |
p_fu = c(c(.3, .2, .4), c(.2, .1, .3), c(.2, .2, .2), c(.3, .5, 0.1), rep(0, 3)) |
66 |
) |
|
67 |
} |
|
68 | ||
69 | 6x |
if (!is.null(seed)) { |
70 | 6x |
set.seed(seed) |
71 |
} |
|
72 | 6x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
73 | ||
74 | 6x |
adrs <- split(adsl, adsl$USUBJID) %>% |
75 | 6x |
lapply(function(pinfo) { |
76 | 60x |
probs <- dplyr::filter(lookup_ars, ARM == as.character(pinfo$ACTARM)) |
77 | ||
78 |
# screening |
|
79 | 60x |
rsp_screen <- sample(probs$AVALC, 1, prob = probs$p_scr) %>% as.character() |
80 | ||
81 |
# baseline |
|
82 | 60x |
rsp_bsl <- sample(probs$AVALC, 1, prob = probs$p_bsl) %>% as.character() |
83 | ||
84 |
# cycle |
|
85 | 60x |
rsp_c2d1 <- sample(probs$AVALC, 1, prob = probs$p_cycle) %>% as.character() |
86 | 60x |
rsp_c4d1 <- sample(probs$AVALC, 1, prob = probs$p_cycle) %>% as.character() |
87 | ||
88 |
# end of induction |
|
89 | 60x |
rsp_eoi <- sample(probs$AVALC, 1, prob = probs$p_eoi) %>% as.character() |
90 | ||
91 |
# follow up |
|
92 | 60x |
rsp_fu <- sample(probs$AVALC, 1, prob = probs$p_fu) %>% as.character() |
93 | ||
94 | 60x |
best_rsp <- min(param_codes[c(rsp_screen, rsp_bsl, rsp_eoi, rsp_fu, rsp_c2d1, rsp_c4d1)]) |
95 | 60x |
best_rsp_i <- which.min(param_codes[c(rsp_screen, rsp_bsl, rsp_eoi, rsp_fu, rsp_c2d1, rsp_c4d1)]) |
96 | ||
97 | 60x |
avisit <- c("SCREENING", "BASELINE", "CYCLE 2 DAY 1", "CYCLE 4 DAY 1", "END OF INDUCTION", "FOLLOW UP") |
98 | ||
99 |
# meaningful date information |
|
100 | 60x |
trtstdt <- lubridate::date(pinfo$TRTSDTM) |
101 | 60x |
trtendt <- lubridate::date(dplyr::if_else( |
102 | 60x |
!is.na(pinfo$TRTEDTM), pinfo$TRTEDTM, |
103 | 60x |
lubridate::floor_date(trtstdt + study_duration_secs, unit = "day") |
104 |
)) |
|
105 | 60x |
scr_date <- trtstdt - lubridate::days(100) |
106 | 60x |
bs_date <- trtstdt |
107 | 60x |
flu_date <- sample(seq(lubridate::as_datetime(trtstdt), lubridate::as_datetime(trtendt), by = "day"), size = 1) |
108 | 60x |
eoi_date <- sample(seq(lubridate::as_datetime(trtstdt), lubridate::as_datetime(trtendt), by = "day"), size = 1) |
109 | 60x |
c2d1_date <- sample(seq(lubridate::as_datetime(trtstdt), lubridate::as_datetime(trtendt), by = "day"), size = 1) |
110 | 60x |
c4d1_date <- min(lubridate::date(c2d1_date + lubridate::days(60)), trtendt) |
111 | ||
112 | 60x |
tibble::tibble( |
113 | 60x |
STUDYID = pinfo$STUDYID, |
114 | 60x |
SITEID = pinfo$SITEID, |
115 | 60x |
USUBJID = pinfo$USUBJID, |
116 | 60x |
PARAMCD = as.factor(c(rep("OVRINV", 6), "BESRSPI", "INVET")), |
117 | 60x |
PARAM = as.factor(dplyr::recode( |
118 | 60x |
PARAMCD, |
119 | 60x |
OVRINV = "Overall Response by Investigator - by visit", |
120 | 60x |
OVRSPI = "Best Overall Response by Investigator (no confirmation required)", |
121 | 60x |
BESRSPI = "Best Confirmed Overall Response by Investigator", |
122 | 60x |
INVET = "Investigator End Of Induction Response" |
123 |
)), |
|
124 | 60x |
AVALC = c( |
125 | 60x |
rsp_screen, rsp_bsl, rsp_c2d1, rsp_c4d1, rsp_eoi, rsp_fu, |
126 | 60x |
names(param_codes)[best_rsp], |
127 | 60x |
rsp_eoi |
128 |
), |
|
129 | 60x |
AVAL = param_codes[AVALC], |
130 | 60x |
AVISIT = factor(c(avisit, avisit[best_rsp_i], avisit[5]), levels = avisit) |
131 |
) %>% |
|
132 | 60x |
merge( |
133 | 60x |
tibble::tibble( |
134 | 60x |
AVISIT = avisit, |
135 | 60x |
ADTM = c(scr_date, bs_date, c2d1_date, c4d1_date, eoi_date, flu_date), |
136 | 60x |
AVISITN = c(-1, 0, 2, 4, 999, 999), |
137 | 60x |
TRTSDTM = pinfo$TRTSDTM |
138 |
) %>% |
|
139 | 60x |
dplyr::mutate( |
140 | 60x |
ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days")) |
141 |
) %>% |
|
142 | 60x |
dplyr::select(-"TRTSDTM"), |
143 | 60x |
by = "AVISIT" |
144 |
) |
|
145 |
}) %>% |
|
146 | 6x |
Reduce(rbind, .) %>% |
147 | 6x |
dplyr::mutate(AVALC = factor(AVALC, levels = names(param_codes))) %>% |
148 | 6x |
var_relabel( |
149 | 6x |
STUDYID = "Study Identifier", |
150 | 6x |
USUBJID = "Unique Subject Identifier" |
151 |
) |
|
152 | ||
153 | 6x |
adrs <- var_relabel( |
154 | 6x |
adrs, |
155 | 6x |
STUDYID = "Study Identifier", |
156 | 6x |
USUBJID = "Unique Subject Identifier" |
157 |
) |
|
158 | ||
159 |
# merge ADSL to be able to add RS date and study day variables |
|
160 | ||
161 | ||
162 | 6x |
adrs <- dplyr::inner_join( |
163 | 6x |
dplyr::select(adrs, -"SITEID"), |
164 | 6x |
adsl, |
165 | 6x |
by = c("STUDYID", "USUBJID") |
166 |
) |
|
167 | ||
168 | 6x |
adrs <- adrs %>% |
169 | 6x |
dplyr::group_by(USUBJID) %>% |
170 | 6x |
dplyr::mutate(RSSEQ = seq_len(dplyr::n())) %>% |
171 | 6x |
dplyr::mutate(ASEQ = RSSEQ) %>% |
172 | 6x |
dplyr::ungroup() %>% |
173 | 6x |
dplyr::arrange( |
174 | 6x |
STUDYID, |
175 | 6x |
USUBJID, |
176 | 6x |
PARAMCD, |
177 | 6x |
AVISITN, |
178 | 6x |
ADTM, |
179 | 6x |
RSSEQ |
180 |
) |
|
181 | ||
182 | 6x |
if (length(na_vars) > 0 && na_percentage > 0) { |
183 | ! |
adrs <- mutate_na(ds = adrs, na_vars = na_vars, na_percentage = na_percentage) |
184 |
} |
|
185 | ||
186 |
# apply metadata |
|
187 | 6x |
adrs <- apply_metadata(adrs, "metadata/ADRS.yml") |
188 | ||
189 | 6x |
return(adrs) |
190 |
} |
1 |
#' Pharmacokinetics Parameters Dataset (ADPP) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating a random Pharmacokinetics Parameters Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details One record per study, subject, parameter category, parameter and visit. |
|
9 |
#' |
|
10 |
#' @inheritParams argument_convention |
|
11 |
#' @param ppcat (`character vector`)\cr Categories of parameters. |
|
12 |
#' @param ppspec (`character vector`)\cr Specimen material types. |
|
13 |
#' @template param_cached |
|
14 |
#' @templateVar data adpp |
|
15 |
#' |
|
16 |
#' @return `data.frame` |
|
17 |
#' @export |
|
18 |
#' |
|
19 |
#' @examples |
|
20 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
21 |
#' |
|
22 |
#' adpp <- radpp(adsl, seed = 2) |
|
23 |
#' adpp |
|
24 |
radpp <- function(adsl, |
|
25 |
ppcat = c("Plasma Drug X", "Plasma Drug Y", "Metabolite Drug X", "Metabolite Drug Y"), |
|
26 |
ppspec = c( |
|
27 |
"Plasma", "Plasma", "Plasma", "Matrix of PD", "Matrix of PD", |
|
28 |
"Urine", "Urine", "Urine", "Urine" |
|
29 |
), |
|
30 |
paramcd = c( |
|
31 |
"AUCIFO", "CMAX", "CLO", "RMAX", "TON", |
|
32 |
"RENALCL", "RENALCLD", "RCAMINT", "RCPCINT" |
|
33 |
), |
|
34 |
param = c( |
|
35 |
"AUC Infinity Obs", "Max Conc", "Total CL Obs", "Time of Maximum Response", |
|
36 |
"Time to Onset", "Renal CL", "Renal CL Norm by Dose", |
|
37 |
"Amt Rec from T1 to T2", "Pct Rec from T1 to T2" |
|
38 |
), |
|
39 |
paramu = c("day*ug/mL", "ug/mL", "ml/day/kg", "hr", "hr", "L/hr", "L/hr/mg", "mg", "%"), |
|
40 |
aval_mean = c(200, 30, 5, 10, 3, 0.05, 0.005, 1.5613, 15.65), |
|
41 |
visit_format = "CYCLE", |
|
42 |
n_days = 2L, |
|
43 |
seed = NULL, |
|
44 |
na_percentage = 0, |
|
45 |
na_vars = list( |
|
46 |
AVAL = c(NA, 0.1) |
|
47 |
), |
|
48 |
cached = FALSE) { |
|
49 | 4x |
checkmate::assert_flag(cached) |
50 | 4x |
if (cached) { |
51 | 1x |
return(get_cached_data("cadlb")) |
52 |
} |
|
53 | ||
54 | 3x |
checkmate::assert_character(ppcat) |
55 | 3x |
checkmate::assert_character(ppspec) |
56 | 3x |
checkmate::assert_character(paramcd) |
57 | 3x |
checkmate::assert_character(param) |
58 | 3x |
checkmate::assert_character(paramu) |
59 | 3x |
checkmate::assert_vector(aval_mean) |
60 | 3x |
checkmate::assert_string(visit_format) |
61 | 3x |
checkmate::assert_integer(n_days) |
62 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
63 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
64 | 3x |
checkmate::assert_true(na_percentage < 1) |
65 | 3x |
checkmate::assert_list(na_vars) |
66 | ||
67 | 3x |
checkmate::assertTRUE(length(ppspec) == length(paramcd)) |
68 | 3x |
checkmate::assertTRUE(length(ppspec) == length(param)) |
69 | 3x |
checkmate::assertTRUE(length(ppspec) == length(paramu)) |
70 | 3x |
checkmate::assertTRUE(length(ppspec) == length(aval_mean)) |
71 | ||
72 | 3x |
if (!is.null(seed)) { |
73 | 3x |
set.seed(seed) |
74 |
} |
|
75 | ||
76 |
# validate and initialize related variables |
|
77 | 3x |
ppspec_init_list <- relvar_init(param, ppspec) |
78 | 3x |
param_init_list <- relvar_init(param, paramcd) |
79 | 3x |
unit_init_list <- relvar_init(param, paramu) |
80 | ||
81 | 3x |
adpp <- expand.grid( |
82 | 3x |
STUDYID = unique(adsl$STUDYID), |
83 | 3x |
USUBJID = adsl$USUBJID, |
84 | 3x |
PPCAT = as.factor(ppcat), |
85 | 3x |
PARAM = as.factor(param_init_list$relvar1), |
86 | 3x |
AVISIT = visit_schedule(visit_format = visit_format, n_assessments = 1L, n_days = n_days), |
87 | 3x |
stringsAsFactors = FALSE |
88 |
) |
|
89 | 3x |
adpp <- adpp %>% |
90 | 3x |
dplyr::mutate(AVAL = stats::rnorm(nrow(adpp), mean = 1, sd = 0.2)) %>% |
91 | 3x |
dplyr::left_join(data.frame(PARAM = param, ADJUST = aval_mean), by = "PARAM") %>% |
92 | 3x |
dplyr::mutate(AVAL = AVAL * ADJUST) %>% |
93 | 3x |
dplyr::select(-"ADJUST") |
94 | ||
95 |
# assign related variable values: PARAMxPPSPEC are related |
|
96 | 3x |
adpp <- adpp %>% rel_var( |
97 | 3x |
var_name = "PPSPEC", |
98 | 3x |
related_var = "PARAM", |
99 | 3x |
var_values = ppspec_init_list$relvar2 |
100 |
) |
|
101 | ||
102 |
# assign related variable values: PARAMxPARAMCD are related |
|
103 | 3x |
adpp <- adpp %>% rel_var( |
104 | 3x |
var_name = "PARAMCD", |
105 | 3x |
related_var = "PARAM", |
106 | 3x |
var_values = param_init_list$relvar2 |
107 |
) |
|
108 | ||
109 |
# assign related variable values: PARAMxAVALU are related |
|
110 | 3x |
adpp <- adpp %>% rel_var( |
111 | 3x |
var_name = "AVALU", |
112 | 3x |
related_var = "PARAM", |
113 | 3x |
var_values = unit_init_list$relvar2 |
114 |
) |
|
115 | ||
116 |
# derive AVISITN based AVISIT and AVALC based on AVAL |
|
117 | 3x |
adpp <- adpp %>% |
118 | 3x |
dplyr::mutate(AVALC = as.character(AVAL)) %>% |
119 | 3x |
dplyr::mutate( |
120 | 3x |
AVISITN = dplyr::case_when( |
121 | 3x |
AVISIT == "SCREENING" ~ 0, |
122 | 3x |
(grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 1, |
123 | 3x |
TRUE ~ NA_real_ |
124 |
) |
|
125 |
) |
|
126 | ||
127 |
# derive REGIMEN variable |
|
128 | 3x |
adpp <- adpp %>% dplyr::mutate(REGIMEN = "BID") |
129 | ||
130 |
# derive PPSTINT and PPENINT based on PARAMCD |
|
131 | 3x |
t1_t2 <- data.frame( |
132 | 3x |
PARAMCD = c("RCAMINT", "RCAMINT", "RCPCINT", "RCPCINT"), |
133 | 3x |
PPSTINT = c("P0H", "P0H", "P0H", "P0H"), |
134 | 3x |
PPENINT = c("P12H", "P24H", "P12H", "P24H") |
135 |
) |
|
136 | 3x |
adpp <- adpp %>% |
137 | 3x |
dplyr::left_join(t1_t2, by = c("PARAMCD"), multiple = "all", relationship = "many-to-many") |
138 | ||
139 | 3x |
adpp <- dplyr::inner_join(adpp, adsl, by = c("STUDYID", "USUBJID")) %>% |
140 | 3x |
dplyr::filter( |
141 | 3x |
ACTARM != "B: Placebo", |
142 | 3x |
!(ACTARM == "A: Drug X" & (PPCAT == "Plasma Drug Y" | PPCAT == "Metabolite Drug Y")) |
143 |
) |
|
144 | ||
145 |
# derive PKARMCD column for creating more cohorts |
|
146 | 3x |
adpp <- adpp %>% |
147 | 3x |
dplyr::mutate(PKARMCD = factor(1 + (seq_len(nrow(adpp)) - 1) %/% (nrow(adpp) / 10), labels = c( |
148 | 3x |
"Drug A", "Drug B", "Drug C", "Drug D", "Drug E", "Drug F", "Drug G", "Drug H", |
149 | 3x |
"Drug I", "Drug J" |
150 |
))) |
|
151 | ||
152 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) { |
153 | ! |
adpp <- mutate_na(ds = adpp, na_vars = na_vars, na_percentage = na_percentage) |
154 |
} |
|
155 | ||
156 | 3x |
adpp <- apply_metadata(adpp, "metadata/ADPP.yml") |
157 | 3x |
return(adpp) |
158 |
} |
1 |
#' Previous and Concomitant Medications Analysis Dataset (ADCM) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating random Concomitant Medication Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details One record per each record in the corresponding SDTM domain. |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `ASTDTM`, `CMSEQ` |
|
11 |
#' |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @param max_n_cms (`integer`)\cr Maximum number of concomitant medications per patient. Defaults to 10. |
|
14 |
#' @param who_coding (`flag`)\cr Whether WHO coding (with multiple paths per medication) should be used. |
|
15 |
#' @template param_cached |
|
16 |
#' @templateVar data adcm |
|
17 |
#' |
|
18 |
#' @return `data.frame` |
|
19 |
#' @export |
|
20 |
#' |
|
21 |
#' @examples |
|
22 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
23 |
#' |
|
24 |
#' adcm <- radcm(adsl, seed = 2) |
|
25 |
#' adcm |
|
26 |
#' |
|
27 |
#' adcm_who <- radcm(adsl, seed = 2, who_coding = TRUE) |
|
28 |
#' adcm_who |
|
29 |
radcm <- function(adsl, |
|
30 |
max_n_cms = 10L, |
|
31 |
lookup = NULL, |
|
32 |
seed = NULL, |
|
33 |
na_percentage = 0, |
|
34 |
na_vars = list(CMCLAS = c(NA, 0.1), CMDECOD = c(1234, 0.1), ATIREL = c(1234, 0.1)), |
|
35 |
who_coding = FALSE, |
|
36 |
cached = FALSE) { |
|
37 | 5x |
checkmate::assert_flag(cached) |
38 | 5x |
if (cached) { |
39 | 1x |
return(get_cached_data("cadcm")) |
40 |
} |
|
41 | ||
42 | 4x |
checkmate::assert_data_frame(adsl) |
43 | 4x |
checkmate::assert_integer(max_n_cms, len = 1, any.missing = FALSE) |
44 | 4x |
checkmate::assert_number(seed, null.ok = TRUE) |
45 | 4x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
46 | 4x |
checkmate::assert_true(na_percentage < 1) |
47 | 4x |
checkmate::assert_flag(who_coding) |
48 | ||
49 | 4x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
50 | 4x |
lookup_cm <- if (!is.null(lookup)) { |
51 | ! |
lookup |
52 |
} else { |
|
53 | 4x |
tibble::tribble( |
54 | 4x |
~CMCLAS, ~CMDECOD, ~ATIREL, |
55 | 4x |
"medcl A", "medname A_1/3", "PRIOR", |
56 | 4x |
"medcl A", "medname A_2/3", "CONCOMITANT", |
57 | 4x |
"medcl A", "medname A_3/3", "CONCOMITANT", |
58 | 4x |
"medcl B", "medname B_1/4", "CONCOMITANT", |
59 | 4x |
"medcl B", "medname B_2/4", "PRIOR", |
60 | 4x |
"medcl B", "medname B_3/4", "PRIOR", |
61 | 4x |
"medcl B", "medname B_4/4", "CONCOMITANT", |
62 | 4x |
"medcl C", "medname C_1/2", "CONCOMITANT", |
63 | 4x |
"medcl C", "medname C_2/2", "CONCOMITANT" |
64 |
) |
|
65 |
} |
|
66 | ||
67 | 4x |
if (!is.null(seed)) { |
68 | 3x |
set.seed(seed) |
69 |
} |
|
70 | 4x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
71 | ||
72 | 4x |
adcm <- Map(function(id, sid) { |
73 | 430x |
n_cms <- sample(c(0, seq_len(max_n_cms)), 1) |
74 | 430x |
i <- sample(seq_len(nrow(lookup_cm)), n_cms, TRUE) |
75 | 430x |
dplyr::mutate( |
76 | 430x |
lookup_cm[i, ], |
77 | 430x |
USUBJID = id, |
78 | 430x |
STUDYID = sid |
79 |
) |
|
80 | 4x |
}, adsl$USUBJID, adsl$STUDYID) %>% |
81 | 4x |
Reduce(rbind, .) %>% |
82 | 4x |
`[`(c(4, 5, 1, 2, 3)) %>% |
83 | 4x |
dplyr::mutate(CMCAT = CMCLAS) |
84 | ||
85 | 4x |
adcm <- var_relabel( |
86 | 4x |
adcm, |
87 | 4x |
STUDYID = "Study Identifier", |
88 | 4x |
USUBJID = "Unique Subject Identifier" |
89 |
) |
|
90 | ||
91 |
# merge ADSL to be able to add CM date and study day variables |
|
92 | 4x |
adcm <- dplyr::inner_join( |
93 | 4x |
adcm, |
94 | 4x |
adsl, |
95 | 4x |
by = c("STUDYID", "USUBJID") |
96 |
) %>% |
|
97 | 4x |
dplyr::rowwise() %>% |
98 | 4x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
99 | 4x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
100 | 4x |
TRUE ~ TRTEDTM |
101 |
))) %>% |
|
102 | 4x |
dplyr::mutate(ASTDTM = sample( |
103 | 4x |
seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"), |
104 | 4x |
size = 1 |
105 |
)) %>% |
|
106 | 4x |
dplyr::mutate(ASTDY = ceiling(difftime(ASTDTM, TRTSDTM, units = "days"))) %>% |
107 |
# add 1 to end of range incase both values passed to sample() are the same |
|
108 | 4x |
dplyr::mutate(AENDTM = sample( |
109 | 4x |
seq(lubridate::as_datetime(ASTDTM), lubridate::as_datetime(TRTENDT + 1), by = "day"), |
110 | 4x |
size = 1 |
111 |
)) %>% |
|
112 | 4x |
dplyr::mutate(AENDY = ceiling(difftime(AENDTM, TRTSDTM, units = "days"))) %>% |
113 | 4x |
dplyr::select(-TRTENDT) %>% |
114 | 4x |
dplyr::ungroup() %>% |
115 | 4x |
dplyr::arrange(STUDYID, USUBJID, ASTDTM) |
116 | ||
117 | 4x |
adcm <- adcm %>% |
118 | 4x |
dplyr::group_by(USUBJID) %>% |
119 | 4x |
dplyr::mutate(CMSEQ = seq_len(dplyr::n())) %>% |
120 | 4x |
dplyr::mutate(ASEQ = CMSEQ) %>% |
121 | 4x |
dplyr::ungroup() %>% |
122 | 4x |
dplyr::arrange(STUDYID, USUBJID, ASTDTM, CMSEQ) %>% |
123 | 4x |
dplyr::mutate( |
124 | 4x |
ATC1 = paste("ATCCLAS1", substr(CMDECOD, 9, 9)), |
125 | 4x |
ATC2 = paste("ATCCLAS2", substr(CMDECOD, 9, 9)), |
126 | 4x |
ATC3 = paste("ATCCLAS3", substr(CMDECOD, 9, 9)), |
127 | 4x |
ATC4 = paste("ATCCLAS4", substr(CMDECOD, 9, 9)) |
128 |
) %>% |
|
129 | 4x |
dplyr::mutate(CMINDC = sample(c( |
130 | 4x |
"Nausea", "Hypertension", "Urticaria", "Fever", |
131 | 4x |
"Asthma", "Infection", "Diabete", "Diarrhea", "Pneumonia" |
132 | 4x |
), dplyr::n(), replace = TRUE)) %>% |
133 | 4x |
dplyr::mutate(CMDOSE = sample(1:99, dplyr::n(), replace = TRUE)) %>% |
134 | 4x |
dplyr::mutate(CMTRT = substr(CMDECOD, 9, 13)) %>% |
135 | 4x |
dplyr::mutate(CMDOSU = sample(c( |
136 | 4x |
"ug/mL", "ug/kg/day", "%", "uL", "DROP", |
137 | 4x |
"umol/L", "mg", "mg/breath", "ug" |
138 | 4x |
), dplyr::n(), replace = TRUE)) %>% |
139 | 4x |
dplyr::mutate(CMROUTE = sample(c( |
140 | 4x |
"INTRAVENOUS", "ORAL", "NASAL", |
141 | 4x |
"INTRAMUSCULAR", "SUBCUTANEOUS", "INHALED", "RECTAL", "UNKNOWN" |
142 | 4x |
), dplyr::n(), replace = TRUE)) %>% |
143 | 4x |
dplyr::mutate(CMDOSFRQ = sample(c( |
144 | 4x |
"Q4W", "QN", "Q4H", "UNKNOWN", "TWICE", |
145 | 4x |
"Q4H", "QD", "TID", "4 TIMES PER MONTH" |
146 | 4x |
), dplyr::n(), replace = TRUE)) %>% |
147 | 4x |
dplyr::mutate( |
148 |
# use 1 year as reference time point |
|
149 | 4x |
CMSTRTPT = dplyr::case_when( |
150 | 4x |
ASTDY <= 365 ~ "BEFORE", |
151 | 4x |
ASTDY > 365 ~ "AFTER", |
152 | 4x |
is.na(ASTDY) ~ "U" |
153 |
), |
|
154 | 4x |
CMENRTPT = dplyr::case_when( |
155 | 4x |
EOSSTT %in% c("COMPLETED", "DISCONTINUED") ~ "BEFORE", |
156 | 4x |
EOSSTT == "ONGOING" ~ "ONGOING", |
157 | 4x |
is.na(EOSSTT) ~ "U" |
158 |
), |
|
159 | 4x |
ADURN = as.numeric(difftime(ASTDTM, AENDTM, units = "days")), |
160 | 4x |
ADURU = "days" |
161 |
) |
|
162 | ||
163 | ||
164 |
# Optional WHO coding, which adds more `ATC` paths for randomly selected `CMDECOD`. |
|
165 | 4x |
if (who_coding) { |
166 | 1x |
n_cmdecod_path2 <- ceiling(nrow(lookup_cm) / 2) |
167 | 1x |
cmdecod_path2 <- sample(lookup_cm$CMDECOD, n_cmdecod_path2) |
168 | 1x |
adcm_path2 <- adcm %>% |
169 | 1x |
dplyr::filter(CMDECOD %in% cmdecod_path2) %>% |
170 | 1x |
dplyr::mutate( |
171 | 1x |
ATC1 = paste(ATC1, "p2"), |
172 | 1x |
ATC2 = paste(ATC2, "p2"), |
173 | 1x |
ATC3 = paste(ATC3, "p2"), |
174 | 1x |
ATC4 = paste(ATC4, "p2") |
175 |
) |
|
176 | ||
177 | 1x |
n_cmdecod_path3 <- ceiling(length(cmdecod_path2) / 2) |
178 | 1x |
cmdecod_path3 <- sample(cmdecod_path2, n_cmdecod_path3) |
179 | 1x |
adcm_path3 <- adcm %>% |
180 | 1x |
dplyr::filter(CMDECOD %in% cmdecod_path3) %>% |
181 | 1x |
dplyr::mutate( |
182 | 1x |
ATC1 = paste(ATC1, "p3"), |
183 | 1x |
ATC2 = paste(ATC2, "p3"), |
184 | 1x |
ATC3 = paste(ATC3, "p3"), |
185 | 1x |
ATC4 = paste(ATC4, "p3") |
186 |
) |
|
187 | ||
188 | 1x |
adcm <- dplyr::bind_rows( |
189 | 1x |
adcm, |
190 | 1x |
adcm_path2, |
191 | 1x |
adcm_path3 |
192 |
) |
|
193 |
} |
|
194 | ||
195 | 4x |
adcm <- adcm %>% |
196 | 4x |
dplyr::mutate( |
197 | 4x |
ATC1CD = ATC1, |
198 | 4x |
ATC2CD = ATC2, |
199 | 4x |
ATC3CD = ATC3, |
200 | 4x |
ATC4CD = ATC4 |
201 |
) |
|
202 | ||
203 | 4x |
if (length(na_vars) > 0 && na_percentage > 0) { |
204 | ! |
adcm <- mutate_na(ds = adcm, na_vars = na_vars, na_percentage = na_percentage) |
205 |
} |
|
206 | ||
207 |
# apply metadata |
|
208 | 4x |
adcm <- apply_metadata(adcm, "metadata/ADCM.yml") |
209 | ||
210 | 4x |
return(adcm) |
211 |
} |
1 |
#' Anti-Drug Antibody Analysis Dataset (ADAB) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating a random Anti-Drug Antibody Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset and Pharmacokinetics Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @inheritParams argument_convention |
|
9 |
#' @inheritParams radpc |
|
10 |
#' @param adpc (`data.frame`)\cr Pharmacokinetics Analysis Dataset. |
|
11 |
#' @template param_cached |
|
12 |
#' @templateVar data adab |
|
13 |
#' |
|
14 |
#' @return `data.frame` |
|
15 |
#' @export |
|
16 |
#' |
|
17 |
#' @details One record per study per subject per parameter per time point: "R1800000", "RESULT1", "R1800001", "RESULT2". |
|
18 |
#' |
|
19 |
#' @examples |
|
20 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
21 |
#' adpc <- radpc(adsl, seed = 2, duration = 9 * 7) |
|
22 |
#' |
|
23 |
#' adab <- radab(adsl, adpc, seed = 2) |
|
24 |
#' adab |
|
25 |
radab <- function(adsl, |
|
26 |
adpc, |
|
27 |
constants = c(D = 100, ka = 0.8, ke = 1), |
|
28 |
paramcd = c( |
|
29 |
"R1800000", "RESULT1", "R1800001", "RESULT2", "ADASTAT1", "INDUCD1", "ENHANC1", |
|
30 |
"TRUNAFF1", "EMERNEG1", "EMERPOS1", "PERSADA1", "TRANADA1", "BFLAG1", "TIMADA1", |
|
31 |
"ADADUR1", "ADASTAT2", "INDUCD2", "ENHANC2", "EMERNEG2", "EMERPOS2", "BFLAG2", |
|
32 |
"TRUNAFF2" |
|
33 |
), |
|
34 |
param = c( |
|
35 |
"Antibody titer units", "ADA interpreted per sample result", |
|
36 |
"Neutralizing Antibody titer units", "NAB interpreted per sample result", |
|
37 |
"ADA Status of a patient", "Treatment induced ADA", "Treatment enhanced ADA", |
|
38 |
"Treatment unaffected", "Treatment Emergent - Negative", |
|
39 |
"Treatment Emergent - Positive", "Persistent ADA", "Transient ADA", "Baseline", |
|
40 |
"Time to onset of ADA", "ADA Duration", "NAB Status of a patient", |
|
41 |
"Treatment induced ADA, Neutralizing Antibody", |
|
42 |
"Treatment enhanced ADA, Neutralizing Antibody", |
|
43 |
"Treatment Emergent - Negative, Neutralizing Antibody", |
|
44 |
"Treatment Emergent - Positive, Neutralizing Antibody", |
|
45 |
"Baseline, Neutralizing Antibody", |
|
46 |
"Treatment unaffected, Neutralizing Antibody" |
|
47 |
), |
|
48 |
avalu = c( |
|
49 |
"titer", "", "titer", "", "", "", "", "", "", "", "", "", "", "weeks", "weeks", |
|
50 |
"", "", "", "", "", "", "" |
|
51 |
), |
|
52 |
seed = NULL, |
|
53 |
na_percentage = 0, |
|
54 |
na_vars = list( |
|
55 |
AVAL = c(NA, 0.1) |
|
56 |
), |
|
57 |
cached = FALSE) { |
|
58 | 4x |
checkmate::assert_flag(cached) |
59 | 4x |
if (cached) { |
60 | 1x |
return(get_cached_data("cadab")) |
61 |
} |
|
62 | ||
63 | 3x |
checkmate::assert_data_frame(adpc) |
64 | 3x |
checkmate::assert_subset(names(constants), c("D", "ka", "ke")) |
65 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
66 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1, na.ok = TRUE) |
67 | 3x |
checkmate::assert_list(na_vars) |
68 | 3x |
checkmate::assert_character(paramcd) |
69 | 3x |
checkmate::assert_character(param, len = length(paramcd)) |
70 | 3x |
checkmate::assert_character(avalu, len = length(paramcd)) |
71 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
72 | 3x |
checkmate::assert_true(na_percentage < 1) |
73 | ||
74 | 3x |
if (!is.null(seed)) { |
75 | 3x |
set.seed(seed) |
76 |
} |
|
77 | ||
78 |
# validate and initialize related variables |
|
79 | 3x |
param_init_list <- relvar_init(param, paramcd) |
80 | 3x |
unit_init_list <- relvar_init(param, avalu) |
81 | ||
82 | 3x |
adpc <- adpc %>% dplyr::filter(ASMED == "PLASMA") |
83 | 3x |
adab0 <- expand.grid( |
84 | 3x |
STUDYID = unique(adsl$STUDYID), |
85 | 3x |
USUBJID = unique(adsl$USUBJID), |
86 | 3x |
VISIT = unique(adpc$VISIT), |
87 | 3x |
PARAM = as.factor(param_init_list$relvar1[c(1:4)]), |
88 | 3x |
PARCAT1 = "A: Drug X Antibody", |
89 | 3x |
stringsAsFactors = FALSE |
90 |
) |
|
91 |
# Set random values for observations |
|
92 | 3x |
visit_lvl_params <- c( |
93 | 3x |
"Antibody titer units", "Neutralizing Antibody titer units", |
94 | 3x |
"ADA interpreted per sample result", "NAB interpreted per sample result" |
95 |
) |
|
96 | 3x |
aval_random <- stats::rnorm(nrow(unique(adab0 %>% dplyr::select(USUBJID, VISIT))), mean = 1, sd = 0.2) |
97 | 3x |
aval_random <- cbind(unique(adab0 %>% dplyr::select(USUBJID, VISIT)), AVAL1 = aval_random) |
98 | ||
99 | 3x |
adab_visit <- adab0 %>% dplyr::left_join(aval_random, by = c("USUBJID", "VISIT")) |
100 | 3x |
adab_visit <- adab_visit %>% |
101 | 3x |
dplyr::mutate( |
102 | 3x |
AVAL2 = ifelse(AVAL1 >= 1, AVAL1, NA), |
103 | 3x |
AVALC = dplyr::case_when( |
104 | 3x |
!is.na(AVAL2) ~ "POSITIVE", |
105 | 3x |
is.na(AVAL2) ~ "NEGATIVE" |
106 |
), |
|
107 | 3x |
AVAL = dplyr::case_when( |
108 | 3x |
(PARAM %in% visit_lvl_params[3:4] & !is.na(AVAL2)) ~ 1, |
109 | 3x |
(PARAM %in% visit_lvl_params[3:4] & is.na(AVAL2)) ~ 0, |
110 | 3x |
(PARAM %in% visit_lvl_params[1:2] & !is.na(AVAL2)) ~ AVAL2, |
111 | 3x |
TRUE ~ as.numeric(NA) |
112 |
) |
|
113 |
) %>% |
|
114 | 3x |
dplyr::select(-c(AVAL1, AVAL2)) |
115 | ||
116 |
# retrieve other variables from adpc |
|
117 | 3x |
adab_visit <- adab_visit %>% |
118 | 3x |
dplyr::inner_join( |
119 | 3x |
adpc %>% |
120 | 3x |
dplyr::filter(PCTPT %in% c("Predose", "24H")) %>% |
121 | 3x |
dplyr::select( |
122 | 3x |
STUDYID, |
123 | 3x |
USUBJID, |
124 | 3x |
VISIT, |
125 | 3x |
PCTPT, |
126 | 3x |
ARM, |
127 | 3x |
ACTARM, |
128 | 3x |
VISITDY, |
129 | 3x |
AFRLT, |
130 | 3x |
NFRLT, |
131 | 3x |
ARRLT, |
132 | 3x |
NRRLT, |
133 | 3x |
RELTMU |
134 |
) %>% |
|
135 | 3x |
unique(), |
136 | 3x |
by = c("STUDYID", "USUBJID", "VISIT") |
137 |
) %>% |
|
138 | 3x |
rename(ISTPT = PCTPT) |
139 | ||
140 |
# mutate time from dose variables from adpc to convert into Days |
|
141 | 3x |
adab_visit <- adab_visit %>% dplyr::mutate_at(c("AFRLT", "NFRLT", "ARRLT", "NRRLT"), ~ . / 24) |
142 | ||
143 | ||
144 | ||
145 |
# Set random values for subject level paramaters (Y/N) |
|
146 | ||
147 | 3x |
adab1 <- expand.grid( |
148 | 3x |
STUDYID = unique(adsl$STUDYID), |
149 | 3x |
USUBJID = unique(adpc$USUBJID), |
150 | 3x |
VISIT = NA, |
151 | 3x |
PARAM = as.factor(param_init_list$relvar1[c(5:13, 16:22)]), |
152 | 3x |
PARCAT1 = "A: Drug X Antibody", |
153 | 3x |
stringsAsFactors = FALSE |
154 |
) |
|
155 | ||
156 | 3x |
sub_lvl_params <- c( |
157 | 3x |
"ADA Status of a patient", "Treatment induced ADA", "Treatment enhanced ADA", |
158 | 3x |
"Treatment unaffected", "Treatment Emergent - Negative", |
159 | 3x |
"Treatment Emergent - Positive", "Persistent ADA", "Transient ADA", "Baseline", |
160 |
# "Time to onset of ADA", "ADA Duration", |
|
161 | 3x |
"NAB Status of a patient", |
162 | 3x |
"Treatment induced ADA, Neutralizing Antibody", |
163 | 3x |
"Treatment enhanced ADA, Neutralizing Antibody", |
164 | 3x |
"Treatment Emergent - Negative, Neutralizing Antibody", |
165 | 3x |
"Treatment Emergent - Positive, Neutralizing Antibody", |
166 | 3x |
"Baseline, Neutralizing Antibody", |
167 | 3x |
"Treatment unaffected, Neutralizing Antibody" |
168 |
) |
|
169 | ||
170 | 3x |
aval_random_sub <- stats::rbinom(nrow(unique(adab1 %>% dplyr::select(USUBJID))), 1, 0.5) |
171 | 3x |
aval_random_sub <- cbind(unique(adab1 %>% dplyr::select(USUBJID)), AVAL1 = aval_random_sub) |
172 | ||
173 | 3x |
adab_sub <- adab1 %>% dplyr::left_join(aval_random_sub, by = c("USUBJID")) |
174 | 3x |
adab_sub <- adab_sub %>% |
175 | 3x |
dplyr::mutate( |
176 | 3x |
AVAL = AVAL1, |
177 | 3x |
AVALC = dplyr::case_when( |
178 | 3x |
PARAM %in% c("ADA Status of a patient", "NAB Status of a patient") & AVAL1 == 1 ~ "POSITIVE", |
179 | 3x |
PARAM %in% c("ADA Status of a patient", "NAB Status of a patient") & AVAL1 == 0 ~ "NEGATIVE", |
180 | 3x |
!(PARAM %in% c("ADA Status of a patient", "NAB Status of a patient")) & AVAL1 == 1 ~ "Y", |
181 | 3x |
!(PARAM %in% c("ADA Status of a patient", "NAB Status of a patient")) & AVAL1 == 0 ~ "N" |
182 |
) |
|
183 |
) %>% |
|
184 | 3x |
dplyr::select(-c(AVAL1)) |
185 | ||
186 |
# Set random values for subject level paramaters (numeric) |
|
187 | ||
188 | 3x |
adab2 <- expand.grid( |
189 | 3x |
STUDYID = unique(adsl$STUDYID), |
190 | 3x |
USUBJID = unique(adpc$USUBJID), |
191 | 3x |
VISIT = NA, |
192 | 3x |
PARAM = as.factor(param_init_list$relvar1[c(14, 15)]), |
193 | 3x |
PARCAT1 = "A: Drug X Antibody", |
194 | 3x |
stringsAsFactors = FALSE |
195 |
) |
|
196 | ||
197 | 3x |
sub_lvl_params_num <- c("Time to onset of ADA", "ADA Duration") |
198 | ||
199 | 3x |
aval_random_sub_num <- stats::rnorm(nrow(unique(adab2 %>% dplyr::select(USUBJID))), mean = 1, sd = 1) |
200 | 3x |
aval_random_sub_num <- cbind(unique(adab2 %>% dplyr::select(USUBJID)), AVAL1 = aval_random_sub_num) |
201 | ||
202 | 3x |
adab_sub_num <- adab2 %>% dplyr::left_join(aval_random_sub_num, by = c("USUBJID")) |
203 | 3x |
adab_sub_num <- adab_sub_num %>% |
204 | 3x |
dplyr::mutate( |
205 | 3x |
AVAL = ifelse(AVAL1 >= 1, round(AVAL1, 2), NA), |
206 | 3x |
AVALC = as.character(AVAL) |
207 |
) %>% |
|
208 | 3x |
dplyr::select(-c(AVAL1)) |
209 | ||
210 | ||
211 | 3x |
adab <- bind_rows(adab_visit, adab_sub, adab_sub_num) |
212 | ||
213 | ||
214 |
# assign related variable values: PARAMxPARAMCD are related |
|
215 | 3x |
adab <- adab %>% rel_var( |
216 | 3x |
var_name = "PARAMCD", |
217 | 3x |
related_var = "PARAM", |
218 | 3x |
var_values = param_init_list$relvar2 |
219 |
) |
|
220 | ||
221 |
# assign related variable values: PARAMxAVALU are related |
|
222 | 3x |
adab <- adab %>% rel_var( |
223 | 3x |
var_name = "AVALU", |
224 | 3x |
related_var = "PARAM", |
225 | 3x |
var_values = unit_init_list$relvar2 |
226 |
) |
|
227 | ||
228 | ||
229 | 3x |
adab <- adab %>% |
230 | 3x |
dplyr::mutate( |
231 | 3x |
RELTMU = "day", |
232 | 3x |
ABLFL = ifelse(!is.na(NFRLT) & NFRLT == 0, "Y", NA) # Baseline Record Flag |
233 |
, |
|
234 | 3x |
ADABLPFL = ifelse(PARAMCD == "RESULT1" & !is.na(NFRLT) & NFRLT == 0, "Y", NA) |
235 |
# Baseline ADA Eval. Param-Level Flag, only populate for ADA, not for NAB |
|
236 |
, |
|
237 | 3x |
ADPBLPFL = ifelse(PARAMCD == "RESULT1" & !is.na(NFRLT) & NFRLT > 0 & !is.na(AVAL), "Y", NA) |
238 |
# Post-Baseline ADA Eval. Param-Level Flag, only populate for ADA, not for NAB |
|
239 |
) %>% |
|
240 | 3x |
dplyr::group_by(USUBJID) %>% |
241 | 3x |
dplyr::ungroup() |
242 | ||
243 |
# create temporary flags to derive subject-level variables |
|
244 | 3x |
adab_subj <- adab %>% |
245 | 3x |
dplyr::group_by(USUBJID) %>% |
246 | 3x |
dplyr::mutate( |
247 | 3x |
pos_bl = any(PARAM == "ADA interpreted per sample result" & !is.na(ABLFL) & AVALC == "POSITIVE"), |
248 | 3x |
pos_bl_nab = any(PARAM == "NAB interpreted per sample result" & !is.na(ABLFL) & AVALC == "POSITIVE"), |
249 | 3x |
any_pos_postbl = any(PARAM == "ADA interpreted per sample result" & is.na(ABLFL) & AVALC == "POSITIVE"), |
250 | 3x |
any_pos_postbl_nab = any(PARAM == "NAB interpreted per sample result" & is.na(ABLFL) & AVALC == "POSITIVE"), |
251 | 3x |
pos_last_postbl = any(PARAM == "ADA interpreted per sample result" & NFRLT == max(NFRLT) & AVALC == "POSITIVE"), |
252 | 3x |
ada_bl = AVAL[PARAM == "Antibody titer units" & !is.na(ABLFL)], |
253 | 3x |
nab_bl = AVAL[PARAM == "Neutralizing Antibody titer units" & !is.na(ABLFL)] |
254 |
) |
|
255 | 3x |
pos_tots <- adab_subj %>% |
256 | 3x |
dplyr::summarise( |
257 | 3x |
n_pos = sum(PARAM == "ADA interpreted per sample result" & AVALC == "POSITIVE"), |
258 | 3x |
inc_postbl = sum(PARAM == "ADA interpreted per sample result" & is.na(ABLFL) & (AVAL - ada_bl) > 0.60), |
259 | 3x |
inc_postbl_nab = sum(PARAM == "NAB interpreted per sample result" & is.na(ABLFL) & (AVAL - nab_bl) > 0.60), |
260 | 3x |
onset_ada = if (any(PARAM == "ADA interpreted per sample result" & AVALC == "POSITIVE")) { |
261 | 18x |
min(NFRLT[PARAM == "ADA interpreted per sample result" & AVALC == "POSITIVE"]) |
262 |
} else { |
|
263 | 3x |
NA |
264 |
}, |
|
265 | 3x |
last_ada = if (any(PARAM == "ADA interpreted per sample result" & AVALC == "POSITIVE")) { |
266 | 18x |
max(NFRLT[PARAM == "ADA interpreted per sample result" & AVALC == "POSITIVE"]) |
267 |
} else { |
|
268 | 3x |
NA |
269 |
} |
|
270 |
) |
|
271 | 3x |
adab_subj <- adab_subj %>% |
272 | 3x |
dplyr::left_join(pos_tots, by = "USUBJID") %>% |
273 | 3x |
dplyr::select( |
274 | 3x |
USUBJID, |
275 | 3x |
NFRLT, |
276 | 3x |
pos_bl, |
277 | 3x |
pos_bl_nab, |
278 | 3x |
any_pos_postbl, |
279 | 3x |
any_pos_postbl_nab, |
280 | 3x |
inc_postbl, |
281 | 3x |
inc_postbl_nab, |
282 | 3x |
pos_last_postbl, |
283 | 3x |
n_pos, |
284 | 3x |
onset_ada, |
285 | 3x |
last_ada |
286 |
) %>% |
|
287 | 3x |
unique() |
288 | ||
289 |
# add flags to ADAB dataset |
|
290 | 3x |
adab <- adab %>% |
291 | 3x |
dplyr::left_join(adab_subj, by = c("USUBJID", "NFRLT")) |
292 | ||
293 |
# derive subject-level variables |
|
294 | 3x |
adab[!(adab$PARAM %in% visit_lvl_params), ] <- adab %>% |
295 | 3x |
dplyr::filter(!(PARAM %in% visit_lvl_params)) %>% |
296 | 3x |
dplyr::mutate( |
297 |
# nolint start indentation_linter |
|
298 | 3x |
AVALC = dplyr::case_when( |
299 | 3x |
(PARAM == "ADA Status of a patient" & any_pos_postbl) ~ "POSITIVE", |
300 | 3x |
(PARAM == "ADA Status of a patient" & !any_pos_postbl) ~ "NEGATIVE", |
301 | 3x |
(PARAM == "Treatment induced ADA" & !pos_bl & any_pos_postbl) ~ "Y", |
302 | 3x |
(PARAM == "Treatment enhanced ADA" & pos_bl & inc_postbl > 0) ~ "Y", |
303 | 3x |
(PARAM == "Treatment unaffected" & pos_bl & (inc_postbl == 0 | !any_pos_postbl)) ~ "Y", |
304 | 3x |
(PARAM == "Treatment Emergent - Positive" & |
305 | 3x |
((!pos_bl & any_pos_postbl) | (pos_bl & inc_postbl > 0))) ~ "Y", |
306 | 3x |
(PARAM == "Treatment Emergent - Negative" & |
307 | 3x |
!((!pos_bl & any_pos_postbl) | (pos_bl & inc_postbl > 0))) ~ "Y", |
308 | 3x |
(PARAM == "Persistent ADA" & pos_last_postbl) ~ "Y", |
309 | 3x |
(PARAM == "Transient ADA" & |
310 | 3x |
(n_pos - pos_bl - pos_last_postbl == 1 | n_pos > 1)) ~ "Y", |
311 | 3x |
(PARAM == "Baseline" & pos_bl) ~ "POSITIVE", |
312 | 3x |
(PARAM == "Baseline" & !pos_bl) ~ "NEGATIVE", |
313 | 3x |
(PARAM == "Time to onset of ADA") ~ as.character(onset_ada / 7), |
314 | 3x |
(PARAM == "ADA Duration") ~ as.character((last_ada - onset_ada) / 7), |
315 | 3x |
(PARAM == "NAB Status of a patient" & any_pos_postbl_nab) ~ "POSITIVE", |
316 | 3x |
(PARAM == "NAB Status of a patient" & !any_pos_postbl_nab) ~ "NEGATIVE", |
317 | 3x |
(PARAM == "Treatment induced ADA, Neutralizing Antibody" & |
318 | 3x |
!pos_bl_nab & any_pos_postbl_nab) ~ "Y", |
319 | 3x |
(PARAM == "Treatment enhanced ADA, Neutralizing Antibody" & |
320 | 3x |
pos_bl_nab & inc_postbl_nab > 0) ~ "Y", |
321 | 3x |
(PARAM == "Baseline, Neutralizing Antibody" & pos_bl_nab) ~ "POSITIVE", |
322 | 3x |
(PARAM == "Baseline, Neutralizing Antibody" & !pos_bl_nab) ~ "NEGATIVE", |
323 | 3x |
(PARAM == "Treatment unaffected, Neutralizing Antibody" & pos_bl_nab & |
324 | 3x |
(inc_postbl_nab == 0 | !any_pos_postbl_nab)) ~ "Y", |
325 | 3x |
(PARAM == "Treatment Emergent - Positive, Neutralizing Antibody" & |
326 | 3x |
((!pos_bl_nab & any_pos_postbl_nab) | (pos_bl_nab & inc_postbl_nab > 0))) ~ "Y", |
327 | 3x |
(PARAM == "Treatment Emergent - Negative, Neutralizing Antibody" & |
328 | 3x |
!((!pos_bl_nab & any_pos_postbl_nab) | (pos_bl_nab & inc_postbl_nab > 0))) ~ "Y", |
329 | 3x |
TRUE ~ "N" |
330 |
), |
|
331 | 3x |
AVAL = dplyr::case_when( |
332 | 3x |
(PARAM == "ADA Status of a patient" & any_pos_postbl) ~ 1, |
333 | 3x |
(PARAM == "Treatment induced ADA" & !pos_bl & any_pos_postbl) ~ 1, |
334 | 3x |
(PARAM == "Treatment enhanced ADA" & pos_bl & inc_postbl > 0) ~ 1, |
335 | 3x |
(PARAM == "Treatment unaffected" & pos_bl & (inc_postbl == 0 | !any_pos_postbl)) ~ 1, |
336 | 3x |
(PARAM == "Treatment Emergent - Positive" & |
337 | 3x |
((!pos_bl & any_pos_postbl) | (pos_bl & inc_postbl > 0))) ~ 1, |
338 | 3x |
(PARAM == "Treatment Emergent - Negative" & |
339 | 3x |
!((!pos_bl & any_pos_postbl) | (pos_bl & inc_postbl > 0))) ~ 1, |
340 | 3x |
(PARAM == "Persistent ADA" & pos_last_postbl) ~ 1, |
341 | 3x |
(PARAM == "Transient ADA" & |
342 | 3x |
(n_pos - ifelse(pos_bl, 1, 0) - ifelse(pos_last_postbl, 1, 0) == 1 | n_pos > 1)) ~ 1, |
343 | 3x |
(PARAM == "Baseline" & pos_bl) ~ 1, |
344 | 3x |
(PARAM == "Time to onset of ADA") ~ onset_ada / 7, |
345 | 3x |
(PARAM == "ADA Duration") ~ (last_ada - onset_ada) / 7, |
346 | 3x |
(PARAM == "NAB Status of a patient" & any_pos_postbl_nab) ~ 1, |
347 | 3x |
(PARAM == "Treatment induced ADA, Neutralizing Antibody" & |
348 | 3x |
!pos_bl_nab & any_pos_postbl_nab) ~ 1, |
349 | 3x |
(PARAM == "Treatment enhanced ADA, Neutralizing Antibody" & |
350 | 3x |
pos_bl_nab & inc_postbl_nab > 0) ~ 1, |
351 | 3x |
(PARAM == "Baseline, Neutralizing Antibody" & pos_bl_nab) ~ 1, |
352 | 3x |
(PARAM == "Treatment unaffected, Neutralizing Antibody" & pos_bl_nab & |
353 | 3x |
(inc_postbl_nab == 0 | !any_pos_postbl_nab)) ~ 1, |
354 | 3x |
(PARAM == "Treatment Emergent - Positive, Neutralizing Antibody" & |
355 | 3x |
((!pos_bl_nab & any_pos_postbl_nab) | (pos_bl_nab & inc_postbl_nab > 0))) ~ 1, |
356 | 3x |
(PARAM == "Treatment Emergent - Negative, Neutralizing Antibody" & |
357 | 3x |
!((!pos_bl_nab & any_pos_postbl_nab) | (pos_bl_nab & inc_postbl_nab > 0))) ~ 1, |
358 | 3x |
TRUE ~ 0 |
359 |
), |
|
360 |
# nolint end indentation_linter |
|
361 | 3x |
PARCAT1 = dplyr::case_when( |
362 | 3x |
PARAM %in% c( |
363 | 3x |
"Neutralizing Antibody titer units", "NAB interpreted per sample result", |
364 | 3x |
"NAB Status of a patient", "Treatment induced ADA, Neutralizing Antibody", |
365 | 3x |
"Treatment enhanced ADA, Neutralizing Antibody", |
366 | 3x |
"Treatment Emergent - Negative, Neutralizing Antibody", |
367 | 3x |
"Treatment Emergent - Positive, Neutralizing Antibody", |
368 | 3x |
"Treatment unaffected, Neutralizing Antibody" |
369 | 3x |
) ~ "A: Drug X Neutralizing Antibody", |
370 | 3x |
TRUE ~ PARCAT1 |
371 |
) |
|
372 |
) |
|
373 | ||
374 |
# remove intermediate flag variables from adab |
|
375 | 3x |
adab <- adab %>% |
376 | 3x |
dplyr::select(-c( |
377 | 3x |
pos_bl, |
378 | 3x |
pos_bl_nab, |
379 | 3x |
any_pos_postbl, |
380 | 3x |
any_pos_postbl_nab, |
381 | 3x |
pos_last_postbl, |
382 | 3x |
inc_postbl, |
383 | 3x |
inc_postbl_nab, |
384 | 3x |
n_pos, |
385 | 3x |
onset_ada, |
386 | 3x |
last_ada |
387 |
)) |
|
388 | ||
389 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) { |
390 | ! |
adab <- mutate_na(ds = adab, na_vars = na_vars, na_percentage = na_percentage) |
391 |
} |
|
392 | ||
393 | 3x |
adab <- apply_metadata(adab, "metadata/ADAB.yml") |
394 |
} |
1 |
#' Questionnaires Analysis Dataset (ADQS) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating a random Questionnaires Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details One record per subject per parameter per analysis visit per analysis date. |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `AVISITN` |
|
11 |
#' |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @template param_cached |
|
14 |
#' @templateVar data adqs |
|
15 |
#' |
|
16 |
#' @return `data.frame` |
|
17 |
#' @export |
|
18 |
#' |
|
19 |
#' @author npaszty |
|
20 |
#' |
|
21 |
#' @examples |
|
22 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
23 |
#' |
|
24 |
#' adqs <- radqs(adsl, visit_format = "WEEK", n_assessments = 7L, seed = 2) |
|
25 |
#' adqs |
|
26 |
#' |
|
27 |
#' adqs <- radqs(adsl, visit_format = "CYCLE", n_assessments = 3L, seed = 2) |
|
28 |
#' adqs |
|
29 |
radqs <- function(adsl, |
|
30 |
param = c( |
|
31 |
"BFI All Questions", |
|
32 |
"Fatigue Interference", |
|
33 |
"Function/Well-Being (GF1,GF3,GF7)", |
|
34 |
"Treatment Side Effects (GP2,C5,GP5)", |
|
35 |
"FKSI-19 All Questions" |
|
36 |
), |
|
37 |
paramcd = c("BFIALL", "FATIGI", "FKSI-FWB", "FKSI-TSE", "FKSIALL"), |
|
38 |
visit_format = "WEEK", |
|
39 |
n_assessments = 5L, |
|
40 |
n_days = 5L, |
|
41 |
seed = NULL, |
|
42 |
na_percentage = 0, |
|
43 |
na_vars = list( |
|
44 |
LOQFL = c(NA, 0.1), ABLFL2 = c(1234, 0.1), ABLFL = c(1235, 0.1), |
|
45 |
CHG2 = c(1235, 0.1), PCHG2 = c(1235, 0.1), CHG = c(1234, 0.1), PCHG = c(1234, 0.1) |
|
46 |
), |
|
47 |
cached = FALSE) { |
|
48 | 4x |
checkmate::assert_flag(cached) |
49 | 4x |
if (cached) { |
50 | 1x |
return(get_cached_data("cadqs")) |
51 |
} |
|
52 | ||
53 | 3x |
checkmate::assert_data_frame(adsl) |
54 | 3x |
checkmate::assert_character(param, min.len = 1, any.missing = FALSE) |
55 | 3x |
checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE) |
56 | 3x |
checkmate::assert_string(visit_format) |
57 | 3x |
checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE) |
58 | 3x |
checkmate::assert_integer(n_days, len = 1, any.missing = FALSE) |
59 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
60 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
61 | 3x |
checkmate::assert_true(na_percentage < 1) |
62 | ||
63 |
# validate and initialize param vectors |
|
64 | 3x |
param_init_list <- relvar_init(param, paramcd) |
65 | ||
66 | 3x |
if (!is.null(seed)) { |
67 | 3x |
set.seed(seed) |
68 |
} |
|
69 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
70 | ||
71 | 3x |
adqs <- expand.grid( |
72 | 3x |
STUDYID = unique(adsl$STUDYID), |
73 | 3x |
USUBJID = adsl$USUBJID, |
74 | 3x |
PARAM = param_init_list$relvar1, |
75 | 3x |
AVISIT = visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days), |
76 | 3x |
stringsAsFactors = FALSE |
77 |
) |
|
78 | ||
79 | 3x |
adqs <- dplyr::mutate( |
80 | 3x |
adqs, |
81 | 3x |
AVISITN = dplyr::case_when( |
82 | 3x |
AVISIT == "SCREENING" ~ -1, |
83 | 3x |
AVISIT == "BASELINE" ~ 0, |
84 | 3x |
(grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2, |
85 | 3x |
TRUE ~ NA_real_ |
86 |
) |
|
87 |
) |
|
88 | ||
89 |
# assign related variable values: PARAMxPARAMCD are related |
|
90 | 3x |
adqs <- adqs %>% rel_var( |
91 | 3x |
var_name = "PARAMCD", |
92 | 3x |
related_var = "PARAM", |
93 | 3x |
var_values = param_init_list$relvar2 |
94 |
) |
|
95 | ||
96 | 3x |
adqs$AVAL <- stats::rnorm(nrow(adqs), mean = 50, sd = 8) + adqs$AVISITN * stats::rnorm(nrow(adqs), mean = 5, sd = 2) |
97 | ||
98 |
# order to prepare for change from screening and baseline values |
|
99 | 3x |
adqs <- adqs[order(adqs$STUDYID, adqs$USUBJID, adqs$PARAMCD, adqs$AVISITN), ] |
100 | ||
101 | 3x |
adqs <- Reduce( |
102 | 3x |
rbind, |
103 | 3x |
lapply( |
104 | 3x |
split(adqs, adqs$USUBJID), |
105 | 3x |
function(x) { |
106 | 30x |
x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])] |
107 | 30x |
x$ABLFL2 <- ifelse(x$AVISIT == "SCREENING", "Y", "") |
108 | 30x |
x$ABLFL <- ifelse( |
109 | 30x |
toupper(visit_format) == "WEEK" & x$AVISIT == "BASELINE", |
110 | 30x |
"Y", |
111 | 30x |
ifelse( |
112 | 30x |
toupper(visit_format) == "CYCLE" & x$AVISIT == "CYCLE 1 DAY 1", |
113 | 30x |
"Y", |
114 |
"" |
|
115 |
) |
|
116 |
) |
|
117 | 30x |
x$LOQFL <- ifelse(x$AVAL < 32, "Y", "N") |
118 | 30x |
x |
119 |
} |
|
120 |
) |
|
121 |
) |
|
122 | ||
123 | 3x |
adqs$BASE2 <- retain(adqs, adqs$AVAL, adqs$ABLFL2 == "Y") |
124 | 3x |
adqs$BASE <- ifelse(adqs$ABLFL2 != "Y", retain(adqs, adqs$AVAL, adqs$ABLFL == "Y"), NA) |
125 | ||
126 | 3x |
adqs <- adqs %>% |
127 | 3x |
dplyr::mutate(CHG2 = AVAL - BASE2) %>% |
128 | 3x |
dplyr::mutate(PCHG2 = 100 * (CHG2 / BASE2)) %>% |
129 | 3x |
dplyr::mutate(CHG = AVAL - BASE) %>% |
130 | 3x |
dplyr::mutate(PCHG = 100 * (CHG / BASE)) %>% |
131 | 3x |
var_relabel( |
132 | 3x |
STUDYID = attr(adsl$STUDYID, "label"), |
133 | 3x |
USUBJID = attr(adsl$USUBJID, "label") |
134 |
) |
|
135 | ||
136 | 3x |
adqs <- var_relabel( |
137 | 3x |
adqs, |
138 | 3x |
STUDYID = "Study Identifier", |
139 | 3x |
USUBJID = "Unique Subject Identifier" |
140 |
) |
|
141 | ||
142 |
# merge ADSL to be able to add QS date and study day variables |
|
143 | 3x |
adqs <- dplyr::inner_join( |
144 | 3x |
adqs, |
145 | 3x |
adsl, |
146 | 3x |
by = c("STUDYID", "USUBJID") |
147 |
) %>% |
|
148 | 3x |
dplyr::rowwise() %>% |
149 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
150 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
151 | 3x |
TRUE ~ TRTEDTM |
152 |
))) %>% |
|
153 | 3x |
ungroup() |
154 | ||
155 | 3x |
adqs <- adqs %>% |
156 | 3x |
group_by(USUBJID) %>% |
157 | 3x |
arrange(USUBJID, AVISITN) %>% |
158 | 3x |
dplyr::mutate(ADTM = rep( |
159 | 3x |
sort(sample( |
160 | 3x |
seq(lubridate::as_datetime(TRTSDTM[1]), lubridate::as_datetime(TRTENDT[1]), by = "day"), |
161 | 3x |
size = nlevels(AVISIT) |
162 |
)), |
|
163 | 3x |
each = n() / nlevels(AVISIT) |
164 |
)) %>% |
|
165 | 3x |
dplyr::ungroup() %>% |
166 | 3x |
dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>% |
167 | 3x |
dplyr::select(-TRTENDT) %>% |
168 | 3x |
dplyr::arrange(STUDYID, USUBJID, ADTM) |
169 | ||
170 | 3x |
adqs <- adqs %>% |
171 | 3x |
dplyr::group_by(USUBJID) %>% |
172 | 3x |
dplyr::mutate(QSSEQ = seq_len(dplyr::n())) %>% |
173 | 3x |
dplyr::mutate(ASEQ = QSSEQ) %>% |
174 | 3x |
dplyr::ungroup() %>% |
175 | 3x |
dplyr::arrange( |
176 | 3x |
STUDYID, |
177 | 3x |
USUBJID, |
178 | 3x |
PARAMCD, |
179 | 3x |
AVISITN, |
180 | 3x |
ADTM, |
181 | 3x |
QSSEQ |
182 |
) |
|
183 | ||
184 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) { |
185 | ! |
adqs <- mutate_na(ds = adqs, na_vars = na_vars, na_percentage = na_percentage) |
186 |
} |
|
187 | ||
188 |
# apply metadata |
|
189 | 3x |
adqs <- apply_metadata(adqs, "metadata/ADQS.yml") |
190 | ||
191 | 3x |
return(adqs) |
192 |
} |
1 |
#' Time to Safety Event Analysis Dataset (ADSAFTTE) |
|
2 |
#' |
|
3 |
#' Function to generate random Time-to-Safety Event Dataset for a |
|
4 |
#' given Subject-Level Analysis Dataset. |
|
5 |
#' |
|
6 |
#' @details |
|
7 |
#' |
|
8 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD` |
|
9 |
#' |
|
10 |
#' @inheritParams radaette |
|
11 |
#' @param ... Additional arguments to be passed to `radaette` |
|
12 |
#' |
|
13 |
#' @return `data.frame` |
|
14 |
#' @export |
|
15 |
#' |
|
16 |
#' @examples |
|
17 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
18 |
#' |
|
19 |
#' adsaftte <- radsaftte(adsl, seed = 2) |
|
20 |
#' adsaftte |
|
21 |
radsaftte <- function(adsl, |
|
22 |
...) { |
|
23 | 2x |
radaette(adsl = adsl, ...) |
24 |
} |