1 |
#' ECG Analysis Dataset (ADEG) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating random dataset from ECG Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details One record per subject per parameter per analysis visit per analysis date. |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `BASETYPE`, `AVISITN`, `ATPTN`, `DTYPE`, `ADTM`, `EGSEQ`, `ASPID` |
|
11 |
#' |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @param egcat (`character vector`)\cr EG category values. |
|
14 |
#' @param max_n_eg (`integer`)\cr Maximum number of EG results per patient. Defaults to 10. |
|
15 |
#' @template param_cached |
|
16 |
#' @templateVar data adeg |
|
17 |
#' |
|
18 |
#' @return `data.frame` |
|
19 |
#' @export |
|
20 |
#' |
|
21 |
#' @author tomlinsj, npaszty, Xuefeng Hou, dipietrc |
|
22 |
#' |
|
23 |
#' @examples |
|
24 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
25 |
#' |
|
26 |
#' adeg <- radeg(adsl, visit_format = "WEEK", n_assessments = 7L, seed = 2) |
|
27 |
#' adeg |
|
28 |
#' |
|
29 |
#' adeg <- radeg(adsl, visit_format = "CYCLE", n_assessments = 2L, seed = 2) |
|
30 |
#' adeg |
|
31 |
radeg <- function(adsl, |
|
32 |
egcat = c("INTERVAL", "INTERVAL", "MEASUREMENT", "FINDING"), |
|
33 |
param = c( |
|
34 |
"QT Duration", |
|
35 |
"RR Duration", |
|
36 |
"Heart Rate", |
|
37 |
"ECG Interpretation" |
|
38 |
), |
|
39 |
paramcd = c("QT", "RR", "HR", "ECGINTP"), |
|
40 |
paramu = c("msec", "msec", "beats/min", ""), |
|
41 |
visit_format = "WEEK", |
|
42 |
n_assessments = 5L, |
|
43 |
n_days = 5L, |
|
44 |
max_n_eg = 10L, |
|
45 |
lookup = NULL, |
|
46 |
seed = NULL, |
|
47 |
na_percentage = 0, |
|
48 |
na_vars = list( |
|
49 |
ABLFL = c(1235, 0.1), BASE = c(NA, 0.1), BASEC = c(NA, 0.1), |
|
50 |
CHG = c(1234, 0.1), PCHG = c(1234, 0.1) |
|
51 |
), |
|
52 |
cached = FALSE) { |
|
53 | 4x |
checkmate::assert_flag(cached) |
54 | 4x |
if (cached) { |
55 | 1x |
return(get_cached_data("cadeg")) |
56 |
} |
|
57 | ||
58 | 3x |
checkmate::assert_data_frame(adsl) |
59 | 3x |
checkmate::assert_character(egcat, min.len = 1, any.missing = FALSE) |
60 | 3x |
checkmate::assert_character(param, min.len = 1, any.missing = FALSE) |
61 | 3x |
checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE) |
62 | 3x |
checkmate::assert_character(paramu, min.len = 1, any.missing = FALSE) |
63 | 3x |
checkmate::assert_string(visit_format) |
64 | 3x |
checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE) |
65 | 3x |
checkmate::assert_integer(n_days, len = 1, any.missing = FALSE) |
66 | 3x |
checkmate::assert_integer(max_n_eg, len = 1, any.missing = FALSE) |
67 | 3x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
68 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
69 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
70 | 3x |
checkmate::assert_true(na_percentage < 1) |
71 | ||
72 |
# validate and initialize related variables |
|
73 | 3x |
egcat_init_list <- relvar_init(param, egcat) |
74 | 3x |
param_init_list <- relvar_init(param, paramcd) |
75 | 3x |
unit_init_list <- relvar_init(param, paramu) |
76 | ||
77 | 3x |
if (!is.null(seed)) { |
78 | 3x |
set.seed(seed) |
79 |
} |
|
80 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
81 | ||
82 | 3x |
adeg <- expand.grid( |
83 | 3x |
STUDYID = unique(adsl$STUDYID), |
84 | 3x |
USUBJID = adsl$USUBJID, |
85 | 3x |
PARAM = as.factor(param_init_list$relvar1), |
86 | 3x |
AVISIT = visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days), |
87 | 3x |
stringsAsFactors = FALSE |
88 |
) |
|
89 | ||
90 |
# assign related variable values: PARAMxEGCAT are related |
|
91 | 3x |
adeg <- adeg %>% rel_var( |
92 | 3x |
var_name = "EGCAT", |
93 | 3x |
related_var = "PARAM", |
94 | 3x |
var_values = egcat_init_list$relvar2 |
95 |
) |
|
96 | ||
97 |
# assign related variable values: PARAMxPARAMCD are related |
|
98 | 3x |
adeg <- adeg %>% rel_var( |
99 | 3x |
var_name = "PARAMCD", |
100 | 3x |
related_var = "PARAM", |
101 | 3x |
var_values = param_init_list$relvar2 |
102 |
) |
|
103 | ||
104 | 3x |
adeg <- adeg %>% dplyr::mutate(AVAL = dplyr::case_when( |
105 | 3x |
PARAMCD == "QT" ~ stats::rnorm(nrow(adeg), mean = 350, sd = 100), |
106 | 3x |
PARAMCD == "RR" ~ stats::rnorm(nrow(adeg), mean = 1050, sd = 300), |
107 | 3x |
PARAMCD == "HR" ~ stats::rnorm(nrow(adeg), mean = 70, sd = 20), |
108 | 3x |
PARAMCD == "ECGINTP" ~ NA_real_ |
109 |
)) |
|
110 | ||
111 | 3x |
adeg <- adeg %>% |
112 | 3x |
dplyr::mutate(EGTESTCD = PARAMCD) %>% |
113 | 3x |
dplyr::mutate(EGTEST = PARAM) |
114 | ||
115 | 3x |
adeg <- adeg %>% dplyr::mutate(AVISITN = dplyr::case_when( |
116 | 3x |
AVISIT == "SCREENING" ~ -1, |
117 | 3x |
AVISIT == "BASELINE" ~ 0, |
118 | 3x |
(grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2, |
119 | 3x |
TRUE ~ NA_real_ |
120 |
)) |
|
121 | ||
122 | 3x |
adeg <- adeg %>% rel_var( |
123 | 3x |
var_name = "AVALU", |
124 | 3x |
related_var = "PARAM", |
125 | 3x |
var_values = unit_init_list$relvar2 |
126 |
) |
|
127 | ||
128 |
# order to prepare for change from screening and baseline values |
|
129 | 3x |
adeg <- adeg[order(adeg$STUDYID, adeg$USUBJID, adeg$PARAMCD, adeg$AVISITN), ] |
130 | ||
131 | 3x |
adeg <- Reduce(rbind, lapply(split(adeg, adeg$USUBJID), function(x) { |
132 | 30x |
x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])] |
133 | 30x |
x$ABLFL <- ifelse(toupper(visit_format) == "WEEK" & x$AVISIT == "BASELINE", |
134 | 30x |
"Y", |
135 | 30x |
ifelse(toupper(visit_format) == "CYCLE" & x$AVISIT == "CYCLE 1 DAY 1", "Y", "") |
136 |
) |
|
137 | 30x |
x |
138 |
})) |
|
139 | ||
140 | 3x |
adeg$BASE <- ifelse(adeg$AVISITN >= 0, retain(adeg, adeg$AVAL, adeg$ABLFL == "Y"), adeg$AVAL) |
141 | ||
142 | 3x |
adeg <- adeg %>% dplyr::mutate(ANRLO = dplyr::case_when( |
143 | 3x |
PARAMCD == "QT" ~ 200, |
144 | 3x |
PARAMCD == "RR" ~ 600, |
145 | 3x |
PARAMCD == "HR" ~ 40, |
146 | 3x |
PARAMCD == "ECGINTP" ~ NA_real_ |
147 |
)) |
|
148 | ||
149 | 3x |
adeg <- adeg %>% dplyr::mutate(ANRHI = dplyr::case_when( |
150 | 3x |
PARAMCD == "QT" ~ 500, |
151 | 3x |
PARAMCD == "RR" ~ 1500, |
152 | 3x |
PARAMCD == "HR" ~ 100, |
153 | 3x |
PARAMCD == "ECGINTP" ~ NA_real_ |
154 |
)) |
|
155 | ||
156 | 3x |
adeg <- adeg %>% dplyr::mutate(ANRIND = factor(dplyr::case_when( |
157 | 3x |
AVAL < ANRLO ~ "LOW", |
158 | 3x |
AVAL >= ANRLO & AVAL <= ANRHI ~ "NORMAL", |
159 | 3x |
AVAL > ANRHI ~ "HIGH" |
160 |
))) |
|
161 | ||
162 | 3x |
adeg <- adeg %>% |
163 | 3x |
dplyr::mutate(CHG = ifelse(AVISITN > 0, AVAL - BASE, NA)) %>% |
164 | 3x |
dplyr::mutate(PCHG = ifelse(AVISITN > 0, 100 * (CHG / BASE), NA)) %>% |
165 | 3x |
dplyr::mutate(BASETYPE = "LAST") %>% |
166 | 3x |
dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>% |
167 | 3x |
dplyr::mutate(BNRIND = ANRIND[ABLFL == "Y"]) %>% |
168 | 3x |
dplyr::ungroup() %>% |
169 | 3x |
dplyr::mutate(ATPTN = 1) %>% |
170 | 3x |
dplyr::mutate(DTYPE = NA) |
171 | ||
172 | 3x |
adeg$ANRIND <- factor(adeg$ANRIND, levels = c("LOW", "NORMAL", "HIGH")) |
173 | 3x |
adeg$BNRIND <- factor(adeg$BNRIND, levels = c("LOW", "NORMAL", "HIGH")) |
174 | ||
175 | 3x |
adeg <- rcd_var_relabel( |
176 | 3x |
adeg, |
177 | 3x |
STUDYID = "Study Identifier", |
178 | 3x |
USUBJID = "Unique Subject Identifier" |
179 |
) |
|
180 | ||
181 |
# merge ADSL to be able to add EG date and study day variables |
|
182 | 3x |
adeg <- dplyr::inner_join( |
183 | 3x |
adeg, |
184 | 3x |
adsl, |
185 | 3x |
by = c("STUDYID", "USUBJID") |
186 |
) %>% |
|
187 | 3x |
dplyr::rowwise() %>% |
188 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
189 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
190 | 3x |
TRUE ~ TRTEDTM |
191 |
))) %>% |
|
192 | 3x |
dplyr::ungroup() |
193 | ||
194 | 3x |
adeg <- adeg %>% |
195 | 3x |
dplyr::group_by(USUBJID) %>% |
196 | 3x |
dplyr::arrange(USUBJID, AVISITN) %>% |
197 | 3x |
dplyr::mutate(ADTM = rep( |
198 | 3x |
sort(sample( |
199 | 3x |
seq(lubridate::as_datetime(TRTSDTM[1]), lubridate::as_datetime(TRTENDT[1]), by = "day"), |
200 | 3x |
size = nlevels(AVISIT) |
201 |
)), |
|
202 | 3x |
each = n() / nlevels(AVISIT) |
203 |
)) %>% |
|
204 | 3x |
dplyr::ungroup() %>% |
205 | 3x |
dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>% |
206 | 3x |
dplyr::select(-TRTENDT) %>% |
207 | 3x |
dplyr::arrange(STUDYID, USUBJID, ADTM) |
208 | ||
209 | 3x |
adeg <- adeg %>% |
210 | 3x |
dplyr::mutate(ASPID = sample(seq_len(dplyr::n()))) %>% |
211 | 3x |
dplyr::group_by(USUBJID) %>% |
212 | 3x |
dplyr::mutate(EGSEQ = seq_len(dplyr::n())) %>% |
213 | 3x |
dplyr::mutate(ASEQ = EGSEQ) %>% |
214 | 3x |
dplyr::ungroup() %>% |
215 | 3x |
dplyr::arrange( |
216 | 3x |
STUDYID, |
217 | 3x |
USUBJID, |
218 | 3x |
PARAMCD, |
219 | 3x |
BASETYPE, |
220 | 3x |
AVISITN, |
221 | 3x |
ATPTN, |
222 | 3x |
DTYPE, |
223 | 3x |
ADTM, |
224 | 3x |
EGSEQ, |
225 | 3x |
ASPID |
226 |
) |
|
227 | ||
228 | 3x |
adeg <- adeg %>% dplyr::mutate(ONTRTFL = factor(dplyr::case_when( |
229 | 3x |
!AVISIT %in% c("SCREENING", "BASELINE") ~ "Y", |
230 | 3x |
TRUE ~ "" |
231 |
))) |
|
232 | ||
233 | 3x |
adeg <- adeg %>% dplyr::mutate(AVALC = ifelse( |
234 | 3x |
PARAMCD == "ECGINTP", |
235 | 3x |
as.character(sample_fct(c("ABNORMAL", "NORMAL"), nrow(adeg), prob = c(0.25, 0.75))), |
236 | 3x |
as.character(AVAL) |
237 |
)) |
|
238 | ||
239 |
# Temporarily creating a row_check column to easily match newly created |
|
240 |
# observations with their row correct arrangement. |
|
241 | 3x |
adeg <- adeg %>% |
242 | 3x |
dplyr::mutate(row_check = seq_len(nrow(adeg))) |
243 | ||
244 |
# Created function to add in new observations for DTYPE, "MINIMUM" & "MAXIMUM" in this case. |
|
245 | 3x |
get_groups <- function(data, |
246 | 3x |
minimum) { |
247 | 6x |
data <- data %>% |
248 | 6x |
dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>% |
249 | 6x |
dplyr::arrange(ADTM, ASPID, EGSEQ) %>% |
250 | 6x |
dplyr::filter( |
251 | 6x |
(AVISIT != "BASELINE" & AVISIT != "SCREENING") & |
252 | 6x |
(ONTRTFL == "Y" | ADTM <= TRTSDTM) |
253 |
) %>% |
|
254 |
{ |
|
255 | 6x |
if (minimum == TRUE) { |
256 | 3x |
dplyr::filter(., AVAL == min(AVAL)) %>% |
257 | 3x |
dplyr::mutate(., DTYPE = "MINIMUM", AVISIT = "POST-BASELINE MINIMUM") |
258 |
} else { |
|
259 | 3x |
dplyr::filter(., AVAL == max(AVAL)) %>% |
260 | 3x |
dplyr::mutate(., DTYPE = "MAXIMUM", AVISIT = "POST-BASELINE MAXIMUM") |
261 |
} |
|
262 |
} %>% |
|
263 | 6x |
dplyr::slice(1) %>% |
264 | 6x |
dplyr::ungroup() |
265 | ||
266 | 6x |
return(data) |
267 |
} |
|
268 | ||
269 |
# Binding the new observations to the dataset from the function above and rearranging in the correct order. |
|
270 | 3x |
adeg <- rbind(adeg, get_groups(adeg, TRUE), get_groups(adeg, FALSE)) %>% |
271 | 3x |
dplyr::arrange(row_check) %>% |
272 | 3x |
dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>% |
273 | 3x |
dplyr::arrange(AVISIT, .by_group = TRUE) %>% |
274 | 3x |
dplyr::ungroup() |
275 | ||
276 |
# Dropping the "row_check" column created above. |
|
277 | 3x |
adeg <- adeg[, -which(names(adeg) %in% c("row_check"))] |
278 | ||
279 |
# Created function to easily match rows which comply to ONTRTFL derivation |
|
280 | 3x |
flag_variables <- function(data, worst_obs) { |
281 | 6x |
data_compare <- data %>% |
282 | 6x |
dplyr::mutate(row_check = seq_len(nrow(data))) |
283 | ||
284 | 6x |
data <- data_compare %>% |
285 |
{ |
|
286 | 6x |
if (worst_obs == FALSE) { |
287 | 3x |
dplyr::group_by(., USUBJID, PARAMCD, BASETYPE, AVISIT) %>% |
288 | 3x |
dplyr::arrange(., ADTM, ASPID, EGSEQ) |
289 |
} else { |
|
290 | 3x |
dplyr::group_by(., USUBJID, PARAMCD, BASETYPE) |
291 |
} |
|
292 |
} %>% |
|
293 | 6x |
dplyr::filter( |
294 | 6x |
AVISITN > 0 & (ONTRTFL == "Y" | ADTM <= TRTSDTM) & |
295 | 6x |
is.na(DTYPE) |
296 |
) %>% |
|
297 |
{ |
|
298 | 6x |
if (worst_obs == TRUE) { |
299 | 3x |
dplyr::arrange(., AVALC) %>% dplyr::filter(., ifelse( |
300 | 3x |
PARAMCD == "ECGINTP", |
301 | 3x |
ifelse(AVALC == "ABNORMAL", AVALC == "ABNORMAL", AVALC == "NORMAL"), |
302 | 3x |
AVAL == min(AVAL) |
303 |
)) |
|
304 |
} else { |
|
305 | 3x |
dplyr::filter(., ifelse( |
306 | 3x |
PARAMCD == "ECGINTP", |
307 | 3x |
AVALC == "ABNORMAL" | AVALC == "NORMAL", |
308 | 3x |
AVAL == min(AVAL) |
309 |
)) |
|
310 |
} |
|
311 |
} %>% |
|
312 | 6x |
dplyr::slice(1) %>% |
313 |
{ |
|
314 | 6x |
if (worst_obs == TRUE) { |
315 | 3x |
dplyr::mutate(., new_var = dplyr::case_when( |
316 | 3x |
(AVALC == "ABNORMAL" | AVALC == "NORMAL") ~ "Y", |
317 | 3x |
(!is.na(AVAL) & is.na(DTYPE)) ~ "Y", |
318 | 3x |
TRUE ~ "" |
319 |
)) |
|
320 |
} else { |
|
321 | 3x |
dplyr::mutate(., new_var = dplyr::case_when( |
322 | 3x |
(AVALC == "ABNORMAL" | AVALC == "NORMAL") ~ "Y", |
323 | 3x |
(!is.na(AVAL) & is.na(DTYPE)) ~ "Y", |
324 | 3x |
TRUE ~ "" |
325 |
)) |
|
326 |
} |
|
327 |
} %>% |
|
328 | 6x |
dplyr::ungroup() |
329 | ||
330 | 6x |
data_compare$new_var <- ifelse(data_compare$row_check %in% data$row_check, "Y", "") |
331 | 6x |
data_compare <- data_compare[, -which(names(data_compare) %in% c("row_check"))] |
332 | ||
333 | 6x |
return(data_compare) |
334 |
} |
|
335 | ||
336 | 3x |
adeg <- flag_variables(adeg, FALSE) %>% dplyr::rename(WORS01FL = "new_var") |
337 | 3x |
adeg <- flag_variables(adeg, TRUE) %>% dplyr::rename(WORS02FL = "new_var") |
338 | ||
339 | 3x |
adeg <- adeg %>% dplyr::mutate(ANL01FL = factor(ifelse( |
340 | 3x |
(ABLFL == "Y" | (is.na(DTYPE) & WORS01FL == "Y")) & |
341 | 3x |
(AVISIT != "SCREENING"), |
342 | 3x |
"Y", |
343 |
"" |
|
344 |
))) |
|
345 | ||
346 | 3x |
adeg <- adeg %>% |
347 | 3x |
dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>% |
348 | 3x |
dplyr::mutate(BASEC = ifelse( |
349 | 3x |
PARAMCD == "ECGINTP", |
350 | 3x |
AVALC[AVISIT == "BASELINE"], |
351 | 3x |
as.character(BASE) |
352 |
)) %>% |
|
353 | 3x |
dplyr::mutate(ANL03FL = dplyr::case_when( |
354 | 3x |
DTYPE == "MINIMUM" ~ "Y", |
355 | 3x |
ABLFL == "Y" & PARAMCD != "ECGINTP" ~ "Y", |
356 | 3x |
TRUE ~ "" |
357 |
)) %>% |
|
358 | 3x |
dplyr::mutate(ANL04FL = dplyr::case_when( |
359 | 3x |
DTYPE == "MAXIMUM" ~ "Y", |
360 | 3x |
ABLFL == "Y" & PARAMCD != "ECGINTP" ~ "Y", |
361 | 3x |
TRUE ~ "" |
362 |
)) %>% |
|
363 | 3x |
dplyr::ungroup() |
364 | ||
365 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) { |
366 | ! |
adeg <- mutate_na(ds = adeg, na_vars = na_vars, na_percentage = na_percentage) |
367 |
} |
|
368 | ||
369 |
# apply metadata |
|
370 | 3x |
adeg <- apply_metadata(adeg, "metadata/ADEG.yml") |
371 | ||
372 | 3x |
return(adeg) |
373 |
} |
1 |
#' Time to Adverse Event Analysis Dataset (ADAETTE) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function to generate random Time-to-AE Dataset for a |
|
6 |
#' given Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD` |
|
11 |
#' |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @param event_descr (`character vector`)\cr Descriptions of events. Defaults to `NULL`. |
|
14 |
#' @param censor_descr (`character vector`)\cr Descriptions of censors. Defaults to `NULL`. |
|
15 |
#' @template param_cached |
|
16 |
#' @templateVar data adaette |
|
17 |
#' |
|
18 |
#' @return `data.frame` |
|
19 |
#' @export |
|
20 |
#' |
|
21 |
#' @author Xiuting Mi |
|
22 |
#' |
|
23 |
#' @examples |
|
24 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
25 |
#' |
|
26 |
#' adaette <- radaette(adsl, seed = 2) |
|
27 |
#' adaette |
|
28 |
radaette <- function(adsl, |
|
29 |
event_descr = NULL, |
|
30 |
censor_descr = NULL, |
|
31 |
lookup = NULL, |
|
32 |
seed = NULL, |
|
33 |
na_percentage = 0, |
|
34 |
na_vars = list(CNSR = c(NA, 0.1), AVAL = c(1234, 0.1)), |
|
35 |
cached = FALSE) { |
|
36 | 6x |
checkmate::assert_flag(cached) |
37 | 6x |
if (cached) { |
38 | 1x |
return(get_cached_data("cadaette")) |
39 |
} |
|
40 | ||
41 | 5x |
checkmate::assert_data_frame(adsl) |
42 | 5x |
checkmate::assert_character(censor_descr, null.ok = TRUE, min.len = 1, any.missing = FALSE) |
43 | 5x |
checkmate::assert_character(event_descr, null.ok = TRUE, min.len = 1, any.missing = FALSE) |
44 | 5x |
checkmate::assert_number(seed, null.ok = TRUE) |
45 | 5x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
46 | 5x |
checkmate::assert_true(na_percentage < 1) |
47 | ||
48 | 5x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
49 | 5x |
lookup_adaette <- if (!is.null(lookup)) { |
50 | ! |
lookup |
51 |
} else { |
|
52 | 5x |
tibble::tribble( |
53 | 5x |
~ARM, ~CATCD, ~CAT, ~LAMBDA, ~CNSR_P, |
54 | 5x |
"ARM A", "1", "any adverse event", 1 / 80, 0.4, |
55 | 5x |
"ARM B", "1", "any adverse event", 1 / 100, 0.2, |
56 | 5x |
"ARM C", "1", "any adverse event", 1 / 60, 0.42, |
57 | 5x |
"ARM A", "2", "any serious adverse event", 1 / 100, 0.3, |
58 | 5x |
"ARM B", "2", "any serious adverse event", 1 / 150, 0.1, |
59 | 5x |
"ARM C", "2", "any serious adverse event", 1 / 80, 0.32, |
60 | 5x |
"ARM A", "3", "a grade 3-5 adverse event", 1 / 80, 0.2, |
61 | 5x |
"ARM B", "3", "a grade 3-5 adverse event", 1 / 100, 0.08, |
62 | 5x |
"ARM C", "3", "a grade 3-5 adverse event", 1 / 60, 0.23 |
63 |
) |
|
64 |
} |
|
65 | ||
66 | 5x |
if (!is.null(seed)) { |
67 | 5x |
set.seed(seed) |
68 |
} |
|
69 | 5x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
70 | ||
71 | 5x |
evntdescr_sel <- if (!is.null(event_descr)) { |
72 | ! |
event_descr |
73 |
} else { |
|
74 | 5x |
"Preferred Term" |
75 |
} |
|
76 | ||
77 | 5x |
cnsdtdscr_sel <- if (!is.null(censor_descr)) { |
78 | ! |
censor_descr |
79 |
} else { |
|
80 | 5x |
c( |
81 | 5x |
"Clinical Cut Off", |
82 | 5x |
"Completion or Discontinuation", |
83 | 5x |
"End of AE Reporting Period" |
84 |
) |
|
85 |
} |
|
86 | ||
87 | 5x |
random_patient_data <- function(patient_info) { |
88 | 50x |
startdt <- lubridate::date(patient_info$TRTSDTM) |
89 | 50x |
trtedtm <- lubridate::floor_date(dplyr::case_when( |
90 | 50x |
is.na(patient_info$TRTEDTM) ~ lubridate::date(patient_info$TRTSDTM) + study_duration_secs, |
91 | 50x |
TRUE ~ lubridate::date(patient_info$TRTEDTM) |
92 | 50x |
), unit = "day") |
93 | 50x |
enddts <- c(patient_info$EOSDT, lubridate::date(trtedtm)) |
94 | 50x |
enddts_min_index <- which.min(enddts) |
95 | 50x |
adt <- enddts[enddts_min_index] |
96 | 50x |
adtm <- lubridate::as_datetime(adt) |
97 | 50x |
ady <- as.numeric(adt - startdt + 1) |
98 | 50x |
data.frame( |
99 | 50x |
ARM = patient_info$ARM, |
100 | 50x |
STUDYID = patient_info$STUDYID, |
101 | 50x |
SITEID = patient_info$SITEID, |
102 | 50x |
USUBJID = patient_info$USUBJID, |
103 | 50x |
PARAMCD = "AEREPTTE", |
104 | 50x |
PARAM = "Time to end of AE reporting period", |
105 | 50x |
CNSR = 0, |
106 | 50x |
AVAL = lubridate::days(ady) / lubridate::years(1), |
107 | 50x |
AVALU = "YEARS", |
108 | 50x |
EVNTDESC = ifelse(enddts_min_index == 1, "Completion or Discontinuation", "End of AE Reporting Period"), |
109 | 50x |
CNSDTDSC = NA, |
110 | 50x |
ADTM = adtm, |
111 | 50x |
ADY = ady, |
112 | 50x |
stringsAsFactors = FALSE |
113 |
) |
|
114 |
} |
|
115 | ||
116 |
# validate and initialize related variables for Hy's law |
|
117 | 5x |
paramcd_hy <- c("HYSTTEUL", "HYSTTEBL") |
118 | 5x |
param_hy <- c("Time to Hy's Law Elevation in relation to ULN", "Time to Hy's Law Elevation in relation to Baseline") |
119 | 5x |
param_init_list <- relvar_init(param_hy, paramcd_hy) |
120 | 5x |
adsl_hy <- dplyr::select(adsl, "STUDYID", "USUBJID", "TRTSDTM", "SITEID", "ARM") |
121 | ||
122 |
# create all combinations of unique values in STUDYID, USUBJID, PARAM, AVISIT |
|
123 | 5x |
adaette_hy <- expand.grid( |
124 | 5x |
STUDYID = unique(adsl$STUDYID), |
125 | 5x |
USUBJID = adsl$USUBJID, |
126 | 5x |
PARAM = as.factor(param_init_list$relvar1), |
127 | 5x |
stringsAsFactors = FALSE |
128 |
) |
|
129 | ||
130 |
# Add other variables to adaette_hy |
|
131 | 5x |
adaette_hy <- dplyr::left_join(adaette_hy, adsl_hy, by = c("STUDYID", "USUBJID")) %>% |
132 | 5x |
rel_var( |
133 | 5x |
var_name = "PARAMCD", |
134 | 5x |
related_var = "PARAM", |
135 | 5x |
var_values = param_init_list$relvar2 |
136 |
) %>% |
|
137 | 5x |
dplyr::mutate( |
138 | 5x |
CNSR = sample(c(0, 1), prob = c(0.1, 0.9), size = dplyr::n(), replace = TRUE), |
139 | 5x |
EVNTDESC = dplyr::if_else( |
140 | 5x |
CNSR == 0, |
141 | 5x |
"First Post-Baseline Raised ALT or AST Elevation Result", |
142 | 5x |
NA_character_ |
143 |
), |
|
144 | 5x |
CNSDTDSC = dplyr::if_else(CNSR == 0, NA_character_, |
145 | 5x |
sample(c("Last Post-Baseline ALT or AST Result", "Treatment Start"), |
146 | 5x |
prob = c(0.9, 0.1), |
147 | 5x |
size = dplyr::n(), replace = TRUE |
148 |
) |
|
149 |
) |
|
150 |
) %>% |
|
151 | 5x |
dplyr::rowwise() %>% |
152 | 5x |
dplyr::mutate(ADTM = dplyr::case_when( |
153 | 5x |
CNSDTDSC == "Treatment Start" ~ TRTSDTM, |
154 | 5x |
TRUE ~ TRTSDTM + sample(seq(0, study_duration_secs), size = dplyr::n(), replace = TRUE) |
155 |
)) %>% |
|
156 | 5x |
dplyr::mutate( |
157 | 5x |
ADY_int = lubridate::date(ADTM) - lubridate::date(TRTSDTM) + 1, |
158 | 5x |
ADY = as.numeric(ADY_int), |
159 | 5x |
AVAL = lubridate::days(ADY_int) / lubridate::weeks(1), |
160 | 5x |
AVALU = "WEEKS" |
161 |
) %>% |
|
162 | 5x |
dplyr::select(-TRTSDTM, -ADY_int) |
163 | ||
164 | 5x |
random_ae_data <- function(lookup_info, patient_info, patient_data) { |
165 | 150x |
cnsr <- sample(c(0, 1), 1, prob = c(1 - lookup_info$CNSR_P, lookup_info$CNSR_P)) |
166 | 150x |
ae_rep_tte <- patient_data$AVAL[patient_data$PARAMCD == "AEREPTTE"] |
167 | 150x |
data.frame( |
168 | 150x |
ARM = rep(patient_data$ARM, 2), |
169 | 150x |
STUDYID = rep(patient_data$STUDYID, 2), |
170 | 150x |
SITEID = rep(patient_data$SITEID, 2), |
171 | 150x |
USUBJID = rep(patient_data$USUBJID, 2), |
172 | 150x |
PARAMCD = c( |
173 | 150x |
paste0("AETTE", lookup_info$CATCD), |
174 | 150x |
paste0("AETOT", lookup_info$CATCD) |
175 |
), |
|
176 | 150x |
PARAM = c( |
177 | 150x |
paste("Time to first occurrence of", lookup_info$CAT), |
178 | 150x |
paste("Number of occurrences of", lookup_info$CAT) |
179 |
), |
|
180 | 150x |
CNSR = c( |
181 | 150x |
cnsr, |
182 | 150x |
NA |
183 |
), |
|
184 | 150x |
AVAL = c( |
185 |
# We generate these values conditional on the censoring information. |
|
186 |
# If this time to event is censored, then there were no AEs reported and the time is set |
|
187 |
# to the AE reporting period time. Otherwise we draw from truncated distributions to make |
|
188 |
# sure that we are within the AE reporting time and above 0 AEs. |
|
189 | 150x |
ifelse(cnsr == 1, ae_rep_tte, rtexp(1, lookup_info$LAMBDA * 365.25, r = ae_rep_tte)), |
190 | 150x |
ifelse(cnsr == 1, 0, rtpois(1, lookup_info$LAMBDA * 365.25)) |
191 |
), |
|
192 | 150x |
AVALU = c( |
193 | 150x |
"YEARS", |
194 | 150x |
NA |
195 |
), |
|
196 | 150x |
EVNTDESC = c( |
197 | 150x |
ifelse(cnsr == 0, sample(evntdescr_sel, 1), ""), |
198 | 150x |
NA |
199 |
), |
|
200 | 150x |
CNSDTDSC = c( |
201 | 150x |
ifelse(cnsr == 1, sample(cnsdtdscr_sel, 1), ""), |
202 | 150x |
NA |
203 |
), |
|
204 | 150x |
stringsAsFactors = FALSE |
205 | 150x |
) %>% dplyr::mutate( |
206 | 150x |
ADY = dplyr::if_else(is.na(AVALU), NA_real_, ceiling(as.numeric(lubridate::dyears(AVAL), "days"))), |
207 | 150x |
ADTM = dplyr::if_else( |
208 | 150x |
is.na(AVALU), |
209 | 150x |
lubridate::as_datetime(NA), |
210 | 150x |
patient_info$TRTSDTM + lubridate::days(ADY) |
211 |
) |
|
212 |
) |
|
213 |
} |
|
214 | ||
215 | 5x |
adaette <- split(adsl, adsl$USUBJID) %>% |
216 | 5x |
lapply(function(patient_info) { |
217 | 50x |
patient_data <- random_patient_data(patient_info) |
218 | 50x |
lookup_arm <- lookup_adaette %>% |
219 | 50x |
dplyr::filter(ARM == as.character(patient_info$ARMCD)) |
220 | 50x |
ae_data <- split(lookup_arm, lookup_arm$CATCD) %>% |
221 | 50x |
lapply(random_ae_data, patient_data = patient_data, patient_info = patient_info) %>% |
222 | 50x |
Reduce(rbind, .) |
223 | 50x |
dplyr::bind_rows(patient_data, ae_data) |
224 |
}) %>% |
|
225 | 5x |
Reduce(rbind, .) %>% |
226 | 5x |
rcd_var_relabel( |
227 | 5x |
STUDYID = "Study Identifier", |
228 | 5x |
USUBJID = "Unique Subject Identifier" |
229 |
) |
|
230 | ||
231 | 5x |
adaette <- rcd_var_relabel( |
232 | 5x |
adaette, |
233 | 5x |
STUDYID = "Study Identifier", |
234 | 5x |
USUBJID = "Unique Subject Identifier" |
235 |
) |
|
236 | ||
237 | 5x |
adaette <- rbind(adaette, adaette_hy) |
238 | ||
239 | 5x |
adaette <- dplyr::inner_join( |
240 | 5x |
dplyr::select(adaette, -"SITEID", -"ARM"), |
241 | 5x |
adsl, |
242 | 5x |
by = c("STUDYID", "USUBJID") |
243 |
) %>% |
|
244 | 5x |
dplyr::group_by(USUBJID) %>% |
245 | 5x |
dplyr::arrange(ADTM) %>% |
246 | 5x |
dplyr::mutate(TTESEQ = seq_len(dplyr::n())) %>% |
247 | 5x |
dplyr::mutate(ASEQ = TTESEQ) %>% |
248 | 5x |
dplyr::mutate(PARAM = as.factor(PARAM)) %>% |
249 | 5x |
dplyr::mutate(PARAMCD = as.factor(PARAMCD)) %>% |
250 | 5x |
dplyr::ungroup() %>% |
251 | 5x |
dplyr::arrange( |
252 | 5x |
STUDYID, |
253 | 5x |
USUBJID, |
254 | 5x |
PARAMCD, |
255 | 5x |
ADTM, |
256 | 5x |
TTESEQ |
257 |
) |
|
258 | ||
259 | 5x |
if (length(na_vars) > 0 && na_percentage > 0) { |
260 | ! |
adaette <- dplyr::mutate(ds = adaette, na_vars = na_vars, na_percentage = na_percentage) |
261 |
} |
|
262 | ||
263 |
# apply metadata |
|
264 | 5x |
adaette <- apply_metadata(adaette, "metadata/ADAETTE.yml") |
265 | ||
266 | 5x |
return(adaette) |
267 |
} |
1 |
#' Load Cached Data |
|
2 |
#' |
|
3 |
#' Return data attached to package. |
|
4 |
#' |
|
5 |
#' @keywords internal |
|
6 |
#' @noRd |
|
7 |
get_cached_data <- function(dataname) { |
|
8 | 22x |
checkmate::assert_string(dataname) |
9 | 22x |
if (!("package:random.cdisc.data" %in% search())) { |
10 | 1x |
stop("cached data can only be loaded if the random.cdisc.data package is attached.", |
11 | 1x |
"Please run library(random.cdisc.data) before loading cached data.", |
12 | 1x |
call. = FALSE |
13 |
) |
|
14 |
} else { |
|
15 | 21x |
get(dataname, envir = asNamespace("random.cdisc.data")) |
16 |
} |
|
17 |
} |
|
18 | ||
19 |
#' Create a Factor with Random Elements of x |
|
20 |
#' |
|
21 |
#' Sample elements from `x` with replacement to build a factor. |
|
22 |
#' |
|
23 |
#' @param x (`character vector` or `factor`)\cr If character vector then it is also used |
|
24 |
#' as levels of the returned factor. If factor then the levels are used as the new levels. |
|
25 |
#' @param N (`numeric`)\cr Number of items to choose. |
|
26 |
#' @param ... Additional arguments to be passed to `sample`. |
|
27 |
#' |
|
28 |
#' @return A factor of length `N`. |
|
29 |
#' @export |
|
30 |
#' |
|
31 |
#' @examples |
|
32 |
#' sample_fct(letters[1:3], 10) |
|
33 |
#' sample_fct(iris$Species, 10) |
|
34 |
sample_fct <- function(x, N, ...) { # nolint |
|
35 | 296x |
checkmate::assert_number(N) |
36 | ||
37 | 296x |
factor(sample(x, N, replace = TRUE, ...), levels = if (is.factor(x)) levels(x) else x) |
38 |
} |
|
39 | ||
40 |
#' Related Variables: Initialize |
|
41 |
#' |
|
42 |
#' Verify and initialize related variable values. |
|
43 |
#' For example, `relvar_init("Alanine Aminotransferase Measurement", "ALT")`. |
|
44 |
#' |
|
45 |
#' @param relvar1 (`list` of `character`)\cr List of n elements. |
|
46 |
#' @param relvar2 (`list` of `character`)\cr List of n elements. |
|
47 |
#' |
|
48 |
#' @return A vector of n elements. |
|
49 |
#' |
|
50 |
#' @keywords internal |
|
51 |
relvar_init <- function(relvar1, relvar2) { |
|
52 | 64x |
checkmate::assert_character(relvar1, min.len = 1, any.missing = FALSE) |
53 | 64x |
checkmate::assert_character(relvar2, min.len = 1, any.missing = FALSE) |
54 | ||
55 | 64x |
if (length(relvar1) != length(relvar2)) { |
56 | 1x |
message(simpleError( |
57 | 1x |
"The argument value length of relvar1 and relvar2 differ. They must contain the same number of elements." |
58 |
)) |
|
59 | ! |
return(NA) |
60 |
} |
|
61 | 63x |
return(list("relvar1" = relvar1, "relvar2" = relvar2)) |
62 |
} |
|
63 | ||
64 |
#' Related Variables: Assign |
|
65 |
#' |
|
66 |
#' Assign values to a related variable within a domain. |
|
67 |
#' |
|
68 |
#' @param df (`data.frame`)\cr Data frame containing the related variables. |
|
69 |
#' @param var_name (`character`)\cr Name of variable related to `rel_var` to add to `df`. |
|
70 |
#' @param var_values (`any`)\cr Vector of values related to values of `related_var`. |
|
71 |
#' @param related_var (`character`)\cr Name of variable within `df` with values to which values |
|
72 |
#' of `var_name` must relate. |
|
73 |
#' |
|
74 |
#' @return `df` with added factor variable `var_name` containing `var_values` corresponding to `related_var`. |
|
75 |
#' @export |
|
76 |
#' |
|
77 |
#' @examples |
|
78 |
#' # Example with data.frame. |
|
79 |
#' params <- c("Level A", "Level B", "Level C") |
|
80 |
#' adlb_df <- data.frame( |
|
81 |
#' ID = 1:9, |
|
82 |
#' PARAM = factor( |
|
83 |
#' rep(c("Level A", "Level B", "Level C"), 3), |
|
84 |
#' levels = params |
|
85 |
#' ) |
|
86 |
#' ) |
|
87 |
#' rel_var( |
|
88 |
#' df = adlb_df, |
|
89 |
#' var_name = "PARAMCD", |
|
90 |
#' var_values = c("A", "B", "C"), |
|
91 |
#' related_var = "PARAM" |
|
92 |
#' ) |
|
93 |
#' |
|
94 |
#' # Example with tibble. |
|
95 |
#' adlb_tbl <- tibble::tibble( |
|
96 |
#' ID = 1:9, |
|
97 |
#' PARAM = factor( |
|
98 |
#' rep(c("Level A", "Level B", "Level C"), 3), |
|
99 |
#' levels = params |
|
100 |
#' ) |
|
101 |
#' ) |
|
102 |
#' rel_var( |
|
103 |
#' df = adlb_tbl, |
|
104 |
#' var_name = "PARAMCD", |
|
105 |
#' var_values = c("A", "B", "C"), |
|
106 |
#' related_var = "PARAM" |
|
107 |
#' ) |
|
108 |
rel_var <- function(df, var_name, related_var, var_values = NULL) { |
|
109 | 64x |
checkmate::assert_data_frame(df) |
110 | 64x |
checkmate::assert_string(var_name) |
111 | 64x |
checkmate::assert_string(related_var) |
112 | 64x |
n_relvar1 <- length(unique(df[, related_var, drop = TRUE])) |
113 | 64x |
checkmate::assert_vector(var_values, null.ok = TRUE, len = n_relvar1, any.missing = FALSE) |
114 | 1x |
if (is.null(var_values)) var_values <- rep(NA, n_relvar1) |
115 | ||
116 | 64x |
relvar1 <- unique(df[, related_var, drop = TRUE]) |
117 | 64x |
relvar2_values <- rep(NA, nrow(df)) |
118 | 64x |
for (r in seq_len(n_relvar1)) { |
119 | 538x |
matched <- which(df[, related_var, drop = TRUE] == relvar1[r]) |
120 | 538x |
relvar2_values[matched] <- var_values[r] |
121 |
} |
|
122 | 64x |
df[[var_name]] <- factor(relvar2_values) |
123 | 64x |
return(df) |
124 |
} |
|
125 | ||
126 |
#' Create Visit Schedule |
|
127 |
#' |
|
128 |
#' Create a visit schedule as a factor. |
|
129 |
#' |
|
130 |
#' X number of visits, or X number of cycles and Y number of days. |
|
131 |
#' |
|
132 |
#' @inheritParams argument_convention |
|
133 |
#' |
|
134 |
#' @return A factor of length `n_assessments`. |
|
135 |
#' @export |
|
136 |
#' |
|
137 |
#' @examples |
|
138 |
#' visit_schedule(visit_format = "WEeK", n_assessments = 10L) |
|
139 |
#' visit_schedule(visit_format = "CyCLE", n_assessments = 5L, n_days = 2L) |
|
140 |
visit_schedule <- function(visit_format = "WEEK", |
|
141 |
n_assessments = 10L, |
|
142 |
n_days = 5L) { |
|
143 | 56x |
checkmate::assert_string(visit_format, pattern = "^WEEK$|^CYCLE$", ignore.case = TRUE) |
144 | 56x |
checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE) |
145 | 56x |
checkmate::assert_integer(n_days, len = 1, any.missing = FALSE) |
146 | ||
147 | 56x |
if (toupper(visit_format) == "WEEK") { |
148 |
# numeric vector of n assessments/cycles/days |
|
149 | 49x |
assessments <- 1:n_assessments |
150 |
# numeric vector for ordering including screening (-1) and baseline (0) place holders |
|
151 | 49x |
assessments_ord <- -1:n_assessments |
152 |
# character vector of nominal visit values |
|
153 | 49x |
visit_values <- c("SCREENING", "BASELINE", paste(toupper(visit_format), assessments, "DAY", (assessments * 7) + 1)) |
154 | 7x |
} else if (toupper(visit_format) == "CYCLE") { |
155 | 7x |
cycles <- sort(rep(1:n_assessments, times = 1, each = n_days)) |
156 | 7x |
days <- rep(seq(1:n_days), times = n_assessments, each = 1) |
157 | 7x |
assessments_ord <- 0:(n_assessments * n_days) |
158 | 7x |
visit_values <- c("SCREENING", paste(toupper(visit_format), cycles, "DAY", days)) |
159 |
} |
|
160 | ||
161 |
# create and order factor variable to return from function |
|
162 | 56x |
visit_values <- stats::reorder(factor(visit_values), assessments_ord) |
163 |
} |
|
164 | ||
165 |
#' Primary Keys: Retain Values |
|
166 |
#' |
|
167 |
#' Retain values within primary keys. |
|
168 |
#' |
|
169 |
#' @param df (`data.frame`)\cr Data frame in which to apply the retain. |
|
170 |
#' @param value_var (`any`)\cr Variable in `df` containing the value to be retained. |
|
171 |
#' @param event (`expression`)\cr Expression returning a logical value to trigger the retain. |
|
172 |
#' @param outside (`any`)\cr Additional value to retain. Defaults to `NA`. |
|
173 |
#' @return A vector of values where expression is true. |
|
174 |
#' @keywords internal |
|
175 |
retain <- function(df, value_var, event, outside = NA) { |
|
176 | 31x |
indices <- c(1, which(event == TRUE), nrow(df) + 1) |
177 | 31x |
values <- c(outside, value_var[event == TRUE]) |
178 | 31x |
rep(values, diff(indices)) |
179 |
} |
|
180 | ||
181 |
#' Primary Keys: Labels |
|
182 |
#' |
|
183 |
#' @description Shallow copy of `formatters::var_relabel()`. Used mainly internally to |
|
184 |
#' relabel a subset of variables in a data set. |
|
185 |
#' |
|
186 |
#' @param x (`data.frame`)\cr Data frame containing variables to which labels are applied. |
|
187 |
#' @param ... (`named character`)\cr Name-Value pairs, where name corresponds to a variable |
|
188 |
#' name in `x` and the value to the new variable label. |
|
189 |
#' @return x (`data.frame`)\cr Data frame with labels applied. |
|
190 |
#' |
|
191 |
#' @keywords internal |
|
192 |
rcd_var_relabel <- function(x, ...) { |
|
193 | 79x |
stopifnot(is.data.frame(x)) |
194 | 79x |
if (missing(...)) { |
195 | ! |
return(x) |
196 |
} |
|
197 | 79x |
dots <- list(...) |
198 | 79x |
varnames <- names(dots) |
199 | 79x |
if (is.null(varnames)) { |
200 | 1x |
stop("missing variable declarations") |
201 |
} |
|
202 | 78x |
map_varnames <- match(varnames, colnames(x)) |
203 | 78x |
if (any(is.na(map_varnames))) { |
204 | ! |
stop("variables: ", paste(varnames[is.na(map_varnames)], collapse = ", "), " not found") |
205 |
} |
|
206 | 78x |
if (any(vapply(dots, Negate(is.character), logical(1)))) { |
207 | ! |
stop("all variable labels must be of type character") |
208 |
} |
|
209 | 78x |
for (i in seq_along(map_varnames)) { |
210 | 155x |
attr(x[[map_varnames[[i]]]], "label") <- dots[[i]] |
211 |
} |
|
212 | 78x |
x |
213 |
} |
|
214 | ||
215 |
#' Apply Metadata |
|
216 |
#' |
|
217 |
#' Apply label and variable ordering attributes to domains. |
|
218 |
#' |
|
219 |
#' @param df (`data.frame`)\cr Data frame to which metadata is applied. |
|
220 |
#' @param filename (`yaml`)\cr File containing domain metadata. |
|
221 |
#' @param add_adsl (`logical`)\cr Should ADSL data be merged to domain. |
|
222 |
#' @param adsl_filename (`yaml`)\cr File containing ADSL metadata. |
|
223 |
#' @return Data frame with metadata applied. |
|
224 |
#' |
|
225 |
#' @export |
|
226 |
#' @examples |
|
227 |
#' seed <- 1 |
|
228 |
#' adsl <- radsl(seed = seed) |
|
229 |
#' adsub <- radsub(adsl, seed = seed) |
|
230 |
#' yaml_path <- file.path(path.package("random.cdisc.data"), "inst", "metadata") |
|
231 |
#' adsl <- apply_metadata(adsl, file.path(yaml_path, "ADSL.yml"), FALSE) |
|
232 |
#' adsub <- apply_metadata( |
|
233 |
#' adsub, file.path(yaml_path, "ADSUB.yml"), TRUE, |
|
234 |
#' file.path(yaml_path, "ADSL.yml") |
|
235 |
#' ) |
|
236 |
apply_metadata <- function(df, filename, add_adsl = TRUE, adsl_filename = "metadata/ADSL.yml") { |
|
237 | 90x |
checkmate::assert_data_frame(df) |
238 | 90x |
checkmate::assert_string(filename) |
239 | 90x |
checkmate::assert_flag(add_adsl) |
240 | 90x |
checkmate::assert_string(adsl_filename) |
241 | ||
242 | 90x |
apply_type <- function(df, var, type) { |
243 | 5986x |
if (is.null(type)) { |
244 | ! |
return() |
245 |
} |
|
246 | ||
247 | 5986x |
if (type == "character" && !is.character(df[[var]])) { |
248 | 12x |
df[[var]] <- as.character(df[[var]]) |
249 | 5974x |
} else if (type == "factor" && !is.factor(df[[var]])) { |
250 | 730x |
df[[var]] <- as.factor(df[[var]]) |
251 | 5244x |
} else if (type == "integer" && !is.integer(df[[var]])) { |
252 | 225x |
df[[var]] <- as.integer(df[[var]]) |
253 | 5019x |
} else if (type == "numeric" && !is.numeric(df[[var]])) { |
254 | 3x |
df[[var]] <- as.numeric(df[[var]]) |
255 | 5016x |
} else if (type == "logical" && !is.logical(df[[var]])) { |
256 | ! |
df[[var]] <- as.logical(df[[var]]) |
257 | 5016x |
} else if (type == "datetime" && !lubridate::is.POSIXct(df[[var]])) { |
258 | 9x |
df[[var]] <- as.POSIXct(df[[var]]) |
259 | 5007x |
} else if (type == "date" && !lubridate::is.Date(df[[var]])) { |
260 | ! |
df[[var]] <- as.Date(df[[var]]) |
261 |
} |
|
262 | 5986x |
return(df) |
263 |
} |
|
264 | ||
265 |
# remove existing attributes |
|
266 | 90x |
for (i in base::setdiff(names(attributes(df)), names(attributes(data.frame())))) { |
267 | 3x |
attr(df, i) <- NULL |
268 |
} |
|
269 | ||
270 |
# get metadata |
|
271 | 90x |
metadata <- yaml::yaml.load_file(system.file(filename, package = "random.cdisc.data")) |
272 | 90x |
adsl_metadata <- if (add_adsl) { |
273 | 64x |
yaml::yaml.load_file(system.file(adsl_filename, package = "random.cdisc.data")) |
274 |
} else { |
|
275 | 26x |
NULL |
276 |
} |
|
277 | 90x |
metadata_variables <- append(adsl_metadata$variables, metadata$variables) |
278 | 90x |
metadata_varnames <- names(metadata_variables) |
279 | ||
280 |
# find variables that does not have labels and are not it metadata |
|
281 | 90x |
missing_vars_map <- vapply( |
282 | 90x |
names(df), |
283 | 90x |
function(x) { |
284 | 5986x |
!(x %in% c("STUDYID", "USUBJID", metadata_varnames)) && is.null(attr(df[[x]], "label")) |
285 |
}, |
|
286 | 90x |
logical(1) |
287 |
) |
|
288 | 90x |
missing_vars <- names(df)[missing_vars_map] |
289 | 90x |
if (length(missing_vars) > 0) { |
290 | ! |
msg <- paste0( |
291 | ! |
"Following variables does not have label or are not found in ", |
292 | ! |
filename, |
293 |
": ", |
|
294 | ! |
paste0(missing_vars, collapse = ", ") |
295 |
) |
|
296 | ! |
warning(msg) |
297 |
} |
|
298 | ||
299 | 90x |
if (!all(metadata_varnames %in% names(df))) { |
300 | 6x |
metadata_varnames <- metadata_varnames[metadata_varnames %in% names(df)] |
301 |
} |
|
302 | ||
303 |
# assign labels to variables |
|
304 | 90x |
for (var in metadata_varnames) { |
305 | 5986x |
df <- apply_type(df, var, metadata_variables[[var]]$type) |
306 | 5986x |
attr(df[[var]], "label") <- metadata_variables[[var]]$label |
307 |
} |
|
308 | ||
309 |
# reorder data frame columns to expected BDS order |
|
310 | 90x |
df <- df[, unique(c("STUDYID", "USUBJID", metadata_varnames, names(df)))] |
311 | ||
312 |
# assign label to data frame |
|
313 | 90x |
attr(df, "label") <- metadata$domain$label |
314 | ||
315 | 90x |
df |
316 |
} |
|
317 | ||
318 |
#' Replace Values in a Vector by NA |
|
319 |
#' |
|
320 |
#' @description `r lifecycle::badge("stable")` |
|
321 |
#' |
|
322 |
#' Randomized replacement of values by `NA`. |
|
323 |
#' |
|
324 |
#' @inheritParams argument_convention |
|
325 |
#' @param v (`any`)\cr Vector of any type. |
|
326 |
#' @param percentage (`proportion`)\cr Value between 0 and 1 defining |
|
327 |
#' how much of the vector shall be replaced by `NA`. This number |
|
328 |
#' is randomized by +/- 5% to have full randomization. |
|
329 |
#' |
|
330 |
#' @return The input vector `v` where a certain number of values are replaced by `NA`. |
|
331 |
#' |
|
332 |
#' @export |
|
333 |
replace_na <- function(v, percentage = 0.05, seed = NULL) { |
|
334 | 9x |
checkmate::assert_number(percentage, lower = 0, upper = 1) |
335 | ||
336 | 9x |
if (percentage == 0) { |
337 | 1x |
return(v) |
338 |
} |
|
339 | ||
340 | 8x |
if (!is.null(seed) && !is.na(seed)) { |
341 | 8x |
set.seed(seed) |
342 |
} |
|
343 | ||
344 |
# randomize the percentage |
|
345 | 8x |
ind <- sample(seq_along(v), round(length(v) * percentage)) |
346 | ||
347 | 8x |
v[ind] <- NA |
348 | ||
349 | 8x |
return(v) |
350 |
} |
|
351 | ||
352 |
#' Replace Values with NA |
|
353 |
#' |
|
354 |
#' @description `r lifecycle::badge("stable")` |
|
355 |
#' |
|
356 |
#' Replace column values with `NA`s. |
|
357 |
#' |
|
358 |
#' @inheritParams argument_convention |
|
359 |
#' @param ds (`data.frame`)\cr Any data set. |
|
360 |
#' |
|
361 |
#' @return dataframe without `NA` values. |
|
362 |
#' |
|
363 |
#' @export |
|
364 |
mutate_na <- function(ds, na_vars = NULL, na_percentage = 0.05) { |
|
365 | 5x |
if (!is.null(na_vars)) { |
366 | 4x |
stopifnot(is.list(na_vars)) # any list is OK; as values can be left NA |
367 | 4x |
stopifnot(length(names(na_vars)) == length(na_vars)) # names for all elements |
368 |
} else { |
|
369 | 1x |
na_vars <- names(ds) |
370 |
} |
|
371 | ||
372 | 5x |
stopifnot(is.numeric(na_percentage)) |
373 | 5x |
stopifnot(na_percentage >= 0 && na_percentage < 1) |
374 | ||
375 | 5x |
for (na_var in names(na_vars)) { |
376 | 8x |
if (!is.na(na_var)) { |
377 | 8x |
if (!na_var %in% names(ds)) { |
378 | 1x |
warning(paste(na_var, "not in column names")) |
379 |
} else { |
|
380 | 7x |
ds <- ds %>% |
381 | 7x |
ungroup_rowwise_df() %>% |
382 | 7x |
dplyr::mutate( |
383 | 7x |
!!na_var := ds[[na_var]] %>% |
384 | 7x |
replace_na( |
385 | 7x |
percentage = ifelse(is.na(na_vars[[na_var]][2]), na_percentage, na_vars[[na_var]][2]), |
386 | 7x |
seed = na_vars[[na_var]][1] |
387 |
) |
|
388 |
) |
|
389 |
} |
|
390 |
} |
|
391 |
} |
|
392 | 5x |
return(ds) |
393 |
} |
|
394 | ||
395 |
ungroup_rowwise_df <- function(x) { |
|
396 | 7x |
class(x) <- c("tbl", "tbl_df", "data.frame") |
397 | 7x |
return(x) |
398 |
} |
|
399 | ||
400 |
#' Zero-Truncated Poisson Distribution |
|
401 |
#' |
|
402 |
#' @description `r lifecycle::badge("stable")` |
|
403 |
#' |
|
404 |
#' This generates random numbers from a zero-truncated Poisson distribution, |
|
405 |
#' i.e. from `X | X > 0` when `X ~ Poisson(lambda)`. The advantage here is that |
|
406 |
#' we guarantee to return exactly `n` numbers and without using a loop internally. |
|
407 |
#' This solution was provided in a post by |
|
408 |
#' [Peter Dalgaard](https://stat.ethz.ch/pipermail/r-help/2005-May/070680.html). |
|
409 |
#' |
|
410 |
#' @param n (`numeric`)\cr Number of random numbers. |
|
411 |
#' @param lambda (`numeric`)\cr Non-negative mean(s). |
|
412 |
#' |
|
413 |
#' @return The random numbers. |
|
414 |
#' @export |
|
415 |
#' |
|
416 |
#' @examples |
|
417 |
#' x <- rpois(1e6, lambda = 5) |
|
418 |
#' x <- x[x > 0] |
|
419 |
#' hist(x) |
|
420 |
#' |
|
421 |
#' y <- rtpois(1e6, lambda = 5) |
|
422 |
#' hist(y) |
|
423 |
rtpois <- function(n, lambda) { |
|
424 | 121x |
stats::qpois(stats::runif(n, stats::dpois(0, lambda), 1), lambda) |
425 |
} |
|
426 | ||
427 |
#' Truncated Exponential Distribution |
|
428 |
#' |
|
429 |
#' @description `r lifecycle::badge("stable")` |
|
430 |
#' |
|
431 |
#' This generates random numbers from a truncated Exponential distribution, |
|
432 |
#' i.e. from `X | X > l` or `X | X < r` when `X ~ Exp(rate)`. The advantage here is that |
|
433 |
#' we guarantee to return exactly `n` numbers and without using a loop internally. |
|
434 |
#' This can be derived from the quantile functions of the left- and right-truncated |
|
435 |
#' Exponential distributions. |
|
436 |
#' |
|
437 |
#' @param n (`numeric`)\cr Number of random numbers. |
|
438 |
#' @param rate (`numeric`)\cr Non-negative rate. |
|
439 |
#' @param l (`numeric`)\cr Positive left-hand truncation parameter. |
|
440 |
#' @param r (`numeric`)\cr Positive right-hand truncation parameter. |
|
441 |
#' |
|
442 |
#' @return The random numbers. If neither `l` nor `r` are provided then the usual Exponential |
|
443 |
#' distribution is used. |
|
444 |
#' @export |
|
445 |
#' |
|
446 |
#' @examples |
|
447 |
#' x <- stats::rexp(1e6, rate = 5) |
|
448 |
#' x <- x[x > 0.5] |
|
449 |
#' hist(x) |
|
450 |
#' |
|
451 |
#' y <- rtexp(1e6, rate = 5, l = 0.5) |
|
452 |
#' hist(y) |
|
453 |
#' |
|
454 |
#' z <- rtexp(1e6, rate = 5, r = 0.5) |
|
455 |
#' hist(z) |
|
456 |
rtexp <- function(n, rate, l = NULL, r = NULL) { |
|
457 | 123x |
if (!is.null(l)) { |
458 | 1x |
l - log(1 - stats::runif(n)) / rate |
459 | 122x |
} else if (!is.null(r)) { |
460 | 121x |
-log(1 - stats::runif(n) * (1 - exp(-r * rate))) / rate |
461 |
} else { |
|
462 | 1x |
stats::rexp(n, rate) |
463 |
} |
|
464 |
} |
1 |
#' Hy's Law Analysis Dataset (ADHY) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating a random Hy's Law Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details One record per subject per parameter per analysis visit per analysis date. |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `AVISITN`, `ADTM`, `SRCSEQ` |
|
11 |
# |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @template param_cached |
|
14 |
#' @templateVar data adhy |
|
15 |
#' |
|
16 |
#' @return `data.frame` |
|
17 |
#' @export |
|
18 |
#' |
|
19 |
#' @author wojciakw |
|
20 |
#' |
|
21 |
#' @examples |
|
22 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
23 |
#' |
|
24 |
#' adhy <- radhy(adsl, seed = 2) |
|
25 |
#' adhy |
|
26 |
radhy <- function(adsl, |
|
27 |
param = c( |
|
28 |
"TBILI <= 2 times ULN and ALT value category", |
|
29 |
"TBILI > 2 times ULN and AST value category", |
|
30 |
"TBILI > 2 times ULN and ALT value category", |
|
31 |
"TBILI <= 2 times ULN and AST value category", |
|
32 |
"TBILI > 2 times ULN and ALKPH <= 2 times ULN and ALT value category", |
|
33 |
"TBILI > 2 times ULN and ALKPH <= 2 times ULN and AST value category", |
|
34 |
"TBILI > 2 times ULN and ALKPH <= 5 times ULN and ALT value category", |
|
35 |
"TBILI > 2 times ULN and ALKPH <= 5 times ULN and AST value category", |
|
36 |
"TBILI <= 2 times ULN and two consecutive elevations of ALT in relation to ULN", |
|
37 |
"TBILI > 2 times ULN and two consecutive elevations of AST in relation to ULN", |
|
38 |
"TBILI <= 2 times ULN and two consecutive elevations of AST in relation to ULN", |
|
39 |
"TBILI > 2 times ULN and two consecutive elevations of ALT in relation to ULN", |
|
40 |
"TBILI > 2 times ULN and two consecutive elevations of ALT in relation to Baseline", |
|
41 |
"TBILI <= 2 times ULN and two consecutive elevations of ALT in relation to Baseline", |
|
42 |
"TBILI > 2 times ULN and two consecutive elevations of AST in relation to Baseline", |
|
43 |
"TBILI <= 2 times ULN and two consecutive elevations of AST in relation to Baseline", |
|
44 |
"ALT > 3 times ULN by Period", |
|
45 |
"AST > 3 times ULN by Period", |
|
46 |
"ALT or AST > 3 times ULN by Period", |
|
47 |
"ALT > 3 times Baseline by Period", |
|
48 |
"AST > 3 times Baseline by Period", |
|
49 |
"ALT or AST > 3 times Baseline by Period" |
|
50 |
), |
|
51 |
paramcd = c( |
|
52 |
"BLAL", |
|
53 |
"BGAS", |
|
54 |
"BGAL", |
|
55 |
"BLAS", |
|
56 |
"BA2AL", |
|
57 |
"BA2AS", |
|
58 |
"BA5AL", |
|
59 |
"BA5AS", |
|
60 |
"BL2AL2CU", |
|
61 |
"BG2AS2CU", |
|
62 |
"BL2AS2CU", |
|
63 |
"BG2AL2CU", |
|
64 |
"BG2AL2CB", |
|
65 |
"BL2AL2CB", |
|
66 |
"BG2AS2CB", |
|
67 |
"BL2AS2CB", |
|
68 |
"ALTPULN", |
|
69 |
"ASTPULN", |
|
70 |
"ALTASTPU", |
|
71 |
"ALTPBASE", |
|
72 |
"ASTPBASE", |
|
73 |
"ALTASTPB" |
|
74 |
), |
|
75 |
seed = NULL, |
|
76 |
cached = FALSE) { |
|
77 | 4x |
checkmate::assert_flag(cached) |
78 | ||
79 | 4x |
if (cached) { |
80 | 1x |
return(get_cached_data("cadhy")) |
81 |
} |
|
82 | ||
83 | 3x |
checkmate::assert_data_frame(adsl) |
84 | 3x |
checkmate::assert_character(param, min.len = 1, any.missing = FALSE) |
85 | 3x |
checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE) |
86 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
87 | ||
88 |
# validate and initialize related variables |
|
89 | 3x |
param_init_list <- relvar_init(param, paramcd) |
90 | ||
91 | 3x |
if (!is.null(seed)) { |
92 | 3x |
set.seed(seed) |
93 |
} |
|
94 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
95 | ||
96 |
# create all combinations of unique values in STUDYID, USUBJID, PARAM, AVISIT |
|
97 | 3x |
adhy <- expand.grid( |
98 | 3x |
STUDYID = unique(adsl$STUDYID), |
99 | 3x |
USUBJID = adsl$USUBJID, |
100 | 3x |
PARAM = as.factor(param_init_list$relvar1), |
101 | 3x |
AVISIT = as.factor(c("BASELINE", "POST-BASELINE")), |
102 | 3x |
APERIODC = as.factor(c("PERIOD 1", "PERIOD 2")), |
103 | 3x |
stringsAsFactors = FALSE |
104 |
) |
|
105 | ||
106 |
# remove records that are not needed and were created as a side product of expand.grid above |
|
107 | 3x |
adhy <- dplyr::filter(adhy, !(AVISIT == "BASELINE" & APERIODC == "PERIOD 2")) |
108 | ||
109 |
# define TBILI ALT/AST params, period dependent parameters and the parameters that will be assigned values "Y" or "N" |
|
110 | 3x |
paramcd_tbilialtast <- c("BLAL", "BGAS", "BGAL", "BLAS", "BA2AL", "BA2AS", "BA5AL", "BA5AS") |
111 | 3x |
paramcd_by_period <- c("ALTPULN", "ASTPULN", "ALTASTPU", "ALTPBASE", "ASTPBASE", "ALTASTPB") |
112 | 3x |
paramcd_yn <- c( |
113 | 3x |
"BL2AL2CU", "BG2AS2CU", "BL2AS2CU", "BG2AL2CU", "BG2AL2CB", "BL2AL2CB", "BG2AS2CB", "BL2AS2CB", |
114 | 3x |
paramcd_by_period |
115 |
) |
|
116 | ||
117 |
# add other variables to adhy |
|
118 | 3x |
adhy <- adhy %>% |
119 | 3x |
rel_var( |
120 | 3x |
var_name = "PARAMCD", |
121 | 3x |
related_var = "PARAM", |
122 | 3x |
var_values = param_init_list$relvar2 |
123 |
) %>% |
|
124 | 3x |
dplyr::mutate( |
125 | 3x |
AVALC = dplyr::case_when( |
126 | 3x |
PARAMCD %in% paramcd_tbilialtast ~ sample( |
127 | 3x |
x = c(">3-5ULN", ">5-10ULN", ">10-20ULN", ">20ULN", "Criteria not met"), size = dplyr::n(), replace = TRUE |
128 |
), |
|
129 | 3x |
PARAMCD %in% paramcd_yn ~ sample( |
130 | 3x |
x = c("Y", "N"), prob = c(0.1, 0.9), size = dplyr::n(), replace = TRUE |
131 |
) |
|
132 |
), |
|
133 | 3x |
AVAL = dplyr::case_when( |
134 | 3x |
AVALC == ">3-5ULN" ~ 1, |
135 | 3x |
AVALC == ">5-10ULN" ~ 2, |
136 | 3x |
AVALC == ">10-20ULN" ~ 3, |
137 | 3x |
AVALC == ">20ULN" ~ 4, |
138 | 3x |
AVALC == "Y" ~ 1, |
139 | 3x |
AVALC == "N" ~ 0, |
140 | 3x |
AVALC == "Criteria not met" ~ 0 |
141 |
), |
|
142 | 3x |
AVISITN = dplyr::case_when( |
143 | 3x |
AVISIT == "BASELINE" ~ 0L, |
144 | 3x |
AVISIT == "POST-BASELINE" ~ 9995L, |
145 | 3x |
TRUE ~ NA_integer_ |
146 |
), |
|
147 | 3x |
APERIOD = dplyr::case_when( |
148 | 3x |
APERIODC == "PERIOD 1" ~ 1L, |
149 | 3x |
APERIODC == "PERIOD 2" ~ 2L, |
150 | 3x |
TRUE ~ NA_integer_ |
151 |
), |
|
152 | 3x |
ABLFL = dplyr::if_else(AVISIT == "BASELINE", "Y", NA_character_), |
153 | 3x |
ONTRTFL = dplyr::if_else(AVISIT == "POST-BASELINE", "Y", NA_character_), |
154 | 3x |
ANL01FL = "Y", |
155 | 3x |
SRCSEQ = NA_integer_ |
156 |
) |
|
157 | ||
158 |
# remove records for parameters with period 2 and not in paramcd_by_period |
|
159 | 3x |
adhy <- dplyr::filter(adhy, PARAMCD %in% paramcd_by_period | APERIODC == "PERIOD 1") |
160 | ||
161 |
# add baseline variables |
|
162 | 3x |
adhy <- adhy %>% |
163 | 3x |
dplyr::group_by(USUBJID, PARAMCD) %>% |
164 | 3x |
dplyr::mutate( |
165 | 3x |
BASEC = AVALC[AVISIT == "BASELINE"], |
166 | 3x |
BASE = AVAL[AVISIT == "BASELINE"] |
167 |
) %>% |
|
168 | 3x |
dplyr::ungroup() |
169 | ||
170 | 3x |
adhy <- adhy %>% |
171 | 3x |
rcd_var_relabel( |
172 | 3x |
STUDYID = attr(adsl$STUDYID, "label"), |
173 | 3x |
USUBJID = attr(adsl$USUBJID, "label") |
174 |
) |
|
175 | ||
176 |
# merge ADSL to be able to add analysis datetime and analysis relative day variables |
|
177 | 3x |
adhy <- dplyr::inner_join(adhy, adsl, by = c("STUDYID", "USUBJID")) |
178 | ||
179 |
# define a simple helper function to create ADY variable |
|
180 | 3x |
add_ady <- function(x, avisit) { |
181 | 6x |
if (avisit == "BASELINE") { |
182 | 3x |
dplyr::mutate( |
183 | 3x |
x, |
184 | 3x |
ADY = sample(x = -(1:14), size = dplyr::n(), replace = TRUE) |
185 |
) |
|
186 | 3x |
} else if (avisit == "POST-BASELINE") { |
187 | 3x |
dplyr::rowwise(x) %>% |
188 | 3x |
dplyr::mutate(ADY = as.integer(sample( |
189 | 3x |
dplyr::if_else( |
190 | 3x |
!is.na(TRTEDTM), |
191 | 3x |
as.numeric(difftime(TRTEDTM, TRTSDTM, units = "days")), |
192 | 3x |
as.numeric(study_duration_secs, "days") |
193 |
), |
|
194 | 3x |
size = 1, |
195 | 3x |
replace = TRUE |
196 |
))) |
|
197 |
} else { |
|
198 | ! |
dplyr::mutate(x, ADY = NA_integer_) |
199 |
} |
|
200 |
} |
|
201 | ||
202 |
# add ADY and ADTM variables |
|
203 | 3x |
adhy <- adhy %>% |
204 | 3x |
dplyr::group_by(AVISIT, .add = FALSE) %>% |
205 | 3x |
dplyr::group_modify(~ add_ady(.x, .y$AVISIT)) %>% |
206 | 3x |
dplyr::ungroup() %>% |
207 | 3x |
dplyr::mutate(ADTM = TRTSDTM + lubridate::days(ADY)) |
208 | ||
209 |
# order columns and arrange rows; column order follows ADaM_1.1 specification |
|
210 | 3x |
adhy <- |
211 | 3x |
adhy[, c( |
212 | 3x |
colnames(adsl), |
213 | 3x |
"PARAM", |
214 | 3x |
"PARAMCD", |
215 | 3x |
"AVAL", |
216 | 3x |
"AVALC", |
217 | 3x |
"BASE", |
218 | 3x |
"BASEC", |
219 | 3x |
"ABLFL", |
220 | 3x |
"ADTM", |
221 | 3x |
"ADY", |
222 | 3x |
"AVISIT", |
223 | 3x |
"AVISITN", |
224 | 3x |
"APERIOD", |
225 | 3x |
"APERIODC", |
226 | 3x |
"ONTRTFL", |
227 | 3x |
"SRCSEQ", |
228 | 3x |
"ANL01FL" |
229 |
)] |
|
230 | ||
231 | 3x |
adhy <- adhy %>% |
232 | 3x |
dplyr::arrange( |
233 | 3x |
STUDYID, |
234 | 3x |
USUBJID, |
235 | 3x |
PARAMCD, |
236 | 3x |
AVISITN, |
237 | 3x |
ADTM, |
238 | 3x |
SRCSEQ |
239 |
) |
|
240 | ||
241 |
# apply metadata |
|
242 | 3x |
adhy <- apply_metadata(adhy, "metadata/ADHY.yml") |
243 | ||
244 | 3x |
return(adhy) |
245 |
} |
1 |
#' EORTC QLQ-C30 V3 Analysis Dataset (ADQLQC) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating a random EORTC QLQ-C30 V3 Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `PARCAT1N`, `PARAMCD`, `BASETYPE`, `AVISITN`, `ATPTN`, `ADTM`, `QSSEQ` |
|
11 |
#' |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @param percent (`numeric`)\cr Completion - Completed at least y percent of questions, 1 record per visit |
|
14 |
#' @param number (`numeric`)\cr Completion - Completed at least x question(s), 1 record per visit |
|
15 |
#' @template param_cached |
|
16 |
#' @templateVar data adqlqc |
|
17 |
#' |
|
18 |
#' @return `data.frame` |
|
19 |
#' @export |
|
20 |
#' |
|
21 |
#' @examples |
|
22 |
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1) |
|
23 |
#' |
|
24 |
#' adqlqc <- radqlqc(adsl, seed = 1, percent = 80, number = 2) |
|
25 |
#' adqlqc |
|
26 |
radqlqc <- function(adsl, |
|
27 |
percent, |
|
28 |
number, |
|
29 |
seed = NULL, |
|
30 |
cached = FALSE) { |
|
31 | 4x |
checkmate::assert_flag(cached) |
32 | 4x |
if (cached) { |
33 | 1x |
return(get_cached_data("cadqlqc")) |
34 |
} |
|
35 | ||
36 | 3x |
checkmate::assert_data_frame(adsl) |
37 | 3x |
checkmate::assert_number(percent, lower = 1, upper = 100) |
38 | 3x |
checkmate::assert_number(number, lower = 1) |
39 | ||
40 | 3x |
if (!is.null(seed)) { |
41 | 3x |
set.seed(seed) |
42 |
} |
|
43 | ||
44 |
# ADQLQC data ------------------------------------------------------------- |
|
45 | 3x |
qs <- get_qs_data(adsl, n_assessments = 5L, seed = seed, na_percentage = 0.1) |
46 |
# prepare ADaM ADQLQC data |
|
47 | 3x |
adqlqc1 <- prep_adqlqc(df = qs) |
48 |
# derive AVAL and AVALC |
|
49 | 3x |
adqlqc1 <- mutate( |
50 | 3x |
adqlqc1, |
51 | 3x |
AVAL = as.numeric(QSSTRESC), |
52 | 3x |
AVALC = case_when( |
53 | 3x |
QSTESTCD == "QSALL" ~ QSREASND, |
54 | 3x |
TRUE ~ QSORRES |
55 |
), |
|
56 | 3x |
AVISIT = VISIT, |
57 | 3x |
AVISITN = VISITNUM, |
58 | 3x |
ADTM = QSDTC |
59 |
) |
|
60 |
# include scale calculation |
|
61 | 3x |
adqlqc_tmp <- calc_scales(adqlqc1) |
62 |
# order to prepare for change from screening and baseline values |
|
63 | 3x |
adqlqc_tmp <- adqlqc_tmp[order(adqlqc_tmp$STUDYID, adqlqc_tmp$USUBJID, adqlqc_tmp$PARAMCD, adqlqc_tmp$AVISITN), ] |
64 | ||
65 | 3x |
adqlqc_tmp <- Reduce( |
66 | 3x |
rbind, |
67 | 3x |
lapply( |
68 | 3x |
split(adqlqc_tmp, adqlqc_tmp$USUBJID), |
69 | 3x |
function(x) { |
70 | 30x |
x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])] |
71 | 30x |
x$ABLFL2 <- ifelse(x$AVISIT == "SCREENING", "Y", "") |
72 | 30x |
x$ABLFL <- ifelse( |
73 | 30x |
x$AVISIT == "BASELINE" & |
74 | 30x |
x$PARAMCD != "EX028", |
75 | 30x |
"Y", |
76 | 30x |
ifelse( |
77 | 30x |
x$AVISIT == "CYCLE 1 DAY 1" & |
78 | 30x |
x$PARAMCD != "EX028", |
79 | 30x |
"Y", |
80 |
"" |
|
81 |
) |
|
82 |
) |
|
83 | 30x |
x |
84 |
} |
|
85 |
) |
|
86 |
) |
|
87 | ||
88 | 3x |
adqlqc_tmp$BASE2 <- ifelse( |
89 | 3x |
str_detect(adqlqc_tmp$PARCAT2, "Completion", negate = TRUE), |
90 | 3x |
retain( |
91 | 3x |
df = adqlqc_tmp, |
92 | 3x |
value_var = adqlqc_tmp$AVAL, |
93 | 3x |
event = adqlqc_tmp$ABLFL2 == "Y" |
94 |
), |
|
95 | 3x |
NA |
96 |
) |
|
97 | ||
98 | 3x |
adqlqc_tmp$BASE <- ifelse( |
99 | 3x |
adqlqc_tmp$ABLFL2 != "Y" & |
100 | 3x |
str_detect(adqlqc_tmp$PARCAT2, "Completion", negate = TRUE), |
101 | 3x |
retain( |
102 | 3x |
adqlqc_tmp, |
103 | 3x |
adqlqc_tmp$AVAL, |
104 | 3x |
adqlqc_tmp$ABLFL == "Y" |
105 |
), |
|
106 | 3x |
NA |
107 |
) |
|
108 | ||
109 | 3x |
adqlqc_tmp <- adqlqc_tmp %>% |
110 | 3x |
dplyr::mutate(CHG2 = AVAL - BASE2) %>% |
111 | 3x |
dplyr::mutate(PCHG2 = 100 * (CHG2 / BASE2)) %>% |
112 | 3x |
dplyr::mutate(CHG = AVAL - BASE) %>% |
113 | 3x |
dplyr::mutate(PCHG = 100 * (CHG / BASE)) %>% |
114 | 3x |
rcd_var_relabel( |
115 | 3x |
STUDYID = attr(adsl$STUDYID, "label"), |
116 | 3x |
USUBJID = attr(adsl$USUBJID, "label") |
117 |
) |
|
118 |
# derive CHGCAT1 ---------------------------------------------------------- |
|
119 | 3x |
adqlqc_tmp <- derv_chgcat1(dataset = adqlqc_tmp) |
120 | ||
121 | 3x |
adqlqc_tmp <- rcd_var_relabel( |
122 | 3x |
adqlqc_tmp, |
123 | 3x |
STUDYID = "Study Identifier", |
124 | 3x |
USUBJID = "Unique Subject Identifier" |
125 |
) |
|
126 | ||
127 | 3x |
adqlqc_tmp <- arrange( |
128 | 3x |
adqlqc_tmp, |
129 | 3x |
USUBJID, |
130 | 3x |
AVISITN |
131 |
) |
|
132 |
# Merge ADSL -------------------------------------------------------------- |
|
133 |
# ADSL variables needed for ADQLQC |
|
134 | 3x |
adsl_vars <- c( |
135 | 3x |
"STUDYID", "USUBJID", "SUBJID", "SITEID", "REGION1", "COUNTRY", "ETHNIC", "AGE", |
136 | 3x |
"AGEU", "AAGE", "AAGEU", "AGEGR1", "AGEGR2", "AGEGR3", "STRATwNM", "STRATw", "STRATwV", |
137 | 3x |
"SEX", "RACE", "ITTFL", "SAFFL", "PPROTFL", "TRT01P", "TRT01A", |
138 | 3x |
"TRTSEQP", "TRTSEQA", "TRTSDTM", "TRTSDT", "TRTEDTM", "TRTEDT", "DCUTDT" |
139 |
) |
|
140 | 3x |
adsl <- select( |
141 | 3x |
adsl, |
142 | 3x |
any_of(adsl_vars) |
143 |
) |
|
144 | 3x |
adqlqc <- dplyr::inner_join( |
145 | 3x |
adqlqc_tmp, |
146 | 3x |
adsl, |
147 | 3x |
by = c("STUDYID", "USUBJID") |
148 |
) %>% |
|
149 | 3x |
dplyr::mutate( |
150 | 3x |
ADY_der = ceiling(difftime(ADTM, TRTSDTM, units = "days")), |
151 | 3x |
ADY = case_when( |
152 | 3x |
ADY_der >= 0 ~ ADY_der + 1, |
153 | 3x |
TRUE ~ ADY_der |
154 |
) |
|
155 |
) %>% |
|
156 | 3x |
select(-ADY_der) |
157 | ||
158 |
# get compliance data --------------------------------------------------- |
|
159 | 3x |
compliance_data <- comp_derv( |
160 | 3x |
dataset = adqlqc, |
161 | 3x |
percent = percent, |
162 | 3x |
number = number |
163 |
) |
|
164 |
# add ADSL variables |
|
165 | 3x |
compliance_data <- left_join( |
166 | 3x |
compliance_data, |
167 | 3x |
adsl, |
168 | 3x |
by = c("STUDYID", "USUBJID") |
169 |
) |
|
170 |
# add completion to ADQLQC |
|
171 | 3x |
adqlqc <- bind_rows( |
172 | 3x |
adqlqc, |
173 | 3x |
compliance_data |
174 |
) %>% |
|
175 | 3x |
arrange( |
176 | 3x |
USUBJID, |
177 | 3x |
AVISITN, |
178 | 3x |
QSTESTCD |
179 |
) |
|
180 |
# find first set of questionnaire observations |
|
181 | 3x |
adqlqc_x <- arrange( |
182 | 3x |
adqlqc, |
183 | 3x |
USUBJID, |
184 | 3x |
ADTM |
185 |
) %>% |
|
186 | 3x |
filter( |
187 | 3x |
PARAMCD != "QSALL" & |
188 | 3x |
!str_detect(AVISIT, "SCREENING|UNSCHEDULED") |
189 |
) %>% |
|
190 | 3x |
group_by( |
191 | 3x |
USUBJID, |
192 | 3x |
ADTM |
193 |
) %>% |
|
194 | 3x |
summarise(first_date = first(ADTM), .groups = "drop") |
195 | ||
196 | 3x |
adqlqc <- left_join( |
197 | 3x |
adqlqc, |
198 | 3x |
adqlqc_x, |
199 | 3x |
by = c("USUBJID", "ADTM") |
200 |
) %>% |
|
201 | 3x |
mutate( |
202 | 3x |
ANL01FL = case_when( |
203 | 3x |
PARAMCD != "QSALL" & ABLFL == "Y" ~ "Y", |
204 | 3x |
PARAMCD != "QSALL" & |
205 | 3x |
!str_detect(AVISIT, "UNSCHEDULED") & |
206 | 3x |
!is.na(first_date) ~ "Y" |
207 |
) |
|
208 |
) %>% |
|
209 | 3x |
select(-first_date) |
210 | ||
211 |
# final dataset ----------------------------------------------------------- |
|
212 | 3x |
adqlqc_final <- adqlqc %>% |
213 | 3x |
dplyr::group_by(USUBJID) %>% |
214 | 3x |
dplyr::mutate(ASEQ = row_number()) %>% |
215 | 3x |
dplyr::ungroup() %>% |
216 | 3x |
dplyr::arrange( |
217 | 3x |
STUDYID, |
218 | 3x |
USUBJID, |
219 | 3x |
AVISITN |
220 |
) %>% |
|
221 | 3x |
select( |
222 | 3x |
-c("BASE2", "CHG2", "PCHG2", "ABLFL2") |
223 |
) %>% |
|
224 | 3x |
ungroup() |
225 | ||
226 | 3x |
adam_vars <- c( |
227 | 3x |
adsl_vars, "QSSEQ", "QSCAT", "QSSCAT", "QSDTC", "QSSPID", "QSSTAT", "QSSTRESN", |
228 | 3x |
"QSSTRESC", "QSSTRESU", "QSORRES", "QSORRESU", "QSTEST", "QSTESTCD", "QSTPT", |
229 | 3x |
"QSTPTNUM", "QSTPTREF", "QSDY", "QSREASND", "QSTSTDTL", "QSEVAL", "VISIT", "VISITNUM", |
230 | 3x |
"PARAM", "PARAMCD", "PARCAT1", "PARCAT1N", "PARCAT2", "AVAL", "AVALC", "AREASND", |
231 | 3x |
"BASE", "BASETYPE", "ABLFL", "CHG", "PCHG", "CHGCAT1", "CRIT1", "CRIT1FL", "DTYPE", |
232 | 3x |
"ADTM", "ADT", "ADY", "ADTF", "ATMF", "ATPT", "ATPTN", "AVISIT", "AVISITN", "APHASE", |
233 | 3x |
"APHASEN", "APERIOD", "APERIODC", "APERIODC", "ASPER", "ASPERC", "PERADY", "TRTP", |
234 | 3x |
"TRTA", "ONTRTFL", "LAST02FL", "FIRS02FL", "ANL01FL", "ANL02FL", "ANL03FL", |
235 | 3x |
"ANL04FL", "CGCAT1NX" |
236 |
) |
|
237 |
# order variables in mapped qs by variables in adam_vars |
|
238 | 3x |
adqlqc_name_ordered <- names(adqlqc_final)[order(match(names(adqlqc_final), adam_vars))] |
239 |
# adqlqc with variables ordered per gdsr |
|
240 | 3x |
adqlqc_final <- adqlqc_final %>% |
241 | 3x |
select( |
242 | 3x |
any_of(adqlqc_name_ordered) |
243 |
) |
|
244 | ||
245 | 3x |
adqlqc_final <- relocate(adqlqc_final, "QSEVLINT", .after = "QSTESTCD") %>% |
246 | 3x |
arrange( |
247 | 3x |
USUBJID, |
248 | 3x |
AVISITN, |
249 | 3x |
ASEQ, |
250 | 3x |
QSTESTCD |
251 |
) |
|
252 |
# apply metadata |
|
253 | 3x |
adqlqc_final <- apply_metadata(adqlqc_final, "metadata/ADQLQC.yml") |
254 | 3x |
return(adqlqc_final) |
255 |
} |
|
256 | ||
257 |
#' Helper Functions for Constructing ADQLQC |
|
258 |
#' |
|
259 |
#' Internal functions used by `radqlqc`. |
|
260 |
#' |
|
261 |
#' @inheritParams argument_convention |
|
262 |
#' @inheritParams radqlqc |
|
263 |
#' |
|
264 |
#' @examples |
|
265 |
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1) |
|
266 |
#' adqlqc <- radqlqc(adsl, seed = 1, percent = 80, number = 2) |
|
267 |
#' |
|
268 |
#' @name h_adqlqc |
|
269 |
NULL |
|
270 | ||
271 |
#' @describeIn h_adqlqc Questionnaires EORTC QLQ-C30 V3.0 SDTM (QS) |
|
272 |
#' |
|
273 |
#' Function for generating random Questionnaires SDTM domain |
|
274 |
#' |
|
275 |
#' @return a dataframe with SDTM questionnaire data |
|
276 |
#' @keywords internal |
|
277 |
get_qs_data <- function(adsl, |
|
278 |
visit_format = "CYCLE", |
|
279 |
n_assessments = 5L, |
|
280 |
n_days = 1L, |
|
281 |
lookup = NULL, |
|
282 |
seed = NULL, |
|
283 |
na_percentage = 0, |
|
284 |
na_vars = list( |
|
285 |
QSORRES = c(1234, 0.2), |
|
286 |
QSSTRESC = c(1234, 0.2) |
|
287 |
)) { |
|
288 | 3x |
load(system.file("sysdata.rda", package = "random.cdisc.data")) |
289 | 3x |
checkmate::assert_string(visit_format) |
290 | 3x |
checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE) |
291 | 3x |
checkmate::assert_integer(n_days, len = 1, any.missing = FALSE) |
292 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
293 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1, na.ok = TRUE) |
294 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
295 | 3x |
checkmate::assert_true(na_percentage < 1) |
296 | ||
297 |
# get subjects for QS data from ADSL |
|
298 |
# get studyid, subject for QS generation |
|
299 | 3x |
qs <- select( |
300 | 3x |
adsl, |
301 | 3x |
STUDYID, |
302 | 3x |
USUBJID |
303 |
) %>% |
|
304 | 3x |
mutate( |
305 | 3x |
DOMAIN = "QS" |
306 |
) |
|
307 | ||
308 |
# QS prep ----------------------------------------------------------------- |
|
309 |
# get questionnaire function for QS |
|
310 |
# QSTESTCD: EOR0101 to EOR0130 |
|
311 | 3x |
eortc_qlq_c30_sub <- filter( |
312 | 3x |
eortc_qlq_c30, |
313 | 3x |
as.numeric(str_extract(QSTESTCD, "\\d+$")) >= 101 & |
314 | 3x |
as.numeric(str_extract(QSTESTCD, "\\d+$")) <= 130 |
315 |
) %>% |
|
316 | 3x |
select(-publication_name) |
317 | ||
318 |
# validate and initialize QSTEST vectors |
|
319 | 3x |
qstest_init_list <- relvar_init( |
320 | 3x |
unique(eortc_qlq_c30_sub$QSTEST), |
321 | 3x |
unique(eortc_qlq_c30_sub$QSTESTCD) |
322 |
) |
|
323 | ||
324 | 3x |
if (!is.null(seed)) { |
325 | 3x |
set.seed(seed) |
326 |
} |
|
327 | ||
328 | 3x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
329 | ||
330 | 3x |
lookup_qs <- if (!is.null(lookup)) { |
331 | ! |
lookup |
332 |
} else { |
|
333 | 3x |
expand.grid( |
334 | 3x |
STUDYID = unique(qs$STUDYID), |
335 | 3x |
USUBJID = qs$USUBJID, |
336 | 3x |
QSTEST = qstest_init_list$relvar1, |
337 | 3x |
VISIT = visit_schedule( |
338 | 3x |
visit_format = visit_format, |
339 | 3x |
n_assessments = n_assessments, |
340 | 3x |
n_days = n_days |
341 |
), |
|
342 | 3x |
stringsAsFactors = FALSE |
343 |
) |
|
344 |
} |
|
345 | ||
346 |
# assign related variable values: QSTESTxQSTESTCD are related |
|
347 | 3x |
lookup_qs <- lookup_qs %>% rel_var( |
348 | 3x |
var_name = "QSTESTCD", |
349 | 3x |
related_var = "QSTEST", |
350 | 3x |
var_values = qstest_init_list$relvar2 |
351 |
) |
|
352 | ||
353 | 3x |
lookup_qs <- left_join( |
354 | 3x |
lookup_qs, |
355 | 3x |
eortc_qlq_c30_sub, |
356 | 3x |
by = c( |
357 | 3x |
"QSTEST", |
358 | 3x |
"QSTESTCD" |
359 |
), |
|
360 | 3x |
multiple = "all", |
361 | 3x |
relationship = "many-to-many" |
362 |
) |
|
363 | ||
364 | 3x |
lookup_qs <- dplyr::mutate( |
365 | 3x |
lookup_qs, |
366 | 3x |
VISITNUM = dplyr::case_when( |
367 | 3x |
VISIT == "SCREENING" ~ -1, |
368 | 3x |
VISIT == "BASELINE" ~ 0, |
369 | 3x |
(grepl("^WEEK", VISIT) | grepl("^CYCLE", VISIT)) ~ as.numeric(VISIT) - 2, |
370 | 3x |
TRUE ~ NA_real_ |
371 |
) |
|
372 | 3x |
) %>% arrange(USUBJID) |
373 | ||
374 |
# # prep QSALL -------------------------------------------------------------- |
|
375 |
# get last subject and visit for QSALL |
|
376 | 3x |
last_subj_vis <- select(lookup_qs, USUBJID, VISIT) %>% |
377 | 3x |
distinct() %>% |
378 | 3x |
slice(n()) |
379 | 3x |
last_subj_vis_full <- filter( |
380 | 3x |
lookup_qs, |
381 | 3x |
USUBJID == last_subj_vis$USUBJID, |
382 | 3x |
VISIT == last_subj_vis$VISIT |
383 |
) |
|
384 | ||
385 | 3x |
qsall_data1 <- tibble::tibble( |
386 | 3x |
STUDYID = unique(last_subj_vis_full$STUDYID), |
387 | 3x |
USUBJID = unique(last_subj_vis_full$USUBJID), |
388 | 3x |
VISIT = unique(last_subj_vis_full$VISIT), |
389 | 3x |
VISITNUM = unique(last_subj_vis_full$VISITNUM), |
390 | 3x |
QSTESTCD = "QSALL", |
391 | 3x |
QSTEST = "Questionnaires", |
392 | 3x |
QSSTAT = "NOT DONE", |
393 | 3x |
QSREASND = "SUBJECT REFUSED" |
394 |
) |
|
395 | ||
396 |
# remove last subject and visit from main data |
|
397 | 3x |
lookup_qs_sub <- anti_join( |
398 | 3x |
lookup_qs, |
399 | 3x |
last_subj_vis_full, |
400 | 3x |
by = c("USUBJID", "VISIT") |
401 |
) |
|
402 | ||
403 | 3x |
set.seed(seed) |
404 | 3x |
lookup_qs_sub_x <- lookup_qs_sub %>% |
405 | 3x |
group_by( |
406 | 3x |
USUBJID, |
407 | 3x |
QSTESTCD, |
408 | 3x |
VISIT |
409 |
) %>% |
|
410 | 3x |
slice_sample(n = 1) %>% |
411 | 3x |
ungroup() %>% |
412 | 3x |
as.data.frame() |
413 | ||
414 | 3x |
lookup_qs_sub_x <- arrange( |
415 | 3x |
lookup_qs_sub_x, |
416 | 3x |
USUBJID, |
417 | 3x |
VISITNUM |
418 |
) |
|
419 | ||
420 |
# add date: QSDTC --------------------------------------------------------- |
|
421 |
# get treatment dates from ADSL |
|
422 | 3x |
adsl_trt <- select( |
423 | 3x |
adsl, |
424 | 3x |
USUBJID, |
425 | 3x |
TRTSDTM, |
426 | 3x |
TRTEDTM |
427 |
) |
|
428 |
# use to derive QSDTC |
|
429 |
# if no treatment end date, create an arbituary one |
|
430 | 3x |
trt_end_date <- max(adsl_trt$TRTEDTM, na.rm = TRUE) |
431 | ||
432 | 3x |
lookup_qs_sub_x <- left_join( |
433 | 3x |
lookup_qs_sub_x, |
434 | 3x |
adsl_trt, |
435 | 3x |
by = "USUBJID" |
436 |
) %>% |
|
437 | 3x |
group_by( |
438 | 3x |
USUBJID |
439 |
) %>% |
|
440 | 3x |
mutate(QSDTC = get_random_dates_between( |
441 | 3x |
from = TRTSDTM, |
442 | 3x |
to = ifelse( |
443 | 3x |
is.na(TRTEDTM), |
444 | 3x |
trt_end_date, |
445 | 3x |
TRTEDTM |
446 |
), |
|
447 | 3x |
visit_id = VISITNUM |
448 |
)) %>% |
|
449 | 3x |
select(-c("TRTSDTM", "TRTEDTM")) |
450 | ||
451 |
# filter out subjects with missing dates |
|
452 | 3x |
lookup_qs_sub_x1 <- filter( |
453 | 3x |
lookup_qs_sub_x, |
454 | 3x |
!is.na(QSDTC) |
455 |
) |
|
456 | ||
457 |
# subjects with missing dates |
|
458 | 3x |
lookup_qs_sub_x2 <- filter( |
459 | 3x |
lookup_qs_sub_x, |
460 | 3x |
is.na(QSDTC) |
461 |
) %>% |
|
462 | 3x |
select( |
463 | 3x |
STUDYID, |
464 | 3x |
USUBJID, |
465 | 3x |
VISIT, |
466 | 3x |
VISITNUM |
467 |
) %>% |
|
468 | 3x |
distinct() |
469 | ||
470 |
# generate QSALL for subjects with missing dates |
|
471 | 3x |
qsall_data2 <- mutate( |
472 | 3x |
lookup_qs_sub_x2, |
473 | 3x |
QSTESTCD = "QSALL", |
474 | 3x |
QSTEST = "Questionnaires", |
475 | 3x |
QSSTAT = "NOT DONE", |
476 | 3x |
QSREASND = "SUBJECT REFUSED" |
477 |
) |
|
478 | ||
479 |
# add qsall data to original item data |
|
480 | 3x |
lookup_qs_sub_all <- bind_rows( |
481 | 3x |
lookup_qs_sub_x1, |
482 | 3x |
qsall_data1, |
483 | 3x |
qsall_data2 |
484 |
) |
|
485 | ||
486 | 3x |
qs_all <- lookup_qs_sub_all %>% |
487 | 3x |
arrange( |
488 | 3x |
STUDYID, |
489 | 3x |
USUBJID, |
490 | 3x |
VISITNUM |
491 |
) %>% |
|
492 | 3x |
dplyr::group_by(USUBJID) %>% |
493 | 3x |
dplyr::ungroup() |
494 | ||
495 |
# get first and second subject ids |
|
496 | 3x |
first_second_subj <- select(qs_all, USUBJID) %>% |
497 | 3x |
distinct() %>% |
498 | 3x |
slice(1:2) |
499 | ||
500 | 3x |
qs1 <- filter( |
501 | 3x |
qs_all, |
502 | 3x |
USUBJID %in% first_second_subj$USUBJID |
503 |
) |
|
504 | ||
505 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) { |
506 | 3x |
qs1 <- mutate_na(ds = qs1, na_vars = na_vars, na_percentage = na_percentage) |
507 |
} |
|
508 | ||
509 |
# QSSTAT = NOT DONE |
|
510 | 3x |
qs1 <- mutate( |
511 | 3x |
qs1, |
512 | 3x |
QSSTAT = case_when( |
513 | 3x |
is.na(QSORRES) & is.na(QSSTRESC) ~ "NOT DONE" |
514 |
) |
|
515 |
) |
|
516 | ||
517 |
# remove first and second subjects from main data |
|
518 | 3x |
qs2 <- anti_join( |
519 | 3x |
qs_all, |
520 | 3x |
qs1, |
521 | 3x |
by = c("USUBJID") |
522 |
) |
|
523 | ||
524 | 3x |
final_qs <- rbind( |
525 | 3x |
qs1, |
526 | 3x |
qs2 |
527 |
) %>% |
|
528 | 3x |
group_by(USUBJID) %>% |
529 | 3x |
dplyr::mutate(QSSEQ = row_number()) %>% |
530 | 3x |
arrange( |
531 | 3x |
STUDYID, |
532 | 3x |
USUBJID, |
533 | 3x |
VISITNUM |
534 |
) %>% |
|
535 | 3x |
ungroup() |
536 | ||
537 |
# ordered variables as per gdsr |
|
538 | 3x |
final_qs <- select( |
539 | 3x |
final_qs, |
540 | 3x |
STUDYID, |
541 | 3x |
USUBJID, |
542 | 3x |
QSSEQ, |
543 | 3x |
QSTESTCD, |
544 | 3x |
QSTEST, |
545 | 3x |
QSCAT, |
546 | 3x |
QSSCAT, |
547 | 3x |
QSORRES, |
548 | 3x |
QSORRESU, |
549 | 3x |
QSSTRESC, |
550 | 3x |
QSSTRESU, |
551 | 3x |
QSSTAT, |
552 | 3x |
QSREASND, |
553 | 3x |
VISITNUM, |
554 | 3x |
VISIT, |
555 | 3x |
QSDTC, |
556 | 3x |
QSEVLINT |
557 |
) |
|
558 | 3x |
return(final_qs) |
559 |
} |
|
560 | ||
561 |
#' @describeIn h_adqlqc Function for generating random dates between 2 dates |
|
562 |
#' |
|
563 |
#' @param from (`datetime vector`)\cr Start date/times. |
|
564 |
#' @param to (`datetime vector`)\cr End date/times. |
|
565 |
#' @param visit_id (`vector`)\cr Visit identifiers. |
|
566 |
#' |
|
567 |
#' @return Data frame with new randomly generated dates variable. |
|
568 |
#' @keywords internal |
|
569 |
get_random_dates_between <- function(from, to, visit_id) { |
|
570 | 30x |
min_date <- min(lubridate::as_datetime(from), na.rm = TRUE) |
571 | 30x |
max_date <- max(lubridate::as_datetime(to), na.rm = TRUE) |
572 | 30x |
date_seq <- seq(from = min_date + lubridate::days(1), to = max_date, by = "28 days") |
573 | ||
574 | 30x |
visit_ids <- unique(visit_id) |
575 | 30x |
out <- sapply(visit_ids, simplify = TRUE, USE.NAMES = TRUE, FUN = function(x) { |
576 | 177x |
if (x == -1) { |
577 | 30x |
random_days_to_subtract <- lubridate::days(sample(1:10, size = 1)) |
578 | 30x |
min_date - random_days_to_subtract |
579 | 147x |
} else if (x == 0) { |
580 | 30x |
min_date |
581 | 117x |
} else if (x > 0) { |
582 | 117x |
if (x %in% seq_along(date_seq)) { |
583 | 117x |
date_seq[[x]] |
584 |
} else { |
|
585 | 30x |
NA |
586 |
} |
|
587 |
} |
|
588 |
}) |
|
589 | 30x |
lubridate::as_datetime(out[match(visit_id, visit_ids)]) |
590 |
} |
|
591 | ||
592 |
#' @describeIn h_adqlqc Prepare ADaM ADQLQC data, adding PARAMCD to SDTM QS data |
|
593 |
#' |
|
594 |
#' @param df (`data.frame`)\cr SDTM QS dataset. |
|
595 |
#' |
|
596 |
#' @return `data.frame` |
|
597 |
#' @keywords internal |
|
598 |
prep_adqlqc <- function(df) { |
|
599 |
# create PARAMCD from QSTESTCD |
|
600 | 3x |
adqlqc <- dplyr::mutate( |
601 | 3x |
df, |
602 | 3x |
PARAMCD = case_when( |
603 | 3x |
QSTESTCD == "EOR0101" ~ "QS02801", |
604 | 3x |
QSTESTCD == "EOR0102" ~ "QS02802", |
605 | 3x |
QSTESTCD == "EOR0103" ~ "QS02803", |
606 | 3x |
QSTESTCD == "EOR0104" ~ "QS02804", |
607 | 3x |
QSTESTCD == "EOR0105" ~ "QS02805", |
608 | 3x |
QSTESTCD == "EOR0106" ~ "QS02806", |
609 | 3x |
QSTESTCD == "EOR0107" ~ "QS02807", |
610 | 3x |
QSTESTCD == "EOR0108" ~ "QS02808", |
611 | 3x |
QSTESTCD == "EOR0109" ~ "QS02809", |
612 | 3x |
QSTESTCD == "EOR0110" ~ "QS02810", |
613 | 3x |
QSTESTCD == "EOR0111" ~ "QS02811", |
614 | 3x |
QSTESTCD == "EOR0112" ~ "QS02812", |
615 | 3x |
QSTESTCD == "EOR0113" ~ "QS02813", |
616 | 3x |
QSTESTCD == "EOR0114" ~ "QS02814", |
617 | 3x |
QSTESTCD == "EOR0115" ~ "QS02815", |
618 | 3x |
QSTESTCD == "EOR0116" ~ "QS02816", |
619 | 3x |
QSTESTCD == "EOR0117" ~ "QS02817", |
620 | 3x |
QSTESTCD == "EOR0118" ~ "QS02818", |
621 | 3x |
QSTESTCD == "EOR0119" ~ "QS02819", |
622 | 3x |
QSTESTCD == "EOR0120" ~ "QS02820", |
623 | 3x |
QSTESTCD == "EOR0121" ~ "QS02821", |
624 | 3x |
QSTESTCD == "EOR0122" ~ "QS02822", |
625 | 3x |
QSTESTCD == "EOR0123" ~ "QS02823", |
626 | 3x |
QSTESTCD == "EOR0124" ~ "QS02824", |
627 | 3x |
QSTESTCD == "EOR0125" ~ "QS02825", |
628 | 3x |
QSTESTCD == "EOR0126" ~ "QS02826", |
629 | 3x |
QSTESTCD == "EOR0127" ~ "QS02827", |
630 | 3x |
QSTESTCD == "EOR0128" ~ "QS02828", |
631 | 3x |
QSTESTCD == "EOR0129" ~ "QS02829", |
632 | 3x |
QSTESTCD == "EOR0130" ~ "QS02830", |
633 | 3x |
TRUE ~ QSTESTCD |
634 |
) |
|
635 |
) |
|
636 | 3x |
load(system.file("sysdata.rda", package = "random.cdisc.data")) |
637 | 3x |
adqlqc1 <- dplyr::left_join( |
638 | 3x |
adqlqc, |
639 | 3x |
gdsr_param_adqlqc, |
640 | 3x |
by = "PARAMCD" |
641 |
) |
|
642 | 3x |
return(adqlqc1) |
643 |
} |
|
644 | ||
645 |
#' @describeIn h_adqlqc Scale calculation for ADQLQC data |
|
646 |
#' |
|
647 |
#' @param adqlqc1 (`data.frame`)\cr Prepared data generated from the [prep_adqlqc()] function. |
|
648 |
#' |
|
649 |
#' @return `data.frame` |
|
650 |
#' @keywords internal |
|
651 |
calc_scales <- function(adqlqc1) { |
|
652 |
# Prep scale data --------------------------------------------------------- |
|
653 |
# parcat2 = scales or global health status |
|
654 |
# global health status/scales data |
|
655 |
# QSTESTCD: EOR0131 to EOR0145 (global health status and scales) |
|
656 | 3x |
load(system.file("sysdata.rda", package = "random.cdisc.data")) |
657 | 3x |
eortc_qlq_c30_sub <- filter( |
658 | 3x |
eortc_qlq_c30, |
659 | 3x |
!(as.numeric(str_extract(QSTESTCD, "\\d+$")) >= 101 & as.numeric(str_extract(QSTESTCD, "\\d+$")) <= 130) |
660 |
) %>% |
|
661 | 3x |
mutate( |
662 | 3x |
PARAMCD = case_when( |
663 | 3x |
QSTESTCD == "EOR0131" ~ "QS028QL2", |
664 | 3x |
QSTESTCD == "EOR0132" ~ "QS028PF2", |
665 | 3x |
QSTESTCD == "EOR0133" ~ "QS028RF2", |
666 | 3x |
QSTESTCD == "EOR0134" ~ "QS028EF", |
667 | 3x |
QSTESTCD == "EOR0135" ~ "QS028CF", |
668 | 3x |
QSTESTCD == "EOR0136" ~ "QS028SF", |
669 | 3x |
QSTESTCD == "EOR0137" ~ "QS028FA", |
670 | 3x |
QSTESTCD == "EOR0138" ~ "QS028NV", |
671 | 3x |
QSTESTCD == "EOR0139" ~ "QS028PA", |
672 | 3x |
QSTESTCD == "EOR0140" ~ "QS028DY", |
673 | 3x |
QSTESTCD == "EOR0141" ~ "QS028SL", |
674 | 3x |
QSTESTCD == "EOR0142" ~ "QS028AP", |
675 | 3x |
QSTESTCD == "EOR0143" ~ "QS028CO", |
676 | 3x |
QSTESTCD == "EOR0144" ~ "QS028DI", |
677 | 3x |
QSTESTCD == "EOR0145" ~ "QS028FI", |
678 | 3x |
TRUE ~ QSTESTCD |
679 |
) |
|
680 |
) %>% |
|
681 | 3x |
select(-publication_name) |
682 | ||
683 |
# ADaM global health status and scales from gdsr |
|
684 | 3x |
gdsr_param_adqlqc <- gdsr_param_adqlqc %>% |
685 | 3x |
filter( |
686 | 3x |
!str_detect(PARCAT2, "Original Items|Completion") |
687 |
) |
|
688 | ||
689 | 3x |
ghs_scales <- left_join( |
690 | 3x |
eortc_qlq_c30_sub, |
691 | 3x |
gdsr_param_adqlqc, |
692 | 3x |
by = "PARAMCD" |
693 |
) |
|
694 |
# scale data |
|
695 | 3x |
df <- data.frame(index = seq_len(nrow(ghs_scales))) |
696 | 3x |
df$previous <- list( |
697 | 3x |
c("QS02826", "QS02827"), |
698 | 3x |
c("QS02811"), |
699 | 3x |
c("QS02810", "QS02812", "QS02818"), |
700 | 3x |
c("QS02806", "QS02807"), |
701 | 3x |
c("QS02814", "QS02815"), |
702 | 3x |
c("QS02808"), |
703 | 3x |
c("QS02817"), |
704 | 3x |
c("QS02816"), |
705 | 3x |
c("QS02821", "QS02822", "QS02823", "QS02824"), |
706 | 3x |
c("QS02829", "QS02830"), |
707 | 3x |
c("QS02813"), |
708 | 3x |
c("QS02801", "QS02802", "QS02803", "QS02804", "QS02805"), |
709 | 3x |
c("QS02809", "QS02819"), |
710 | 3x |
c("QS02820", "QS02825"), |
711 | 3x |
c("QS02828") |
712 |
) |
|
713 | 3x |
df$newName <- list( |
714 | 3x |
"QS028SF", |
715 | 3x |
"QS028SL", |
716 | 3x |
"QS028FA", |
717 | 3x |
"QS028RF2", |
718 | 3x |
"QS028NV", |
719 | 3x |
"QS028DY", |
720 | 3x |
"QS028DI", |
721 | 3x |
"QS028CO", |
722 | 3x |
"QS028EF", |
723 | 3x |
"QS028QL2", |
724 | 3x |
"QS028AP", |
725 | 3x |
"QS028PF2", |
726 | 3x |
"QS028PA", |
727 | 3x |
"QS028CF", |
728 | 3x |
"QS028FI" |
729 |
) |
|
730 | 3x |
df$newNamelabel <- list( |
731 | 3x |
"EORTC QLQ-C30: Social functioning", |
732 | 3x |
"EORTC QLQ-C30: Insomnia", |
733 | 3x |
"EORTC QLQ-C30: Fatigue", |
734 | 3x |
"EORTC QLQ-C30: Role functioning (revised)", |
735 | 3x |
"EORTC QLQ-C30: Nausea and vomiting", |
736 | 3x |
"EORTC QLQ-C30: Dyspnoea", |
737 | 3x |
"EORTC QLQ-C30: Diarrhoea", |
738 | 3x |
"EORTC QLQ-C30: Constipation", |
739 | 3x |
"EORTC QLQ-C30: Emotional functioning", |
740 | 3x |
"EORTC QLQ-C30: Global health status/QoL (revised)", |
741 | 3x |
"EORTC QLQ-C30: Appetite loss", |
742 | 3x |
"EORTC QLQ-C30: Physical functioning (revised)", |
743 | 3x |
"EORTC QLQ-C30: Pain", |
744 | 3x |
"EORTC QLQ-C30: Cognitive functioning", |
745 | 3x |
"EORTC QLQ-C30: Financial difficulties" |
746 |
) |
|
747 | 3x |
df$newNameCategory <- list( |
748 | 3x |
"Functional Scales", |
749 | 3x |
"Symptom Scales", |
750 | 3x |
"Symptom Scales", |
751 | 3x |
"Functional Scales", |
752 | 3x |
"Symptom Scales", |
753 | 3x |
"Symptom Scales", |
754 | 3x |
"Symptom Scales", |
755 | 3x |
"Symptom Scales", |
756 | 3x |
"Functional Scales", |
757 | 3x |
"Global Health Status", |
758 | 3x |
"Symptom Scales", |
759 | 3x |
"Functional Scales", |
760 | 3x |
"Symptom Scales", |
761 | 3x |
"Functional Scales", |
762 | 3x |
"Symptom Scales" |
763 |
) |
|
764 | 3x |
df$num_param <- list( |
765 | 3x |
"1", |
766 | 3x |
"1", |
767 | 3x |
"2", |
768 | 3x |
"1", |
769 | 3x |
"1", |
770 | 3x |
"1", |
771 | 3x |
"1", |
772 | 3x |
"1", |
773 | 3x |
"2", |
774 | 3x |
"1", |
775 | 3x |
"1", |
776 | 3x |
"3", |
777 | 3x |
"1", |
778 | 3x |
"1", |
779 | 3x |
"1" |
780 |
) |
|
781 | 3x |
df$equation <- list( |
782 | 3x |
"new_value = (1 - ((temp_val/var_length)-1)/3)*100.0", |
783 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
784 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
785 | 3x |
"new_value = (1 - ((temp_val/var_length)-1)/3)*100.0", |
786 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
787 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
788 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
789 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
790 | 3x |
"new_value = (1 - ((temp_val/var_length)-1)/3)*100.0", |
791 | 3x |
"new_value = ((temp_val/var_length-1)/6)*100.0", |
792 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
793 | 3x |
"new_value = (1 - ((temp_val/var_length)-1)/3)*100.0", |
794 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
795 | 3x |
"new_value = (1 - ((temp_val/var_length)-1)/3)*100.0", |
796 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0" |
797 |
) |
|
798 | ||
799 | 3x |
expect_data <- data.frame( |
800 | 3x |
PARAM = expect$PARAM, |
801 | 3x |
PARAMCD = expect$PARAMCD, |
802 | 3x |
PARCAT2 = expect$PARCAT2, |
803 | 3x |
PARCAT1N = expect$PARCAT1N, |
804 | 3x |
AVAL = c(0, 1), |
805 | 3x |
AVALC = c( |
806 | 3x |
"Not expected to complete questionnaire", |
807 | 3x |
"Expected to complete questionnaire" |
808 |
) |
|
809 |
) |
|
810 | ||
811 | 3x |
df_saved <- data.frame() |
812 | ||
813 | 3x |
unique_id <- unique(adqlqc1$USUBJID) |
814 | ||
815 | 3x |
for (id in unique_id) { |
816 | 30x |
id_data <- adqlqc1[adqlqc1$USUBJID == id, ] |
817 | 30x |
unique_avisit <- unique(id_data$AVISIT) |
818 | 30x |
for (visit in unique_avisit) { |
819 | 180x |
if (is.na(visit)) { |
820 | ! |
next |
821 |
} |
|
822 | 180x |
id_data_at_visit <- id_data[id_data$AVISIT == visit, ] |
823 | ||
824 | 180x |
if (any(id_data_at_visit$PARAMCD != "QSALL")) { |
825 | 177x |
for (idx in seq_along(df$index)) { |
826 | 2655x |
previous_names <- df$previous[idx] |
827 | 2655x |
current_name <- df$newName[idx] |
828 | 2655x |
current_name_label <- df$newNamelabel[idx] |
829 | 2655x |
current_name_category <- df$newNameCategory[idx] |
830 | 2655x |
eqn <- df$equation[idx] |
831 | 2655x |
temp_val <- 0 |
832 | 2655x |
var_length <- 0 |
833 | 2655x |
for (param_name in previous_names[[1]]) { |
834 | 5310x |
if (param_name %in% id_data_at_visit$PARAMCD) { #### |
835 | 5310x |
current_val <- as.numeric(as.character(id_data_at_visit$AVAL[id_data_at_visit$PARAMCD == param_name])) |
836 | 5310x |
if (!is.na(current_val)) { |
837 | 5094x |
temp_val <- temp_val + current_val ### |
838 | 5094x |
var_length <- var_length + 1 |
839 |
} |
|
840 |
} # if |
|
841 |
} # param_name |
|
842 |
# eval |
|
843 | 2655x |
if (var_length >= as.numeric(df$num_param[idx])) { |
844 | 2604x |
eval(parse(text = eqn)) ##### |
845 |
} else { |
|
846 | 51x |
new_value <- NA |
847 |
} |
|
848 | ||
849 | 2655x |
new_data_row <- data.frame( |
850 | 2655x |
study = str_extract(id, "[A-Z]+[0-9]+"), |
851 | 2655x |
id, |
852 | 2655x |
visit, |
853 | 2655x |
id_data_at_visit$AVISITN[1], |
854 | 2655x |
id_data_at_visit$QSDTC[1], |
855 | 2655x |
current_name_category, |
856 | 2655x |
current_name_label, |
857 | 2655x |
current_name, |
858 | 2655x |
new_value, |
859 | 2655x |
NA, |
860 | 2655x |
stringsAsFactors = FALSE |
861 |
) |
|
862 | 2655x |
colnames(new_data_row) <- c( |
863 | 2655x |
"STUDYID", "USUBJID", "AVISIT", "AVISITN", |
864 | 2655x |
"ADTM", "PARCAT2", "PARAM", "PARAMCD", |
865 | 2655x |
"AVAL", "AVALC" |
866 |
) ### |
|
867 | 2655x |
df_saved <- rbind(df_saved, new_data_row) ##### |
868 |
} # idx |
|
869 |
} |
|
870 |
# add expect data |
|
871 | 180x |
expect_value <- sample(expect_data$AVAL, 1, prob = c(0.10, 0.90)) |
872 | 180x |
expect_valuec <- expect_data$AVALC[expect_data$AVAL == expect_value] |
873 | ||
874 | 180x |
new_data_row <- data.frame( |
875 | 180x |
study = str_extract(id, "[A-Z]+[0-9]+"), |
876 | 180x |
id, |
877 | 180x |
visit, |
878 | 180x |
id_data_at_visit$AVISITN[1], |
879 | 180x |
datetime = NA, |
880 | 180x |
expect_data$PARCAT2[1], |
881 | 180x |
expect_data$PARAM[1], |
882 | 180x |
expect_data$PARAMCD[1], |
883 | 180x |
expect_value, |
884 | 180x |
expect_valuec, |
885 | 180x |
stringsAsFactors = FALSE |
886 |
) |
|
887 | 180x |
colnames(new_data_row) <- c( |
888 | 180x |
"STUDYID", "USUBJID", "AVISIT", "AVISITN", |
889 | 180x |
"ADTM", "PARCAT2", "PARAM", "PARAMCD", "AVAL", |
890 | 180x |
"AVALC" |
891 |
) ### |
|
892 | 180x |
df_saved <- rbind(df_saved, new_data_row) |
893 |
} # visit |
|
894 |
} # id |
|
895 | ||
896 | 3x |
df_saved1 <- left_join( |
897 | 3x |
df_saved, |
898 | 3x |
ghs_scales, |
899 | 3x |
by = c( |
900 | 3x |
"PARAM", |
901 | 3x |
"PARAMCD", |
902 | 3x |
"PARCAT2" |
903 |
) |
|
904 |
) %>% |
|
905 | 3x |
mutate( |
906 | 3x |
AVALC = ifelse(is.na(AVALC), as.character(AVAL), AVALC), |
907 | 3x |
PARCAT1 = ifelse(PARAMCD == "EX028", expect$PARCAT1, PARCAT1), |
908 | 3x |
PARCAT1N = ifelse(PARAMCD == "EX028", expect$PARCAT1N, PARCAT1N) |
909 |
) |
|
910 | ||
911 | 3x |
adqlqc_tmp <- bind_rows(adqlqc1, df_saved1) %>% |
912 | 3x |
arrange( |
913 | 3x |
USUBJID, |
914 | 3x |
AVISITN, |
915 | 3x |
QSTESTCD |
916 |
) |
|
917 | 3x |
return(adqlqc_tmp) |
918 |
} |
|
919 | ||
920 |
#' @describeIn h_adqlqc Calculate Change from Baseline Category 1 |
|
921 |
#' |
|
922 |
#' @param dataset (`data.frame`)\cr ADaM dataset. |
|
923 |
#' |
|
924 |
#' @return `data.frame` |
|
925 |
#' @keywords internal |
|
926 |
derv_chgcat1 <- function(dataset) { |
|
927 |
# derivation of CHGCAT1 |
|
928 | 3x |
check_vars <- c("PARCAT2", "CHG") |
929 | ||
930 | 3x |
if (all(check_vars %in% names(dataset))) { |
931 | 3x |
dataset$CHGCAT1 <- ifelse( |
932 | 3x |
dataset$PARCAT2 == "Symptom Scales" & !is.na(dataset$CHG) & dataset$CHG <= -10, |
933 | 3x |
"Improved", "" |
934 |
) |
|
935 | 3x |
dataset$CHGCAT1 <- ifelse( |
936 | 3x |
dataset$PARCAT2 == "Symptom Scales" & !is.na(dataset$CHG) & dataset$CHG >= 10, |
937 | 3x |
"Worsened", dataset$CHGCAT1 |
938 |
) |
|
939 | 3x |
dataset$CHGCAT1 <- ifelse( |
940 | 3x |
dataset$PARCAT2 == "Symptom Scales" & |
941 | 3x |
!is.na(dataset$CHG) & dataset$CHG > -10 & |
942 | 3x |
dataset$CHG < 10, |
943 | 3x |
"No change", dataset$CHGCAT1 |
944 |
) |
|
945 | ||
946 | 3x |
dataset$CHGCAT1 <- ifelse( |
947 | 3x |
dataset$PARCAT2 %in% c("Functional Scales", "Global Health Status") & |
948 | 3x |
!is.na(dataset$CHG) & dataset$CHG >= 10, |
949 | 3x |
"Improved", dataset$CHGCAT1 |
950 |
) |
|
951 | 3x |
dataset$CHGCAT1 <- ifelse( |
952 | 3x |
dataset$PARCAT2 %in% c("Functional Scales", "Global Health Status") & |
953 | 3x |
!is.na(dataset$CHG) & dataset$CHG <= -10, |
954 | 3x |
"Worsened", dataset$CHGCAT1 |
955 |
) |
|
956 | 3x |
dataset$CHGCAT1 <- ifelse( |
957 | 3x |
dataset$PARCAT2 %in% c("Functional Scales", "Global Health Status") & |
958 | 3x |
!is.na(dataset$CHG) & |
959 | 3x |
dataset$CHG > -10 & dataset$CHG < 10, |
960 | 3x |
"No change", dataset$CHGCAT1 |
961 |
) |
|
962 | ||
963 | 3x |
dataset$CHGCAT1 <- ifelse( |
964 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 6, |
965 | 3x |
"Improved by six levels", dataset$CHGCAT1 |
966 |
) |
|
967 | 3x |
dataset$CHGCAT1 <- ifelse( |
968 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 5, |
969 | 3x |
"Improved by five levels", dataset$CHGCAT1 |
970 |
) |
|
971 | 3x |
dataset$CHGCAT1 <- ifelse( |
972 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 4, |
973 | 3x |
"Improved by four levels", dataset$CHGCAT1 |
974 |
) |
|
975 | 3x |
dataset$CHGCAT1 <- ifelse( |
976 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 3, |
977 | 3x |
"Improved by three levels", dataset$CHGCAT1 |
978 |
) |
|
979 | 3x |
dataset$CHGCAT1 <- ifelse( |
980 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 2, |
981 | 3x |
"Improved by two levels", dataset$CHGCAT1 |
982 |
) |
|
983 | 3x |
dataset$CHGCAT1 <- ifelse( |
984 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 1, |
985 | 3x |
"Improved by one level", dataset$CHGCAT1 |
986 |
) |
|
987 | 3x |
dataset$CHGCAT1 <- ifelse( |
988 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 0, |
989 | 3x |
"No change", dataset$CHGCAT1 |
990 |
) |
|
991 | 3x |
dataset$CHGCAT1 <- ifelse( |
992 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -1, |
993 | 3x |
"Worsened by one level", dataset$CHGCAT1 |
994 |
) |
|
995 | 3x |
dataset$CHGCAT1 <- ifelse( |
996 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -2, |
997 | 3x |
"Worsened by two levels", dataset$CHGCAT1 |
998 |
) |
|
999 | 3x |
dataset$CHGCAT1 <- ifelse( |
1000 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -3, |
1001 | 3x |
"Worsened by three levels", dataset$CHGCAT1 |
1002 |
) |
|
1003 | 3x |
dataset$CHGCAT1 <- ifelse( |
1004 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -4, |
1005 | 3x |
"Worsened by four levels", dataset$CHGCAT1 |
1006 |
) |
|
1007 | 3x |
dataset$CHGCAT1 <- ifelse( |
1008 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -5, |
1009 | 3x |
"Worsened by five levels", dataset$CHGCAT1 |
1010 |
) |
|
1011 | 3x |
dataset$CHGCAT1 <- ifelse( |
1012 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -6, |
1013 | 3x |
"Worsened by six levels", dataset$CHGCAT1 |
1014 |
) |
|
1015 | ||
1016 | 3x |
dataset$CHGCAT1 <- ifelse( |
1017 | 3x |
dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == -3, |
1018 | 3x |
"Improved by three levels", dataset$CHGCAT1 |
1019 |
) |
|
1020 | 3x |
dataset$CHGCAT1 <- ifelse( |
1021 | 3x |
dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == -2, |
1022 | 3x |
"Improved by two levels", dataset$CHGCAT1 |
1023 |
) |
|
1024 | 3x |
dataset$CHGCAT1 <- ifelse( |
1025 | 3x |
dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == -1, |
1026 | 3x |
"Improved by one level", dataset$CHGCAT1 |
1027 |
) |
|
1028 | 3x |
dataset$CHGCAT1 <- ifelse( |
1029 | 3x |
dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == 0, |
1030 | 3x |
"No change", dataset$CHGCAT1 |
1031 |
) |
|
1032 | 3x |
dataset$CHGCAT1 <- ifelse( |
1033 | 3x |
dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == 1, |
1034 | 3x |
"Worsened by one level", dataset$CHGCAT1 |
1035 |
) |
|
1036 | 3x |
dataset$CHGCAT1 <- ifelse( |
1037 | 3x |
dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == 2, |
1038 | 3x |
"Worsened by two levels", dataset$CHGCAT1 |
1039 |
) |
|
1040 | 3x |
dataset$CHGCAT1 <- ifelse( |
1041 | 3x |
dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == 3, |
1042 | 3x |
"Worsened by three levels", dataset$CHGCAT1 |
1043 |
) |
|
1044 | ||
1045 | 3x |
dataset$CHGCAT1 <- ifelse( |
1046 | 3x |
dataset$PARAMCD == "QS02801" & dataset$CHG == -3, |
1047 | 3x |
"Improved by three levels", dataset$CHGCAT1 |
1048 |
) |
|
1049 | 3x |
dataset$CHGCAT1 <- ifelse( |
1050 | 3x |
dataset$PARAMCD == "QS02801" & dataset$CHG == -2, |
1051 | 3x |
"Improved by two levels", dataset$CHGCAT1 |
1052 |
) |
|
1053 | 3x |
dataset$CHGCAT1 <- ifelse( |
1054 | 3x |
dataset$PARAMCD == "QS02801" & dataset$CHG == -1, |
1055 | 3x |
"Improved by one level", dataset$CHGCAT1 |
1056 |
) |
|
1057 | 3x |
dataset$CHGCAT1 <- ifelse( |
1058 | 3x |
dataset$PARAMCD == "QS02801" & dataset$CHG == 0, |
1059 | 3x |
"No changed", dataset$CHGCAT1 |
1060 |
) |
|
1061 | 3x |
dataset$CHGCAT1 <- ifelse( |
1062 | 3x |
dataset$PARAMCD == "QS02801" & dataset$CHG == 1, |
1063 | 3x |
"Worsened by one level", dataset$CHGCAT1 |
1064 |
) |
|
1065 | 3x |
dataset$CHGCAT1 <- ifelse( |
1066 | 3x |
dataset$PARAMCD == "QS02801" & dataset$CHG == 2, |
1067 | 3x |
"Worsened by two levels", dataset$CHGCAT1 |
1068 |
) |
|
1069 | 3x |
dataset$CHGCAT1 <- ifelse( |
1070 | 3x |
dataset$PARAMCD == "QS02801" & dataset$CHG == 3, |
1071 | 3x |
"Worsened by three levels", dataset$CHGCAT1 |
1072 |
) |
|
1073 | ||
1074 | 3x |
paramcd_vec <- c( |
1075 | 3x |
"QS02803", "QS02804", "QS02805", "QS02807", "QS02808", "QS02809", "QS02810", |
1076 | 3x |
"QS02811", "QS02812", "QS02813", "QS02814", "QS02815", "QS02816", "QS02817", |
1077 | 3x |
"QS02818", "QS02819", "QS02820", "QS02821", "QS02822", "QS02823", "QS02824", |
1078 | 3x |
"QS02825", "QS02826", "QS02827", "QS02828" |
1079 |
) |
|
1080 | ||
1081 | 3x |
dataset$CHGCAT1 <- ifelse( |
1082 | 3x |
dataset$PARAMCD %in% paramcd_vec & dataset$CHG == -3, |
1083 | 3x |
"Improved by three levels", dataset$CHGCAT1 |
1084 |
) |
|
1085 | 3x |
dataset$CHGCAT1 <- ifelse( |
1086 | 3x |
dataset$PARAMCD %in% paramcd_vec & dataset$CHG == -2, |
1087 | 3x |
"Improved by two levels", dataset$CHGCAT1 |
1088 |
) |
|
1089 | 3x |
dataset$CHGCAT1 <- ifelse( |
1090 | 3x |
dataset$PARAMCD %in% paramcd_vec & dataset$CHG == -1, |
1091 | 3x |
"Improved by one level", dataset$CHGCAT1 |
1092 |
) |
|
1093 | 3x |
dataset$CHGCAT1 <- ifelse( |
1094 | 3x |
dataset$PARAMCD %in% paramcd_vec & dataset$CHG == 0, |
1095 | 3x |
"No change", dataset$CHGCAT1 |
1096 |
) |
|
1097 | 3x |
dataset$CHGCAT1 <- ifelse( |
1098 | 3x |
dataset$PARAMCD %in% paramcd_vec & dataset$CHG == 1, |
1099 | 3x |
"Worsened by one level", dataset$CHGCAT1 |
1100 |
) |
|
1101 | 3x |
dataset$CHGCAT1 <- ifelse( |
1102 | 3x |
dataset$PARAMCD %in% paramcd_vec & dataset$CHG == 2, |
1103 | 3x |
"Worsened by two levels", dataset$CHGCAT1 |
1104 |
) |
|
1105 | 3x |
dataset$CHGCAT1 <- ifelse( |
1106 | 3x |
dataset$PARAMCD %in% paramcd_vec & dataset$CHG == 3, |
1107 | 3x |
"Worsened by three levels", dataset$CHGCAT1 |
1108 |
) |
|
1109 | ||
1110 | 3x |
return(dataset) |
1111 |
} else { |
|
1112 | ! |
collapse_vars <- paste(check_vars, collapse = ", ") |
1113 | ! |
stop(sprintf( |
1114 | ! |
"%s: one or both variables is/are missing, needed for derivation", |
1115 | ! |
collapse_vars |
1116 |
)) |
|
1117 |
} |
|
1118 |
} |
|
1119 | ||
1120 |
#' @describeIn h_adqlqc Completion/Compliance Data Calculation |
|
1121 |
#' |
|
1122 |
#' @param dataset (`data.frame`)\cr Dataset. |
|
1123 |
#' |
|
1124 |
#' @return `data.frame` |
|
1125 |
#' @keywords internal |
|
1126 |
comp_derv <- function(dataset, percent, number) { |
|
1127 |
# original items data |
|
1128 | 3x |
orig_data <- filter( |
1129 | 3x |
dataset, |
1130 | 3x |
PARCAT2 == "Original Items" |
1131 |
) |
|
1132 |
# total number of questionnaires |
|
1133 | 3x |
comp_count_all <- select( |
1134 | 3x |
orig_data, |
1135 | 3x |
PARAMCD |
1136 |
) %>% |
|
1137 | 3x |
distinct() %>% |
1138 | 3x |
count() |
1139 | 3x |
comp_count_all <- comp_count_all$n |
1140 |
# original items data count of questions answered |
|
1141 | 3x |
orig_data_summ <- group_by( |
1142 | 3x |
orig_data, |
1143 | 3x |
STUDYID, |
1144 | 3x |
USUBJID, |
1145 | 3x |
PARCAT1, |
1146 | 3x |
AVISIT, |
1147 | 3x |
AVISITN, |
1148 | 3x |
ADTM, |
1149 | 3x |
ADY |
1150 |
) %>% |
|
1151 | 3x |
summarise( |
1152 | 3x |
comp_count = sum(!is.na(AVAL)), |
1153 | 3x |
comp_count_all = comp_count_all, |
1154 | 3x |
.groups = "drop" |
1155 |
) %>% |
|
1156 | 3x |
mutate( |
1157 | 3x |
per_comp = trunc((comp_count / comp_count_all) * 100) |
1158 |
) |
|
1159 |
# expected data |
|
1160 | 3x |
ex028_data <- filter( |
1161 | 3x |
dataset, |
1162 | 3x |
PARAMCD == "EX028", |
1163 | 3x |
AVAL == 1 |
1164 |
) %>% |
|
1165 | 3x |
select( |
1166 | 3x |
STUDYID, |
1167 | 3x |
USUBJID, |
1168 | 3x |
PARCAT1, |
1169 | 3x |
AVISIT, |
1170 | 3x |
AVISITN, |
1171 | 3x |
ADTM, |
1172 | 3x |
ADY, |
1173 | 3x |
AVAL_ex028 = AVAL |
1174 |
) %>% |
|
1175 | 3x |
mutate( |
1176 | 3x |
comp_count_all = comp_count_all |
1177 |
) |
|
1178 | ||
1179 | 3x |
joined <- left_join( |
1180 | 3x |
ex028_data, |
1181 | 3x |
orig_data_summ, |
1182 | 3x |
by = c( |
1183 | 3x |
"STUDYID", |
1184 | 3x |
"USUBJID", |
1185 | 3x |
"PARCAT1", |
1186 | 3x |
"AVISIT", |
1187 | 3x |
"AVISITN", |
1188 | 3x |
"comp_count_all" |
1189 |
) |
|
1190 |
) %>% |
|
1191 | 3x |
select(-c("ADTM.x", "ADY.x")) |
1192 | ||
1193 | 3x |
joined <- rename( |
1194 | 3x |
joined, |
1195 | 3x |
ADTM = ADTM.y, |
1196 | 3x |
ADY = ADY.y |
1197 |
) |
|
1198 |
# CO028ALL |
|
1199 | 3x |
co028all <- mutate( |
1200 | 3x |
joined, |
1201 | 3x |
PARAMCD = "CO028ALL", |
1202 | 3x |
PARAM = "EORTC QLQ-C30: Completion - Completed all questions", |
1203 | 3x |
PARCAT2 = "Completion", |
1204 | 3x |
AVAL = case_when( |
1205 | 3x |
AVAL_ex028 == 1 & comp_count == comp_count_all ~ 1, |
1206 | 3x |
AVAL_ex028 == 1 & (is.na(comp_count) | comp_count < comp_count_all) ~ 0 |
1207 |
), |
|
1208 | 3x |
AVALC = case_when( |
1209 | 3x |
AVAL == 1 ~ "Completed all questions", |
1210 | 3x |
AVAL == 0 ~ "Did not complete all questions" |
1211 |
) |
|
1212 |
) |
|
1213 |
# CO028<y>P |
|
1214 | 3x |
co028p <- mutate( |
1215 | 3x |
joined, |
1216 | 3x |
PARAMCD = paste0("CO028", as.character(percent), "P"), |
1217 | 3x |
PARAM = sprintf( |
1218 | 3x |
"EORTC QLQ-C30: Completion - Completed at least %s%% of questions", |
1219 | 3x |
as.character(percent) |
1220 |
), |
|
1221 | 3x |
PARCAT2 = "Completion", |
1222 | 3x |
AVAL = case_when( |
1223 | 3x |
AVAL_ex028 == 1 & per_comp >= percent ~ 1, |
1224 | 3x |
AVAL_ex028 == 1 & (is.na(per_comp) | per_comp < percent) ~ 0 |
1225 |
), |
|
1226 | 3x |
AVALC = case_when( |
1227 | 3x |
AVAL == 1 ~ sprintf( |
1228 | 3x |
"Completed at least %s%% of questions", |
1229 | 3x |
as.character(percent) |
1230 |
), |
|
1231 | 3x |
AVAL == 0 ~ sprintf( |
1232 | 3x |
"Did not complete at least %s%% of questions", |
1233 | 3x |
as.character(percent) |
1234 |
) |
|
1235 |
) |
|
1236 |
) |
|
1237 |
# CO028<x>Q |
|
1238 | 3x |
co028q <- mutate( |
1239 | 3x |
joined, |
1240 | 3x |
PARAMCD = paste0("CO028", as.character(number), "Q"), |
1241 | 3x |
PARAM = sprintf( |
1242 | 3x |
"EORTC QLQ-C30: Completion - Completed at least %s question(s)", |
1243 | 3x |
as.character(number) |
1244 |
), |
|
1245 | 3x |
PARCAT2 = "Completion", |
1246 | 3x |
AVAL = case_when( |
1247 | 3x |
AVAL_ex028 == 1 & comp_count >= number ~ 1, |
1248 | 3x |
AVAL_ex028 == 1 & (comp_count < number | is.na(comp_count)) ~ 0 |
1249 |
), |
|
1250 | 3x |
AVALC = case_when( |
1251 | 3x |
AVAL == 1 ~ sprintf( |
1252 | 3x |
"Completed at least %s questions", |
1253 | 3x |
as.character(number) |
1254 |
), |
|
1255 | 3x |
AVAL == 0 ~ sprintf( |
1256 | 3x |
"Did not complete at least %s question(s)", |
1257 | 3x |
as.character(number) |
1258 |
) |
|
1259 |
) |
|
1260 |
) |
|
1261 | ||
1262 | 3x |
co028_bind <- rbind( |
1263 | 3x |
co028all, |
1264 | 3x |
co028p, |
1265 | 3x |
co028q |
1266 |
) %>% |
|
1267 | 3x |
select( |
1268 | 3x |
-c("AVAL_ex028", "comp_count", "comp_count_all", "per_comp") |
1269 |
) |
|
1270 | 3x |
return(co028_bind) |
1271 |
} |
1 |
#' Exposure Analysis Dataset (ADEX) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating random Exposure Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details One record per each record in the corresponding SDTM domain. |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `EXSEQ`, `PARAMCD`, `PARCAT1`, `ASTDTM`, `AENDTM`, `ASTDY`, `AENDY`, |
|
11 |
#' `AVISITN`, `EXDOSFRQ`, `EXROUTE`, `VISIT`, `VISITDY`, `EXSTDTC`, `EXENDTC`, `EXSTDY`, `EXENDY` |
|
12 |
#' |
|
13 |
#' @inheritParams argument_convention |
|
14 |
#' @param parcat1 (`character vector`)\cr Dose amount categories. Defaults to "Individual" and "Overall". |
|
15 |
#' @param parcat2 (`character vector`)\cr Types of drug received. Defaults to "Drug A" and "Drug B". |
|
16 |
#' @param max_n_exs (`integer`)\cr Maximum number of exposures per patient. Defaults to 6. |
|
17 |
#' @template param_cached |
|
18 |
#' @templateVar data adex |
|
19 |
#' |
|
20 |
#' @return `data.frame` |
|
21 |
#' @export |
|
22 |
#' |
|
23 |
#' @examples |
|
24 |
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1) |
|
25 |
#' |
|
26 |
#' adex <- radex(adsl, seed = 2) |
|
27 |
#' adex |
|
28 |
radex <- function(adsl, |
|
29 |
param = c( |
|
30 |
"Dose administered during constant dosing interval", |
|
31 |
"Number of doses administered during constant dosing interval", |
|
32 |
"Total dose administered", |
|
33 |
"Total number of doses administered" |
|
34 |
), |
|
35 |
paramcd = c("DOSE", "NDOSE", "TDOSE", "TNDOSE"), |
|
36 |
paramu = c("mg", " ", "mg", " "), |
|
37 |
parcat1 = c("INDIVIDUAL", "OVERALL"), |
|
38 |
parcat2 = c("Drug A", "Drug B"), |
|
39 |
visit_format = "WEEK", |
|
40 |
n_assessments = 5L, |
|
41 |
n_days = 5L, |
|
42 |
max_n_exs = 6L, |
|
43 |
lookup = NULL, |
|
44 |
seed = NULL, |
|
45 |
na_percentage = 0, |
|
46 |
na_vars = list(AVAL = c(NA, 0.1), AVALU = c(NA), 0.1), |
|
47 |
cached = FALSE) { |
|
48 | 4x |
checkmate::assert_flag(cached) |
49 | 4x |
if (cached) { |
50 | 1x |
return(get_cached_data("cadex")) |
51 |
} |
|
52 | ||
53 | 3x |
checkmate::assert_data_frame(adsl) |
54 | 3x |
checkmate::assert_character(param, min.len = 1, any.missing = FALSE) |
55 | 3x |
checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE) |
56 | 3x |
checkmate::assert_character(parcat1, min.len = 1, any.missing = FALSE) |
57 | 3x |
checkmate::assert_character(parcat2, min.len = 1, any.missing = FALSE) |
58 | 3x |
checkmate::assert_string(visit_format) |
59 | 3x |
checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE) |
60 | 3x |
checkmate::assert_integer(n_days, len = 1, any.missing = FALSE) |
61 | 3x |
checkmate::assert_integer(max_n_exs, len = 1, any.missing = FALSE) |
62 | 3x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
63 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
64 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
65 | 3x |
checkmate::assert_true(na_percentage < 1) |
66 | ||
67 |
# validate and initialize related variables |
|
68 | 3x |
param_init_list <- relvar_init(param, paramcd) |
69 | 3x |
unit_init_list <- relvar_init(param, paramu) |
70 | ||
71 | 3x |
if (!is.null(seed)) { |
72 | 3x |
set.seed(seed) |
73 |
} |
|
74 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
75 | ||
76 | 3x |
adex <- expand.grid( |
77 | 3x |
STUDYID = unique(adsl$STUDYID), |
78 | 3x |
USUBJID = adsl$USUBJID, |
79 | 3x |
PARAM = c( |
80 | 3x |
rep( |
81 | 3x |
param_init_list$relvar1[1], |
82 | 3x |
length(levels(visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days))) |
83 |
), |
|
84 | 3x |
rep( |
85 | 3x |
param_init_list$relvar1[2], |
86 | 3x |
length(levels(visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days))) |
87 |
), |
|
88 | 3x |
param_init_list$relvar1[3:4] |
89 |
), |
|
90 | 3x |
stringsAsFactors = FALSE |
91 |
) |
|
92 | ||
93 |
# assign related variable values: PARAMxPARAMCD are related |
|
94 | 3x |
adex <- adex %>% rel_var( |
95 | 3x |
var_name = "PARAMCD", |
96 | 3x |
related_var = "PARAM", |
97 | 3x |
var_values = param_init_list$relvar2 |
98 |
) |
|
99 | ||
100 |
# assign related variable values: AVALUxPARAM are related |
|
101 | 3x |
adex <- adex %>% rel_var( |
102 | 3x |
var_name = "AVALU", |
103 | 3x |
related_var = "PARAM", |
104 | 3x |
var_values = unit_init_list$relvar2 |
105 |
) |
|
106 | ||
107 | 3x |
adex <- adex %>% |
108 | 3x |
dplyr::group_by(USUBJID) %>% |
109 | 3x |
dplyr::mutate(PARCAT_ind = sample(c(1, 2), size = 1)) %>% |
110 | 3x |
dplyr::mutate(PARCAT2 = ifelse(PARCAT_ind == 1, parcat2[1], parcat2[2])) %>% |
111 | 3x |
dplyr::select(-"PARCAT_ind") |
112 | ||
113 |
# Add in PARCAT1 |
|
114 | 3x |
adex <- adex %>% dplyr::mutate(PARCAT1 = dplyr::case_when( |
115 | 3x |
(PARAMCD == "TNDOSE" | PARAMCD == "TDOSE") ~ "OVERALL", |
116 | 3x |
PARAMCD == "DOSE" | PARAMCD == "NDOSE" ~ "INDIVIDUAL" |
117 |
)) |
|
118 | ||
119 | 3x |
adex_visit <- adex %>% |
120 | 3x |
dplyr::filter(PARAMCD == "DOSE" | PARAMCD == "NDOSE") %>% |
121 | 3x |
dplyr::mutate( |
122 | 3x |
AVISIT = rep(visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days), 2) |
123 |
) |
|
124 | ||
125 | 3x |
adex <- dplyr::left_join( |
126 | 3x |
adex %>% |
127 | 3x |
dplyr::group_by( |
128 | 3x |
USUBJID, |
129 | 3x |
STUDYID, |
130 | 3x |
PARAM, |
131 | 3x |
PARAMCD, |
132 | 3x |
AVALU, |
133 | 3x |
PARCAT1, |
134 | 3x |
PARCAT2 |
135 |
) %>% |
|
136 | 3x |
dplyr::mutate(id = dplyr::row_number()), |
137 | 3x |
adex_visit %>% |
138 | 3x |
dplyr::group_by( |
139 | 3x |
USUBJID, |
140 | 3x |
STUDYID, |
141 | 3x |
PARAM, |
142 | 3x |
PARAMCD, |
143 | 3x |
AVALU, |
144 | 3x |
PARCAT1, |
145 | 3x |
PARCAT2 |
146 |
) %>% |
|
147 | 3x |
dplyr::mutate(id = dplyr::row_number()), |
148 | 3x |
by = c("USUBJID", "STUDYID", "PARCAT1", "PARCAT2", "id", "PARAMCD", "PARAM", "AVALU") |
149 |
) %>% |
|
150 | 3x |
dplyr::select(-"id") |
151 | ||
152 |
# Visit numbers |
|
153 | 3x |
adex <- adex %>% dplyr::mutate(AVISITN = dplyr::case_when( |
154 | 3x |
AVISIT == "SCREENING" ~ -1, |
155 | 3x |
AVISIT == "BASELINE" ~ 0, |
156 | 3x |
(grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2, |
157 | 3x |
TRUE ~ 999000 |
158 |
)) |
|
159 | ||
160 | ||
161 | 3x |
adex2 <- split(adex, adex$USUBJID) %>% |
162 | 3x |
lapply(function(pinfo) { |
163 | 30x |
pinfo %>% |
164 | 30x |
dplyr::filter(PARAMCD == "DOSE") %>% |
165 | 30x |
dplyr::group_by(USUBJID, PARCAT2, AVISIT) %>% |
166 | 30x |
dplyr::mutate(changeind = dplyr::case_when( |
167 | 30x |
AVISIT == "SCREENING" ~ 0, |
168 | 30x |
AVISIT != "SCREENING" ~ sample(c(-1, 0, 1), |
169 | 30x |
size = 1, |
170 | 30x |
prob = c(0.25, 0.5, 0.25), |
171 | 30x |
replace = TRUE |
172 |
) |
|
173 |
)) %>% |
|
174 | 30x |
dplyr::ungroup() %>% |
175 | 30x |
dplyr::group_by(USUBJID, PARCAT2) %>% |
176 | 30x |
dplyr::mutate( |
177 | 30x |
csum = cumsum(changeind), |
178 | 30x |
changeind = dplyr::case_when( |
179 | 30x |
csum <= -3 ~ sample(c(0, 1), size = 1, prob = c(0.5, 0.5)), |
180 | 30x |
csum >= 3 ~ sample(c(0, -1), size = 1, prob = c(0.5, 0.5)), |
181 | 30x |
TRUE ~ changeind |
182 |
) |
|
183 |
) %>% |
|
184 | 30x |
dplyr::mutate(csum = cumsum(changeind)) %>% |
185 | 30x |
dplyr::ungroup() %>% |
186 | 30x |
dplyr::group_by(USUBJID, PARCAT2, AVISIT) %>% |
187 | 30x |
dplyr::mutate(AVAL = dplyr::case_when( |
188 | 30x |
csum == -2 ~ 480, |
189 | 30x |
csum == -1 ~ 720, |
190 | 30x |
csum == 0 ~ 960, |
191 | 30x |
csum == 1 ~ 1200, |
192 | 30x |
csum == 2 ~ 1440 |
193 |
)) %>% |
|
194 | 30x |
dplyr::select(-c("csum", "changeind")) %>% |
195 | 30x |
dplyr::ungroup() |
196 |
}) %>% |
|
197 | 3x |
Reduce(rbind, .) |
198 | ||
199 | 3x |
adex_tmp <- dplyr::full_join(adex2, adex, by = names(adex)) |
200 | 3x |
adex <- adex_tmp %>% |
201 | 3x |
dplyr::group_by(USUBJID) %>% |
202 | 3x |
dplyr::mutate(AVAL = ifelse(PARAMCD == "NDOSE", 1, AVAL)) %>% |
203 | 3x |
dplyr::mutate(AVAL = ifelse( |
204 | 3x |
PARAMCD == "TNDOSE", |
205 | 3x |
sum(AVAL[PARAMCD == "NDOSE"]), |
206 | 3x |
AVAL |
207 |
)) %>% |
|
208 | 3x |
dplyr::ungroup() %>% |
209 | 3x |
dplyr::group_by(USUBJID, STUDYID, PARCAT2) %>% |
210 | 3x |
dplyr::mutate(AVAL = ifelse( |
211 | 3x |
PARAMCD == "TDOSE", |
212 | 3x |
sum(AVAL[PARAMCD == "DOSE"]), |
213 | 3x |
AVAL |
214 |
)) |
|
215 | ||
216 | 3x |
adex <- rcd_var_relabel( |
217 | 3x |
adex, |
218 | 3x |
STUDYID = "Study Identifier", |
219 | 3x |
USUBJID = "Unique Subject Identifier" |
220 |
) |
|
221 | ||
222 |
# merge ADSL to be able to add ADEX date and study day variables |
|
223 | 3x |
adex <- dplyr::inner_join(adex, adsl, by = c("STUDYID", "USUBJID")) %>% |
224 | 3x |
dplyr::rowwise() %>% |
225 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
226 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
227 | 3x |
TRUE ~ TRTEDTM |
228 |
))) %>% |
|
229 | 3x |
dplyr::mutate(ASTDTM = sample( |
230 | 3x |
seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"), |
231 | 3x |
size = 1 |
232 |
)) %>% |
|
233 |
# add 1 to end of range incase both values passed to sample() are the same |
|
234 | 3x |
dplyr::mutate(AENDTM = sample( |
235 | 3x |
seq(lubridate::as_datetime(ASTDTM), lubridate::as_datetime(TRTENDT + 1), by = "day"), |
236 | 3x |
size = 1 |
237 |
)) %>% |
|
238 | 3x |
dplyr::select(-TRTENDT) %>% |
239 | 3x |
dplyr::ungroup() %>% |
240 | 3x |
dplyr::arrange(STUDYID, USUBJID, ASTDTM) |
241 | ||
242 | ||
243 | 3x |
adex <- adex %>% |
244 | 3x |
dplyr::group_by(USUBJID) %>% |
245 | 3x |
dplyr::mutate(EXSEQ = seq_len(dplyr::n())) %>% |
246 | 3x |
dplyr::mutate(ASEQ = EXSEQ) %>% |
247 | 3x |
dplyr::ungroup() %>% |
248 | 3x |
dplyr::arrange( |
249 | 3x |
STUDYID, |
250 | 3x |
USUBJID, |
251 | 3x |
PARAMCD, |
252 | 3x |
ASTDTM, |
253 | 3x |
AVISITN, |
254 | 3x |
EXSEQ |
255 |
) |
|
256 | ||
257 |
# Adding EXDOSFRQ |
|
258 | 3x |
adex <- adex %>% |
259 | 3x |
dplyr::mutate(EXDOSFRQ = dplyr::case_when( |
260 | 3x |
PARCAT1 == "INDIVIDUAL" ~ "ONCE", |
261 | 3x |
TRUE ~ "" |
262 |
)) |
|
263 | ||
264 |
# Adding EXROUTE |
|
265 | 3x |
adex <- adex %>% |
266 | 3x |
dplyr::mutate(EXROUTE = dplyr::case_when( |
267 | 3x |
PARCAT1 == "INDIVIDUAL" ~ sample(c("INTRAVENOUS", "SUBCUTANEOUS"), |
268 | 3x |
nrow(adex), |
269 | 3x |
replace = TRUE, |
270 | 3x |
prob = c(0.9, 0.1) |
271 |
), |
|
272 | 3x |
TRUE ~ "" |
273 |
)) |
|
274 | ||
275 |
# Fix VISIT according to AVISIT |
|
276 | 3x |
adex <- adex %>% |
277 | 3x |
dplyr::mutate(VISIT = AVISIT) |
278 | ||
279 |
# Hack for VISITDY - to fix in ADSL |
|
280 | 3x |
visit_levels <- str_extract(levels(adex$VISIT), pattern = "[0-9]+") |
281 | 3x |
vl_extracted <- vapply(visit_levels, function(x) as.numeric(x[2]), numeric(1)) |
282 | 3x |
vl_extracted <- c(-1, 1, vl_extracted[!is.na(vl_extracted)]) |
283 | ||
284 |
# Adding VISITDY |
|
285 | 3x |
adex <- adex %>% |
286 | 3x |
dplyr::mutate(VISITDY = as.numeric(as.character(factor(VISIT, labels = vl_extracted)))) |
287 | ||
288 |
# Exposure time stamps |
|
289 | 3x |
adex <- adex %>% |
290 | 3x |
dplyr::mutate( |
291 | 3x |
EXSTDTC = TRTSDTM + lubridate::days(VISITDY), |
292 | 3x |
EXENDTC = EXSTDTC + lubridate::hours(1), |
293 | 3x |
EXSTDY = VISITDY, |
294 | 3x |
EXENDY = VISITDY |
295 |
) |
|
296 | ||
297 |
# Correcting last exposure to treatment |
|
298 | 3x |
adex <- adex %>% |
299 | 3x |
dplyr::group_by(SUBJID) %>% |
300 | 3x |
dplyr::mutate(TRTEDTM = lubridate::as_datetime(max(EXENDTC, na.rm = TRUE))) %>% |
301 | 3x |
dplyr::ungroup() |
302 | ||
303 |
# Fixing Date - to add into ADSL |
|
304 | 3x |
adex <- adex %>% |
305 | 3x |
dplyr::mutate( |
306 | 3x |
TRTSDT = lubridate::date(TRTSDTM), |
307 | 3x |
TRTEDT = lubridate::date(TRTEDTM) |
308 |
) |
|
309 | ||
310 |
# Fixing analysis time stamps |
|
311 | 3x |
adex <- adex %>% |
312 | 3x |
dplyr::mutate( |
313 | 3x |
ASTDY = EXSTDY, |
314 | 3x |
AENDY = EXENDY, |
315 | 3x |
ASTDTM = EXSTDTC, |
316 | 3x |
AENDTM = EXENDTC |
317 |
) |
|
318 | ||
319 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) { |
320 | ! |
adex <- mutate_na(ds = adex, na_vars = na_vars, na_percentage = na_percentage) |
321 |
} |
|
322 | ||
323 |
# apply metadata |
|
324 | 3x |
adex <- apply_metadata(adex, "metadata/ADEX.yml") |
325 |
} |
|
326 | ||
327 |
# Equivalent of stringr::str_extract_all() |
|
328 |
str_extract <- function(string, pattern) { |
|
329 | 2850x |
regmatches(string, gregexpr(pattern, string)) |
330 |
} |
1 |
#' Laboratory Data Analysis Dataset (ADLB) |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("stable")` |
|
4 |
#' |
|
5 |
#' Function for generating a random Laboratory Data Analysis Dataset for a given |
|
6 |
#' Subject-Level Analysis Dataset. |
|
7 |
#' |
|
8 |
#' @details One record per subject per parameter per analysis visit per analysis date. |
|
9 |
#' |
|
10 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `BASETYPE`, `AVISITN`, `ATPTN`, `DTYPE`, `ADTM`, `LBSEQ`, `ASPID` |
|
11 |
# |
|
12 |
#' @inheritParams argument_convention |
|
13 |
#' @param lbcat (`character vector`)\cr LB category values. |
|
14 |
#' @param max_n_lbs (`integer`)\cr Maximum number of labs per patient. Defaults to 10. |
|
15 |
#' @template param_cached |
|
16 |
#' @templateVar data adlb |
|
17 |
#' |
|
18 |
#' @return `data.frame` |
|
19 |
#' @export |
|
20 |
#' |
|
21 |
#' @author tomlinsj, npaszty, Xuefeng Hou |
|
22 |
#' |
|
23 |
#' @examples |
|
24 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
25 |
#' |
|
26 |
#' adlb <- radlb(adsl, visit_format = "WEEK", n_assessments = 7L, seed = 2) |
|
27 |
#' adlb |
|
28 |
#' |
|
29 |
#' adlb <- radlb(adsl, visit_format = "CYCLE", n_assessments = 2L, seed = 2) |
|
30 |
#' adlb |
|
31 |
radlb <- function(adsl, |
|
32 |
lbcat = c("CHEMISTRY", "CHEMISTRY", "IMMUNOLOGY"), |
|
33 |
param = c( |
|
34 |
"Alanine Aminotransferase Measurement", |
|
35 |
"C-Reactive Protein Measurement", |
|
36 |
"Immunoglobulin A Measurement" |
|
37 |
), |
|
38 |
paramcd = c("ALT", "CRP", "IGA"), |
|
39 |
paramu = c("U/L", "mg/L", "g/L"), |
|
40 |
aval_mean = c(18, 9, 2.9), |
|
41 |
visit_format = "WEEK", |
|
42 |
n_assessments = 5L, |
|
43 |
n_days = 5L, |
|
44 |
max_n_lbs = 10L, |
|
45 |
lookup = NULL, |
|
46 |
seed = NULL, |
|
47 |
na_percentage = 0, |
|
48 |
na_vars = list( |
|
49 |
LOQFL = c(NA, 0.1), ABLFL2 = c(1234, 0.1), ABLFL = c(1235, 0.1), |
|
50 |
BASE2 = c(NA, 0.1), BASE = c(NA, 0.1), |
|
51 |
CHG2 = c(1235, 0.1), PCHG2 = c(1235, 0.1), CHG = c(1234, 0.1), PCHG = c(1234, 0.1) |
|
52 |
), |
|
53 |
cached = FALSE) { |
|
54 | 4x |
checkmate::assert_flag(cached) |
55 | 4x |
if (cached) { |
56 | 1x |
return(get_cached_data("cadlb")) |
57 |
} |
|
58 | ||
59 | 3x |
checkmate::assert_data_frame(adsl) |
60 | 3x |
checkmate::assert_character(param, min.len = 1, any.missing = FALSE) |
61 | 3x |
checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE) |
62 | 3x |
checkmate::assert_character(paramu, min.len = 1, any.missing = FALSE) |
63 | 3x |
checkmate::assert_character(lbcat, min.len = 1, any.missing = FALSE) |
64 | 3x |
checkmate::assert_string(visit_format) |
65 | 3x |
checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE) |
66 | 3x |
checkmate::assert_integer(n_days, len = 1, any.missing = FALSE) |
67 | 3x |
checkmate::assert_integer(max_n_lbs, len = 1, any.missing = FALSE) |
68 | 3x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
69 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
70 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
71 | 3x |
checkmate::assert_true(na_percentage < 1) |
72 | ||
73 |
# validate and initialize related variables |
|
74 | 3x |
lbcat_init_list <- relvar_init(param, lbcat) |
75 | 3x |
param_init_list <- relvar_init(param, paramcd) |
76 | 3x |
unit_init_list <- relvar_init(param, paramu) |
77 | ||
78 | 3x |
if (!is.null(seed)) { |
79 | 3x |
set.seed(seed) |
80 |
} |
|
81 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
82 | ||
83 | 3x |
adlb <- expand.grid( |
84 | 3x |
STUDYID = unique(adsl$STUDYID), |
85 | 3x |
USUBJID = adsl$USUBJID, |
86 | 3x |
PARAM = as.factor(param_init_list$relvar1), |
87 | 3x |
AVISIT = visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days), |
88 | 3x |
stringsAsFactors = FALSE |
89 |
) |
|
90 | ||
91 |
# assign AVAL based on different tests |
|
92 | 3x |
adlb <- adlb %>% mutate(AVAL = case_when( |
93 | 3x |
PARAM == param[1] ~ abs(stats::rnorm(nrow(adlb), mean = aval_mean[1], sd = 10)), |
94 | 3x |
PARAM == param[2] ~ abs(stats::rnorm(nrow(adlb), mean = aval_mean[2], sd = 1)), |
95 | 3x |
PARAM == param[3] ~ abs(stats::rnorm(nrow(adlb), mean = aval_mean[3], sd = 0.1)) |
96 |
)) |
|
97 | ||
98 |
# assign related variable values: PARAMxLBCAT are related |
|
99 | 3x |
adlb <- adlb %>% rel_var( |
100 | 3x |
var_name = "LBCAT", |
101 | 3x |
related_var = "PARAM", |
102 | 3x |
var_values = lbcat_init_list$relvar2 |
103 |
) |
|
104 | ||
105 |
# assign related variable values: PARAMxPARAMCD are related |
|
106 | 3x |
adlb <- adlb %>% rel_var( |
107 | 3x |
var_name = "PARAMCD", |
108 | 3x |
related_var = "PARAM", |
109 | 3x |
var_values = param_init_list$relvar2 |
110 |
) |
|
111 | ||
112 | 3x |
adlb <- adlb %>% |
113 | 3x |
dplyr::mutate(LBTESTCD = PARAMCD) %>% |
114 | 3x |
dplyr::mutate(LBTEST = PARAM) |
115 | ||
116 | 3x |
adlb <- adlb %>% dplyr::mutate(AVISITN = dplyr::case_when( |
117 | 3x |
AVISIT == "SCREENING" ~ -1, |
118 | 3x |
AVISIT == "BASELINE" ~ 0, |
119 | 3x |
(grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2, |
120 | 3x |
TRUE ~ NA_real_ |
121 |
)) |
|
122 | ||
123 | 3x |
adlb <- adlb %>% rel_var( |
124 | 3x |
var_name = "AVALU", |
125 | 3x |
related_var = "PARAM", |
126 | 3x |
var_values = unit_init_list$relvar2 |
127 |
) |
|
128 | ||
129 | 3x |
adlb <- adlb %>% |
130 | 3x |
dplyr::mutate(AVISITN = dplyr::case_when( |
131 | 3x |
AVISIT == "SCREENING" ~ -1, |
132 | 3x |
AVISIT == "BASELINE" ~ 0, |
133 | 3x |
(grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2, |
134 | 3x |
TRUE ~ NA_real_ |
135 |
)) |
|
136 | ||
137 |
# order to prepare for change from screening and baseline values |
|
138 | 3x |
adlb <- adlb[order(adlb$STUDYID, adlb$USUBJID, adlb$PARAMCD, adlb$AVISITN), ] |
139 | ||
140 | 3x |
adlb <- Reduce(rbind, lapply(split(adlb, adlb$USUBJID), function(x) { |
141 | 30x |
x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])] |
142 | 30x |
x$ABLFL2 <- ifelse(x$AVISIT == "SCREENING", "Y", "") |
143 | 30x |
x$ABLFL <- ifelse(toupper(visit_format) == "WEEK" & x$AVISIT == "BASELINE", |
144 | 30x |
"Y", |
145 | 30x |
ifelse(toupper(visit_format) == "CYCLE" & x$AVISIT == "CYCLE 1 DAY 1", "Y", "") |
146 |
) |
|
147 | 30x |
x |
148 |
})) |
|
149 | ||
150 | 3x |
adlb$BASE2 <- retain(adlb, adlb$AVAL, adlb$ABLFL2 == "Y") |
151 | 3x |
adlb$BASE <- ifelse(adlb$ABLFL2 != "Y", retain(adlb, adlb$AVAL, adlb$ABLFL == "Y"), NA) |
152 | ||
153 | 3x |
adlb <- adlb %>% |
154 | 3x |
dplyr::mutate(CHG2 = AVAL - BASE2) %>% |
155 | 3x |
dplyr::mutate(PCHG2 = 100 * (CHG2 / BASE2)) %>% |
156 | 3x |
dplyr::mutate(CHG = AVAL - BASE) %>% |
157 | 3x |
dplyr::mutate(PCHG = 100 * (CHG / BASE)) %>% |
158 | 3x |
dplyr::mutate(BASETYPE = "LAST") %>% |
159 | 3x |
dplyr::mutate(ANRLO = dplyr::case_when( |
160 | 3x |
PARAMCD == "ALT" ~ 7, |
161 | 3x |
PARAMCD == "CRP" ~ 8, |
162 | 3x |
PARAMCD == "IGA" ~ 0.8 |
163 |
)) %>% |
|
164 | 3x |
dplyr::mutate(ANRHI = dplyr::case_when( |
165 | 3x |
PARAMCD == "ALT" ~ 55, |
166 | 3x |
PARAMCD == "CRP" ~ 10, |
167 | 3x |
PARAMCD == "IGA" ~ 3 |
168 |
)) %>% |
|
169 | 3x |
dplyr::mutate(ANRIND = factor(dplyr::case_when( |
170 | 3x |
AVAL < ANRLO ~ "LOW", |
171 | 3x |
AVAL > ANRHI ~ "HIGH", |
172 | 3x |
TRUE ~ "NORMAL" |
173 |
))) %>% |
|
174 | 3x |
dplyr::mutate(LBSTRESC = factor(dplyr::case_when( |
175 | 3x |
PARAMCD == "ALT" ~ "<7", |
176 | 3x |
PARAMCD == "CRP" ~ "<8", |
177 | 3x |
PARAMCD == "IGA" ~ ">3" |
178 |
))) %>% |
|
179 | 3x |
dplyr::rowwise() %>% |
180 | 3x |
dplyr::mutate(LOQFL = factor( |
181 | 3x |
ifelse(eval(parse(text = paste(AVAL, LBSTRESC))), "Y", "N") |
182 |
)) %>% |
|
183 | 3x |
dplyr::ungroup() %>% |
184 | 3x |
dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>% |
185 | 3x |
dplyr::mutate(BNRIND = ANRIND[ABLFL == "Y"]) %>% |
186 | 3x |
dplyr::ungroup() %>% |
187 | 3x |
dplyr::mutate(SHIFT1 = factor(ifelse( |
188 | 3x |
AVISITN > 0, |
189 | 3x |
paste( |
190 | 3x |
retain( |
191 | 3x |
adlb, as.character(BNRIND), |
192 | 3x |
AVISITN == 0 |
193 |
), |
|
194 | 3x |
ANRIND, |
195 | 3x |
sep = " to " |
196 |
), |
|
197 |
"" |
|
198 |
))) %>% |
|
199 | 3x |
dplyr::mutate(ATOXGR = factor(dplyr::case_when( |
200 | 3x |
ANRIND == "LOW" ~ sample( |
201 | 3x |
c("-1", "-2", "-3", "-4", "-5"), |
202 | 3x |
nrow(adlb), |
203 | 3x |
replace = TRUE, |
204 | 3x |
prob = c(0.30, 0.25, 0.20, 0.15, 0) |
205 |
), |
|
206 | 3x |
ANRIND == "HIGH" ~ sample( |
207 | 3x |
c("1", "2", "3", "4", "5"), |
208 | 3x |
nrow(adlb), |
209 | 3x |
replace = TRUE, |
210 | 3x |
prob = c(0.30, 0.25, 0.20, 0.15, 0) |
211 |
), |
|
212 | 3x |
ANRIND == "NORMAL" ~ "0" |
213 |
))) %>% |
|
214 | 3x |
dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>% |
215 | 3x |
dplyr::mutate(BTOXGR = ATOXGR[ABLFL == "Y"]) %>% |
216 | 3x |
dplyr::ungroup() %>% |
217 | 3x |
dplyr::mutate(ATPTN = 1) %>% |
218 | 3x |
dplyr::mutate(DTYPE = NA) %>% |
219 | 3x |
dplyr::mutate(BTOXGRL = factor(dplyr::case_when( |
220 | 3x |
BTOXGR == "0" ~ "0", |
221 | 3x |
BTOXGR == "-1" ~ "1", |
222 | 3x |
BTOXGR == "-2" ~ "2", |
223 | 3x |
BTOXGR == "-3" ~ "3", |
224 | 3x |
BTOXGR == "-4" ~ "4", |
225 | 3x |
BTOXGR == "1" ~ "<Missing>", |
226 | 3x |
BTOXGR == "2" ~ "<Missing>", |
227 | 3x |
BTOXGR == "3" ~ "<Missing>", |
228 | 3x |
BTOXGR == "4" ~ "<Missing>" |
229 |
))) %>% |
|
230 | 3x |
dplyr::mutate(BTOXGRH = factor(dplyr::case_when( |
231 | 3x |
BTOXGR == "0" ~ "0", |
232 | 3x |
BTOXGR == "1" ~ "1", |
233 | 3x |
BTOXGR == "2" ~ "2", |
234 | 3x |
BTOXGR == "3" ~ "3", |
235 | 3x |
BTOXGR == "4" ~ "4", |
236 | 3x |
BTOXGR == "-1" ~ "<Missing>", |
237 | 3x |
BTOXGR == "-2" ~ "<Missing>", |
238 | 3x |
BTOXGR == "-3" ~ "<Missing>", |
239 | 3x |
BTOXGR == "-4" ~ "<Missing>", |
240 |
))) %>% |
|
241 | 3x |
dplyr::mutate(ATOXGRL = factor(dplyr::case_when( |
242 | 3x |
ATOXGR == "0" ~ "0", |
243 | 3x |
ATOXGR == "-1" ~ "1", |
244 | 3x |
ATOXGR == "-2" ~ "2", |
245 | 3x |
ATOXGR == "-3" ~ "3", |
246 | 3x |
ATOXGR == "-4" ~ "4", |
247 | 3x |
ATOXGR == "1" ~ "<Missing>", |
248 | 3x |
ATOXGR == "2" ~ "<Missing>", |
249 | 3x |
ATOXGR == "3" ~ "<Missing>", |
250 | 3x |
ATOXGR == "4" ~ "<Missing>", |
251 |
))) %>% |
|
252 | 3x |
dplyr::mutate(ATOXGRH = factor(dplyr::case_when( |
253 | 3x |
ATOXGR == "0" ~ "0", |
254 | 3x |
ATOXGR == "1" ~ "1", |
255 | 3x |
ATOXGR == "2" ~ "2", |
256 | 3x |
ATOXGR == "3" ~ "3", |
257 | 3x |
ATOXGR == "4" ~ "4", |
258 | 3x |
ATOXGR == "-1" ~ "<Missing>", |
259 | 3x |
ATOXGR == "-2" ~ "<Missing>", |
260 | 3x |
ATOXGR == "-3" ~ "<Missing>", |
261 | 3x |
ATOXGR == "-4" ~ "<Missing>", |
262 |
))) %>% |
|
263 | 3x |
rcd_var_relabel( |
264 | 3x |
STUDYID = attr(adsl$STUDYID, "label"), |
265 | 3x |
USUBJID = attr(adsl$USUBJID, "label") |
266 |
) |
|
267 | ||
268 |
# High and low descriptions of the different PARAMCD values |
|
269 |
# This is currently hard coded as the GDSR does not have these descriptions yet |
|
270 | 3x |
grade_lookup <- tibble::tribble( |
271 | 3x |
~PARAMCD, ~ATOXDSCL, ~ATOXDSCH, |
272 | 3x |
"ALB", "Hypoalbuminemia", NA_character_, |
273 | 3x |
"ALKPH", NA_character_, "Alkaline phosphatase increased", |
274 | 3x |
"ALT", NA_character_, "Alanine aminotransferase increased", |
275 | 3x |
"AST", NA_character_, "Aspartate aminotransferase increased", |
276 | 3x |
"BILI", NA_character_, "Blood bilirubin increased", |
277 | 3x |
"CA", "Hypocalcemia", "Hypercalcemia", |
278 | 3x |
"CHOLES", NA_character_, "Cholesterol high", |
279 | 3x |
"CK", NA_character_, "CPK increased", |
280 | 3x |
"CREAT", NA_character_, "Creatinine increased", |
281 | 3x |
"CRP", NA_character_, "C reactive protein increased", |
282 | 3x |
"GGT", NA_character_, "GGT increased", |
283 | 3x |
"GLUC", "Hypoglycemia", "Hyperglycemia", |
284 | 3x |
"HGB", "Anemia", "Hemoglobin increased", |
285 | 3x |
"IGA", NA_character_, "Immunoglobulin A increased", |
286 | 3x |
"POTAS", "Hypokalemia", "Hyperkalemia", |
287 | 3x |
"LYMPH", "CD4 lymphocytes decreased", NA_character_, |
288 | 3x |
"PHOS", "Hypophosphatemia", NA_character_, |
289 | 3x |
"PLAT", "Platelet count decreased", NA_character_, |
290 | 3x |
"SODIUM", "Hyponatremia", "Hypernatremia", |
291 | 3x |
"WBC", "White blood cell decreased", "Leukocytosis", |
292 |
) |
|
293 | ||
294 |
# merge grade_lookup onto adlb |
|
295 | 3x |
adlb <- dplyr::left_join(adlb, grade_lookup, by = "PARAMCD") |
296 | ||
297 | 3x |
adlb <- rcd_var_relabel( |
298 | 3x |
adlb, |
299 | 3x |
STUDYID = "Study Identifier", |
300 | 3x |
USUBJID = "Unique Subject Identifier" |
301 |
) |
|
302 | ||
303 |
# merge ADSL to be able to add LB date and study day variables |
|
304 | 3x |
adlb <- dplyr::inner_join( |
305 | 3x |
adlb, |
306 | 3x |
adsl, |
307 | 3x |
by = c("STUDYID", "USUBJID") |
308 |
) %>% |
|
309 | 3x |
dplyr::rowwise() %>% |
310 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
311 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
312 | 3x |
TRUE ~ TRTEDTM |
313 |
))) %>% |
|
314 | 3x |
dplyr::ungroup() |
315 | ||
316 | 3x |
adlb <- adlb %>% |
317 | 3x |
dplyr::group_by(USUBJID) %>% |
318 | 3x |
dplyr::arrange(USUBJID, AVISITN) %>% |
319 | 3x |
dplyr::mutate(ADTM = rep( |
320 | 3x |
sort(sample( |
321 | 3x |
seq(lubridate::as_datetime(TRTSDTM[1]), lubridate::as_datetime(TRTENDT[1]), by = "day"), |
322 | 3x |
size = nlevels(AVISIT) |
323 |
)), |
|
324 | 3x |
each = n() / nlevels(AVISIT) |
325 |
)) %>% |
|
326 | 3x |
dplyr::ungroup() %>% |
327 | 3x |
dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>% |
328 | 3x |
dplyr::select(-TRTENDT) %>% |
329 | 3x |
dplyr::arrange(STUDYID, USUBJID, ADTM) |
330 | ||
331 | 3x |
adlb <- adlb %>% |
332 | 3x |
dplyr::mutate(ASPID = sample(seq_len(dplyr::n()))) %>% |
333 | 3x |
dplyr::group_by(USUBJID) %>% |
334 | 3x |
dplyr::mutate(LBSEQ = seq_len(dplyr::n())) %>% |
335 | 3x |
dplyr::mutate(ASEQ = LBSEQ) %>% |
336 | 3x |
dplyr::ungroup() %>% |
337 | 3x |
dplyr::arrange( |
338 | 3x |
STUDYID, |
339 | 3x |
USUBJID, |
340 | 3x |
PARAMCD, |
341 | 3x |
BASETYPE, |
342 | 3x |
AVISITN, |
343 | 3x |
ATPTN, |
344 | 3x |
DTYPE, |
345 | 3x |
ADTM, |
346 | 3x |
LBSEQ, |
347 | 3x |
ASPID |
348 |
) |
|
349 | ||
350 | 3x |
adlb <- adlb %>% dplyr::mutate(ONTRTFL = factor(dplyr::case_when( |
351 | 3x |
!AVISIT %in% c("SCREENING", "BASELINE") ~ "Y", |
352 | 3x |
TRUE ~ "" |
353 |
))) |
|
354 | ||
355 | 3x |
flag_variables <- function(data, |
356 | 3x |
apply_grouping, |
357 | 3x |
apply_filter, |
358 | 3x |
apply_mutate) { |
359 | 15x |
data_compare <- data %>% |
360 | 15x |
dplyr::mutate(row_check = seq_len(nrow(data))) |
361 | ||
362 | 15x |
data <- data_compare %>% |
363 |
{ |
|
364 | 15x |
if (apply_grouping == TRUE) { |
365 | 9x |
dplyr::group_by(., USUBJID, PARAMCD, BASETYPE, AVISIT) |
366 |
} else { |
|
367 | 6x |
dplyr::group_by(., USUBJID, PARAMCD, BASETYPE) |
368 |
} |
|
369 |
} %>% |
|
370 | 15x |
dplyr::arrange(ADTM, ASPID, LBSEQ) %>% |
371 |