adam_db_argument.RmdWe have decided to use the dm package in
chevron to model the data relations of the CDISC ADaM
datasets. In this vignette we introduce how we intend to use ADaM data
in a relational model, in R with dm. Note that
dm can be connected to a database.
We remove the ADSL variables in the database of the non-adsl datasets and merge the adsl variables back as needed.
library(chevron)
#> Registered S3 method overwritten by 'tern':
#> method from
#> tidy.glm broom
library(dm)
#>
#> Attaching package: 'dm'
#> The following object is masked from 'package:stats':
#>
#> filter
syn_data <- syn_test_data()[c("adsl", "adae", "adlb", "adrs", "adsub")]
drop_adsl_vars <- function(df, adsl) {
df[c("USUBJID", "STUDYID", setdiff(names(df), names(adsl)))]
}
dbl <- c(list(adsl = syn_data$adsl), lapply(syn_data[-1], drop_adsl_vars, adsl = syn_data$adsl))
### Define primary and foreign keys
adam_study_data <- new_dm(dbl) %>%
dm_add_pk(adsl, c("USUBJID", "STUDYID")) %>%
dm_add_fk(adae, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
dm_add_pk(adae, c("USUBJID", "STUDYID", "ASTDTM", "AETERM", "AESEQ")) %>%
dm_add_fk(adlb, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
dm_add_pk(adlb, c("STUDYID", "USUBJID", "PARAMCD", "BASETYPE", "AVISITN",
"ATPTN", "DTYPE", "ADTM", "LBSEQ", "ASPID")) %>%
dm_add_fk(adrs, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
dm_add_pk(adrs, c("STUDYID", "USUBJID", "PARAMCD", "AVISITN", "ADTM", "RSSEQ")) %>%
dm_add_fk(adsub, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
dm_add_pk(adsub, c("USUBJID", "STUDYID", "PARAMCD"))
validate_dm(adam_study_data)Note that the keys are defined in the data analysis plan at Roche.
List the keys
dm::dm_get_all_fks(adam_study_data)
#> # A tibble: 4 × 5
#> child_table child_fk_cols parent_table parent_key_cols on_delete
#> <chr> <keys> <chr> <keys> <chr>
#> 1 adae USUBJID, STUDYID adsl USUBJID, STUDYID no_action
#> 2 adlb USUBJID, STUDYID adsl USUBJID, STUDYID no_action
#> 3 adrs USUBJID, STUDYID adsl USUBJID, STUDYID no_action
#> 4 adsub USUBJID, STUDYID adsl USUBJID, STUDYID no_action
dm::dm_get_all_pks(adam_study_data)
#> # A tibble: 5 × 2
#> table pk_col
#> <chr> <keys>
#> 1 adsl USUBJID, STUDYID
#> 2 adae USUBJID, STUDYID, … (5 total)
#> 3 adlb STUDYID, USUBJID, … (10 total)
#> 4 adrs STUDYID, USUBJID, … (6 total)
#> 5 adsub USUBJID, STUDYID, PARAMCDFurther we can subset the data:
which adds the filter information to the dm object but
does not apply the filtering
db %>%
pull_tbl(adsl) %>%
select(USUBJID, STUDYID, SEX) %>%
head()
#> # A tibble: 6 × 3
#> USUBJID STUDYID SEX
#> <chr> <chr> <fct>
#> 1 AB12345-RUS-3-id-378 AB12345 F
#> 2 AB12345-CHN-11-id-220 AB12345 F
#> 3 AB12345-USA-1-id-45 AB12345 F
#> 4 AB12345-USA-1-id-261 AB12345 F
#> 5 AB12345-NGA-11-id-173 AB12345 F
#> 6 AB12345-CHN-4-id-73 AB12345 FIn order to get the filtered data one has to use the
dm_apply_filters() function
db %>%
dm_apply_filters() %>%
pull_tbl(adsl) %>%
select(USUBJID, STUDYID, SEX) %>%
head()
#> # A tibble: 6 × 3
#> USUBJID STUDYID SEX
#> <chr> <chr> <fct>
#> 1 AB12345-RUS-3-id-378 AB12345 F
#> 2 AB12345-CHN-11-id-220 AB12345 F
#> 3 AB12345-USA-1-id-45 AB12345 F
#> 4 AB12345-USA-1-id-261 AB12345 F
#> 5 AB12345-NGA-11-id-173 AB12345 F
#> 6 AB12345-CHN-4-id-73 AB12345 For
db %>%
dm_apply_filters_to_tbl(adsl) %>%
select(USUBJID, STUDYID, SEX) %>%
head()
#> # A tibble: 6 × 3
#> USUBJID STUDYID SEX
#> <chr> <chr> <fct>
#> 1 AB12345-RUS-3-id-378 AB12345 F
#> 2 AB12345-CHN-11-id-220 AB12345 F
#> 3 AB12345-USA-1-id-45 AB12345 F
#> 4 AB12345-USA-1-id-261 AB12345 F
#> 5 AB12345-NGA-11-id-173 AB12345 F
#> 6 AB12345-CHN-4-id-73 AB12345 FIf one does not need the lazy filtering feature then one can also
zoom to the desired table and use filter
adam_study_data %>%
dm_zoom_to(adsl) %>%
filter(SEX == "F") %>%
dm_update_zoomed() %>%
pull_tbl(adsl) %>%
select(USUBJID, STUDYID, SEX) %>%
head()
#> # A tibble: 6 × 3
#> USUBJID STUDYID SEX
#> <chr> <chr> <fct>
#> 1 AB12345-RUS-3-id-378 AB12345 F
#> 2 AB12345-CHN-11-id-220 AB12345 F
#> 3 AB12345-USA-1-id-45 AB12345 F
#> 4 AB12345-USA-1-id-261 AB12345 F
#> 5 AB12345-NGA-11-id-173 AB12345 F
#> 6 AB12345-CHN-4-id-73 AB12345 Fdm supports most of the dplyr verbs, for
example mutate
db <- adam_study_data %>%
dm_zoom_to(adsl) %>%
mutate(TEST = paste(SEX, "-", BWGHTSI)) %>%
dm_update_zoomed()
db$adsl$TEST[1:5]
#> [1] "M - 109.5" "M - 111.1" "F - 108.3" "F - 131.1" "M - 83.1"In chevron we use dm objects to combine all
data into one object. The tlg-functions and the functions
returned std_filter and std_mutate require a
dm object to the adam_db argument.
dm objects also are useful to filter the data. For
example filtering for all females also subsets the other datasets as per
relation specification:
adam_study_data %>%
dm_filter(adsl, SEX == "F") %>%
dm_apply_filters() %>%
dm_join_to_tbl(adae, adsl) %>%
select(USUBJID, SEX, AESEQ, AETERM, AESEQ) %>%
head()
#> # A tibble: 6 × 4
#> USUBJID SEX AESEQ AETERM
#> <chr> <fct> <int> <fct>
#> 1 AB12345-BRA-1-id-141 F 1 trm B.2.1.2.1
#> 2 AB12345-BRA-1-id-141 F 2 trm D.2.1.5.3
#> 3 AB12345-BRA-1-id-141 F 3 trm A.1.1.1.1
#> 4 AB12345-BRA-1-id-141 F 4 trm A.1.1.1.2
#> 5 AB12345-BRA-1-id-141 F 5 trm A.1.1.1.1
#> 6 AB12345-BRA-1-id-141 F 6 trm D.1.1.1.1For standard outputs we do know which data and variables we need to create a TLG.
adam_study_data %>%
dm_join_to_tbl(adae, adsl) %>%
select(AESEQ, ARM, SEX)
#> # A tibble: 1,934 × 3
#> AESEQ ARM SEX
#> <int> <fct> <fct>
#> 1 1 A: Drug X M
#> 2 2 A: Drug X M
#> 3 3 A: Drug X M
#> 4 4 A: Drug X M
#> 5 1 C: Combination F
#> 6 2 C: Combination F
#> 7 3 C: Combination F
#> 8 4 C: Combination F
#> 9 5 C: Combination F
#> 10 6 C: Combination F
#> # … with 1,924 more rowsAdd variables from adsub to adsl:
library(tidyr)
adsub_wide <- adam_study_data$adsub %>%
filter(PARAMCD %in% c("BWGHTSI", "BECOG")) %>%
select(USUBJID, STUDYID, PARAMCD, AVAL, AVALU) %>%
pivot_wider(names_from = PARAMCD, values_from = c(AVAL, AVALU))
dm_add_tbl(adam_study_data, adsub_wide) %>%
dm_add_fk(adsub_wide, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
dm_add_pk(adsub_wide, c("USUBJID", "STUDYID")) %>%
dm_join_to_tbl(adsl, adsub_wide)
#> # A tibble: 400 × 49
#> USUBJID STUDYID AVAL_BWGHTSI AVAL_BECOG AVALU_BWGHTSI AVALU_BECOG SUBJID
#> <chr> <chr> <dbl> <dbl> <fct> <fct> <chr>
#> 1 AB12345-BRA… AB12345 62.5 3 kg "" id-105
#> 2 AB12345-BRA… AB12345 98.8 3 kg "" id-134
#> 3 AB12345-BRA… AB12345 49.6 1 kg "" id-141
#> 4 AB12345-BRA… AB12345 162. 3 kg "" id-236
#> 5 AB12345-BRA… AB12345 105. 2 kg "" id-265
#> 6 AB12345-BRA… AB12345 53.8 4 kg "" id-42
#> 7 AB12345-BRA… AB12345 60.6 3 kg "" id-65
#> 8 AB12345-BRA… AB12345 63.8 4 kg "" id-93
#> 9 AB12345-BRA… AB12345 144. 3 kg "" id-171
#> 10 AB12345-BRA… AB12345 76.9 3 kg "" id-217
#> # … with 390 more rows, and 42 more variables: SITEID <chr>, AGE <int>,
#> # SEX <fct>, RACE <fct>, COUNTRY <fct>, DTHFL <fct>, INVID <chr>,
#> # INVNAM <chr>, ARM <fct>, ARMCD <fct>, ACTARM <fct>, ACTARMCD <fct>,
#> # REGION1 <fct>, STRATA1 <fct>, STRATA2 <fct>, BMRKR1 <dbl>, BMRKR2 <fct>,
#> # ITTFL <fct>, SAFFL <fct>, BMEASIFL <fct>, BEP01FL <fct>, RANDDT <date>,
#> # TRTSDTM <dttm>, TRTEDTM <dttm>, EOSSTT <fct>, EOSDT <date>, EOSDY <int>,
#> # DCSREAS <fct>, DTHDT <date>, DTHCAUS <fct>, DTHCAT <fct>, LDDTHELD <int>, …