adam_db_argument.Rmd
We have decided to use the dm
package in
chevron
to model the data relations of the CDISC ADaM
datasets. In this vignette we introduce how we intend to use ADaM data
in a relational model, in R with dm
. Note that
dm
can be connected to a database.
We remove the ADSL variables in the database of the non-adsl datasets and merge the adsl variables back as needed.
library(chevron)
#> Registered S3 method overwritten by 'tern':
#> method from
#> tidy.glm broom
library(dm)
#>
#> Attaching package: 'dm'
#> The following object is masked from 'package:stats':
#>
#> filter
syn_data <- syn_test_data()[c("adsl", "adae", "adlb", "adrs", "adsub")]
drop_adsl_vars <- function(df, adsl) {
df[c("USUBJID", "STUDYID", setdiff(names(df), names(adsl)))]
}
dbl <- c(list(adsl = syn_data$adsl), lapply(syn_data[-1], drop_adsl_vars, adsl = syn_data$adsl))
### Define primary and foreign keys
adam_study_data <- new_dm(dbl) %>%
dm_add_pk(adsl, c("USUBJID", "STUDYID")) %>%
dm_add_fk(adae, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
dm_add_pk(adae, c("USUBJID", "STUDYID", "ASTDTM", "AETERM", "AESEQ")) %>%
dm_add_fk(adlb, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
dm_add_pk(adlb, c("STUDYID", "USUBJID", "PARAMCD", "BASETYPE", "AVISITN",
"ATPTN", "DTYPE", "ADTM", "LBSEQ", "ASPID")) %>%
dm_add_fk(adrs, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
dm_add_pk(adrs, c("STUDYID", "USUBJID", "PARAMCD", "AVISITN", "ADTM", "RSSEQ")) %>%
dm_add_fk(adsub, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
dm_add_pk(adsub, c("USUBJID", "STUDYID", "PARAMCD"))
validate_dm(adam_study_data)
Note that the keys are defined in the data analysis plan at Roche.
List the keys
dm::dm_get_all_fks(adam_study_data)
#> # A tibble: 4 × 5
#> child_table child_fk_cols parent_table parent_key_cols on_delete
#> <chr> <keys> <chr> <keys> <chr>
#> 1 adae USUBJID, STUDYID adsl USUBJID, STUDYID no_action
#> 2 adlb USUBJID, STUDYID adsl USUBJID, STUDYID no_action
#> 3 adrs USUBJID, STUDYID adsl USUBJID, STUDYID no_action
#> 4 adsub USUBJID, STUDYID adsl USUBJID, STUDYID no_action
dm::dm_get_all_pks(adam_study_data)
#> # A tibble: 5 × 2
#> table pk_col
#> <chr> <keys>
#> 1 adsl USUBJID, STUDYID
#> 2 adae USUBJID, STUDYID, … (5 total)
#> 3 adlb STUDYID, USUBJID, … (10 total)
#> 4 adrs STUDYID, USUBJID, … (6 total)
#> 5 adsub USUBJID, STUDYID, PARAMCD
Further we can subset the data:
which adds the filter information to the dm
object but
does not apply the filtering
db %>%
pull_tbl(adsl) %>%
select(USUBJID, STUDYID, SEX) %>%
head()
#> # A tibble: 6 × 3
#> USUBJID STUDYID SEX
#> <chr> <chr> <fct>
#> 1 AB12345-RUS-3-id-378 AB12345 F
#> 2 AB12345-CHN-11-id-220 AB12345 F
#> 3 AB12345-USA-1-id-45 AB12345 F
#> 4 AB12345-USA-1-id-261 AB12345 F
#> 5 AB12345-NGA-11-id-173 AB12345 F
#> 6 AB12345-CHN-4-id-73 AB12345 F
In order to get the filtered data one has to use the
dm_apply_filters()
function
db %>%
dm_apply_filters() %>%
pull_tbl(adsl) %>%
select(USUBJID, STUDYID, SEX) %>%
head()
#> # A tibble: 6 × 3
#> USUBJID STUDYID SEX
#> <chr> <chr> <fct>
#> 1 AB12345-RUS-3-id-378 AB12345 F
#> 2 AB12345-CHN-11-id-220 AB12345 F
#> 3 AB12345-USA-1-id-45 AB12345 F
#> 4 AB12345-USA-1-id-261 AB12345 F
#> 5 AB12345-NGA-11-id-173 AB12345 F
#> 6 AB12345-CHN-4-id-73 AB12345 F
or
db %>%
dm_apply_filters_to_tbl(adsl) %>%
select(USUBJID, STUDYID, SEX) %>%
head()
#> # A tibble: 6 × 3
#> USUBJID STUDYID SEX
#> <chr> <chr> <fct>
#> 1 AB12345-RUS-3-id-378 AB12345 F
#> 2 AB12345-CHN-11-id-220 AB12345 F
#> 3 AB12345-USA-1-id-45 AB12345 F
#> 4 AB12345-USA-1-id-261 AB12345 F
#> 5 AB12345-NGA-11-id-173 AB12345 F
#> 6 AB12345-CHN-4-id-73 AB12345 F
If one does not need the lazy filtering feature then one can also
zoom to the desired table and use filter
adam_study_data %>%
dm_zoom_to(adsl) %>%
filter(SEX == "F") %>%
dm_update_zoomed() %>%
pull_tbl(adsl) %>%
select(USUBJID, STUDYID, SEX) %>%
head()
#> # A tibble: 6 × 3
#> USUBJID STUDYID SEX
#> <chr> <chr> <fct>
#> 1 AB12345-RUS-3-id-378 AB12345 F
#> 2 AB12345-CHN-11-id-220 AB12345 F
#> 3 AB12345-USA-1-id-45 AB12345 F
#> 4 AB12345-USA-1-id-261 AB12345 F
#> 5 AB12345-NGA-11-id-173 AB12345 F
#> 6 AB12345-CHN-4-id-73 AB12345 F
dm
supports most of the dplyr
verbs, for
example mutate
db <- adam_study_data %>%
dm_zoom_to(adsl) %>%
mutate(TEST = paste(SEX, "-", BWGHTSI)) %>%
dm_update_zoomed()
db$adsl$TEST[1:5]
#> [1] "M - 109.5" "M - 111.1" "F - 108.3" "F - 131.1" "M - 83.1"
In chevron
we use dm
objects to combine all
data into one object. The tlg-functions and the functions
returned std_filter
and std_mutate
require a
dm
object to the adam_db
argument.
dm
objects also are useful to filter the data. For
example filtering for all females also subsets the other datasets as per
relation specification:
adam_study_data %>%
dm_filter(adsl, SEX == "F") %>%
dm_apply_filters() %>%
dm_join_to_tbl(adae, adsl) %>%
select(USUBJID, SEX, AESEQ, AETERM, AESEQ) %>%
head()
#> # A tibble: 6 × 4
#> USUBJID SEX AESEQ AETERM
#> <chr> <fct> <int> <fct>
#> 1 AB12345-BRA-1-id-141 F 1 trm B.2.1.2.1
#> 2 AB12345-BRA-1-id-141 F 2 trm D.2.1.5.3
#> 3 AB12345-BRA-1-id-141 F 3 trm A.1.1.1.1
#> 4 AB12345-BRA-1-id-141 F 4 trm A.1.1.1.2
#> 5 AB12345-BRA-1-id-141 F 5 trm A.1.1.1.1
#> 6 AB12345-BRA-1-id-141 F 6 trm D.1.1.1.1
For standard outputs we do know which data and variables we need to create a TLG.
adam_study_data %>%
dm_join_to_tbl(adae, adsl) %>%
select(AESEQ, ARM, SEX)
#> # A tibble: 1,934 × 3
#> AESEQ ARM SEX
#> <int> <fct> <fct>
#> 1 1 A: Drug X M
#> 2 2 A: Drug X M
#> 3 3 A: Drug X M
#> 4 4 A: Drug X M
#> 5 1 C: Combination F
#> 6 2 C: Combination F
#> 7 3 C: Combination F
#> 8 4 C: Combination F
#> 9 5 C: Combination F
#> 10 6 C: Combination F
#> # … with 1,924 more rows
Add variables from adsub
to adsl
:
library(tidyr)
adsub_wide <- adam_study_data$adsub %>%
filter(PARAMCD %in% c("BWGHTSI", "BECOG")) %>%
select(USUBJID, STUDYID, PARAMCD, AVAL, AVALU) %>%
pivot_wider(names_from = PARAMCD, values_from = c(AVAL, AVALU))
dm_add_tbl(adam_study_data, adsub_wide) %>%
dm_add_fk(adsub_wide, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
dm_add_pk(adsub_wide, c("USUBJID", "STUDYID")) %>%
dm_join_to_tbl(adsl, adsub_wide)
#> # A tibble: 400 × 49
#> USUBJID STUDYID AVAL_BWGHTSI AVAL_BECOG AVALU_BWGHTSI AVALU_BECOG SUBJID
#> <chr> <chr> <dbl> <dbl> <fct> <fct> <chr>
#> 1 AB12345-BRA… AB12345 62.5 3 kg "" id-105
#> 2 AB12345-BRA… AB12345 98.8 3 kg "" id-134
#> 3 AB12345-BRA… AB12345 49.6 1 kg "" id-141
#> 4 AB12345-BRA… AB12345 162. 3 kg "" id-236
#> 5 AB12345-BRA… AB12345 105. 2 kg "" id-265
#> 6 AB12345-BRA… AB12345 53.8 4 kg "" id-42
#> 7 AB12345-BRA… AB12345 60.6 3 kg "" id-65
#> 8 AB12345-BRA… AB12345 63.8 4 kg "" id-93
#> 9 AB12345-BRA… AB12345 144. 3 kg "" id-171
#> 10 AB12345-BRA… AB12345 76.9 3 kg "" id-217
#> # … with 390 more rows, and 42 more variables: SITEID <chr>, AGE <int>,
#> # SEX <fct>, RACE <fct>, COUNTRY <fct>, DTHFL <fct>, INVID <chr>,
#> # INVNAM <chr>, ARM <fct>, ARMCD <fct>, ACTARM <fct>, ACTARMCD <fct>,
#> # REGION1 <fct>, STRATA1 <fct>, STRATA2 <fct>, BMRKR1 <dbl>, BMRKR2 <fct>,
#> # ITTFL <fct>, SAFFL <fct>, BMEASIFL <fct>, BEP01FL <fct>, RANDDT <date>,
#> # TRTSDTM <dttm>, TRTEDTM <dttm>, EOSSTT <fct>, EOSDT <date>, EOSDY <int>,
#> # DCSREAS <fct>, DTHDT <date>, DTHCAUS <fct>, DTHCAT <fct>, LDDTHELD <int>, …