The `adam_db` argument
adam_db_argument.Rmd
Introduction
We have decided to use the dm
package in
chevron
to model the data relations of the
CDISC
ADaM
datasets. In this vignette we
introduce how we intend to use ADaM
data in a relational
model, in R with dm
. Note that dm
can be
connected to a database.
Sample Data
We remove the ADSL
variables in the database of the
non-ADSL
datasets and merge the ADSL
variables
back as needed.
library(chevron)
#> Registered S3 method overwritten by 'tern':
#> method from
#> tidy.glm broom
library(dm)
#>
#> Attaching package: 'dm'
#> The following object is masked from 'package:stats':
#>
#> filter
drop_adsl_vars <- function(df, adsl) {
df[c("USUBJID", "STUDYID", setdiff(names(df), names(adsl)))]
}
data(syn_data, package = "chevron")
dbl <- c(list(adsl = syn_data$adsl), lapply(syn_data[-1], drop_adsl_vars, adsl = syn_data$adsl))
### Define primary and foreign keys
adam_study_data <- new_dm(dbl) %>%
dm_add_pk(adsl, c("USUBJID", "STUDYID")) %>%
dm_add_fk(adae, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
dm_add_pk(adae, c("USUBJID", "STUDYID", "ASTDTM", "AETERM", "AESEQ")) %>%
dm_add_fk(adlb, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
dm_add_pk(adlb, c(
"STUDYID", "USUBJID", "PARAMCD", "BASETYPE", "AVISITN",
"ATPTN", "DTYPE", "ADTM", "LBSEQ", "ASPID"
)) %>%
dm_add_fk(adrs, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
dm_add_pk(adrs, c("STUDYID", "USUBJID", "PARAMCD", "AVISITN", "ADTM", "RSSEQ")) %>%
dm_add_fk(adsub, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
dm_add_pk(adsub, c("USUBJID", "STUDYID", "PARAMCD"))
dm_validate(adam_study_data)
Note that the keys are defined in the data analysis plan at Roche.
Work With dm
Object
List the keys
dm::dm_get_all_fks(adam_study_data)
#> # A tibble: 4 × 5
#> child_table child_fk_cols parent_table parent_key_cols on_delete
#> <chr> <keys> <chr> <keys> <chr>
#> 1 adae USUBJID, STUDYID adsl USUBJID, STUDYID no_action
#> 2 adlb USUBJID, STUDYID adsl USUBJID, STUDYID no_action
#> 3 adrs USUBJID, STUDYID adsl USUBJID, STUDYID no_action
#> 4 adsub USUBJID, STUDYID adsl USUBJID, STUDYID no_action
dm::dm_get_all_pks(adam_study_data)
#> # A tibble: 5 × 3
#> table pk_col autoincrement
#> <chr> <keys> <lgl>
#> 1 adsl USUBJID, STUDYID FALSE
#> 2 adae USUBJID, STUDYID, … (5 total) FALSE
#> 3 adlb STUDYID, USUBJID, … (10 total) FALSE
#> 4 adrs STUDYID, USUBJID, … (6 total) FALSE
#> 5 adsub USUBJID, STUDYID, PARAMCD FALSE
Subset the data
Further we can subset the data:
The dataset adsl
is filtered
db %>%
pull_tbl(adsl) %>%
select(USUBJID, STUDYID, SEX) %>%
head()
#> # A tibble: 6 × 3
#> USUBJID STUDYID SEX
#> <chr> <chr> <fct>
#> 1 AB12345-RUS-3-id-378 AB12345 F
#> 2 AB12345-CHN-11-id-220 AB12345 F
#> 3 AB12345-USA-1-id-45 AB12345 F
#> 4 AB12345-USA-1-id-261 AB12345 F
#> 5 AB12345-NGA-11-id-173 AB12345 F
#> 6 AB12345-CHN-4-id-73 AB12345 F
If one does not need the relational filtering feature then one can
also zoom to the desired table and use filter
adam_study_data %>%
dm_zoom_to(adsl) %>%
filter(SEX == "F") %>%
dm_update_zoomed() %>%
pull_tbl(adsl) %>%
select(USUBJID, STUDYID, SEX) %>%
head()
#> # A tibble: 6 × 3
#> USUBJID STUDYID SEX
#> <chr> <chr> <fct>
#> 1 AB12345-RUS-3-id-378 AB12345 F
#> 2 AB12345-CHN-11-id-220 AB12345 F
#> 3 AB12345-USA-1-id-45 AB12345 F
#> 4 AB12345-USA-1-id-261 AB12345 F
#> 5 AB12345-NGA-11-id-173 AB12345 F
#> 6 AB12345-CHN-4-id-73 AB12345 F
Mutate Data
dm
supports most of the dplyr
verbs, for
example mutate
db <- adam_study_data %>%
dm_zoom_to(adsl) %>%
mutate(TEST = paste(SEX, "-", BWGHTSI)) %>%
dm_update_zoomed()
db$adsl$TEST[1:5]
#> [1] "M - 109.5" "M - 111.1" "F - 108.3" "F - 131.1" "M - 83.1"
dm
Objects in Chevron
In chevron
we use dm
objects to combine all
data into one object. The TLG-functions and the functions
returned std_filter
and std_mutate
require a
dm
object to the adam_db
argument.
dm
objects also are useful to filter the data. For
example filtering for all females also subsets the other datasets as per
relation specification:
adam_study_data %>%
dm_filter(adsl, SEX == "F") %>%
dm_apply_filters() %>%
dm_flatten_to_tbl(adae, adsl) %>%
select(USUBJID, SEX, AESEQ, AETERM, AESEQ) %>%
head()
#> Warning: The `table` argument of `dm_filter()` is deprecated as of dm 1.0.0.
#> ℹ `dm_filter()` now takes named filter expressions, the names correspond to the
#> tables to be filtered. You no longer need to call `dm_apply_filters()` to
#> materialize the filters.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.
#> # A tibble: 6 × 4
#> USUBJID SEX AESEQ AETERM
#> <chr> <fct> <int> <fct>
#> 1 AB12345-BRA-1-id-141 F 1 trm B.2.1.2.1
#> 2 AB12345-BRA-1-id-141 F 2 trm D.2.1.5.3
#> 3 AB12345-BRA-1-id-141 F 3 trm A.1.1.1.1
#> 4 AB12345-BRA-1-id-141 F 4 trm A.1.1.1.2
#> 5 AB12345-BRA-1-id-141 F 5 trm A.1.1.1.1
#> 6 AB12345-BRA-1-id-141 F 6 trm D.1.1.1.1
Get Analysis Data
Vertical Analysis Data
For standard outputs we do know which data and variables we need to create a TLG.
adam_study_data %>%
dm_join_to_tbl(adae, adsl) %>%
select(AESEQ, ARM, SEX)
#> Warning: `dm_join_to_tbl()` was deprecated in dm 0.3.0.
#> ℹ Please use `dm_flatten_to_tbl()` instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.
#> # A tibble: 1,934 × 3
#> AESEQ ARM SEX
#> <int> <fct> <fct>
#> 1 1 A: Drug X M
#> 2 2 A: Drug X M
#> 3 3 A: Drug X M
#> 4 4 A: Drug X M
#> 5 1 C: Combination F
#> 6 2 C: Combination F
#> 7 3 C: Combination F
#> 8 4 C: Combination F
#> 9 5 C: Combination F
#> 10 6 C: Combination F
#> # ℹ 1,924 more rows
Horizontal
Add variables from adsub
to adsl
:
library(tidyr)
adsub_wide <- adam_study_data$adsub %>%
filter(PARAMCD %in% c("BWGHTSI", "BECOG")) %>%
select(USUBJID, STUDYID, PARAMCD, AVAL, AVALU) %>%
pivot_wider(names_from = PARAMCD, values_from = c(AVAL, AVALU))
dm(adam_study_data, adsub_wide) %>%
dm_add_fk(adsub_wide, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
dm_add_pk(adsub_wide, c("USUBJID", "STUDYID")) %>%
dm_join_to_tbl(adsl, adsub_wide)
#> # A tibble: 400 × 68
#> USUBJID STUDYID AVAL_BWGHTSI AVAL_BECOG AVALU_BWGHTSI AVALU_BECOG SUBJID
#> <chr> <chr> <dbl> <dbl> <fct> <fct> <chr>
#> 1 AB12345-BRA… AB12345 62.5 3 kg "" id-105
#> 2 AB12345-BRA… AB12345 98.8 3 kg "" id-134
#> 3 AB12345-BRA… AB12345 49.6 1 kg "" id-141
#> 4 AB12345-BRA… AB12345 162. 3 kg "" id-236
#> 5 AB12345-BRA… AB12345 105. 2 kg "" id-265
#> 6 AB12345-BRA… AB12345 53.8 4 kg "" id-42
#> 7 AB12345-BRA… AB12345 60.6 3 kg "" id-65
#> 8 AB12345-BRA… AB12345 63.8 4 kg "" id-93
#> 9 AB12345-BRA… AB12345 144. 3 kg "" id-171
#> 10 AB12345-BRA… AB12345 76.9 3 kg "" id-217
#> # ℹ 390 more rows
#> # ℹ 61 more variables: SITEID <chr>, AGE <int>, AGEU <fct>, SEX <fct>,
#> # RACE <fct>, ETHNIC <fct>, COUNTRY <fct>, DTHFL <fct>, INVID <chr>,
#> # INVNAM <chr>, ARM <fct>, ARMCD <fct>, ACTARM <fct>, ACTARMCD <fct>,
#> # TRT01P <fct>, TRT01A <fct>, TRT02P <fct>, TRT02A <fct>, REGION1 <fct>,
#> # STRATA1 <fct>, STRATA2 <fct>, BMRKR1 <dbl>, BMRKR2 <fct>, ITTFL <fct>,
#> # SAFFL <fct>, BMEASIFL <fct>, BEP01FL <fct>, AEWITHFL <fct>, …