The `adam_db` argument

Introduction

We have decided to use the dm package in chevron to model the data relations of the CDISC ADaM datasets. In this vignette we introduce how we intend to use ADaM data in a relational model, in R with dm. Note that dm can be connected to a database.

Sample Data

We remove the ADSL variables in the database of the non-ADSL datasets and merge the ADSL variables back as needed.

library(chevron)
#> Registered S3 method overwritten by 'tern':
#>   method   from 
#>   tidy.glm broom
library(dm)
#> 
#> Attaching package: 'dm'
#> The following object is masked from 'package:stats':
#> 
#>     filter

drop_adsl_vars <- function(df, adsl) {
  df[c("USUBJID", "STUDYID", setdiff(names(df), names(adsl)))]
}

data(syn_data, package = "chevron")
dbl <- c(list(adsl = syn_data$adsl), lapply(syn_data[-1], drop_adsl_vars, adsl = syn_data$adsl))

### Define primary and foreign keys
adam_study_data <- new_dm(dbl) %>%
  dm_add_pk(adsl, c("USUBJID", "STUDYID")) %>%
  dm_add_fk(adae, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
  dm_add_pk(adae, c("USUBJID", "STUDYID", "ASTDTM", "AETERM", "AESEQ")) %>%
  dm_add_fk(adlb, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
  dm_add_pk(adlb, c(
    "STUDYID", "USUBJID", "PARAMCD", "BASETYPE", "AVISITN",
    "ATPTN", "DTYPE", "ADTM", "LBSEQ", "ASPID"
  )) %>%
  dm_add_fk(adrs, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
  dm_add_pk(adrs, c("STUDYID", "USUBJID", "PARAMCD", "AVISITN", "ADTM", "RSSEQ")) %>%
  dm_add_fk(adsub, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
  dm_add_pk(adsub, c("USUBJID", "STUDYID", "PARAMCD"))

dm_validate(adam_study_data)

Note that the keys are defined in the data analysis plan at Roche.

Work With `dm` Object

List the keys

dm::dm_get_all_fks(adam_study_data)
#> # A tibble: 4 × 5
#>   child_table child_fk_cols    parent_table parent_key_cols  on_delete
#>   <chr>       <keys>           <chr>        <keys>           <chr>    
#> 1 adae        USUBJID, STUDYID adsl         USUBJID, STUDYID no_action
#> 2 adlb        USUBJID, STUDYID adsl         USUBJID, STUDYID no_action
#> 3 adrs        USUBJID, STUDYID adsl         USUBJID, STUDYID no_action
#> 4 adsub       USUBJID, STUDYID adsl         USUBJID, STUDYID no_action

dm::dm_get_all_pks(adam_study_data)
#> # A tibble: 5 × 3
#>   table pk_col                         autoincrement
#>   <chr> <keys>                         <lgl>        
#> 1 adsl  USUBJID, STUDYID               FALSE        
#> 2 adae  USUBJID, STUDYID, … (5 total)  FALSE        
#> 3 adlb  STUDYID, USUBJID, … (10 total) FALSE        
#> 4 adrs  STUDYID, USUBJID, … (6 total)  FALSE        
#> 5 adsub USUBJID, STUDYID, PARAMCD      FALSE

adam_study_data %>%
  dm_draw()

Subset the data

Further we can subset the data:

db <- adam_study_data %>%
  dm_filter(adsl = (SEX == "F"))

The dataset adsl is filtered

db %>%
  pull_tbl(adsl) %>%
  select(USUBJID, STUDYID, SEX) %>%
  head()
#> # A tibble: 6 × 3
#>   USUBJID               STUDYID SEX  
#>   <chr>                 <chr>   <fct>
#> 1 AB12345-RUS-3-id-378  AB12345 F    
#> 2 AB12345-CHN-11-id-220 AB12345 F    
#> 3 AB12345-USA-1-id-45   AB12345 F    
#> 4 AB12345-USA-1-id-261  AB12345 F    
#> 5 AB12345-NGA-11-id-173 AB12345 F    
#> 6 AB12345-CHN-4-id-73   AB12345 F

If one does not need the relational filtering feature then one can also zoom to the desired table and use filter

adam_study_data %>%
  dm_zoom_to(adsl) %>%
  filter(SEX == "F") %>%
  dm_update_zoomed() %>%
  pull_tbl(adsl) %>%
  select(USUBJID, STUDYID, SEX) %>%
  head()
#> # A tibble: 6 × 3
#>   USUBJID               STUDYID SEX  
#>   <chr>                 <chr>   <fct>
#> 1 AB12345-RUS-3-id-378  AB12345 F    
#> 2 AB12345-CHN-11-id-220 AB12345 F    
#> 3 AB12345-USA-1-id-45   AB12345 F    
#> 4 AB12345-USA-1-id-261  AB12345 F    
#> 5 AB12345-NGA-11-id-173 AB12345 F    
#> 6 AB12345-CHN-4-id-73   AB12345 F

Mutate Data

dm supports most of the dplyr verbs, for example mutate

db <- adam_study_data %>%
  dm_zoom_to(adsl) %>%
  mutate(TEST = paste(SEX, "-", BWGHTSI)) %>%
  dm_update_zoomed()

db$adsl$TEST[1:5]
#> [1] "M - 109.5" "M - 111.1" "F - 108.3" "F - 131.1" "M - 83.1"

`dm` Objects in Chevron

In chevron we use dm objects to combine all data into one object. The TLG-functions and the functions returned std_filter and std_mutate require a dm object to the adam_db argument.

dm objects also are useful to filter the data. For example filtering for all females also subsets the other datasets as per relation specification:

adam_study_data %>%
  dm_filter(adsl, SEX == "F") %>%
  dm_apply_filters() %>%
  dm_flatten_to_tbl(adae, adsl) %>%
  select(USUBJID, SEX, AESEQ, AETERM, AESEQ) %>%
  head()
#> Warning: The `table` argument of `dm_filter()` is deprecated as of dm 1.0.0.
#> ℹ `dm_filter()` now takes named filter expressions, the names correspond to the
#>   tables to be filtered. You no longer need to call `dm_apply_filters()` to
#>   materialize the filters.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.
#> # A tibble: 6 × 4
#>   USUBJID              SEX   AESEQ AETERM       
#>   <chr>                <fct> <int> <fct>        
#> 1 AB12345-BRA-1-id-141 F         1 trm B.2.1.2.1
#> 2 AB12345-BRA-1-id-141 F         2 trm D.2.1.5.3
#> 3 AB12345-BRA-1-id-141 F         3 trm A.1.1.1.1
#> 4 AB12345-BRA-1-id-141 F         4 trm A.1.1.1.2
#> 5 AB12345-BRA-1-id-141 F         5 trm A.1.1.1.1
#> 6 AB12345-BRA-1-id-141 F         6 trm D.1.1.1.1

Get Analysis Data

Vertical Analysis Data

For standard outputs we do know which data and variables we need to create a TLG.

adam_study_data %>%
  dm_join_to_tbl(adae, adsl) %>%
  select(AESEQ, ARM, SEX)
#> Warning: `dm_join_to_tbl()` was deprecated in dm 0.3.0.
#> ℹ Please use `dm_flatten_to_tbl()` instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.
#> # A tibble: 1,934 × 3
#>    AESEQ ARM            SEX  
#>    <int> <fct>          <fct>
#>  1     1 A: Drug X      M    
#>  2     2 A: Drug X      M    
#>  3     3 A: Drug X      M    
#>  4     4 A: Drug X      M    
#>  5     1 C: Combination F    
#>  6     2 C: Combination F    
#>  7     3 C: Combination F    
#>  8     4 C: Combination F    
#>  9     5 C: Combination F    
#> 10     6 C: Combination F    
#> # ℹ 1,924 more rows

Horizontal

Add variables from adsub to adsl:

library(tidyr)

adsub_wide <- adam_study_data$adsub %>%
  filter(PARAMCD %in% c("BWGHTSI", "BECOG")) %>%
  select(USUBJID, STUDYID, PARAMCD, AVAL, AVALU) %>%
  pivot_wider(names_from = PARAMCD, values_from = c(AVAL, AVALU))

dm(adam_study_data, adsub_wide) %>%
  dm_add_fk(adsub_wide, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
  dm_add_pk(adsub_wide, c("USUBJID", "STUDYID")) %>%
  dm_join_to_tbl(adsl, adsub_wide)
#> # A tibble: 400 × 68
#>    USUBJID      STUDYID AVAL_BWGHTSI AVAL_BECOG AVALU_BWGHTSI AVALU_BECOG SUBJID
#>    <chr>        <chr>          <dbl>      <dbl> <fct>         <fct>       <chr> 
#>  1 AB12345-BRA… AB12345         62.5          3 kg            ""          id-105
#>  2 AB12345-BRA… AB12345         98.8          3 kg            ""          id-134
#>  3 AB12345-BRA… AB12345         49.6          1 kg            ""          id-141
#>  4 AB12345-BRA… AB12345        162.           3 kg            ""          id-236
#>  5 AB12345-BRA… AB12345        105.           2 kg            ""          id-265
#>  6 AB12345-BRA… AB12345         53.8          4 kg            ""          id-42 
#>  7 AB12345-BRA… AB12345         60.6          3 kg            ""          id-65 
#>  8 AB12345-BRA… AB12345         63.8          4 kg            ""          id-93 
#>  9 AB12345-BRA… AB12345        144.           3 kg            ""          id-171
#> 10 AB12345-BRA… AB12345         76.9          3 kg            ""          id-217
#> # ℹ 390 more rows
#> # ℹ 61 more variables: SITEID <chr>, AGE <int>, AGEU <fct>, SEX <fct>,
#> #   RACE <fct>, ETHNIC <fct>, COUNTRY <fct>, DTHFL <fct>, INVID <chr>,
#> #   INVNAM <chr>, ARM <fct>, ARMCD <fct>, ACTARM <fct>, ACTARMCD <fct>,
#> #   TRT01P <fct>, TRT01A <fct>, TRT02P <fct>, TRT02A <fct>, REGION1 <fct>,
#> #   STRATA1 <fct>, STRATA2 <fct>, BMRKR1 <dbl>, BMRKR2 <fct>, ITTFL <fct>,
#> #   SAFFL <fct>, BMEASIFL <fct>, BEP01FL <fct>, AEWITHFL <fct>, …