Introduction

We have decided to use the dm package in chevron to model the data relations of the CDISC ADaM datasets. In this vignette we introduce how we intend to use ADaM data in a relational model, in R with dm. Note that dm can be connected to a database.

Sample Data

We remove the ADSL variables in the database of the non-adsl datasets and merge the adsl variables back as needed.

library(chevron)
#> Registered S3 method overwritten by 'tern':
#>   method   from 
#>   tidy.glm broom
library(dm)
#> 
#> Attaching package: 'dm'
#> The following object is masked from 'package:stats':
#> 
#>     filter

syn_data <- syn_test_data()[c("adsl", "adae", "adlb", "adrs", "adsub")]

drop_adsl_vars <- function(df, adsl) {
  df[c("USUBJID", "STUDYID", setdiff(names(df), names(adsl)))]
}

dbl <- c(list(adsl = syn_data$adsl), lapply(syn_data[-1], drop_adsl_vars, adsl = syn_data$adsl))

### Define primary and foreign keys
adam_study_data <- new_dm(dbl) %>%
  dm_add_pk(adsl, c("USUBJID", "STUDYID")) %>%
  dm_add_fk(adae, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
  dm_add_pk(adae, c("USUBJID", "STUDYID", "ASTDTM", "AETERM", "AESEQ")) %>%
  dm_add_fk(adlb, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
  dm_add_pk(adlb, c("STUDYID", "USUBJID", "PARAMCD", "BASETYPE", "AVISITN",
                    "ATPTN", "DTYPE", "ADTM", "LBSEQ", "ASPID")) %>%
  dm_add_fk(adrs, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
  dm_add_pk(adrs, c("STUDYID", "USUBJID", "PARAMCD", "AVISITN", "ADTM", "RSSEQ")) %>%
  dm_add_fk(adsub, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
  dm_add_pk(adsub, c("USUBJID", "STUDYID", "PARAMCD"))

validate_dm(adam_study_data)

Note that the keys are defined in the data analysis plan at Roche.

Work With dm Object

List the keys

dm::dm_get_all_fks(adam_study_data)
#> # A tibble: 4 × 5
#>   child_table child_fk_cols    parent_table parent_key_cols  on_delete
#>   <chr>       <keys>           <chr>        <keys>           <chr>    
#> 1 adae        USUBJID, STUDYID adsl         USUBJID, STUDYID no_action
#> 2 adlb        USUBJID, STUDYID adsl         USUBJID, STUDYID no_action
#> 3 adrs        USUBJID, STUDYID adsl         USUBJID, STUDYID no_action
#> 4 adsub       USUBJID, STUDYID adsl         USUBJID, STUDYID no_action
dm::dm_get_all_pks(adam_study_data)
#> # A tibble: 5 × 2
#>   table pk_col                        
#>   <chr> <keys>                        
#> 1 adsl  USUBJID, STUDYID              
#> 2 adae  USUBJID, STUDYID, … (5 total) 
#> 3 adlb  STUDYID, USUBJID, … (10 total)
#> 4 adrs  STUDYID, USUBJID, … (6 total) 
#> 5 adsub USUBJID, STUDYID, PARAMCD
adam_study_data %>%
  dm_draw()

Subset the data

Further we can subset the data:

db <- adam_study_data %>%
  dm_filter(adsl, SEX == "F")

which adds the filter information to the dm object but does not apply the filtering

db %>%
  pull_tbl(adsl) %>%
  select(USUBJID, STUDYID, SEX) %>%
  head()
#> # A tibble: 6 × 3
#>   USUBJID               STUDYID SEX  
#>   <chr>                 <chr>   <fct>
#> 1 AB12345-RUS-3-id-378  AB12345 F    
#> 2 AB12345-CHN-11-id-220 AB12345 F    
#> 3 AB12345-USA-1-id-45   AB12345 F    
#> 4 AB12345-USA-1-id-261  AB12345 F    
#> 5 AB12345-NGA-11-id-173 AB12345 F    
#> 6 AB12345-CHN-4-id-73   AB12345 F

In order to get the filtered data one has to use the dm_apply_filters() function

db %>%
  dm_apply_filters() %>%
  pull_tbl(adsl) %>%
  select(USUBJID, STUDYID, SEX) %>%
  head()
#> # A tibble: 6 × 3
#>   USUBJID               STUDYID SEX  
#>   <chr>                 <chr>   <fct>
#> 1 AB12345-RUS-3-id-378  AB12345 F    
#> 2 AB12345-CHN-11-id-220 AB12345 F    
#> 3 AB12345-USA-1-id-45   AB12345 F    
#> 4 AB12345-USA-1-id-261  AB12345 F    
#> 5 AB12345-NGA-11-id-173 AB12345 F    
#> 6 AB12345-CHN-4-id-73   AB12345 F

or

db %>%
  dm_apply_filters_to_tbl(adsl) %>%
  select(USUBJID, STUDYID, SEX) %>%
  head()
#> # A tibble: 6 × 3
#>   USUBJID               STUDYID SEX  
#>   <chr>                 <chr>   <fct>
#> 1 AB12345-RUS-3-id-378  AB12345 F    
#> 2 AB12345-CHN-11-id-220 AB12345 F    
#> 3 AB12345-USA-1-id-45   AB12345 F    
#> 4 AB12345-USA-1-id-261  AB12345 F    
#> 5 AB12345-NGA-11-id-173 AB12345 F    
#> 6 AB12345-CHN-4-id-73   AB12345 F

If one does not need the lazy filtering feature then one can also zoom to the desired table and use filter

adam_study_data %>%
  dm_zoom_to(adsl) %>%
  filter(SEX == "F") %>%
  dm_update_zoomed() %>%
  pull_tbl(adsl) %>%
  select(USUBJID, STUDYID, SEX) %>%
  head()
#> # A tibble: 6 × 3
#>   USUBJID               STUDYID SEX  
#>   <chr>                 <chr>   <fct>
#> 1 AB12345-RUS-3-id-378  AB12345 F    
#> 2 AB12345-CHN-11-id-220 AB12345 F    
#> 3 AB12345-USA-1-id-45   AB12345 F    
#> 4 AB12345-USA-1-id-261  AB12345 F    
#> 5 AB12345-NGA-11-id-173 AB12345 F    
#> 6 AB12345-CHN-4-id-73   AB12345 F

Mutate Data

dm supports most of the dplyr verbs, for example mutate

db <- adam_study_data %>%
  dm_zoom_to(adsl) %>%
  mutate(TEST = paste(SEX, "-", BWGHTSI)) %>%
  dm_update_zoomed()

db$adsl$TEST[1:5]
#> [1] "M - 109.5" "M - 111.1" "F - 108.3" "F - 131.1" "M - 83.1"

dm Objects in Chevron

In chevron we use dm objects to combine all data into one object. The tlg-functions and the functions returned std_filter and std_mutate require a dm object to the adam_db argument.

dm objects also are useful to filter the data. For example filtering for all females also subsets the other datasets as per relation specification:

adam_study_data %>%
  dm_filter(adsl, SEX == "F") %>%
  dm_apply_filters() %>%
  dm_join_to_tbl(adae, adsl) %>%
  select(USUBJID, SEX, AESEQ, AETERM, AESEQ) %>%
  head()
#> # A tibble: 6 × 4
#>   USUBJID              SEX   AESEQ AETERM       
#>   <chr>                <fct> <int> <fct>        
#> 1 AB12345-BRA-1-id-141 F         1 trm B.2.1.2.1
#> 2 AB12345-BRA-1-id-141 F         2 trm D.2.1.5.3
#> 3 AB12345-BRA-1-id-141 F         3 trm A.1.1.1.1
#> 4 AB12345-BRA-1-id-141 F         4 trm A.1.1.1.2
#> 5 AB12345-BRA-1-id-141 F         5 trm A.1.1.1.1
#> 6 AB12345-BRA-1-id-141 F         6 trm D.1.1.1.1

Get Analysis Data

Vertical Analysis Data

For standard outputs we do know which data and variables we need to create a TLG.

adam_study_data %>%
  dm_join_to_tbl(adae, adsl) %>%
  select(AESEQ, ARM, SEX)
#> # A tibble: 1,934 × 3
#>    AESEQ ARM            SEX  
#>    <int> <fct>          <fct>
#>  1     1 A: Drug X      M    
#>  2     2 A: Drug X      M    
#>  3     3 A: Drug X      M    
#>  4     4 A: Drug X      M    
#>  5     1 C: Combination F    
#>  6     2 C: Combination F    
#>  7     3 C: Combination F    
#>  8     4 C: Combination F    
#>  9     5 C: Combination F    
#> 10     6 C: Combination F    
#> # … with 1,924 more rows

Horizontal

Add variables from adsub to adsl:

library(tidyr)

adsub_wide <- adam_study_data$adsub %>%
  filter(PARAMCD %in% c("BWGHTSI", "BECOG")) %>%
  select(USUBJID, STUDYID, PARAMCD, AVAL, AVALU) %>%
  pivot_wider(names_from = PARAMCD, values_from = c(AVAL, AVALU))

dm_add_tbl(adam_study_data, adsub_wide) %>%
  dm_add_fk(adsub_wide, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
  dm_add_pk(adsub_wide, c("USUBJID", "STUDYID")) %>%
  dm_join_to_tbl(adsl, adsub_wide)
#> # A tibble: 400 × 49
#>    USUBJID      STUDYID AVAL_BWGHTSI AVAL_BECOG AVALU_BWGHTSI AVALU_BECOG SUBJID
#>    <chr>        <chr>          <dbl>      <dbl> <fct>         <fct>       <chr> 
#>  1 AB12345-BRA… AB12345         62.5          3 kg            ""          id-105
#>  2 AB12345-BRA… AB12345         98.8          3 kg            ""          id-134
#>  3 AB12345-BRA… AB12345         49.6          1 kg            ""          id-141
#>  4 AB12345-BRA… AB12345        162.           3 kg            ""          id-236
#>  5 AB12345-BRA… AB12345        105.           2 kg            ""          id-265
#>  6 AB12345-BRA… AB12345         53.8          4 kg            ""          id-42 
#>  7 AB12345-BRA… AB12345         60.6          3 kg            ""          id-65 
#>  8 AB12345-BRA… AB12345         63.8          4 kg            ""          id-93 
#>  9 AB12345-BRA… AB12345        144.           3 kg            ""          id-171
#> 10 AB12345-BRA… AB12345         76.9          3 kg            ""          id-217
#> # … with 390 more rows, and 42 more variables: SITEID <chr>, AGE <int>,
#> #   SEX <fct>, RACE <fct>, COUNTRY <fct>, DTHFL <fct>, INVID <chr>,
#> #   INVNAM <chr>, ARM <fct>, ARMCD <fct>, ACTARM <fct>, ACTARMCD <fct>,
#> #   REGION1 <fct>, STRATA1 <fct>, STRATA2 <fct>, BMRKR1 <dbl>, BMRKR2 <fct>,
#> #   ITTFL <fct>, SAFFL <fct>, BMEASIFL <fct>, BEP01FL <fct>, RANDDT <date>,
#> #   TRTSDTM <dttm>, TRTEDTM <dttm>, EOSSTT <fct>, EOSDT <date>, EOSDY <int>,
#> #   DCSREAS <fct>, DTHDT <date>, DTHCAUS <fct>, DTHCAT <fct>, LDDTHELD <int>, …