Skip to contents

Introduction

We have decided to use the dm package in chevron to model the data relations of the CDISC ADaM datasets. In this vignette we introduce how we intend to use ADaM data in a relational model, in R with dm. Note that dm can be connected to a database.

Sample Data

We remove the ADSL variables in the database of the non-ADSL datasets and merge the ADSL variables back as needed.

library(chevron)
#> Registered S3 method overwritten by 'tern':
#>   method   from 
#>   tidy.glm broom
library(dm)
#> 
#> Attaching package: 'dm'
#> The following object is masked from 'package:stats':
#> 
#>     filter

syn_data <- syn_test_data()[c("adsl", "adae", "adlb", "adrs", "adsub")]

drop_adsl_vars <- function(df, adsl) {
  df[c("USUBJID", "STUDYID", setdiff(names(df), names(adsl)))]
}

dbl <- c(list(adsl = syn_data$adsl), lapply(syn_data[-1], drop_adsl_vars, adsl = syn_data$adsl))

### Define primary and foreign keys
adam_study_data <- new_dm(dbl) %>%
  dm_add_pk(adsl, c("USUBJID", "STUDYID")) %>%
  dm_add_fk(adae, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
  dm_add_pk(adae, c("USUBJID", "STUDYID", "ASTDTM", "AETERM", "AESEQ")) %>%
  dm_add_fk(adlb, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
  dm_add_pk(adlb, c(
    "STUDYID", "USUBJID", "PARAMCD", "BASETYPE", "AVISITN",
    "ATPTN", "DTYPE", "ADTM", "LBSEQ", "ASPID"
  )) %>%
  dm_add_fk(adrs, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
  dm_add_pk(adrs, c("STUDYID", "USUBJID", "PARAMCD", "AVISITN", "ADTM", "RSSEQ")) %>%
  dm_add_fk(adsub, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
  dm_add_pk(adsub, c("USUBJID", "STUDYID", "PARAMCD"))

validate_dm(adam_study_data)
#> Warning: `validate_dm()` was deprecated in dm 0.3.0.
#> Please use `dm_validate()` instead.

Note that the keys are defined in the data analysis plan at Roche.

Work With dm Object

List the keys

dm::dm_get_all_fks(adam_study_data)
#> # A tibble: 4 × 5
#>   child_table child_fk_cols    parent_table parent_key_cols  on_delete
#>   <chr>       <keys>           <chr>        <keys>           <chr>    
#> 1 adae        USUBJID, STUDYID adsl         USUBJID, STUDYID no_action
#> 2 adlb        USUBJID, STUDYID adsl         USUBJID, STUDYID no_action
#> 3 adrs        USUBJID, STUDYID adsl         USUBJID, STUDYID no_action
#> 4 adsub       USUBJID, STUDYID adsl         USUBJID, STUDYID no_action
dm::dm_get_all_pks(adam_study_data)
#> # A tibble: 5 × 2
#>   table pk_col                        
#>   <chr> <keys>                        
#> 1 adsl  USUBJID, STUDYID              
#> 2 adae  USUBJID, STUDYID, … (5 total) 
#> 3 adlb  STUDYID, USUBJID, … (10 total)
#> 4 adrs  STUDYID, USUBJID, … (6 total) 
#> 5 adsub USUBJID, STUDYID, PARAMCD
adam_study_data %>%
  dm_draw()

Subset the data

Further we can subset the data:

db <- adam_study_data %>%
  dm_filter(adsl, SEX == "F")
#> Warning: The `table` argument of `dm_filter()` is deprecated as of dm 1.0.0.
#> `dm_filter()` now takes named filter expressions, the names correspond to the tables to be filtered. You no longer need to call `dm_apply_filters()` to materialize the filters.

which adds the filter information to the dm object but does not apply the filtering

db %>%
  pull_tbl(adsl) %>%
  select(USUBJID, STUDYID, SEX) %>%
  head()
#> # A tibble: 6 × 3
#>   USUBJID               STUDYID SEX  
#>   <chr>                 <chr>   <fct>
#> 1 AB12345-RUS-3-id-378  AB12345 F    
#> 2 AB12345-CHN-11-id-220 AB12345 F    
#> 3 AB12345-USA-1-id-45   AB12345 F    
#> 4 AB12345-USA-1-id-261  AB12345 F    
#> 5 AB12345-NGA-11-id-173 AB12345 F    
#> 6 AB12345-CHN-4-id-73   AB12345 F

In order to get the filtered data one has to use the dm_apply_filters() function

db %>%
  dm_apply_filters() %>%
  pull_tbl(adsl) %>%
  select(USUBJID, STUDYID, SEX) %>%
  head()
#> # A tibble: 6 × 3
#>   USUBJID               STUDYID SEX  
#>   <chr>                 <chr>   <fct>
#> 1 AB12345-RUS-3-id-378  AB12345 F    
#> 2 AB12345-CHN-11-id-220 AB12345 F    
#> 3 AB12345-USA-1-id-45   AB12345 F    
#> 4 AB12345-USA-1-id-261  AB12345 F    
#> 5 AB12345-NGA-11-id-173 AB12345 F    
#> 6 AB12345-CHN-4-id-73   AB12345 F

or

db %>%
  dm_apply_filters_to_tbl(adsl) %>%
  select(USUBJID, STUDYID, SEX) %>%
  head()
#> # A tibble: 6 × 3
#>   USUBJID               STUDYID SEX  
#>   <chr>                 <chr>   <fct>
#> 1 AB12345-RUS-3-id-378  AB12345 F    
#> 2 AB12345-CHN-11-id-220 AB12345 F    
#> 3 AB12345-USA-1-id-45   AB12345 F    
#> 4 AB12345-USA-1-id-261  AB12345 F    
#> 5 AB12345-NGA-11-id-173 AB12345 F    
#> 6 AB12345-CHN-4-id-73   AB12345 F

If one does not need the lazy filtering feature then one can also zoom to the desired table and use filter

adam_study_data %>%
  dm_zoom_to(adsl) %>%
  filter(SEX == "F") %>%
  dm_update_zoomed() %>%
  pull_tbl(adsl) %>%
  select(USUBJID, STUDYID, SEX) %>%
  head()
#> # A tibble: 6 × 3
#>   USUBJID               STUDYID SEX  
#>   <chr>                 <chr>   <fct>
#> 1 AB12345-RUS-3-id-378  AB12345 F    
#> 2 AB12345-CHN-11-id-220 AB12345 F    
#> 3 AB12345-USA-1-id-45   AB12345 F    
#> 4 AB12345-USA-1-id-261  AB12345 F    
#> 5 AB12345-NGA-11-id-173 AB12345 F    
#> 6 AB12345-CHN-4-id-73   AB12345 F

Mutate Data

dm supports most of the dplyr verbs, for example mutate

db <- adam_study_data %>%
  dm_zoom_to(adsl) %>%
  mutate(TEST = paste(SEX, "-", BWGHTSI)) %>%
  dm_update_zoomed()

db$adsl$TEST[1:5]
#> [1] "M - 109.5" "M - 111.1" "F - 108.3" "F - 131.1" "M - 83.1"

dm Objects in Chevron

In chevron we use dm objects to combine all data into one object. The TLG-functions and the functions returned std_filter and std_mutate require a dm object to the adam_db argument.

dm objects also are useful to filter the data. For example filtering for all females also subsets the other datasets as per relation specification:

adam_study_data %>%
  dm_filter(adsl, SEX == "F") %>%
  dm_apply_filters() %>%
  dm_join_to_tbl(adae, adsl) %>%
  select(USUBJID, SEX, AESEQ, AETERM, AESEQ) %>%
  head()
#> Warning: `dm_join_to_tbl()` was deprecated in dm 0.3.0.
#> Please use `dm_flatten_to_tbl()` instead.
#> Warning: The `table` argument of `dm_filter()` is deprecated as of dm 1.0.0.
#> `dm_filter()` now takes named filter expressions, the names correspond to the tables to be filtered. You no longer need to call `dm_apply_filters()` to materialize the filters.
#> # A tibble: 6 × 4
#>   USUBJID              SEX   AESEQ AETERM       
#>   <chr>                <fct> <int> <fct>        
#> 1 AB12345-BRA-1-id-141 F         1 trm B.2.1.2.1
#> 2 AB12345-BRA-1-id-141 F         2 trm D.2.1.5.3
#> 3 AB12345-BRA-1-id-141 F         3 trm A.1.1.1.1
#> 4 AB12345-BRA-1-id-141 F         4 trm A.1.1.1.2
#> 5 AB12345-BRA-1-id-141 F         5 trm A.1.1.1.1
#> 6 AB12345-BRA-1-id-141 F         6 trm D.1.1.1.1

Get Analysis Data

Vertical Analysis Data

For standard outputs we do know which data and variables we need to create a TLG.

adam_study_data %>%
  dm_join_to_tbl(adae, adsl) %>%
  select(AESEQ, ARM, SEX)
#> Warning: `dm_join_to_tbl()` was deprecated in dm 0.3.0.
#> Please use `dm_flatten_to_tbl()` instead.
#> # A tibble: 1,934 × 3
#>    AESEQ ARM            SEX  
#>    <int> <fct>          <fct>
#>  1     1 A: Drug X      M    
#>  2     2 A: Drug X      M    
#>  3     3 A: Drug X      M    
#>  4     4 A: Drug X      M    
#>  5     1 C: Combination F    
#>  6     2 C: Combination F    
#>  7     3 C: Combination F    
#>  8     4 C: Combination F    
#>  9     5 C: Combination F    
#> 10     6 C: Combination F    
#> # … with 1,924 more rows

Horizontal

Add variables from adsub to adsl:

library(tidyr)

adsub_wide <- adam_study_data$adsub %>%
  filter(PARAMCD %in% c("BWGHTSI", "BECOG")) %>%
  select(USUBJID, STUDYID, PARAMCD, AVAL, AVALU) %>%
  pivot_wider(names_from = PARAMCD, values_from = c(AVAL, AVALU))

dm_add_tbl(adam_study_data, adsub_wide) %>%
  dm_add_fk(adsub_wide, c("USUBJID", "STUDYID"), ref_table = "adsl") %>%
  dm_add_pk(adsub_wide, c("USUBJID", "STUDYID")) %>%
  dm_join_to_tbl(adsl, adsub_wide)
#> Warning: `dm_join_to_tbl()` was deprecated in dm 0.3.0.
#> Please use `dm_flatten_to_tbl()` instead.
#> Warning: `dm_add_tbl()` was deprecated in dm 1.0.0.
#> Please use `dm()` instead.
#> Use `.name_repair = "unique"` if necessary.
#> # A tibble: 400 × 49
#>    USUBJID     STUDYID AVAL_…¹ AVAL_…² AVALU…³ AVALU…⁴ SUBJID SITEID   AGE SEX  
#>    <chr>       <chr>     <dbl>   <dbl> <fct>   <fct>   <chr>  <chr>  <int> <fct>
#>  1 AB12345-BR… AB12345    62.5       3 kg      ""      id-105 BRA-1     38 M    
#>  2 AB12345-BR… AB12345    98.8       3 kg      ""      id-134 BRA-1     47 M    
#>  3 AB12345-BR… AB12345    49.6       1 kg      ""      id-141 BRA-1     35 F    
#>  4 AB12345-BR… AB12345   162.        3 kg      ""      id-236 BRA-1     32 M    
#>  5 AB12345-BR… AB12345   105.        2 kg      ""      id-265 BRA-1     25 M    
#>  6 AB12345-BR… AB12345    53.8       4 kg      ""      id-42  BRA-1     36 M    
#>  7 AB12345-BR… AB12345    60.6       3 kg      ""      id-65  BRA-1     25 F    
#>  8 AB12345-BR… AB12345    63.8       4 kg      ""      id-93  BRA-1     34 F    
#>  9 AB12345-BR… AB12345   144.        3 kg      ""      id-171 BRA-11    40 F    
#> 10 AB12345-BR… AB12345    76.9       3 kg      ""      id-217 BRA-11    43 M    
#> # … with 390 more rows, 39 more variables: RACE <fct>, COUNTRY <fct>,
#> #   DTHFL <fct>, INVID <chr>, INVNAM <chr>, ARM <fct>, ARMCD <fct>,
#> #   ACTARM <fct>, ACTARMCD <fct>, REGION1 <fct>, STRATA1 <fct>, STRATA2 <fct>,
#> #   BMRKR1 <dbl>, BMRKR2 <fct>, ITTFL <fct>, SAFFL <fct>, BMEASIFL <fct>,
#> #   BEP01FL <fct>, RANDDT <date>, TRTSDTM <dttm>, TRTEDTM <dttm>, EOSSTT <fct>,
#> #   EOSDT <date>, EOSDY <int>, DCSREAS <fct>, DTHDT <date>, DTHCAUS <fct>,
#> #   DTHCAT <fct>, LDDTHELD <int>, LDDTHGR1 <fct>, LSTALVDT <date>, …