# load libraries
library(cards)
DMT01 - Demographics
Demographics and Baseline Characteristics (DMT01) QC Workflow
1. Generate a table using {chevron}
Show the code
# Create the DMT01 demographics table with the {chevron} package, summarising
# AGE and SEX from the synthetic data shipped with {chevron}.
tlg_dmt01 <- chevron::dmt01_main(
  chevron::syn_data,
  summaryvars = c("AGE", "SEX")
)

# Print the first 15 rows of the table for visual inspection.
head(tlg_dmt01, n = 15)
A: Drug X B: Placebo C: Combination All Patients
(N=15) (N=15) (N=15) (N=45)
—————————————————————————————————————————————————————————————————————
Age
n 15 15 15 45
Mean (SD) 31.3 (5.3) 35.1 (9.0) 36.6 (6.4) 34.3 (7.3)
Median 31.0 35.0 35.0 34.0
Min - Max 24 - 40 24 - 57 24 - 49 24 - 57
Sex
n 15 15 15 45
F 12 (80.0%) 8 (53.3%) 10 (66.7%) 30 (66.7%)
M 3 (20.0%) 7 (46.7%) 5 (33.3%) 15 (33.3%)
2. Flatten the table into a data.frame
A {rtables}-based output can be flattened into a data.frame using the as_result_df()
function from the {rtables} package. Setting the make_ard
argument to TRUE
formats the data similarly to the output generated by the {cards} package.
# Flatten the {rtables} table into a long data.frame. With make_ard = TRUE the
# layout resembles an ARD produced by {cards} (one row per statistic).
rtables_result <- rtables::as_result_df(
  tlg_dmt01,
  make_ard = TRUE,
  add_tbl_str_decimals = FALSE
)

# Show the first ten rows, restricted to the columns used for the comparison.
rtables_result[
  1:10,
  c("group1_level", "variable", "variable_level", "stat_name", "stat")
]
group1_level | variable | variable_level | stat_name | stat |
---|---|---|---|---|
A: Drug X | AGE | n | n | 15.000000 |
A: Drug X | AGE | mean_sd | mean | 31.333333 |
A: Drug X | AGE | mean_sd | sd | 5.259911 |
A: Drug X | AGE | median | median | 31.000000 |
A: Drug X | AGE | range | min | 24.000000 |
A: Drug X | AGE | range | max | 40.000000 |
A: Drug X | SEX | n.n | n | 15.000000 |
A: Drug X | SEX | count_fraction.F | count | 12.000000 |
A: Drug X | SEX | count_fraction.F | p | 0.800000 |
A: Drug X | SEX | count_fraction.M | count | 3.000000 |
3. Create a comparable ARD
Using the {cards} package, we stack the functions ard_continuous()
for the continuous variables and ard_categorical()
for categorical variables. The default statistics calculated for each of these data types are included - these can be adapted for bespoke analyses.
If any variable manipulation was done to the data prior to running the {citril}/{chevron}/{tern}/{rtables} commands, we suggest supplying the same data to these ARD functions, or running the same pre-processing steps to your data prior to creating ARDs to ensure variable names/levels match.
# Build ARDs that calculate the relevant statistics for the continuous (AGE)
# and categorical (SEX) variables, split by ARM and also computed overall.
ard_result <-
  ard_stack(
    chevron::syn_data$adsl,
    # Continuous summary of AGE: the same statistics shown in the table.
    ard_continuous(
      variables = c(AGE),
      statistic = ~ continuous_summary_fns(
        c("N", "mean", "sd", "median", "min", "max")
      )
    ),
    # Counts and proportions for each level of SEX.
    ard_categorical(
      variables = c(SEX),
      statistic = everything() ~ c("n", "p")
    ),
    # Number of observations for SEX (the "n" row of the table).
    ard_missing(
      variables = c(SEX),
      statistic = everything() ~ c("N_obs")
    ),
    .by = "ARM",
    .overall = TRUE
  ) |>
  apply_fmt_fn() |>
  # NOTE(review): presumably converts list columns to plain vectors so the
  # result can be compared as a regular data.frame -- confirm in {cards} docs.
  unlist_ard_columns()

# Show the first ten rows, restricted to the columns used for the comparison.
ard_result[
  1:10,
  c("group1_level", "variable", "variable_level", "stat_name", "stat")
]
group1_level | variable | variable_level | stat_name | stat |
---|---|---|---|---|
A: Drug X | AGE | NA | N | 15.000000 |
A: Drug X | AGE | NA | mean | 31.333333 |
A: Drug X | AGE | NA | sd | 5.259911 |
A: Drug X | AGE | NA | median | 31.000000 |
A: Drug X | AGE | NA | min | 24.000000 |
A: Drug X | AGE | NA | max | 40.000000 |
A: Drug X | SEX | F | n | 12.000000 |
A: Drug X | SEX | F | p | 0.800000 |
A: Drug X | SEX | M | n | 3.000000 |
A: Drug X | SEX | M | p | 0.200000 |
4. Statistics comparison
{rtables} reformat
In order to compare the two data.frames programmatically, some identifying variables must align so they can be used as “key columns”. Below are some data wrangling steps used to match the statistics for comparison. Note the {rtables} output:
Show the code
# Inspect the last rows of the flattened {rtables} result.
tail(rtables_result)
group1 | group1_level | variable | variable_level | variable_label | stat_name | stat | |
---|---|---|---|---|---|---|---|
39 | ARM | All Patients | AGE | range | Min - Max | max | 57.0000000 |
40 | ARM | All Patients | SEX | n.n | n | n | 45.0000000 |
41 | ARM | All Patients | SEX | count_fraction.F | F | count | 30.0000000 |
42 | ARM | All Patients | SEX | count_fraction.F | F | p | 0.6666667 |
43 | ARM | All Patients | SEX | count_fraction.M | M | count | 15.0000000 |
44 | ARM | All Patients | SEX | count_fraction.M | M | p | 0.3333333 |
The variable_level
leads with the statistic name, followed by "."
and then the actual level that matches the variable_level
in the ARD object. We will mutate the level to match the ARD object. Similarly, the total number of observations for a group is labelled "N"
in the ARD object, while it is named "n"
in the {rtables} object. The following manipulations are completed below:
- Set the variable_level to NA in the {rtables} result for a continuous data summary (as variable levels don’t apply, and it is NULL in the ARD object).
- Remove the stat_name prefix (i.e. “count_fraction.”) that precedes the variable level.
- Rename the stat_name values “n” to “N” and “count” to “n”.
- Remove columns we know won’t be in the ARD data.frame, for simplicity (i.e. variable_label).
# Align the {rtables} result with the ARD so both can share key columns.
rtables_result <- rtables_result |>
  dplyr::mutate(
    # Continuous summaries (AGE) have no variable level in the ARD: set to NA.
    variable_level = dplyr::case_when(
      variable == "AGE" &
        variable_level %in% c("mean_sd", "median", "range", "n") ~
        NA_character_,
      TRUE ~ variable_level
    ),
    # Drop everything up to and including the first "." so that, e.g.,
    # "count_fraction.F" becomes "F" and "n.n" becomes "n".
    variable_level = sub("^[^.]*\\.", "", variable_level),
    # Rename statistics to the ARD naming: "n" -> "N" and "count" -> "n".
    stat_name = dplyr::recode(stat_name, "n" = "N", "count" = "n")
  ) |>
  # variable_label has no counterpart in the ARD data.frame.
  dplyr::select(-c("variable_label"))

head(rtables_result, n = 10)
group1 | group1_level | variable | variable_level | stat_name | stat |
---|---|---|---|---|---|
ARM | A: Drug X | AGE | NA | N | 15.000000 |
ARM | A: Drug X | AGE | NA | mean | 31.333333 |
ARM | A: Drug X | AGE | NA | sd | 5.259911 |
ARM | A: Drug X | AGE | NA | median | 31.000000 |
ARM | A: Drug X | AGE | NA | min | 24.000000 |
ARM | A: Drug X | AGE | NA | max | 40.000000 |
ARM | A: Drug X | SEX | n | N | 15.000000 |
ARM | A: Drug X | SEX | F | n | 12.000000 |
ARM | A: Drug X | SEX | F | p | 0.800000 |
ARM | A: Drug X | SEX | M | n | 3.000000 |
ARD reformat
A reformatting step is necessary for the ARD output to complete the comparison. We’ll add the string “ARM” to any NULL
observations in the group1
column to match the {rtables} result and add the “All Patients” label to the group1_level
.
# Align the ARD with the {rtables} result: fill the missing grouping values
# and rename the observation-count statistic.
ard_result <- ard_result |>
  dplyr::mutate(
    # Rows with no grouping get the group name "ARM" and the level
    # "All Patients", matching the {rtables} result.
    group1 = dplyr::coalesce(group1, "ARM"),
    group1_level = dplyr::coalesce(group1_level, "All Patients"),
    stat_name = dplyr::recode(stat_name, "N_obs" = "N")
  ) |>
  # Keep only the key columns and the statistic value.
  dplyr::select(
    c(
      "group1_level", "group1", "variable",
      "variable_level", "stat_name", "stat"
    )
  )
Note that the ARD result is larger than the {rtables} result. When ard_stack()
is called with a .by
variable, a univariate analysis is also run for that variable. Here, it is "ARM"
. We can remove those statistics as they are not in the rtables_result
.
# Remove statistics that have no counterpart in the {rtables} result and
# finish aligning the key columns.
ard_result <- ard_result |>
  # Keep rows whose variable is not "ARM", plus any row with a missing
  # variable_level.
  dplyr::filter(
    !((variable == "ARM")) | is.na(variable_level)
  ) |>
  # For the non-AGE observation counts ("N" with no level), label the level
  # "n" to match the key used in the {rtables} result.
  dplyr::mutate(
    variable_level = dplyr::if_else(
      stat_name == "N" & is.na(variable_level) & variable != "AGE",
      "n",
      variable_level
    )
  )
Compare programmatically
Here we propose using the {diffdf} package to compare the statistics produced by the two table engines. {diffdf} is designed to compare two data.frames and report any differences/inconsistencies to the user.
# Compare the two data.frames row by row, matching rows on the key columns;
# any remaining column (here `stat`) is checked for differences.
diffdf::diffdf(
  rtables_result,
  ard_result,
  keys = c(
    "group1_level", "group1", "variable", "variable_level", "stat_name"
  ),
  suppress_warnings = TRUE
)
No issues were found!
If there are any differences you wish to explore, the above code can be assigned to an object which will collect the reported differences (comparison based on key columns, see dplyr::anti_join()
).