R/df_add_poolings.R
df_add_poolings.Rd
DISCLAIMER: This is a risky function. Please consider using tbl_with_pools() instead.
This function allows you to create new pooled groups in your ADaM datasets based
on specified arm values. You can choose to keep the original unpooled rows or not.
Important Note: If you choose to keep the original rows and also add a pool that
includes all patients (using the "all" keyword), you will end up with duplicate
rows in your dataset. This can lead to incorrect patient counts if you later add a
total column. Use this option with caution and ensure that you do not add a
standard total column later to avoid double-counting.
df_add_poolings(adam_db, pools, arm_var = "TRT01A", keep_original = TRUE)
adam_db (list)
List of ADaM datasets containing at least the adsl data frame.
pools (list)
Named list of custom pools. Values can be character vectors of arm names,
logical expressions wrapped in rlang::expr(), or the keyword "all" to
include all patients.
arm_var (character)
String of the arm variable to evaluate and overwrite.
keep_original (logical)
Whether to keep the original unpooled rows. Default is TRUE.
Updated list of ADaM datasets.
See tbl_with_pools() for a safer alternative that creates pooled summaries
without modifying the underlying datasets.
# Shared fixtures for the examples below --------------------------------------
# A five-patient subject-level dataset spread over three treatment arms.
adsl <- data.frame(
  USUBJID = c("001", "002", "003", "004", "005"),
  TRT01A = c("Drug A", "Drug A", "Drug B", "Drug C", "Drug C"),
  FLAG = c("Y", "N", "Y", "N", "Y"),
  stringsAsFactors = FALSE
)

# The database is a named list of datasets; here it only holds `adsl`.
adam_db <- list(adsl = adsl)

# Requested pools: one combining two arms by name, one using the "all" keyword.
my_pools <- list(
  "Drugs A and B" = c("Drug A", "Drug B"),
  "All Patients" = "all"
)
# Example A: Safe pooling (keep_original = FALSE, no "all" pool) -----------------
# A single pool built from two arm names; the original unpooled rows are dropped.
safe_pools <- list(
  "Drugs A and B" = c("Drug A", "Drug B")
)
adam_db_safe <- df_add_poolings(
  adam_db = adam_db,
  pools = safe_pools,
  keep_original = FALSE
)
print(adam_db_safe[["adsl"]])
#> USUBJID TRT01A FLAG
#> 1 001 Drugs A and B Y
#> 2 002 Drugs A and B N
#> 3 003 Drugs A and B Y
# Example B: Triggering the warnings (keep_original = TRUE and "all" pool) -------
# Two warnings are expected here: one about duplicated rows, one about the
# "all" pool.
adam_db_warnings <- df_add_poolings(
  adam_db = adam_db,
  pools = my_pools,
  keep_original = TRUE
)
#> Warning: Preserving original rows while adding pools creates duplicates.
#> ℹ If you add a total column later, the patient counts will be incorrect.
#> Warning: You are adding an 'all' patients pool to "adsl".
#> ℹ Ensure you do not add a standard total column later.
print(adam_db_warnings[["adsl"]])
#> USUBJID TRT01A FLAG
#> 1 001 Drug A Y
#> 2 002 Drug A N
#> 3 003 Drug B Y
#> 4 004 Drug C N
#> 5 005 Drug C Y
#> 6 001 Drugs A and B Y
#> 7 002 Drugs A and B N
#> 8 003 Drugs A and B Y
#> 9 001 All Patients Y
#> 10 002 All Patients N
#> 11 003 All Patients Y
#> 12 004 All Patients N
#> 13 005 All Patients Y
# Example C: Complex pooling using logical expressions ---------------------------
# Each pool is a logical condition captured with rlang::expr(); the conditions
# refer to columns of the dataset (FLAG, TRT01A).
complex_pools <- list(
  "Flagged Patients" = rlang::expr(FLAG == "Y"),
  "Drug A Flagged" = rlang::expr(TRT01A == "Drug A" & FLAG == "Y")
)
adam_db_complex <- df_add_poolings(
  adam_db = adam_db,
  pools = complex_pools,
  keep_original = FALSE
)
print(adam_db_complex[["adsl"]])
#> USUBJID TRT01A FLAG
#> 1 001 Flagged Patients Y
#> 2 003 Flagged Patients Y
#> 3 005 Flagged Patients Y
#> 4 001 Drug A Flagged Y
if (FALSE) { # identical(Sys.getenv("NOT_CRAN"), "true") && requireNamespace("yaml", quietly = TRUE)
  # Example D: Use yaml to define the pools config and run the function ------------
  # Creating Dummy Data
  adex <- data.frame(
    USUBJID = c("001", "002", "003", "004"),
    AEDECOD = c("Headache", "Nausea", "Fatigue", "Dizziness"),
    stringsAsFactors = FALSE
  )
  # `adsl2` is a second copy of `adsl`, so the database holds three datasets.
  adam_db <- list(adsl = adsl, adex = adex, adsl2 = adsl)

  # Define the config as a standard R list; its entries mirror the arguments
  # of df_add_poolings() (keep_original, arm_var, pools).
  config_to_write <- list(
    df_add_poolings_config = list(
      keep_original = FALSE,
      arm_var = "TRT01A",
      pools = list(
        "Drug A + B" = c("Drug A", "Drug B"),
        "Drug C + B" = c("Drug C", "Drug B"),
        "All Patients" = "all"
      )
    )
  )

  # Write it to a file (using a temp file for this example)
  yaml_path <- tempfile(fileext = ".yaml")
  yaml::write_yaml(config_to_write, yaml_path)

  # Print out what the physical YAML file looks like
  cat("--- Contents of the generated YAML file ---\n")
  cat(readLines(yaml_path), sep = "\n")
  cat("-------------------------------------------\n\n")

  # Read the YAML file back into R
  arg_specs <- yaml::read_yaml(yaml_path)

  # The temporary file has served its purpose; remove it so the example does
  # not leave anything behind in the session's temp directory.
  unlink(yaml_path)

  # Extract just the poolings config block. `[[` is used instead of `$` so the
  # name must match exactly (no partial matching on list names).
  pool_args <- arg_specs[["df_add_poolings_config"]]

  # Run the function only when the config block is present. The result is
  # printed inside the guard because `adam_db_pooled` does not exist otherwise.
  if (!is.null(pool_args)) {
    adam_db_pooled <- df_add_poolings(
      adam_db = adam_db,
      pools = pool_args[["pools"]],
      arm_var = pool_args[["arm_var"]],
      keep_original = pool_args[["keep_original"]]
    )

    # View the result
    print(adam_db_pooled[["adsl2"]])
  }
}