DISCLAIMER: this is a risky function; please consider using tbl_with_pools() instead. This function creates new pooled groups in your ADaM datasets based on specified arm values, and lets you choose whether to keep the original unpooled rows. Important note: if you keep the original rows (keep_original = TRUE), or add a pool that includes all patients (using the "all" keyword), the same patient will appear in more than one row of the dataset. A total column added later will then count those patients more than once and report incorrect patient counts. Use these options with caution, and do not add a standard total column later, to avoid double-counting.

df_add_poolings(adam_db, pools, arm_var = "TRT01A", keep_original = TRUE)

Arguments

adam_db

(list)
List of ADaM datasets containing at least the adsl data frame.

pools

(list)
Named list of custom pools. Values can be character vectors of arm names, logical expressions wrapped in rlang::expr(), or the keyword "all" to include all patients.

arm_var

(character)
Name of the arm variable, given as a single string. Its values are evaluated against the pools and overwritten with the pool names.

keep_original

(logical)
Whether to keep the original unpooled rows. Default is TRUE.

Value

Updated list of ADaM datasets.

See also

tbl_with_pools() for a safer alternative that creates pooled summaries without modifying the underlying datasets.

Examples

# Build a minimal dummy adam_db: five patients across three treatment arms,
# each carrying a Y/N flag. The columns are added one at a time.
adsl <- data.frame(
  USUBJID = c("001", "002", "003", "004", "005"),
  stringsAsFactors = FALSE
)
adsl$TRT01A <- c("Drug A", "Drug A", "Drug B", "Drug C", "Drug C")
adsl$FLAG <- c("Y", "N", "Y", "N", "Y")

# The database is a named list holding the single adsl dataset
adam_db <- setNames(list(adsl), "adsl")

# Requested pools: one combining two arms, one covering every patient
my_pools <- list()
my_pools[["Drugs A and B"]] <- c("Drug A", "Drug B")
my_pools[["All Patients"]] <- "all"

# Example A: Safe pooling (keep_original = FALSE, no "all" pool) -----------------
# A single pool is requested and the original rows are not kept, so every
# patient in the result appears exactly once.
safe_pools <- list("Drugs A and B" = c("Drug A", "Drug B"))
adam_db_safe <- df_add_poolings(adam_db, pools = safe_pools, keep_original = FALSE)
# The two Drug C patients (004, 005) belong to no pool and are absent from the
# output below; TRT01A now holds the pool name instead of the original arm.
print(adam_db_safe$adsl)
#>   USUBJID        TRT01A FLAG
#> 1     001 Drugs A and B    Y
#> 2     002 Drugs A and B    N
#> 3     003 Drugs A and B    Y

# Example B: Triggering the warnings (keep_original = TRUE and "all" pool) -------
# This will throw two warnings: one for duplicates, one for the "all" pool.
adam_db_warnings <- df_add_poolings(adam_db, pools = my_pools, keep_original = TRUE)
#> Warning: Preserving original rows while adding pools creates duplicates.
#>  If you add a total column later, the patient counts will be incorrect.
#> Warning: You are adding an 'all' patients pool to "adsl".
#>  Ensure you do not add a standard total column later.
# The result has 13 rows: the 5 original rows, 3 rows for "Drugs A and B" and
# 5 rows for "All Patients". Every patient therefore appears at least twice,
# which is why a total column computed on this data would over-count.
print(adam_db_warnings$adsl)
#>    USUBJID        TRT01A FLAG
#> 1      001        Drug A    Y
#> 2      002        Drug A    N
#> 3      003        Drug B    Y
#> 4      004        Drug C    N
#> 5      005        Drug C    Y
#> 6      001 Drugs A and B    Y
#> 7      002 Drugs A and B    N
#> 8      003 Drugs A and B    Y
#> 9      001  All Patients    Y
#> 10     002  All Patients    N
#> 11     003  All Patients    Y
#> 12     004  All Patients    N
#> 13     005  All Patients    Y

# Example C: Complex pooling using logical expressions ---------------------------
# Each pool is an unevaluated expression built with rlang::expr(); the
# expressions refer to adsl columns (FLAG, TRT01A) rather than to arm names.
complex_pools <- list(
  "Flagged Patients" = rlang::expr(FLAG == "Y"),
  "Drug A Flagged"   = rlang::expr(TRT01A == "Drug A" & FLAG == "Y")
)

adam_db_complex <- df_add_poolings(adam_db, pools = complex_pools, keep_original = FALSE)
# Patient 001 satisfies both expressions and so appears once per pool in the
# output below, even though keep_original = FALSE: overlapping pools also
# repeat patients.
print(adam_db_complex$adsl)
#>   USUBJID           TRT01A FLAG
#> 1     001 Flagged Patients    Y
#> 2     003 Flagged Patients    Y
#> 3     005 Flagged Patients    Y
#> 4     001   Drug A Flagged    Y

if (FALSE) { # identical(Sys.getenv("NOT_CRAN"), "true") && requireNamespace("yaml", quietly = TRUE)
# Example D: Use yaml to define the pools config and run the function ------------
# Creating Dummy Data
# NOTE(review): AEDECOD is normally an adverse-event variable; "adex" serves
# here only as a placeholder name for a second dataset -- confirm the naming.
adex <- data.frame(
  USUBJID = c("001", "002", "003", "004"),
  AEDECOD = c("Headache", "Nausea", "Fatigue", "Dizziness"),
  stringsAsFactors = FALSE
)
# adsl is reused from the examples above; adsl2 is a second copy of it
adam_db <- list(adsl = adsl, adex = adex, adsl2 = adsl)

# Define the config as a standard R list; the entries under
# df_add_poolings_config mirror the arguments of df_add_poolings()
config_to_write <- list(
  df_add_poolings_config = list(
    keep_original = FALSE,
    arm_var = "TRT01A",
    pools = list(
      "Drug A + B" = c("Drug A", "Drug B"),
      "Drug C + B" = c("Drug C", "Drug B"),
      "All Patients" = "all"
    )
  )
)

# Write it to a file (using a temp file for this example)
yaml_path <- tempfile(fileext = ".yaml")
yaml::write_yaml(config_to_write, yaml_path)

# Print out what the physical YAML file looks like
cat("--- Contents of the generated YAML file ---\n")
cat(readLines(yaml_path), sep = "\n")
cat("-------------------------------------------\n\n")

# Read the YAML file back into R
arg_specs <- yaml::read_yaml(yaml_path)

# The temporary file is no longer needed once it has been read
unlink(yaml_path)

# Extract just the poolings config block
pool_args <- arg_specs$df_add_poolings_config

# Fail early with a clear message if the config block is missing, instead of
# skipping the call and hitting "object 'adam_db_pooled' not found" below
if (is.null(pool_args)) {
  stop("`df_add_poolings_config` was not found in the YAML file.", call. = FALSE)
}

# Run the function with the arguments taken from the config
adam_db_pooled <- df_add_poolings(
  adam_db       = adam_db,
  pools         = pool_args$pools,
  arm_var       = pool_args$arm_var,
  keep_original = pool_args$keep_original
)

# View the result (explicit print(), as in Examples A-C)
print(adam_db_pooled$adsl2)
}