Cleans, expands, and joins the raw data frame, then computes summary statistics (using calc_stats by default), preparing the result for plotting.

preprocess_lineplot_data(
  df,
  alt_counts_df = NULL,
  x = "AVISIT",
  y = "AVAL",
  group_var = "ARM",
  subject_var = "USUBJID",
  mid = "mean",
  calc_stats_func = calc_stats,
  ...
)

Arguments

df

(data.frame)
The primary data frame containing the data to plot (e.g., ADaM BDS).

alt_counts_df

(data.frame or NULL)
An optional data frame for calculating N counts (e.g., ADSL). Defaults to NULL.

x

(string)
Column name for the x-axis (e.g., "AVISIT").

y

(string)
Column name for the y-axis values (e.g., "AVAL").

group_var

(string or NULL)
Column name for the grouping variable (e.g., "ARM"). Pass NULL to compute statistics without grouping, as in the last example.

subject_var

(string)
Column name for the subject ID (e.g., "USUBJID").

mid

(string)
Column name for the mean/median statistic to be plotted (e.g., "mean").

calc_stats_func

(function)
A function to calculate summary statistics, defaulting to the provided calc_stats function.

...

Additional arguments passed on to the statistics function supplied as calc_stats_func, which is calc_stats by default (e.g., conf_level, decimal_places).

Value

A data frame (df_stats) containing the calculated statistics, ready for plotting.

Examples

library(dplyr)
library(tidyr)

# Simulate a small ADaM-style dataset: 20 subjects, 10 per arm, each
# observed at three visits (rows are ordered subject by subject)
set.seed(123)
adlb <- data.frame(
  USUBJID = rep(sprintf("SUBJ-%d", 1:20), each = 3),
  ARM = rep(c("Treatment A", "Treatment B"), each = 30),
  AVISIT = factor(rep(c("Baseline", "Week 4", "Week 8"), times = 20)),
  AVAL = rnorm(n = 60, mean = 12, sd = 2)
)

# Subject-level companion dataset (one row per subject) used for N counts
adsl <- data.frame(
  USUBJID = sprintf("SUBJ-%d", 1:20),
  ARM = rep(c("Treatment A", "Treatment B"), each = 10)
)

# Summary statistics per visit and arm, using the default confidence level
df_stats <- preprocess_lineplot_data(
  adlb,
  alt_counts_df = adsl,
  x = "AVISIT", y = "AVAL",
  group_var = "ARM", subject_var = "USUBJID"
)

# Same call, but forwarding a 90% confidence level through ...
df_stats_90ci <- preprocess_lineplot_data(
  adlb,
  alt_counts_df = adsl,
  x = "AVISIT", y = "AVAL",
  group_var = "ARM", subject_var = "USUBJID",
  conf_level = 0.90
)

# Same call, but forwarding the number of decimal places through ...
df_stats_3dec <- preprocess_lineplot_data(
  adlb,
  alt_counts_df = adsl,
  x = "AVISIT", y = "AVAL",
  group_var = "ARM", subject_var = "USUBJID",
  decimal_places = 3
)

# No grouping variable: pass group_var = NULL
df_stats_ungrouped <- preprocess_lineplot_data(
  adlb,
  x = "AVISIT", y = "AVAL",
  group_var = NULL
)