TTET01 - Time-to-Event

Time-to-event Summary (TTET01) QC Workflow

# load libraries
library(cards)
library(cardx)
library(dplyr)
library(survival)
library(broom.helpers)

1. Generate a table using {chevron}

Show the code
# Build the TTET01 table with {chevron} from the synthetic data,
# filtering the adtte dataset to PARAMCD == "PFS"
proc_data <- chevron::syn_data |>
  dunlin::log_filter(PARAMCD == "PFS", "adtte")
tlg_ttet01 <- chevron::run(chevron::ttet01, proc_data)
# Preview the first 15 rows of the table
tlg_ttet01 |> head(n = 15)
                                    A: Drug X       B: Placebo    C: Combination
                                      (N=15)          (N=15)          (N=15)    
  ——————————————————————————————————————————————————————————————————————————————
  Patients with event (%)           7 (46.7%)        12 (80%)       8 (53.3%)   
    Earliest contributing event                                                 
      Death                             5               11              7       
      Disease Progression               2               1               1       
  Patients without event (%)        8 (53.3%)        3 (20%)        7 (46.7%)   
  Time to Event (MONTHS)                                                        
    Median                             8.6             6.2             8.4      
      95% CI                        (7.3, NE)       (4.8, 7.6)      (7.0, NE)   
    25% and 75%-ile                  3.8, NE         4.7, 8.4        5.8, NE    
    Range                         1.2 to 9.5 {1}    0.9 to 9.1    0.9 to 9.5 {1}
  Unstratified Analysis                                                         
    p-value (log-rank)                                0.0973          0.9111    
    Hazard Ratio                                       2.18            1.06     
    95% CI                                         (0.85, 5.60)    (0.38, 2.94) 
  6 MONTHS                                                                      
  ——————————————————————————————————————————————————————————————————————————————

  {1} - Censored observation: range maximum
  ——————————————————————————————————————————————————————————————————————————————

2. Flatten the table into a data.frame

An {rtables}-based output can be flattened into a data frame using the as_result_df() function from the {rtables} package. Setting the make_ard argument to TRUE formats the data similarly to the output generated by the {cards} package.

# Flatten the table into a data frame laid out like a {cards} ARD
rtables_results <- tlg_ttet01 |>
  rtables::as_result_df(make_ard = TRUE, add_tbl_str_decimals = FALSE)
rtables_results |> head(n = 5)
group1 group1_level group2 group2_level variable variable_level variable_label stat_name stat
NA NA ARM A: Drug X IS_EVENT count_fraction Patients with event (%) count 7.0000000
NA NA ARM A: Drug X IS_EVENT count_fraction Patients with event (%) fraction 0.4666667
EVNT1 Patients with event (%) ARM A: Drug X EVNTDESC count.Death Death count 5.0000000
EVNT1 Patients with event (%) ARM A: Drug X EVNTDESC count.Disease Progression Disease Progression count 2.0000000
NA NA ARM A: Drug X IS_NOT_EVENT count_fraction Patients without event (%) count 8.0000000

3. Create comparable ARDs

Using the {cards} package, we stack the functions ard_continuous() for the continuous variables and ard_categorical() for categorical variables. The default statistics calculated for each of these data types are included - these can be adapted for bespoke analyses.

If any variable manipulation was done to the data prior to running the citril/chevron/tern/rtables commands, we suggest supplying the same data to these ARD functions, or running the same pre-processing steps to your data prior to creating ARDs to ensure variable names/levels match.

Note: when a ‘by’ variable is specified in the ard_stack() call, a univariate tabulation is returned. We will have to remove those observations later in the workflow to create a 1:1 match.

# Data pre-processing: derive the analysis variables used by the ARDs below
adtte <- dplyr::mutate(
  proc_data$adtte,
  # convert days to months, this is the same calculation chevron uses
  AVAL = AVAL / 30.4375,
  # chevron coding of events: CNSR == 0 flags an event, CNSR == 1 a non-event
  is_event = CNSR == 0,
  is_not_event = CNSR == 1
)

# ----- ARDS -----
# Calculate Event Rates
# Both tabulations are stratified by ARM. The `variables` argument is spelled
# out in full so the calls do not depend on partial argument matching.
ard_event_rates <- cards::bind_ard(
  # Patient event rates: n and p for each level of is_event
  ard_categorical(
    adtte,
    strata = ARM,
    variables = is_event,
    statistic = everything() ~ c("n", "p")
  ),
  # Earliest contributing event: counts of EVNTDESC among rows with an event
  ard_categorical(
    dplyr::filter(adtte, is_event),
    strata = ARM,
    variables = EVNTDESC,
    statistic = everything() ~ "n"
  )
) |>
  apply_fmt_fn() |>
  unlist_ard_columns()

# Calculate Time to Event
# Fit the survival curves once; the same fit feeds the median and the quantiles
km_fit <- survfit(Surv(AVAL, is_event) ~ ARM, data = adtte, conf.type = "plain")

ard_tte <- cards::bind_ard(
  # median (all statistics returned for probs = 0.5 are kept)
  ard_survival_survfit(km_fit, probs = 0.5),
  # 25% and 75% quantiles: keep the point estimates only
  ard_survival_survfit(km_fit, probs = c(0.25, 0.75)) |>
    filter(stat_name == "estimate"),
  # range of AVAL per arm (`variables` spelled out, no partial matching)
  ard_continuous(
    adtte,
    by = ARM,
    variables = AVAL,
    statistic = ~ continuous_summary_fns(c("min", "max"))
  )
) |>
  apply_fmt_fn() |>
  unlist_ard_columns(fct_as_chr = TRUE)

# Unstratified Cox model, fitted pairwise against the reference arm
# reference arm used for every pairwise comparison
ref_arm <- "A: Drug X"

ard_unstrat_coxph <- ard_pairwise(
  adtte,
  variable = ARM,
  .f = function(df) {
    # exponentiated coefficients, restricted to the estimate and its confidence
    # limits for the non-reference arm
    cox_ard <- coxph(Surv(AVAL, is_event) ~ ARM, data = df, ties = "efron") |>
      ard_regression(exponentiate = TRUE)
    dplyr::filter(
      cox_ard,
      stat_name %in% c("estimate", "conf.low", "conf.high"),
      variable_level != ref_arm
    )
  },
  include = ref_arm
)

# Stack the pairwise results into one ARD, then format and flatten it
ard_unstrat_coxph <- ard_unstrat_coxph |>
  bind_ard() |>
  apply_fmt_fn() |>
  unlist_ard_columns()

# p-value from ard_survival_survdiff() for each pairwise comparison with the
# reference arm; ard_pairwise() returns a list of data frames
ard_pval <- ard_pairwise(
  adtte,
  variable = ARM,
  .f = function(df) {
    survdiff_ard <- ard_survival_survdiff(survival::Surv(AVAL, is_event) ~ ARM, data = df)
    dplyr::filter(survdiff_ard, stat_name %in% "p.value")
  },
  include = ref_arm
)

# Flatten the list columns of each element, then stack the elements row-wise
ard_pval <- do.call(rbind, lapply(ard_pval, unlist_ard_columns))

# 6 Months Risk Analysis
# Survival estimates at time 6 per arm, with the standard error row dropped.
# `times` and `data` are named in full, matching the survfit_diff calls further
# down, so nothing relies on partial or positional argument matching.
ard_surv_time <- survfit(Surv(AVAL, is_event) ~ ARM, data = adtte, conf.int = 0.95, conf.type = "plain") |>
  ard_survival_survfit(times = 6) |>
  dplyr::filter(stat_name != "std.error") |>
  apply_fmt_fn() |>
  unlist_ard_columns()

Note about survival surv_diff tables

Survival time difference estimates are usually calculated as the treatment group minus the control group. It’s been noted that {tern}’s surv_timepoint function and {cardx}’s ard_survival_survfit_diff function calculate this in different orders - resulting in statistics where the absolute value is identical but the direction is not. If this is found in your data, you’ll need to relevel the factor for the by variable, such that the treatment group is ordered before the control group. Below, we reorder the levels so “B: Placebo” comes before the control group, “A: Drug X”. This is repeated so “C: Combination” comes before “A: Drug X” before computing the ARD.

# difference in event free rate
# Relevel the ARM variable to match the calculation done in tern: for each
# treatment arm, that arm is moved to the first factor level, the row labelled
# with the control arm ("A: Drug X") is kept, and it is relabelled with the
# treatment arm it was compared to.
# NOTE(review): this assumes ard_survival_survfit_diff() compares every level
# against the first level of ARM - confirm against the cardx documentation.
# The relevelling is done on copies so `adtte` keeps its original level order.

# B: Placebo vs. A: Drug X
adtte_b_first <- adtte |>
  mutate(ARM = forcats::fct_relevel(ARM, "B: Placebo", after = 0))
ard_surv_time_diff_1 <- survfit(Surv(AVAL, is_event) ~ ARM, data = adtte_b_first, conf.int = 0.95, conf.type = "plain") |>
  ard_survival_survfit_diff(times = 6) |>
  filter(stat_name %in% c("estimate", "conf.low", "conf.high", "p.value")) |>
  apply_fmt_fn() |>
  unlist_ard_columns() |>
  filter(group1_level == "A: Drug X") |>
  mutate(group1_level = "B: Placebo")

# C: Combination vs. A: Drug X (same treatment-first ordering as above)
adtte_c_first <- adtte |>
  mutate(ARM = forcats::fct_relevel(ARM, "C: Combination", after = 0))
ard_surv_time_diff_2 <- survfit(Surv(AVAL, is_event) ~ ARM, data = adtte_c_first, conf.int = 0.95, conf.type = "plain") |>
  ard_survival_survfit_diff(times = 6) |>
  filter(stat_name %in% c("estimate", "conf.low", "conf.high", "p.value")) |>
  apply_fmt_fn() |>
  unlist_ard_columns() |>
  filter(group1_level == "A: Drug X") |>
  mutate(group1_level = "C: Combination")

ard_surv_time_diff <- bind_rows(ard_surv_time_diff_1, ard_surv_time_diff_2)

head(ard_surv_time_diff, n = 5)
group1 group1_level variable variable_level context stat_name stat_label stat stat_fmt fmt_fn warning error
ARM B: Placebo time 6 survival_survfit_diff estimate Survival Difference -0.2000000 -0.2 1 NULL NULL
ARM B: Placebo time 6 survival_survfit_diff conf.low CI Lower Bound -0.5373737 -0.5 1 NULL NULL
ARM B: Placebo time 6 survival_survfit_diff conf.high CI Upper Bound 0.1373737 0.1 1 NULL NULL
ARM B: Placebo time 6 survival_survfit_diff p.value p-value 0.2452781 0.2 1 NULL NULL
ARM C: Combination time 6 survival_survfit_diff estimate Survival Difference 0.0000000 0.0 1 NULL NULL

4. Statistics comparison

To ensure the {rtables} and ARD results are compatible for comparison, some reformatting will need to be applied, and it may be different for each sub-table. Below is a helper function created to apply a row transformation for duplicate stat_name values in the {rtables} result. For example, two rows may be called “rate_ci” where one row is the lower bound of the confidence interval and the other is the upper bound. This function transforms these rows using the new stat_name values provided in the values parameter.

# function to apply row-based transformation
#' Relabel alternating rows that share one `variable_level`
#'
#' Rows whose `variable_level` equals `condition` are numbered in order of
#' appearance; odd-numbered rows are relabelled `values[1]` and even-numbered
#' rows `values[2]`. All other rows keep their `variable_level`.
#'
#' @param data A data frame containing a `variable_level` column.
#' @param condition Value of `variable_level` identifying the rows to relabel.
#' @param values Vector of replacement labels; the first two elements are used.
#' @return `data` as an ungrouped tibble with `variable_level` updated. No
#'   other column is added, removed, or modified.
apply_row_transformation <- function(data, condition, values) {
  stopifnot(length(values) >= 2L)

  # Same output class as a group_by()/ungroup() round trip: an ungrouped tibble
  out <- dplyr::as_tibble(dplyr::ungroup(data))

  # Work on plain vectors instead of temporary columns, so existing columns
  # (e.g. ones named `group`, `row_num`, `condition`, or `values`) can neither
  # be overwritten nor shadow the function arguments
  is_target <- !is.na(out$variable_level) & out$variable_level == condition
  position <- cumsum(is_target) # running count of matching rows

  out$variable_level <- dplyr::case_when(
    is_target & position %% 2 == 1 ~ values[1], # Odd matching row
    is_target & position %% 2 == 0 ~ values[2], # Even matching row
    TRUE ~ out$variable_level # Keep original value
  )
  out
}

Event Rate Table

For {rtables}: 1. Filter for event rate statistics. 2. Rename the grouping variables to match ARD 3. Recode “count” to “n” and “fraction” to “p” to match ARD

For ARD: 1. recode variable to match {rtables}

# {rtables}: keep the event-rate rows and reshape them to the ARD layout
rtables_event_rate <- rtables_results |>
  filter(
    variable_label %in% c("Patients with event (%)", "Patients without event (%)") |
      group1_level == "Patients with event (%)"
  ) |>
  # drop the original group1 columns first so the arm columns can take their names
  dplyr::select(group2, group2_level, variable, stat_name, stat) |>
  dplyr::rename(
    group1 = group2,
    group1_level = group2_level
  ) |>
  dplyr::mutate(
    stat_name = dplyr::recode(stat_name, "count" = "n", "fraction" = "p")
  )

# {cards}: align the variable names with the {rtables} result
ard_event_rates <- ard_event_rates |>
  mutate(
    variable = recode(variable, "is_event" = "IS_EVENT"),
    # rows for the FALSE level of is_event are the "without event" rows
    variable = ifelse(variable != "EVNTDESC" & variable_level == FALSE, "IS_NOT_EVENT", variable)
  ) |>
  dplyr::select(group1, group1_level, variable, stat_name, stat)

# compare stats
diffdf::diffdf(
  base = rtables_event_rate,
  compare = ard_event_rates,
  keys = c("group1", "group1_level", "variable", "stat_name", "stat"),
  suppress_warnings = TRUE
)
No issues were found!

Time to Event Table

For {rtables}: 1. Filter for time-to-event statistics. 2. Rename the grouping variables to match ARD 3. Apply row transformations to differentiate bounds of stats 4. Recode “Median” to “estimate” to match ARD

# {rtables}: keep the time-to-event rows and reshape them to the ARD layout
rtables_tte <- rtables_results |>
  filter(variable == "time_to_event") |>
  dplyr::select(group2, group2_level, variable_level, stat) |>
  # split paired rows into their lower / upper (or first / second) component
  apply_row_transformation("95% CI", c("conf.low", "conf.high")) |>
  apply_row_transformation("Range", c("min", "max")) |>
  apply_row_transformation("25% and 75%-ile", c("0.25", "0.75")) |>
  mutate(variable_level = recode(variable_level, "Median" = "estimate")) |>
  rename(
    group1 = group2,
    group1_level = group2_level,
    stat_name = variable_level
  )

# {cards}: label the quantile estimates with their probability
ard_tte <- ard_tte |>
  mutate(
    is_quantile_estimate = stat_name == "estimate" & variable_level %in% c("0.25", "0.75"),
    stat_name = if_else(
      is_quantile_estimate,
      as.character(variable_level), # quantile rows: use the probability as the name
      stat_name # every other row keeps its stat_name
    )
  ) |>
  dplyr::select(group1, group1_level, stat_name, stat)

# compare stats
diffdf::diffdf(
  base = rtables_tte,
  compare = ard_tte,
  keys = c("group1", "group1_level", "stat_name", "stat"),
  suppress_warnings = TRUE
)
No issues were found!

Unstratified Pairwise Coxph

For {rtables}: 1. Filter for unstratified Cox regression statistics. 2. Rename the grouping variables to match ARD 3. Apply row transformations to differentiate bounds of stats 4. Recode “hr” to “estimate” to match ARD

For ARD: 1. Recode variable to match {rtables} 2. Set attributes to NULL

# {rtables}: hazard ratio rows (the p-value is handled separately below)
rtables_unstrat_coxph <- rtables_results |>
  filter(
    variable == "coxph_unstratified",
    !is.na(stat),
    variable_level != "pvalue"
  ) |>
  select(group2, group2_level, variable_level, stat) |>
  # the two "hr_ci" rows per arm become the lower and upper confidence limits
  apply_row_transformation("hr_ci", c("conf.low", "conf.high")) |>
  mutate(variable_level = dplyr::recode(variable_level, "hr" = "estimate")) |>
  rename(
    variable = group2,
    variable_level = group2_level,
    stat_name = variable_level
  )

# {rtables}: p-value rows, keyed by arm
rtables_pval <- rtables_results |>
  filter(!is.na(stat), variable_level == "pvalue") |>
  select(rowname = group2_level, stat)

# {cards}/{cardx}: keep the comparison columns and strip element names
ard_unstrat_coxph <- dplyr::select(ard_unstrat_coxph, variable, variable_level, stat_name, stat)
names(ard_unstrat_coxph$variable_level) <- NULL
names(ard_unstrat_coxph$stat) <- NULL

# {cards}/{cardx} p-value: reduce the row names to the text after "vs."
# and strip the surrounding single quotes
ard_pval <- tibble::rownames_to_column(ard_pval) |>
  select(rowname, stat)
ard_pval$rowname <- sub(".*vs\\.\\s*", "", ard_pval$rowname)
ard_pval$rowname <- sub("^'(.*)'$", "\\1", ard_pval$rowname)

# Compare stats
diffdf::diffdf(
  base = rtables_unstrat_coxph,
  compare = ard_unstrat_coxph,
  keys = c("variable", "variable_level", "stat_name", "stat"),
  suppress_warnings = TRUE
)
No issues were found!
# Compare the p-values from both sources
diffdf::diffdf(
  base = rtables_pval,
  compare = ard_pval,
  keys = c("rowname", "stat"),
  suppress_warnings = TRUE
)
No issues were found!

Survival (Timepoint)

For {rtables}: 1. Filter for the 6-month survival statistics. 2. Rename the grouping variables to match ARD 3. Apply row transformations to differentiate bounds of stats 4. Recode “pt_at_risk” to “n.risk” and “event_free_rate” to “estimate” to match ARD

For ARD: 1. Recode variable to match {rtables} 2. Format proportions to scale of 100

# {rtables}: keep the "surv_6" rows and rename the statistics to the ARD names
rtables_surv_time <- rtables_results |>
  filter(variable == "surv_6") |>
  select(group2_level, variable_level, stat) |>
  # the two "rate_ci" rows per arm become the lower and upper confidence limits
  apply_row_transformation("rate_ci", c("conf.low", "conf.high")) |>
  mutate(
    variable_level = dplyr::recode(
      variable_level,
      "pt_at_risk" = "n.risk",
      "event_free_rate" = "estimate"
    )
  ) |>
  rename(
    group1_level = group2_level,
    stat_name = variable_level
  )

# {cards}/{cardx}: multiply the estimate and its confidence limits by 100
ard_surv_time <- ard_surv_time |>
  select(group1_level, stat_name, stat) |>
  mutate(
    stat = if_else(
      stat_name %in% c("estimate", "conf.low", "conf.high"),
      stat * 100,
      stat
    )
  )

# Compare stats
diffdf::diffdf(
  base = rtables_surv_time,
  compare = ard_surv_time,
  keys = c("group1_level", "stat_name", "stat"),
  suppress_warnings = TRUE
)
No issues were found!

Survival Diff

For {rtables}: 1. Filter for the 6-month survival difference statistics. 2. Rename the grouping variables to match ARD 3. Apply row transformations to differentiate bounds of stats 4. Recode “rate_diff” to “estimate” and “ztest_pval” to “p.value” to match ARD

For ARD: 1. Recode variable to match {rtables} 2. Format proportions to scale of 100

The numeric values of the results are identical in magnitude but may differ in direction (due to the A vs. B and B vs. A type differences in the calculations).

# {rtables}: keep the "surv_diff_6" rows and rename the statistics to the ARD names
rtables_surv_diff <- rtables_results |>
  filter(variable == "surv_diff_6", !is.na(stat)) |>
  select(group2_level, variable_level, stat) |>
  # the two "rate_diff_ci" rows per arm become the lower and upper confidence limits
  apply_row_transformation("rate_diff_ci", c("conf.low", "conf.high")) |>
  mutate(
    variable_level = dplyr::recode(
      variable_level,
      "rate_diff" = "estimate",
      "ztest_pval" = "p.value"
    )
  ) |>
  rename(
    group1_level = group2_level,
    stat_name = variable_level
  )

# {cards}/{cardx}: multiply the estimate and its confidence limits by 100;
# the p-value is left as is
ard_surv_time_diff <- ard_surv_time_diff |>
  select(group1_level, stat_name, stat) |>
  mutate(
    stat = if_else(
      stat_name %in% c("estimate", "conf.low", "conf.high"),
      stat * 100,
      stat
    )
  )

# compare stats
diffdf::diffdf(
  base = rtables_surv_diff,
  compare = ard_surv_time_diff,
  keys = c("group1_level", "stat_name", "stat"),
  suppress_warnings = TRUE
)
No issues were found!