Generate a Result Data Frame — data.frame

Collection of utilities to extract a data.frame from TableTree objects.

Usage

as_result_df(tt, spec = "v0_experimental", simplify = FALSE, ...)

result_df_specs()

path_enriched_df(tt, path_fun = collapse_path, value_fun = collapse_values)

Arguments

tt

TableTree (or related class). A TableTree object representing a populated table.

spec

character(1). The specification to use to extract the result data frame. See Details.

simplify

logical(1). If TRUE, the result data frame will have only visible labels and result columns.

...

Passed to the spec-specific result data frame conversion function. Currently it can be one or more of the following parameters (valid only for the v0_experimental spec for now):

expand_colnames: when TRUE, the result data frame will have expanded column names above the usual output. This is useful when the result data frame is used for further processing.
simplify: when TRUE, the result data frame will have only visible labels and result columns.
as_strings: when TRUE, the result data frame will have all values as strings, as they appear in the final table (it can also be retrieved from matrix_form(tt)$strings). This is also true for column counts if expand_colnames = TRUE.
as_viewer: when TRUE, the result data frame will have all values as they appear in the final table, i.e. with the same precision and numbers, but in easy-to-use numeric form.

path_fun

function. Function to transform paths into single-string row/column names.

value_fun

function. Function to transform cell values into cells of the data.frame. Defaults to collapse_values which creates strings where multi-valued cells are collapsed together, separated by |.

Value

result_df_specs(): returns a named list of result data frame extraction functions by "specification".

path_enriched_df(): returns a data frame of tt's cell values (processed by value_fun), with columns named by the full column paths (processed by path_fun) and an additional row_path column with the row paths (processed by path_fun).

Details

as_result_df(): Result data frame specifications may differ in the exact information they include and the form in which they represent it. Specifications whose names end in "_experimental" are subject to change without notice, but specifications without the "_experimental" suffix will remain available indefinitely, including any bugs in their construction.

Functions

result_df_specs(): list of functions that extract result data frames from TableTrees.
path_enriched_df(): transform TableTree object to Path-Enriched data.frame.

Examples

lyt <- basic_table() %>%
  split_cols_by("ARM") %>%
  split_rows_by("STRATA1") %>%
  analyze(c("AGE", "BMRKR2"))

tbl <- build_table(lyt, ex_adsl)
as_result_df(tbl)
#>    spl_var_1 spl_value_1 avar_name row_name row_num is_group_summary node_class
#> 1    STRATA1           A       AGE     Mean       3            FALSE    DataRow
#> 2    STRATA1           A    BMRKR2      LOW       5            FALSE    DataRow
#> 3    STRATA1           A    BMRKR2   MEDIUM       6            FALSE    DataRow
#> 4    STRATA1           A    BMRKR2     HIGH       7            FALSE    DataRow
#> 5    STRATA1           B       AGE     Mean      10            FALSE    DataRow
#> 6    STRATA1           B    BMRKR2      LOW      12            FALSE    DataRow
#> 7    STRATA1           B    BMRKR2   MEDIUM      13            FALSE    DataRow
#> 8    STRATA1           B    BMRKR2     HIGH      14            FALSE    DataRow
#> 9    STRATA1           C       AGE     Mean      17            FALSE    DataRow
#> 10   STRATA1           C    BMRKR2      LOW      19            FALSE    DataRow
#> 11   STRATA1           C    BMRKR2   MEDIUM      20            FALSE    DataRow
#> 12   STRATA1           C    BMRKR2     HIGH      21            FALSE    DataRow
#>    A: Drug X B: Placebo C: Combination
#> 1   33.07895   35.11364         34.225
#> 2         12         16             14
#> 3         10         17             13
#> 4         16         11             13
#> 5   33.85106         36       36.32558
#> 6         19         13             10
#> 7         13         22             16
#> 8         15         10             17
#> 9   34.22449   35.17778       35.63265
#> 10        19         16             16
#> 11        14         17             13
#> 12        16         12             20

result_df_specs()
#> $v0_experimental
#> function(tt,
#>                                       as_viewer = FALSE,
#>                                       as_strings = FALSE,
#>                                       expand_colnames = FALSE) {
#>   checkmate::assert_flag(as_viewer)
#>   checkmate::assert_flag(as_strings)
#>   checkmate::assert_flag(expand_colnames)
#> 
#>   raw_cvals <- cell_values(tt)
#>   ## if the table has one row and multiple columns, sometimes the cell values returns a list of the cell values
#>   ## rather than a list of length 1 representing the single row. This is bad but may not be changeable
#>   ## at this point.
#>   if (nrow(tt) == 1 && length(raw_cvals) > 1) {
#>     raw_cvals <- list(raw_cvals)
#>   }
#> 
#>   cellvals <- as.data.frame(do.call(rbind, raw_cvals))
#>   row.names(cellvals) <- NULL
#> 
#>   if (nrow(tt) == 1 && ncol(tt) == 1) {
#>     colnames(cellvals) <- names(raw_cvals)
#>   }
#> 
#>   if (as_viewer || as_strings) {
#>     # we keep previous calculations to check the format of the data
#>     mf_tt <- matrix_form(tt)
#>     mf_result_chars <- mf_strings(mf_tt)[-seq_len(mf_nlheader(mf_tt)), -1]
#>     mf_result_chars <- .remove_empty_elements(mf_result_chars)
#>     mf_result_numeric <- as.data.frame(
#>       .make_numeric_char_mf(mf_result_chars)
#>     )
#>     mf_result_chars <- as.data.frame(mf_result_chars)
#>     if (!setequal(dim(mf_result_numeric), dim(cellvals)) ||
#>       !setequal(dim(mf_result_chars), dim(cellvals))) {
#>       stop(
#>         "The extracted numeric data.frame does not have the same dimension of the",
#>         " cell values extracted with cell_values(). This is a bug. Please report it."
#>       ) # nocov
#>     }
#>     if (as_strings) {
#>       colnames(mf_result_chars) <- colnames(cellvals)
#>       cellvals <- mf_result_chars
#>     } else {
#>       colnames(mf_result_numeric) <- colnames(cellvals)
#>       cellvals <- mf_result_numeric
#>     }
#>   }
#> 
#>   rdf <- make_row_df(tt)
#> 
#>   df <- cbind(
#>     rdf[
#>       rdf$node_class != "LabelRow",
#>       c("name", "label", "abs_rownumber", "path", "reprint_inds", "node_class")
#>     ],
#>     cellvals
#>   )
#> 
#>   maxlen <- max(lengths(df$path))
#>   metadf <- do.call(
#>     rbind.data.frame,
#>     lapply(
#>       seq_len(NROW(df)),
#>       function(ii) {
#>         handle_rdf_row(df[ii, ], maxlen = maxlen)
#>       }
#>     )
#>   )
#> 
#>   ret <- cbind(
#>     metadf[metadf$node_class != "LabelRow", ],
#>     cellvals
#>   )
#> 
#>   # If we want to expand colnames
#>   if (expand_colnames) {
#>     col_name_structure <- .get_formatted_colnames(clayout(tt))
#>     number_of_non_data_cols <- which(colnames(ret) == "node_class")
#>     if (NCOL(ret) - number_of_non_data_cols != NCOL(col_name_structure)) {
#>       stop(
#>         "When expanding colnames structure, we were not able to find the same",
#>         " number of columns as in the result data frame. This is a bug. Please report it."
#>       ) # nocov
#>     }
#> 
#>     buffer_rows_for_colnames <- matrix(
#>       rep("<only_for_column_names>", number_of_non_data_cols * NROW(col_name_structure)),
#>       nrow = NROW(col_name_structure)
#>     )
#> 
#>     header_colnames_matrix <- cbind(buffer_rows_for_colnames, data.frame(col_name_structure))
#>     colnames(header_colnames_matrix) <- colnames(ret)
#> 
#>     count_row <- NULL
#>     if (disp_ccounts(tt)) {
#>       ccounts <- col_counts(tt)
#>       if (as_strings) {
#>         ccounts <- mf_strings(mf_tt)[mf_nlheader(mf_tt), ]
#>         ccounts <- .remove_empty_elements(ccounts)
#>       }
#>       count_row <- c(rep("<only_for_column_counts>", number_of_non_data_cols), ccounts)
#>       header_colnames_matrix <- rbind(header_colnames_matrix, count_row)
#>     }
#>     ret <- rbind(header_colnames_matrix, ret)
#>   }
#> 
#>   ret
#> }
#> <environment: namespace:rtables>
#> 

lyt <- basic_table() %>%
  split_cols_by("ARM") %>%
  analyze(c("AGE", "BMRKR2"))

tbl <- build_table(lyt, ex_adsl)
path_enriched_df(tbl)
#>                      row_path ARM|A: Drug X ARM|B: Placebo ARM|C: Combination
#> 1      ma_AGE_BMRKR2|AGE|Mean      33.76866       35.43284           35.43182
#> 2    ma_AGE_BMRKR2|BMRKR2|LOW      50.00000       45.00000           40.00000
#> 3 ma_AGE_BMRKR2|BMRKR2|MEDIUM      37.00000       56.00000           42.00000
#> 4   ma_AGE_BMRKR2|BMRKR2|HIGH      47.00000       33.00000           50.00000