Collection of utilities to extract data.frame
from TableTree
objects.
Usage
as_result_df(tt, spec = "v0_experimental", simplify = FALSE, ...)
result_df_specs()
path_enriched_df(tt, path_fun = collapse_path, value_fun = collapse_values)
Arguments
- tt
TableTree
(or related class). A TableTree
object representing a populated table.
- spec
character(1). The specification to use to extract the result data frame. See Details.
- simplify
logical(1). If
TRUE
, the result data frame will have only visible labels and result columns.
- ...
Passed to the spec-specific result data frame conversion function. Currently it can be one or more of the following parameters (valid only for the
v0_experimental
spec for now):
  - expand_colnames: when TRUE, the result data frame will have expanded column names above the usual output. This is useful when the result data frame is used for further processing.
  - simplify: when TRUE, the result data frame will have only visible labels and result columns.
  - as_strings: when TRUE, the result data frame will have all values as strings, as they appear in the final table (they can also be retrieved from matrix_form(tt)$strings). This is also true for column counts if expand_colnames = TRUE.
  - as_viewer: when TRUE, the result data frame will have all values as they appear in the final table, i.e. with the same precision and numbers, but in easy-to-use numeric form.
- path_fun
function. Function to transform paths into single-string row/column names.
- value_fun
function. Function to transform cell values into cells of the data.frame. Defaults to
collapse_values
which creates strings where multi-valued cells are collapsed together, separated by |.
Value
result_df_specs()
: returns a named list of result data frame extraction functions by "specification".
path_enriched_df()
: returns a data frame of tt's cell values (processed by
value_fun
), with columns named by the full column paths (processed by
path_fun
) and an additional row_path
column with the row paths (processed by
path_fun
).
Details
as_result_df()
: Result data frame specifications may differ in the exact information
they include and the form in which they represent it. Specifications whose names end in "_experimental"
are subject to change without notice, but specifications without the "_experimental"
suffix will remain available indefinitely, including any bugs in their construction.
Functions
result_df_specs()
: list of functions that extract result data frames from TableTree
objects.
path_enriched_df()
: transform a TableTree
object to a path-enriched data.frame
.
Examples
lyt <- basic_table() %>%
split_cols_by("ARM") %>%
split_rows_by("STRATA1") %>%
analyze(c("AGE", "BMRKR2"))
tbl <- build_table(lyt, ex_adsl)
as_result_df(tbl)
#> spl_var_1 spl_value_1 avar_name row_name row_num is_group_summary node_class
#> 1 STRATA1 A AGE Mean 3 FALSE DataRow
#> 2 STRATA1 A BMRKR2 LOW 5 FALSE DataRow
#> 3 STRATA1 A BMRKR2 MEDIUM 6 FALSE DataRow
#> 4 STRATA1 A BMRKR2 HIGH 7 FALSE DataRow
#> 5 STRATA1 B AGE Mean 10 FALSE DataRow
#> 6 STRATA1 B BMRKR2 LOW 12 FALSE DataRow
#> 7 STRATA1 B BMRKR2 MEDIUM 13 FALSE DataRow
#> 8 STRATA1 B BMRKR2 HIGH 14 FALSE DataRow
#> 9 STRATA1 C AGE Mean 17 FALSE DataRow
#> 10 STRATA1 C BMRKR2 LOW 19 FALSE DataRow
#> 11 STRATA1 C BMRKR2 MEDIUM 20 FALSE DataRow
#> 12 STRATA1 C BMRKR2 HIGH 21 FALSE DataRow
#> A: Drug X B: Placebo C: Combination
#> 1 33.07895 35.11364 34.225
#> 2 12 16 14
#> 3 10 17 13
#> 4 16 11 13
#> 5 33.85106 36 36.32558
#> 6 19 13 10
#> 7 13 22 16
#> 8 15 10 17
#> 9 34.22449 35.17778 35.63265
#> 10 19 16 16
#> 11 14 17 13
#> 12 16 12 20
result_df_specs()
#> $v0_experimental
#> function(tt,
#> as_viewer = FALSE,
#> as_strings = FALSE,
#> expand_colnames = FALSE) {
#> checkmate::assert_flag(as_viewer)
#> checkmate::assert_flag(as_strings)
#> checkmate::assert_flag(expand_colnames)
#>
#> raw_cvals <- cell_values(tt)
#> ## if the table has one row and multiple columns, sometimes the cell values returns a list of the cell values
#> ## rather than a list of length 1 representing the single row. This is bad but may not be changeable
#> ## at this point.
#> if (nrow(tt) == 1 && length(raw_cvals) > 1) {
#> raw_cvals <- list(raw_cvals)
#> }
#>
#> cellvals <- as.data.frame(do.call(rbind, raw_cvals))
#> row.names(cellvals) <- NULL
#>
#> if (nrow(tt) == 1 && ncol(tt) == 1) {
#> colnames(cellvals) <- names(raw_cvals)
#> }
#>
#> if (as_viewer || as_strings) {
#> # we keep previous calculations to check the format of the data
#> mf_tt <- matrix_form(tt)
#> mf_result_chars <- mf_strings(mf_tt)[-seq_len(mf_nlheader(mf_tt)), -1]
#> mf_result_chars <- .remove_empty_elements(mf_result_chars)
#> mf_result_numeric <- as.data.frame(
#> .make_numeric_char_mf(mf_result_chars)
#> )
#> mf_result_chars <- as.data.frame(mf_result_chars)
#> if (!setequal(dim(mf_result_numeric), dim(cellvals)) ||
#> !setequal(dim(mf_result_chars), dim(cellvals))) {
#> stop(
#> "The extracted numeric data.frame does not have the same dimension of the",
#> " cell values extracted with cell_values(). This is a bug. Please report it."
#> ) # nocov
#> }
#> if (as_strings) {
#> colnames(mf_result_chars) <- colnames(cellvals)
#> cellvals <- mf_result_chars
#> } else {
#> colnames(mf_result_numeric) <- colnames(cellvals)
#> cellvals <- mf_result_numeric
#> }
#> }
#>
#> rdf <- make_row_df(tt)
#>
#> df <- cbind(
#> rdf[
#> rdf$node_class != "LabelRow",
#> c("name", "label", "abs_rownumber", "path", "reprint_inds", "node_class")
#> ],
#> cellvals
#> )
#>
#> maxlen <- max(lengths(df$path))
#> metadf <- do.call(
#> rbind.data.frame,
#> lapply(
#> seq_len(NROW(df)),
#> function(ii) {
#> handle_rdf_row(df[ii, ], maxlen = maxlen)
#> }
#> )
#> )
#>
#> ret <- cbind(
#> metadf[metadf$node_class != "LabelRow", ],
#> cellvals
#> )
#>
#> # If we want to expand colnames
#> if (expand_colnames) {
#> col_name_structure <- .get_formatted_colnames(clayout(tt))
#> number_of_non_data_cols <- which(colnames(ret) == "node_class")
#> if (NCOL(ret) - number_of_non_data_cols != NCOL(col_name_structure)) {
#> stop(
#> "When expanding colnames structure, we were not able to find the same",
#> " number of columns as in the result data frame. This is a bug. Please report it."
#> ) # nocov
#> }
#>
#> buffer_rows_for_colnames <- matrix(
#> rep("<only_for_column_names>", number_of_non_data_cols * NROW(col_name_structure)),
#> nrow = NROW(col_name_structure)
#> )
#>
#> header_colnames_matrix <- cbind(buffer_rows_for_colnames, data.frame(col_name_structure))
#> colnames(header_colnames_matrix) <- colnames(ret)
#>
#> count_row <- NULL
#> if (disp_ccounts(tt)) {
#> ccounts <- col_counts(tt)
#> if (as_strings) {
#> ccounts <- mf_strings(mf_tt)[mf_nlheader(mf_tt), ]
#> ccounts <- .remove_empty_elements(ccounts)
#> }
#> count_row <- c(rep("<only_for_column_counts>", number_of_non_data_cols), ccounts)
#> header_colnames_matrix <- rbind(header_colnames_matrix, count_row)
#> }
#> ret <- rbind(header_colnames_matrix, ret)
#> }
#>
#> ret
#> }
#> <environment: namespace:rtables>
#>
lyt <- basic_table() %>%
split_cols_by("ARM") %>%
analyze(c("AGE", "BMRKR2"))
tbl <- build_table(lyt, ex_adsl)
path_enriched_df(tbl)
#> row_path ARM|A: Drug X ARM|B: Placebo ARM|C: Combination
#> 1 ma_AGE_BMRKR2|AGE|Mean 33.76866 35.43284 35.43182
#> 2 ma_AGE_BMRKR2|BMRKR2|LOW 50.00000 45.00000 40.00000
#> 3 ma_AGE_BMRKR2|BMRKR2|MEDIUM 37.00000 56.00000 42.00000
#> 4 ma_AGE_BMRKR2|BMRKR2|HIGH 47.00000 33.00000 50.00000