Collection of utilities to extract data.frame
objects from TableTree
objects.
Usage
as_result_df(tt, spec = "v0_experimental", simplify = FALSE, ...)
result_df_specs()
path_enriched_df(tt, path_fun = collapse_path, value_fun = collapse_values)
Arguments
- tt
(
TableTree
or related class)
a TableTree
object representing a populated table.
- spec
(
string
)
the specification to use to extract the result data frame. See Details below.
- simplify
(
flag
)
whether the result data frame should only have labels and result columns visible.
- ...
-
additional arguments passed to the spec-specific result data frame conversion function. Currently it can be one or more of the following parameters (valid only for the
v0_experimental
spec for now):
  - expand_colnames: when TRUE, the result data frame will have expanded column names above the usual output. This is useful when the result data frame is used for further processing.
  - simplify: when TRUE, the result data frame will have only visible labels and result columns.
  - as_strings: when TRUE, the result data frame will have all values as strings, as they appear in the final table (it can also be retrieved from matrix_form(tt)$strings). This is also true for column counts if expand_colnames = TRUE.
  - as_viewer: when TRUE, the result data frame will have all values as they appear in the final table, i.e. with the same precision and numbers, but in easy-to-use numeric form.
  - keep_label_rows: when TRUE, the result data frame will have all labels as they appear in the final table.
  - as_is: when TRUE, the result data frame will have all the values as they appear in the final table, but without information about the row structure. Row labels will be assigned to rows so as to work well with df_to_tt().
- path_fun
(
function
)
function to transform paths into single-string row/column names.
- value_fun
(
function
)
function to transform cell values into cells of a data.frame.
Defaults to collapse_values, which creates strings where multi-valued cells are collapsed together, separated by |.
Value
as_result_df()
returns a result data.frame.
result_df_specs()
returns a named list of result data frame extraction functions by "specification".
path_enriched_df()
returns a data.frame of tt's cell values (processed by value_fun), with columns named by the full column paths (processed by path_fun), and an additional row_path column with the row paths (processed by path_fun).
Details
as_result_df()
: Result data frame specifications may differ in the exact information
they include and the form in which they represent it. Specifications whose names end in "_experimental"
are subject to change without notice, whereas specifications without the "_experimental"
suffix will remain available indefinitely, including any bugs in their construction.
Functions
result_df_specs()
: A list of functions that extract result data frames from TableTrees.
path_enriched_df()
: Transform a TableTree object to a path-enriched data.frame.
See also
df_to_tt()
when using as_is = TRUE
and formatters::make_row_df()
to have a comprehensive view of the
hierarchical structure of the rows.
Examples
lyt <- basic_table() %>%
split_cols_by("ARM") %>%
split_rows_by("STRATA1") %>%
analyze(c("AGE", "BMRKR2"))
tbl <- build_table(lyt, ex_adsl)
as_result_df(tbl)
#> spl_var_1 spl_value_1 avar_name row_name label_name row_num is_group_summary
#> 1 STRATA1 A AGE Mean Mean 3 FALSE
#> 2 STRATA1 A BMRKR2 LOW LOW 5 FALSE
#> 3 STRATA1 A BMRKR2 MEDIUM MEDIUM 6 FALSE
#> 4 STRATA1 A BMRKR2 HIGH HIGH 7 FALSE
#> 5 STRATA1 B AGE Mean Mean 10 FALSE
#> 6 STRATA1 B BMRKR2 LOW LOW 12 FALSE
#> 7 STRATA1 B BMRKR2 MEDIUM MEDIUM 13 FALSE
#> 8 STRATA1 B BMRKR2 HIGH HIGH 14 FALSE
#> 9 STRATA1 C AGE Mean Mean 17 FALSE
#> 10 STRATA1 C BMRKR2 LOW LOW 19 FALSE
#> 11 STRATA1 C BMRKR2 MEDIUM MEDIUM 20 FALSE
#> 12 STRATA1 C BMRKR2 HIGH HIGH 21 FALSE
#> node_class A: Drug X B: Placebo C: Combination
#> 1 DataRow 33.07895 35.11364 34.225
#> 2 DataRow 12 16 14
#> 3 DataRow 10 17 13
#> 4 DataRow 16 11 13
#> 5 DataRow 33.85106 36 36.32558
#> 6 DataRow 19 13 10
#> 7 DataRow 13 22 16
#> 8 DataRow 15 10 17
#> 9 DataRow 34.22449 35.17778 35.63265
#> 10 DataRow 19 16 16
#> 11 DataRow 14 17 13
#> 12 DataRow 16 12 20
result_df_specs()
#> $v0_experimental
#> function(tt,
#> as_viewer = FALSE,
#> as_strings = FALSE,
#> expand_colnames = FALSE,
#> keep_label_rows = FALSE,
#> as_is = FALSE) {
#> checkmate::assert_flag(as_viewer)
#> checkmate::assert_flag(as_strings)
#> checkmate::assert_flag(expand_colnames)
#> checkmate::assert_flag(keep_label_rows)
#> checkmate::assert_flag(as_is)
#>
#> if (as_is) {
#> keep_label_rows <- TRUE
#> expand_colnames <- FALSE
#> }
#>
#> raw_cvals <- cell_values(tt)
#> ## if the table has one row and multiple columns, sometimes the cell values returns a list of the cell values
#> ## rather than a list of length 1 representing the single row. This is bad but may not be changeable
#> ## at this point.
#> if (nrow(tt) == 1 && length(raw_cvals) > 1) {
#> raw_cvals <- list(raw_cvals)
#> }
#>
#> # Flatten the list of lists (rows) of cell values into a data frame
#> cellvals <- as.data.frame(do.call(rbind, raw_cvals))
#> row.names(cellvals) <- NULL
#>
#> if (nrow(tt) == 1 && ncol(tt) == 1) {
#> colnames(cellvals) <- names(raw_cvals)
#> }
#>
#> if (as_viewer || as_strings) {
#> # we keep previous calculations to check the format of the data
#> mf_tt <- matrix_form(tt)
#> mf_result_chars <- mf_strings(mf_tt)[-seq_len(mf_nlheader(mf_tt)), -1]
#> mf_result_chars <- .remove_empty_elements(mf_result_chars)
#> mf_result_numeric <- as.data.frame(
#> .make_numeric_char_mf(mf_result_chars)
#> )
#> mf_result_chars <- as.data.frame(mf_result_chars)
#> if (!setequal(dim(mf_result_numeric), dim(cellvals)) || !setequal(dim(mf_result_chars), dim(cellvals))) {
#> stop(
#> "The extracted numeric data.frame does not have the same dimension of the",
#> " cell values extracted with cell_values(). This is a bug. Please report it."
#> ) # nocov
#> }
#> if (as_strings) {
#> colnames(mf_result_chars) <- colnames(cellvals)
#> cellvals <- mf_result_chars
#> } else {
#> colnames(mf_result_numeric) <- colnames(cellvals)
#> cellvals <- mf_result_numeric
#> }
#> }
#>
#> rdf <- make_row_df(tt)
#>
#> df <- rdf[, c("name", "label", "abs_rownumber", "path", "reprint_inds", "node_class")]
#> # Removing initial root elements from path (out of the loop -> right maxlen)
#> df$path <- lapply(df$path, .remove_root_elems_from_path,
#> which_root_name = c("root", "rbind_root"),
#> all = TRUE
#> )
#> maxlen <- max(lengths(df$path))
#>
#> # Loop for metadata (path and details from make_row_df)
#> metadf <- do.call(
#> rbind.data.frame,
#> lapply(
#> seq_len(NROW(df)),
#> function(ii) {
#> handle_rdf_row(df[ii, ], maxlen = maxlen)
#> }
#> )
#> )
#>
#> # Should we keep label rows with NAs instead of values?
#> if (keep_label_rows) {
#> cellvals_mat_struct <- as.data.frame(
#> matrix(NA, nrow = nrow(rdf), ncol = ncol(cellvals))
#> )
#> colnames(cellvals_mat_struct) <- colnames(cellvals)
#> cellvals_mat_struct[metadf$node_class != "LabelRow", ] <- cellvals
#> ret <- cbind(metadf, cellvals_mat_struct)
#> } else {
#> ret <- cbind(
#> metadf[metadf$node_class != "LabelRow", ],
#> cellvals
#> )
#> }
#>
#> # If we want to expand colnames
#> if (expand_colnames) {
#> col_name_structure <- .get_formatted_colnames(clayout(tt))
#> number_of_non_data_cols <- which(colnames(ret) == "node_class")
#> if (NCOL(ret) - number_of_non_data_cols != NCOL(col_name_structure)) {
#> stop(
#> "When expanding colnames structure, we were not able to find the same",
#> " number of columns as in the result data frame. This is a bug. Please report it."
#> ) # nocov
#> }
#>
#> buffer_rows_for_colnames <- matrix(
#> rep("<only_for_column_names>", number_of_non_data_cols * NROW(col_name_structure)),
#> nrow = NROW(col_name_structure)
#> )
#>
#> header_colnames_matrix <- cbind(buffer_rows_for_colnames, data.frame(col_name_structure))
#> colnames(header_colnames_matrix) <- colnames(ret)
#>
#> count_row <- NULL
#> if (disp_ccounts(tt)) {
#> ccounts <- col_counts(tt)
#> if (as_strings) {
#> ccounts <- mf_strings(mf_tt)[mf_nlheader(mf_tt), ]
#> ccounts <- .remove_empty_elements(ccounts)
#> }
#> count_row <- c(rep("<only_for_column_counts>", number_of_non_data_cols), ccounts)
#> header_colnames_matrix <- rbind(header_colnames_matrix, count_row)
#> }
#> ret <- rbind(header_colnames_matrix, ret)
#> }
#>
#> # Using only labels for row names and losing information about paths
#> if (as_is) {
#> tmp_rownames <- ret$label_name
#> ret <- ret[, -seq_len(which(colnames(ret) == "node_class"))]
#> if (length(unique(tmp_rownames)) == length(tmp_rownames)) {
#> rownames(ret) <- tmp_rownames
#> } else {
#> ret <- cbind("label_name" = tmp_rownames, ret)
#> rownames(ret) <- NULL
#> }
#> } else {
#> rownames(ret) <- NULL
#> }
#>
#> ret
#> }
#> <environment: namespace:rtables>
#>
lyt <- basic_table() %>%
split_cols_by("ARM") %>%
analyze(c("AGE", "BMRKR2"))
tbl <- build_table(lyt, ex_adsl)
path_enriched_df(tbl)
#> row_path ARM|A: Drug X ARM|B: Placebo ARM|C: Combination
#> 1 ma_AGE_BMRKR2|AGE|Mean 33.76866 35.43284 35.43182
#> 2 ma_AGE_BMRKR2|BMRKR2|LOW 50.00000 45.00000 40.00000
#> 3 ma_AGE_BMRKR2|BMRKR2|MEDIUM 37.00000 56.00000 42.00000
#> 4 ma_AGE_BMRKR2|BMRKR2|HIGH 47.00000 33.00000 50.00000