dunlin coverage - 97.50%

Files
Source

#' Transforming data.frame into Wide Format
#'
#' @details instead of nesting duplicated values, the function will throw an error if the same parameter is
#'   provided twice for the same observation.
#'
#' @param data (`data.frame`) to be pivoted.
#' @param id (`character`) the name of the column identifying the observations. It will correspond to the row names
#'   of the output.
#' @param param_from (`character`) the name of the column containing the names of the parameters to be pivoted. The
#'   unique values in this column will become column names in the output.
#' @param value_from (`character`) the name of the column containing the values that will populate the output.
#'
#' @return `data.frame` in a wide format.
#'
#' @export
#' @examples
#' test_data <- data.frame(
#'   the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"),
#'   the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"),
#'   the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"),
#'   the_val = c(65, 165, "M", 66, "F", 166, 155, TRUE)
#' )
#'
#' mini_pivot_wider(test_data, "the_obs", "the_param", "the_val")
mini_pivot_wider <- function(data,
                             id,
                             param_from,
                             value_from) {
  # check for duplication of observation-parameter
  checkmate::assert_data_frame(data, min.rows = 1, min.cols = 3)
  checkmate::assert_character(id, len = 1)
  checkmate::assert_character(param_from, len = 1)
  checkmate::assert_character(value_from, len = 1)
  checkmate::assert_subset(c(id, param_from, value_from), colnames(data))
  checkmate::assert_false(any(duplicated(data[, c(id, param_from)])))

  unique_id <- sort(unique(data[[id]]))
  param <- data[[param_from]]

  mini_data <- data[, c(id, param_from, value_from)]
  data_ls <- split(mini_data, param)

  # transform to named vector
  data_vec <-
    lapply(
      data_ls,
      function(x) setNames(x[[value_from]], x[[id]])
    )

  # query each id in each param
  all_vec <- lapply(data_vec, function(x) setNames(x[unique_id], unique_id))

  bind_data <- as.data.frame(all_vec)

  res <- cbind(id = unique_id, bind_data)
  rownames(res) <- NULL

  res
}

#' Transforming data.frame with Complex Identifiers into Wide Format
#'
#' @details This function allows to identify observations on the basis of several columns. Warning: Instead of nesting
#'   duplicated values, the function will throw an error if the same parameter is provided twice for the same
#'   observation.
#'
#' @param data (`data.frame`) to be pivoted.
#' @param id (`character`) the name of the columns whose combination uniquely identify the observations.
#' @param param_from (`character`) the name of the column containing the names of the parameters to be pivoted. The
#'   unique values in this column will become column names in the output.
#' @param value_from (`character`) the name of the column containing the values that will populate the output.
#' @param drop_na (`logical`) should column containing only `NAs` be dropped.
#'
#' @return `data.frame` in a wide format.
#'
#' @export
#' @examples
#' test_data <- data.frame(
#'   the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"),
#'   the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"),
#'   the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"),
#'   the_val = c(65, 165, "M", 66, "F", 166, 155, TRUE)
#' )
#'
#' multi_pivot_wider(test_data, c("the_obs", "the_obs2"), "the_param", "the_val")
#' multi_pivot_wider(test_data, "the_obs2", "the_param", "the_val")
multi_pivot_wider <- function(data,
                              id,
                              param_from,
                              value_from,
                              drop_na = FALSE) {
  # check for duplication of observation-parameter
  checkmate::assert_data_frame(data, min.rows = 1, min.cols = 3)
  checkmate::assert_character(id)
  checkmate::assert_character(param_from, len = 1)
  checkmate::assert_character(value_from, len = 1)
  checkmate::assert_false(any(duplicated(data[, c(id, param_from)])))
  checkmate::assert_subset(c(id, param_from, value_from), colnames(data))

  # find a way to sort
  unique_id <- unique(data[id])
  key <- apply(unique_id[id], 1, paste, collapse = "-")
  unique_id <- cbind(key, unique_id)

  param <- data[[param_from]]

  mini_data <- data[, c(param_from, value_from)]
  f_key <- apply(data[id], 1, paste, collapse = "-")
  mini_data <- cbind(f_key, mini_data)

  data_ls <- split(mini_data, param)

  # Transform to named vector, the first column is the key.
  data_vec <-
    lapply(
      data_ls,
      function(x) setNames(x[[value_from]], x[, 1])
    )

  # query each id in each param
  all_vec <- lapply(data_vec, function(x) x[unique_id[, 1]])

  if (drop_na) all_vec <- Filter(function(x) !all(is.na(x)), all_vec)

  bind_data <- do.call(cbind, all_vec)

  res <- cbind(unique_id[, -1, drop = FALSE], bind_data)

  rownames(res) <- NULL
  res
}

#' Transforming data.frame with multiple Data Column into Wide Format
#'
#' @details This function is adapted to cases where the data are distributed in several columns while the name of the
#'   parameter is in one. Typical example is `adsub` where numeric data are stored in `AVAL` while categorical data are
#'   in `AVALC`.
#'
#' @param data (`data.frame`) to be pivoted.
#' @param id (`character`) the name of the columns whose combination uniquely identify the observations.
#' @param param_from (`character`) the name of the columns containing the names of the parameters to be pivoted. The
#'   unique values in this column will become column names in the output.
#' @param value_from (`character`) the name of the column containing the values that will populate the output.
#' @param labels_from (`character`) the name of the column congaing the labels of the new columns. from. If not
#'   provided, the labels will be equal to the column names. When several labels are available for the same column, the
#'   first one will be selected.
#'
#' @return `list` of `data.frame` in a wide format with label attribute attached to each columns.
#'
#' @export
#' @examples
#' test_data <- data.frame(
#'   the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"),
#'   the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"),
#'   the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"),
#'   the_label = c(
#'     "Weight (Kg)", "Height (cm)", "Gender", "Weight (Kg)",
#'     "Gender", "Height (cm)", "Height (cm)", "Pre-condition"
#'   ),
#'   the_val = c(65, 165, NA, 66, NA, 166, 155, NA),
#'   the_val2 = c(65, 165, "M", 66, "F", 166, 155, TRUE)
#' )
#'
#' x <- poly_pivot_wider(
#'   test_data,
#'   c("the_obs", "the_obs2"),
#'   "the_param",
#'   c("the_val", "the_val2"),
#'   "the_label"
#' )
#' x
#' Reduce(function(u, v) merge(u, v, all = TRUE), x)
poly_pivot_wider <- function(data,
                             id,
                             param_from,
                             value_from,
                             labels_from = NULL) {
  # other tests are performed at lower levels.
  checkmate::assert_character(value_from, unique = TRUE)

  # Create new labels for new columns.
  if (is.null(labels_from) || labels_from == param_from) {
    new_labels <- unique(data[[param_from]])
    names(new_labels) <- new_labels
  } else {
    checkmate::assert_character(labels_from, len = 1)
    checkmate::assert_subset(labels_from, colnames(data))

    new_labels_df <- data[, c(labels_from, param_from)]
    new_labels_df <- unique(new_labels_df)

    new_labels <- as.character(new_labels_df[[labels_from]])
    names(new_labels) <- as.character(new_labels_df[[param_from]])
  }

  # Retrieve old labels.
  old_labels <- lapply(data, attr, "label")
  n_old_label <- names(old_labels)
  null_label <- unlist(lapply(old_labels, is.null))
  old_labels[null_label] <- n_old_label[null_label]
  old_labels <- unlist(old_labels)

  all_labels <- c(new_labels, old_labels)

  res_ls <- list()
  for (n_value_from in value_from) {
    res <- multi_pivot_wider(
      data = data,
      id = id,
      param_from = param_from,
      value_from = n_value_from,
      drop_na = TRUE
    )

    res <- attr_label_df(res, all_labels[colnames(res)])
    res_ls[[n_value_from]] <- res
  }
  res_ls
}

#' Join `adsub` to `adsl`
#'
#' @param adam_db (`list` of `data.frame`) object input with an `adsl` and `adsub` table.
#' @param keys (`character`) the name of the columns in `adsl` uniquely identifying a row.
#' @param continuous_var (`character`) the value of a parameter in the `PARAMCD` column of the `adsub` table from which
#'   columns containing continuous values should be created. If `"all"`, all parameter values are selected, if `NULL`,
#'   none are selected.
#' @param categorical_var (`character`) the value of a parameter in the `PARAMCD` column of the `adsub` table from which
#'   columns containing categorical values should be created. If `"all"`, all parameter values are selected, if `NULL`,
#'   none are selected.
#' @param continuous_suffix (`string`) the suffixes to add to the newly generated columns containing continuous values.
#' @param categorical_suffix (`string`) the suffixes to add to the newly generated columns containing categorical
#'   values.
#'
#' @return a `list` of `data.frame` with new columns in the `adsl` table.
#'
#' @rdname join_adsub_adsl
#' @export
#'
join_adsub_adsl <- function(adam_db,
                            keys,
                            continuous_var,
                            categorical_var,
                            continuous_suffix,
                            categorical_suffix) {
  UseMethod("join_adsub_adsl")
}

#' @rdname join_adsub_adsl
#' @export
#'
#' @examples
#' adsl <- data.frame(
#'   USUBJID = c("S1", "S2", "S3", "S4"),
#'   STUDYID = "My_study",
#'   AGE = c(60, 44, 23, 31)
#' )
#'
#' adsub <- data.frame(
#'   USUBJID = c("S1", "S2", "S3", "S4", "S1", "S2", "S3"),
#'   STUDYID = "My_study",
#'   PARAM = c("weight", "weight", "weight", "weight", "height", "height", "height"),
#'   PARAMCD = c("w", "w", "w", "w", "h", "h", "h"),
#'   AVAL = c(98, 75, 70, 71, 182, 155, 152),
#'   AVALC = c(">80", "<=80", "<=80", "<=80", ">180", "<=180", "<=180")
#' )
#'
#' db <- list(adsl = adsl, adsub = adsub)
#'
#' x <- join_adsub_adsl(adam_db = db)
#' x <- join_adsub_adsl(adam_db = db, continuous_var = c("w", "h"), categorical_var = "h")
join_adsub_adsl.list <- function(adam_db,
                                 keys = c("USUBJID", "STUDYID"),
                                 continuous_var = "all",
                                 categorical_var = "all",
                                 continuous_suffix = "",
                                 categorical_suffix = "_CAT") {
  checkmate::assert_list(adam_db, types = "data.frame")
  checkmate::assert_names(names(adam_db), must.include = c("adsl", "adsub"))
  checkmate::assert_names(names(adam_db$adsub), must.include = c("PARAM", "PARAMCD", "AVAL", "AVALC", keys))
  checkmate::assert_names(names(adam_db$adsl), must.include = keys)
  checkmate::assert_string(continuous_suffix)
  checkmate::assert_string(categorical_suffix)

  value_col <- c("AVAL", "AVALC")
  vars_ls <- list(continuous_var, categorical_var)
  suffix_ls <- list(continuous_suffix, categorical_suffix)

  # Select variables names.
  vars_ls <- lapply(vars_ls, function(x) {
    if (identical(x, "all")) {
      unique(adam_db$adsub$PARAMCD)
    } else {
      x
    }
  })

  # Create new variable names.
  vars_nam <- mapply(
    function(x, y) {
      if (!is.null(x)) {
        names(x) <- paste0(x, y)
        x
      } else {
        NULL
      }
    },
    vars_ls,
    suffix_ls,
    SIMPLIFY = FALSE
  )

  # Test if new columns already exist in adsl.
  assert_names_notadsl(vars_nam, adam_db$adsl)

  # Test if categorical and continuous column will result in the same column name.
  assert_names_collision(vars_nam)

  # Pivot and keep labels.
  adsub_wide_ls <-
    adam_db$adsub %>%
    poly_pivot_wider(id = keys, param_from = "PARAMCD", value_from = value_col, labels_from = "PARAM")

  # Merge categorical and continuous variables.
  for (i in seq_along(value_col)) {
    adsub_df <- adsub_wide_ls[[value_col[i]]]
    adsub_df <- adsub_df[, c(keys, vars_nam[[i]])]
    colnames(adsub_df) <- c(keys, names(vars_nam[[i]]))

    adam_db$adsl <- dplyr::left_join(
      x = adam_db$adsl,
      y = adsub_df,
      by = keys
    )
  }

  adam_db
}

# Utility functions ----

assert_names_collision <- function(vars_nam) {
  final_names_ls <- lapply(vars_nam, names)
  in_both <- final_names_ls[[1]] %in% final_names_ls[[2]]
  if (any(in_both)) {
    warning(
      paste(
        toString(final_names_ls[[1]][in_both]),
        "are new columns for continuous and categorical variable,
Please set different `continuous_suffix` or `categorical_suffix`
or select different columns to avoid automatic renaming."
      )
    )
  }
}

assert_names_notadsl <- function(vars_nam, df) {
  final_names <- unique(sapply(vars_nam, names))
  already_in_adsl <- final_names %in% colnames(df)
  if (any(already_in_adsl)) {
    warning(
      paste(
        toString(final_names[already_in_adsl]),
        "already exist in adsl, the name will default to another values.
Please change `continuous_suffix` or `categorical_suffix` to avoid automatic reneaming"
      )
    )
  }
}

#' Filter Data with Log
#' @param data (`data.frame`) input data to subset, or named (`list`) of (`data.frame`).
#' @param condition (`call`) of subset condition. Must evaluate as logical.
#' @param suffix (`string`) optional argument describing the filter.
#' @param ... further arguments to be passed to or from other methods.
#'
#' @details
#' `log_filter` will filter the data/named list of data according to the `condition`.
#' All the variables in `condition` must exist in the data (as variables) or in the parent
#' frame(e.g., in global environment).
#' For named list of data, if `ADSL` is available, `log_filter` will also try to subset all
#' other datasets with `USUBJID`.
#' @export
log_filter <- function(data, condition, ...) {
  UseMethod("log_filter")
}

#' @rdname log_filter
#' @export
#' @examples
#' data <- iris
#' attr(data$Sepal.Length, "label") <- "cm"
#' log_filter(data, Sepal.Length >= 7)
#'
log_filter.data.frame <- function(data, condition, suffix = NULL, ...) {
  checkmate::assert_string(suffix, null.ok = TRUE)

  condition <- match.call()$condition
  vars <- all.vars(condition)
  var_in_env <- vapply(vars, exists, envir = parent.frame(), inherits = TRUE, FUN.VALUE = TRUE)
  var_in_data <- vapply(vars, `%in%`, table = names(data), FUN.VALUE = TRUE)
  if (!all(var_in_env | var_in_data)) {
    stop(sprintf("Variable %s not found in data or environment.", toString(vars[!(var_in_data | var_in_env)])))
  }
  res <- eval(bquote(dplyr::filter(data, .(condition))))
  rows <- list(list(init = nrow(data), final = nrow(res), suffix = suffix))
  rlbl <- paste0(deparse(condition), collapse = "")
  rlbl <- stringr::str_replace_all(rlbl, "\\s+", " ")
  names(rows) <- rlbl
  attr(res, "rows") <- c(attr(data, "rows"), rows)

  res
}

#' @rdname log_filter
#' @param table (`string`) table name.
#' @param by (`character`) variable names shared by `adsl` and other datasets for filtering.
#' @export
#' @examples
#' log_filter(list(iris = iris), Sepal.Length >= 7, "iris", character(0))
log_filter.list <- function(data, condition, table, by = c("USUBJID", "STUDYID"), suffix = NULL, ...) {
  checkmate::assert_list(data, types = "data.frame", names = "unique")
  checkmate::assert_subset(table, names(data))
  checkmate::assert_names(colnames(data[[table]]), must.include = by)
  condition <- match.call()$condition
  data[[table]] <- eval(bquote(log_filter(data[[table]], .(condition), .(suffix))))
  if (identical(table, "adsl")) {
    for (k in setdiff(names(data), "adsl")) {
      if (all(by %in% names(data[[k]]))) {
        if (length(by) == 0) by <- intersect(names(data[[k]]), names(data$adsl))

        ori_n <- nrow(data[[k]])
        ori_att <- attr(data[[k]], "rows")

        data[[k]] <- dplyr::semi_join(data[[k]], data$adsl, by = by)

        rows <- list(list(init = ori_n, final = nrow(data[[k]]), suffix = suffix))
        names(rows) <- paste0("Filtered by adsl: ", deparse(condition), collapse = "")
        attr(data[[k]], "rows") <- c(ori_att, rows)
      }
    }
  }
  return(data)
}

# Get Log ----

#' Get Log
#'
#' @param data (`list` of `data.frame` or `data.frame`) filtered with `log_filter`.
#' @param incl (`flag`) should information about unfiltered `data.frame` be printed.
#' @param incl.adsl (`flag`) should indication  of filtering performed through `adsl` be printed.
#'
#' @export
get_log <- function(data, incl, incl.adsl) {
  UseMethod("get_log")
}

#' @rdname get_log
#' @export
#' @examples
#' data <- log_filter(iris, Sepal.Length >= 7, "xx")
#' data <- log_filter(data, Sepal.Length < 2)
#' data <- log_filter(data, Sepal.Length >= 2, "yy")
#' get_log(data)
#'
get_log.data.frame <- function(data, incl = TRUE, incl.adsl = TRUE) {
  checkmate::assert_flag(incl)

  att <- attr(data, "rows")
  if (!incl.adsl) {
    sel <- grepl("Filtered by adsl", names(att))
    att <- att[!sel]
  }

  if (length(att) != 0L) {
    start_row <- lapply(att, "[[", "init")
    end_row <- lapply(att, "[[", "final")
    suffix <- lapply(att, "[[", "suffix")
    suffix <- vapply(suffix, function(x) ifelse(is.null(x), "", paste0(x, ": ")), character(1))
    res <- paste0(suffix, names(att), " [", start_row, " --> ", end_row, " rows.]")
  } else if (incl) {
    paste0("No filtering [", nrow(data), " rows.]")
  } else {
    NULL
  }
}


#' @rdname get_log
#' @export
#' @examples
#' data <- log_filter(
#'   list(iris1 = iris, iris2 = iris),
#'   Sepal.Length >= 7,
#'   "iris1",
#'   character(0),
#'   "Sep"
#' )
#' get_log(data)
#'
get_log.list <- function(data, incl = TRUE, incl.adsl = TRUE) {
  checkmate::assert_list(data, types = "data.frame", names = "unique")
  checkmate::assert_flag(incl)

  lapply(data, get_log, incl = incl, incl.adsl = incl.adsl)
}

# Print Log ----

#' Print Log
#'
#' @inheritParams get_log
#' @export
#'
print_log <- function(data, incl, incl.adsl) {
  UseMethod("print_log")
}

#' @rdname print_log
#' @export
#' @examples
#' data <- log_filter(iris, Sepal.Length >= 7, "Sep")
#' print_log(data)
print_log.data.frame <- function(data, incl = TRUE, incl.adsl = TRUE) {
  checkmate::assert_flag(incl)

  cat("Filter Log:")
  cat(paste0("\n  ", get_log(data, incl = incl, incl.adsl = incl.adsl)))
  cat("\n")
  invisible()
}

#' @rdname print_log
#' @export
#' @examples
#' data <- log_filter(
#'   list(
#'     adsl = iris,
#'     iris2 = iris,
#'     mtcars = mtcars,
#'     iris3 = iris
#'   ),
#'   Sepal.Length >= 7,
#'   "adsl",
#'   character(0),
#'   "adsl filter"
#' )
#' data <- log_filter(data, Sepal.Length >= 7, "iris2", character(0), "iris2 filter")
#' print_log(data)
#' print_log(data, incl = FALSE)
#' print_log(data, incl.adsl = FALSE, incl = FALSE)
print_log.list <- function(data, incl = TRUE, incl.adsl = TRUE) {
  checkmate::assert_list(data, types = "data.frame", names = "unique")
  checkmate::assert_flag(incl)

  filter_log <- get_log(data, incl = incl, incl.adsl = incl.adsl)

  if (!incl) {
    filter_log <- filter_log[!vapply(filter_log, is.null, logical(1))]
  }

  cat("Filter Log:")
  if (length(filter_log) == 0) {
    cat("\n  No filtering")
  } else {
    mapply(
      function(x, y) {
        cat(paste0("\n  - ", x, ":"))
        cat(paste0("\n  ", y, ""))
      },
      as.list(names(filter_log)),
      filter_log
    )
  }
  cat("\n")

  invisible()
}

#' Reformat Values
#' @param obj object to reformat.
#' @param format (`rule`) or (`list`) of `rule` depending on the class of obj.
#' @param ... for compatibility between methods and pass additional special mapping to transform rules.
#' * `.string_as_fct` (`flag`) whether the reformatted character object should be converted to factor.
#' * `.to_NA` (`character`) values that should be converted to `NA`. For `factor`, the corresponding levels are
#'   dropped. If `NULL`, the argument will be taken from the `to_NA`attribute of the rule.
#' * `.drop` (`flag`) whether to drop empty levels. If `NULL`, the argument will be taken from the `drop`attribute of
#'   the rule.
#' * `.na_last` (`flag`) whether the level replacing `NA` should be last.
#'
#' @export
#' @note When the rule is empty rule or when values subject to reformatting are absent from the object, no error is
#'   raised. The conversion to factor if `.string_as_fct = TRUE`) is still carried out. The conversion of the levels
#'   declared in `.to_NA` to `NA` values occurs after the remapping. `NA` values created this way are not affected by a
#'   rule declaring a remapping of `NA` values. For factors, level dropping is the last step, hence, levels converted to
#'   `NA` by the `.to_NA` argument, will be removed if `.drop` is `TRUE`. Arguments passed via `reformat` override the
#'   ones defined during rule creation.
#'
#' @rdname reformat
#'
reformat <- function(obj, ...) {
  UseMethod("reformat")
}

#' @export
#' @rdname reformat
reformat.default <- function(obj, format, ...) {
  if (!is(format, "empty_rule")) {
    warning(paste0(c("Not implemented for class: ", toString(class(obj)), "! Only empty rule allowed.")))
  }
  return(obj)
}

#' @export
#' @rdname reformat
#'
#' @examples
#'
#' # Reformatting of character.
#' obj <- c("a", "b", "x", NA)
#' attr(obj, "label") <- "my label"
#' format <- rule("A" = "a", "NN" = NA)
#'
#' reformat(obj, format)
#' reformat(obj, format, .string_as_fct = FALSE, .to_NA = "x")
#' reformat(obj, empty_rule, .string_as_fct = FALSE, .to_NA = "x")
#'
reformat.character <- function(obj, format, ...) {
  checkmate::assert_class(format, "rule")

  # Give priority to argument defined in reformat.
  format <- do.call(rule, modifyList(as.list(format), list(...)))

  if (attr(format, ".string_as_fct")) {
    # Keep attributes.
    att <- attributes(obj)
    obj_fact <- as.factor(obj)
    supp_att_name <- setdiff(names(att), attributes(obj_fact))
    supp_att <- att[supp_att_name]
    attributes(obj_fact) <- c(attributes(obj_fact), supp_att)

    reformat(obj_fact, format)
  } else {
    if (!is(format, "empty_rule")) {
      value_match <- unlist(format)
      m <- match(obj, value_match)
      obj[!is.na(m)] <- names(format)[m[!is.na(m)]]
    }


    val_to_NA <- attr(format, ".to_NA")
    if (!is.null(val_to_NA)) {
      obj[obj %in% val_to_NA] <- NA_character_
    }

    obj
  }
}

#' @export
#' @rdname reformat
#'
#' @examples
#'
#' # Reformatting of factor.
#' obj <- factor(c("first", "a", "aa", "b", "x", NA), levels = c("first", "x", "b", "aa", "a", "z"))
#' attr(obj, "label") <- "my label"
#' format <- rule("A" = c("a", "aa"), "NN" = c(NA, "x"), "Not_present" = "z", "Not_a_level" = "P")
#'
#' reformat(obj, format)
#' reformat(obj, format, .na_last = FALSE, .to_NA = "b", .drop = FALSE)
#' reformat(obj, empty_rule, .na_last = FALSE, .to_NA = "b", .drop = FALSE)
#'
reformat.factor <- function(obj, format, ...) {
  checkmate::assert_class(format, "rule")

  format <- do.call(rule, modifyList(as.list(format), list(...)))

  if (!is(format, "empty_rule")) {
    any_na <- anyNA(obj)
    if (any(is.na(format)) && any_na) {
      obj <- forcats::fct_na_value_to_level(obj)
    }

    absent_format <- format[!format %in% levels(obj)]
    sel_format <- format[format %in% levels(obj)]
    obj <- forcats::fct_recode(obj, !!!sel_format)
    obj <- forcats::fct_expand(obj, unique(names(absent_format)))
    obj <- forcats::fct_relevel(obj, unique(names(format)))

    if (any(is.na(format)) && attr(format, ".na_last")) {
      na_lvl <- names(format)[is.na(format)]
      obj <- forcats::fct_relevel(obj, na_lvl, after = Inf)
    }
  }

  drop_lvl <- attr(format, ".drop")
  if (drop_lvl) {
    obj <- forcats::fct_drop(obj)
  }

  # Levels converted to NA are dropped.
  val_to_NA <- attr(format, ".to_NA")
  if (!is.null(val_to_NA)) {
    obj <- forcats::fct_na_level_to_value(obj, val_to_NA)
  }

  obj
}

#' @export
#' @rdname reformat
#'
#' @examples
#'
#' # Reformatting of list of data.frame.
#' df1 <- data.frame(
#'   var1 = c("a", "b", NA),
#'   var2 = factor(c("F1", "F2", NA))
#' )
#'
#' df2 <- data.frame(
#'   var1 = c("x", NA, "y"),
#'   var2 = factor(c("F11", NA, "F22"))
#' )
#'
#' db <- list(df1 = df1, df2 = df2)
#'
#' format <- list(
#'   df1 = list(
#'     var1 = rule("X" = "x", "N" = NA, .to_NA = "b")
#'   ),
#'   df2 = list(
#'     var1 = empty_rule,
#'     var2 = rule("f11" = "F11", "NN" = NA)
#'   )
#' )
#'
#' reformat(db, format)
reformat.list <- function(obj, format, ...) {
  checkmate::assert_list(obj, types = c("data.frame", "tibble"))
  checkmate::assert_named(obj)
  checkmate::assert_list(format, names = "unique", types = "list", null.ok = TRUE)

  if (length(format) == 0) {
    return(obj)
  }

  assert_valid_format(format)

  for (tab in names(format)) {
    local_map <- format[[tab]]
    local_map <- local_map[names(local_map) %in% names(obj[[tab]])]

    obj[[tab]][names(local_map)] <- mapply(
      function(rl, col) reformat(obj[[tab]][[col]], format = rl, ...),
      local_map,
      names(local_map),
      SIMPLIFY = FALSE
    )
  }

  obj
}

#' Create rule based on mappings
#' @param ... Mapping pairs, the argument name is the transformed while
#' its values are original values.
#' @param .lst (`list`) of mapping.
#' @param .string_as_fct (`flag`) whether to convert characters to factors.
#' @param .na_last (`flag`)  whether the level replacing `NA` should be last.
#' @param .drop (`flag`) whether to drop empty levels.
#' @param .to_NA (`character`) values that should be converted to `NA`.
#'
#' @note Conversion to `NA` is the last step of the remapping process.

#'
#' @export
#' @examples
#' rule("X" = "x", "Y" = c("y", "z"))
#' rule("X" = "x", "Y" = c("y", "z"), .drop = TRUE, .to_NA = c("a", "b"), .na_last = FALSE)
#'
rule <- function(..., .lst = list(...), .string_as_fct = TRUE, .na_last = TRUE, .drop = FALSE, .to_NA = NULL) {
  checkmate::assert_flag(.string_as_fct)
  checkmate::assert_flag(.na_last)
  checkmate::assert_flag(.drop)
  checkmate::assert_character(.to_NA, null.ok = TRUE, any.missing = FALSE)

  if (length(.lst) == 0) {
    res <- empty_rule
    attr(res, ".string_as_fct") <- .string_as_fct %||% TRUE
    attr(res, ".drop") <- .drop %||% FALSE
    attr(res, ".to_NA") <- .to_NA %||% NULL
    return(res)
  } else {
    .lst[is.na(.lst)] <- NA_character_
    if (!checkmate::test_list(.lst, types = c("character"))) {
      stop("Value mapping may only contain the type: {character}")
    }
    vals <- as.character(unlist(.lst, use.names = FALSE))
    checkmate::assert_character(vals, unique = TRUE)
    nms <- unlist(lapply(seq_len(length(.lst)), function(x) {
      rep(names(.lst)[x], length(.lst[[x]]))
    }))

    res <- structure(
      setNames(vals, nms),
      class = c("rule", "character"),
      .string_as_fct = .string_as_fct,
      .na_last = .na_last,
      .drop = .drop,
      .to_NA = .to_NA
    )

    res
  }
}

#' Create empty rule
#' @export
empty_rule <- structure(
  character(0L),
  class = c("empty_rule", "rule", "character"),
  .string_as_fct = TRUE,
  .na_last = FALSE,
  .drop = FALSE
)

#' @export
#'
print.rule <- function(x, ...) {
  cat("Mapping of:\n")
  nms <- names(x)
  for (i in seq_len(length(x))) {
    cat(nms[i], " <- ", if (length(x[[i]]) > 1) sprintf("[%s]", toString(x[[i]])) else x[[i]], "\n")
  }
  if (!is.null(attr(x, ".to_NA"))) cat("NA <- ", toString(attr(x, ".to_NA")), "\n")
  cat("Convert to factor:", attr(x, ".string_as_fct"), "\n")
  cat("Drop unused level:", attr(x, ".drop"), "\n")
  cat("NA-replacing level in last position:", attr(x, ".na_last"), "\n")
}

#' Convert nested list into list of `rule`
#' @param obj (`nested list`) to convert into list of rules.
#' @export
#' @examples
#' obj <- list(
#'   rule1 = list("X" = c("a", "b"), "Z" = "c", .to_NA = "xxxx"),
#'   rule2 = list(Missing = c(NA, "")),
#'   rule3 = list(Missing = c(NA, ""), .drop = TRUE),
#'   rule4 = list(Absent = c(NA, ""), .drop = TRUE, .to_NA = "yyyy")
#' )
#' list2rules(obj)
#'
list2rules <- function(obj) {
  coll <- checkmate::makeAssertCollection()
  checkmate::assert_list(obj, unique = TRUE, types = "list", add = coll)
  checkmate::assert_names(names(obj), type = "unique", add = coll)
  checkmate::reportAssertions(coll)

  lapply(obj, function(x) {
    do.call("rule", x)
  })
}

#' Convert Rule to List
#' @param x (`rule`) to convert.
#' @param ... not used.
#'
#' @export
#' @examples
#'
#' x <- rule("a" = c("a", "b"), "X" = "x")
#' as.list(x)
as.list.rule <- function(x, ...) {
  nms <- names(x)
  unames <- unique(nms)
  res <- lapply(unames, function(i) {
    unname(x[nms == i])
  })


  att <- attributes(x)
  arg <- att[!names(att) %in% c("names", "class")]

  res <- c(res, unname(arg))
  unames <- c(unames, names(arg))

  setNames(res, unames)
}

#' @export
print.empty_rule <- function(x, ...) {
  cat("Empty mapping\n")
  if (!is.null(attr(x, ".to_NA"))) cat("NA <- ", toString(attr(x, ".to_NA")), "\n")
  cat("Convert to factor:", attr(x, ".string_as_fct"), "\n")
  cat("Drop unused level:", attr(x, ".drop"), "\n")
}

#' Propagate Column
#'
#' `propagate`copy columns from a given table of a `list` of `data.frame` to all tables based on other
#' common columns. If several rows are associated with the same key, the rows will be duplicated in the receiving
#' tables. In safe mode, the key must be unique in the original table.
#'
#' @param db (`list` of `data.frame`) object for which some variable need to be propagated.
#' @param from (`string`) the name of the table where the variables to propagate are stored.
#' @param add (`character`) the names of the variables to propagate.
#' @param by (`character`) the key binding the `from` table to the other tables.
#' @param safe (`flag`) should the key be checked for uniqueness in the `from` table.
#'
#' @return updated `list` of `data.frame`.
#'
#' @rdname propagate
#' @export
#'
propagate <- function(db, from, add, by, safe = TRUE) {
  UseMethod("propagate")
}

#' @rdname propagate
#' @export
#'
#'
#' @examples
#' df1 <- data.frame(
#'   id1 = c("a", "a", "c", "d", "e", "f"),
#'   id2 = c("A", "B", "A", "A", "A", "A"),
#'   int = c(1, 2, 3, 4, 5, 6),
#'   bool = c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE)
#' )
#'
#' df2 <- data.frame(
#'   id1 = c("a", "a", "d", "e", "f", "g"),
#'   id2 = c("A", "B", "A", "A", "A", "A")
#' )
#'
#' df3 <- data.frame(
#'   id1 = c("a", "c", "d", "e", "f", "x"),
#'   id2 = c("A", "A", "A", "A", "B", "A"),
#'   int = c(11, 22, 33, 44, 55, 66)
#' )
#'
#' db <- list(df1 = df1, fd2 = df2, df3 = df3)
#' propagate(db, from = "df1", add = c("int", "bool"), by = c("id1", "id2"))
#'
propagate.list <- function(db, from, add, by, safe = TRUE) {
  checkmate::assert_list(db, types = "data.frame", names = "unique")
  checkmate::assert_names(names(db), must.include = from)
  checkmate::assert_names(colnames(db[[from]]), must.include = add)
  checkmate::assert_names(colnames(db[[from]]), must.include = by)
  checkmate::assert_flag(safe)

  if (safe) {
    keys <- db[[from]][, by]
    if (anyDuplicated(keys)) stop(paste("Duplicated key"))
  }

  toJoin <- db[[from]]

  for (tab_name in setdiff(names(db), from)) {
    tab_colnames <- colnames(db[[tab_name]])
    if (!all(add %in% tab_colnames) && all(by %in% tab_colnames)) {
      missing_var <- setdiff(add, tab_colnames)
      sel_var <- c(missing_var, by)
      sel_tab <- toJoin[, sel_var]

      cat(paste0("\nUpdating: ", tab_name, " with: ", toString(missing_var)))

      db[[tab_name]] <- db[[tab_name]] %>%
        dplyr::left_join(sel_tab, by = by)
    } else {
      cat(paste0("\nSkipping: ", tab_name))
    }
  }
  cat("\n")
  return(db)
}

#' Safe transformer
#' @details Obtain content in global environment by default.
#' If not found, use the environment here.
#' @keywords internal
safe_transformer <- function(text, envir) {
  text_lower <- tolower(text)
  res <- if (exists(text_lower, envir = envir, inherits = FALSE)) {
    get(text_lower, envir = envir)
  } else {
    text
  }
  if (is.character(res)) {
    if (identical(text, tolower(text))) {
      res <- tolower(res)
    } else if (identical(text, toupper(text))) {
      res <- toupper(res)
    } else if (identical(text, stringr::str_to_title(text))) {
      res <- stringr::str_to_title(res)
    }
  }
  res
}

#' Render whiskers safely
#' @param x (`character`) input to be rendered safely.
#' @export
render_safe <- function(x) {
  checkmate::assert_character(x, null.ok = TRUE)
  if (is.null(x)) {
    return(NULL)
  }
  ret <- lapply(
    x,
    glue::glue,
    .transformer = safe_transformer,
    .envir = whisker_env,
    .null = "NULL",
    .open = "{",
    .close = "}"
  )
  ret <- vapply(ret, `[[`, i = 1L, FUN.VALUE = "")
  setNames(ret, names(x))
}
#' Add whisker values
#' @param x Named (`character`) input.
#' @export
add_whisker <- function(x) {
  checkmate::assert_character(x, names = "unique", any.missing = FALSE)
  lapply(
    names(x),
    function(i) {
      assign(i, x[i], envir = whisker_env)
    }
  )
  invisible()
}

#' Remove whisker values
#' @param x Named (`character`) input.
#' @export
remove_whisker <- function(x) {
  checkmate::assert_character(x, any.missing = FALSE)
  rm(list = x, envir = whisker_env)
}

#' Unite Columns of a Table in a `list` of `data.frame`.
#'
#' @param adam_db (`list` of `data.frames`) to be transformed.
#' @param tab (`string`) the name of a table in the `adam_db` object.
#' @param cols (`character`) the name of the columns to unite.
#' @param sep (`string`) the separator for the new column name.
#' @param new (`string`) the name of the new column. If `NULL` the concatenation of `cols` separated by `sep` is used.
#'
#' @return `list` of `data.frames` object with a united column.
#' @export
#'
#' @examples
#' db <- list(mtcars = mtcars, iris = iris)
#'
#' x <- ls_unite(db, "mtcars", c("mpg", "hp"), new = "FUSION")
#' x$mtcars
ls_unite <- function(adam_db, tab, cols, sep = ".", new = NULL) {
  checkmate::assert_list(adam_db, types = "data.frame")
  checkmate::assert_string(tab)
  checkmate::assert_names(names(adam_db), must.include = tab)
  checkmate::assert_character(cols, min.len = 1)
  checkmate::assert_names(names(adam_db[[tab]]), must.include = cols)
  checkmate::assert_string(sep)
  checkmate::assert_string(new, null.ok = TRUE)

  x_interaction <- if (!is.null(new)) {
    new
  } else {
    paste(cols, collapse = sep)
  }

  x_df <- adam_db[[tab]][, cols, drop = FALSE]
  lvl <- lapply(x_df, function(y) {
    uni <- if (is.factor(y)) {
      levels(y)
    } else {
      unique(y)
    }
    factor(uni, levels = uni)
  })

  all_lvl_df <- expand.grid(lvl)

  all_lvl <- all_lvl_df[, cols, drop = FALSE] %>%
    arrange(across(all_of(cols))) %>%
    apply(1, paste, collapse = sep)

  x_vec <- x_df[, cols, drop = FALSE] %>%
    apply(1, paste, collapse = sep)

  existing_lvl <- intersect(all_lvl, x_vec)
  x_fact <- factor(x_vec, existing_lvl)

  adam_db[[tab]][, x_interaction] <- x_fact
  adam_db
}

#' Cutting data by group
#'
#' @details Function used to categorize numeric data stored in long format depending on their group. Intervals are
#'   closed on the right (and open on the left).
#'
#' @param df (`dataframe`) with a column of data to be cut and a column specifying the group of each observation.
#' @param col_data (`character`) the column containing the data to be cut.
#' @param col_group (`character`) the column containing the names of the groups according to which the data should be
#'   split.
#' @param group  (`nested list`) providing for each parameter value that should be analyzed in a categorical way: the
#'   name of the parameter (`character`), a series of breakpoints (`numeric`) where the first breakpoints is typically
#'   `-Inf` and the last `Inf`, and a series of name which will describe each category (`character`).
#' @param cat_col (`character`) the name of the new column in which the cut label should he stored.
#'
#' @export
#'
#' @return `data.frame` with a column containing categorical values.
#' @examples
#' group <- list(
#'   list(
#'     "Height",
#'     c(-Inf, 150, 170, Inf),
#'     c("=<150", "150-170", ">170")
#'   ),
#'   list(
#'     "Weight",
#'     c(-Inf, 65, Inf),
#'     c("=<65", ">65")
#'   ),
#'   list(
#'     "Age",
#'     c(-Inf, 31, Inf),
#'     c("=<31", ">31")
#'   ),
#'   list(
#'     "PreCondition",
#'     c(-Inf, 1, Inf),
#'     c("=<1", "<1")
#'   )
#' )
#' data <- data.frame(
#'   SUBJECT = rep(letters[1:10], 4),
#'   PARAM = rep(c("Height", "Weight", "Age", "other"), each = 10),
#'   AVAL = c(rnorm(10, 165, 15), rnorm(10, 65, 5), runif(10, 18, 65), rnorm(10, 0, 1)),
#'   index = 1:40
#' )
#'
#' cut_by_group(data, "AVAL", "PARAM", group, "my_new_categories")
cut_by_group <- function(df,
                         col_data,
                         col_group,
                         group,
                         cat_col) {
  checkmate::assert_data_frame(df)
  checkmate::assert_subset(c(col_data, col_group), colnames(df))
  checkmate::assert_numeric(df[, col_data])
  checkmate::assert_list(group)

  lapply(
    group,
    function(list_element) {
      checkmate::assert_list(list_element, len = 3, types = c("character", "numeric", "character"))
    }
  )

  df[cat_col] <- NA

  for (g in group) {
    selected_row <- df[[col_group]] == g[[1]]

    df[selected_row, cat_col] <- as.character(cut(df[[col_data]][selected_row], breaks = g[[2]], labels = g[[3]]))
  }
  df
}

#' Reorder Two Columns Levels Simultaneously
#'
#' @details The function expect a 1:1 matching between the elements of the two selected column.
#'
#' @param df (`data.frame`) with two column whose factors should be reordered.
#' @param primary (`string`) the name of the column on which the levels reordering should be based.
#' @param secondary (`string`) the name of the column whose levels should be reordered following the levels of the
#'   primary column.
#' @param levels_primary (`character`) the levels in the desired order. Existing levels that are not included will be
#'   placed afterward in their current order.
#'
#' @export
#'
#' @examples
#'
#' df <- data.frame(
#'   SUBJID = 1:3,
#'   PARAMCD = factor(c("A", "B", "C")),
#'   PARAM = factor(paste("letter", LETTERS[1:3]))
#' )
#' co_relevels(df, "PARAMCD", "PARAM", levels_primary = c("C", "A", "B"))
co_relevels <- function(df, primary, secondary, levels_primary) {
  checkmate::assert_data_frame(df, min.rows = 1)
  checkmate::assert_subset(c(primary, secondary), colnames(df))
  checkmate::assert_character(levels_primary, min.len = 1)
  checkmate::assert_vector(df[[primary]], any.missing = FALSE)
  checkmate::assert_vector(df[[secondary]], any.missing = FALSE)

  df[, primary] <- as.factor(df[[primary]])
  df[, secondary] <- as.factor(df[[secondary]])

  # check unique relationship
  df_key <- df[, c(primary, secondary)]
  df_key <- unique(df_key)

  if (any(duplicated(df_key[[primary]])) || any(duplicated(df_key[[secondary]]))) {
    stop("non univoque relation between values in primary and secondary column")
  }

  keys <- setNames(as.character(df_key[[secondary]]), as.character(df_key[[primary]]))

  all_levels_primary <- c(levels_primary, setdiff(levels(df[[primary]]), levels_primary))
  all_levels_secondary <- keys[all_levels_primary]

  df[, primary] <- factor(df[[primary]], all_levels_primary)
  df[, secondary] <- factor(df[[secondary]], all_levels_secondary)

  df
}

#' Transforming Empty Strings and White Spaces to NAs
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' SAS imports missing data as empty strings or white spaces. This helper function replaces the empty strings and white
#' space-only character and levels by `NAs`.
#'
#' @param x (`vector`) where empty of white space should be transformed to `NAs`.
#'
#' @return `character` or `factor` without explicit NA. `logical` and `numeric` are returned as `character`.
#'
#' @export
#' @examples
#' char1 <- c(" ", "    ", "a", "b", "", "")
#' h_ws_to_na(char1)
#'
#' fact1 <- as.factor(char1)
#' h_ws_to_na(fact1)
#'
#' num1 <- c(1:10)
#' h_ws_to_na(num1)
#'
#' logi1 <- c(TRUE, FALSE, NA)
#' h_ws_to_na(logi1)
h_ws_to_na <- function(x) {
  if (is.factor(x)) {
    levels_x <- levels(x)

    ws_levels <- grepl("^\\s*$", levels_x) | levels_x == ""

    levels(x)[ws_levels] <- NA
  } else if (is.character(x)) {
    ws_char <- grepl("^\\s*$", x) | x == ""

    x[ws_char] <- NA
  } else {
    x <- as.character(x)
  }
  x
}

#' Transforming Empty Strings and White Spaces to Explicit NAs
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' SAS imports missing data as empty strings or white spaces. This helper function is a thin wrapper around
#' [dunlin::h_ws_to_na] which replaces them with explicit missing level.
#'
#' @param x (`vector`) where empty of white space should be transformed to `NAs`.
#' @param na_level (`character`) replacement of the missing levels.
#'
#' @return `factor` with explicit NA
#'
#' @export
#' @examples
#' char1 <- c(" ", "    ", "a", "b", "", "")
#' h_ws_to_explicit_na(char1)
#'
#' fact1 <- as.factor(char1)
#' h_ws_to_explicit_na(fact1)
#'
#' num1 <- c(1, 2, NA)
#' h_ws_to_explicit_na(num1)
#'
#' logi1 <- c(TRUE, FALSE, NA)
#' h_ws_to_explicit_na(logi1)
h_ws_to_explicit_na <- function(x, na_level = "<Missing>") {
  checkmate::assert_character(na_level)

  res <- forcats::fct_na_value_to_level(h_ws_to_na(x), na_level)

  if (na_level %in% res) {
    forcats::fct_relevel(res, na_level, after = Inf)
  } else {
    forcats::fct_drop(res, only = na_level)
  }
}

#' Transforming Empty Strings and White Spaces to Explicit NAs while Preserving Label
#'
#' @details This function preserves the label attribute.
#'
#' @param x (`vector`) input to be turned into factor with explicit missing level.
#' @param na_level (`character`) the label to encode missing levels.
#'
#' @return `factor` with explicit NA and the same label as the input.
#'
#' @export
#' @examples
#' char1 <- c(" ", "    ", "a", "b", "", "", NA)
#' attr(char1, "label") <- "my_label"
#'
#' h_as_factor(char1)
h_as_factor <- function(x, na_level = "<Missing>") {
  checkmate::assert_vector(x)

  init_lab <- attr(x, "label")

  res <- h_ws_to_explicit_na(x, na_level = na_level)

  attr(res, "label") <- init_lab

  res
}

#' Setting the Label Attribute
#'
#' @param var (`object`) whose label attribute can be set.
#' @param label (`character`) the label to add.
#'
#' @return `object` with label attribute.
#'
#' @export
#' @examples
#' x <- c(1:10)
#' attr(x, "label")
#'
#' y <- attr_label(x, "my_label")
#' attr(y, "label")
attr_label <- function(var, label) {
  checkmate::assert_character(label)

  x <- var
  attr(x, "label") <- label

  x
}

#' Setting the Label Attribute to Data Frame Columns
#'
#' @param df (`data.frame`).
#' @param label (`character`) the labels to add.
#'
#' @return `data.frame` with label attributes.
#'
#' @export
#' @examples
#' res <- attr_label_df(mtcars, letters[1:11])
#' res
#' lapply(res, attr, "label")
attr_label_df <- function(df, label) {
  checkmate::assert_data_frame(df)
  checkmate::assert_character(label, len = ncol(df))

  res <- mapply(attr_label, var = df, label = as.list(label), SIMPLIFY = FALSE)
  as.data.frame(res)
}

#' Encode Categorical Missing Values in a `list` of `data.frame`
#'
#' @details This is a helper function to encode missing values (i.e `NA` and `empty string`) of every `character` and
#'   `factor` variable found in a `list` of `data.frame`. The `label` attribute of the columns is preserved.
#'
#' @param data (`list` of `data.frame`) to be transformed.
#' @param omit_tables (`character`) the names of the tables to omit from processing.
#' @param omit_columns (`character`) the names of the columns to omit from processing.
#' @param char_as_factor (`logical`) should character columns be converted into factor.
#' @param na_level (`string`) the label to encode missing levels.
#'
#' @return `list` of `data.frame` object with explicit missing levels.
#' @export
#'
#' @examples
#'
#' df1 <- data.frame(
#'   "char" = c("a", "b", NA, "a", "k", "x"),
#'   "char2" = c("A", "B", NA, "A", "K", "X"),
#'   "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")),
#'   "logi" = c(NA, FALSE, TRUE, NA, FALSE, NA)
#' )
#' df2 <- data.frame(
#'   "char" = c("a", "b", NA, "a", "k", "x"),
#'   "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")),
#'   "num" = c(1:5, NA)
#' )
#' df3 <- data.frame(
#'   "char" = c(NA, NA, "A")
#' )
#'
#' db <- list(df1 = df1, df2 = df2, df3 = df3)
#'
#' ls_explicit_na(db)
#' ls_explicit_na(db, omit_tables = "df3", omit_columns = "char2")
#'
ls_explicit_na <- function(data,
                           omit_tables = NULL,
                           omit_columns = NULL,
                           char_as_factor = TRUE,
                           na_level = "<Missing>") {
  checkmate::assert_list(data, types = "data.frame", names = "unique")
  checkmate::assert_character(omit_tables, null.ok = TRUE)
  checkmate::assert_character(omit_columns, null.ok = TRUE)
  checkmate::assert_flag(char_as_factor)
  checkmate::assert_string(na_level)

  modif_tab <- setdiff(names(data), omit_tables)
  if (length(modif_tab) < 1) {
    return(data)
  }

  data[modif_tab] <- lapply(
    data[modif_tab],
    h_df_explicit,
    omit_columns = omit_columns,
    char_as_factor = char_as_factor,
    na_level = na_level
  )

  data
}

#' Encode Categorical Missing Values in a `data.frame`.
#'
#' @inheritParams ls_explicit_na
#' @keywords internal
#'
#' @examples
#' \dontrun{
#' df <- data.frame(
#'   "char" = c("a", "b", NA, "a", "k", "x"),
#'   "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")),
#'   "logi" = c(NA, FALSE, TRUE, NA, FALSE, NA),
#'   "num" = c(1:5, NA)
#' )
#'
#' h_df_explicit(df)
#' h_df_explicit(df, omit_columns = c("fact", "x"))
#' }
h_df_explicit <- function(df,
                          omit_columns = NULL,
                          char_as_factor = TRUE,
                          na_level = "<Missing>") {
  na_list <- list(x = c("", NA))
  names(na_list) <- na_level
  na_rule <- rule(.lst = na_list)

  df %>%
    mutate(
      across(
        where(~ is.character(.x) | is.factor(.x)) & !any_of(.env$omit_columns),
        ~ reformat(.x, format = .env$na_rule, .string_as_fct = .env$char_as_factor, .na_last = TRUE)
      )
    )
}

#' Assert Nested List can be used as Format Argument in Reformat.
#'
#' @param object (`list`) to assert.
#' @return invisible `TRUE` or an error message if the criteria are not fulfilled.
#'
#' @export
#' @examples
#' format <- list(
#'   df1 = list(
#'     var1 = rule("X" = "x", "N" = c(NA, ""))
#'   ),
#'   df2 = list(
#'     var1 = rule(),
#'     var2 = rule("f11" = "F11", "NN" = NA),
#'     var3 = empty_rule
#'   ),
#'   df3 = list()
#' )
#'
#' assert_valid_format(format)
assert_valid_format <- function(object) {
  coll <- checkmate::makeAssertCollection()

  # Check object.
  checkmate::assert_list(object, names = "unique", type = "list", add = coll)

  # Check table level.
  mapply(
    function(x, xtable) {
      checkmate::assert_list(
        x,
        names = "unique",
        types = "rule",
        any.missing = FALSE,
        .var.name = paste0("[", xtable, "]"),
        add = coll
      )
    },
    object,
    names(object)
  )

  checkmate::reportAssertions(coll)
}

#' Assert List can be Converted into a Nested List Compatible with the Format Argument of Reformat.
#'
#' @param object (`list`) to assert.
#' @return invisible `TRUE` or an error message if the criteria are not fulfilled.
#'
#' @export
#' @examples
#' format <- list(
#'   df1 = list(
#'     var1 = list("X" = "x", "N" = c(NA, ""))
#'   ),
#'   df2 = list(
#'     var1 = list(),
#'     var2 = list("f11" = "F11", "NN" = NA)
#'   ),
#'   df3 = list()
#' )
#'
#' assert_valid_list_format(format)
assert_valid_list_format <- function(object) {
  coll <- checkmate::makeAssertCollection()

  # Check object.
  checkmate::assert_list(object, names = "unique", type = "list", add = coll)

  # Check table level.
  mapply(
    function(x, xtable) {
      checkmate::assert_list(
        x,
        names = "unique",
        types = "list",
        any.missing = FALSE,
        .var.name = paste0("[", xtable, "]"),
        add = coll
      )
    },
    object,
    names(object)
  )

  # Check variable level.
  mapply(
    function(x, xtable) {
      xvar <- names(x)
      mapply(
        function(x, xvar) {
          checkmate::assert_list(
            x,
            names = "unique",
            type = c("character", "numeric", "logical"),
            .var.name = paste0("[", xtable, ".", xvar, "]"),
            add = coll
          )
        },
        x,
        xvar
      )
    },
    object,
    names(object)
  )

  checkmate::reportAssertions(coll)
}

1		#' Join `adsub` to `adsl`
2		#'
3		#' @param adam_db (`list` of `data.frame`) object input with an `adsl` and `adsub` table.
4		#' @param keys (`character`) the name of the columns in `adsl` uniquely identifying a row.
5		#' @param continuous_var (`character`) the value of a parameter in the `PARAMCD` column of the `adsub` table from which
6		#' columns containing continuous values should be created. If `"all"`, all parameter values are selected, if `NULL`,
7		#' none are selected.
8		#' @param categorical_var (`character`) the value of a parameter in the `PARAMCD` column of the `adsub` table from which
9		#' columns containing categorical values should be created. If `"all"`, all parameter values are selected, if `NULL`,
10		#' none are selected.
11		#' @param continuous_suffix (`string`) the suffixes to add to the newly generated columns containing continuous values.
12		#' @param categorical_suffix (`string`) the suffixes to add to the newly generated columns containing categorical
13		#' values.
14		#'
15		#' @return a `list` of `data.frame` with new columns in the `adsl` table.
16		#'
17		#' @rdname join_adsub_adsl
18		#' @export
19		#'
20		join_adsub_adsl <- function(adam_db,
21		keys,
22		continuous_var,
23		categorical_var,
24		continuous_suffix,
25		categorical_suffix) {
26	5x	UseMethod("join_adsub_adsl")
27		}
28
29		#' @rdname join_adsub_adsl
30		#' @export
31		#'
32		#' @examples
33		#' adsl <- data.frame(
34		#' USUBJID = c("S1", "S2", "S3", "S4"),
35		#' STUDYID = "My_study",
36		#' AGE = c(60, 44, 23, 31)
37		#' )
38		#'
39		#' adsub <- data.frame(
40		#' USUBJID = c("S1", "S2", "S3", "S4", "S1", "S2", "S3"),
41		#' STUDYID = "My_study",
42		#' PARAM = c("weight", "weight", "weight", "weight", "height", "height", "height"),
43		#' PARAMCD = c("w", "w", "w", "w", "h", "h", "h"),
44		#' AVAL = c(98, 75, 70, 71, 182, 155, 152),
45		#' AVALC = c(">80", "<=80", "<=80", "<=80", ">180", "<=180", "<=180")
46		#' )
47		#'
48		#' db <- list(adsl = adsl, adsub = adsub)
49		#'
50		#' x <- join_adsub_adsl(adam_db = db)
51		#' x <- join_adsub_adsl(adam_db = db, continuous_var = c("w", "h"), categorical_var = "h")
52		join_adsub_adsl.list <- function(adam_db,
53		keys = c("USUBJID", "STUDYID"),
54		continuous_var = "all",
55		categorical_var = "all",
56		continuous_suffix = "",
57		categorical_suffix = "_CAT") {
58	5x	checkmate::assert_list(adam_db, types = "data.frame")
59	5x	checkmate::assert_names(names(adam_db), must.include = c("adsl", "adsub"))
60	5x	checkmate::assert_names(names(adam_db$adsub), must.include = c("PARAM", "PARAMCD", "AVAL", "AVALC", keys))
61	5x	checkmate::assert_names(names(adam_db$adsl), must.include = keys)
62	5x	checkmate::assert_string(continuous_suffix)
63	5x	checkmate::assert_string(categorical_suffix)
64
65	5x	value_col <- c("AVAL", "AVALC")
66	5x	vars_ls <- list(continuous_var, categorical_var)
67	5x	suffix_ls <- list(continuous_suffix, categorical_suffix)
68
69		# Select variables names.
70	5x	vars_ls <- lapply(vars_ls, function(x) {
71	10x	if (identical(x, "all")) {
72	6x	unique(adam_db$adsub$PARAMCD)
73		} else {
74	4x	x
75		}
76		})
77
78		# Create new variable names.
79	5x	vars_nam <- mapply(
80	5x	function(x, y) {
81	10x	if (!is.null(x)) {
82	6x	names(x) <- paste0(x, y)
83	6x	x
84		} else {
85	4x	NULL
86		}
87		},
88	5x	vars_ls,
89	5x	suffix_ls,
90	5x	SIMPLIFY = FALSE
91		)
92
93		# Test if new columns already exist in adsl.
94	5x	assert_names_notadsl(vars_nam, adam_db$adsl)
95
96		# Test if categorical and continuous column will result in the same column name.
97	5x	assert_names_collision(vars_nam)
98
99		# Pivot and keep labels.
100	5x	adsub_wide_ls <-
101	5x	adam_db$adsub %>%
102	5x	poly_pivot_wider(id = keys, param_from = "PARAMCD", value_from = value_col, labels_from = "PARAM")
103
104		# Merge categorical and continuous variables.
105	5x	for (i in seq_along(value_col)) {
106	10x	adsub_df <- adsub_wide_ls[[value_col[i]]]
107	10x	adsub_df <- adsub_df[, c(keys, vars_nam[[i]])]
108	10x	colnames(adsub_df) <- c(keys, names(vars_nam[[i]]))
109
110	10x	adam_db$adsl <- dplyr::left_join(
111	10x	x = adam_db$adsl,
112	10x	y = adsub_df,
113	10x	by = keys
114		)
115		}
116
117	5x	adam_db
118		}
119
120		# Utility functions ----
121
122		assert_names_collision <- function(vars_nam) {
123	5x	final_names_ls <- lapply(vars_nam, names)
124	5x	in_both <- final_names_ls[[1]] %in% final_names_ls[[2]]
125	5x	if (any(in_both)) {
126	!	warning(
127	!	paste(
128	!	toString(final_names_ls[[1]][in_both]),
129	!	"are new columns for continuous and categorical variable,
130	!	Please set different `continuous_suffix` or `categorical_suffix`
131	!	or select different columns to avoid automatic renaming."
132		)
133		)
134		}
135		}
136
137		assert_names_notadsl <- function(vars_nam, df) {
138	5x	final_names <- unique(sapply(vars_nam, names))
139	5x	already_in_adsl <- final_names %in% colnames(df)
140	5x	if (any(already_in_adsl)) {
141	1x	warning(
142	1x	paste(
143	1x	toString(final_names[already_in_adsl]),
144	1x	"already exist in adsl, the name will default to another values.
145	1x	Please change `continuous_suffix` or `categorical_suffix` to avoid automatic reneaming"
146		)
147		)
148		}
149		}

1		#' Reformat Values
2		#' @param obj object to reformat.
3		#' @param format (`rule`) or (`list`) of `rule` depending on the class of obj.
4		#' @param ... for compatibility between methods and pass additional special mapping to transform rules.
5		#' * `.string_as_fct` (`flag`) whether the reformatted character object should be converted to factor.
6		#' * `.to_NA` (`character`) values that should be converted to `NA`. For `factor`, the corresponding levels are
7		#' dropped. If `NULL`, the argument will be taken from the `to_NA`attribute of the rule.
8		#' * `.drop` (`flag`) whether to drop empty levels. If `NULL`, the argument will be taken from the `drop`attribute of
9		#' the rule.
10		#' * `.na_last` (`flag`) whether the level replacing `NA` should be last.
11		#'
12		#' @export
13		#' @note When the rule is empty rule or when values subject to reformatting are absent from the object, no error is
14		#' raised. The conversion to factor if `.string_as_fct = TRUE`) is still carried out. The conversion of the levels
15		#' declared in `.to_NA` to `NA` values occurs after the remapping. `NA` values created this way are not affected by a
16		#' rule declaring a remapping of `NA` values. For factors, level dropping is the last step, hence, levels converted to
17		#' `NA` by the `.to_NA` argument, will be removed if `.drop` is `TRUE`. Arguments passed via `reformat` override the
18		#' ones defined during rule creation.
19		#'
20		#' @rdname reformat
21		#'
22		reformat <- function(obj, ...) {
23	51x	UseMethod("reformat")
24		}
25
26		#' @export
27		#' @rdname reformat
28		reformat.default <- function(obj, format, ...) {
29	2x	if (!is(format, "empty_rule")) {
30	1x	warning(paste0(c("Not implemented for class: ", toString(class(obj)), "! Only empty rule allowed.")))
31		}
32	2x	return(obj)
33		}
34
35		#' @export
36		#' @rdname reformat
37		#'
38		#' @examples
39		#'
40		#' # Reformatting of character.
41		#' obj <- c("a", "b", "x", NA)
42		#' attr(obj, "label") <- "my label"
43		#' format <- rule("A" = "a", "NN" = NA)
44		#'
45		#' reformat(obj, format)
46		#' reformat(obj, format, .string_as_fct = FALSE, .to_NA = "x")
47		#' reformat(obj, empty_rule, .string_as_fct = FALSE, .to_NA = "x")
48		#'
49		reformat.character <- function(obj, format, ...) {
50	19x	checkmate::assert_class(format, "rule")
51
52		# Give priority to argument defined in reformat.
53	19x	format <- do.call(rule, modifyList(as.list(format), list(...)))
54
55	19x	if (attr(format, ".string_as_fct")) {
56		# Keep attributes.
57	11x	att <- attributes(obj)
58	11x	obj_fact <- as.factor(obj)
59	11x	supp_att_name <- setdiff(names(att), attributes(obj_fact))
60	11x	supp_att <- att[supp_att_name]
61	11x	attributes(obj_fact) <- c(attributes(obj_fact), supp_att)
62
63	11x	reformat(obj_fact, format)
64		} else {
65	8x	if (!is(format, "empty_rule")) {
66	6x	value_match <- unlist(format)
67	6x	m <- match(obj, value_match)
68	6x	obj[!is.na(m)] <- names(format)[m[!is.na(m)]]
69		}
70
71
72	8x	val_to_NA <- attr(format, ".to_NA")
73	8x	if (!is.null(val_to_NA)) {
74	4x	obj[obj %in% val_to_NA] <- NA_character_
75		}
76
77	8x	obj
78		}
79		}
80
81		#' @export
82		#' @rdname reformat
83		#'
84		#' @examples
85		#'
86		#' # Reformatting of factor.
87		#' obj <- factor(c("first", "a", "aa", "b", "x", NA), levels = c("first", "x", "b", "aa", "a", "z"))
88		#' attr(obj, "label") <- "my label"
89		#' format <- rule("A" = c("a", "aa"), "NN" = c(NA, "x"), "Not_present" = "z", "Not_a_level" = "P")
90		#'
91		#' reformat(obj, format)
92		#' reformat(obj, format, .na_last = FALSE, .to_NA = "b", .drop = FALSE)
93		#' reformat(obj, empty_rule, .na_last = FALSE, .to_NA = "b", .drop = FALSE)
94		#'
95		reformat.factor <- function(obj, format, ...) {
96	27x	checkmate::assert_class(format, "rule")
97
98	27x	format <- do.call(rule, modifyList(as.list(format), list(...)))
99
100	27x	if (!is(format, "empty_rule")) {
101	20x	any_na <- anyNA(obj)
102	20x	if (any(is.na(format)) && any_na) {
103	18x	obj <- forcats::fct_na_value_to_level(obj)
104		}
105
106	20x	absent_format <- format[!format %in% levels(obj)]
107	20x	sel_format <- format[format %in% levels(obj)]
108	20x	obj <- forcats::fct_recode(obj, !!!sel_format)
109	20x	obj <- forcats::fct_expand(obj, unique(names(absent_format)))
110	20x	obj <- forcats::fct_relevel(obj, unique(names(format)))
111
112	20x	if (any(is.na(format)) && attr(format, ".na_last")) {
113	13x	na_lvl <- names(format)[is.na(format)]
114	13x	obj <- forcats::fct_relevel(obj, na_lvl, after = Inf)
115		}
116		}
117
118	27x	drop_lvl <- attr(format, ".drop")
119	27x	if (drop_lvl) {
120	3x	obj <- forcats::fct_drop(obj)
121		}
122
123		# Levels converted to NA are dropped.
124	27x	val_to_NA <- attr(format, ".to_NA")
125	27x	if (!is.null(val_to_NA)) {
126	4x	obj <- forcats::fct_na_level_to_value(obj, val_to_NA)
127		}
128
129	27x	obj
130		}
131
132		#' @export
133		#' @rdname reformat
134		#'
135		#' @examples
136		#'
137		#' # Reformatting of list of data.frame.
138		#' df1 <- data.frame(
139		#' var1 = c("a", "b", NA),
140		#' var2 = factor(c("F1", "F2", NA))
141		#' )
142		#'
143		#' df2 <- data.frame(
144		#' var1 = c("x", NA, "y"),
145		#' var2 = factor(c("F11", NA, "F22"))
146		#' )
147		#'
148		#' db <- list(df1 = df1, df2 = df2)
149		#'
150		#' format <- list(
151		#' df1 = list(
152		#' var1 = rule("X" = "x", "N" = NA, .to_NA = "b")
153		#' ),
154		#' df2 = list(
155		#' var1 = empty_rule,
156		#' var2 = rule("f11" = "F11", "NN" = NA)
157		#' )
158		#' )
159		#'
160		#' reformat(db, format)
161		reformat.list <- function(obj, format, ...) {
162	3x	checkmate::assert_list(obj, types = c("data.frame", "tibble"))
163	3x	checkmate::assert_named(obj)
164	3x	checkmate::assert_list(format, names = "unique", types = "list", null.ok = TRUE)
165
166	3x	if (length(format) == 0) {
167	1x	return(obj)
168		}
169
170	2x	assert_valid_format(format)
171
172	2x	for (tab in names(format)) {
173	3x	local_map <- format[[tab]]
174	3x	local_map <- local_map[names(local_map) %in% names(obj[[tab]])]
175
176	3x	obj[[tab]][names(local_map)] <- mapply(
177	3x	function(rl, col) reformat(obj[[tab]][[col]], format = rl, ...),
178	3x	local_map,
179	3x	names(local_map),
180	3x	SIMPLIFY = FALSE
181		)
182		}
183
184	2x	obj
185		}

1		#' Create rule based on mappings
2		#' @param ... Mapping pairs, the argument name is the transformed while
3		#' its values are original values.
4		#' @param .lst (`list`) of mapping.
5		#' @param .string_as_fct (`flag`) whether to convert characters to factors.
6		#' @param .na_last (`flag`) whether the level replacing `NA` should be last.
7		#' @param .drop (`flag`) whether to drop empty levels.
8		#' @param .to_NA (`character`) values that should be converted to `NA`.
9		#'
10		#' @note Conversion to `NA` is the last step of the remapping process.
11
12		#'
13		#' @export
14		#' @examples
15		#' rule("X" = "x", "Y" = c("y", "z"))
16		#' rule("X" = "x", "Y" = c("y", "z"), .drop = TRUE, .to_NA = c("a", "b"), .na_last = FALSE)
17		#'
18		rule <- function(..., .lst = list(...), .string_as_fct = TRUE, .na_last = TRUE, .drop = FALSE, .to_NA = NULL) {
19	97x	checkmate::assert_flag(.string_as_fct)
20	97x	checkmate::assert_flag(.na_last)
21	97x	checkmate::assert_flag(.drop)
22	97x	checkmate::assert_character(.to_NA, null.ok = TRUE, any.missing = FALSE)
23
24	97x	if (length(.lst) == 0) {
25	23x	res <- empty_rule
26	23x	attr(res, ".string_as_fct") <- .string_as_fct %\|\|% TRUE
27	23x	attr(res, ".drop") <- .drop %\|\|% FALSE
28	23x	attr(res, ".to_NA") <- .to_NA %\|\|% NULL
29	23x	return(res)
30		} else {
31	74x	.lst[is.na(.lst)] <- NA_character_
32	74x	if (!checkmate::test_list(.lst, types = c("character"))) {
33	1x	stop("Value mapping may only contain the type: {character}")
34		}
35	73x	vals <- as.character(unlist(.lst, use.names = FALSE))
36	73x	checkmate::assert_character(vals, unique = TRUE)
37	71x	nms <- unlist(lapply(seq_len(length(.lst)), function(x) {
38	166x	rep(names(.lst)[x], length(.lst[[x]]))
39		}))
40
41	71x	res <- structure(
42	71x	setNames(vals, nms),
43	71x	class = c("rule", "character"),
44	71x	.string_as_fct = .string_as_fct,
45	71x	.na_last = .na_last,
46	71x	.drop = .drop,
47	71x	.to_NA = .to_NA
48		)
49
50	71x	res
51		}
52		}
53
54		#' Create empty rule
55		#' @export
56		empty_rule <- structure(
57		character(0L),
58		class = c("empty_rule", "rule", "character"),
59		.string_as_fct = TRUE,
60		.na_last = FALSE,
61		.drop = FALSE
62		)
63
64		#' @export
65		#'
66		print.rule <- function(x, ...) {
67	1x	cat("Mapping of:\n")
68	1x	nms <- names(x)
69	1x	for (i in seq_len(length(x))) {
70	2x	cat(nms[i], " <- ", if (length(x[[i]]) > 1) sprintf("[%s]", toString(x[[i]])) else x[[i]], "\n")
71		}
72	!	if (!is.null(attr(x, ".to_NA"))) cat("NA <- ", toString(attr(x, ".to_NA")), "\n")
73	1x	cat("Convert to factor:", attr(x, ".string_as_fct"), "\n")
74	1x	cat("Drop unused level:", attr(x, ".drop"), "\n")
75	1x	cat("NA-replacing level in last position:", attr(x, ".na_last"), "\n")
76		}
77
78		#' Convert nested list into list of `rule`
79		#' @param obj (`nested list`) to convert into list of rules.
80		#' @export
81		#' @examples
82		#' obj <- list(
83		#' rule1 = list("X" = c("a", "b"), "Z" = "c", .to_NA = "xxxx"),
84		#' rule2 = list(Missing = c(NA, "")),
85		#' rule3 = list(Missing = c(NA, ""), .drop = TRUE),
86		#' rule4 = list(Absent = c(NA, ""), .drop = TRUE, .to_NA = "yyyy")
87		#' )
88		#' list2rules(obj)
89		#'
90		list2rules <- function(obj) {
91	2x	coll <- checkmate::makeAssertCollection()
92	2x	checkmate::assert_list(obj, unique = TRUE, types = "list", add = coll)
93	2x	checkmate::assert_names(names(obj), type = "unique", add = coll)
94	2x	checkmate::reportAssertions(coll)
95
96	1x	lapply(obj, function(x) {
97	3x	do.call("rule", x)
98		})
99		}
100
101		#' Convert Rule to List
102		#' @param x (`rule`) to convert.
103		#' @param ... not used.
104		#'
105		#' @export
106		#' @examples
107		#'
108		#' x <- rule("a" = c("a", "b"), "X" = "x")
109		#' as.list(x)
110		as.list.rule <- function(x, ...) {
111	48x	nms <- names(x)
112	48x	unames <- unique(nms)
113	48x	res <- lapply(unames, function(i) {
114	80x	unname(x[nms == i])
115		})
116
117
118	48x	att <- attributes(x)
119	48x	arg <- att[!names(att) %in% c("names", "class")]
120
121	48x	res <- c(res, unname(arg))
122	48x	unames <- c(unames, names(arg))
123
124	48x	setNames(res, unames)
125		}
126
127		#' @export
128		print.empty_rule <- function(x, ...) {
129	2x	cat("Empty mapping\n")
130	!	if (!is.null(attr(x, ".to_NA"))) cat("NA <- ", toString(attr(x, ".to_NA")), "\n")
131	2x	cat("Convert to factor:", attr(x, ".string_as_fct"), "\n")
132	2x	cat("Drop unused level:", attr(x, ".drop"), "\n")
133		}

1		#' Safe transformer
2		#' @details Obtain content in global environment by default.
3		#' If not found, use the environment here.
4		#' @keywords internal
5		safe_transformer <- function(text, envir) {
6	7x	text_lower <- tolower(text)
7	7x	res <- if (exists(text_lower, envir = envir, inherits = FALSE)) {
8	6x	get(text_lower, envir = envir)
9		} else {
10	1x	text
11		}
12	7x	if (is.character(res)) {
13	7x	if (identical(text, tolower(text))) {
14	3x	res <- tolower(res)
15	4x	} else if (identical(text, toupper(text))) {
16	2x	res <- toupper(res)
17	2x	} else if (identical(text, stringr::str_to_title(text))) {
18	2x	res <- stringr::str_to_title(res)
19		}
20		}
21	7x	res
22		}
23
24		#' Render whiskers safely
25		#' @param x (`character`) input to be rendered safely.
26		#' @export
27		render_safe <- function(x) {
28	4x	checkmate::assert_character(x, null.ok = TRUE)
29	4x	if (is.null(x)) {
30	!	return(NULL)
31		}
32	4x	ret <- lapply(
33	4x	x,
34	4x	glue::glue,
35	4x	.transformer = safe_transformer,
36	4x	.envir = whisker_env,
37	4x	.null = "NULL",
38	4x	.open = "{",
39	4x	.close = "}"
40		)
41	4x	ret <- vapply(ret, `[[`, i = 1L, FUN.VALUE = "")
42	4x	setNames(ret, names(x))
43		}
44		#' Add whisker values
45		#' @param x Named (`character`) input.
46		#' @export
47		add_whisker <- function(x) {
48	1x	checkmate::assert_character(x, names = "unique", any.missing = FALSE)
49	1x	lapply(
50	1x	names(x),
51	1x	function(i) {
52	2x	assign(i, x[i], envir = whisker_env)
53		}
54		)
55	1x	invisible()
56		}
57
58		#' Remove whisker values
59		#' @param x Named (`character`) input.
60		#' @export
61		remove_whisker <- function(x) {
62	1x	checkmate::assert_character(x, any.missing = FALSE)
63	1x	rm(list = x, envir = whisker_env)
64		}

1		#' Assert Nested List can be used as Format Argument in Reformat.
2		#'
3		#' @param object (`list`) to assert.
4		#' @return invisible `TRUE` or an error message if the criteria are not fulfilled.
5		#'
6		#' @export
7		#' @examples
8		#' format <- list(
9		#' df1 = list(
10		#' var1 = rule("X" = "x", "N" = c(NA, ""))
11		#' ),
12		#' df2 = list(
13		#' var1 = rule(),
14		#' var2 = rule("f11" = "F11", "NN" = NA),
15		#' var3 = empty_rule
16		#' ),
17		#' df3 = list()
18		#' )
19		#'
20		#' assert_valid_format(format)
21		assert_valid_format <- function(object) {
22	4x	coll <- checkmate::makeAssertCollection()
23
24		# Check object.
25	4x	checkmate::assert_list(object, names = "unique", type = "list", add = coll)
26
27		# Check table level.
28	4x	mapply(
29	4x	function(x, xtable) {
30	8x	checkmate::assert_list(
31	8x	x,
32	8x	names = "unique",
33	8x	types = "rule",
34	8x	any.missing = FALSE,
35	8x	.var.name = paste0("[", xtable, "]"),
36	8x	add = coll
37		)
38		},
39	4x	object,
40	4x	names(object)
41		)
42
43	4x	checkmate::reportAssertions(coll)
44		}
45
46		#' Assert List can be Converted into a Nested List Compatible with the Format Argument of Reformat.
47		#'
48		#' @param object (`list`) to assert.
49		#' @return invisible `TRUE` or an error message if the criteria are not fulfilled.
50		#'
51		#' @export
52		#' @examples
53		#' format <- list(
54		#' df1 = list(
55		#' var1 = list("X" = "x", "N" = c(NA, ""))
56		#' ),
57		#' df2 = list(
58		#' var1 = list(),
59		#' var2 = list("f11" = "F11", "NN" = NA)
60		#' ),
61		#' df3 = list()
62		#' )
63		#'
64		#' assert_valid_list_format(format)
65		assert_valid_list_format <- function(object) {
66	2x	coll <- checkmate::makeAssertCollection()
67
68		# Check object.
69	2x	checkmate::assert_list(object, names = "unique", type = "list", add = coll)
70
71		# Check table level.
72	2x	mapply(
73	2x	function(x, xtable) {
74	4x	checkmate::assert_list(
75	4x	x,
76	4x	names = "unique",
77	4x	types = "list",
78	4x	any.missing = FALSE,
79	4x	.var.name = paste0("[", xtable, "]"),
80	4x	add = coll
81		)
82		},
83	2x	object,
84	2x	names(object)
85		)
86
87		# Check variable level.
88	2x	mapply(
89	2x	function(x, xtable) {
90	4x	xvar <- names(x)
91	4x	mapply(
92	4x	function(x, xvar) {
93	8x	checkmate::assert_list(
94	8x	x,
95	8x	names = "unique",
96	8x	type = c("character", "numeric", "logical"),
97	8x	.var.name = paste0("[", xtable, ".", xvar, "]"),
98	8x	add = coll
99		)
100		},
101	4x	x,
102	4x	xvar
103		)
104		},
105	2x	object,
106	2x	names(object)
107		)
108
109	2x	checkmate::reportAssertions(coll)
110		}

1		#' Transforming data.frame into Wide Format
2		#'
3		#' @details instead of nesting duplicated values, the function will throw an error if the same parameter is
4		#' provided twice for the same observation.
5		#'
6		#' @param data (`data.frame`) to be pivoted.
7		#' @param id (`character`) the name of the column identifying the observations. It will correspond to the row names
8		#' of the output.
9		#' @param param_from (`character`) the name of the column containing the names of the parameters to be pivoted. The
10		#' unique values in this column will become column names in the output.
11		#' @param value_from (`character`) the name of the column containing the values that will populate the output.
12		#'
13		#' @return `data.frame` in a wide format.
14		#'
15		#' @export
16		#' @examples
17		#' test_data <- data.frame(
18		#' the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"),
19		#' the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"),
20		#' the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"),
21		#' the_val = c(65, 165, "M", 66, "F", 166, 155, TRUE)
22		#' )
23		#'
24		#' mini_pivot_wider(test_data, "the_obs", "the_param", "the_val")
25		mini_pivot_wider <- function(data,
26		id,
27		param_from,
28		value_from) {
29		# check for duplication of observation-parameter
30	3x	checkmate::assert_data_frame(data, min.rows = 1, min.cols = 3)
31	3x	checkmate::assert_character(id, len = 1)
32	3x	checkmate::assert_character(param_from, len = 1)
33	1x	checkmate::assert_character(value_from, len = 1)
34	1x	checkmate::assert_subset(c(id, param_from, value_from), colnames(data))
35	1x	checkmate::assert_false(any(duplicated(data[, c(id, param_from)])))
36
37	1x	unique_id <- sort(unique(data[[id]]))
38	1x	param <- data[[param_from]]
39
40	1x	mini_data <- data[, c(id, param_from, value_from)]
41	1x	data_ls <- split(mini_data, param)
42
43		# transform to named vector
44	1x	data_vec <-
45	1x	lapply(
46	1x	data_ls,
47	1x	function(x) setNames(x[[value_from]], x[[id]])
48		)
49
50		# query each id in each param
51	1x	all_vec <- lapply(data_vec, function(x) setNames(x[unique_id], unique_id))
52
53	1x	bind_data <- as.data.frame(all_vec)
54
55	1x	res <- cbind(id = unique_id, bind_data)
56	1x	rownames(res) <- NULL
57
58	1x	res
59		}
60
61		#' Transforming data.frame with Complex Identifiers into Wide Format
62		#'
63		#' @details This function allows to identify observations on the basis of several columns. Warning: Instead of nesting
64		#' duplicated values, the function will throw an error if the same parameter is provided twice for the same
65		#' observation.
66		#'
67		#' @param data (`data.frame`) to be pivoted.
68		#' @param id (`character`) the name of the columns whose combination uniquely identify the observations.
69		#' @param param_from (`character`) the name of the column containing the names of the parameters to be pivoted. The
70		#' unique values in this column will become column names in the output.
71		#' @param value_from (`character`) the name of the column containing the values that will populate the output.
72		#' @param drop_na (`logical`) should column containing only `NAs` be dropped.
73		#'
74		#' @return `data.frame` in a wide format.
75		#'
76		#' @export
77		#' @examples
78		#' test_data <- data.frame(
79		#' the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"),
80		#' the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"),
81		#' the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"),
82		#' the_val = c(65, 165, "M", 66, "F", 166, 155, TRUE)
83		#' )
84		#'
85		#' multi_pivot_wider(test_data, c("the_obs", "the_obs2"), "the_param", "the_val")
86		#' multi_pivot_wider(test_data, "the_obs2", "the_param", "the_val")
87		multi_pivot_wider <- function(data,
88		id,
89		param_from,
90		value_from,
91		drop_na = FALSE) {
92		# check for duplication of observation-parameter
93	19x	checkmate::assert_data_frame(data, min.rows = 1, min.cols = 3)
94	19x	checkmate::assert_character(id)
95	19x	checkmate::assert_character(param_from, len = 1)
96	19x	checkmate::assert_character(value_from, len = 1)
97	19x	checkmate::assert_false(any(duplicated(data[, c(id, param_from)])))
98	18x	checkmate::assert_subset(c(id, param_from, value_from), colnames(data))
99
100		# find a way to sort
101	18x	unique_id <- unique(data[id])
102	18x	key <- apply(unique_id[id], 1, paste, collapse = "-")
103	18x	unique_id <- cbind(key, unique_id)
104
105	18x	param <- data[[param_from]]
106
107	18x	mini_data <- data[, c(param_from, value_from)]
108	18x	f_key <- apply(data[id], 1, paste, collapse = "-")
109	18x	mini_data <- cbind(f_key, mini_data)
110
111	18x	data_ls <- split(mini_data, param)
112
113		# Transform to named vector, the first column is the key.
114	18x	data_vec <-
115	18x	lapply(
116	18x	data_ls,
117	18x	function(x) setNames(x[[value_from]], x[, 1])
118		)
119
120		# query each id in each param
121	18x	all_vec <- lapply(data_vec, function(x) x[unique_id[, 1]])
122
123	16x	if (drop_na) all_vec <- Filter(function(x) !all(is.na(x)), all_vec)
124
125	18x	bind_data <- do.call(cbind, all_vec)
126
127	18x	res <- cbind(unique_id[, -1, drop = FALSE], bind_data)
128
129	18x	rownames(res) <- NULL
130	18x	res
131		}
132
133		#' Transforming data.frame with multiple Data Column into Wide Format
134		#'
135		#' @details This function is adapted to cases where the data are distributed in several columns while the name of the
136		#' parameter is in one. Typical example is `adsub` where numeric data are stored in `AVAL` while categorical data are
137		#' in `AVALC`.
138		#'
139		#' @param data (`data.frame`) to be pivoted.
140		#' @param id (`character`) the name of the columns whose combination uniquely identify the observations.
141		#' @param param_from (`character`) the name of the columns containing the names of the parameters to be pivoted. The
142		#' unique values in this column will become column names in the output.
143		#' @param value_from (`character`) the name of the column containing the values that will populate the output.
144		#' @param labels_from (`character`) the name of the column congaing the labels of the new columns. from. If not
145		#' provided, the labels will be equal to the column names. When several labels are available for the same column, the
146		#' first one will be selected.
147		#'
148		#' @return `list` of `data.frame` in a wide format with label attribute attached to each columns.
149		#'
150		#' @export
151		#' @examples
152		#' test_data <- data.frame(
153		#' the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"),
154		#' the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"),
155		#' the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"),
156		#' the_label = c(
157		#' "Weight (Kg)", "Height (cm)", "Gender", "Weight (Kg)",
158		#' "Gender", "Height (cm)", "Height (cm)", "Pre-condition"
159		#' ),
160		#' the_val = c(65, 165, NA, 66, NA, 166, 155, NA),
161		#' the_val2 = c(65, 165, "M", 66, "F", 166, 155, TRUE)
162		#' )
163		#'
164		#' x <- poly_pivot_wider(
165		#' test_data,
166		#' c("the_obs", "the_obs2"),
167		#' "the_param",
168		#' c("the_val", "the_val2"),
169		#' "the_label"
170		#' )
171		#' x
172		#' Reduce(function(u, v) merge(u, v, all = TRUE), x)
173		poly_pivot_wider <- function(data,
174		id,
175		param_from,
176		value_from,
177		labels_from = NULL) {
178		# other tests are performed at lower levels.
179	7x	checkmate::assert_character(value_from, unique = TRUE)
180
181		# Create new labels for new columns.
182	7x	if (is.null(labels_from) \|\| labels_from == param_from) {
183	1x	new_labels <- unique(data[[param_from]])
184	1x	names(new_labels) <- new_labels
185		} else {
186	6x	checkmate::assert_character(labels_from, len = 1)
187	6x	checkmate::assert_subset(labels_from, colnames(data))
188
189	6x	new_labels_df <- data[, c(labels_from, param_from)]
190	6x	new_labels_df <- unique(new_labels_df)
191
192	6x	new_labels <- as.character(new_labels_df[[labels_from]])
193	6x	names(new_labels) <- as.character(new_labels_df[[param_from]])
194		}
195
196		# Retrieve old labels.
197	7x	old_labels <- lapply(data, attr, "label")
198	7x	n_old_label <- names(old_labels)
199	7x	null_label <- unlist(lapply(old_labels, is.null))
200	7x	old_labels[null_label] <- n_old_label[null_label]
201	7x	old_labels <- unlist(old_labels)
202
203	7x	all_labels <- c(new_labels, old_labels)
204
205	7x	res_ls <- list()
206	7x	for (n_value_from in value_from) {
207	14x	res <- multi_pivot_wider(
208	14x	data = data,
209	14x	id = id,
210	14x	param_from = param_from,
211	14x	value_from = n_value_from,
212	14x	drop_na = TRUE
213		)
214
215	14x	res <- attr_label_df(res, all_labels[colnames(res)])
216	14x	res_ls[[n_value_from]] <- res
217		}
218	7x	res_ls
219		}

1		#' Filter Data with Log
2		#' @param data (`data.frame`) input data to subset, or named (`list`) of (`data.frame`).
3		#' @param condition (`call`) of subset condition. Must evaluate as logical.
4		#' @param suffix (`string`) optional argument describing the filter.
5		#' @param ... further arguments to be passed to or from other methods.
6		#'
7		#' @details
8		#' `log_filter` will filter the data/named list of data according to the `condition`.
9		#' All the variables in `condition` must exist in the data (as variables) or in the parent
10		#' frame(e.g., in global environment).
11		#' For named list of data, if `ADSL` is available, `log_filter` will also try to subset all
12		#' other datasets with `USUBJID`.
13		#' @export
14		log_filter <- function(data, condition, ...) {
15	40x	UseMethod("log_filter")
16		}
17
18		#' @rdname log_filter
19		#' @export
20		#' @examples
21		#' data <- iris
22		#' attr(data$Sepal.Length, "label") <- "cm"
23		#' log_filter(data, Sepal.Length >= 7)
24		#'
25		log_filter.data.frame <- function(data, condition, suffix = NULL, ...) {
26	23x	checkmate::assert_string(suffix, null.ok = TRUE)
27
28	23x	condition <- match.call()$condition
29	23x	vars <- all.vars(condition)
30	23x	var_in_env <- vapply(vars, exists, envir = parent.frame(), inherits = TRUE, FUN.VALUE = TRUE)
31	23x	var_in_data <- vapply(vars, `%in%`, table = names(data), FUN.VALUE = TRUE)
32	23x	if (!all(var_in_env \| var_in_data)) {
33	1x	stop(sprintf("Variable %s not found in data or environment.", toString(vars[!(var_in_data \| var_in_env)])))
34		}
35	22x	res <- eval(bquote(dplyr::filter(data, .(condition))))
36	22x	rows <- list(list(init = nrow(data), final = nrow(res), suffix = suffix))
37	22x	rlbl <- paste0(deparse(condition), collapse = "")
38	22x	rlbl <- stringr::str_replace_all(rlbl, "\\s+", " ")
39	22x	names(rows) <- rlbl
40	22x	attr(res, "rows") <- c(attr(data, "rows"), rows)
41
42	22x	res
43		}
44
45		#' @rdname log_filter
46		#' @param table (`string`) table name.
47		#' @param by (`character`) variable names shared by `adsl` and other datasets for filtering.
48		#' @export
49		#' @examples
50		#' log_filter(list(iris = iris), Sepal.Length >= 7, "iris", character(0))
51		log_filter.list <- function(data, condition, table, by = c("USUBJID", "STUDYID"), suffix = NULL, ...) {
52	17x	checkmate::assert_list(data, types = "data.frame", names = "unique")
53	17x	checkmate::assert_subset(table, names(data))
54	17x	checkmate::assert_names(colnames(data[[table]]), must.include = by)
55	17x	condition <- match.call()$condition
56	17x	data[[table]] <- eval(bquote(log_filter(data[[table]], .(condition), .(suffix))))
57	17x	if (identical(table, "adsl")) {
58	8x	for (k in setdiff(names(data), "adsl")) {
59	8x	if (all(by %in% names(data[[k]]))) {
60	1x	if (length(by) == 0) by <- intersect(names(data[[k]]), names(data$adsl))
61
62	8x	ori_n <- nrow(data[[k]])
63	8x	ori_att <- attr(data[[k]], "rows")
64
65	8x	data[[k]] <- dplyr::semi_join(data[[k]], data$adsl, by = by)
66
67	8x	rows <- list(list(init = ori_n, final = nrow(data[[k]]), suffix = suffix))
68	8x	names(rows) <- paste0("Filtered by adsl: ", deparse(condition), collapse = "")
69	8x	attr(data[[k]], "rows") <- c(ori_att, rows)
70		}
71		}
72		}
73	17x	return(data)
74		}
75
76		# Get Log ----
77
78		#' Get Log
79		#'
80		#' @param data (`list` of `data.frame` or `data.frame`) filtered with `log_filter`.
81		#' @param incl (`flag`) should information about unfiltered `data.frame` be printed.
82		#' @param incl.adsl (`flag`) should indication of filtering performed through `adsl` be printed.
83		#'
84		#' @export
85		get_log <- function(data, incl, incl.adsl) {
86	39x	UseMethod("get_log")
87		}
88
89		#' @rdname get_log
90		#' @export
91		#' @examples
92		#' data <- log_filter(iris, Sepal.Length >= 7, "xx")
93		#' data <- log_filter(data, Sepal.Length < 2)
94		#' data <- log_filter(data, Sepal.Length >= 2, "yy")
95		#' get_log(data)
96		#'
97		get_log.data.frame <- function(data, incl = TRUE, incl.adsl = TRUE) {
98	27x	checkmate::assert_flag(incl)
99
100	27x	att <- attr(data, "rows")
101	27x	if (!incl.adsl) {
102	4x	sel <- grepl("Filtered by adsl", names(att))
103	4x	att <- att[!sel]
104		}
105
106	27x	if (length(att) != 0L) {
107	17x	start_row <- lapply(att, "[[", "init")
108	17x	end_row <- lapply(att, "[[", "final")
109	17x	suffix <- lapply(att, "[[", "suffix")
110	17x	suffix <- vapply(suffix, function(x) ifelse(is.null(x), "", paste0(x, ": ")), character(1))
111	17x	res <- paste0(suffix, names(att), " [", start_row, " --> ", end_row, " rows.]")
112	10x	} else if (incl) {
113	6x	paste0("No filtering [", nrow(data), " rows.]")
114		} else {
115	4x	NULL
116		}
117		}
118
119
120		#' @rdname get_log
121		#' @export
122		#' @examples
123		#' data <- log_filter(
124		#' list(iris1 = iris, iris2 = iris),
125		#' Sepal.Length >= 7,
126		#' "iris1",
127		#' character(0),
128		#' "Sep"
129		#' )
130		#' get_log(data)
131		#'
132		get_log.list <- function(data, incl = TRUE, incl.adsl = TRUE) {
133	12x	checkmate::assert_list(data, types = "data.frame", names = "unique")
134	12x	checkmate::assert_flag(incl)
135
136	12x	lapply(data, get_log, incl = incl, incl.adsl = incl.adsl)
137		}
138
139		# Print Log ----
140
141		#' Print Log
142		#'
143		#' @inheritParams get_log
144		#' @export
145		#'
146		print_log <- function(data, incl, incl.adsl) {
147	10x	UseMethod("print_log")
148		}
149
150		#' @rdname print_log
151		#' @export
152		#' @examples
153		#' data <- log_filter(iris, Sepal.Length >= 7, "Sep")
154		#' print_log(data)
155		print_log.data.frame <- function(data, incl = TRUE, incl.adsl = TRUE) {
156	3x	checkmate::assert_flag(incl)
157
158	3x	cat("Filter Log:")
159	3x	cat(paste0("\n ", get_log(data, incl = incl, incl.adsl = incl.adsl)))
160	3x	cat("\n")
161	3x	invisible()
162		}
163
164		#' @rdname print_log
165		#' @export
166		#' @examples
167		#' data <- log_filter(
168		#' list(
169		#' adsl = iris,
170		#' iris2 = iris,
171		#' mtcars = mtcars,
172		#' iris3 = iris
173		#' ),
174		#' Sepal.Length >= 7,
175		#' "adsl",
176		#' character(0),
177		#' "adsl filter"
178		#' )
179		#' data <- log_filter(data, Sepal.Length >= 7, "iris2", character(0), "iris2 filter")
180		#' print_log(data)
181		#' print_log(data, incl = FALSE)
182		#' print_log(data, incl.adsl = FALSE, incl = FALSE)
183		print_log.list <- function(data, incl = TRUE, incl.adsl = TRUE) {
184	7x	checkmate::assert_list(data, types = "data.frame", names = "unique")
185	7x	checkmate::assert_flag(incl)
186
187	7x	filter_log <- get_log(data, incl = incl, incl.adsl = incl.adsl)
188
189	7x	if (!incl) {
190	2x	filter_log <- filter_log[!vapply(filter_log, is.null, logical(1))]
191		}
192
193	7x	cat("Filter Log:")
194	7x	if (length(filter_log) == 0) {
195	1x	cat("\n No filtering")
196		} else {
197	6x	mapply(
198	6x	function(x, y) {
199	11x	cat(paste0("\n - ", x, ":"))
200	11x	cat(paste0("\n ", y, ""))
201		},
202	6x	as.list(names(filter_log)),
203	6x	filter_log
204		)
205		}
206	7x	cat("\n")
207
208	7x	invisible()
209		}

1		#' Propagate Column
2		#'
3		#' `propagate`copy columns from a given table of a `list` of `data.frame` to all tables based on other
4		#' common columns. If several rows are associated with the same key, the rows will be duplicated in the receiving
5		#' tables. In safe mode, the key must be unique in the original table.
6		#'
7		#' @param db (`list` of `data.frame`) object for which some variable need to be propagated.
8		#' @param from (`string`) the name of the table where the variables to propagate are stored.
9		#' @param add (`character`) the names of the variables to propagate.
10		#' @param by (`character`) the key binding the `from` table to the other tables.
11		#' @param safe (`flag`) should the key be checked for uniqueness in the `from` table.
12		#'
13		#' @return updated `list` of `data.frame`.
14		#'
15		#' @rdname propagate
16		#' @export
17		#'
18		propagate <- function(db, from, add, by, safe = TRUE) {
19	3x	UseMethod("propagate")
20		}
21
22		#' @rdname propagate
23		#' @export
24		#'
25		#'
26		#' @examples
27		#' df1 <- data.frame(
28		#' id1 = c("a", "a", "c", "d", "e", "f"),
29		#' id2 = c("A", "B", "A", "A", "A", "A"),
30		#' int = c(1, 2, 3, 4, 5, 6),
31		#' bool = c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE)
32		#' )
33		#'
34		#' df2 <- data.frame(
35		#' id1 = c("a", "a", "d", "e", "f", "g"),
36		#' id2 = c("A", "B", "A", "A", "A", "A")
37		#' )
38		#'
39		#' df3 <- data.frame(
40		#' id1 = c("a", "c", "d", "e", "f", "x"),
41		#' id2 = c("A", "A", "A", "A", "B", "A"),
42		#' int = c(11, 22, 33, 44, 55, 66)
43		#' )
44		#'
45		#' db <- list(df1 = df1, fd2 = df2, df3 = df3)
46		#' propagate(db, from = "df1", add = c("int", "bool"), by = c("id1", "id2"))
47		#'
48		propagate.list <- function(db, from, add, by, safe = TRUE) {
49	3x	checkmate::assert_list(db, types = "data.frame", names = "unique")
50	3x	checkmate::assert_names(names(db), must.include = from)
51	3x	checkmate::assert_names(colnames(db[[from]]), must.include = add)
52	3x	checkmate::assert_names(colnames(db[[from]]), must.include = by)
53	3x	checkmate::assert_flag(safe)
54
55	3x	if (safe) {
56	2x	keys <- db[[from]][, by]
57	1x	if (anyDuplicated(keys)) stop(paste("Duplicated key"))
58		}
59
60	2x	toJoin <- db[[from]]
61
62	2x	for (tab_name in setdiff(names(db), from)) {
63	3x	tab_colnames <- colnames(db[[tab_name]])
64	3x	if (!all(add %in% tab_colnames) && all(by %in% tab_colnames)) {
65	2x	missing_var <- setdiff(add, tab_colnames)
66	2x	sel_var <- c(missing_var, by)
67	2x	sel_tab <- toJoin[, sel_var]
68
69	2x	cat(paste0("\nUpdating: ", tab_name, " with: ", toString(missing_var)))
70
71	2x	db[[tab_name]] <- db[[tab_name]] %>%
72	2x	dplyr::left_join(sel_tab, by = by)
73		} else {
74	1x	cat(paste0("\nSkipping: ", tab_name))
75		}
76		}
77	2x	cat("\n")
78	2x	return(db)
79		}

1		#' Unite Columns of a Table in a `list` of `data.frame`.
2		#'
3		#' @param adam_db (`list` of `data.frames`) to be transformed.
4		#' @param tab (`string`) the name of a table in the `adam_db` object.
5		#' @param cols (`character`) the name of the columns to unite.
6		#' @param sep (`string`) the separator for the new column name.
7		#' @param new (`string`) the name of the new column. If `NULL` the concatenation of `cols` separated by `sep` is used.
8		#'
9		#' @return `list` of `data.frames` object with a united column.
10		#' @export
11		#'
12		#' @examples
13		#' db <- list(mtcars = mtcars, iris = iris)
14		#'
15		#' x <- ls_unite(db, "mtcars", c("mpg", "hp"), new = "FUSION")
16		#' x$mtcars
17		ls_unite <- function(adam_db, tab, cols, sep = ".", new = NULL) {
18	4x	checkmate::assert_list(adam_db, types = "data.frame")
19	4x	checkmate::assert_string(tab)
20	4x	checkmate::assert_names(names(adam_db), must.include = tab)
21	4x	checkmate::assert_character(cols, min.len = 1)
22	4x	checkmate::assert_names(names(adam_db[[tab]]), must.include = cols)
23	4x	checkmate::assert_string(sep)
24	4x	checkmate::assert_string(new, null.ok = TRUE)
25
26	4x	x_interaction <- if (!is.null(new)) {
27	3x	new
28		} else {
29	1x	paste(cols, collapse = sep)
30		}
31
32	4x	x_df <- adam_db[[tab]][, cols, drop = FALSE]
33	4x	lvl <- lapply(x_df, function(y) {
34	9x	uni <- if (is.factor(y)) {
35	7x	levels(y)
36		} else {
37	2x	unique(y)
38		}
39	9x	factor(uni, levels = uni)
40		})
41
42	4x	all_lvl_df <- expand.grid(lvl)
43
44	4x	all_lvl <- all_lvl_df[, cols, drop = FALSE] %>%
45	4x	arrange(across(all_of(cols))) %>%
46	4x	apply(1, paste, collapse = sep)
47
48	4x	x_vec <- x_df[, cols, drop = FALSE] %>%
49	4x	apply(1, paste, collapse = sep)
50
51	4x	existing_lvl <- intersect(all_lvl, x_vec)
52	4x	x_fact <- factor(x_vec, existing_lvl)
53
54	4x	adam_db[[tab]][, x_interaction] <- x_fact
55	4x	adam_db
56		}

1		#' Cutting data by group
2		#'
3		#' @details Function used to categorize numeric data stored in long format depending on their group. Intervals are
4		#' closed on the right (and open on the left).
5		#'
6		#' @param df (`dataframe`) with a column of data to be cut and a column specifying the group of each observation.
7		#' @param col_data (`character`) the column containing the data to be cut.
8		#' @param col_group (`character`) the column containing the names of the groups according to which the data should be
9		#' split.
10		#' @param group (`nested list`) providing for each parameter value that should be analyzed in a categorical way: the
11		#' name of the parameter (`character`), a series of breakpoints (`numeric`) where the first breakpoints is typically
12		#' `-Inf` and the last `Inf`, and a series of name which will describe each category (`character`).
13		#' @param cat_col (`character`) the name of the new column in which the cut label should he stored.
14		#'
15		#' @export
16		#'
17		#' @return `data.frame` with a column containing categorical values.
18		#' @examples
19		#' group <- list(
20		#' list(
21		#' "Height",
22		#' c(-Inf, 150, 170, Inf),
23		#' c("=<150", "150-170", ">170")
24		#' ),
25		#' list(
26		#' "Weight",
27		#' c(-Inf, 65, Inf),
28		#' c("=<65", ">65")
29		#' ),
30		#' list(
31		#' "Age",
32		#' c(-Inf, 31, Inf),
33		#' c("=<31", ">31")
34		#' ),
35		#' list(
36		#' "PreCondition",
37		#' c(-Inf, 1, Inf),
38		#' c("=<1", "<1")
39		#' )
40		#' )
41		#' data <- data.frame(
42		#' SUBJECT = rep(letters[1:10], 4),
43		#' PARAM = rep(c("Height", "Weight", "Age", "other"), each = 10),
44		#' AVAL = c(rnorm(10, 165, 15), rnorm(10, 65, 5), runif(10, 18, 65), rnorm(10, 0, 1)),
45		#' index = 1:40
46		#' )
47		#'
48		#' cut_by_group(data, "AVAL", "PARAM", group, "my_new_categories")
49		cut_by_group <- function(df,
50		col_data,
51		col_group,
52		group,
53		cat_col) {
54	4x	checkmate::assert_data_frame(df)
55	4x	checkmate::assert_subset(c(col_data, col_group), colnames(df))
56	4x	checkmate::assert_numeric(df[, col_data])
57	4x	checkmate::assert_list(group)
58
59	4x	lapply(
60	4x	group,
61	4x	function(list_element) {
62	11x	checkmate::assert_list(list_element, len = 3, types = c("character", "numeric", "character"))
63		}
64		)
65
66	4x	df[cat_col] <- NA
67
68	4x	for (g in group) {
69	10x	selected_row <- df[[col_group]] == g[[1]]
70
71	10x	df[selected_row, cat_col] <- as.character(cut(df[[col_data]][selected_row], breaks = g[[2]], labels = g[[3]]))
72		}
73	3x	df
74		}

1		#' Reorder Two Columns Levels Simultaneously
2		#'
3		#' @details The function expect a 1:1 matching between the elements of the two selected column.
4		#'
5		#' @param df (`data.frame`) with two column whose factors should be reordered.
6		#' @param primary (`string`) the name of the column on which the levels reordering should be based.
7		#' @param secondary (`string`) the name of the column whose levels should be reordered following the levels of the
8		#' primary column.
9		#' @param levels_primary (`character`) the levels in the desired order. Existing levels that are not included will be
10		#' placed afterward in their current order.
11		#'
12		#' @export
13		#'
14		#' @examples
15		#'
16		#' df <- data.frame(
17		#' SUBJID = 1:3,
18		#' PARAMCD = factor(c("A", "B", "C")),
19		#' PARAM = factor(paste("letter", LETTERS[1:3]))
20		#' )
21		#' co_relevels(df, "PARAMCD", "PARAM", levels_primary = c("C", "A", "B"))
22		co_relevels <- function(df, primary, secondary, levels_primary) {
23	6x	checkmate::assert_data_frame(df, min.rows = 1)
24	6x	checkmate::assert_subset(c(primary, secondary), colnames(df))
25	6x	checkmate::assert_character(levels_primary, min.len = 1)
26	6x	checkmate::assert_vector(df[[primary]], any.missing = FALSE)
27	5x	checkmate::assert_vector(df[[secondary]], any.missing = FALSE)
28
29	5x	df[, primary] <- as.factor(df[[primary]])
30	5x	df[, secondary] <- as.factor(df[[secondary]])
31
32		# check unique relationship
33	5x	df_key <- df[, c(primary, secondary)]
34	5x	df_key <- unique(df_key)
35
36	5x	if (any(duplicated(df_key[[primary]])) \|\| any(duplicated(df_key[[secondary]]))) {
37	1x	stop("non univoque relation between values in primary and secondary column")
38		}
39
40	4x	keys <- setNames(as.character(df_key[[secondary]]), as.character(df_key[[primary]]))
41
42	4x	all_levels_primary <- c(levels_primary, setdiff(levels(df[[primary]]), levels_primary))
43	4x	all_levels_secondary <- keys[all_levels_primary]
44
45	4x	df[, primary] <- factor(df[[primary]], all_levels_primary)
46	4x	df[, secondary] <- factor(df[[secondary]], all_levels_secondary)
47
48	4x	df
49		}

1		#' Transforming Empty Strings and White Spaces to NAs
2		#'
3		#' @description `r lifecycle::badge("experimental")`
4		#'
5		#' SAS imports missing data as empty strings or white spaces. This helper function replaces the empty strings and white
6		#' space-only character and levels by `NAs`.
7		#'
8		#' @param x (`vector`) where empty of white space should be transformed to `NAs`.
9		#'
10		#' @return `character` or `factor` without explicit NA. `logical` and `numeric` are returned as `character`.
11		#'
12		#' @export
13		#' @examples
14		#' char1 <- c(" ", " ", "a", "b", "", "")
15		#' h_ws_to_na(char1)
16		#'
17		#' fact1 <- as.factor(char1)
18		#' h_ws_to_na(fact1)
19		#'
20		#' num1 <- c(1:10)
21		#' h_ws_to_na(num1)
22		#'
23		#' logi1 <- c(TRUE, FALSE, NA)
24		#' h_ws_to_na(logi1)
25		h_ws_to_na <- function(x) {
26	9x	if (is.factor(x)) {
27	2x	levels_x <- levels(x)
28
29	2x	ws_levels <- grepl("^\\s*$", levels_x) \| levels_x == ""
30
31	2x	levels(x)[ws_levels] <- NA
32	7x	} else if (is.character(x)) {
33	6x	ws_char <- grepl("^\\s*$", x) \| x == ""
34
35	6x	x[ws_char] <- NA
36		} else {
37	1x	x <- as.character(x)
38		}
39	9x	x
40		}
41
42		#' Transforming Empty Strings and White Spaces to Explicit NAs
43		#'
44		#' @description `r lifecycle::badge("experimental")`
45		#'
46		#' SAS imports missing data as empty strings or white spaces. This helper function is a thin wrapper around
47		#' [dunlin::h_ws_to_na] which replaces them with explicit missing level.
48		#'
49		#' @param x (`vector`) where empty of white space should be transformed to `NAs`.
50		#' @param na_level (`character`) replacement of the missing levels.
51		#'
52		#' @return `factor` with explicit NA
53		#'
54		#' @export
55		#' @examples
56		#' char1 <- c(" ", " ", "a", "b", "", "")
57		#' h_ws_to_explicit_na(char1)
58		#'
59		#' fact1 <- as.factor(char1)
60		#' h_ws_to_explicit_na(fact1)
61		#'
62		#' num1 <- c(1, 2, NA)
63		#' h_ws_to_explicit_na(num1)
64		#'
65		#' logi1 <- c(TRUE, FALSE, NA)
66		#' h_ws_to_explicit_na(logi1)
67		h_ws_to_explicit_na <- function(x, na_level = "<Missing>") {
68	7x	checkmate::assert_character(na_level)
69
70	7x	res <- forcats::fct_na_value_to_level(h_ws_to_na(x), na_level)
71
72	7x	if (na_level %in% res) {
73	5x	forcats::fct_relevel(res, na_level, after = Inf)
74		} else {
75	2x	forcats::fct_drop(res, only = na_level)
76		}
77		}
78
79		#' Transforming Empty Strings and White Spaces to Explicit NAs while Preserving Label
80		#'
81		#' @details This function preserves the label attribute.
82		#'
83		#' @param x (`vector`) input to be turned into factor with explicit missing level.
84		#' @param na_level (`character`) the label to encode missing levels.
85		#'
86		#' @return `factor` with explicit NA and the same label as the input.
87		#'
88		#' @export
89		#' @examples
90		#' char1 <- c(" ", " ", "a", "b", "", "", NA)
91		#' attr(char1, "label") <- "my_label"
92		#'
93		#' h_as_factor(char1)
94		h_as_factor <- function(x, na_level = "<Missing>") {
95	4x	checkmate::assert_vector(x)
96
97	4x	init_lab <- attr(x, "label")
98
99	4x	res <- h_ws_to_explicit_na(x, na_level = na_level)
100
101	4x	attr(res, "label") <- init_lab
102
103	4x	res
104		}
105
106		#' Setting the Label Attribute
107		#'
108		#' @param var (`object`) whose label attribute can be set.
109		#' @param label (`character`) the label to add.
110		#'
111		#' @return `object` with label attribute.
112		#'
113		#' @export
114		#' @examples
115		#' x <- c(1:10)
116		#' attr(x, "label")
117		#'
118		#' y <- attr_label(x, "my_label")
119		#' attr(y, "label")
120		attr_label <- function(var, label) {
121	89x	checkmate::assert_character(label)
122
123	88x	x <- var
124	88x	attr(x, "label") <- label
125
126	88x	x
127		}
128
129		#' Setting the Label Attribute to Data Frame Columns
130		#'
131		#' @param df (`data.frame`).
132		#' @param label (`character`) the labels to add.
133		#'
134		#' @return `data.frame` with label attributes.
135		#'
136		#' @export
137		#' @examples
138		#' res <- attr_label_df(mtcars, letters[1:11])
139		#' res
140		#' lapply(res, attr, "label")
141		attr_label_df <- function(df, label) {
142	20x	checkmate::assert_data_frame(df)
143	20x	checkmate::assert_character(label, len = ncol(df))
144
145	19x	res <- mapply(attr_label, var = df, label = as.list(label), SIMPLIFY = FALSE)
146	19x	as.data.frame(res)
147		}

1		#' Encode Categorical Missing Values in a `list` of `data.frame`
2		#'
3		#' @details This is a helper function to encode missing values (i.e `NA` and `empty string`) of every `character` and
4		#' `factor` variable found in a `list` of `data.frame`. The `label` attribute of the columns is preserved.
5		#'
6		#' @param data (`list` of `data.frame`) to be transformed.
7		#' @param omit_tables (`character`) the names of the tables to omit from processing.
8		#' @param omit_columns (`character`) the names of the columns to omit from processing.
9		#' @param char_as_factor (`logical`) should character columns be converted into factor.
10		#' @param na_level (`string`) the label to encode missing levels.
11		#'
12		#' @return `list` of `data.frame` object with explicit missing levels.
13		#' @export
14		#'
15		#' @examples
16		#'
17		#' df1 <- data.frame(
18		#' "char" = c("a", "b", NA, "a", "k", "x"),
19		#' "char2" = c("A", "B", NA, "A", "K", "X"),
20		#' "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")),
21		#' "logi" = c(NA, FALSE, TRUE, NA, FALSE, NA)
22		#' )
23		#' df2 <- data.frame(
24		#' "char" = c("a", "b", NA, "a", "k", "x"),
25		#' "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")),
26		#' "num" = c(1:5, NA)
27		#' )
28		#' df3 <- data.frame(
29		#' "char" = c(NA, NA, "A")
30		#' )
31		#'
32		#' db <- list(df1 = df1, df2 = df2, df3 = df3)
33		#'
34		#' ls_explicit_na(db)
35		#' ls_explicit_na(db, omit_tables = "df3", omit_columns = "char2")
36		#'
37		ls_explicit_na <- function(data,
38		omit_tables = NULL,
39		omit_columns = NULL,
40		char_as_factor = TRUE,
41		na_level = "<Missing>") {
42	3x	checkmate::assert_list(data, types = "data.frame", names = "unique")
43	3x	checkmate::assert_character(omit_tables, null.ok = TRUE)
44	3x	checkmate::assert_character(omit_columns, null.ok = TRUE)
45	3x	checkmate::assert_flag(char_as_factor)
46	3x	checkmate::assert_string(na_level)
47
48	3x	modif_tab <- setdiff(names(data), omit_tables)
49	3x	if (length(modif_tab) < 1) {
50	1x	return(data)
51		}
52
53	2x	data[modif_tab] <- lapply(
54	2x	data[modif_tab],
55	2x	h_df_explicit,
56	2x	omit_columns = omit_columns,
57	2x	char_as_factor = char_as_factor,
58	2x	na_level = na_level
59		)
60
61	2x	data
62		}
63
64		#' Encode Categorical Missing Values in a `data.frame`.
65		#'
66		#' @inheritParams ls_explicit_na
67		#' @keywords internal
68		#'
69		#' @examples
70		#' \dontrun{
71		#' df <- data.frame(
72		#' "char" = c("a", "b", NA, "a", "k", "x"),
73		#' "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")),
74		#' "logi" = c(NA, FALSE, TRUE, NA, FALSE, NA),
75		#' "num" = c(1:5, NA)
76		#' )
77		#'
78		#' h_df_explicit(df)
79		#' h_df_explicit(df, omit_columns = c("fact", "x"))
80		#' }
81		h_df_explicit <- function(df,
82		omit_columns = NULL,
83		char_as_factor = TRUE,
84		na_level = "<Missing>") {
85	3x	na_list <- list(x = c("", NA))
86	3x	names(na_list) <- na_level
87	3x	na_rule <- rule(.lst = na_list)
88
89	3x	df %>%
90	3x	mutate(
91	3x	across(
92	3x	where(~ is.character(.x) \| is.factor(.x)) & !any_of(.env$omit_columns),
93	3x	~ reformat(.x, format = .env$na_rule, .string_as_fct = .env$char_as_factor, .na_last = TRUE)
94		)
95		)
96		}

1		whisker_env <- NULL
2
3		.onLoad <- function(libname, pkgname) {
4	!	default_whiskers <- c(patient_label = "patients")
5	!	whisker_env <<- new.env(parent = globalenv())
6	!	add_whisker(default_whiskers)
7		}