dunlin coverage - 97.82%

Files
Source

#' Reformat Values
#' @param obj (`character`, `factor` or `list of data.frame`) to reformat.
#' @param format (`rule`) or (`list`) of `rule` depending on the class of obj.
#' @param ... for compatibility between methods and pass additional special mapping to transform rules.
#' * `.string_as_fct` (`flag`) whether the reformatted character object should be converted to factor.
#' * `.to_NA` (`character`) values that should be converted to `NA`. For `factor`, the corresponding levels are
#'   dropped. If `NULL`, the argument will be taken from the `to_NA` attribute of the rule.
#' * `.drop` (`flag`) whether to drop empty levels. If `NULL`, the argument will be taken from the `drop` attribute
#'   of the rule.
#' * `.na_last` (`flag`) whether the level replacing `NA` should be last.
#' @param verbose (`flag`) whether to print the format.
#' @returns (`character`, `factor` or `list of data.frame`) with remapped values.
#'
#' @export
#' @note When the rule is empty or when values subject to reformatting are absent from the object, no error is
#'   raised. The conversion to factor (if `.string_as_fct = TRUE`) is still carried out. The conversion of the levels
#'   declared in `.to_NA` to `NA` values occurs after the remapping. `NA` values created this way are not affected by a
#'   rule declaring a remapping of `NA` values. For factors, level dropping is the last step, hence, levels converted to
#'   `NA` by the `.to_NA` argument, will be removed if `.drop` is `TRUE`. Arguments passed via `reformat` override the
#'   ones defined during rule creation.
#'
#' @rdname reformat
#'
reformat <- function(obj, ...) {
  # S3 generic: the method is selected from the class of `obj`.
  UseMethod("reformat")
}

#' @export
#' @rdname reformat
reformat.default <- function(obj, format, ...) {
  # Fallback for classes without a dedicated method: warn and hand the input back untouched.
  obj_class <- toString(class(obj))
  rlang::warn(paste0("Not implemented for class: ", obj_class, "! Returning original object."))
  obj
}

#' @export
#' @rdname reformat
#'
#' @examples
#' # Reformatting of character.
#' obj <- c("a", "b", "x", NA, "")
#' attr(obj, "label") <- "my label"
#' format <- rule("A" = "a", "NN" = NA)
#'
#' reformat(obj, format)
#' reformat(obj, format, .string_as_fct = FALSE, .to_NA = NULL)
#'
reformat.character <- function(obj, format, ..., verbose = FALSE) {
  checkmate::assert_class(format, "rule")
  checkmate::assert_flag(as.logical(verbose))

  # Rebuild the rule so that arguments passed through `...` take priority over the ones stored in it.
  format <- do.call(rule, modifyList(as.list(format), list(...), keep.null = TRUE))
  if (verbose) {
    print(format)
  }

  if (attr(format, ".string_as_fct")) {
    # Convert to factor, then restore the attributes of `obj` (e.g. `label`) that the factor does not carry.
    att <- attributes(obj)
    obj_fact <- as.factor(obj)
    # Compare attribute *names* on both sides. Comparing against the attribute values would lose an attribute
    # whose name happens to equal a level or "factor", and would append attributes the factor already has.
    supp_att_name <- setdiff(names(att), names(attributes(obj_fact)))
    supp_att <- att[supp_att_name]
    attributes(obj_fact) <- c(attributes(obj_fact), supp_att)

    # Delegate the remapping itself to the factor method.
    reformat(obj_fact, format)
  } else {
    # Replace every value listed in the rule by the name it is mapped to; other values are left as is.
    value_match <- unlist(format)
    m <- match(obj, value_match)
    obj[!is.na(m)] <- names(format)[m[!is.na(m)]]

    # Conversion to NA happens after the remapping.
    val_to_NA <- attr(format, ".to_NA")
    if (!is.null(val_to_NA)) {
      obj[obj %in% val_to_NA] <- NA_character_
    }

    obj
  }
}

#' @export
#' @rdname reformat
#'
#' @examples
#' # Reformatting of factor.
#' obj <- factor(c("first", "a", "aa", "b", "x", NA), levels = c("first", "x", "b", "aa", "a", "z"))
#' attr(obj, "label") <- "my label"
#' format <- rule("A" = c("a", "aa"), "NN" = c(NA, "x"), "Not_present" = "z", "Not_a_level" = "P")
#'
#' reformat(obj, format)
#' reformat(obj, format, .na_last = FALSE, .to_NA = "b", .drop = FALSE)
#'
reformat.factor <- function(obj, format, ..., verbose = FALSE) {
  checkmate::assert_class(format, "rule")
  checkmate::assert_flag(verbose)

  # Rebuild the rule so that arguments passed through `...` override the ones stored in it.
  rule_args <- modifyList(as.list(format), list(...), keep.null = TRUE)
  format <- do.call(rule, rule_args)
  if (verbose) {
    print(format)
  }

  maps_na <- any(is.na(format))

  # Missing values can only be recoded once they are an explicit level.
  if (maps_na && anyNA(obj)) {
    obj <- forcats::fct_na_value_to_level(obj)
  }

  # Recode the mappings whose original value is a level; the targets of the other mappings are added as
  # new levels. Both groups are determined from the levels before recoding.
  is_level <- format %in% levels(obj)
  present_map <- format[is_level]
  absent_map <- format[!is_level]
  obj <- forcats::fct_recode(obj, !!!present_map)
  obj <- forcats::fct_expand(obj, unique(names(absent_map)))
  obj <- forcats::fct_relevel(obj, unique(names(format)))

  # Optionally move the level standing for `NA` to the last position.
  if (maps_na && attr(format, ".na_last")) {
    na_target <- names(format)[is.na(format)]
    obj <- forcats::fct_relevel(obj, na_target, after = Inf)
  }

  if (attr(format, ".drop")) {
    obj <- forcats::fct_drop(obj)
  }

  # Levels converted to NA are dropped.
  to_na <- attr(format, ".to_NA")
  if (!is.null(to_na)) {
    obj <- forcats::fct_na_level_to_value(obj, to_na)
  }

  obj
}

#' @export
#' @rdname reformat
#'
#' @note the variables listed under the `all_datasets` keyword will be reformatted with the corresponding rule in every
#'   data set except where another rule is specified for the same variable under a specific data set name.
#'
#' @examples
#' # Reformatting of list of data.frame.
#' df1 <- data.frame(
#'   var1 = c("a", "b", NA),
#'   var2 = factor(c("F1", "F2", NA))
#' )
#'
#' df2 <- data.frame(
#'   var1 = c("x", NA, "y"),
#'   var2 = factor(c("F11", NA, "F22"))
#' )
#'
#' db <- list(df1 = df1, df2 = df2)
#'
#' format <- list(
#'   df1 = list(
#'     var1 = rule("X" = "x", "N" = NA, .to_NA = "b")
#'   ),
#'   df2 = list(
#'     var2 = rule("f11" = "F11", "NN" = NA)
#'   ),
#'   all_datasets = list(
#'     var1 = rule("xx" = "x", "aa" = "a")
#'   )
#' )
#'
#' reformat(db, format)
reformat.list <- function(obj,
                          format,
                          ...,
                          verbose = get_arg("dunlin.reformat.verbose", "R_DUNLIN_REFORMAT_VERBOSE", FALSE)) {
  checkmate::assert_list(obj, types = c("data.frame", "tibble"))
  checkmate::assert_named(obj)
  checkmate::assert_list(format, names = "unique", types = "list", null.ok = TRUE)
  verbose <- as.logical(verbose)
  checkmate::assert_flag(verbose)

  # Without any format there is nothing to apply.
  if (length(format) == 0) {
    return(obj)
  }

  assert_valid_format(format)

  # Spread the `all_datasets` rules over the data sets present in `obj`.
  dataset_names <- names(obj)
  format <- h_expand_all_datasets(format, dataset_names)

  if (verbose) {
    for (tb in names(format)) {
      tb_rules <- format[[tb]]
      for (cl in names(tb_rules)) {
        cat(sprintf("\nData frame `%s`, column `%s`:\n", tb, cl))
        print(tb_rules[[cl]])
      }
    }
    cat("\n")
  }

  for (tab in dataset_names) {
    # Only keep the rules targeting a column that exists in this data set.
    tab_rules <- format[[tab]]
    tab_rules <- tab_rules[names(tab_rules) %in% names(obj[[tab]])]

    # Reformat each targeted column with its own rule; `...` is forwarded to the column-level method.
    obj[[tab]][names(tab_rules)] <- Map(
      function(rl, col) reformat(obj[[tab]][[col]], format = rl, ...),
      tab_rules,
      names(tab_rules)
    )
  }

  obj
}

#' Propagate the rules for all datasets
#'
#' @inheritParams reformat
#' @param ls_datasets (`character`) the name of all datasets in the object to reformat.
#' @returns a nested `list` attributing a rule to be applied to specific variables of specific datasets.
#'
#' @details the rules described  under `all_datasets` are propagated to all data sets for the corresponding variables
#'   except in data sets where a rule is already attributed to the same variable.
#'
#' @keywords internal
h_expand_all_datasets <- function(format_list, ls_datasets = NULL) {
  assert_valid_list_format(list(f = format_list))
  checkmate::assert_character(ls_datasets, null.ok = TRUE)

  # Rules attached to a specific data set, i.e. everything except the `all_datasets` entry.
  dataset_specific <- format_list[setdiff(names(format_list), "all_datasets")]

  # Without data set names there is nothing to propagate to.
  if (is.null(ls_datasets)) {
    return(dataset_specific)
  }

  # Give each data set the `all_datasets` rules; entries left NULL (no such rules) are discarded.
  shared <- list()
  shared[ls_datasets] <- format_list["all_datasets"]
  shared <- base::Filter(Negate(is.null), shared)

  # Data set specific rules win over the shared ones.
  modifyList(shared, dataset_specific)
}

#' Assert Nested List can be used as Format Argument in Reformat.
#'
#' @param object (`list`) to assert.
#' @returns invisible `TRUE` or an error message if the criteria are not fulfilled.
#'
#' @export
#' @examples
#' format <- list(
#'   df1 = list(
#'     var1 = rule("X" = "x", "N" = c(NA, ""))
#'   ),
#'   df2 = list(
#'     var1 = rule(),
#'     var2 = rule("f11" = "F11", "NN" = NA)
#'   ),
#'   df3 = list()
#' )
#'
#' assert_valid_format(format)
assert_valid_format <- function(object) {
  coll <- checkmate::makeAssertCollection()

  # Top level: a uniquely named list of lists.
  checkmate::assert_list(object, names = "unique", types = "list", add = coll)

  # Table level: every entry is a uniquely named list of `rule` objects without missing element.
  check_table <- function(x, xtable) {
    checkmate::assert_list(
      x,
      names = "unique",
      types = "rule",
      any.missing = FALSE,
      .var.name = paste0("[", xtable, "]"),
      add = coll
    )
  }
  mapply(check_table, object, names(object))

  # Report all collected failures at once.
  checkmate::reportAssertions(coll)
}

#' Assert List can be Converted into a Nested List Compatible with the Format Argument of Reformat.
#'
#' @param object (`list`) to assert.
#' @returns invisible `TRUE` or an error message if the criteria are not fulfilled.
#'
#' @export
#' @examples
#' format <- list(
#'   df1 = list(
#'     var1 = list("X" = "x", "N" = c(NA, ""))
#'   ),
#'   df2 = list(
#'     var1 = list(),
#'     var2 = list("f11" = "F11", "NN" = NA)
#'   ),
#'   df3 = list()
#' )
#'
#' assert_valid_list_format(format)
assert_valid_list_format <- function(object) {
  coll <- checkmate::makeAssertCollection()

  # Top level: a uniquely named list of lists.
  checkmate::assert_list(object, names = "unique", types = "list", add = coll)

  # Table level: every entry is a uniquely named list of lists without missing element.
  check_table <- function(x, xtable) {
    checkmate::assert_list(
      x,
      names = "unique",
      types = "list",
      any.missing = FALSE,
      .var.name = paste0("[", xtable, "]"),
      add = coll
    )
  }

  # Variable level: every mapping is a uniquely named list of character, numeric or logical values.
  check_variables <- function(x, xtable) {
    check_one <- function(x, xvar) {
      checkmate::assert_list(
        x,
        names = "unique",
        types = c("character", "numeric", "logical"),
        .var.name = paste0("[", xtable, ".", xvar, "]"),
        add = coll
      )
    }
    mapply(check_one, x, names(x))
  }

  mapply(check_table, object, names(object))
  mapply(check_variables, object, names(object))

  # Report all collected failures at once.
  checkmate::reportAssertions(coll)
}

# assert_all_tablenames ----

#' Assert that all names are among names of a `list` of `data.frame`.
#'
#' @param db (`list` of `data.frame`) input to check for the presence of tables.
#' @param tab (`character`) the names of the tables to be checked.
#' @param null_ok (`flag`) can `tab` be NULL.
#' @param qualifier (`string`) to be returned if the check fails.
#' @returns invisible `NULL` or an error message if the criteria are not fulfilled.
#'
#' @export
#'
#' @examples
#' lsd <- list(
#'   mtcars = mtcars,
#'   iris = iris
#' )
#' assert_all_tablenames(lsd, c("mtcars", "iris"), qualifier = "first test:")
assert_all_tablenames <- function(db, tab, null_ok = TRUE, qualifier = NULL) {
  checkmate::assert_list(db, types = "data.frame", names = "unique")
  checkmate::assert_character(tab, null.ok = null_ok)
  checkmate::assert_string(qualifier, null.ok = TRUE)

  # Requested table names that are not found in `db`.
  missing_tab <- setdiff(tab, names(db))

  if (length(missing_tab) > 0) {
    stop(
      paste(qualifier, "Expected table names:", toString(missing_tab), "not in", deparse(substitute(db)))
    )
  }

  invisible(NULL)
}

# assert_one_tablenames ----

#' Assert that at least one name is among table names of a `list` of `data.frame`.
#'
#' @param db (`list` of `data.frame`) input to check for the presence of tables.
#' @param tab (`character`) the names of the tables to be checked.
#' @param null_ok (`flag`) can `tab` be NULL.
#' @param qualifier (`string`) to be returned if the check fails.
#' @returns invisible `NULL` or an error message if the criteria are not fulfilled.
#'
#' @keywords internal
assert_one_tablenames <- function(db, tab, null_ok = TRUE, qualifier = NULL) {
  checkmate::assert_list(db, types = "data.frame", names = "unique")
  checkmate::assert_character(tab, null.ok = null_ok)
  checkmate::assert_string(qualifier, null.ok = TRUE)

  # Requested table names that are actually present in `db`; one match is enough.
  common <- intersect(tab, names(db))

  if (length(common) > 0) {
    invisible(NULL)
  } else {
    stop(
      paste(qualifier, "At least one of:", toString(tab), "is expected to be a table name of", deparse(substitute(db)))
    )
  }
}

#' Safe transformer
#'
#' @param text (`string`) to be substituted.
#' @param envir (`environment`) containing key-value pairs describing the substitution to perform.
#' @returns `string` with substituted placeholders.
#'
#' @details The substitution is looked up in `envir` only (enclosing environments are not searched).
#' The function first looks for an exact match. If not found, it searches for a match in lower case then applies to
#' the result the same case as the original value. If there is no match at all, `text` itself is returned.
#'
#' @keywords internal
safe_transformer <- function(text, envir) {
  # Is there a character binding called `key` in `envir` itself?
  has_key <- function(key) {
    exists(key, envir = envir, inherits = FALSE, mode = "character")
  }

  # An exact match is returned as is.
  if (has_key(text)) {
    return(toString(get(text, envir = envir, mode = "character")))
  }

  # Otherwise try the lower case key and fall back on the placeholder text itself.
  lower_key <- tolower(text)
  value <- if (has_key(lower_key)) {
    get(lower_key, envir = envir, mode = "character")
  } else {
    text
  }

  # Give the value the same case as the placeholder (lower, upper or title); mixed case leaves it unchanged.
  if (is.character(value)) {
    recase <- if (identical(text, lower_key)) {
      tolower
    } else if (identical(text, toupper(text))) {
      toupper
    } else if (identical(text, stringr::str_to_title(text))) {
      stringr::str_to_title
    } else {
      identity
    }
    value <- recase(value)
  }

  toString(value)
}

#' Render whiskers safely
#' @param x (`character`) input to be rendered safely.
#' @returns `character` with substituted placeholders.
#'
#' @note The strings enclosed in `{}` are substituted using the key-values pairs set with `add_whiskers`.
#'
#' @export
#' @examples
#' render_safe("Name of {Patient_label}")
render_safe <- function(x) {
  checkmate::assert_character(x, null.ok = TRUE)
  if (is.null(x)) {
    return(NULL)
  }

  # Render a single string: each `{placeholder}` is resolved by `safe_transformer` in the whisker environment.
  render_one <- function(txt) {
    rendered <- glue::glue(
      txt,
      .transformer = safe_transformer,
      .envir = whisker_env,
      .null = "NULL",
      .open = "{",
      .close = "}"
    )
    rendered[[1L]]
  }

  # The result carries the names of the input.
  setNames(vapply(x, render_one, FUN.VALUE = "", USE.NAMES = FALSE), names(x))
}
#' Add whisker values
#' @param x Named (`character`) input.
#' @returns invisible `NULL`. Assign the key-value pair provided as argument in the whisker environment.
#'
#' @details The names of the character gives the string to be replaced and the value gives the new string.
#'
#' @export
#' @examples
#' my_whiskers <- c(Placeholder = "Replacement", Placeholder2 = "Replacement2")
#' add_whisker(my_whiskers)
add_whisker <- function(x) {
  checkmate::assert_character(x, names = "unique", any.missing = FALSE)

  # Store each element (still carrying its name) in the whisker environment under its own name.
  for (key in names(x)) {
    assign(key, x[key], envir = whisker_env)
  }

  invisible()
}

#' Remove whisker values
#' @param x (`character`) the names of the whiskers to remove.
#' @returns invisible `NULL`. Removes the whiskers named in `x` from the whisker environment.
#' @export
remove_whisker <- function(x) {
  checkmate::assert_character(x, any.missing = FALSE)
  # Delete the bindings named in `x` from the whisker environment.
  rm(list = x, envir = whisker_env)
}

#' Show Whisker Values
#' @returns invisible `NULL`. Prints the values stored in the whisker environment.
#' @export
#' @examples
#' show_whisker()
show_whisker <- function() {
  # Print one `key --> value` line per character binding; bindings of another mode are skipped.
  for (key in ls(envir = whisker_env)) {
    if (exists(key, envir = whisker_env, mode = "character")) {
      value <- toString(get(key, envir = whisker_env, mode = "character"))
      cat(sprintf("%s --> %s\n", key, value))
    }
  }

  invisible()
}

#' Create rule based on mappings
#' @param ... Mapping pairs, the argument name is the transformed value while
#' its values are the original values.
#' @param .lst (`list`) of mapping.
#' @param .string_as_fct (`flag`) whether to convert characters to factors.
#' @param .na_last (`flag`)  whether the level replacing `NA` should be last.
#' @param .drop (`flag`) whether to drop empty levels.
#' @param .to_NA (`character`) values that should be converted to `NA`. Set to `NULL` if nothing should be converted to
#'   `NA`.
#' @returns a `rule` object.
#'
#' @note Conversion to `NA` is the last step of the remapping process.
#'
#' @export
#' @examples
#' rule("X" = "x", "Y" = c("y", "z"))
#' rule("X" = "x", "Y" = c("y", "z"), .drop = TRUE, .to_NA = c("a", "b"), .na_last = FALSE)
#'
rule <- function(..., .lst = list(...), .string_as_fct = TRUE, .na_last = TRUE, .drop = FALSE, .to_NA = "") {
  checkmate::assert_flag(.string_as_fct)
  checkmate::assert_flag(.na_last)
  checkmate::assert_flag(.drop)
  checkmate::assert_character(.to_NA, null.ok = TRUE, any.missing = FALSE)

  # Elements flagged by `is.na()` are stored as character `NA` before the type check.
  .lst[is.na(.lst)] <- NA_character_
  only_character <- checkmate::test_list(.lst, types = c("character"))
  if (!only_character) {
    rlang::abort("Value mapping may only contain the type: {character}")
  }

  # Original values, which may each appear only once in the whole mapping.
  vals <- as.character(unlist(.lst, use.names = FALSE))
  checkmate::assert_character(vals, unique = TRUE)

  # Repeat each target name once per original value mapped to it.
  nms <- unlist(lapply(seq_along(.lst), function(i) {
    rep(names(.lst)[i], length(.lst[[i]]))
  }))

  # A rule is a named character vector (names = new values) carrying its options as attributes.
  structure(
    setNames(vals, nms),
    class = c("rule", "character"),
    .string_as_fct = .string_as_fct,
    .na_last = .na_last,
    .drop = .drop,
    .to_NA = .to_NA
  )
}

#' @export
#'
print.rule <- function(x, ...) {
  cat("Mapping of:\n")
  nms <- unique(names(x))
  if (length(x) == 0) {
    cat("Empty mapping.\n")
  } else {
    # One line per target value, listing the original values mapped to it.
    for (i in nms) {
      ori_nms <- unlist(x[names(x) %in% i])
      ori_nms <- ifelse(is.na(ori_nms), "<NA>", stringr::str_c("\"", ori_nms, "\""))
      ori_nms <- toString(ori_nms)
      cat(i, " <- ", ori_nms, "\n")
    }
  }

  # Options stored as attributes of the rule.
  .to_NA <- attr(x, ".to_NA")
  if (!is.null(.to_NA)) {
    cat("Convert to <NA>:", toString(stringr::str_c("\"", .to_NA, "\"")), "\n")
  }
  cat("Convert to factor:", attr(x, ".string_as_fct"), "\n")
  cat("Drop unused level:", attr(x, ".drop"), "\n")
  cat("NA-replacing level in last position:", attr(x, ".na_last"), "\n")

  # Print methods return their argument invisibly.
  invisible(x)
}

#' Convert nested list into list of `rule`
#' @param obj (`nested list`) to convert into list of rules.
#' @returns a `list` of `rule` objects.
#' @export
#' @examples
#' obj <- list(
#'   rule1 = list("X" = c("a", "b"), "Z" = "c", .to_NA = "xxxx"),
#'   rule2 = list(Missing = c(NA, "")),
#'   rule3 = list(Missing = c(NA, ""), .drop = TRUE),
#'   rule4 = list(Absent = c(NA, ""), .drop = TRUE, .to_NA = "yyyy")
#' )
#' list2rules(obj)
#'
list2rules <- function(obj) {
  coll <- checkmate::makeAssertCollection()
  checkmate::assert_list(obj, types = "list", add = coll)
  checkmate::assert_names(names(obj), type = "unique", add = coll)
  checkmate::reportAssertions(coll)

  # Each element holds the arguments of one call to `rule`.
  as_rule <- function(rule_args) {
    do.call("rule", rule_args)
  }

  lapply(obj, as_rule)
}

#' Convert Rule to List
#' @param x (`rule`) to convert.
#' @param ... not used.
#' @returns an object of class `list`.
#'
#' @export
#' @examples
#' x <- rule("a" = c("a", "b"), "X" = "x", .to_NA = c("v", "w"))
#' as.list(x)
as.list.rule <- function(x, ...) {
  map_names <- names(x)
  targets <- unique(map_names)

  # One element per target value, holding the original values mapped to it.
  mappings <- lapply(targets, function(target) {
    unname(x[map_names == target])
  })

  # The remaining attributes (everything but names and class) are appended under their own name.
  all_att <- attributes(x)
  settings <- all_att[!names(all_att) %in% c("names", "class")]

  out <- setNames(
    c(mappings, unname(settings)),
    c(targets, names(settings))
  )

  # Explicitly declare .to_NA value, even if NULL.
  if (is.null(out[[".to_NA"]])) {
    out[".to_NA"] <- list(NULL)
  }

  out
}

#' Combine Two Rules
#'
#' @param x (`rule`) to modify.
#' @param y (`rule`) rule whose mapping will take precedence over the ones described in `x`.
#' @param ... not used.
#'
#' @note The order of the mappings in the resulting rule corresponds to the order of the mappings in `x` followed by the
#'   mappings that are only present in `y`.
#'
#' @returns a `rule`.
#' @export
#' @examples
#' r1 <- rule(
#'   "first" = c("from ori rule", "FROM ORI RULE"),
#'   "last" = c(NA, "last"),
#'   .to_NA = "X",
#'   .drop = TRUE
#' )
#' r2 <- rule(
#'   "first" = c("F", "f"),
#'   "second" = c("S", "s"),
#'   "third" = c("T", "t"),
#'   .to_NA = "something"
#' )
#' combine_rules(r1, r2)
combine_rules <- function(x, y, ...) {
  checkmate::assert_class(x, "rule", null.ok = TRUE)
  checkmate::assert_class(y, "rule", null.ok = TRUE)

  if (is.null(x) && is.null(y)) {
    rlang::abort("Both rules are NULL.")
  }

  # A NULL rule becomes an empty list, so the result is then built from the other rule alone.
  base_args <- as.list(x)
  override_args <- as.list(y)

  # Entries of `y` replace the entries of `x` with the same name; new ones are appended.
  base_args[names(override_args)] <- override_args

  do.call(rule, base_args)
}

#' Combine Rules Found in Lists of Rules.
#'
#' @param x (`list`) of `rule` objects.
#' @param val (`list`) of `rule` objects.
#' @param ... passed to [`dunlin::combine_rules`].
#'
#' @returns a `list` of `rule` objects.
#' @export
#' @examples
#' l1 <- list(
#'   r1 = rule(
#'     "first" = c("overwritten", "OVERWRITTEN"),
#'     "almost first" = c(NA, "almost")
#'   ),
#'   r2 = rule(
#'     ANYTHING = "anything"
#'   )
#' )
#'
#' l2 <- list(
#'   r1 = rule(
#'     "first" = c("F", "f"),
#'     "second" = c("S", "s"),
#'     "third" = c("T", "t"),
#'     .to_NA = "something"
#'   ),
#'   r3 = rule(
#'     SOMETHING = "something"
#'   )
#' )
#'
#' combine_list_rules(l1, l2)
combine_list_rules <- function(x, val, ...) {
  # Unique names prevents zero-character names.
  checkmate::assert_list(x, types = "rule", null.ok = FALSE, names = "unique")
  checkmate::assert_list(val, types = "rule", null.ok = FALSE, names = "unique")

  # Combine rule by rule; a rule of `val` without counterpart in `x` is combined with NULL.
  keys <- names(val)
  x[keys] <- lapply(keys, function(key) {
    combine_rules(x[[key]], val[[key]], ...)
  })

  x
}

#' Propagate Column
#'
#' `propagate` copies columns from a given table of a `list` of `data.frame` to all tables based on other
#' common columns. If several rows are associated with the same key, the rows will be duplicated in the receiving
#' tables. In safe mode, the key must be unique in the original table.
#'
#' @param db (`list` of `data.frame`) object for which some variable need to be propagated.
#' @param from (`string`) the name of the table where the variables to propagate are stored.
#' @param add (`character`) the names of the variables to propagate.
#' @param by (`character`) the key binding the `from` table to the other tables.
#' @param safe (`flag`) should the key be checked for uniqueness in the `from` table.
#'
#' @returns updated `list` of `data.frame`.
#'
#' @rdname propagate
#' @export
#'
propagate <- function(db, from, add, by, safe = TRUE) {
  # S3 generic: the method is selected from the class of `db`.
  UseMethod("propagate")
}

#' @rdname propagate
#' @export
#'
#'
#' @examples
#' df1 <- data.frame(
#'   id1 = c("a", "a", "c", "d", "e", "f"),
#'   id2 = c("A", "B", "A", "A", "A", "A"),
#'   int = c(1, 2, 3, 4, 5, 6),
#'   bool = c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE)
#' )
#'
#' df2 <- data.frame(
#'   id1 = c("a", "a", "d", "e", "f", "g"),
#'   id2 = c("A", "B", "A", "A", "A", "A")
#' )
#'
#' df3 <- data.frame(
#'   id1 = c("a", "c", "d", "e", "f", "x"),
#'   id2 = c("A", "A", "A", "A", "B", "A"),
#'   int = c(11, 22, 33, 44, 55, 66)
#' )
#'
#' db <- list(df1 = df1, fd2 = df2, df3 = df3)
#' propagate(db, from = "df1", add = c("int", "bool"), by = c("id1", "id2"))
#'
propagate.list <- function(db, from, add, by, safe = TRUE) {
  checkmate::assert_list(db, types = "data.frame", names = "unique")
  checkmate::assert_names(names(db), must.include = from)
  checkmate::assert_names(colnames(db[[from]]), must.include = add)
  checkmate::assert_names(colnames(db[[from]]), must.include = by)
  checkmate::assert_flag(safe)

  source_tab <- db[[from]]

  # In safe mode, refuse to propagate when the key is duplicated in the source table.
  if (safe && anyDuplicated(source_tab[, by])) {
    rlang::abort(paste("Duplicated key"))
  }

  for (tab_name in setdiff(names(db), from)) {
    tab_colnames <- colnames(db[[tab_name]])

    # A table is updated when it lacks at least one variable to add and holds every key column.
    needs_update <- !all(add %in% tab_colnames) && all(by %in% tab_colnames)
    if (!needs_update) {
      cat(paste0("\nSkipping: ", tab_name))
      next
    }

    # Only bring in the variables the table does not have yet.
    missing_var <- setdiff(add, tab_colnames)
    sel_tab <- source_tab[, c(missing_var, by)]

    cat(paste0("\nUpdating: ", tab_name, " with: ", toString(missing_var)))

    db[[tab_name]] <- dplyr::left_join(db[[tab_name]], sel_tab, by = by, multiple = "all")
  }
  cat("\n")

  db
}

#' Encode Categorical Missing Values in a `list` of `data.frame`
#'
#' @details This is a helper function to encode missing values (i.e `NA` and `empty string`) of every `character` and
#'   `factor` variable found in a `list` of `data.frame`. The `label` attribute of the columns is preserved.
#'
#' @param data (`list` of `data.frame`) to be transformed.
#' @param omit_tables (`character`) the names of the tables to omit from processing.
#' @param omit_columns (`character`) the names of the columns to omit from processing.
#' @param char_as_factor (`logical`) should character columns be converted into factor.
#' @param na_level (`string`) the label to encode missing levels.
#' @returns `list` of `data.frame` object with explicit missing levels.
#' @export
#'
#' @examples
#' df1 <- data.frame(
#'   "char" = c("a", "b", NA, "a", "k", "x"),
#'   "char2" = c("A", "B", NA, "A", "K", "X"),
#'   "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")),
#'   "logi" = c(NA, FALSE, TRUE, NA, FALSE, NA)
#' )
#' df2 <- data.frame(
#'   "char" = c("a", "b", NA, "a", "k", "x"),
#'   "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")),
#'   "num" = c(1:5, NA)
#' )
#' df3 <- data.frame(
#'   "char" = c(NA, NA, "A")
#' )
#'
#' db <- list(df1 = df1, df2 = df2, df3 = df3)
#'
#' ls_explicit_na(db)
#' ls_explicit_na(db, omit_tables = "df3", omit_columns = "char2")
#'
ls_explicit_na <- function(data,
                           omit_tables = NULL,
                           omit_columns = NULL,
                           char_as_factor = TRUE,
                           na_level = "<Missing>") {
  checkmate::assert_list(data, types = "data.frame", names = "unique")
  checkmate::assert_character(omit_tables, null.ok = TRUE)
  checkmate::assert_character(omit_columns, null.ok = TRUE)
  checkmate::assert_flag(char_as_factor)
  checkmate::assert_string(na_level)

  # Process every table that is not explicitly omitted; with none left, `data` is returned unchanged.
  for (tab in setdiff(names(data), omit_tables)) {
    data[[tab]] <- h_df_explicit(
      data[[tab]],
      omit_columns = omit_columns,
      char_as_factor = char_as_factor,
      na_level = na_level
    )
  }

  data
}

#' Encode Categorical Missing Values in a `data.frame`.
#'
#' @inheritParams ls_explicit_na
#' @returns a `data.frame` object with explicit missing levels.
#' @keywords internal
h_df_explicit <- function(df,
                          omit_columns = NULL,
                          char_as_factor = TRUE,
                          na_level = "<Missing>") {
  # Rule mapping both the empty string and `NA` to `na_level`.
  na_rule <- rule(.lst = setNames(list(c("", NA)), na_level))

  # Apply the rule to every character or factor column that is not omitted.
  mutate(
    df,
    across(
      where(~ is.character(.x) | is.factor(.x)) & !any_of(.env$omit_columns),
      ~ reformat(.x, format = .env$na_rule, .string_as_fct = .env$char_as_factor, .na_last = TRUE)
    )
  )
}

#' Setting the Label Attribute
#'
#' @param var (`object`) whose label attribute can be set.
#' @param label (`character`) the label to add.
#' @returns `object` with label attribute.
#'
#' @export
#' @examples
#' x <- c(1:10)
#' attr(x, "label")
#'
#' y <- attr_label(x, "my_label")
#' attr(y, "label")
attr_label <- function(var, label) {
  checkmate::assert_character(label)

  # `var` is a local copy, so the caller's object is left untouched.
  attr(var, "label") <- label
  var
}

#' Setting the Label Attribute to Data Frame Columns
#'
#' @param df (`data.frame`).
#' @param label (`character`) the labels to add.
#' @returns `data.frame` with label attributes.
#'
#' @export
#' @examples
#' res <- attr_label_df(mtcars, letters[1:11])
#' res
#' lapply(res, attr, "label")
attr_label_df <- function(df, label) {
  checkmate::assert_data_frame(df)
  checkmate::assert_character(label, len = ncol(df))

  # Pair each column with its label, then rebuild a data frame from the labelled columns.
  labelled_cols <- Map(attr_label, var = df, label = as.list(label))
  as.data.frame(labelled_cols)
}

#' Getting Argument From System, Option or Default
#'
#' @param opt (`string`) the name of an option.
#' @param sys (`string`) the name of an environment variable.
#' @param default value to return if neither the environment variable nor the option are set.
#' @param split (`string`) the pattern used to split the values obtained using environment variable.
#'
#' @returns if defined, the value of the option (`opt`), a `character` from the environment variable (`sys`) or the
#'   `default` in this order of priority.
#'
#' @export
#' @examplesIf require("withr")
#' get_arg("my.option", "MY_ARG", "default")
#' withr::with_envvar(c(MY_ARG = "x;y"), get_arg("my.option", "MY_ARG", "default"))
#' withr::with_options(c(my.option = "y"), get_arg("my.option", "MY_ARG", "default"))
get_arg <- function(opt = NULL, sys = NULL, default = NULL, split = ";") {
  checkmate::assert_string(sys, null.ok = TRUE)
  checkmate::assert_string(opt, null.ok = TRUE)
  checkmate::assert_string(split)

  # First choice: the option, returned as is. An empty string counts as not set.
  opt_val <- if (is.null(opt)) "" else getOption(opt, default = "")
  if (!identical(opt_val, "")) {
    return(opt_val)
  }

  # Second choice: the environment variable, split on `split`.
  env_val <- if (is.null(sys)) "" else Sys.getenv(sys, unset = "")
  if (!identical(env_val, "")) {
    return(stringr::str_split_1(env_val, split))
  }

  default
}

#' Filter Data with Log
#' @param data (`data.frame`) input data to subset, or named (`list` of `data.frame`).
#' @param condition (`call`) of subset condition. Must evaluate as logical.
#' @param suffix (`string`) optional argument describing the filter.
#' @param ... further arguments to be passed to or from other methods.
#' @returns a `data.frame` or `list` of `data.frame` filtered for the provided conditions.
#' @details
#' `log_filter` will filter the data/named list of data according to the `condition`.
#' All the variables in `condition` must exist in the data (as variables) or in the parent
#' frame (e.g., in global environment).
#' For named list of data, if `ADSL` is available, `log_filter` will also try to subset all
#' other datasets with `USUBJID`.
#' @export
log_filter <- function(data, condition, ...) {
  # S3 generic: the method is selected from the class of `data`.
  UseMethod("log_filter")
}

#' @rdname log_filter
#' @export
#' @examples
#' data <- iris
#' attr(data$Sepal.Length, "label") <- "cm"
#' log_filter(data, Sepal.Length >= 7)
#'
log_filter.data.frame <- function(data, condition, suffix = NULL, ...) {
  checkmate::assert_string(suffix, null.ok = TRUE)

  # Work with the unevaluated condition, as written in the call.
  condition <- match.call()$condition
  # Every variable of the condition must be a column of `data` or be found from the calling frame.
  vars <- all.vars(condition)
  var_in_env <- vapply(vars, exists, envir = parent.frame(), inherits = TRUE, FUN.VALUE = TRUE)
  var_in_data <- vapply(vars, `%in%`, table = names(data), FUN.VALUE = TRUE)
  if (!all(var_in_env | var_in_data)) {
    rlang::abort(sprintf("Variable %s not found in data or environment.", toString(vars[!(var_in_data | var_in_env)])))
  }
  # Splice the condition into a `dplyr::filter` call and evaluate it.
  res <- eval(bquote(dplyr::filter(data, .(condition))))
  # Log entry: row counts before and after filtering plus the optional suffix, named after the condition
  # (deparsed on a single line with whitespace runs collapsed).
  rows <- list(list(init = nrow(data), final = nrow(res), suffix = suffix))
  rlbl <- paste0(deparse(condition), collapse = "")
  rlbl <- stringr::str_replace_all(rlbl, "\\s+", " ")
  names(rows) <- rlbl
  # Append to the log already carried by `data` in its `rows` attribute, if any.
  attr(res, "rows") <- c(attr(data, "rows"), rows)

  res
}

#' @rdname log_filter
#' @param table (`string`) table name.
#' @param by (`character`) variable names shared by `adsl` and other datasets for filtering.
#' @param verbose (`flag`) whether to print a report about the filtering.
#' @export
#' @examples
#' log_filter(list(iris = iris), Sepal.Length >= 7, "iris", character(0))
log_filter.list <- function(data, condition, table, by = c("USUBJID", "STUDYID"), suffix = NULL, verbose = FALSE, ...) {
  checkmate::assert_list(data, types = "data.frame", names = "unique")
  assert_all_tablenames(data, table)
  checkmate::assert_names(colnames(data[[table]]), must.include = by)

  # Work with the unevaluated condition and splice it into the call filtering the selected table.
  condition <- match.call()$condition
  data[[table]] <- eval(bquote(log_filter(data[[table]], .(condition), .(suffix))))

  # Filtering `adsl` is propagated to every other table holding the key columns.
  if (identical(table, "adsl")) {
    for (k in setdiff(names(data), "adsl")) {
      if (all(by %in% names(data[[k]]))) {
        # With an empty `by`, join on the columns this table shares with `adsl`. The key is kept in a
        # per-table variable so that it does not leak into the handling of the following tables.
        by_k <- if (length(by) == 0) {
          intersect(names(data[[k]]), names(data[["adsl"]]))
        } else {
          by
        }

        ori_n <- nrow(data[[k]])
        ori_att <- attr(data[[k]], "rows")

        data[[k]] <- dplyr::semi_join(data[[k]], data[["adsl"]], by = by_k)

        # Log entry appended to the log the table carried before the join.
        rows <- list(list(init = ori_n, final = nrow(data[[k]]), suffix = suffix))
        names(rows) <- paste0("Filtered by adsl: ", deparse(condition), collapse = "")
        attr(data[[k]], "rows") <- c(ori_att, rows)
      }
    }
  }

  if (verbose) {
    print_log(data, incl = FALSE, incl.adsl = TRUE)
  }

  data
}

# Get Log ----

#' Get Log
#'
#' Generic retrieving a description of the filtering steps recorded on `data`. Dispatches on the class of `data`.
#'
#' @param data (`list` of `data.frame` or `data.frame`) filtered with `log_filter`.
#' @param incl (`flag`) should information about unfiltered `data.frame` be printed.
#' @param incl.adsl (`flag`) should indication of filtering performed through `adsl` be printed.
#' @returns `character` or `list of character` describing the filtering applied to `data`.
#'
#' @export
get_log <- function(data, incl, incl.adsl) {
  UseMethod("get_log")
}

#' @rdname get_log
#' @export
#' @examples
#' data <- log_filter(iris, Sepal.Length >= 7, "xx")
#' data <- log_filter(data, Sepal.Length < 2)
#' data <- log_filter(data, Sepal.Length >= 2, "yy")
#' get_log(data)
#'
get_log.data.frame <- function(data, incl = TRUE, incl.adsl = TRUE) {
  checkmate::assert_flag(incl)
  checkmate::assert_flag(incl.adsl)

  # The filtering history is stored in the `rows` attribute, one named entry per filtering step.
  att <- attr(data, "rows")
  if (!incl.adsl) {
    att <- att[!grepl("Filtered by adsl", names(att))]
  }

  # Every branch ends with a plain expression (no assignment) so that the result is returned visibly.
  if (length(att) != 0L) {
    start_row <- lapply(att, "[[", "init")
    end_row <- lapply(att, "[[", "final")
    prefix <- vapply(
      att,
      function(x) {
        sfx <- x[["suffix"]]
        if (is.null(sfx)) "" else paste0(sfx, ": ")
      },
      character(1)
    )
    paste0(prefix, names(att), " [", start_row, " --> ", end_row, " rows.]")
  } else if (incl) {
    paste0("No filtering [", nrow(data), " rows.]")
  } else {
    NULL
  }
}


#' @rdname get_log
#' @export
#' @examples
#' data <- log_filter(
#'   list(iris1 = iris, iris2 = iris),
#'   Sepal.Length >= 7,
#'   "iris1",
#'   character(0),
#'   "Sep"
#' )
#' get_log(data)
#'
get_log.list <- function(data, incl = TRUE, incl.adsl = TRUE) {
  checkmate::assert_list(data, types = "data.frame", names = "unique")
  checkmate::assert_flag(incl)

  # One log per table, keeping the table names.
  lapply(
    data,
    function(tbl) get_log(tbl, incl = incl, incl.adsl = incl.adsl)
  )
}

# Print Log ----

#' Print Log
#'
#' Generic printing a description of the filtering steps recorded on `data`. Dispatches on the class of `data`.
#'
#' @inheritParams get_log
#' @returns `NULL`. Print a description of the filtering applied to `data`.
#' @export
#'
print_log <- function(data, incl, incl.adsl) {
  UseMethod("print_log")
}

#' @rdname print_log
#' @export
#' @examples
#' data <- log_filter(iris, Sepal.Length >= 7, "Sep")
#' print_log(data)
print_log.data.frame <- function(data, incl = TRUE, incl.adsl = TRUE) {
  checkmate::assert_flag(incl)

  cat("Filter Log:")
  # Each log entry starts on its own indented line; entries are separated by a single space, as `cat()` does.
  entries <- paste0("\n  ", get_log(data, incl = incl, incl.adsl = incl.adsl))
  cat(paste(entries, collapse = " "), "\n", sep = "")
  invisible()
}

#' @rdname print_log
#' @export
#' @examples
#' data <- log_filter(
#'   list(
#'     adsl = iris,
#'     iris2 = iris,
#'     mtcars = mtcars,
#'     iris3 = iris
#'   ),
#'   Sepal.Length >= 7,
#'   "adsl",
#'   character(0),
#'   "adsl filter"
#' )
#' data <- log_filter(data, Sepal.Length >= 7, "iris2", character(0), "iris2 filter")
#' print_log(data)
#' print_log(data, incl = FALSE)
#' print_log(data, incl.adsl = FALSE, incl = FALSE)
print_log.list <- function(data, incl = TRUE, incl.adsl = TRUE) {
  checkmate::assert_list(data, types = "data.frame", names = "unique")
  checkmate::assert_flag(incl)

  logs <- get_log(data, incl = incl, incl.adsl = incl.adsl)

  # Tables without any log entry are skipped when unfiltered tables are not requested.
  if (!incl) {
    logs <- Filter(Negate(is.null), logs)
  }

  cat("Filter Log:")
  if (length(logs) == 0) {
    cat("\n  No filtering")
  } else {
    tbl_names <- names(logs)
    for (i in seq_along(logs)) {
      # Table header followed by its log entries.
      cat(paste0("\n  - ", tbl_names[[i]], ":"))
      cat(paste0("\n  ", logs[[i]], ""))
    }
  }
  cat("\n")

  invisible()
}

#' Unite Columns of a Table in a `list` of `data.frame`.
#'
#' @details The new column is a factor. Its levels are the combinations of values present in the data, ordered
#'   following the levels (for factors) or the order of first appearance (otherwise) of the united columns, the first
#'   column of `cols` varying the slowest.
#'
#' @param adam_db (`list` of `data.frames`) to be transformed.
#' @param tab (`string`) the name of a table in the `adam_db` object.
#' @param cols (`character`) the name of the columns to unite.
#' @param sep (`string`) the separator for the new column name.
#' @param new (`string`) the name of the new column. If `NULL` the concatenation of `cols` separated by `sep` is used.
#'
#' @returns `list` of `data.frames` object with a united column.
#' @export
#'
#' @examples
#' db <- list(mtcars = mtcars, iris = iris)
#'
#' x <- ls_unite(db, "mtcars", c("mpg", "hp"), new = "FUSION")
#' x$mtcars
ls_unite <- function(adam_db, tab, cols, sep = ".", new = NULL) {
  checkmate::assert_list(adam_db, types = "data.frame")
  checkmate::assert_string(tab)
  checkmate::assert_names(names(adam_db), must.include = tab)
  checkmate::assert_character(cols, min.len = 1)
  checkmate::assert_names(names(adam_db[[tab]]), must.include = cols)
  checkmate::assert_string(sep)
  checkmate::assert_string(new, null.ok = TRUE)

  x_interaction <- if (!is.null(new)) {
    new
  } else {
    paste(cols, collapse = sep)
  }

  # Paste the columns row-wise after converting each column to character on its own. Going through `apply()` would
  # coerce the data.frame to a matrix, which formats numeric columns (e.g. "5.0") when they are mixed with non-numeric
  # ones, so that the values would not match the levels built below (e.g. "5").
  paste_rows <- function(df) {
    do.call(paste, c(unname(lapply(df, as.character)), list(sep = sep)))
  }

  x_df <- adam_db[[tab]][, cols, drop = FALSE]

  # Possible values of each column, as factors whose levels follow the desired ordering.
  lvl <- lapply(x_df, function(y) {
    uni <- if (is.factor(y)) {
      levels(y)
    } else {
      unique(y)
    }
    factor(uni, levels = uni)
  })

  # All combinations, sorted by level order with the first column as primary key.
  all_lvl_df <- expand.grid(lvl)[, cols, drop = FALSE]
  all_lvl_df <- all_lvl_df[do.call(order, unname(as.list(all_lvl_df))), , drop = FALSE]

  all_lvl <- paste_rows(all_lvl_df)
  x_vec <- paste_rows(x_df)

  # Keep only the combinations present in the data, in the order defined above.
  existing_lvl <- intersect(all_lvl, x_vec)
  x_fact <- factor(x_vec, existing_lvl)

  adam_db[[tab]][, x_interaction] <- x_fact
  adam_db
}

#' Transforming data.frame with Multiple Identifying columns into Wide Format
#'
#' @details This function allows to identify observations on the basis of several columns. Warning: Instead of nesting
#'   duplicated values, the function will throw an error if the same parameter is provided twice for the same
#'   observation.
#'
#' @param data (`data.frame`) to be pivoted.
#' @param id (`character`) the name of the columns whose combination uniquely identify the observations.
#' @param param_from (`character`) the name of the column containing the names of the parameters to be pivoted. The
#'   unique values in this column will become column names in the output.
#' @param value_from (`character`) the name of the column containing the values that will populate the output.
#' @param drop_na (`logical`) should column containing only `NAs` be dropped.
#' @param drop_lvl (`logical`) should missing levels be dropped in the columns coming from (`value_from`).
#'
#' @returns `data.frame` in a wide format.
#'
#' @export
#' @examples
#' test_data <- data.frame(
#'   the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"),
#'   the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"),
#'   the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"),
#'   the_val = c(65, 165, "M", 66, "F", 166, 155, TRUE)
#' )
#'
#' multi_id_pivot_wider(test_data, c("the_obs", "the_obs2"), "the_param", "the_val")
#' multi_id_pivot_wider(test_data, "the_obs2", "the_param", "the_val")
multi_id_pivot_wider <- function(data,
                                 id,
                                 param_from,
                                 value_from,
                                 drop_na = FALSE,
                                 drop_lvl = FALSE) {
  checkmate::assert_data_frame(data, min.rows = 1, min.cols = 3)
  checkmate::assert_character(id)
  checkmate::assert_character(param_from, len = 1)
  checkmate::assert_character(value_from, len = 1)
  # The columns must be known to exist before they are used in the duplication check below.
  checkmate::assert_subset(c(id, param_from, value_from), colnames(data))
  # check for duplication of observation-parameter
  checkmate::assert_false(any(duplicated(data[, c(id, param_from)])))
  checkmate::assert_flag(drop_na)
  checkmate::assert_flag(drop_lvl)

  # One row per observation, with a key made of the pasted identifying columns.
  unique_id <- unique(data[id])
  key <- apply(unique_id[id], 1, paste, collapse = "-")
  unique_id <- cbind(key, unique_id)

  param <- data[[param_from]]

  # Same key computed for every row of the original data.
  mini_data <- data[, c(param_from, value_from)]
  f_key <- apply(data[id], 1, paste, collapse = "-")
  mini_data <- cbind(f_key, mini_data)

  # One data.frame per parameter.
  data_ls <- split(mini_data, param)

  # Transform to named vector, the first column is the key.
  data_vec <-
    lapply(
      data_ls,
      function(x) setNames(x[[value_from]], x[, 1])
    )

  if (drop_lvl) {
    data_vec <- rapply(data_vec, droplevels, classes = "factor", how = "replace")
  }

  # query each id in each param; observations without value for a parameter get NA.
  all_vec <- lapply(data_vec, function(x) x[unique_id[, 1]])

  if (drop_na) all_vec <- Filter(function(x) !all(is.na(x)), all_vec)

  all_vec <- lapply(all_vec, unname)
  bind_data <- do.call(dplyr::bind_cols, all_vec)

  # Identifying columns (without the key) followed by one column per parameter.
  res <- dplyr::bind_cols(unique_id[, -1, drop = FALSE], bind_data)

  rownames(res) <- NULL
  res
}

#' Transforming data.frame with multiple Data Column into Wide Format
#'
#' @details This function is adapted to cases where the data are distributed in several columns while the name of the
#'   parameter is in one. Typical example is `adsub` where numeric data are stored in `AVAL` while categorical data are
#'   in `AVALC`.
#'
#' @param data (`data.frame`) to be pivoted.
#' @param id (`character`) the name of the columns whose combination uniquely identify the observations.
#' @param param_from (`character`) the name of the columns containing the names of the parameters to be pivoted. The
#'   unique values in this column will become column names in the output.
#' @param value_from (`character`) the name of the column containing the values that will populate the output.
#' @param labels_from (`character`) the name of the column containing the labels of the new columns. If not
#'   provided, the labels will be equal to the column names. When several labels are available for the same column, the
#'   first one will be selected.
#' @param drop_na (`logical`) should column containing only `NAs` be dropped.
#' @param drop_lvl (`logical`) should missing levels be dropped in the columns coming from `value_from`.
#'
#' @returns `list` of `data.frame` in a wide format with label attribute attached to each columns.
#'
#' @export
#' @examples
#' test_data <- data.frame(
#'   the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"),
#'   the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"),
#'   the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"),
#'   the_label = c(
#'     "Weight (Kg)", "Height (cm)", "Gender", "Weight (Kg)",
#'     "Gender", "Height (cm)", "Height (cm)", "Pre-condition"
#'   ),
#'   the_val = c(65, 165, NA, 66, NA, 166, 155, NA),
#'   the_val2 = c(65, 165, "M", 66, "F", 166, 155, TRUE)
#' )
#'
#' x <- poly_pivot_wider(
#'   test_data,
#'   c("the_obs", "the_obs2"),
#'   "the_param",
#'   c("the_val", "the_val2"),
#'   "the_label"
#' )
#' x
#' Reduce(function(u, v) merge(u, v, all = TRUE), x)
poly_pivot_wider <- function(data,
                             id,
                             param_from,
                             value_from,
                             labels_from = NULL,
                             drop_na = TRUE,
                             drop_lvl = FALSE) {
  # other tests are performed at lower levels.
  checkmate::assert_character(value_from, unique = TRUE)
  # Validated before the comparison with `param_from` below, which requires a single value.
  checkmate::assert_character(labels_from, len = 1, null.ok = TRUE)

  # Create new labels for new columns.
  if (is.null(labels_from) || labels_from == param_from) {
    # Converted to character so that a factor column is not turned into integer codes when combined with the old
    # labels.
    new_labels <- as.character(unique(data[[param_from]]))
    names(new_labels) <- new_labels
  } else {
    checkmate::assert_subset(labels_from, colnames(data))

    new_labels_df <- data[, c(labels_from, param_from)]
    new_labels_df <- unique(new_labels_df)

    new_labels <- as.character(new_labels_df[[labels_from]])
    names(new_labels) <- as.character(new_labels_df[[param_from]])
  }

  # Retrieve old labels; columns without label attribute are labelled with their name.
  old_labels <- lapply(data, attr, "label")
  n_old_label <- names(old_labels)
  null_label <- unlist(lapply(old_labels, is.null))
  old_labels[null_label] <- n_old_label[null_label]
  old_labels <- unlist(old_labels)

  # New labels come first: indexing by name returns the first match.
  all_labels <- c(new_labels, old_labels)

  # One wide table per value column.
  res_ls <- list()
  for (n_value_from in value_from) {
    res <- multi_id_pivot_wider(
      data = data,
      id = id,
      param_from = param_from,
      value_from = n_value_from,
      drop_na = drop_na,
      drop_lvl = drop_lvl
    )

    res <- attr_label_df(res, all_labels[colnames(res)])
    res_ls[[n_value_from]] <- res
  }
  res_ls
}

#' Reorder Two Columns Levels Simultaneously
#'
#' @details The function expects a 1:1 matching between the elements of the two selected columns.
#'
#' @param df (`data.frame`) with two columns whose factors should be reordered.
#' @param primary (`string`) the name of the column on which the levels reordering should be based.
#' @param secondary (`string`) the name of the column whose levels should be reordered following the levels of the
#'   primary column.
#' @param levels_primary (`character`) the levels in the desired order. Existing levels that are not included will be
#'   placed afterward in their current order.
#' @returns a `data.frame` with the `secondary` column converted to factor with reordered levels.
#'
#' @export
#'
#' @examples
#' df <- data.frame(
#'   SUBJID = 1:3,
#'   PARAMCD = factor(c("A", "B", "C")),
#'   PARAM = factor(paste("letter", LETTERS[1:3]))
#' )
#' co_relevels(df, "PARAMCD", "PARAM", levels_primary = c("C", "A", "B"))
co_relevels <- function(df, primary, secondary, levels_primary) {
  checkmate::assert_data_frame(df, min.rows = 1)
  checkmate::assert_subset(c(primary, secondary), colnames(df))
  checkmate::assert_character(levels_primary, min.len = 1)
  checkmate::assert_vector(df[[primary]], any.missing = FALSE)
  checkmate::assert_vector(df[[secondary]], any.missing = FALSE)

  # Both columns are handled as factors from here on.
  for (col in c(primary, secondary)) {
    df[, col] <- as.factor(df[[col]])
  }

  # Distinct primary/secondary pairs; each value may appear in a single pair only.
  pairs <- unique(df[, c(primary, secondary)])
  primary_values <- as.character(pairs[[primary]])
  secondary_values <- as.character(pairs[[secondary]])

  if (anyDuplicated(primary_values) > 0 || anyDuplicated(secondary_values) > 0) {
    rlang::abort("non univoque relation between values in primary and secondary column")
  }

  # Lookup table from primary value to secondary value.
  lookup <- secondary_values
  names(lookup) <- primary_values

  # Requested levels first, then the remaining ones in their current order.
  remaining <- setdiff(levels(df[[primary]]), levels_primary)
  ordered_primary <- c(levels_primary, remaining)
  ordered_secondary <- lookup[ordered_primary]

  df[, primary] <- factor(df[[primary]], ordered_primary)
  df[, secondary] <- factor(df[[secondary]], ordered_secondary)

  df
}

#' Join `adsub` to `adsl`
#'
#' Generic dispatching on the class of `adam_db`. The default values of the arguments are the ones defined in the
#' method that is dispatched to.
#'
#' @param adam_db (`list` of `data.frame`) object input with an `adsl` and `adsub` table.
#' @param keys (`character`) the name of the columns in `adsl` uniquely identifying a row.
#' @param continuous_var (`character`) the value of a parameter in the `PARAMCD` column of the `adsub` table from which
#'   columns containing continuous values should be created. If `"all"`, all parameter values are selected, if `NULL`,
#'   none are selected.
#' @param categorical_var (`character`) the value of a parameter in the `PARAMCD` column of the `adsub` table from which
#'   columns containing categorical values should be created. If `"all"`, all parameter values are selected, if `NULL`,
#'   none are selected.
#' @param continuous_suffix (`string`) the suffixes to add to the newly generated columns containing continuous values.
#' @param categorical_suffix (`string`) the suffixes to add to the newly generated columns containing categorical
#'   values.
#' @param drop_na (`logical`) whether resulting columns containing only `NAs` should be dropped.
#' @param drop_lvl (`logical`) should missing levels be dropped in the resulting columns.
#'
#' @returns a `list` of `data.frame` with new columns in the `adsl` table.
#'
#' @rdname join_adsub_adsl
#' @export
#'
join_adsub_adsl <- function(adam_db,
                            keys,
                            continuous_var,
                            categorical_var,
                            continuous_suffix,
                            categorical_suffix,
                            drop_na = TRUE,
                            drop_lvl = TRUE) {
  UseMethod("join_adsub_adsl")
}

#' @rdname join_adsub_adsl
#' @export
#'
#' @examples
#' adsl <- data.frame(
#'   USUBJID = c("S1", "S2", "S3", "S4"),
#'   STUDYID = "My_study",
#'   AGE = c(60, 44, 23, 31)
#' )
#'
#' adsub <- data.frame(
#'   USUBJID = c("S1", "S2", "S3", "S4", "S1", "S2", "S3"),
#'   STUDYID = "My_study",
#'   PARAM = c("weight", "weight", "weight", "weight", "height", "height", "height"),
#'   PARAMCD = c("w", "w", "w", "w", "h", "h", "h"),
#'   AVAL = c(98, 75, 70, 71, 182, 155, 152),
#'   AVALC = c(">80", "<=80", "<=80", "<=80", ">180", "<=180", "<=180")
#' )
#'
#' db <- list(adsl = adsl, adsub = adsub)
#'
#' x <- join_adsub_adsl(adam_db = db)
#' x <- join_adsub_adsl(adam_db = db, continuous_var = c("w", "h"), categorical_var = "h")
join_adsub_adsl.list <- function(adam_db,
                                 keys = c("USUBJID", "STUDYID"),
                                 continuous_var = "all",
                                 categorical_var = "all",
                                 continuous_suffix = "",
                                 categorical_suffix = "_CAT",
                                 drop_na = TRUE,
                                 drop_lvl = FALSE) {
  checkmate::assert_list(adam_db, types = "data.frame")
  checkmate::assert_names(names(adam_db), must.include = c("adsl", "adsub"))
  checkmate::assert_names(names(adam_db$adsub), must.include = c("PARAM", "PARAMCD", "AVAL", "AVALC", keys))
  checkmate::assert_names(names(adam_db$adsl), must.include = keys)
  checkmate::assert_numeric(adam_db$adsub$AVAL)
  checkmate::assert_multi_class(adam_db$adsub$AVALC, c("character", "factor"))
  checkmate::assert_string(continuous_suffix)
  checkmate::assert_string(categorical_suffix)
  checkmate::assert_flag(drop_na)
  checkmate::assert_flag(drop_lvl)

  # Empty strings in AVALC are treated as NA.
  is_blank <- adam_db$adsub$AVALC == ""
  adam_db$adsub$AVALC[is_blank] <- NA

  # Continuous values come from AVAL, categorical ones from AVALC; the order is relied upon below.
  value_col <- c("AVAL", "AVALC")

  # Select variables names: "all" stands for every parameter code of adsub.
  expand_all <- function(x) {
    if (identical(x, "all")) {
      unique(adam_db$adsub$PARAMCD)
    } else {
      x
    }
  }

  # Create new variable names: each parameter code is named after the column it will create in adsl.
  name_with_suffix <- function(x, suffix) {
    if (is.null(x)) {
      return(NULL)
    }
    names(x) <- paste0(x, suffix)
    x
  }

  vars_nam <- Map(
    name_with_suffix,
    lapply(list(continuous_var, categorical_var), expand_all),
    list(continuous_suffix, categorical_suffix)
  )

  # Test if new columns already exist in adsl.
  assert_names_notadsl(vars_nam, adam_db$adsl)

  # Test if categorical and continuous column will result in the same column name.
  assert_names_collision(vars_nam)

  # Pivot and keep labels.
  adsub_wide_ls <- poly_pivot_wider(
    adam_db$adsub,
    id = keys,
    param_from = "PARAMCD",
    value_from = value_col,
    labels_from = "PARAM",
    drop_na = drop_na,
    drop_lvl = drop_lvl
  )

  # Merge categorical and continuous variables.
  for (i in seq_along(value_col)) {
    value_name <- value_col[i]
    requested <- vars_nam[[i]]
    wide_df <- adsub_wide_ls[[value_name]]
    wide_cols <- colnames(wide_df)

    # Warning if some columns are entirely NA, hence discarded.
    not_cols <- setdiff(requested, wide_cols)
    if (length(not_cols) > 0) {
      is_categorical <- value_name == "AVALC"
      type <- if (is_categorical) "Categorical" else "Continuous"
      arg_type <- if (is_categorical) "categorical_var" else "continuous_var"
      warning(
        sprintf(
          "Dropping %s for %s type, No data available. Adjust `%s` argument to silence this warning or set `drop_na = FALSE`", # nolint
          toString(not_cols),
          type,
          arg_type
        )
      )
    }

    # Preserving names.
    common_cols <- requested[c(requested) %in% wide_cols]

    wide_df <- wide_df[, c(keys, as.character(common_cols)), drop = FALSE]
    colnames(wide_df) <- c(keys, names(common_cols))

    adam_db$adsl <- dplyr::left_join(
      x = adam_db$adsl,
      y = wide_df,
      by = keys
    )
  }

  adam_db
}

# Utility functions ----

# Warn when a continuous and a categorical variable would create a column with the same name.
# `vars_nam` is a list whose first and second elements are vectors named after the columns to create.
assert_names_collision <- function(vars_nam) {
  continuous_names <- names(vars_nam[[1]])
  categorical_names <- names(vars_nam[[2]])

  clashing <- continuous_names[continuous_names %in% categorical_names]
  if (length(clashing) > 0) {
    rlang::warn(
      paste(
        toString(clashing),
        "are new columns for continuous and categorical variable,
Please set different `continuous_suffix` or `categorical_suffix`
or select different columns to avoid automatic renaming."
      )
    )
  }
}

# Warn when a column to be created already exists in `df`.
# `vars_nam` is a list of vectors (or `NULL`) named after the columns to create.
assert_names_notadsl <- function(vars_nam, df) {
  # Flatten explicitly: `sapply()` would return a list or a matrix depending on the length of the elements, and a
  # list never matches column names with `%in%`.
  final_names <- unique(unlist(lapply(vars_nam, names), use.names = FALSE))
  already_in_adsl <- final_names %in% colnames(df)
  if (any(already_in_adsl)) {
    rlang::warn(
      paste(
        toString(final_names[already_in_adsl]),
        "already exist in adsl, the name will default to another values.
Please change `continuous_suffix` or `categorical_suffix` to avoid automatic renaming"
      )
    )
  }
}

#' Cutting data by group
#'
#' @details Function used to categorize numeric data stored in long format depending on their group. Intervals are
#'   closed on the right (and open on the left). Observations whose group is missing or not described in `group` get
#'   `NA` in the new column.
#'
#' @param df (`dataframe`) with a column of data to be cut and a column specifying the group of each observation.
#' @param col_data (`character`) the column containing the data to be cut.
#' @param col_group (`character`) the column containing the names of the groups according to which the data should be
#'   split.
#' @param group  (`nested list`) providing for each parameter value that should be analyzed in a categorical way: the
#'   name of the parameter (`character`), a series of breakpoints (`numeric`) where the first breakpoints is typically
#'   `-Inf` and the last `Inf`, and a series of name which will describe each category (`character`).
#' @param cat_col (`character`) the name of the new column in which the cut label should be stored.
#' @returns `data.frame` with a column containing categorical values.
#' @export
#'
#' @examples
#' group <- list(
#'   list(
#'     "Height",
#'     c(-Inf, 150, 170, Inf),
#'     c("=<150", "150-170", ">170")
#'   ),
#'   list(
#'     "Weight",
#'     c(-Inf, 65, Inf),
#'     c("=<65", ">65")
#'   ),
#'   list(
#'     "Age",
#'     c(-Inf, 31, Inf),
#'     c("=<31", ">31")
#'   ),
#'   list(
#'     "PreCondition",
#'     c(-Inf, 1, Inf),
#'     c("=<1", ">1")
#'   )
#' )
#' data <- data.frame(
#'   SUBJECT = rep(letters[1:10], 4),
#'   PARAM = rep(c("Height", "Weight", "Age", "other"), each = 10),
#'   AVAL = c(rnorm(10, 165, 15), rnorm(10, 65, 5), runif(10, 18, 65), rnorm(10, 0, 1)),
#'   index = 1:40
#' )
#'
#' cut_by_group(data, "AVAL", "PARAM", group, "my_new_categories")
cut_by_group <- function(df,
                         col_data,
                         col_group,
                         group,
                         cat_col) {
  checkmate::assert_data_frame(df)
  checkmate::assert_subset(c(col_data, col_group), colnames(df))
  # `[[` extracts the column as a vector for any kind of data.frame.
  checkmate::assert_numeric(df[[col_data]])
  checkmate::assert_list(group)

  for (list_element in group) {
    checkmate::assert_list(list_element, len = 3, types = c("character", "numeric", "character"))
  }

  df[cat_col] <- NA

  for (g in group) {
    # `%in%` never returns NA, so missing group values are simply not selected.
    selected_row <- df[[col_group]] %in% g[[1]]

    df[selected_row, cat_col] <- as.character(cut(df[[col_data]][selected_row], breaks = g[[2]], labels = g[[3]]))
  }
  df
}

#' Assert Nested List can be used as Format Argument in Reformat.
#'
#' @param object (`list`) to assert.
#' @returns invisible `TRUE` or an error message if the criteria are not fulfilled.
#'
#' @export
#' @examples
#' format <- list(
#'   df1 = list(
#'     var1 = rule("X" = "x", "N" = c(NA, ""))
#'   ),
#'   df2 = list(
#'     var1 = rule(),
#'     var2 = rule("f11" = "F11", "NN" = NA)
#'   ),
#'   df3 = list()
#' )
#'
#' assert_valid_format(format)
assert_valid_format <- function(object) {
  # Failures are collected and reported together at the end.
  coll <- checkmate::makeAssertCollection()

  # Check object.
  checkmate::assert_list(object, names = "unique", types = "list", add = coll)

  # Check table level. Iterating by position keeps the check working when `object` has no names, so that the
  # collected failures are reported instead of an unrelated error.
  table_names <- names(object)
  for (i in seq_along(object)) {
    checkmate::assert_list(
      object[[i]],
      names = "unique",
      types = "rule",
      any.missing = FALSE,
      .var.name = paste0("[", table_names[i], "]"),
      add = coll
    )
  }

  checkmate::reportAssertions(coll)
}
44
#' Assert List can be Converted into a Nested List Compatible with the Format Argument of Reformat.
#'
#' @param object (`list`) to assert.
#' @returns invisible `TRUE` or an error message if the criteria are not fulfilled.
#'
#' @export
#' @examples
#' format <- list(
#'   df1 = list(
#'     var1 = list("X" = "x", "N" = c(NA, ""))
#'   ),
#'   df2 = list(
#'     var1 = list(),
#'     var2 = list("f11" = "F11", "NN" = NA)
#'   ),
#'   df3 = list()
#' )
#'
#' assert_valid_list_format(format)
assert_valid_list_format <- function(object) {
  # Failures are collected and reported together at the end.
  coll <- checkmate::makeAssertCollection()

  # Check object.
  checkmate::assert_list(object, names = "unique", types = "list", add = coll)

  # Iterating by position keeps the checks working when names are missing, so that the collected failures are
  # reported instead of an unrelated error.
  table_names <- names(object)

  # Check table level.
  for (i in seq_along(object)) {
    checkmate::assert_list(
      object[[i]],
      names = "unique",
      types = "list",
      any.missing = FALSE,
      .var.name = paste0("[", table_names[i], "]"),
      add = coll
    )
  }

  # Check variable level.
  for (i in seq_along(object)) {
    tab <- object[[i]]
    var_names <- names(tab)
    for (j in seq_along(tab)) {
      checkmate::assert_list(
        tab[[j]],
        names = "unique",
        types = c("character", "numeric", "logical"),
        .var.name = paste0("[", table_names[i], ".", var_names[j], "]"),
        add = coll
      )
    }
  }

  checkmate::reportAssertions(coll)
}
110
# assert_all_tablenames ----

#' Assert that all names are among names of a `list` of `data.frame`.
#'
#' @param db (`list` of `data.frame`) input to check for the presence of tables.
#' @param tab (`character`) the names of the tables to be checked.
#' @param null_ok (`flag`) can `tab` be NULL.
#' @param qualifier (`string`) to be returned if the check fails.
#' @returns invisible `NULL` or an error message if the criteria are not fulfilled.
#'
#' @export
#'
#' @examples
#' lsd <- list(
#'   mtcars = mtcars,
#'   iris = iris
#' )
#' assert_all_tablenames(lsd, c("mtcars", "iris"), qualifier = "first test:")
assert_all_tablenames <- function(db, tab, null_ok = TRUE, qualifier = NULL) {
  checkmate::assert_list(db, types = "data.frame", names = "unique")
  checkmate::assert_character(tab, null.ok = null_ok)
  checkmate::assert_string(qualifier, null.ok = TRUE)

  # Requested tables that are absent from `db`.
  missing_tab <- setdiff(tab, names(db))

  if (length(missing_tab) != 0) {
    stop(
      paste(qualifier, "Expected table names:", toString(missing_tab), "not in", deparse(substitute(db)))
    )
  }

  invisible(NULL)
}
144
# assert_one_tablenames ----

#' Assert that at least one name is among table names of a `list` of `data.frame`.
#'
#' @param db (`list` of `data.frame`) input to check for the presence of tables.
#' @param tab (`character`) the names of the tables to be checked.
#' @param null_ok (`flag`) can `tab` be NULL.
#' @param qualifier (`string`) to be returned if the check fails.
#' @returns invisible `NULL` or an error message if the criteria are not fulfilled.
#'
#' @keywords internal
assert_one_tablenames <- function(db, tab, null_ok = TRUE, qualifier = NULL) {
  checkmate::assert_list(db, types = "data.frame", names = "unique")
  checkmate::assert_character(tab, null.ok = null_ok)
  checkmate::assert_string(qualifier, null.ok = TRUE)

  # Requested tables that are present in `db`.
  common <- intersect(tab, names(db))

  if (length(common) > 0) {
    invisible(NULL)
  } else {
    stop(
      paste(qualifier, "At least one of:", toString(tab), "is expected to be a table name of", deparse(substitute(db)))
    )
  }
}

#' Safe transformer
#'
#' @param text (`string`) to be substituted.
#' @param envir (`environment`) containing key-value pairs describing the substitution to perform.
#' @returns `string` with substituted placeholders.
#'
#' @details The function first looks in `envir` for a character object whose name matches `text` exactly and returns
#'   it as is. If not found, it searches for a match in lower case, falling back on `text` itself, then applies to the
#'   result the same case (lower, upper or title) as `text`.
#'
#' @keywords internal
safe_transformer <- function(text, envir) {
  has_key <- function(key) exists(key, envir = envir, inherits = FALSE, mode = "character")
  fetch <- function(key) get(key, envir = envir, mode = "character")

  # Exact match: returned without any change of case.
  if (has_key(text)) {
    return(toString(fetch(text)))
  }

  # Lower case match, or the placeholder itself when there is none.
  key_lower <- tolower(text)
  value <- if (has_key(key_lower)) fetch(key_lower) else text

  # `value` is a character object at this point; mimic the case of the placeholder.
  if (identical(text, key_lower)) {
    value <- tolower(value)
  } else if (identical(text, toupper(text))) {
    value <- toupper(value)
  } else if (identical(text, stringr::str_to_title(text))) {
    value <- stringr::str_to_title(value)
  }

  toString(value)
}
37
#' Render whiskers safely
#' @param x (`character`) input to be rendered safely.
#' @returns `character` with substituted placeholders.
#'
#' @note The strings enclosed in `{}` are substituted using the key-values pairs set with `add_whiskers`.
#'
#' @export
#' @examples
#' render_safe("Name of {Patient_label}")
render_safe <- function(x) {
  checkmate::assert_character(x, null.ok = TRUE)
  if (is.null(x)) {
    return(NULL)
  }

  # Render each string separately, resolving the placeholders in the whisker environment.
  rendered <- lapply(
    x,
    function(txt) {
      glue::glue(
        txt,
        .transformer = safe_transformer,
        .envir = whisker_env,
        .null = "NULL",
        .open = "{",
        .close = "}"
      )
    }
  )

  # Keep the first element of each rendered string and restore the names of the input.
  out <- vapply(rendered, function(g) g[[1L]], FUN.VALUE = "")
  setNames(out, names(x))
}
#' Add whisker values
#' @param x Named (`character`) input.
#' @returns invisible `NULL`. Assign the key-value pair provided as argument in the whisker environment.
#'
#' @details The names of the character gives the string to be replaced and the value gives the new string.
#'
#' @export
#' @examples
#' my_whiskers <- c(Placeholder = "Replacement", Placeholder2 = "Replacement2")
#' add_whisker(my_whiskers)
add_whisker <- function(x) {
  checkmate::assert_character(x, names = "unique", any.missing = FALSE)

  # Each value is stored under its name, as a named character of length one.
  for (key in names(x)) {
    assign(key, x[key], envir = whisker_env)
  }

  invisible()
}
84
#' Remove whisker values
#' @param x (`character`) names of the whiskers to remove.
#' @returns invisible `NULL`. Removes `x` from the whisker environment.
#' @export
remove_whisker <- function(x) {
  checkmate::assert_character(x, any.missing = FALSE)

  # Only the whisker environment itself is searched.
  rm(list = x, envir = whisker_env, inherits = FALSE)
}
93
#' Show Whisker Values
#' @returns invisible `NULL`. Prints the values stored in the whisker environment.
#' @export
#' @examples
#' show_whisker()
show_whisker <- function() {
  # Only character objects are displayed, one `name --> value` line each.
  for (key in ls(envir = whisker_env)) {
    if (exists(key, envir = whisker_env, mode = "character")) {
      value <- toString(get(key, envir = whisker_env, mode = "character"))
      cat(sprintf("%s --> %s\n", key, value))
    }
  }

  invisible()
}

1		#' Transforming data.frame with Multiple Identifying columns into Wide Format
2		#'
3		#' @details This function allows to identify observations on the basis of several columns. Warning: Instead of nesting
4		#' duplicated values, the function will throw an error if the same parameter is provided twice for the same
5		#' observation.
6		#'
7		#' @param data (`data.frame`) to be pivoted.
8		#' @param id (`character`) the name of the columns whose combination uniquely identify the observations.
9		#' @param param_from (`character`) the name of the column containing the names of the parameters to be pivoted. The
10		#' unique values in this column will become column names in the output.
11		#' @param value_from (`character`) the name of the column containing the values that will populate the output.
12		#' @param drop_na (`logical`) should column containing only `NAs` be dropped.
13		#' @param drop_lvl (`logical`) should missing levels be dropped in the columns coming from (`value_from`).
14		#'
15		#' @returns `data.frame` in a wide format.
16		#'
17		#' @export
18		#' @examples
19		#' test_data <- data.frame(
20		#' the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"),
21		#' the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"),
22		#' the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"),
23		#' the_val = c(65, 165, "M", 66, "F", 166, 155, TRUE)
24		#' )
25		#'
26		#' multi_id_pivot_wider(test_data, c("the_obs", "the_obs2"), "the_param", "the_val")
27		#' multi_id_pivot_wider(test_data, "the_obs2", "the_param", "the_val")
multi_id_pivot_wider <- function(data,
                                 id,
                                 param_from,
                                 value_from,
                                 drop_na = FALSE,
                                 drop_lvl = FALSE) {
  checkmate::assert_data_frame(data, min.rows = 1, min.cols = 3)
  checkmate::assert_character(id)
  checkmate::assert_character(param_from, len = 1)
  checkmate::assert_character(value_from, len = 1)
  # Column existence is verified before any column is accessed, so that a
  # misspelled column name yields an assertion message rather than a raw
  # subsetting error.
  checkmate::assert_subset(c(id, param_from, value_from), colnames(data))
  checkmate::assert_flag(drop_na)
  checkmate::assert_flag(drop_lvl)
  # Check for duplication of observation-parameter.
  checkmate::assert_false(any(duplicated(data[, c(id, param_from)])))

  # Build one string key per row from the id columns. `paste()` is applied
  # column-wise (no coercion of the data frame to a matrix) and the separator
  # is a control character that is unlikely to appear in real identifiers, which
  # limits accidental collisions between distinct id combinations.
  make_key <- function(df) {
    do.call(paste, c(unname(as.list(df)), sep = "\037"))
  }

  unique_id <- unique(data[id])
  id_key <- make_key(unique_id)
  row_key <- make_key(data[id])

  # One vector of values per parameter, named by the key of the observation.
  data_vec <- split(setNames(data[[value_from]], row_key), data[[param_from]])

  if (drop_lvl) {
    data_vec <- rapply(data_vec, droplevels, classes = "factor", how = "replace")
  }

  # Query each id in each parameter; ids without a value for a parameter get NA.
  all_vec <- lapply(data_vec, function(x) x[id_key])

  if (drop_na) {
    all_vec <- Filter(function(x) !all(is.na(x)), all_vec)
  }

  all_vec <- lapply(all_vec, unname)
  bind_data <- do.call(dplyr::bind_cols, all_vec)

  res <- dplyr::bind_cols(unique_id, bind_data)

  rownames(res) <- NULL
  res
}
81
82		#' Transforming data.frame with multiple Data Column into Wide Format
83		#'
84		#' @details This function is adapted to cases where the data are distributed in several columns while the name of the
85		#' parameter is in one. Typical example is `adsub` where numeric data are stored in `AVAL` while categorical data are
86		#' in `AVALC`.
87		#'
88		#' @param data (`data.frame`) to be pivoted.
89		#' @param id (`character`) the name of the columns whose combination uniquely identify the observations.
90		#' @param param_from (`character`) the name of the columns containing the names of the parameters to be pivoted. The
91		#' unique values in this column will become column names in the output.
92		#' @param value_from (`character`) the name of the column containing the values that will populate the output.
93		#' @param labels_from (`character`) the name of the column containing the labels of the new columns. If not
94		#' provided, the labels will be equal to the column names. When several labels are available for the same column, the
95		#' first one will be selected.
96		#' @param drop_na (`logical`) should column containing only `NAs` be dropped.
97		#' @param drop_lvl (`logical`) should missing levels be dropped in the columns coming from `value_from`.
98		#'
99		#' @returns `list` of `data.frame` in a wide format with label attribute attached to each columns.
100		#'
101		#' @export
102		#' @examples
103		#' test_data <- data.frame(
104		#' the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"),
105		#' the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"),
106		#' the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"),
107		#' the_label = c(
108		#' "Weight (Kg)", "Height (cm)", "Gender", "Weight (Kg)",
109		#' "Gender", "Height (cm)", "Height (cm)", "Pre-condition"
110		#' ),
111		#' the_val = c(65, 165, NA, 66, NA, 166, 155, NA),
112		#' the_val2 = c(65, 165, "M", 66, "F", 166, 155, TRUE)
113		#' )
114		#'
115		#' x <- poly_pivot_wider(
116		#' test_data,
117		#' c("the_obs", "the_obs2"),
118		#' "the_param",
119		#' c("the_val", "the_val2"),
120		#' "the_label"
121		#' )
122		#' x
123		#' Reduce(function(u, v) merge(u, v, all = TRUE), x)
poly_pivot_wider <- function(data,
                             id,
                             param_from,
                             value_from,
                             labels_from = NULL,
                             drop_na = TRUE,
                             drop_lvl = FALSE) {
  # Remaining checks on `data`, `id`, `drop_na` and `drop_lvl` are performed by
  # `multi_id_pivot_wider`.
  checkmate::assert_character(value_from, unique = TRUE)
  checkmate::assert_character(param_from, len = 1)
  # Validated up front so that the scalar comparison below is always safe.
  checkmate::assert_character(labels_from, len = 1, null.ok = TRUE)

  # Create new labels for new columns.
  if (is.null(labels_from) || isTRUE(labels_from == param_from)) {
    # `as.character()` matters when the parameter column is a factor: combining
    # a factor with the character labels further down would otherwise turn the
    # labels into integer codes.
    new_labels <- as.character(unique(data[[param_from]]))
    names(new_labels) <- new_labels
  } else {
    checkmate::assert_subset(labels_from, colnames(data))

    new_labels_df <- unique(data[, c(labels_from, param_from)])

    new_labels <- as.character(new_labels_df[[labels_from]])
    names(new_labels) <- as.character(new_labels_df[[param_from]])
  }

  # Retrieve old labels; columns without a label fall back to their name.
  # `exact = TRUE` prevents partial matching on e.g. a `labels` attribute.
  old_labels <- lapply(data, attr, which = "label", exact = TRUE)
  n_old_label <- names(old_labels)
  null_label <- unlist(lapply(old_labels, is.null))
  old_labels[null_label] <- n_old_label[null_label]
  old_labels <- unlist(old_labels)

  # New labels come first, so they win when a name appears in both sets.
  all_labels <- c(new_labels, old_labels)

  # One wide table per value column.
  res_ls <- list()
  for (n_value_from in value_from) {
    res <- multi_id_pivot_wider(
      data = data,
      id = id,
      param_from = param_from,
      value_from = n_value_from,
      drop_na = drop_na,
      drop_lvl = drop_lvl
    )

    res <- attr_label_df(res, all_labels[colnames(res)])
    res_ls[[n_value_from]] <- res
  }
  res_ls
}

1		#' Join `adsub` to `adsl`
2		#'
3		#' @param adam_db (`list` of `data.frame`) object input with an `adsl` and `adsub` table.
4		#' @param keys (`character`) the name of the columns in `adsl` uniquely identifying a row.
5		#' @param continuous_var (`character`) the value of a parameter in the `PARAMCD` column of the `adsub` table from which
6		#' columns containing continuous values should be created. If `"all"`, all parameter values are selected, if `NULL`,
7		#' none are selected.
8		#' @param categorical_var (`character`) the value of a parameter in the `PARAMCD` column of the `adsub` table from which
9		#' columns containing categorical values should be created. If `"all"`, all parameter values are selected, if `NULL`,
10		#' none are selected.
11		#' @param continuous_suffix (`string`) the suffixes to add to the newly generated columns containing continuous values.
12		#' @param categorical_suffix (`string`) the suffixes to add to the newly generated columns containing categorical
13		#' values.
14		#' @param drop_na (`logical`) whether resulting columns containing only `NAs` should be dropped.
15		#' @param drop_lvl (`logical`) should missing levels be dropped in the resulting columns.
16		#'
17		#' @returns a `list` of `data.frame` with new columns in the `adsl` table.
18		#'
19		#' @rdname join_adsub_adsl
20		#' @export
21		#'
join_adsub_adsl <- function(adam_db, keys, continuous_var, categorical_var,
                            continuous_suffix, categorical_suffix,
                            drop_na = TRUE, drop_lvl = TRUE) {
  # S3 generic: dispatches on the class of `adam_db`.
  UseMethod("join_adsub_adsl", adam_db)
}
32
33		#' @rdname join_adsub_adsl
34		#' @export
35		#'
36		#' @examples
37		#' adsl <- data.frame(
38		#' USUBJID = c("S1", "S2", "S3", "S4"),
39		#' STUDYID = "My_study",
40		#' AGE = c(60, 44, 23, 31)
41		#' )
42		#'
43		#' adsub <- data.frame(
44		#' USUBJID = c("S1", "S2", "S3", "S4", "S1", "S2", "S3"),
45		#' STUDYID = "My_study",
46		#' PARAM = c("weight", "weight", "weight", "weight", "height", "height", "height"),
47		#' PARAMCD = c("w", "w", "w", "w", "h", "h", "h"),
48		#' AVAL = c(98, 75, 70, 71, 182, 155, 152),
49		#' AVALC = c(">80", "<=80", "<=80", "<=80", ">180", "<=180", "<=180")
50		#' )
51		#'
52		#' db <- list(adsl = adsl, adsub = adsub)
53		#'
54		#' x <- join_adsub_adsl(adam_db = db)
55		#' x <- join_adsub_adsl(adam_db = db, continuous_var = c("w", "h"), categorical_var = "h")
join_adsub_adsl.list <- function(adam_db,
                                 keys = c("USUBJID", "STUDYID"),
                                 continuous_var = "all",
                                 categorical_var = "all",
                                 continuous_suffix = "",
                                 categorical_suffix = "_CAT",
                                 drop_na = TRUE,
                                 drop_lvl = FALSE) {
  checkmate::assert_list(adam_db, types = "data.frame")
  checkmate::assert_names(names(adam_db), must.include = c("adsl", "adsub"))
  checkmate::assert_names(names(adam_db$adsub), must.include = c("PARAM", "PARAMCD", "AVAL", "AVALC", keys))
  checkmate::assert_names(names(adam_db$adsl), must.include = keys)
  checkmate::assert_numeric(adam_db$adsub$AVAL)
  checkmate::assert_multi_class(adam_db$adsub$AVALC, c("character", "factor"))
  checkmate::assert_string(continuous_suffix)
  checkmate::assert_string(categorical_suffix)
  checkmate::assert_flag(drop_na)
  checkmate::assert_flag(drop_lvl)

  # Empty strings in AVALC are treated as NA.
  adam_db$adsub$AVALC[adam_db$adsub$AVALC == ""] <- NA

  # Position 1 is the continuous source (AVAL), position 2 the categorical one
  # (AVALC); the lists below follow the same order.
  value_col <- c("AVAL", "AVALC")

  # "all" stands for every parameter code present in adsub.
  resolve_vars <- function(vars) {
    if (identical(vars, "all")) {
      unique(adam_db$adsub$PARAMCD)
    } else {
      vars
    }
  }
  vars_ls <- list(resolve_vars(continuous_var), resolve_vars(categorical_var))

  # The names carry the future column names (parameter code + suffix), the
  # values the parameter codes. A NULL selection stays NULL.
  add_suffix <- function(vars, suffix) {
    if (is.null(vars)) {
      return(NULL)
    }
    names(vars) <- paste0(vars, suffix)
    vars
  }
  vars_nam <- Map(add_suffix, vars_ls, list(continuous_suffix, categorical_suffix))

  # Test if new columns already exist in adsl.
  assert_names_notadsl(vars_nam, adam_db$adsl)

  # Test if categorical and continuous column will result in the same column name.
  assert_names_collision(vars_nam)

  # Pivot and keep labels.
  adsub_wide_ls <- poly_pivot_wider(
    adam_db$adsub,
    id = keys,
    param_from = "PARAMCD",
    value_from = value_col,
    labels_from = "PARAM",
    drop_na = drop_na,
    drop_lvl = drop_lvl
  )

  # Merge continuous, then categorical variables into adsl.
  for (i in seq_along(value_col)) {
    current_col <- value_col[i]
    requested <- vars_nam[[i]]
    adsub_df <- adsub_wide_ls[[current_col]]

    # Warning if some requested columns are absent from the pivoted table.
    not_cols <- setdiff(requested, colnames(adsub_df))
    if (length(not_cols) > 0) {
      if (current_col == "AVALC") {
        type <- "Categorical"
        arg_type <- "categorical_var"
      } else {
        type <- "Continuous"
        arg_type <- "continuous_var"
      }
      warning(
        sprintf(
          "Dropping %s for %s type, No data available. Adjust `%s` argument to silence this warning or set `drop_na = FALSE`", # nolint
          toString(not_cols),
          type,
          arg_type
        )
      )
    }

    # Keep the requested columns that exist and rename them to their final name.
    common_cols <- requested[c(requested) %in% colnames(adsub_df)]

    adsub_df <- adsub_df[, c(keys, as.character(common_cols)), drop = FALSE]
    colnames(adsub_df) <- c(keys, names(common_cols))

    adam_db$adsl <- dplyr::left_join(
      x = adam_db$adsl,
      y = adsub_df,
      by = keys
    )
  }

  adam_db
}
159
160		# Utility functions ----
161
# Warn when a continuous and a categorical variable would produce the same
# column name.
#
# `vars_nam` is a list of two named vectors (continuous first, categorical
# second); the names are the future column names. Either element may be NULL.
# Returns NULL invisibly.
assert_names_collision <- function(vars_nam) {
  final_names_ls <- lapply(vars_nam, names)
  collided <- intersect(final_names_ls[[1]], final_names_ls[[2]])

  if (length(collided) > 0) {
    # The message is assembled from single-line pieces so that its content does
    # not depend on the indentation of this source file.
    rlang::warn(
      paste(
        toString(collided),
        "are new columns for continuous and categorical variable,",
        "Please set different `continuous_suffix` or `categorical_suffix`",
        "or select different columns to avoid automatic renaming."
      )
    )
  }

  invisible(NULL)
}
176
# Warn when a column about to be created already exists in `df`.
#
# `vars_nam` is a list of named vectors (or NULL); the names are the future
# column names. Returns NULL invisibly.
assert_names_notadsl <- function(vars_nam, df) {
  # Flatten to a plain character vector. Simplifying with `sapply()` returns a
  # list or a matrix whenever the elements differ in length or one is NULL, and
  # `%in%` then compares deparsed elements instead of the names themselves.
  final_names <- unique(unlist(lapply(vars_nam, names), use.names = FALSE))
  already_in_adsl <- final_names %in% colnames(df)

  if (any(already_in_adsl)) {
    # The message is assembled from single-line pieces so that its content does
    # not depend on the indentation of this source file.
    rlang::warn(
      paste(
        toString(final_names[already_in_adsl]),
        "already exist in adsl, the name will default to another values.",
        "Please change `continuous_suffix` or `categorical_suffix` to avoid automatic renaming"
      )
    )
  }

  invisible(NULL)
}

# Placeholder binding; replaced by a real environment when the package loads.
whisker_env <- NULL

.onLoad <- function(libname, pkgname) {
  # `<<-` rebinds the placeholder defined above rather than creating a local.
  whisker_env <<- new.env(parent = globalenv())

  # Register the default placeholder values.
  add_whisker(c(patient_label = "patients"))
}

1		#' Reformat Values
2		#' @param obj (`character`, `factor` or `list of data.frame`) to reformat.
3		#' @param format (`rule`) or (`list`) of `rule` depending on the class of obj.
4		#' @param ... for compatibility between methods and pass additional special mapping to transform rules.
5		#' * `.string_as_fct` (`flag`) whether the reformatted character object should be converted to factor.
6		#' * `.to_NA` (`character`) values that should be converted to `NA`. For `factor`, the corresponding levels are
7		#' dropped. If `NULL`, the argument will be taken from the `to_NA` attribute of the rule.
8		#' * `.drop` (`flag`) whether to drop empty levels. If `NULL`, the argument will be taken from the `drop` attribute of
9		#' the rule.
10		#' * `.na_last` (`flag`) whether the level replacing `NA` should be last.
11		#' @param verbose (`flag`) whether to print the format.
12		#' @returns (`character`, `factor` or `list of data.frame`) with remapped values.
13		#'
14		#' @export
15		#' @note When the rule is empty or when values subject to reformatting are absent from the object, no error is
16		#' raised. The conversion to factor (if `.string_as_fct = TRUE`) is still carried out. The conversion of the levels
17		#' declared in `.to_NA` to `NA` values occurs after the remapping. `NA` values created this way are not affected by a
18		#' rule declaring a remapping of `NA` values. For factors, level dropping is the last step, hence, levels converted to
19		#' `NA` by the `.to_NA` argument, will be removed if `.drop` is `TRUE`. Arguments passed via `reformat` override the
20		#' ones defined during rule creation.
21		#'
22		#' @rdname reformat
23		#'
reformat <- function(obj, ...) {
  # S3 generic: dispatches on the class of `obj`.
  UseMethod("reformat", obj)
}
27
28		#' @export
29		#' @rdname reformat
reformat.default <- function(obj, format, ...) {
  # Unsupported classes are not an error: warn and hand the input back as is.
  msg <- paste0("Not implemented for class: ", toString(class(obj)), "! Returning original object.")
  rlang::warn(msg)

  obj
}
34
35		#' @export
36		#' @rdname reformat
37		#'
38		#' @examples
39		#' # Reformatting of character.
40		#' obj <- c("a", "b", "x", NA, "")
41		#' attr(obj, "label") <- "my label"
42		#' format <- rule("A" = "a", "NN" = NA)
43		#'
44		#' reformat(obj, format)
45		#' reformat(obj, format, .string_as_fct = FALSE, .to_NA = NULL)
46		#'
reformat.character <- function(obj, format, ..., verbose = FALSE) {
  checkmate::assert_class(format, "rule")
  checkmate::assert_flag(as.logical(verbose))

  # Give priority to arguments defined in reformat over the ones stored in the rule.
  format <- do.call(rule, modifyList(as.list(format), list(...), keep.null = TRUE))
  if (verbose) {
    print(format)
  }

  if (attr(format, ".string_as_fct")) {
    # Convert to factor and delegate to the factor method, carrying over the
    # attributes of the input that the factor does not already define. The
    # comparison is made on attribute *names*: comparing against the attribute
    # values would drop an attribute whose name happens to equal a level.
    att <- attributes(obj)
    obj_fact <- as.factor(obj)
    supp_att_name <- setdiff(names(att), names(attributes(obj_fact)))
    supp_att <- att[supp_att_name]
    attributes(obj_fact) <- c(attributes(obj_fact), supp_att)

    reformat(obj_fact, format)
  } else {
    # Replace each value found in the rule by the name it is mapped to.
    value_match <- unlist(format)
    m <- match(obj, value_match)
    matched <- !is.na(m)
    obj[matched] <- names(format)[m[matched]]

    # Conversion to NA happens after the remapping.
    val_to_NA <- attr(format, ".to_NA")
    if (!is.null(val_to_NA)) {
      obj[obj %in% val_to_NA] <- NA_character_
    }

    obj
  }
}
78
79		#' @export
80		#' @rdname reformat
81		#'
82		#' @examples
83		#' # Reformatting of factor.
84		#' obj <- factor(c("first", "a", "aa", "b", "x", NA), levels = c("first", "x", "b", "aa", "a", "z"))
85		#' attr(obj, "label") <- "my label"
86		#' format <- rule("A" = c("a", "aa"), "NN" = c(NA, "x"), "Not_present" = "z", "Not_a_level" = "P")
87		#'
88		#' reformat(obj, format)
89		#' reformat(obj, format, .na_last = FALSE, .to_NA = "b", .drop = FALSE)
90		#'
reformat.factor <- function(obj, format, ..., verbose = FALSE) {
  checkmate::assert_class(format, "rule")
  checkmate::assert_flag(verbose)

  # Arguments passed through `...` take precedence over those stored in the rule.
  overrides <- list(...)
  format <- do.call(rule, modifyList(as.list(format), overrides, keep.null = TRUE))
  if (verbose) {
    print(format)
  }

  # When the rule remaps NA and the factor contains NA, NA becomes a level so
  # that it can be recoded like any other level.
  remaps_na <- any(is.na(format))
  if (remaps_na && anyNA(obj)) {
    obj <- forcats::fct_na_value_to_level(obj)
  }

  # Recode the levels that are present, then add the target levels of mappings
  # whose original value is absent from the factor.
  is_present <- format %in% levels(obj)
  absent_format <- format[!is_present]
  sel_format <- format[is_present]
  obj <- forcats::fct_recode(obj, !!!sel_format)
  obj <- forcats::fct_expand(obj, unique(names(absent_format)))

  # Levels named in the rule come first, in the order of the rule.
  obj <- forcats::fct_relevel(obj, unique(names(format)))

  if (remaps_na && attr(format, ".na_last")) {
    na_lvl <- names(format)[is.na(format)]
    obj <- forcats::fct_relevel(obj, na_lvl, after = Inf)
  }

  if (attr(format, ".drop")) {
    obj <- forcats::fct_drop(obj)
  }

  # Levels converted to NA are dropped.
  val_to_NA <- attr(format, ".to_NA")
  if (!is.null(val_to_NA)) {
    obj <- forcats::fct_na_level_to_value(obj, val_to_NA)
  }

  obj
}
129
130		#' @export
131		#' @rdname reformat
132		#'
133		#' @note the variables listed under the `all_dataset` keyword will be reformatted with the corresponding rule in every
134		#' data set except where another rule is specified for the same variable under a specific data set name.
135		#'
136		#' @examples
137		#' # Reformatting of list of data.frame.
138		#' df1 <- data.frame(
139		#' var1 = c("a", "b", NA),
140		#' var2 = factor(c("F1", "F2", NA))
141		#' )
142		#'
143		#' df2 <- data.frame(
144		#' var1 = c("x", NA, "y"),
145		#' var2 = factor(c("F11", NA, "F22"))
146		#' )
147		#'
148		#' db <- list(df1 = df1, df2 = df2)
149		#'
150		#' format <- list(
151		#' df1 = list(
152		#' var1 = rule("X" = "x", "N" = NA, .to_NA = "b")
153		#' ),
154		#' df2 = list(
155		#' var2 = rule("f11" = "F11", "NN" = NA)
156		#' ),
157		#' all_datasets = list(
158		#' var1 = rule("xx" = "x", "aa" = "a")
159		#' )
160		#' )
161		#'
162		#' reformat(db, format)
reformat.list <- function(obj,
                          format,
                          ...,
                          verbose = get_arg("dunlin.reformat.verbose", "R_DUNLIN_REFORMAT_VERBOSE", FALSE)) {
  checkmate::assert_list(obj, types = c("data.frame", "tibble"))
  checkmate::assert_named(obj)
  checkmate::assert_list(format, names = "unique", types = "list", null.ok = TRUE)
  verbose <- as.logical(verbose)
  checkmate::assert_flag(verbose)

  # Nothing to apply: the input is returned unchanged.
  if (length(format) == 0) {
    return(obj)
  }

  assert_valid_format(format)

  # Spread the `all_datasets` rules over the tables of `obj`.
  ls_datasets <- names(obj)
  format <- h_expand_all_datasets(format, ls_datasets)

  if (verbose) {
    for (tb in names(format)) {
      tb_rules <- format[[tb]]
      for (cl in names(tb_rules)) {
        cat(sprintf("\nData frame `%s`, column `%s`:\n", tb, cl))
        print(tb_rules[[cl]])
      }
    }
    cat("\n")
  }

  for (tab in ls_datasets) {
    # Only the rules targeting columns that exist in this table are applied.
    tab_rules <- format[[tab]]
    tab_rules <- tab_rules[names(tab_rules) %in% names(obj[[tab]])]
    target_cols <- names(tab_rules)

    obj[[tab]][target_cols] <- Map(
      function(rl, col) reformat(obj[[tab]][[col]], format = rl, ...),
      tab_rules,
      target_cols
    )
  }

  obj
}
206
207		#' Propagate the rules for all datasets
208		#'
209		#' @inheritParams reformat
210		#' @param ls_datasets (`character`) the name of all datasets in the object to reformat.
211		#' @returns a nested `list` attributing a rule to be applied to specific variables of specific datasets.
212		#'
213		#' @details the rules described under `all_datasets` are propagated to all data sets for the corresponding variables
214		#' except in data sets where a rule is already attributed to the same variable.
215		#'
216		#' @keywords internal
h_expand_all_datasets <- function(format_list, ls_datasets = NULL) {
  assert_valid_list_format(list(f = format_list))
  checkmate::assert_character(ls_datasets, null.ok = TRUE)

  # Rules attached to a specific data set (everything but `all_datasets`).
  spec_datasets <- format_list[setdiff(names(format_list), "all_datasets")]

  # Without data set names there is nothing to propagate to.
  if (is.null(ls_datasets)) {
    return(spec_datasets)
  }

  # Give every data set a copy of the `all_datasets` rules; entries stay NULL
  # (and are filtered out) when no such rules were provided.
  to_all_datasets <- list()
  to_all_datasets[ls_datasets] <- format_list["all_datasets"]
  to_all_datasets <- base::Filter(Negate(is.null), to_all_datasets)

  # Data-set-specific rules take precedence over the propagated ones.
  modifyList(to_all_datasets, spec_datasets)
}

1		#' Create rule based on mappings
2		#' @param ... Mapping pairs, the argument name is the transformed value while
3		#' its values are the original values.
4		#' @param .lst (`list`) of mapping.
5		#' @param .string_as_fct (`flag`) whether to convert characters to factors.
6		#' @param .na_last (`flag`) whether the level replacing `NA` should be last.
7		#' @param .drop (`flag`) whether to drop empty levels.
8		#' @param .to_NA (`character`) values that should be converted to `NA`. Set to `NULL` if nothing should be converted to
9		#' `NA`.
10		#' @returns a `rule` object.
11		#'
12		#' @note Conversion to `NA` is the last step of the remapping process.
13		#'
14		#' @export
15		#' @examples
16		#' rule("X" = "x", "Y" = c("y", "z"))
17		#' rule("X" = "x", "Y" = c("y", "z"), .drop = TRUE, .to_NA = c("a", "b"), .na_last = FALSE)
18		#'
rule <- function(..., .lst = list(...), .string_as_fct = TRUE, .na_last = TRUE, .drop = FALSE, .to_NA = "") {
  checkmate::assert_flag(.string_as_fct)
  checkmate::assert_flag(.na_last)
  checkmate::assert_flag(.drop)
  checkmate::assert_character(.to_NA, null.ok = TRUE, any.missing = FALSE)

  # A bare `NA` mapping is stored as a character NA so that the type check
  # below accepts it.
  .lst[is.na(.lst)] <- NA_character_
  only_character <- checkmate::test_list(.lst, types = c("character"))
  if (!only_character) {
    rlang::abort("Value mapping may only contain the type: {character}")
  }

  # Original values, flattened; each may be mapped only once.
  vals <- as.character(unlist(.lst, use.names = FALSE))
  checkmate::assert_character(vals, unique = TRUE)

  # Target name of every original value: the name of a mapping is repeated once
  # per value it contains.
  repeat_name <- function(i) {
    rep(names(.lst)[i], length(.lst[[i]]))
  }
  nms <- unlist(lapply(seq_along(.lst), repeat_name))

  structure(
    setNames(vals, nms),
    class = c("rule", "character"),
    .string_as_fct = .string_as_fct,
    .na_last = .na_last,
    .drop = .drop,
    .to_NA = .to_NA
  )
}
46
47		#' @export
48		#'
# Print method for `rule` objects: lists each mapping followed by the options
# stored as attributes. Returns `x` invisibly, as is conventional for print
# methods.
print.rule <- function(x, ...) {
  cat("Mapping of:\n")
  nms <- unique(names(x))
  if (length(x) == 0) {
    cat("Empty mapping.\n")
  } else {
    for (i in nms) {
      # Original values mapped to `i`; NA is shown as <NA>, strings are quoted.
      ori_nms <- unlist(x[names(x) %in% i])
      ori_nms <- ifelse(is.na(ori_nms), "<NA>", stringr::str_c("\"", ori_nms, "\""))
      ori_nms <- toString(ori_nms)
      cat(i, " <- ", ori_nms, "\n")
    }
  }

  .to_NA <- attr(x, ".to_NA")
  if (!is.null(.to_NA)) {
    cat("Convert to <NA>:", toString(stringr::str_c("\"", .to_NA, "\"")), "\n")
  }
  cat("Convert to factor:", attr(x, ".string_as_fct"), "\n")
  cat("Drop unused level:", attr(x, ".drop"), "\n")
  cat("NA-replacing level in last position:", attr(x, ".na_last"), "\n")

  invisible(x)
}
70
71		#' Convert nested list into list of `rule`
72		#' @param obj (`nested list`) to convert into list of rules.
73		#' @returns a `list` of `rule` objects.
74		#' @export
75		#' @examples
76		#' obj <- list(
77		#' rule1 = list("X" = c("a", "b"), "Z" = "c", .to_NA = "xxxx"),
78		#' rule2 = list(Missing = c(NA, "")),
79		#' rule3 = list(Missing = c(NA, ""), .drop = TRUE),
80		#' rule4 = list(Absent = c(NA, ""), .drop = TRUE, .to_NA = "yyyy")
81		#' )
82		#' list2rules(obj)
83		#'
list2rules <- function(obj) {
  # Both checks are collected and reported together.
  coll <- checkmate::makeAssertCollection()
  checkmate::assert_list(obj, types = "list", add = coll)
  checkmate::assert_names(names(obj), type = "unique", add = coll)
  checkmate::reportAssertions(coll)

  # Each inner list is used as the argument list of a `rule()` call.
  to_rule <- function(args) {
    do.call("rule", args)
  }
  lapply(obj, to_rule)
}
94
95		#' Convert Rule to List
96		#' @param x (`rule`) to convert.
97		#' @param ... not used.
98		#' @returns an object of class `list`.
99		#'
100		#' @export
101		#' @examples
102		#' x <- rule("a" = c("a", "b"), "X" = "x", .to_NA = c("v", "w"))
103		#' as.list(x)
as.list.rule <- function(x, ...) {
  # One entry per target name, holding the original values mapped to it.
  mapped_to <- names(x)
  targets <- unique(mapped_to)
  mappings <- lapply(targets, function(target) unname(x[mapped_to == target]))

  # Every attribute other than the names and the class is appended as an
  # additional named entry.
  att <- attributes(x)
  options <- att[!names(att) %in% c("names", "class")]

  r_list <- setNames(
    c(mappings, unname(options)),
    c(targets, names(options))
  )

  # Explicitly declare .to_NA value, even if NULL.
  if (is.null(r_list[[".to_NA"]])) {
    r_list[".to_NA"] <- list(NULL)
  }

  r_list
}
128
129		#' Combine Two Rules
130		#'
131		#' @param x (`rule`) to modify.
132		#' @param y (`rule`) rule whose mapping will take precedence over the ones described in `x`.
133		#' @param ... not used.
134		#'
135		#' @note The order of the mappings in the resulting rule corresponds to the order of the mappings in `x` followed by the
136		#' mappings that are only present in `y`.
137		#'
138		#' @returns a `rule`.
139		#' @export
140		#' @examples
141		#' r1 <- rule(
142		#' "first" = c("from ori rule", "FROM ORI RULE"),
143		#' "last" = c(NA, "last"),
144		#' .to_NA = "X",
145		#' .drop = TRUE
146		#' )
147		#' r2 <- rule(
148		#' "first" = c("F", "f"),
149		#' "second" = c("S", "s"),
150		#' "third" = c("T", "t"),
151		#' .to_NA = "something"
152		#' )
153		#' combine_rules(r1, r2)
combine_rules <- function(x, y, ...) {
  checkmate::assert_class(x, "rule", null.ok = TRUE)
  checkmate::assert_class(y, "rule", null.ok = TRUE)

  if (is.null(x) && is.null(y)) {
    rlang::abort("Both rules are NULL.")
  }

  # A NULL rule becomes an empty list, so the other rule is returned as is.
  merged <- as.list(x)
  overriding <- as.list(y)

  # Entries of `y` replace the entries of `x` with the same name; entries only
  # present in `y` are appended.
  merged[names(overriding)] <- overriding

  do.call(rule, merged)
}
171
172		#' Combine Rules Found in Lists of Rules.
173		#'
174		#' @param x (`list`) of `rule` objects.
175		#' @param val (`list`) of `rule` objects.
176		#' @param ... passed to [`dunlin::combine_rules`].
177		#'
178		#' @returns a `list` of `rule` objects.
179		#' @export
180		#' @examples
181		#' l1 <- list(
182		#' r1 = rule(
183		#' "first" = c("overwritten", "OVERWRITTEN"),
184		#' "almost first" = c(NA, "almost")
185		#' ),
186		#' r2 = rule(
187		#' ANYTHING = "anything"
188		#' )
189		#' )
190		#'
191		#' l2 <- list(
192		#' r1 = rule(
193		#' "first" = c("F", "f"),
194		#' "second" = c("S", "s"),
195		#' "third" = c("T", "t"),
196		#' .to_NA = "something"
197		#' ),
198		#' r3 = rule(
199		#' SOMETHING = "something"
200		#' )
201		#' )
202		#'
203		#' combine_list_rules(l1, l2)
combine_list_rules <- function(x, val, ...) {
  # Unique names prevents zero-character names.
  checkmate::assert_list(x, types = "rule", null.ok = FALSE, names = "unique")
  checkmate::assert_list(val, types = "rule", null.ok = FALSE, names = "unique")

  # Each rule of `val` is combined with the rule of the same name in `x`; a
  # name missing from `x` yields NULL, which `combine_rules` accepts.
  for (rule_name in names(val)) {
    current <- x[[rule_name]]
    x[[rule_name]] <- combine_rules(current, val[[rule_name]], ...)
  }

  x
}

1		#' Propagate Column
2		#'
3		#' `propagate` copies columns from a given table of a `list` of `data.frame` to all tables based on other
4		#' common columns. If several rows are associated with the same key, the rows will be duplicated in the receiving
5		#' tables. In safe mode, the key must be unique in the original table.
6		#'
7		#' @param db (`list` of `data.frame`) object for which some variable need to be propagated.
8		#' @param from (`string`) the name of the table where the variables to propagate are stored.
9		#' @param add (`character`) the names of the variables to propagate.
10		#' @param by (`character`) the key binding the `from` table to the other tables.
11		#' @param safe (`flag`) should the key be checked for uniqueness in the `from` table.
12		#'
13		#' @returns updated `list` of `data.frame`.
14		#'
15		#' @rdname propagate
16		#' @export
17		#'
propagate <- function(db, from, add, by, safe = TRUE) {
  # S3 generic: dispatches on the class of `db`.
  UseMethod("propagate", db)
}
21
22		#' @rdname propagate
23		#' @export
24		#'
25		#'
26		#' @examples
27		#' df1 <- data.frame(
28		#' id1 = c("a", "a", "c", "d", "e", "f"),
29		#' id2 = c("A", "B", "A", "A", "A", "A"),
30		#' int = c(1, 2, 3, 4, 5, 6),
31		#' bool = c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE)
32		#' )
33		#'
34		#' df2 <- data.frame(
35		#' id1 = c("a", "a", "d", "e", "f", "g"),
36		#' id2 = c("A", "B", "A", "A", "A", "A")
37		#' )
38		#'
39		#' df3 <- data.frame(
40		#' id1 = c("a", "c", "d", "e", "f", "x"),
41		#' id2 = c("A", "A", "A", "A", "B", "A"),
42		#' int = c(11, 22, 33, 44, 55, 66)
43		#' )
44		#'
45		#' db <- list(df1 = df1, fd2 = df2, df3 = df3)
46		#' propagate(db, from = "df1", add = c("int", "bool"), by = c("id1", "id2"))
47		#'
propagate.list <- function(db, from, add, by, safe = TRUE) {
  checkmate::assert_list(db, types = "data.frame", names = "unique")
  checkmate::assert_names(names(db), must.include = from)
  checkmate::assert_names(colnames(db[[from]]), must.include = add)
  checkmate::assert_names(colnames(db[[from]]), must.include = by)
  checkmate::assert_flag(safe)

  source_tab <- db[[from]]

  # In safe mode the key must identify a single row of the source table.
  if (safe) {
    keys <- source_tab[, by]
    if (anyDuplicated(keys)) {
      rlang::abort("Duplicated key")
    }
  }

  for (tab_name in setdiff(names(db), from)) {
    target <- db[[tab_name]]
    target_cols <- colnames(target)

    # A table is updated only when it lacks at least one of the variables to
    # add and contains every key column.
    needs_update <- !all(add %in% target_cols) && all(by %in% target_cols)
    if (!needs_update) {
      cat(paste0("\nSkipping: ", tab_name))
      next
    }

    missing_var <- setdiff(add, target_cols)
    sel_tab <- source_tab[, c(missing_var, by)]

    cat(paste0("\nUpdating: ", tab_name, " with: ", toString(missing_var)))

    db[[tab_name]] <- dplyr::left_join(target, sel_tab, by = by, multiple = "all")
  }

  cat("\n")
  db
}

#' Encode Categorical Missing Values in a `list` of `data.frame`
#'
#' @details Helper encoding the missing values (i.e. `NA` and empty strings) of every `character` and `factor`
#'   variable found in a `list` of `data.frame`. The `label` attribute of the columns is preserved.
#'
#' @param data (`list` of `data.frame`) to be transformed.
#' @param omit_tables (`character`) the names of the tables to omit from processing.
#' @param omit_columns (`character`) the names of the columns to omit from processing.
#' @param char_as_factor (`flag`) should character columns be converted into factor.
#' @param na_level (`string`) the label to encode missing levels.
#' @returns `list` of `data.frame` object with explicit missing levels.
#' @export
#'
#' @examples
#' df1 <- data.frame(
#'   "char" = c("a", "b", NA, "a", "k", "x"),
#'   "char2" = c("A", "B", NA, "A", "K", "X"),
#'   "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")),
#'   "logi" = c(NA, FALSE, TRUE, NA, FALSE, NA)
#' )
#' df2 <- data.frame(
#'   "char" = c("a", "b", NA, "a", "k", "x"),
#'   "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")),
#'   "num" = c(1:5, NA)
#' )
#' df3 <- data.frame(
#'   "char" = c(NA, NA, "A")
#' )
#'
#' db <- list(df1 = df1, df2 = df2, df3 = df3)
#'
#' ls_explicit_na(db)
#' ls_explicit_na(db, omit_tables = "df3", omit_columns = "char2")
#'
ls_explicit_na <- function(data,
                           omit_tables = NULL,
                           omit_columns = NULL,
                           char_as_factor = TRUE,
                           na_level = "<Missing>") {
  checkmate::assert_list(data, types = "data.frame", names = "unique")
  checkmate::assert_character(omit_tables, null.ok = TRUE)
  checkmate::assert_character(omit_columns, null.ok = TRUE)
  checkmate::assert_flag(char_as_factor)
  checkmate::assert_string(na_level)

  # Tables not listed in `omit_tables` are processed one by one; when none is left the loop
  # does nothing and `data` is returned untouched.
  for (tab in setdiff(names(data), omit_tables)) {
    data[[tab]] <- h_df_explicit(
      data[[tab]],
      omit_columns = omit_columns,
      char_as_factor = char_as_factor,
      na_level = na_level
    )
  }

  data
}
61
#' Encode Categorical Missing Values in a `data.frame`.
#'
#' @inheritParams ls_explicit_na
#' @param df (`data.frame`) to be transformed.
#' @returns a `data.frame` object with explicit missing levels.
#' @keywords internal
h_df_explicit <- function(df,
                          omit_columns = NULL,
                          char_as_factor = TRUE,
                          na_level = "<Missing>") {
  # Single-entry rule whose name is `na_level` and whose values are the empty string and `NA`.
  na_rule <- rule(.lst = structure(list(c("", NA)), names = na_level))

  is_categorical <- function(col) is.character(col) || is.factor(col)
  encode_missing <- function(col) {
    reformat(col, format = na_rule, .string_as_fct = char_as_factor, .na_last = TRUE)
  }

  # Character and factor columns are reformatted, except those listed in `omit_columns`.
  mutate(
    df,
    across(
      where(is_categorical) & !any_of(.env$omit_columns),
      encode_missing
    )
  )
}

#' Setting the Label Attribute
#'
#' @param var (`object`) whose label attribute can be set.
#' @param label (`character`) the label to add.
#' @returns `object` with label attribute.
#'
#' @export
#' @examples
#' x <- c(1:10)
#' attr(x, "label")
#'
#' y <- attr_label(x, "my_label")
#' attr(y, "label")
attr_label <- function(var, label) {
  checkmate::assert_character(label)

  # `var` is a local copy here, so the caller's object is left unchanged.
  attr(var, "label") <- label
  var
}
22
#' Setting the Label Attribute to Data Frame Columns
#'
#' @details The labels are set in place on the columns of `df`, so that the row names, the column names (including
#'   non-syntactic ones), the class and the dimensions of `df` are preserved.
#'
#' @param df (`data.frame`).
#' @param label (`character`) the labels to add, one per column and in the order of the columns.
#' @returns `data.frame` with label attributes.
#'
#' @export
#' @examples
#' res <- attr_label_df(mtcars, letters[1:11])
#' res
#' lapply(res, attr, "label")
attr_label_df <- function(df, label) {
  checkmate::assert_data_frame(df)
  checkmate::assert_character(label, len = ncol(df))

  # Rebuilding the object with `as.data.frame()` would reset the row names and sanitize the column names;
  # modifying the columns in place keeps the structure of `df` intact.
  for (i in seq_along(df)) {
    attr(df[[i]], "label") <- label[[i]]
  }

  df
}
41
#' Getting Argument From System, Option or Default
#'
#' @param opt (`string`) the name of an option.
#' @param sys (`string`) the name of an environment variable.
#' @param default value to return if neither the environment variable nor the option are set.
#' @param split (`string`) the pattern used to split the values obtained using environment variable.
#'
#' @returns in this order of priority: the value of the option (`opt`) if defined, a `character` obtained by
#'   splitting the environment variable (`sys`) if defined, or the `default`.
#'
#' @export
#' @examplesIf require("withr")
#' get_arg("my.option", "MY_ARG", "default")
#' withr::with_envvar(c(MY_ARG = "x;y"), get_arg("my.option", "MY_ARG", "default"))
#' withr::with_options(c(my.option = "y"), get_arg("my.option", "MY_ARG", "default"))
get_arg <- function(opt = NULL, sys = NULL, default = NULL, split = ";") {
  checkmate::assert_string(sys, null.ok = TRUE)
  checkmate::assert_string(opt, null.ok = TRUE)
  checkmate::assert_string(split)

  # An unset option and an option set to "" are both treated as "not defined".
  opt_value <- if (is.null(opt)) "" else getOption(opt, default = "")
  if (!identical(opt_value, "")) {
    return(opt_value)
  }

  # Same convention for the environment variable; its value is split on `split`.
  env_value <- if (is.null(sys)) "" else Sys.getenv(sys, unset = "")
  if (!identical(env_value, "")) {
    return(stringr::str_split_1(env_value, split))
  }

  default
}

#' Filter Data with Log
#' @param data (`data.frame`) input data to subset, or named (`list` of `data.frame`).
#' @param condition (`call`) of subset condition. Must evaluate as logical.
#' @param suffix (`string`) optional argument describing the filter.
#' @param ... further arguments to be passed to or from other methods.
#' @returns a `data.frame` or `list` of `data.frame` filtered for the provided conditions.
#' @details
#' `log_filter` filters the data, or one table of a named list of data, according to `condition`.
#' All the variables in `condition` must exist in the data (as variables) or in the parent
#' frame (e.g., in the global environment).
#' For a named list of data, when the filtered table is `adsl`, the other tables are also
#' restricted to the rows matching `adsl` on the `by` variables (`USUBJID` and `STUDYID` by default).
#' @export
log_filter <- function(data, condition, ...) {
  # S3 generic: dispatch happens on the class of `data`.
  UseMethod("log_filter")
}
17
#' @rdname log_filter
#' @export
#' @examples
#' data <- iris
#' attr(data$Sepal.Length, "label") <- "cm"
#' log_filter(data, Sepal.Length >= 7)
#'
#' # Variables of the calling environment can be used in the condition.
#' threshold <- 7
#' log_filter(data, Sepal.Length >= threshold)
#'
log_filter.data.frame <- function(data, condition, suffix = NULL, ...) {
  checkmate::assert_string(suffix, null.ok = TRUE)

  # The condition is captured unevaluated and later evaluated against `data` first and the caller's
  # environment second. Binding it to the caller's environment (rather than to this function's frame)
  # makes the evaluation consistent with the existence check below and prevents the local variables of
  # this function from masking the caller's variables.
  caller_env <- parent.frame()
  condition <- match.call()$condition

  vars <- all.vars(condition)
  var_in_env <- vapply(vars, exists, envir = caller_env, inherits = TRUE, FUN.VALUE = TRUE)
  var_in_data <- vars %in% names(data)
  unknown_vars <- vars[!(var_in_data | var_in_env)]
  if (length(unknown_vars) > 0) {
    rlang::abort(sprintf("Variable %s not found in data or environment.", toString(unknown_vars)))
  }

  res <- dplyr::filter(data, !!rlang::new_quosure(condition, env = caller_env))

  # One log entry per filtering step, named after the (whitespace-normalized) condition and appended
  # to the entries already stored on `data`.
  rows <- list(list(init = nrow(data), final = nrow(res), suffix = suffix))
  rlbl <- paste0(deparse(condition), collapse = "")
  rlbl <- stringr::str_replace_all(rlbl, "\\s+", " ")
  names(rows) <- rlbl
  attr(res, "rows") <- c(attr(data, "rows"), rows)

  res
}
44
#' @rdname log_filter
#' @param table (`string`) table name.
#' @param by (`character`) variable names shared by `adsl` and other datasets for filtering. If empty, each dataset
#'   is matched to `adsl` on the columns it shares with `adsl`; datasets sharing no column with `adsl` are left
#'   untouched.
#' @param verbose (`flag`) whether to print a report about the filtering.
#' @export
#' @examples
#' log_filter(list(iris = iris), Sepal.Length >= 7, "iris", character(0))
log_filter.list <- function(data, condition, table, by = c("USUBJID", "STUDYID"), suffix = NULL, verbose = FALSE, ...) {
  checkmate::assert_list(data, types = "data.frame", names = "unique")
  assert_all_tablenames(data, table)
  checkmate::assert_names(colnames(data[[table]]), must.include = by)

  caller_env <- parent.frame()
  condition <- match.call()$condition

  # The unevaluated condition is forwarded to the `data.frame` method from the caller's environment, so
  # that the variables used in the condition are looked up where the user defined them and not among the
  # local variables of this function.
  data[[table]] <- do.call(log_filter, list(data[[table]], condition, suffix), envir = caller_env)

  if (identical(table, "adsl")) {
    # Same label construction as in the `data.frame` method: the deparsed condition is collapsed before
    # the prefix is added, otherwise the prefix is repeated for every chunk of a long condition.
    condition_lbl <- stringr::str_replace_all(paste(deparse(condition), collapse = ""), "\\s+", " ")
    log_name <- paste0("Filtered by adsl: ", condition_lbl)

    for (k in setdiff(names(data), "adsl")) {
      # Keys are resolved per table and `by` is never overwritten, so the columns shared by one table do
      # not leak into the processing of the following ones.
      keys <- if (length(by) == 0) intersect(names(data[[k]]), names(data$adsl)) else by
      if (length(keys) == 0 || !all(keys %in% names(data[[k]]))) {
        next
      }

      ori_n <- nrow(data[[k]])
      ori_att <- attr(data[[k]], "rows")

      data[[k]] <- dplyr::semi_join(data[[k]], data$adsl, by = keys)

      rows <- list(list(init = ori_n, final = nrow(data[[k]]), suffix = suffix))
      names(rows) <- log_name
      attr(data[[k]], "rows") <- c(ori_att, rows)
    }
  }

  if (verbose) {
    print_log(data, incl = FALSE, incl.adsl = TRUE)
  }

  data
}
81
82		# Get Log ----
83
#' Get Log
#'
#' @param data (`list` of `data.frame` or `data.frame`) filtered with `log_filter`.
#' @param incl (`flag`) should information about unfiltered `data.frame` be returned.
#' @param incl.adsl (`flag`) should indication of filtering performed through `adsl` be returned.
#' @returns `character` or `list of character` describing the filtering applied to `data`.
#'
#' @export
get_log <- function(data, incl, incl.adsl) {
  # S3 generic: dispatch happens on the class of `data`.
  UseMethod("get_log")
}
95
#' @rdname get_log
#' @export
#' @examples
#' data <- log_filter(iris, Sepal.Length >= 7, "xx")
#' data <- log_filter(data, Sepal.Length < 2)
#' data <- log_filter(data, Sepal.Length >= 2, "yy")
#' get_log(data)
#'
get_log.data.frame <- function(data, incl = TRUE, incl.adsl = TRUE) {
  checkmate::assert_flag(incl)

  # Log entries stored by `log_filter`; each one holds `init`, `final` and `suffix`.
  att <- attr(data, "rows")
  if (!incl.adsl) {
    att <- att[!grepl("Filtered by adsl", names(att))]
  }

  if (length(att) > 0L) {
    start_row <- vapply(att, function(entry) as.character(entry[["init"]]), character(1))
    end_row <- vapply(att, function(entry) as.character(entry[["final"]]), character(1))
    prefix <- vapply(
      att,
      function(entry) {
        sfx <- entry[["suffix"]]
        if (is.null(sfx)) "" else paste0(sfx, ": ")
      },
      character(1)
    )
    # Explicit return: ending the branch on an assignment would return the log invisibly, unlike the
    # other branches.
    return(paste0(prefix, names(att), " [", start_row, " --> ", end_row, " rows.]"))
  }

  if (incl) {
    paste0("No filtering [", nrow(data), " rows.]")
  } else {
    NULL
  }
}
125
126
#' @rdname get_log
#' @export
#' @examples
#' data <- log_filter(
#'   list(iris1 = iris, iris2 = iris),
#'   Sepal.Length >= 7,
#'   "iris1",
#'   character(0),
#'   "Sep"
#' )
#' get_log(data)
#'
get_log.list <- function(data, incl = TRUE, incl.adsl = TRUE) {
  checkmate::assert_list(data, types = "data.frame", names = "unique")
  checkmate::assert_flag(incl)

  # One log per table; the names of `data` are carried over to the result.
  lapply(data, function(tab) get_log(tab, incl = incl, incl.adsl = incl.adsl))
}
145
146		# Print Log ----
147
#' Print Log
#'
#' @inheritParams get_log
#' @returns `NULL`, invisibly. Called for its side effect: printing a description of the filtering applied to `data`.
#' @export
#'
print_log <- function(data, incl, incl.adsl) {
  # S3 generic: dispatch happens on the class of `data`.
  UseMethod("print_log")
}
157
#' @rdname print_log
#' @export
#' @examples
#' data <- log_filter(iris, Sepal.Length >= 7, "Sep")
#' print_log(data)
print_log.data.frame <- function(data, incl = TRUE, incl.adsl = TRUE) {
  checkmate::assert_flag(incl)

  cat("Filter Log:")
  log_lines <- get_log(data, incl = incl, incl.adsl = incl.adsl)
  # Each log entry starts on its own line.
  cat(paste0("\n ", log_lines))
  cat("\n")

  invisible()
}
171
#' @rdname print_log
#' @export
#' @examples
#' data <- log_filter(
#'   list(
#'     adsl = iris,
#'     iris2 = iris,
#'     mtcars = mtcars,
#'     iris3 = iris
#'   ),
#'   Sepal.Length >= 7,
#'   "adsl",
#'   character(0),
#'   "adsl filter"
#' )
#' data <- log_filter(data, Sepal.Length >= 7, "iris2", character(0), "iris2 filter")
#' print_log(data)
#' print_log(data, incl = FALSE)
#' print_log(data, incl.adsl = FALSE, incl = FALSE)
print_log.list <- function(data, incl = TRUE, incl.adsl = TRUE) {
  checkmate::assert_list(data, types = "data.frame", names = "unique")
  checkmate::assert_flag(incl)

  table_logs <- get_log(data, incl = incl, incl.adsl = incl.adsl)

  # When unfiltered tables are not reported, their (NULL) logs are removed.
  if (!incl) {
    table_logs <- Filter(Negate(is.null), table_logs)
  }

  cat("Filter Log:")
  if (length(table_logs) == 0) {
    cat("\n No filtering")
  } else {
    # One header line per table followed by its log entries.
    for (i in seq_along(table_logs)) {
      cat(paste0("\n - ", names(table_logs)[[i]], ":"))
      cat(paste0("\n ", table_logs[[i]], ""))
    }
  }
  cat("\n")

  invisible()
}

#' Unite Columns of a Table in a `list` of `data.frame`.
#'
#' @details The values of the united column are obtained by pasting the values of `cols`, converted with
#'   `as.character()`, separated by `sep`. The levels of the resulting factor are the combinations present in the
#'   data, ordered following the levels (for `factor`) or the order of appearance (otherwise) of `cols`, the first
#'   column varying slowest.
#'
#' @param adam_db (`list` of `data.frames`) to be transformed.
#' @param tab (`string`) the name of a table in the `adam_db` object.
#' @param cols (`character`) the name of the columns to unite.
#' @param sep (`string`) the separator for the new column name.
#' @param new (`string`) the name of the new column. If `NULL` the concatenation of `cols` separated by `sep` is used.
#'
#' @returns `list` of `data.frames` object with a united column.
#' @export
#'
#' @examples
#' db <- list(mtcars = mtcars, iris = iris)
#'
#' x <- ls_unite(db, "mtcars", c("mpg", "hp"), new = "FUSION")
#' x$mtcars
ls_unite <- function(adam_db, tab, cols, sep = ".", new = NULL) {
  checkmate::assert_list(adam_db, types = "data.frame")
  checkmate::assert_string(tab)
  checkmate::assert_names(names(adam_db), must.include = tab)
  checkmate::assert_character(cols, min.len = 1)
  checkmate::assert_names(names(adam_db[[tab]]), must.include = cols)
  checkmate::assert_string(sep)
  checkmate::assert_string(new, null.ok = TRUE)

  x_interaction <- if (!is.null(new)) {
    new
  } else {
    paste(cols, collapse = sep)
  }

  # Row-wise concatenation performed column by column. Going through `apply()` would first coerce the
  # data frame to a matrix, which pads numeric and logical values with spaces when the columns have
  # mixed types; the padded labels would then never match the levels computed below.
  paste_rows <- function(df) {
    do.call(paste, c(unname(lapply(df, as.character)), list(sep = sep)))
  }

  x_df <- adam_db[[tab]][, cols, drop = FALSE]
  lvl <- lapply(x_df, function(y) {
    uni <- if (is.factor(y)) {
      levels(y)
    } else {
      unique(y)
    }
    factor(uni, levels = uni)
  })

  # Every possible combination of levels, sorted so that the first column varies slowest.
  all_lvl_df <- expand.grid(lvl)[, cols, drop = FALSE] %>%
    arrange(across(all_of(cols)))

  all_lvl <- paste_rows(all_lvl_df)
  x_vec <- paste_rows(x_df)

  # Only the combinations observed in the data are kept as levels.
  existing_lvl <- intersect(all_lvl, x_vec)
  x_fact <- factor(x_vec, existing_lvl)

  adam_db[[tab]][, x_interaction] <- x_fact
  adam_db
}

#' Reorder Two Columns Levels Simultaneously
#'
#' @details The function expects a 1:1 matching between the elements of the two selected columns.
#'
#' @param df (`data.frame`) with two columns whose factors should be reordered.
#' @param primary (`string`) the name of the column on which the levels reordering should be based.
#' @param secondary (`string`) the name of the column whose levels should be reordered following the levels of the
#'   primary column.
#' @param levels_primary (`character`) the levels in the desired order. Existing levels that are not included will be
#'   placed afterward in their current order.
#' @returns a `data.frame` with the `primary` and `secondary` columns converted to factor with reordered levels.
#'
#' @export
#'
#' @examples
#' df <- data.frame(
#'   SUBJID = 1:3,
#'   PARAMCD = factor(c("A", "B", "C")),
#'   PARAM = factor(paste("letter", LETTERS[1:3]))
#' )
#' co_relevels(df, "PARAMCD", "PARAM", levels_primary = c("C", "A", "B"))
co_relevels <- function(df, primary, secondary, levels_primary) {
  checkmate::assert_data_frame(df, min.rows = 1)
  checkmate::assert_subset(c(primary, secondary), colnames(df))
  checkmate::assert_character(levels_primary, min.len = 1)
  checkmate::assert_vector(df[[primary]], any.missing = FALSE)
  checkmate::assert_vector(df[[secondary]], any.missing = FALSE)

  df[, primary] <- as.factor(df[[primary]])
  df[, secondary] <- as.factor(df[[secondary]])

  # Distinct (primary, secondary) pairs: with a 1:1 relation, no value appears twice on either side.
  pairs <- unique(df[, c(primary, secondary)])
  is_one_to_one <- anyDuplicated(pairs[[primary]]) == 0 && anyDuplicated(pairs[[secondary]]) == 0
  if (!is_one_to_one) {
    rlang::abort("non univoque relation between values in primary and secondary column")
  }

  # Lookup from primary value to the matching secondary value.
  lookup <- as.character(pairs[[secondary]])
  names(lookup) <- as.character(pairs[[primary]])

  # Requested levels first, remaining existing levels afterward in their current order.
  primary_order <- c(levels_primary, setdiff(levels(df[[primary]]), levels_primary))
  secondary_order <- lookup[primary_order]

  df[, primary] <- factor(df[[primary]], primary_order)
  df[, secondary] <- factor(df[[secondary]], secondary_order)

  df
}

#' Cutting data by group
#'
#' @details Function used to categorize numeric data stored in long format depending on their group. Intervals are
#'   closed on the right (and open on the left). Rows whose group is missing or not described in `group` receive a
#'   missing category.
#'
#' @param df (`dataframe`) with a column of data to be cut and a column specifying the group of each observation.
#' @param col_data (`character`) the column containing the data to be cut.
#' @param col_group (`character`) the column containing the names of the groups according to which the data should be
#'   split.
#' @param group (`nested list`) providing for each parameter value that should be analyzed in a categorical way: the
#'   name of the parameter (`character`), a series of breakpoints (`numeric`) where the first breakpoints is typically
#'   `-Inf` and the last `Inf`, and a series of name which will describe each category (`character`).
#' @param cat_col (`character`) the name of the new column in which the cut label should be stored.
#' @returns `data.frame` with a `character` column containing categorical values.
#' @export
#'
#' @examples
#' group <- list(
#'   list(
#'     "Height",
#'     c(-Inf, 150, 170, Inf),
#'     c("=<150", "150-170", ">170")
#'   ),
#'   list(
#'     "Weight",
#'     c(-Inf, 65, Inf),
#'     c("=<65", ">65")
#'   ),
#'   list(
#'     "Age",
#'     c(-Inf, 31, Inf),
#'     c("=<31", ">31")
#'   ),
#'   list(
#'     "PreCondition",
#'     c(-Inf, 1, Inf),
#'     c("=<1", ">1")
#'   )
#' )
#' data <- data.frame(
#'   SUBJECT = rep(letters[1:10], 4),
#'   PARAM = rep(c("Height", "Weight", "Age", "other"), each = 10),
#'   AVAL = c(rnorm(10, 165, 15), rnorm(10, 65, 5), runif(10, 18, 65), rnorm(10, 0, 1)),
#'   index = 1:40
#' )
#'
#' cut_by_group(data, "AVAL", "PARAM", group, "my_new_categories")
cut_by_group <- function(df,
                         col_data,
                         col_group,
                         group,
                         cat_col) {
  checkmate::assert_data_frame(df)
  checkmate::assert_subset(c(col_data, col_group), colnames(df))
  # `[[` extracts the column as a vector for every flavor of data frame (`[, col]` keeps a tibble a tibble).
  checkmate::assert_numeric(df[[col_data]])
  checkmate::assert_list(group)

  for (list_element in group) {
    checkmate::assert_list(list_element, len = 3, types = c("character", "numeric", "character"))
  }

  # Character from the start, so that the type of the new column does not depend on `group`.
  df[cat_col] <- NA_character_

  for (g in group) {
    # `%in%` never returns `NA`: rows with a missing group are simply not selected, whereas `==` would
    # produce `NA` subscripts, which are not allowed in data frame assignment.
    selected_row <- df[[col_group]] %in% g[[1]]

    df[selected_row, cat_col] <- as.character(cut(df[[col_data]][selected_row], breaks = g[[2]], labels = g[[3]]))
  }

  df
}