teal.data coverage - 86.86%

Files
Source

File	Lines	Relevant	Covered	Missed	Hits / Line	Coverage
R/deprecated.R	345	56	0	56	0	0.00%
R/teal_data-show.R	20	4	0	4	0	0.00%
R/dummy_function.R	16	2	0	2	0	0.00%
R/teal_data.R	70	24	12	12	38	50.00%
R/formatters_var_labels.R	98	33	23	10	2	69.70%
R/verify.R	112	37	28	9	1	75.68%
R/join_keys-utils.R	154	67	64	3	59	95.52%
R/teal_data-class.R	96	24	23	1	66	95.83%
R/utils-get_code_dependency.R	405	134	133	1	144	99.25%
R/join_keys-extract.R	283	113	113	0	120	100.00%
R/join_key.R	91	33	33	0	820	100.00%
R/join_keys-print.R	61	31	31	0	13	100.00%
R/topological_sort.R	64	28	28	0	276	100.00%
R/join_keys-parents.R	140	28	28	0	219	100.00%
R/testhat-helpers.R	55	22	22	0	36	100.00%
R/join_keys.R	161	20	20	0	258	100.00%
R/join_keys-names.R	32	15	15	0	3	100.00%
R/teal_data-get_code.R	127	12	12	0	33	100.00%
R/teal_data-datanames.R	52	10	10	0	3	100.00%
R/join_keys-c.R	42	10	10	0	192	100.00%
R/cdisc_data.R	39	1	1	0	1	100.00%

#' Show `teal_data` object
#'
#' Prints `teal_data` object.
#'
#' @param object (`teal_data`)
#' @return Input `teal_data` object.
#' @importFrom methods show
#' @examples
#' teal_data()
#' teal_data(x = iris, code = "x = iris")
#' verify(teal_data(x = iris, code = "x = iris"))
#' @export
setMethod("show", signature = "teal_data", function(object) {
  if (object@verified) {
    cat("\u2705\ufe0e", "verified teal_data object\n")
  } else {
    cat("\u2716", "unverified teal_data object\n")
  }
  rlang::env_print(object@env)
})

#' Deprecated `TealData` class and related functions
#'
#' @description
#' `r lifecycle::badge("deprecated")`
#'
#' The `TealData` class and associated functions have been deprecated. Use [teal_data()] instead.
#' See the [Migration guide](https://github.com/insightsengineering/teal/discussions/945) for details.
#'
#' @name TealData
#'
#' @param ... any argument supported in `TealData` related functions.
#'
#' @return nothing
#' @seealso  [cdisc_data()] , [join_keys()]
#'
NULL

.deprecate_function <- function(what, details) {
  lifecycle::deprecate_stop(
    when = "0.4.0",
    what = what,
    details = details
  )
}

deprecation_detail <- "Find more information on https://github.com/insightsengineering/teal/discussions/945"

#' @rdname TealData
#' @export
as_cdisc <- function(...) {
  .deprecate_function("as_cdisc()", deprecation_detail)
}

#' @rdname TealData
#' @export
callable_code <- function(...) {
  .deprecate_function("callable_code()", deprecation_detail)
}

#' @rdname TealData
#' @export
callable_function <- function(...) {
  .deprecate_function("callable_function()", deprecation_detail)
}

#' @rdname TealData
#' @export
code_dataset_connector <- function(...) {
  .deprecate_function("code_dataset_connector()", deprecation_detail)
}

#' @rdname TealData
#' @export
code_cdisc_dataset_connector <- function(...) {
  .deprecate_function("code_cdisc_dataset_connector()", deprecation_detail)
}

#' @rdname TealData
#' @export
csv_dataset_connector <- function(...) {
  .deprecate_function("csv_dataset_connector()", deprecation_detail)
}

#' @rdname TealData
#' @export
csv_cdisc_dataset_connector <- function(...) {
  .deprecate_function("csv_cdisc_dataset_connector()", deprecation_detail)
}

#' @rdname TealData
#' @export
python_code <- function(...) {
  .deprecate_function("python_code()", deprecation_detail)
}

#' @rdname TealData
#' @export
python_dataset_connector <- function(...) {
  .deprecate_function("python_dataset_connector()", deprecation_detail)
}

#' @rdname TealData
#' @export
python_cdisc_dataset_connector <- function(...) {
  .deprecate_function("python_cdisc_dataset_connector()", deprecation_detail)
}

#' @rdname TealData
#' @export
cdisc_data_connector <- function(...) {
  .deprecate_function("cdisc_data_connector()", deprecation_detail)
}

#' @rdname TealData
#' @export
cdisc_dataset <- function(...) {
  .deprecate_function("cdisc_dataset()", deprecation_detail)
}

#' @rdname TealData
#' @export
cdisc_dataset_connector <- function(...) {
  .deprecate_function("cdisc_dataset_connector()", deprecation_detail)
}

#' @rdname TealData
#' @export
cdisc_dataset_connector_file <- function(...) {
  .deprecate_function("cdisc_dataset_connector_file()", deprecation_detail)
}

#' @rdname TealData
#' @export
cdisc_dataset_file <- function(...) {
  .deprecate_function("cdisc_dataset_file()", deprecation_detail)
}

#' @rdname TealData
#' @export
dataset <- function(...) {
  .deprecate_function("dataset()", deprecation_detail)
}

#' @rdname TealData
#' @export
dataset_connector <- function(...) {
  .deprecate_function("dataset_connector()", deprecation_detail)
}

#' @rdname TealData
#' @export
dataset_connector_file <- function(...) {
  .deprecate_function("dataset_connector_file()", deprecation_detail)
}

#' @rdname TealData
#' @export
dataset_file <- function(...) {
  .deprecate_function("dataset_file()", deprecation_detail)
}

#' @rdname TealData
#' @export
data_connection <- function(...) {
  .deprecate_function("data_connection()", deprecation_detail)
}

#' @rdname TealData
#' @export
fun_dataset_connector <- function(...) {
  .deprecate_function("fun_dataset_connector()", deprecation_detail)
}

#' @rdname TealData
#' @export
fun_cdisc_dataset_connector <- function(...) {
  .deprecate_function("fun_cdisc_dataset_connector()", deprecation_detail)
}

#' @rdname TealData
#' @export
relational_data_connector <- function(...) {
  .deprecate_function("relational_data_connector()", deprecation_detail)
}

#' @rdname TealData
#' @export
mae_dataset <- function(...) {
  .deprecate_function("mae_dataset()", deprecation_detail)
}

#' @rdname TealData
#' @export
get_attrs <- function(...) {
  .deprecate_function("get_attrs()", deprecation_detail)
}

#' @rdname TealData
#' @export
get_dataset_label <- function(...) {
  .deprecate_function("get_dataset_label()", deprecation_detail)
}

#' @rdname TealData
#' @export
get_dataset <- function(...) {
  .deprecate_function("get_dataset()", deprecation_detail)
}

#' @rdname TealData
#' @export
get_datasets <- function(...) {
  .deprecate_function("get_datasets()", deprecation_detail)
}

#' @rdname TealData
#' @export
get_dataname <- function(...) {
  .deprecate_function("get_dataname()", deprecation_detail)
}

#' @rdname TealData
#' @export
get_key_duplicates <- function(...) {
  .deprecate_function("get_key_duplicates()", deprecation_detail)
}

#' @rdname TealData
#' @export
get_keys <- function(...) {
  .deprecate_function("get_keys()", deprecation_detail)
}

#' @rdname TealData
#' @export
get_raw_data <- function(...) {
  .deprecate_function("get_raw_data()", deprecation_detail)
}

#' @rdname TealData
#' @export
is_pulled <- function(...) {
  .deprecate_function("is_pulled()", deprecation_detail)
}

#' @rdname TealData
#' @export
load_dataset <- function(...) {
  .deprecate_function("load_dataset()", deprecation_detail)
}

#' @rdname TealData
#' @export
load_datasets <- function(...) {
  .deprecate_function("load_datasets()", deprecation_detail)
}

#' @rdname TealData
#' @export
mutate_data <- function(...) {
  .deprecate_function("mutate_data()", deprecation_detail)
}

#' @rdname TealData
#' @export
mutate_dataset <- function(...) {
  .deprecate_function("mutate_dataset()", deprecation_detail)
}

#' @rdname TealData
#' @export
set_args <- function(...) {
  .deprecate_function("set_args()", deprecation_detail)
}

#' @rdname TealData
#' @export
rds_dataset_connector <- function(...) {
  .deprecate_function("rds_dataset_connector()", deprecation_detail)
}

#' @rdname TealData
#' @export
rds_cdisc_dataset_connector <- function(...) {
  .deprecate_function("rds_cdisc_dataset_connector()", deprecation_detail)
}

#' @rdname TealData
#' @export
script_dataset_connector <- function(...) {
  .deprecate_function("script_dataset_connector()", deprecation_detail)
}

#' @rdname TealData
#' @export
script_cdisc_dataset_connector <- function(...) {
  .deprecate_function("script_cdisc_dataset_connector()", deprecation_detail)
}

#' @rdname TealData
#' @export
set_keys <- function(...) {
  .deprecate_function("set_keys()", deprecation_detail)
}

#' @rdname TealData
#' @export
read_script <- function(...) {
  .deprecate_function("read_script()", deprecation_detail)
}

#' @rdname TealData
#' @export
to_relational_data <- function(...) {
  .deprecate_function("to_relational_data()", deprecation_detail)
}

#' @rdname TealData
#' @export
validate_metadata <- function(...) {
  .deprecate_function("validate_metadata()", deprecation_detail)
}

#' @rdname TealData
#' @export
get_cdisc_keys <- function(...) {
  .deprecate_function("get_cdisc_keys()", deprecation_detail)
}

#' @rdname TealData
#' @export
cdisc_data_file <- function(...) {
  .deprecate_function("cdisc_data_file()", deprecation_detail)
}

#' @rdname TealData
#' @export
teal_data_file <- function(...) {
  .deprecate_function("teal_data_file()", deprecation_detail)
}

#' @rdname TealData
#' @export
get_join_keys <- function(...) {
  .deprecate_function("get_join_keys()", "Use `join_keys(data)` instead.")
}

#' @rdname TealData
#' @param value value to assign
#' @export
`get_join_keys<-` <- function(..., value) {
  .deprecate_function("`get_join_keys<-`()", "Use `join_keys(x) <- ...`")
}

#' @rdname col_labels
#' @include formatters_var_labels.R
#' @details
#' `r lifecycle::badge("deprecated")`
#'
#' In previous versions of `teal.data` labels were managed with `get_labels()`.
#' This function is deprecated as of `0.4.0`, use `col_labels` instead.
#' @export
get_labels <- function(...) {
  .deprecate_function("get_labels()", "Use col_labels(data)")
}

#' Create a relationship between a pair of datasets
#'
#' @description
#' `r lifecycle::badge("stable")`
#'
#' Create a relationship between two datasets, `dataset_1` and `dataset_2`.
#' By default, this function establishes a directed relationship with `dataset_1` as the parent.
#' If `dataset_2` is not specified, the function creates a primary key for `dataset_1`.
#'
#' @param dataset_1,dataset_2 (`character(1)`) Dataset names. When `dataset_2` is omitted,
#' a primary key for `dataset_1` is created.
#' @param keys (optionally named `character`) Column mapping between the datasets,
#' where `names(keys)` maps columns in `dataset_1` corresponding to columns of
#' `dataset_2` given by the elements of `keys`.
#' - If unnamed, the same column names are used for both datasets.
#' - If any element of the `keys` vector is empty with a non-empty name, then the name is
#' used for both datasets.
#' @param directed (`logical(1)`) Flag that indicates whether it should create
#' a parent-child relationship between the datasets.
#'  - `TRUE` (default) `dataset_1` is the parent of `dataset_2`;
#'  - `FALSE` when the relationship is undirected.
#'
#' @return object of class `join_key_set` to be passed into `join_keys` function.
#'
#' @examples
#' join_key("d1", "d2", c("A"))
#' join_key("d1", "d2", c("A" = "B"))
#' join_key("d1", "d2", c("A" = "B", "C"))
#'
#' @export
#' @seealso [join_keys()], [parents()]
#'
join_key <- function(dataset_1, dataset_2 = dataset_1, keys, directed = TRUE) {
  checkmate::assert_string(dataset_1)
  checkmate::assert_string(dataset_2)
  checkmate::assert_character(keys, any.missing = FALSE)
  checkmate::assert_flag(directed)

  if (length(keys) > 0) {
    if (is.null(names(keys))) {
      names(keys) <- keys
    }

    keys <- trimws(keys)
    names(keys) <- trimws(names(keys))

    # Remove keys with empty value and without name
    if (any(keys == "" & names(keys) == "")) {
      message("Key with an empty value and name are ignored.")
      keys <- keys[keys != "" & names(keys) != ""]
    }

    # Set name of keys without one: c("A") -> c("A" = "A")
    if (any(names(keys) == "")) {
      names(keys)[names(keys) == ""] <- keys[names(keys) == ""]
    }

    # Set value of keys with empty string, but non-empty name: c("A" = "") -> c("A" = "A")
    if (any(keys == "")) {
      keys[keys == ""] <- names(keys[keys == ""])
    }

    stopifnot(!is.null(names(keys)))
    stopifnot(!anyDuplicated(keys))
    stopifnot(!anyDuplicated(names(keys)))

    if (dataset_1 == dataset_2 && any(names(keys) != keys)) {
      stop("Keys within a dataset must match exactly: keys = c('A' = 'B') are not allowed")
    }
  } else {
    keys <- NULL
  }

  parents <- if (directed && dataset_1 != dataset_2) {
    stats::setNames(list(dataset_1), dataset_2)
  } else {
    list()
  }

  structure(
    list(
      structure(
        list(keys),
        names = dataset_2
      )
    ),
    names = dataset_1,
    class = "join_key_set",
    parents = parents
  )
}

#' Check Compatibility of keys
#'
#' Helper function to assert if two key sets contain incompatible keys.
#'
#' @return Returns `TRUE` if successful, otherwise raises error.
#' @keywords internal
assert_compatible_keys <- function(join_key_1, join_key_2) {
  stop_message <- function(dataset_1, dataset_2) {
    stop(
      paste("cannot specify multiple different join keys between datasets:", dataset_1, "and", dataset_2)
    )
  }

  dataset_1_one <- names(join_key_1)
  dataset_2_one <- names(join_key_1[[1]])
  keys_one <- join_key_1[[1]][[1]]

  dataset_1_two <- names(join_key_2)
  dataset_2_two <- names(join_key_2[[1]])
  keys_two <- join_key_2[[1]][[1]]

  # if first datasets and the second datasets match and keys
  # must contain the same named elements
  if (dataset_1_one == dataset_1_two && dataset_2_one == dataset_2_two) {
    if (!identical(sort(keys_one), sort(keys_two))) {
      stop_message(dataset_1_one, dataset_2_one)
    }
  }

  # if first dataset of join_key_1 matches second dataset of join_key_2
  # and the first dataset of join_key_2 must match second dataset of join_key_1
  # and keys must contain the same elements but with names and values swapped
  if (dataset_1_one == dataset_2_two && dataset_2_one == dataset_1_two) {
    if (
      xor(length(keys_one) == 0, length(keys_two) == 0) ||
        !identical(sort(keys_one), sort(stats::setNames(names(keys_two), keys_two)))
    ) {
      stop_message(dataset_1_one, dataset_2_one)
    }
  }

  # otherwise they are compatible
  return(TRUE)
}

#' Validate parent-child key
#'
#' Helper function checks the parent-child relations are valid.
#'
#' @param x (`join_keys`) object to assert validity of relations
#'
#' @return `join_keys` invisibly
#'
#' @keywords internal
assert_parent_child <- function(x) {
  jk <- join_keys(x)
  jk_parents <- parents(jk)

  checkmate::assert_class(jk, c("join_keys", "list"))

  if (!is.null(jk_parents)) {
    for (idx1 in seq_along(jk_parents)) {
      name_from <- names(jk_parents)[[idx1]]
      for (idx2 in seq_along(jk_parents[[idx1]])) {
        name_to <- jk_parents[[idx1]][[idx2]]
        keys_from <- jk[[name_from]][[name_to]]
        keys_to <- jk[[name_to]][[name_from]]
        if (length(keys_from) == 0 && length(keys_to) == 0) {
          stop(sprintf("No join keys from %s to its parent (%s) and vice versa", name_from, name_to))
        }
      }
    }
  }
  invisible(x)
}

#' Verify key set compatibility
#'
#' Helper function to ensuring compatibility between two sets of keys
#'
#' @return Returns `TRUE` if successful, otherwise raises error.
#' @keywords internal
assert_compatible_keys2 <- function(x, y) {
  # Helper to flatten join_keys / join_key_set
  flatten_join_key_sets <- function(value) {
    value <- unclass(value)
    Reduce(
      init = list(),
      f = function(u, v, ...) {
        el <- value[v][[1]]
        res <- lapply(seq_along(el), function(ix) el[ix])
        names(res) <- rep(v, length(res))
        append(u, res)
      },
      x = names(value)
    )
  }

  x <- flatten_join_key_sets(x)
  y <- flatten_join_key_sets(y)

  for (idx_1 in seq_along(x)) {
    for (idx_2 in seq_along(y)) {
      assert_compatible_keys(x[idx_1], y[idx_2])
    }
  }
  TRUE
}

#' Updates the keys of the datasets based on the parents
#'
#' @param x (`join_keys`) object to update the keys.
#'
#' @return (`self`) invisibly for chaining
#'
#' @keywords internal
update_keys_given_parents <- function(x) {
  jk <- join_keys(x)

  checkmate::assert_class(jk, "join_keys", .var.name = checkmate::vname(x))

  datanames <- names(jk)
  for (d1_ix in seq_along(datanames)) {
    d1 <- datanames[[d1_ix]]
    d1_parent <- parent(jk, d1)
    for (d2 in datanames[-1 * seq.int(d1_ix)]) {
      if (length(jk[[d1]][[d2]]) == 0) {
        d2_parent <- parent(jk, d2)

        if (!identical(d1_parent, d2_parent) || length(d1_parent) == 0) next

        # both has the same parent -> common keys to parent
        keys_d1_parent <- sort(jk[[d1]][[d1_parent]])
        keys_d2_parent <- sort(jk[[d2]][[d2_parent]])

        common_ix_1 <- unname(keys_d1_parent) %in% unname(keys_d2_parent)
        common_ix_2 <- unname(keys_d2_parent) %in% unname(keys_d1_parent)

        # No common keys between datasets - leave empty
        if (all(!common_ix_1)) next

        fk <- structure(
          names(keys_d2_parent)[common_ix_2],
          names = names(keys_d1_parent)[common_ix_1]
        )
        jk[[d1]][[d2]] <- fk # mutate join key
      }
    }
  }
  # check parent child relation
  assert_parent_child(x = jk)

  jk
}

#' @rdname join_keys
#' @order 2
#'
#' @section Functions:
#' - `x[datanames]`: Returns a subset of the `join_keys` object for
#' given `datanames`, including parent `datanames` and symmetric mirror keys between
#' `datanames` in the result.
#' - `x[i, j]`: Returns join keys between datasets `i` and `j`,
#'   including implicit keys inferred from their relationship with a parent.
#'
#' @param i,j indices specifying elements to extract or replace. Index should be a
#' a character vector, but it can also take numeric, logical, `NULL` or missing.
#'
#' @export
#'
#' @examples
#'
#' # Getter for join_keys ---
#'
#' jk["ds1", "ds2"]
#'
#' # Subsetting join_keys ----
#'
#' jk["ds1"]
#' jk[1:2]
#' jk[c("ds1", "ds2")]
#'
`[.join_keys` <- function(x, i, j) {
  if (missing(i) && missing(j)) {
    # because:
    # - list(a = 1)[] returns list(a = 1)
    # - data.frame(a = 1)[] returns data.frame(a = 1)
    return(x)
  } else if (!missing(i) && is.null(i) || !missing(j) && is.null(j)) {
    # because list(a = 1)[NULL] returns NULL
    # data.frame(a = 1)[NULL, NULL] returns data.frame(
    return(join_keys())
  } else if (!missing(i) && !missing(j)) {
    if (
      !any(
        checkmate::test_string(i),
        checkmate::test_number(i),
        checkmate::test_logical(i, len = length(x)) && sum(j) == 1
      ) ||
        !any(
          checkmate::test_string(j),
          checkmate::test_number(j),
          checkmate::test_logical(j, len = length(x)) && sum(j) == 1
        )
    ) {
      stop(
        "join_keys[i, j] - Can't extract keys for multiple pairs.",
        "When specifying a pair [i, j], both indices must point to a single key pair.\n",
        call. = FALSE
      )
    }
    if (is.numeric(i)) i <- names(x)[i]
    if (is.numeric(j)) j <- names(x)[j]

    subset_x <- update_keys_given_parents(x[union(i, j)])
    return(subset_x[[i]][[j]])
  } else if (!missing(j)) {
    # ie. select all keys which have j as dataset_2
    # since list is symmetrical it is equivalent to selecting by i
    i <- j
  }

  checkmate::assert(
    combine = "or",
    checkmate::check_character(i),
    checkmate::check_numeric(i),
    checkmate::check_logical(i)
  )


  # Convert integer/logical index to named index
  if (checkmate::test_numeric(i) || checkmate::test_logical(i)) {
    i <- names(x)[i]
  }

  # When retrieving a relationship pair, it will also return the symmetric key
  new_jk <- new_join_keys()
  queue <- unique(i)
  bin <- character(0)

  # Need to iterate on a mutating queue if subset of a dataset will also
  #  select its parent as that parent might have relationships with others
  #  already selected.
  while (length(queue) > 0) {
    ix <- queue[1]
    queue <- queue[-1]
    bin <- c(bin, ix)

    ix_parent <- parent(x, ix)

    if (checkmate::test_string(ix_parent, min.chars = 1) && !ix_parent %in% c(queue, bin)) {
      queue <- c(queue, ix_parent)
    }

    ix_valid_names <- names(x[[ix]]) %in% c(queue, bin)

    new_jk[[ix]] <- x[[ix]][ix_valid_names]

    # Add primary key of parent
    if (length(ix_parent) > 0) {
      new_jk[[ix_parent]][[ix_parent]] <- x[[ix_parent]][[ix_parent]]
    }
  }

  common_parents_ix <- names(parents(x)) %in% names(new_jk) &
    parents(x) %in% names(new_jk)

  if (any(common_parents_ix)) parents(new_jk) <- parents(x)[common_parents_ix]

  new_jk
}

#' @rdname join_keys
#' @order 2
#'
#' @param directed (`logical(1)`) Flag that indicates whether it should create
#' a parent-child relationship between the datasets.
#'  - `TRUE` (default) `dataset_1` is the parent of `dataset_2`;
#'  - `FALSE` when the relationship is undirected.
#' @section Functions:
#' - `x[i, j] <- value`: Assignment of a key to pair `(i, j)`.
#' - `x[i] <- value`: This (without `j` parameter) **is not** a supported
#' operation for `join_keys`.
#' - `join_keys(x)[i, j] <- value`: Assignment to `join_keys` object stored in `x`,
#' such as a `teal_data` object or `join_keys` object itself.
#'
#' @export
#' @examples
#'
#' # Setting a new primary key ---
#'
#' jk["ds4", "ds4"] <- "pk4"
#' jk["ds5", "ds5"] <- "pk5"
#'
#' # Setting a single relationship pair ---
#'
#' jk["ds1", "ds4"] <- c("pk1" = "pk4")
#'
#' # Removing a key ---
#'
#' jk["ds5", "ds5"] <- NULL
`[<-.join_keys` <- function(x, i, j, directed = TRUE, value) {
  checkmate::assert_flag(directed)
  if (missing(i) || missing(j)) {
    stop("join_keys[i, j] specify both indices to set a key pair.")
  } else if (!missing(i) && is.null(i) || !missing(j) && is.null(j)) {
    stop("join_keys[i, j] neither i nor j can be NULL.")
  } else if (
    !any(
      checkmate::test_string(i),
      checkmate::test_number(i),
      checkmate::test_logical(i, len = length(x)) && sum(j) == 1
    ) ||
      !any(
        checkmate::test_string(j),
        checkmate::test_number(j),
        checkmate::test_logical(j, len = length(x)) && sum(j) == 1
      )
  ) {
    stop(
      "join_keys[i, j] <- Can't set keys to specified indices.\n",
      "When setting pair [i, j], both indices must point to a single key pair.\n",
      call. = FALSE
    )
  }

  # Handle join key removal separately
  if (is.null(value)) {
    x[[i]][[j]] <- NULL
    return(x)
  }

  c(x, join_key(i, j, value, directed))
}

#' @rdname join_keys
#'
#' @order 1000
#' @usage ## Preferred method is x[i, j] <- value
#' x[[i]][[j]] <- value
#'
#' @section Functions:
#' - `x[[i]][[j]] <- value`: It is equivalent as  `x[i, j] <- value`.
#'
#' @export
#' @examples
#'
#' # Setting via x[[i]] <- value ---
#'
#' jk <- join_keys()
#' jk[["ds6"]][["ds6"]] <- "pk6"
#' jk[["ds7"]] <- list(ds7 = "pk7", ds6 = c(pk7 = "pk6"))
#' jk[["ds7"]][["ds7"]] <- NULL # removes key
#'
#' jk
#'
#' @noRd
`[[<-.join_keys` <- function(x, i, value) {
  checkmate::assert(
    combine = "or",
    checkmate::check_string(i),
    checkmate::check_number(i),
    checkmate::check_logical(i, len = length(x))
  )
  checkmate::assert_list(value, names = "named", types = "character", null.ok = TRUE)
  if (checkmate::test_numeric(i) || checkmate::test_logical(i)) {
    i <- names(x)[[i]]
  }

  # Normalize values
  norm_value <- lapply(seq_along(value), function(.x) {
    join_key(i, names(value)[.x], value[[.x]])
  })
  names(norm_value) <- names(value)

  # Check if multiple modifications don't have a conflict
  repeated_value_ix <- names(value) %in% names(value)[duplicated(names(value))]
  repeated <- norm_value[repeated_value_ix]
  vapply(
    seq_along(repeated),
    function(.ix, .x_value = repeated[[.ix]], .x_name = names(.x_value[[1]])) {
      assert_compatible_keys2(
        .x_value,
        unlist(unname(
          repeated[-.ix][names(repeated[-.ix]) == .x_name]
        ), recursive = FALSE)
      )
    },
    logical(1)
  )

  norm_value <- lapply(norm_value, function(x) x[[1]][[1]])
  names(norm_value) <- names(value)

  # Safe to do as duplicated are the same
  norm_value[duplicated(names(norm_value))] <- NULL

  # Keep only elements with length > 0L
  norm_value <- Filter(length, norm_value)

  # Remove classes to use list-based get/assign operations
  new_x <- unclass(x)

  # In case a pair is removed, also remove the symmetric pair and update parents
  removed_names <- setdiff(names(new_x[[i]]), names(norm_value))
  for (.x in removed_names) {
    if (identical(parent(x, .x), i)) attr(new_x, "parents")[[.x]] <- NULL
    if (identical(parent(x, i), .x)) attr(new_x, "parents")[[i]] <- NULL

    new_x[[.x]][[i]] <- NULL
  }

  new_x[[i]] <- norm_value

  # Iterate on all new values to create symmetrical pair
  for (ds2 in names(norm_value)) {
    if (ds2 == i) next

    keep_value <- if (is.null(x)) list() else new_x[[ds2]]

    # Invert key
    new_value <- stats::setNames(names(norm_value[[ds2]]), norm_value[[ds2]])
    keep_value[[i]] <- new_value

    # Assign symmetrical
    new_x[[ds2]] <- keep_value
  }

  preserve_attr <- attributes(new_x)[!names(attributes(new_x)) %in% "names"]
  # Remove NULL or empty keys
  new_x <- Filter(function(x) length(x) != 0L, new_x)
  attributes(new_x) <- utils::modifyList(attributes(new_x), preserve_attr)

  #
  # restore class
  class(new_x) <- class(x)
  new_x
}

# get_code_dependency ----

#' Get code dependency of an object
#'
#' Extract subset of code required to reproduce specific object(s), including code producing side-effects.
#'
#' Given a character vector with code, this function will extract the part of the code responsible for creating
#' the variables specified by `names`.
#' This includes the final call that creates the variable(s) in question as well as all _parent calls_,
#' _i.e._ calls that create variables used in the final call and their parents, etc.
#' Also included are calls that create side-effects like establishing connections.
#'
#' It is assumed that object dependency is established by using three assignment operators: `<-`, `=`, and `->` .
#' Other assignment methods (`assign`, `<<-`) or non-standard-evaluation methods are not supported.
#'
#' Side-effects are not detected automatically and must be marked in the code.
#' Add `# @linksto object` at the end of a line where a side-effect occurs to specify that this line is required
#' to reproduce a variable called `object`.
#'
#' @param code `character` with the code.
#' @param names `character` vector of object names.
#' @param check_names `logical(1)` flag specifying if a warning for non-existing names should be displayed.
#'
#' @return Character vector, a subset of `code`.
#' Note that subsetting is actually done on the calls `code`, not necessarily on the elements of the vector.
#'
#' @keywords internal
get_code_dependency <- function(code, names, check_names = TRUE) {
  checkmate::assert_character(code)
  checkmate::assert_character(names, any.missing = FALSE)

  if (identical(code, character(0)) || identical(trimws(code), "")) {
    return(code)
  }

  code <- parse(text = code, keep.source = TRUE)
  pd <- utils::getParseData(code)
  calls_pd <- extract_calls(pd)

  if (check_names) {
    # Detect if names are actually in code.
    symbols <- unlist(lapply(calls_pd, function(call) call[call$token == "SYMBOL", "text"]))
    if (any(pd$text == "assign")) {
      assign_calls <- Filter(function(call) find_call(call, "assign"), calls_pd)
      ass_str <- unlist(lapply(assign_calls, function(call) call[call$token == "STR_CONST", "text"]))
      ass_str <- gsub("^['\"]|['\"]$", "", ass_str)
      symbols <- c(ass_str, symbols)
    }
    if (!all(names %in% unique(symbols))) {
      warning("Object(s) not found in code: ", toString(setdiff(names, symbols)))
    }
  }

  graph <- code_graph(calls_pd)
  ind <- unlist(lapply(names, function(x) graph_parser(x, graph)))

  lib_ind <- detect_libraries(calls_pd)

  as.character(code[unique(c(lib_ind, ind))])
}

#' Locate function call token
#'
#' Determine which row of parsed data is specific `SYMBOL_FUNCTION_CALL` token.
#'
#' Useful for determining occurrence of `assign` or `data` functions in an input call.
#'
#' @param call_pd `data.frame` as returned by `extract_calls()`
#' @param text `character(1)` to look for in `text` column of `call_pd`
#'
#' @return
#' Single integer specifying row in `call_pd` where `token` is `SYMBOL_FUNCTION_CALL` and `text` is `text`.
#' 0 if not found.
#'
#' @keywords internal
#' @noRd
find_call <- function(call_pd, text) {
  checkmate::check_data_frame(call_pd)
  checkmate::check_names(call_pd, must.include = c("token", "text"))
  checkmate::check_string(text)

  ans <- which(call_pd$token == "SYMBOL_FUNCTION_CALL" & call_pd$text == text)
  if (length(ans)) {
    ans
  } else {
    0L
  }
}

#' Split the result of `utils::getParseData()` into separate calls
#'
#' @param pd (`data.frame`) A result of `utils::getParseData()`.
#'
#' @return
#' A `list` of `data.frame`s.
#' Each element is a subset of `pd` corresponding to one call in the original code from which `pd` was obtained.
#' Only four columns (`"token"`, `"text"`, `"id"`, `"parent"`) are kept, the rest is discarded.
#'
#' @keywords internal
#' @noRd
extract_calls <- function(pd) {
  calls <- lapply(
    pd[pd$parent == 0, "id"],
    function(parent) {
      rbind(
        pd[pd$id == parent, c("token", "text", "id", "parent")],
        get_children(pd = pd, parent = parent)
      )
    }
  )
  calls <- Filter(function(call) !(nrow(call) == 1 && call$token == "';'"), calls)
  calls <- Filter(Negate(is.null), calls)
  calls <- fix_shifted_comments(calls)
  fix_arrows(calls)
}

#' @keywords internal
#' @noRd
get_children <- function(pd, parent) {
  idx_children <- abs(pd$parent) == parent
  children <- pd[idx_children, c("token", "text", "id", "parent")]
  if (nrow(children) == 0) {
    return(NULL)
  }

  if (parent > 0) {
    do.call(rbind, c(list(children), lapply(children$id, get_children, pd = pd)))
  }
}

#' Fixes edge case of comments being shifted to the next call.
#' @keywords internal
#' @noRd
fix_shifted_comments <- function(calls) {
  # If the first or the second token is a @linksto COMMENT,
  #  then it belongs to the previous call.
  if (length(calls) >= 2) {
    for (i in 2:length(calls)) {
      comment_idx <- grep("@linksto", calls[[i]][, "text"])
      if (isTRUE(comment_idx[1] <= 2)) {
        calls[[i - 1]] <- rbind(
          calls[[i - 1]],
          calls[[i]][seq_len(comment_idx[1]), ]
        )
        calls[[i]] <- calls[[i]][-seq_len(comment_idx[1]), ]
      }
    }
  }
  Filter(nrow, calls)
}

#' Fixes edge case of `<-` assignment operator being called as function,
#' which is \code{`<-`(y,x)} instead of traditional `y <- x`.
#' @keywords internal
#' @noRd
fix_arrows <- function(calls) {
  lapply(
    calls,
    function(call) {
      call[call$token == "SYMBOL_FUNCTION_CALL" & call$text == "`<-`", c("token", "text")] <- c("LEFT_ASSIGN", "<-")
      call[call$token == "SYMBOL_FUNCTION_CALL" & call$text == "`->`", c("token", "text")] <- c("RIGHT_ASSIGN", "->")
      call[call$token == "SYMBOL_FUNCTION_CALL" & call$text == "`<<-`", c("token", "text")] <- c("LEFT_ASSIGN", "<-")
      call[call$token == "SYMBOL_FUNCTION_CALL" & call$text == "`->>`", c("token", "text")] <- c("RIGHT_ASSIGN", "->")
      call[call$token == "SYMBOL_FUNCTION_CALL" & call$text == "`=`", c("token", "text")] <- c("LEFT_ASSIGN", "<-")
      call
    }
  )
}

# code_graph ----

#' Create object dependencies graph within parsed code
#'
#' Builds dependency graph that identifies dependencies between objects in parsed code.
#' Helps understand which objects depend on which.
#'
#' @param calls_pd `list` of `data.frame`s;
#'  result of `utils::getParseData()` split into subsets representing individual calls;
#'  created by `extract_calls()` function
#'
#' @return
#' A list (of length of input `calls_pd`) where each element represents one call.
#' Each element is a character vector listing names of objects that depend on this call
#' and names of objects that this call depends on.
#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a`
#' depends on objects `b` and `c`.
#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call.
#'
#' @keywords internal
#' @noRd
code_graph <- function(calls_pd) {
  cooccurrence <- extract_occurrence(calls_pd)

  side_effects <- extract_side_effects(calls_pd)

  mapply(function(x, y) unique(c(x, y)), side_effects, cooccurrence, SIMPLIFY = FALSE)
}

#' Extract object occurrence
#'
#' Extracts objects occurrence within calls passed by `calls_pd`.
#' Also detects which objects depend on which within a call.
#'
#' @param calls_pd `list` of `data.frame`s;
#'  result of `utils::getParseData()` split into subsets representing individual calls;
#'  created by `extract_calls()` function
#'
#' @return
#' A list (of length of input `calls_pd`) where each element represents one call.
#' Each element is a character vector listing names of objects that depend on this call
#' and names of objects that this call depends on.
#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a`
#' depends on objects `b` and `c`.
#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call.
#'
#' @keywords internal
#' @noRd
extract_occurrence <- function(calls_pd) {
  is_in_function <- function(x) {
    # If an object is a function parameter,
    # then in calls_pd there is a `SYMBOL_FORMALS` entry for that object.
    function_id <- x[x$token == "FUNCTION", "parent"]
    if (length(function_id)) {
      x$id %in% get_children(x, function_id)$id
    } else {
      rep(FALSE, nrow(x))
    }
  }
  lapply(
    calls_pd,
    function(call_pd) {
      # Handle data(object)/data("object")/data(object, envir = ) independently.
      data_call <- find_call(call_pd, "data")
      if (data_call) {
        sym <- call_pd[data_call + 1, "text"]
        return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))
      }
      # Handle assign(x = ).
      assign_call <- find_call(call_pd, "assign")
      if (assign_call) {
        # Check if parameters were named.
        # "','" is for unnamed parameters, where "SYMBOL_SUB" is for named.
        # "EQ_SUB" is for `=` appearing after the name of the named parameter.
        if (any(call_pd$token == "SYMBOL_SUB")) {
          params <- call_pd[call_pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"]
          # Remove sequence of "=", ",".
          if (length(params > 1)) {
            remove <- integer(0)
            for (i in 2:length(params)) {
              if (params[i - 1] == "=" & params[i] == ",") {
                remove <- c(remove, i - 1, i)
              }
            }
            if (length(remove)) params <- params[-remove]
          }
          pos <- match("x", setdiff(params, ","), nomatch = match(",", params, nomatch = 0))
          if (!pos) {
            return(character(0L))
          }
          # pos is indicator of the place of 'x'
          # 1. All parameters are named, but none is 'x' - return(character(0L))
          # 2. Some parameters are named, 'x' is in named parameters: match("x", setdiff(params, ","))
          # - check "x" in params being just a vector of named parameters.
          # 3. Some parameters are named, 'x' is not in named parameters
          # - check first appearance of "," (unnamed parameter) in vector parameters.
        } else {
          # Object is the first entry after 'assign'.
          pos <- 1
        }
        sym <- call_pd[assign_call + pos, "text"]
        return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))
      }

      # What occurs in a function body is not tracked.
      x <- call_pd[!is_in_function(call_pd), ]
      sym_cond <- which(x$token %in% c("SYMBOL", "SYMBOL_FUNCTION_CALL"))

      if (length(sym_cond) == 0) {
        return(character(0L))
      }
      # Watch out for SYMBOLS after $ and @. For x$a x@a: x is object, a is not.
      # For x$a, a's ID is $'s ID-2 so we need to remove all IDs that have ID = $ID - 2.
      dollar_ids <- x[x$token %in% c("'$'", "'@'"), "id"]
      if (length(dollar_ids)) {
        object_ids <- x[sym_cond, "id"]
        after_dollar <- object_ids[(object_ids - 2) %in% dollar_ids]
        sym_cond <- setdiff(sym_cond, which(x$id %in% after_dollar))
      }

      ass_cond <- grep("ASSIGN", x$token)
      if (!length(ass_cond)) {
        return(c("<-", unique(x[sym_cond, "text"])))
      }

      sym_cond <- sym_cond[sym_cond > ass_cond] # NOTE 1
      # If there was an assignment operation detect direction of it.
      if (unique(x$text[ass_cond]) == "->") { # NOTE 2
        sym_cond <- rev(sym_cond)
      }

      append(unique(x[sym_cond, "text"]), "<-", after = 1)

      ### NOTE 2: What if there are 2 assignments: e.g. a <- b -> c.
      ### NOTE 1: For cases like 'eval(expression(b <- b + 2))' removes 'eval(expression('.
    }
  )
}

#' Extract side effects
#'
#' Extracts all object names from the code that are marked with `@linksto` tag.
#'
#' The code may contain functions calls that create side effects, e.g. modify the environment.
#' Static code analysis may be insufficient to determine which objects are created or modified by such a function call.
#' The `@linksto` comment tag is introduced to mark a call as having a (side) effect on one or more objects.
#' With this tag a complete object dependency structure can be established.
#' Read more about side effects and the usage of `@linksto` tag in [`get_code_dependencies()`] function.
#'
#' @param calls_pd `list` of `data.frame`s;
#'  result of `utils::getParseData()` split into subsets representing individual calls;
#'  created by `extract_calls()` function
#'
#' @return
#' A list of length equal to that of `calls_pd`, where each element is a character vector of names of objects
#' depending a call tagged with `@linksto` in a corresponding element of `calls_pd`.
#'
#' @keywords internal
#' @noRd
extract_side_effects <- function(calls_pd) {
  lapply(
    calls_pd,
    function(x) {
      linksto <- grep("@linksto", x[x$token == "COMMENT", "text"], value = TRUE)
      unlist(strsplit(sub("\\s*#\\s*@linksto\\s+", "", linksto), "\\s+"))
    }
  )
}

# graph_parser ----

#' Return the indices of calls needed to reproduce an object
#'
#' @param x The name of the object to return code for.
#' @param graph A result of `code_graph()`.
#'
#' @return
#' Integer vector of indices that can be applied to `graph` to obtain all calls required to reproduce object `x`.
#'
#' @keywords internal
#' @noRd
graph_parser <- function(x, graph) {
  occurrence <- vapply(
    graph, function(call) {
      ind <- match("<-", call, nomatch = length(call) + 1L)
      x %in% call[seq_len(ind - 1L)]
    },
    logical(1)
  )

  dependencies <- lapply(graph[occurrence], function(call) {
    ind <- match("<-", call, nomatch = 0L)
    call[(ind + 1L):length(call)]
  })
  dependencies <- setdiff(unlist(dependencies), x)

  if (length(dependencies) && any(occurrence)) {
    dependency_ids <- lapply(dependencies, function(dependency) {
      graph_parser(dependency, graph[1:max(which(occurrence))])
    })
    sort(unique(c(which(occurrence), unlist(dependency_ids))))
  } else {
    which(occurrence)
  }
}


# default_side_effects --------------------------------------------------------------------------------------------

#' Detect library calls
#'
#' Detects `library()` and `require()` function calls.
#'
#' @param calls_pd `list` of `data.frame`s;
#'  result of `utils::getParseData()` split into subsets representing individual calls;
#'  created by `extract_calls()` function
#'
#' @return
#' Integer vector of indices that can be applied to `graph` (result of `code_graph()`) to obtain all calls containing
#' `library()` or `require()` calls that are always returned for reproducibility.
#'
#' @keywords internal
#' @noRd
detect_libraries <- function(calls_pd) {
  defaults <- c("library", "require")

  which(
    vapply(
      calls_pd,
      function(call) {
        any(call$token == "SYMBOL_FUNCTION_CALL" & call$text %in% defaults)
      },
      logical(1)
    )
  )
}

#' Variable labels
#'
#' Get or set variable labels in a `data.frame`.
#'
#' @details Variable labels can be stored as a `label` attribute set on individual variables.
#' These functions get or set this attribute, either on all (`col_labels`) or some variables (`col_relabel`).
#'
#' @param x (`data.frame` or `DataFrame`) data object
#' @param fill (`logical(1)`) specifying what to return if variable has no label
#' @param value (`character`) vector of variable labels of length equal to number of columns in `x`;
#'  if named, names must match variable names in `x` and will be used as key to set labels;
#'  use `NA` to remove label from variable
#' @param ... name-value pairs, where name corresponds to a variable name in `x`
#'  and value is the new variable label
#'
#' @return
#' For `col_labels`, named character vector of variable labels, the names being the corresponding variable names.
#' If the `label` attribute is missing, the vector elements will be
#' the variable names themselves if `fill = TRUE` and `NA` if `fill = FALSE`.
#'
#' For `col_labels<-` and `col_relabel`, copy of `x` with variable labels modified.
#'
#' @examples
#' x <- iris
#' col_labels(x)
#' col_labels(x) <- paste("label for", names(iris))
#' col_labels(x)
#' y <- col_relabel(x, Sepal.Length = "Sepal Length of iris flower")
#' col_labels(y)
#'
#' @source These functions were taken from
#' [formatters](https://cran.r-project.org/package=formatters) package, to reduce the complexity of
#' the dependency tree and rewritten.
#'
#' @rdname col_labels
#' @export
#'
col_labels <- function(x, fill = FALSE) {
  checkmate::test_multi_class(x, c("data.frame", "DataFrame"))
  checkmate::assert_flag(fill)

  if (ncol(x) == 0L) {
    return(character(0L))
  }

  labels <- lapply(x, attr, "label")

  nulls <- vapply(labels, is.null, logical(1L))
  if (any(nulls)) {
    labels[nulls] <-
      if (fill) {
        colnames(x)[nulls]
      } else {
        NA_character_
      }
  }

  not_char <- !vapply(labels, checkmate::test_string, logical(1L), na.ok = TRUE)
  if (any(not_char)) {
    stop("labels for variables ", toString(names(not_char[not_char])), "are not character strings")
  }

  unlist(labels, recursive = FALSE)
}

#' @rdname col_labels
#' @export
`col_labels<-` <- function(x, value) {
  checkmate::test_multi_class(x, c("data.frame", "DataFrame"))
  checkmate::assert_character(value)
  checkmate::assert_true(
    ncol(x) == length(value),
    .var.name = "Length of value is equal to the number of columns"
  )

  if (is.null(names(value))) {
    names(value) <- names(x)
  }
  x[names(value)] <- mapply(`attr<-`, x = x[names(value)], which = "label", value = value, SIMPLIFY = FALSE)
  x
}

#' @rdname col_labels
#' @export
col_relabel <- function(x, ...) {
  checkmate::test_multi_class(x, c("data.frame", "DataFrame"))
  if (missing(...)) {
    return(x)
  }
  dots <- list(...)
  varnames <- names(dots)

  checkmate::assert_subset(varnames, names(x), .var.name = "names of ...")
  lapply(dots, checkmate::assert_string, .var.name = "element of ...")

  x[varnames] <- mapply(`attr<-`, x = x[varnames], which = "label", value = dots, SIMPLIFY = FALSE, USE.NAMES = FALSE)
  x
}

#' Verify code reproducibility
#'
#' Checks whether code in `teal_data` object reproduces the stored objects.
#'
#' If objects created by code in the `@code` slot of `x` are `all_equal` to the contents of the `@env` slot,
#' the function updates the `@verified` slot to `TRUE` in the returned `teal_data` object.
#' Once verified, the slot will always be set to `TRUE`.
#' If the `@code` fails to recreate objects in `teal_data@env`, an error is raised.
#'
#' @return Input `teal_data` object or error.
#'
#' @param x `teal_data` object
#' @examples
#' tdata1 <- teal_data()
#' tdata1 <- within(tdata1, {
#'   a <- 1
#'   b <- a^5
#'   c <- list(x = 2)
#' })
#' verify(tdata1)
#'
#' tdata2 <- teal_data(x1 = iris, code = "x1 <- iris")
#' verify(tdata2)
#' verify(tdata2)@verified
#' tdata2@verified
#'
#' tdata3 <- teal_data()
#' tdata3 <- within(tdata3, {
#'   stop("error")
#' })
#' try(verify(tdata3)) # fails
#'
#'
#' a <- 1
#' b <- a + 2
#' c <- list(x = 2)
#' d <- 5
#' tdata4 <- teal_data(
#'   a = a, b = b, c = c, d = d,
#'   code = "a <- 1
#'           b <- a
#'           c <- list(x = 2)
#'           e <- 1"
#' )
#' tdata4
#' try(verify(tdata4)) # fails
#'
#' @name verify
#' @rdname verify
#' @aliases verify,teal_data-method
#' @aliases verify,qenv.error-method
#'
#' @export
setGeneric("verify", function(x) standardGeneric("verify"))
setMethod("verify", signature = "teal_data", definition = function(x) {
  if (x@verified) {
    return(x)
  }
  x_name <- deparse(substitute(x))
  y <- eval_code(teal_data(), get_code(x))

  if (inherits(y, "qenv.error")) {
    stop(conditionMessage(y), call. = FALSE)
  }

  reproduced <- isTRUE(all.equal(x@env, y@env))
  if (reproduced) {
    x@verified <- TRUE
    methods::validObject(x)
    x
  } else {
    error <- "Code verification failed."

    objects_diff <- vapply(
      intersect(names(x@env), names(y@env)),
      function(element) {
        isTRUE(all.equal(x@env[[element]], y@env[[element]]))
      },
      logical(1)
    )

    names_diff_other <- setdiff(names(y@env), names(x@env))
    names_diff_inenv <- setdiff(names(x@env), names(y@env))

    if (length(objects_diff)) {
      error <- c(
        error,
        paste0("Object(s) recreated with code that have different structure in ", x_name, ":"),
        paste0("  \u2022 ", names(which(!objects_diff)))
      )
    }
    if (length(names_diff_inenv)) {
      error <- c(
        error,
        paste0("Object(s) not created with code that exist in ", x_name, ":"),
        paste0("  \u2022 ", names_diff_inenv)
      )
    }
    if (length(names_diff_other)) {
      error <- c(
        error,
        paste0("Object(s) created with code that do not exist in ", x_name, ":"),
        paste0("  \u2022 ", names_diff_other)
      )
    }

    stop(paste(error, collapse = "\n"), call. = FALSE)
  }
})
setMethod("verify", signature = "qenv.error", definition = function(x) {
  stop(conditionMessage(x), call. = FALSE)
})

#' Topological graph sort
#'
#' Graph is a `list` which for each node contains a vector of child nodes
#' in the returned list, parents appear before their children.
#'
#' Implementation of `Kahn` algorithm with a modification to maintain the order of input elements.
#'
#' @param graph (`named list`) with node vector elements
#' @keywords internal
topological_sort <- function(graph) {
  # compute in-degrees
  in_degrees <- list()
  for (node in names(graph)) {
    in_degrees[[node]] <- 0
    for (to_edge in graph[[node]]) {
      in_degrees[[to_edge]] <- 0
    }
  }

  for (node in graph) {
    for (to_edge in node) {
      in_degrees[[to_edge]] <- in_degrees[[to_edge]] + 1
    }
  }

  # sort
  visited <- 0
  sorted <- list()
  zero_in <- list()
  for (node in names(in_degrees)) {
    if (in_degrees[[node]] == 0) zero_in <- append(zero_in, node)
  }
  zero_in <- rev(zero_in)

  while (length(zero_in) != 0) {
    visited <- visited + 1
    sorted <- c(zero_in[[1]], sorted)
    for (edge_to in graph[[zero_in[[1]]]]) {
      in_degrees[[edge_to]] <- in_degrees[[edge_to]] - 1
      if (in_degrees[[edge_to]] == 0) {
        zero_in <- append(zero_in, edge_to, 1)
      }
    }
    zero_in[[1]] <- NULL
  }

  if (visited != length(in_degrees)) {
    stop(
      "Graph is not a directed acyclic graph. Cycles involving nodes: ",
      paste0(setdiff(names(in_degrees), sorted), collapse = " ")
    )
  } else {
    return(sorted)
  }
}

#' Checks whether a graph is a `Directed Acyclic Graph (DAG)`
#'
#' @inheritParams topological_sort
#' @return `logical(1)` `TRUE` if the graph is a `DAG`; `FALSE` otherwise
#' @keywords internal
is_dag <- function(graph) {
  inherits(try(topological_sort(graph), silent = TRUE), "try-error")
}

#' Names of data sets in `teal_data` object
#'
#' Get or set the value of the `datanames` slot.
#'
#' The `@datanames` slot in a `teal_data` object specifies which of the variables stored in its environment
#' (the `@env` slot) are data sets to be taken into consideration.
#' The contents of `@datanames` can be specified upon creation and default to all variables in `@env`.
#' Variables created later, which may well be data sets, are not automatically considered such.
#' Use this function to update the slot.
#'
#' @param x (`teal_data`) object to access or modify
#' @param value (`character`) new value for `@datanames`; all elements must be names of variables existing in `@env`
#'
#' @return The contents of `@datanames` or `teal_data` object with updated `@datanames`.
#'
#' @examples
#' td <- teal_data(iris = iris)
#' td <- within(td, mtcars <- mtcars)
#' datanames(td)
#'
#' datanames(td) <- c("iris", "mtcars")
#' datanames(td)
#'
#' @name datanames
#' @aliases datanames,teal_data-method
#' @aliases datanames<-,teal_data,character-method
#' @aliases datanames,qenv.error-method
#' @aliases datanames<-,qenv.error,character-method

#' @rdname datanames
#' @export
setGeneric("datanames", function(x) standardGeneric("datanames"))
setMethod("datanames", signature = "teal_data", definition = function(x) {
  x@datanames
})
setMethod("datanames", signature = "qenv.error", definition = function(x) {
  NULL
})

#' @rdname datanames
#' @export
setGeneric("datanames<-", function(x, value) standardGeneric("datanames<-"))
setMethod("datanames<-", signature = c("teal_data", "character"), definition = function(x, value) {
  checkmate::assert_subset(value, names(x@env))
  x@datanames <- value
  methods::validObject(x)
  x
})
setMethod("datanames<-", signature = c("qenv.error", "character"), definition = function(x, value) {
  methods::validObject(x)
  x
})

#' @rdname join_keys
#' @order 7
#' @export
format.join_keys <- function(x, ...) {
  if (length(x) > 0) {
    my_parents <- parents(x)
    names_sorted <- topological_sort(my_parents)
    names <- union(names_sorted, names(x))
    x_implicit <- update_keys_given_parents(x)
    out <- lapply(names, function(i) {
      out_i <- lapply(union(i, names(x[[i]])), function(j) {
        direction <- if (identical(my_parents[[j]], i)) {
          "  <-- "
        } else if (identical(my_parents[[i]], j)) {
          "  --> "
        } else if (!identical(i, j)) {
          "  <-> "
        } else {
          ""
        }

        keys <- x[[i]][[j]]
        sprintf(
          "%s%s: [%s]",
          direction, j,
          if (length(keys) == 0) "no primary keys" else toString(keys)
        )
      })

      implicit_datasets <- setdiff(names(x_implicit[[i]]), names(x[[i]]))
      if (length(implicit_datasets) > 0) {
        out_i <- c(
          out_i,
          paste0(
            "  --* (implicit via parent with): ",
            paste(implicit_datasets, collapse = ", ")
          )
        )
      }

      paste(out_i, collapse = "\n")
    })
    paste(
      c(
        sprintf("A join_keys object containing foreign keys between %s datasets:", length(x)),
        out
      ),
      collapse = "\n"
    )
  } else {
    "An empty join_keys object."
  }
}

#' @rdname join_keys
#' @order 7
#' @export
print.join_keys <- function(x, ...) {
  cat(format(x, ...), "\n")
  invisible(x)
}

#' Manage relationships between datasets using `join_keys`
#' @order 1
#' @name join_keys
#'
#' @usage ## Constructor, getter and setter
#' join_keys(...)
#'
#' @description
#' Facilitates the creation and retrieval of relationships between datasets.
#' `join_keys` class extends `list` and contains keys connecting pairs of datasets.
#' Each element of the list contains keys for specific dataset.
#' Each dataset can have a relationship with itself (primary key) and with other datasets.
#'
#' Note that `join_keys` list is symmetrical and assumes a default direction, that is:
#' when keys are set between `ds1` and `ds2`, it defines `ds1` as the parent
#' in a parent-child relationship and the mapping is automatically mirrored between
#' `ds2` and `ds1`.
#'
#' @section Methods (by class):
#' - `join_keys()`: Returns an empty `join_keys` object when called without arguments.
#' - `join_keys(join_keys)`: Returns itself.
#' - `join_keys(teal_data)`: Returns the `join_keys` object contained in `teal_data` object.
#' - `join_keys(...)`: Creates a new object with one or more `join_key_set` parameters.
#'
#' @param ... (optional)
#' - either `teal_data` or `join_keys` object to extract `join_keys`
#' - or any number of `join_key_set` objects to create `join_keys`
#' - or nothing to create an empty `join_keys`
#' @param value For `x[i, j, directed = TRUE)] <- value` (named/unnamed `character`)
#' Column mapping between datasets.
#'
#' For `join_keys(x) <- value`: (`join_key_set` or list of `join_key_set`) relationship
#' pairs to add to `join_keys` list.
#'
#'
#' @return `join_keys` object.
#'
#' @examples
#' # Creating a new join keys ----
#'
#' jk <- join_keys(
#'   join_key("ds1", "ds1", "pk1"),
#'   join_key("ds2", "ds2", "pk2"),
#'   join_key("ds3", "ds3", "pk3"),
#'   join_key("ds1", "ds2", c(pk1 = "pk2")),
#'   join_key("ds1", "ds3", c(pk1 = "pk3"))
#' )
#'
#' jk
#'
#' @export
#'
#' @seealso [join_key()] for creating `join_keys_set`,
#' [parents()] for parent operations,
#' [teal_data()] for `teal_data` constructor _and_
#' [default_cdisc_join_keys] for default `CDISC` keys.
#'
join_keys <- function(...) {
  if (missing(...)) {
    return(new_join_keys())
  }
  x <- rlang::list2(...)
  if (length(x) == 1L) {
    UseMethod("join_keys", x[[1]])
  } else {
    join_keys.default(...)
  }
}

#' @rdname join_keys
#' @order 1
#' @export
join_keys.default <- function(...) {
  c(new_join_keys(), ...)
}

#' @rdname join_keys
#' @order 1
#' @export
join_keys.join_keys <- function(...) {
  x <- rlang::list2(...)
  x[[1]]
}

#' @rdname join_keys
#' @order 1
#' @export
join_keys.teal_data <- function(...) {
  x <- rlang::list2(...)
  x[[1]]@join_keys
}

#' @rdname join_keys
#' @order 5
#'
#' @section Functions:
#' - `join_keys(x) <- value`: Assignment of the `join_keys` in object with `value`.
#' `value` needs to be an object of class `join_keys` or `join_key_set`.
#'
#' @param x (`join_keys`) empty object to set the new relationship pairs.
#' `x` is typically an object of `join_keys` class. When called with the `join_keys(x)`
#' or `join_keys(x) <- value` then it can also take a supported class (`teal_data`, `join_keys`)
#'
#' @export
`join_keys<-` <- function(x, value) {
  checkmate::assert_class(value, classes = c("join_keys", "list"))
  UseMethod("join_keys<-", x)
}

#' @rdname join_keys
#' @order 5
#' @export
#' @examples
#'
#' # Assigning keys via join_keys(x)[i, j] <- value ----
#'
#' obj <- join_keys()
#' # or
#' obj <- teal_data()
#'
#' join_keys(obj)["ds1", "ds1"] <- "pk1"
#' join_keys(obj)["ds2", "ds2"] <- "pk2"
#' join_keys(obj)["ds3", "ds3"] <- "pk3"
#' join_keys(obj)["ds1", "ds2"] <- c(pk1 = "pk2")
#' join_keys(obj)["ds1", "ds3"] <- c(pk1 = "pk3")
#'
#' identical(jk, join_keys(obj))
`join_keys<-.join_keys` <- function(x, value) {
  value
}

#' @rdname join_keys
#' @order 5
#' @export
#' @examples
#'
#' # Setter for join_keys within teal_data ----
#'
#' td <- teal_data()
#' join_keys(td) <- jk
#'
#' join_keys(td)["ds1", "ds2"] <- "new_key"
#' join_keys(td) <- c(join_keys(td), join_keys(join_key("ds3", "ds2", "key3")))
#' join_keys(td)
`join_keys<-.teal_data` <- function(x, value) {
  join_keys(x@join_keys) <- value
  x
}

#' Internal constructor
#'
#' @return an empty `join_keys` list
#'
#' @keywords internal
new_join_keys <- function() {
  structure(
    list(),
    class = c("join_keys", "list"),
    "parents" = list()
  )
}

#' Get and set parents in `join_keys` object
#'
#' `parents()` facilitates the creation of dependencies between datasets by
#' assigning a parent-child relationship.
#'
#' Each element is defined by a `list` element, where `list("child" = "parent")`.
#'
#' @param x (`join_keys` or `teal_data`) object that contains "parents" information
#' to retrieve or manipulate.
#'
#' @return a `list` of `character` representing the parents.
#'
#' @export
#' @seealso [join_keys()]
parents <- function(x) {
  UseMethod("parents", x)
}

#' @describeIn parents Retrieves parents of `join_keys` object.
#' @export
#' @examples
#' # Get parents of join_keys ---
#'
#' jk <- default_cdisc_join_keys["ADEX"]
#' parents(jk)
parents.join_keys <- function(x) {
  if (is.null(attr(x, "parents"))) list() else attr(x, "parents")
}

#' @describeIn parents Retrieves parents of `join_keys` inside `teal_data` object.
#' @export
#' @examples
#'
#' # Get parents of join_keys inside teal_data object ---
#'
#' td <- teal_data(
#'   ADSL = rADSL,
#'   ADTTE = rADTTE,
#'   ADRS = rADRS,
#'   join_keys = default_cdisc_join_keys[c("ADSL", "ADTTE", "ADRS")]
#' )
#' parents(td)
parents.teal_data <- function(x) {
  parents(x@join_keys)
}

#' @describeIn parents Assignment of parents in `join_keys` object.
#'
#' @param value (`named list`) of `character` vectors.
#'
#' @export
`parents<-` <- function(x, value) {
  UseMethod("parents<-", x)
}

#' @describeIn parents Assignment of parents of `join_keys` object.
#' @export
#' @examples
#'
#' # Assignment of parents ---
#'
#' jk <- join_keys(
#'   join_key("ds1", "ds2", "id"),
#'   join_key("ds5", "ds6", "id"),
#'   join_key("ds7", "ds6", "id")
#' )
#'
#' parents(jk) <- list(ds2 = "ds1")
#'
#' # Setting individual parent-child relationship
#'
#' parents(jk)["ds6"] <- "ds5"
#' parents(jk)["ds7"] <- "ds6"
`parents<-.join_keys` <- function(x, value) {
  checkmate::assert_list(value, types = "character", names = "named")

  new_parents <- list()

  for (dataset in names(value)) {
    # Custom .var.name so it is verbose and helpful for users
    checkmate::assert_string(value[[dataset]], .var.name = sprintf("value[[\"%s\"]]", dataset))

    parent <- new_parents[[dataset]]
    checkmate::assert(
      checkmate::check_null(parent),
      checkmate::check_true(
        length(parent) == 0 &&
          length(value[[dataset]]) == 0
      ),
      checkmate::check_true(parent == value[[dataset]]),
      "Please check the difference between provided datasets parents and provided join_keys parents.",
      .var.name = "value"
    )
    if (is.null(parent)) {
      new_parents[[dataset]] <- value[[dataset]]
    }
  }

  if (is_dag(new_parents)) {
    stop("Cycle detected in a parent and child dataset graph.")
  }

  attr(x, "parents") <- new_parents

  assert_parent_child(x)
  x
}

#' @describeIn parents Assignment of parents of `join_keys` inside `teal_data` object.
#' @export
#' @examples
#'
#' # Assignment of parents of join_keys inside teal_data object ---
#'
#' parents(td) <- list("ADTTE" = "ADSL") # replace existing
#' parents(td)["ADRS"] <- "ADSL" # add new parent
`parents<-.teal_data` <- function(x, value) {
  parents(x@join_keys) <- value
  x
}

#' @describeIn parents Getter for individual parent.
#'
#' @param dataset_name (`character(1)`) Name of dataset to query on their parent.
#'
#' @return For `parent(x, dataset_name)` returns `NULL` if parent does not exist.
#'
#' @export
#'
#' @examples
#'
#' # Get individual parent ---
#'
#' parent(jk, "ds2")
#' parent(td, "ADTTE")
parent <- function(x, dataset_name) {
  checkmate::assert_string(dataset_name)
  # assert x is performed by parents()
  parents(x)[[dataset_name]]
}

#' The names of a `join_keys` object
#'
#' @inheritParams base::`names<-`
#' @export
`names<-.join_keys` <- function(x, value) {
  new_x <- unclass(x)
  parent_list <- parents(x)
  # Update inner keys
  for (old_name in setdiff(names(new_x), value)) {
    old_entry <- new_x[[old_name]]
    new_name <- value[names(new_x) == old_name]

    # Change 2nd-tier first
    for (sub_name in names(old_entry)) {
      names(new_x[[sub_name]])[names(new_x[[sub_name]]) == old_name] <- new_name
    }

    # Change in first tier
    names(new_x)[names(new_x) == old_name] <- new_name

    # changing name in the parents
    if (length(parent_list)) {
      names(parent_list)[names(parent_list) == old_name] <- new_name
      ind <- vapply(parent_list, identical, logical(1), old_name)
      parent_list[ind] <- new_name
      attr(new_x, "parents") <- parent_list
    }
  }

  class(new_x) <- c("join_keys", "list")
  new_x
}

#' Comprehensive data integration function for `teal` applications
#'
#' @description
#' `r lifecycle::badge("stable")`
#'
#' Universal function to pass data to teal application.
#'
#' @param ... any number of objects (presumably data objects) provided as `name = value` pairs.
#'
#' @param join_keys (`join_keys`) object or a single (`join_key_set`) object.
#'
#'  (optional) object with dataset column relationships used for joining.
#'  If empty then no joins between pairs of objects.
#'
#' @param code (`character`, `language`) optional code to reproduce the datasets provided in `...`.
#'  Note this code is not executed and the `teal_data` may not be reproducible
#'
#' @param check (`logical`) `r lifecycle::badge("deprecated")`
#'  Use [verify()] to verify code reproducibility .
#'
#' @return A `teal_data` object.
#'
#' @export
#'
#' @examples
#' teal_data(x1 = iris, x2 = mtcars)
#'
teal_data <- function(...,
                      join_keys = teal.data::join_keys(),
                      code = character(0),
                      check) {
  data_objects <- rlang::list2(...)
  if (inherits(join_keys, "join_key_set")) {
    join_keys <- teal.data::join_keys(join_keys)
  }
  if (!missing(check)) {
    lifecycle::deprecate_stop(
      when = "0.4.0",
      "teal_data(
        check = 'check argument is deprecated. Use `verify()` to verify code reproducibility.
        Find more information on https://github.com/insightsengineering/teal/discussions/945'
      )"
    )
  }

  if (
    checkmate::test_list(
      data_objects,
      types = c("TealDataConnector", "TealDataset", "TealDatasetConnector"),
      min.len = 1
    )
  ) {
    lifecycle::deprecate_stop(
      when = "0.4.0",
      "teal_data(
        data_objects = 'should use data directly. Using TealDatasetConnector and TealDataset is deprecated.
        Find more information on https://github.com/insightsengineering/teal/discussions/945'
      )"
    )
  } else {
    if (length(data_objects) > 0 && !checkmate::test_names(names(data_objects), type = "named")) {
      stop("Dot (`...`) arguments on `teal_data()` must be named.")
    }
    new_teal_data(
      data = data_objects,
      code = code,
      join_keys = join_keys
    )
  }
}

#' Generate sample `CDISC` datasets
#'
#' Retrieves example `CDISC` datasets for use in examples and testing.
#'
#' This function returns a dummy dataset and should only be used within `teal.data`.
#' Note that the datasets are not created and maintained in `teal.data`, they are retrieved its dependencies.
#'
#' @param dataname (`character(1)`) name of a `CDISC` dataset
#'
#' @return A `CDISC` dataset as a `data.frame`.
#'
#' @export
example_cdisc_data <- function(dataname = c("ADSL", "ADAE", "ADLB", "ADCM", "ADEX", "ADRS", "ADTR", "ADTTE", "ADVS")) {
  dataname <- sprintf("r%s", match.arg(dataname))
  dynGet(dataname, ifnotfound = stop(dataname, " not found"), inherits = TRUE)
}

#' Test if two objects are (nearly) equal
#'
#' `all.equal(target, current)` is a utility to compare `join_keys` objects target
#' and current testing `near equality`.
#'
#' If they are different, comparison is still made to some extent, and a report
#' of the differences is returned.
#' Do not use `all.equal` directly in if expressions—either use `isTRUE(all.equal(....))`
#' or identical if appropriate.
#'
#' The parents attribute comparison tolerates `NULL` and empty lists and will find
#' no difference.
#'
#' The list containing all the relationships is treated like a map and ignores
#' entries with `NULL` if they exist.
#'
#' @inheritParams base::all.equal
#' @param ... further arguments for different methods. Not used with `join_keys`.
#'
#' @seealso [base::all.equal()]
#' @keywords internal
#'
all.equal.join_keys <- function(target, current, ...) {
  .as_map <- function(.x) {
    old_attributes <- attributes(.x)
    # Keep only non-list attributes
    old_attributes[["names"]] <- NULL
    old_attributes[["original_class"]] <- old_attributes[["class"]]
    old_attributes[["class"]] <- NULL
    old_attributes[["parents"]] <- if (!length(old_attributes[["parents"]])) {
      list()
    } else {
      old_attributes[["parents"]][order(names(old_attributes[["parents"]]))]
    }
    attr(.x, "class") <- "list"

    # Remove nulls
    .x <- Filter(Negate(is.null), .x)

    # Sort named components, preserving positions of unnamed
    nx <- rlang::names2(.x)
    is_named <- nx != ""
    if (any(is_named)) {
      idx <- seq_along(.x)
      idx[is_named] <- idx[is_named][order(nx[is_named])]
      .x <- .x[idx]
    }
    new_attributes <- if (is.null(attributes(.x))) list() else attributes(.x)
    attributes(.x) <- utils::modifyList(old_attributes, new_attributes)
    .x
  }
  x <- .as_map(target)
  y <- .as_map(current)
  all.equal(x, y)
}

#' Get code from `teal_data` object
#'
#' Retrieve code from `teal_data` object.
#'
#' Retrieve code stored in `@code`, which (in principle) can be used to recreate all objects found in `@env`.
#' Use `datanames` to limit the code to one or more of the datasets enumerated in `@datanames`.
#' If the code has not passed verification (with [`verify()`]), a warning will be prepended.
#'
#' @section Extracting dataset-specific code:
#' When `datanames` is specified, the code returned will be limited  to the lines needed to _create_
#' the requested datasets. The code stored in the `@code` slot is analyzed statically to determine
#' which lines the datasets of interest depend upon. The analysis works well when objects are created
#' with standard infix assignment operators (see `?assignOps`) but it can fail in some situations.
#'
#' Consider the following examples:
#'
#' _Case 1: Usual assignments._
#' ```r
#' data <- teal_data() |>
#'   within({
#'     foo <- function(x) {
#'       x + 1
#'     }
#'     x <- 0
#'     y <- foo(x)
#'   })
#' get_code(data, datanames = "y")
#' ```
#' `x` has no dependencies, so `get_code(data, datanames = "x")` will return only the second call.\cr
#' `y` depends on `x` and `foo`, so `get_code(data, datanames = "y")` will contain all three calls.
#'
#' _Case 2: Some objects are created by a function's side effects._
#' ```r
#' data <- teal_data() |>
#'   within({
#'     foo <- function() {
#'       x <<- x + 1
#'     }
#'     x <- 0
#'     foo()
#'     y <- x
#'   })
#' get_code(data, datanames = "y")
#' ```
#' Here, `y` depends on `x` but `x` is modified by `foo` as a side effect (not by reassignment)
#' and so `get_code(data, datanames = "y")` will not return the `foo()` call.\cr
#' To overcome this limitation, code dependencies can be specified manually.
#' Lines where side effects occur can be flagged by adding "`# @linksto <object name>`" at the end.\cr
#' Note that `within` evaluates code passed to `expr` as is and comments are ignored.
#' In order to include comments in code one must use the `eval_code` function instead.
#'
#' ```r
#' data <- teal_data() |>
#'   eval_code("
#'     foo <- function() {
#'       x <<- x + 1
#'     }
#'     x <- 0
#'     foo() # @linksto x
#'     y <- x
#'   ")
#' get_code(data, datanames = "y")
#' ```
#' Now the `foo()` call will be properly included in the code required to recreate `y`.
#'
#' Note that two functions that create objects as side effects, `assign` and `data`, are handled automatically.
#'
#' Here are known cases where manual tagging is necessary:
#' - non-standard assignment operators, _e.g._ `%<>%`
#' - objects used as conditions in `if` statements: `if (<condition>)`
#' - objects used to iterate over in `for` loops: `for(i in <sequence>)`
#' - creating and evaluating language objects, _e.g._ `eval(<call>)`
#'
#'
#' @param object (`teal_data`)
#' @param datanames `r lifecycle::badge("experimental")` (`character`) vector of dataset names to return the code for.
#' For more details see the "Extracting dataset-specific code" section.
#' @param deparse (`logical`) flag specifying whether to return code as `character` (`deparse = TRUE`) or as
#' `expression` (`deparse = FALSE`).
#'
#' @return
#' Either a character string or an expression. If `datanames` is used to request a specific dataset,
#' only code that _creates_ that dataset (not code that uses it) is returned. Otherwise, all contents of `@code`.
#'
#' @examples
#' tdata1 <- teal_data()
#' tdata1 <- within(tdata1, {
#'   a <- 1
#'   b <- a^5
#'   c <- list(x = 2)
#' })
#' get_code(tdata1)
#' get_code(tdata1, datanames = "a")
#' get_code(tdata1, datanames = "b")
#'
#' tdata2 <- teal_data(x1 = iris, code = "x1 <- iris")
#' get_code(tdata2)
#' get_code(verify(tdata2))
#'
#' @rdname get_code
#' @aliases get_code,teal_data-method
#'
#' @export
setMethod("get_code", signature = "teal_data", definition = function(object, deparse = TRUE, datanames = NULL) {
  checkmate::assert_character(datanames, min.len = 1L, null.ok = TRUE)
  checkmate::assert_flag(deparse)

  code <- if (!is.null(datanames)) {
    get_code_dependency(object@code, datanames)
  } else {
    object@code
  }

  if (!object@verified) {
    code <- c("warning('Code was not verified for reproducibility.')", code)
  }

  if (deparse) {
    if (length(code) == 0) {
      code
    } else {
      paste(code, collapse = "\n")
    }
  } else {
    parse(text = paste(c("{", code, "}"), collapse = "\n"), keep.source = TRUE)
  }
})

setOldClass("join_keys")

#' Reproducible data
#'
#' Reproducible data container class. Inherits code tracking behavior from [`teal.code::qenv-class`].
#'
#' This class provides an isolated environment in which to store and process data with all code being recorded.
#' The environment, code, data set names, and data joining keys are stored in their respective slots.
#' These slots should never be accessed directly, use the provided get/set functions.
#'
#' As code is evaluated in `teal_data`, messages and warnings are stored in their respective slots.
#' If errors are raised, a `qenv.error` object is returned.
#'
#' @name teal_data-class
#' @rdname teal_data-class
#'
#' @slot env (`environment`) environment containing data sets and possibly auxiliary variables.
#'  Access variables with [get_var()] or [`[[`].
#'  No setter provided. Evaluate code to add variables into `@env`.
#' @slot code (`character`) vector representing code necessary to reproduce the contents of `@env`.
#'  Access with [get_code()].
#'  No setter provided. Evaluate code to append code to the slot.
#' @slot id (`integer`) random identifier assigned to each element of `@code`. Used internally.
#' @slot warnings (`character`) vector of warnings raised when evaluating code.
#'  Access with [get_warnings()].
#' @slot messages (`character`) vector of messages raised when evaluating code.
#' @slot join_keys (`join_keys`) object specifying joining keys for data sets in `@env`.
#'  Access or modify with [join_keys()].
#' @slot datanames (`character`) vector of names of data sets in `@env`.
#'  Used internally to distinguish them from auxiliary variables.
#'  Access or modify with [datanames()].
#' @slot verified (`logical(1)`) flag signifying that code in `@code` has been proven to yield contents of `@env`.
#'  Used internally. See [`verify()`] for more details.
#'
#' @import teal.code
#' @keywords internal
setClass(
  Class = "teal_data",
  contains = "qenv",
  slots = c(join_keys = "join_keys", datanames = "character", verified = "logical"),
  prototype = list(
    join_keys = join_keys(),
    datanames = character(0),
    verified = logical(0)
  )
)

#' Initialize `teal_data` object
#'
#' @name new_teal_data
#'
#' @param data (`named list`) of data objects.
#' @param code (`character` or `language`) code to reproduce the `data`.
#'   Accepts and stores comments also.
#' @param join_keys (`join_keys`) object
#' @param datanames (`character`) names of datasets passed to `data`.
#'   Needed when non-dataset objects are needed in the `env` slot.
#' @rdname new_teal_data
#' @keywords internal
new_teal_data <- function(data,
                          code = character(0),
                          join_keys = join_keys(),
                          datanames = union(names(data), names(join_keys))) {
  checkmate::assert_list(data)
  checkmate::assert_class(join_keys, "join_keys")
  if (is.null(datanames)) datanames <- character(0) # todo: allow to specify
  checkmate::assert_character(datanames)
  if (!any(is.language(code), is.character(code))) {
    stop("`code` must be a character or language object.")
  }

  if (is.language(code)) {
    code <- paste(lang2calls(code), collapse = "\n")
  }
  if (length(code)) {
    code <- paste(code, collapse = "\n")
  }
  verified <- (length(code) == 0L && length(data) == 0L)

  id <- sample.int(.Machine$integer.max, size = length(code))

  new_env <- rlang::env_clone(list2env(data), parent = parent.env(.GlobalEnv))
  lockEnvironment(new_env, bindings = TRUE)

  methods::new(
    "teal_data",
    env = new_env,
    code = code,
    warnings = rep("", length(code)),
    messages = rep("", length(code)),
    id = id,
    join_keys = join_keys,
    datanames = datanames,
    verified = verified
  )
}

#' @rdname join_keys
#' @order 4
#' @export
#' @examples
#' # Merging multiple `join_keys` objects ---
#'
#' jk_merged <- c(
#'   jk,
#'   join_keys(
#'     join_key("ds4", keys = c("pk4", "pk4_2")),
#'     join_key("ds3", "ds4", c(pk3 = "pk4_2"))
#'   )
#' )
c.join_keys <- function(...) {
  x <- rlang::list2(...)
  checkmate::assert_list(x, types = c("join_keys", "join_key_set"))

  Reduce(
    init = join_keys(),
    x = x,
    f = function(.x, .y) {
      out <- utils::modifyList(.x, .y, keep.null = FALSE)
      parents(out) <- utils::modifyList(attr(.x, "parents"), attr(.y, "parents"), keep.null = FALSE)
      out
    }
  )
}

#' @rdname join_keys
#' @order 4
#' @export
#' @examples
#' # note: merge can be performed with both join_keys and join_key_set
#'
#' jk_merged <- c(
#'   jk_merged,
#'   join_key("ds5", keys = "pk5"),
#'   join_key("ds1", "ds5", c(pk1 = "pk5"))
#' )
c.join_key_set <- function(...) {
  c.join_keys(...)
}

#' Data input for `teal` app
#'
#' @description
#' `r lifecycle::badge("stable")`
#'
#' Function is a wrapper around [teal_data()] and guesses `join_keys`
#' for given datasets whose names match ADAM datasets names.
#'
#' @inheritParams teal_data
#' @param join_keys (`join_keys`) object or a single (`join_key_set`) object
#'
#'   (optional) object with datasets column names used for joining.
#'   If empty then it would be automatically derived basing on intersection of datasets primary keys.
#'   For ADAM datasets it would be automatically derived.
#'
#' @return A `teal_data` object.
#'
#' @details This function checks if there were keys added to all data sets.
#'
#' @examples
#' data <- cdisc_data(
#'   join_keys = join_keys(
#'     join_key("ADSL", "ADTTE", c("STUDYID" = "STUDYID", "USUBJID" = "USUBJID"))
#'   )
#' )
#'
#' data <- within(data, {
#'   ADSL <- example_cdisc_data("ADSL")
#'   ADTTE <- example_cdisc_data("ADTTE")
#' })
#'
#' @export
#'
cdisc_data <- function(...,
                       join_keys = teal.data::default_cdisc_join_keys[names(rlang::list2(...))],
                       code = character(0),
                       check) {
  teal_data(..., join_keys = join_keys, code = code, check = check)
}

1		#' Show `teal_data` object
2		#'
3		#' Prints `teal_data` object.
4		#'
5		#' @param object (`teal_data`)
6		#' @return Input `teal_data` object.
7		#' @importFrom methods show
8		#' @examples
9		#' teal_data()
10		#' teal_data(x = iris, code = "x = iris")
11		#' verify(teal_data(x = iris, code = "x = iris"))
12		#' @export
13		setMethod("show", signature = "teal_data", function(object) {
14	!	if (object@verified) {
15	!	cat("\u2705\ufe0e", "verified teal_data object\n")
16		} else {
17	!	cat("\u2716", "unverified teal_data object\n")
18		}
19	!	rlang::env_print(object@env)
20		})

1		#' Deprecated `TealData` class and related functions
2		#'
3		#' @description
4		#' `r lifecycle::badge("deprecated")`
5		#'
6		#' The `TealData` class and associated functions have been deprecated. Use [teal_data()] instead.
7		#' See the [Migration guide](https://github.com/insightsengineering/teal/discussions/945) for details.
8		#'
9		#' @name TealData
10		#'
11		#' @param ... any argument supported in `TealData` related functions.
12		#'
13		#' @return nothing
14		#' @seealso [cdisc_data()] , [join_keys()]
15		#'
16		NULL
17
18		.deprecate_function <- function(what, details) {
19	!	lifecycle::deprecate_stop(
20	!	when = "0.4.0",
21	!	what = what,
22	!	details = details
23		)
24		}
25
26		deprecation_detail <- "Find more information on https://github.com/insightsengineering/teal/discussions/945"
27
28		#' @rdname TealData
29		#' @export
30		as_cdisc <- function(...) {
31	!	.deprecate_function("as_cdisc()", deprecation_detail)
32		}
33
34		#' @rdname TealData
35		#' @export
36		callable_code <- function(...) {
37	!	.deprecate_function("callable_code()", deprecation_detail)
38		}
39
40		#' @rdname TealData
41		#' @export
42		callable_function <- function(...) {
43	!	.deprecate_function("callable_function()", deprecation_detail)
44		}
45
46		#' @rdname TealData
47		#' @export
48		code_dataset_connector <- function(...) {
49	!	.deprecate_function("code_dataset_connector()", deprecation_detail)
50		}
51
52		#' @rdname TealData
53		#' @export
54		code_cdisc_dataset_connector <- function(...) {
55	!	.deprecate_function("code_cdisc_dataset_connector()", deprecation_detail)
56		}
57
58		#' @rdname TealData
59		#' @export
60		csv_dataset_connector <- function(...) {
61	!	.deprecate_function("csv_dataset_connector()", deprecation_detail)
62		}
63
64		#' @rdname TealData
65		#' @export
66		csv_cdisc_dataset_connector <- function(...) {
67	!	.deprecate_function("csv_cdisc_dataset_connector()", deprecation_detail)
68		}
69
70		#' @rdname TealData
71		#' @export
72		python_code <- function(...) {
73	!	.deprecate_function("python_code()", deprecation_detail)
74		}
75
76		#' @rdname TealData
77		#' @export
78		python_dataset_connector <- function(...) {
79	!	.deprecate_function("python_dataset_connector()", deprecation_detail)
80		}
81
82		#' @rdname TealData
83		#' @export
84		python_cdisc_dataset_connector <- function(...) {
85	!	.deprecate_function("python_cdisc_dataset_connector()", deprecation_detail)
86		}
87
88		#' @rdname TealData
89		#' @export
90		cdisc_data_connector <- function(...) {
91	!	.deprecate_function("cdisc_data_connector()", deprecation_detail)
92		}
93
94		#' @rdname TealData
95		#' @export
96		cdisc_dataset <- function(...) {
97	!	.deprecate_function("cdisc_dataset()", deprecation_detail)
98		}
99
100		#' @rdname TealData
101		#' @export
102		cdisc_dataset_connector <- function(...) {
103	!	.deprecate_function("cdisc_dataset_connector()", deprecation_detail)
104		}
105
106		#' @rdname TealData
107		#' @export
108		cdisc_dataset_connector_file <- function(...) {
109	!	.deprecate_function("cdisc_dataset_connector_file()", deprecation_detail)
110		}
111
112		#' @rdname TealData
113		#' @export
114		cdisc_dataset_file <- function(...) {
115	!	.deprecate_function("cdisc_dataset_file()", deprecation_detail)
116		}
117
118		#' @rdname TealData
119		#' @export
120		dataset <- function(...) {
121	!	.deprecate_function("dataset()", deprecation_detail)
122		}
123
124		#' @rdname TealData
125		#' @export
126		dataset_connector <- function(...) {
127	!	.deprecate_function("dataset_connector()", deprecation_detail)
128		}
129
130		#' @rdname TealData
131		#' @export
132		dataset_connector_file <- function(...) {
133	!	.deprecate_function("dataset_connector_file()", deprecation_detail)
134		}
135
136		#' @rdname TealData
137		#' @export
138		dataset_file <- function(...) {
139	!	.deprecate_function("dataset_file()", deprecation_detail)
140		}
141
142		#' @rdname TealData
143		#' @export
144		data_connection <- function(...) {
145	!	.deprecate_function("data_connection()", deprecation_detail)
146		}
147
148		#' @rdname TealData
149		#' @export
150		fun_dataset_connector <- function(...) {
151	!	.deprecate_function("fun_dataset_connector()", deprecation_detail)
152		}
153
154		#' @rdname TealData
155		#' @export
156		fun_cdisc_dataset_connector <- function(...) {
157	!	.deprecate_function("fun_cdisc_dataset_connector()", deprecation_detail)
158		}
159
160		#' @rdname TealData
161		#' @export
162		relational_data_connector <- function(...) {
163	!	.deprecate_function("relational_data_connector()", deprecation_detail)
164		}
165
166		#' @rdname TealData
167		#' @export
168		mae_dataset <- function(...) {
169	!	.deprecate_function("mae_dataset()", deprecation_detail)
170		}
171
172		#' @rdname TealData
173		#' @export
174		get_attrs <- function(...) {
175	!	.deprecate_function("get_attrs()", deprecation_detail)
176		}
177
178		#' @rdname TealData
179		#' @export
180		get_dataset_label <- function(...) {
181	!	.deprecate_function("get_dataset_label()", deprecation_detail)
182		}
183
184		#' @rdname TealData
185		#' @export
186		get_dataset <- function(...) {
187	!	.deprecate_function("get_dataset()", deprecation_detail)
188		}
189
190		#' @rdname TealData
191		#' @export
192		get_datasets <- function(...) {
193	!	.deprecate_function("get_datasets()", deprecation_detail)
194		}
195
196		#' @rdname TealData
197		#' @export
198		get_dataname <- function(...) {
199	!	.deprecate_function("get_dataname()", deprecation_detail)
200		}
201
202		#' @rdname TealData
203		#' @export
204		get_key_duplicates <- function(...) {
205	!	.deprecate_function("get_key_duplicates()", deprecation_detail)
206		}
207
208		#' @rdname TealData
209		#' @export
210		get_keys <- function(...) {
211	!	.deprecate_function("get_keys()", deprecation_detail)
212		}
213
214		#' @rdname TealData
215		#' @export
216		get_raw_data <- function(...) {
217	!	.deprecate_function("get_raw_data()", deprecation_detail)
218		}
219
220		#' @rdname TealData
221		#' @export
222		is_pulled <- function(...) {
223	!	.deprecate_function("is_pulled()", deprecation_detail)
224		}
225
226		#' @rdname TealData
227		#' @export
228		load_dataset <- function(...) {
229	!	.deprecate_function("load_dataset()", deprecation_detail)
230		}
231
232		#' @rdname TealData
233		#' @export
234		load_datasets <- function(...) {
235	!	.deprecate_function("load_datasets()", deprecation_detail)
236		}
237
238		#' @rdname TealData
239		#' @export
240		mutate_data <- function(...) {
241	!	.deprecate_function("mutate_data()", deprecation_detail)
242		}
243
244		#' @rdname TealData
245		#' @export
246		mutate_dataset <- function(...) {
247	!	.deprecate_function("mutate_dataset()", deprecation_detail)
248		}
249
250		#' @rdname TealData
251		#' @export
252		set_args <- function(...) {
253	!	.deprecate_function("set_args()", deprecation_detail)
254		}
255
256		#' @rdname TealData
257		#' @export
258		rds_dataset_connector <- function(...) {
259	!	.deprecate_function("rds_dataset_connector()", deprecation_detail)
260		}
261
262		#' @rdname TealData
263		#' @export
264		rds_cdisc_dataset_connector <- function(...) {
265	!	.deprecate_function("rds_cdisc_dataset_connector()", deprecation_detail)
266		}
267
268		#' @rdname TealData
269		#' @export
270		script_dataset_connector <- function(...) {
271	!	.deprecate_function("script_dataset_connector()", deprecation_detail)
272		}
273
274		#' @rdname TealData
275		#' @export
276		script_cdisc_dataset_connector <- function(...) {
277	!	.deprecate_function("script_cdisc_dataset_connector()", deprecation_detail)
278		}
279
280		#' @rdname TealData
281		#' @export
282		set_keys <- function(...) {
283	!	.deprecate_function("set_keys()", deprecation_detail)
284		}
285
286		#' @rdname TealData
287		#' @export
288		read_script <- function(...) {
289	!	.deprecate_function("read_script()", deprecation_detail)
290		}
291
292		#' @rdname TealData
293		#' @export
294		to_relational_data <- function(...) {
295	!	.deprecate_function("to_relational_data()", deprecation_detail)
296		}
297
298		#' @rdname TealData
299		#' @export
300		validate_metadata <- function(...) {
301	!	.deprecate_function("validate_metadata()", deprecation_detail)
302		}
303
304		#' @rdname TealData
305		#' @export
306		get_cdisc_keys <- function(...) {
307	!	.deprecate_function("get_cdisc_keys()", deprecation_detail)
308		}
309
310		#' @rdname TealData
311		#' @export
312		cdisc_data_file <- function(...) {
313	!	.deprecate_function("cdisc_data_file()", deprecation_detail)
314		}
315
316		#' @rdname TealData
317		#' @export
318		teal_data_file <- function(...) {
319	!	.deprecate_function("teal_data_file()", deprecation_detail)
320		}
321
322		#' @rdname TealData
323		#' @export
324		get_join_keys <- function(...) {
325	!	.deprecate_function("get_join_keys()", "Use `join_keys(data)` instead.")
326		}
327
328		#' @rdname TealData
329		#' @param value value to assign
330		#' @export
331		`get_join_keys<-` <- function(..., value) {
332	!	.deprecate_function("`get_join_keys<-`()", "Use `join_keys(x) <- ...`")
333		}
334
335		#' @rdname col_labels
336		#' @include formatters_var_labels.R
337		#' @details
338		#' `r lifecycle::badge("deprecated")`
339		#'
340		#' In previous versions of `teal.data` labels were managed with `get_labels()`.
341		#' This function is deprecated as of `0.4.0`, use `col_labels` instead.
342		#' @export
343		get_labels <- function(...) {
344	!	.deprecate_function("get_labels()", "Use col_labels(data)")
345		}

1		#' Create a relationship between a pair of datasets
2		#'
3		#' @description
4		#' `r lifecycle::badge("stable")`
5		#'
6		#' Create a relationship between two datasets, `dataset_1` and `dataset_2`.
7		#' By default, this function establishes a directed relationship with `dataset_1` as the parent.
8		#' If `dataset_2` is not specified, the function creates a primary key for `dataset_1`.
9		#'
10		#' @param dataset_1,dataset_2 (`character(1)`) Dataset names. When `dataset_2` is omitted,
11		#' a primary key for `dataset_1` is created.
12		#' @param keys (optionally named `character`) Column mapping between the datasets,
13		#' where `names(keys)` maps columns in `dataset_1` corresponding to columns of
14		#' `dataset_2` given by the elements of `keys`.
15		#' - If unnamed, the same column names are used for both datasets.
16		#' - If any element of the `keys` vector is empty with a non-empty name, then the name is
17		#' used for both datasets.
18		#' @param directed (`logical(1)`) Flag that indicates whether it should create
19		#' a parent-child relationship between the datasets.
20		#' - `TRUE` (default) `dataset_1` is the parent of `dataset_2`;
21		#' - `FALSE` when the relationship is undirected.
22		#'
23		#' @return object of class `join_key_set` to be passed into `join_keys` function.
24		#'
25		#' @examples
26		#' join_key("d1", "d2", c("A"))
27		#' join_key("d1", "d2", c("A" = "B"))
28		#' join_key("d1", "d2", c("A" = "B", "C"))
29		#'
30		#' @export
31		#' @seealso [join_keys()], [parents()]
32		#'
33		join_key <- function(dataset_1, dataset_2 = dataset_1, keys, directed = TRUE) {
34	1070x	checkmate::assert_string(dataset_1)
35	1070x	checkmate::assert_string(dataset_2)
36	1067x	checkmate::assert_character(keys, any.missing = FALSE)
37	1062x	checkmate::assert_flag(directed)
38
39	1062x	if (length(keys) > 0) {
40	1060x	if (is.null(names(keys))) {
41	520x	names(keys) <- keys
42		}
43
44	1060x	keys <- trimws(keys)
45	1060x	names(keys) <- trimws(names(keys))
46
47		# Remove keys with empty value and without name
48	1060x	if (any(keys == "" & names(keys) == "")) {
49	6x	message("Key with an empty value and name are ignored.")
50	6x	keys <- keys[keys != "" & names(keys) != ""]
51		}
52
53		# Set name of keys without one: c("A") -> c("A" = "A")
54	1060x	if (any(names(keys) == "")) {
55	4x	names(keys)[names(keys) == ""] <- keys[names(keys) == ""]
56		}
57
58		# Set value of keys with empty string, but non-empty name: c("A" = "") -> c("A" = "A")
59	1060x	if (any(keys == "")) {
60	4x	keys[keys == ""] <- names(keys[keys == ""])
61		}
62
63	1060x	stopifnot(!is.null(names(keys)))
64	1060x	stopifnot(!anyDuplicated(keys))
65	1059x	stopifnot(!anyDuplicated(names(keys)))
66
67	1058x	if (dataset_1 == dataset_2 && any(names(keys) != keys)) {
68	2x	stop("Keys within a dataset must match exactly: keys = c('A' = 'B') are not allowed")
69		}
70		} else {
71	2x	keys <- NULL
72		}
73
74	1058x	parents <- if (directed && dataset_1 != dataset_2) {
75	392x	stats::setNames(list(dataset_1), dataset_2)
76		} else {
77	666x	list()
78		}
79
80	1058x	structure(
81	1058x	list(
82	1058x	structure(
83	1058x	list(keys),
84	1058x	names = dataset_2
85		)
86		),
87	1058x	names = dataset_1,
88	1058x	class = "join_key_set",
89	1058x	parents = parents
90		)
91		}

1		#' Check Compatibility of keys
2		#'
3		#' Helper function to assert if two key sets contain incompatible keys.
4		#'
5		#' @return Returns `TRUE` if successful, otherwise raises error.
6		#' @keywords internal
7		assert_compatible_keys <- function(join_key_1, join_key_2) {
8	3x	stop_message <- function(dataset_1, dataset_2) {
9	1x	stop(
10	1x	paste("cannot specify multiple different join keys between datasets:", dataset_1, "and", dataset_2)
11		)
12		}
13
14	3x	dataset_1_one <- names(join_key_1)
15	3x	dataset_2_one <- names(join_key_1[[1]])
16	3x	keys_one <- join_key_1[[1]][[1]]
17
18	3x	dataset_1_two <- names(join_key_2)
19	3x	dataset_2_two <- names(join_key_2[[1]])
20	3x	keys_two <- join_key_2[[1]][[1]]
21
22		# if first datasets and the second datasets match and keys
23		# must contain the same named elements
24	3x	if (dataset_1_one == dataset_1_two && dataset_2_one == dataset_2_two) {
25	3x	if (!identical(sort(keys_one), sort(keys_two))) {
26	1x	stop_message(dataset_1_one, dataset_2_one)
27		}
28		}
29
30		# if first dataset of join_key_1 matches second dataset of join_key_2
31		# and the first dataset of join_key_2 must match second dataset of join_key_1
32		# and keys must contain the same elements but with names and values swapped
33	2x	if (dataset_1_one == dataset_2_two && dataset_2_one == dataset_1_two) {
34		if (
35	!	xor(length(keys_one) == 0, length(keys_two) == 0) \|\|
36	!	!identical(sort(keys_one), sort(stats::setNames(names(keys_two), keys_two)))
37		) {
38	!	stop_message(dataset_1_one, dataset_2_one)
39		}
40		}
41
42		# otherwise they are compatible
43	2x	return(TRUE)
44		}
45
46		#' Validate parent-child key
47		#'
48		#' Helper function checks the parent-child relations are valid.
49		#'
50		#' @param x (`join_keys`) object to assert validity of relations
51		#'
52		#' @return `join_keys` invisibly
53		#'
54		#' @keywords internal
55		assert_parent_child <- function(x) {
56	431x	jk <- join_keys(x)
57	431x	jk_parents <- parents(jk)
58
59	431x	checkmate::assert_class(jk, c("join_keys", "list"))
60
61	431x	if (!is.null(jk_parents)) {
62	431x	for (idx1 in seq_along(jk_parents)) {
63	170x	name_from <- names(jk_parents)[[idx1]]
64	170x	for (idx2 in seq_along(jk_parents[[idx1]])) {
65	170x	name_to <- jk_parents[[idx1]][[idx2]]
66	170x	keys_from <- jk[[name_from]][[name_to]]
67	170x	keys_to <- jk[[name_to]][[name_from]]
68	170x	if (length(keys_from) == 0 && length(keys_to) == 0) {
69	1x	stop(sprintf("No join keys from %s to its parent (%s) and vice versa", name_from, name_to))
70		}
71		}
72		}
73		}
74	430x	invisible(x)
75		}
76
77		#' Verify key set compatibility
78		#'
79		#' Helper function to ensuring compatibility between two sets of keys
80		#'
81		#' @return Returns `TRUE` if successful, otherwise raises error.
82		#' @keywords internal
83		assert_compatible_keys2 <- function(x, y) {
84		# Helper to flatten join_keys / join_key_set
85	3x	flatten_join_key_sets <- function(value) {
86	6x	value <- unclass(value)
87	6x	Reduce(
88	6x	init = list(),
89	6x	f = function(u, v, ...) {
90	6x	el <- value[v][[1]]
91	6x	res <- lapply(seq_along(el), function(ix) el[ix])
92	6x	names(res) <- rep(v, length(res))
93	6x	append(u, res)
94		},
95	6x	x = names(value)
96		)
97		}
98
99	3x	x <- flatten_join_key_sets(x)
100	3x	y <- flatten_join_key_sets(y)
101
102	3x	for (idx_1 in seq_along(x)) {
103	3x	for (idx_2 in seq_along(y)) {
104	3x	assert_compatible_keys(x[idx_1], y[idx_2])
105		}
106		}
107	2x	TRUE
108		}
109
110		#' Updates the keys of the datasets based on the parents
111		#'
112		#' @param x (`join_keys`) object to update the keys.
113		#'
114		#' @return (`self`) invisibly for chaining
115		#'
116		#' @keywords internal
117		update_keys_given_parents <- function(x) {
118	12x	jk <- join_keys(x)
119
120	12x	checkmate::assert_class(jk, "join_keys", .var.name = checkmate::vname(x))
121
122	12x	datanames <- names(jk)
123	12x	for (d1_ix in seq_along(datanames)) {
124	34x	d1 <- datanames[[d1_ix]]
125	34x	d1_parent <- parent(jk, d1)
126	34x	for (d2 in datanames[-1 * seq.int(d1_ix)]) {
127	38x	if (length(jk[[d1]][[d2]]) == 0) {
128	16x	d2_parent <- parent(jk, d2)
129
130	12x	if (!identical(d1_parent, d2_parent) \|\| length(d1_parent) == 0) next
131
132		# both has the same parent -> common keys to parent
133	4x	keys_d1_parent <- sort(jk[[d1]][[d1_parent]])
134	4x	keys_d2_parent <- sort(jk[[d2]][[d2_parent]])
135
136	4x	common_ix_1 <- unname(keys_d1_parent) %in% unname(keys_d2_parent)
137	4x	common_ix_2 <- unname(keys_d2_parent) %in% unname(keys_d1_parent)
138
139		# No common keys between datasets - leave empty
140	1x	if (all(!common_ix_1)) next
141
142	3x	fk <- structure(
143	3x	names(keys_d2_parent)[common_ix_2],
144	3x	names = names(keys_d1_parent)[common_ix_1]
145		)
146	3x	jk[[d1]][[d2]] <- fk # mutate join key
147		}
148		}
149		}
150		# check parent child relation
151	12x	assert_parent_child(x = jk)
152
153	12x	jk
154		}

1		#' @rdname join_keys
2		#' @order 2
3		#'
4		#' @section Functions:
5		#' - `x[datanames]`: Returns a subset of the `join_keys` object for
6		#' given `datanames`, including parent `datanames` and symmetric mirror keys between
7		#' `datanames` in the result.
8		#' - `x[i, j]`: Returns join keys between datasets `i` and `j`,
9		#' including implicit keys inferred from their relationship with a parent.
10		#'
11		#' @param i,j indices specifying elements to extract or replace. Index should be a
12		#' a character vector, but it can also take numeric, logical, `NULL` or missing.
13		#'
14		#' @export
15		#'
16		#' @examples
17		#'
18		#' # Getter for join_keys ---
19		#'
20		#' jk["ds1", "ds2"]
21		#'
22		#' # Subsetting join_keys ----
23		#'
24		#' jk["ds1"]
25		#' jk[1:2]
26		#' jk[c("ds1", "ds2")]
27		#'
28		`[.join_keys` <- function(x, i, j) {
29	31x	if (missing(i) && missing(j)) {
30		# because:
31		# - list(a = 1)[] returns list(a = 1)
32		# - data.frame(a = 1)[] returns data.frame(a = 1)
33	1x	return(x)
34	30x	} else if (!missing(i) && is.null(i) \|\| !missing(j) && is.null(j)) {
35		# because list(a = 1)[NULL] returns NULL
36		# data.frame(a = 1)[NULL, NULL] returns data.frame(
37	2x	return(join_keys())
38	28x	} else if (!missing(i) && !missing(j)) {
39		if (
40	8x	!any(
41	8x	checkmate::test_string(i),
42	8x	checkmate::test_number(i),
43	8x	checkmate::test_logical(i, len = length(x)) && sum(j) == 1
44		) \|\|
45	8x	!any(
46	8x	checkmate::test_string(j),
47	8x	checkmate::test_number(j),
48	8x	checkmate::test_logical(j, len = length(x)) && sum(j) == 1
49		)
50		) {
51	1x	stop(
52	1x	"join_keys[i, j] - Can't extract keys for multiple pairs.",
53	1x	"When specifying a pair [i, j], both indices must point to a single key pair.\n",
54	1x	call. = FALSE
55		)
56		}
57	1x	if (is.numeric(i)) i <- names(x)[i]
58	1x	if (is.numeric(j)) j <- names(x)[j]
59
60	7x	subset_x <- update_keys_given_parents(x[union(i, j)])
61	7x	return(subset_x[[i]][[j]])
62	20x	} else if (!missing(j)) {
63		# ie. select all keys which have j as dataset_2
64		# since list is symmetrical it is equivalent to selecting by i
65	1x	i <- j
66		}
67
68	20x	checkmate::assert(
69	20x	combine = "or",
70	20x	checkmate::check_character(i),
71	20x	checkmate::check_numeric(i),
72	20x	checkmate::check_logical(i)
73		)
74
75
76		# Convert integer/logical index to named index
77	20x	if (checkmate::test_numeric(i) \|\| checkmate::test_logical(i)) {
78	2x	i <- names(x)[i]
79		}
80
81		# When retrieving a relationship pair, it will also return the symmetric key
82	20x	new_jk <- new_join_keys()
83	20x	queue <- unique(i)
84	20x	bin <- character(0)
85
86		# Need to iterate on a mutating queue if subset of a dataset will also
87		# select its parent as that parent might have relationships with others
88		# already selected.
89	20x	while (length(queue) > 0) {
90	45x	ix <- queue[1]
91	45x	queue <- queue[-1]
92	45x	bin <- c(bin, ix)
93
94	45x	ix_parent <- parent(x, ix)
95
96	45x	if (checkmate::test_string(ix_parent, min.chars = 1) && !ix_parent %in% c(queue, bin)) {
97	10x	queue <- c(queue, ix_parent)
98		}
99
100	45x	ix_valid_names <- names(x[[ix]]) %in% c(queue, bin)
101
102	45x	new_jk[[ix]] <- x[[ix]][ix_valid_names]
103
104		# Add primary key of parent
105	45x	if (length(ix_parent) > 0) {
106	16x	new_jk[[ix_parent]][[ix_parent]] <- x[[ix_parent]][[ix_parent]]
107		}
108		}
109
110	20x	common_parents_ix <- names(parents(x)) %in% names(new_jk) &
111	20x	parents(x) %in% names(new_jk)
112
113	9x	if (any(common_parents_ix)) parents(new_jk) <- parents(x)[common_parents_ix]
114
115	20x	new_jk
116		}
117
118		#' @rdname join_keys
119		#' @order 2
120		#'
121		#' @param directed (`logical(1)`) Flag that indicates whether it should create
122		#' a parent-child relationship between the datasets.
123		#' - `TRUE` (default) `dataset_1` is the parent of `dataset_2`;
124		#' - `FALSE` when the relationship is undirected.
125		#' @section Functions:
126		#' - `x[i, j] <- value`: Assignment of a key to pair `(i, j)`.
127		#' - `x[i] <- value`: This (without `j` parameter) is not a supported
128		#' operation for `join_keys`.
129		#' - `join_keys(x)[i, j] <- value`: Assignment to `join_keys` object stored in `x`,
130		#' such as a `teal_data` object or `join_keys` object itself.
131		#'
132		#' @export
133		#' @examples
134		#'
135		#' # Setting a new primary key ---
136		#'
137		#' jk["ds4", "ds4"] <- "pk4"
138		#' jk["ds5", "ds5"] <- "pk5"
139		#'
140		#' # Setting a single relationship pair ---
141		#'
142		#' jk["ds1", "ds4"] <- c("pk1" = "pk4")
143		#'
144		#' # Removing a key ---
145		#'
146		#' jk["ds5", "ds5"] <- NULL
147		`[<-.join_keys` <- function(x, i, j, directed = TRUE, value) {
148	11x	checkmate::assert_flag(directed)
149	11x	if (missing(i) \|\| missing(j)) {
150	4x	stop("join_keys[i, j] specify both indices to set a key pair.")
151	7x	} else if (!missing(i) && is.null(i) \|\| !missing(j) && is.null(j)) {
152	2x	stop("join_keys[i, j] neither i nor j can be NULL.")
153		} else if (
154	5x	!any(
155	5x	checkmate::test_string(i),
156	5x	checkmate::test_number(i),
157	5x	checkmate::test_logical(i, len = length(x)) && sum(j) == 1
158		) \|\|
159	5x	!any(
160	5x	checkmate::test_string(j),
161	5x	checkmate::test_number(j),
162	5x	checkmate::test_logical(j, len = length(x)) && sum(j) == 1
163		)
164		) {
165	2x	stop(
166	2x	"join_keys[i, j] <- Can't set keys to specified indices.\n",
167	2x	"When setting pair [i, j], both indices must point to a single key pair.\n",
168	2x	call. = FALSE
169		)
170		}
171
172		# Handle join key removal separately
173	3x	if (is.null(value)) {
174	1x	x[[i]][[j]] <- NULL
175	1x	return(x)
176		}
177
178	2x	c(x, join_key(i, j, value, directed))
179		}
180
181		#' @rdname join_keys
182		#'
183		#' @order 1000
184		#' @usage ## Preferred method is x[i, j] <- value
185		#' x[[i]][[j]] <- value
186		#'
187		#' @section Functions:
188		#' - `x[[i]][[j]] <- value`: It is equivalent as `x[i, j] <- value`.
189		#'
190		#' @export
191		#' @examples
192		#'
193		#' # Setting via x[[i]] <- value ---
194		#'
195		#' jk <- join_keys()
196		#' jk[["ds6"]][["ds6"]] <- "pk6"
197		#' jk[["ds7"]] <- list(ds7 = "pk7", ds6 = c(pk7 = "pk6"))
198		#' jk[["ds7"]][["ds7"]] <- NULL # removes key
199		#'
200		#' jk
201		#'
202		#' @noRd
203		`[[<-.join_keys` <- function(x, i, value) {
204	382x	checkmate::assert(
205	382x	combine = "or",
206	382x	checkmate::check_string(i),
207	382x	checkmate::check_number(i),
208	382x	checkmate::check_logical(i, len = length(x))
209		)
210	382x	checkmate::assert_list(value, names = "named", types = "character", null.ok = TRUE)
211	375x	if (checkmate::test_numeric(i) \|\| checkmate::test_logical(i)) {
212	1x	i <- names(x)[[i]]
213		}
214
215		# Normalize values
216	375x	norm_value <- lapply(seq_along(value), function(.x) {
217	524x	join_key(i, names(value)[.x], value[[.x]])
218		})
219	375x	names(norm_value) <- names(value)
220
221		# Check if multiple modifications don't have a conflict
222	375x	repeated_value_ix <- names(value) %in% names(value)[duplicated(names(value))]
223	375x	repeated <- norm_value[repeated_value_ix]
224	375x	vapply(
225	375x	seq_along(repeated),
226	375x	function(.ix, .x_value = repeated[[.ix]], .x_name = names(.x_value[[1]])) {
227	3x	assert_compatible_keys2(
228	3x	.x_value,
229	3x	unlist(unname(
230	3x	repeated[-.ix][names(repeated[-.ix]) == .x_name]
231	3x	), recursive = FALSE)
232		)
233		},
234	375x	logical(1)
235		)
236
237	374x	norm_value <- lapply(norm_value, function(x) x[[1]][[1]])
238	374x	names(norm_value) <- names(value)
239
240		# Safe to do as duplicated are the same
241	374x	norm_value[duplicated(names(norm_value))] <- NULL
242
243		# Keep only elements with length > 0L
244	374x	norm_value <- Filter(length, norm_value)
245
246		# Remove classes to use list-based get/assign operations
247	374x	new_x <- unclass(x)
248
249		# In case a pair is removed, also remove the symmetric pair and update parents
250	374x	removed_names <- setdiff(names(new_x[[i]]), names(norm_value))
251	374x	for (.x in removed_names) {
252	2x	if (identical(parent(x, .x), i)) attr(new_x, "parents")[[.x]] <- NULL
253	1x	if (identical(parent(x, i), .x)) attr(new_x, "parents")[[i]] <- NULL
254
255	5x	new_x[[.x]][[i]] <- NULL
256		}
257
258	374x	new_x[[i]] <- norm_value
259
260		# Iterate on all new values to create symmetrical pair
261	374x	for (ds2 in names(norm_value)) {
262	298x	if (ds2 == i) next
263
264	222x	keep_value <- if (is.null(x)) list() else new_x[[ds2]]
265
266		# Invert key
267	222x	new_value <- stats::setNames(names(norm_value[[ds2]]), norm_value[[ds2]])
268	222x	keep_value[[i]] <- new_value
269
270		# Assign symmetrical
271	222x	new_x[[ds2]] <- keep_value
272		}
273
274	374x	preserve_attr <- attributes(new_x)[!names(attributes(new_x)) %in% "names"]
275		# Remove NULL or empty keys
276	374x	new_x <- Filter(function(x) length(x) != 0L, new_x)
277	374x	attributes(new_x) <- utils::modifyList(attributes(new_x), preserve_attr)
278
279		#
280		# restore class
281	374x	class(new_x) <- class(x)
282	374x	new_x
283		}

1		# get_code_dependency ----
2
3		#' Get code dependency of an object
4		#'
5		#' Extract subset of code required to reproduce specific object(s), including code producing side-effects.
6		#'
7		#' Given a character vector with code, this function will extract the part of the code responsible for creating
8		#' the variables specified by `names`.
9		#' This includes the final call that creates the variable(s) in question as well as all _parent calls_,
10		#' _i.e._ calls that create variables used in the final call and their parents, etc.
11		#' Also included are calls that create side-effects like establishing connections.
12		#'
13		#' It is assumed that object dependency is established by using three assignment operators: `<-`, `=`, and `->` .
14		#' Other assignment methods (`assign`, `<<-`) or non-standard-evaluation methods are not supported.
15		#'
16		#' Side-effects are not detected automatically and must be marked in the code.
17		#' Add `# @linksto object` at the end of a line where a side-effect occurs to specify that this line is required
18		#' to reproduce a variable called `object`.
19		#'
20		#' @param code `character` with the code.
21		#' @param names `character` vector of object names.
22		#' @param check_names `logical(1)` flag specifying if a warning for non-existing names should be displayed.
23		#'
24		#' @return Character vector, a subset of `code`.
25		#' Note that subsetting is actually done on the calls `code`, not necessarily on the elements of the vector.
26		#'
27		#' @keywords internal
28		get_code_dependency <- function(code, names, check_names = TRUE) {
29	44x	checkmate::assert_character(code)
30	44x	checkmate::assert_character(names, any.missing = FALSE)
31
32	44x	if (identical(code, character(0)) \|\| identical(trimws(code), "")) {
33	2x	return(code)
34		}
35
36	42x	code <- parse(text = code, keep.source = TRUE)
37	42x	pd <- utils::getParseData(code)
38	42x	calls_pd <- extract_calls(pd)
39
40	42x	if (check_names) {
41		# Detect if names are actually in code.
42	42x	symbols <- unlist(lapply(calls_pd, function(call) call[call$token == "SYMBOL", "text"]))
43	42x	if (any(pd$text == "assign")) {
44	4x	assign_calls <- Filter(function(call) find_call(call, "assign"), calls_pd)
45	4x	ass_str <- unlist(lapply(assign_calls, function(call) call[call$token == "STR_CONST", "text"]))
46	4x	ass_str <- gsub("^['\"]\|['\"]$", "", ass_str)
47	4x	symbols <- c(ass_str, symbols)
48		}
49	42x	if (!all(names %in% unique(symbols))) {
50	1x	warning("Object(s) not found in code: ", toString(setdiff(names, symbols)))
51		}
52		}
53
54	42x	graph <- code_graph(calls_pd)
55	42x	ind <- unlist(lapply(names, function(x) graph_parser(x, graph)))
56
57	42x	lib_ind <- detect_libraries(calls_pd)
58
59	42x	as.character(code[unique(c(lib_ind, ind))])
60		}
61
62		#' Locate function call token
63		#'
64		#' Determine which row of parsed data is specific `SYMBOL_FUNCTION_CALL` token.
65		#'
66		#' Useful for determining occurrence of `assign` or `data` functions in an input call.
67		#'
68		#' @param call_pd `data.frame` as returned by `extract_calls()`
69		#' @param text `character(1)` to look for in `text` column of `call_pd`
70		#'
71		#' @return
72		#' Single integer specifying row in `call_pd` where `token` is `SYMBOL_FUNCTION_CALL` and `text` is `text`.
73		#' 0 if not found.
74		#'
75		#' @keywords internal
76		#' @noRd
77		find_call <- function(call_pd, text) {
78	332x	checkmate::check_data_frame(call_pd)
79	332x	checkmate::check_names(call_pd, must.include = c("token", "text"))
80	332x	checkmate::check_string(text)
81
82	332x	ans <- which(call_pd$token == "SYMBOL_FUNCTION_CALL" & call_pd$text == text)
83	332x	if (length(ans)) {
84	24x	ans
85		} else {
86	308x	0L
87		}
88		}
89
90		#' Split the result of `utils::getParseData()` into separate calls
91		#'
92		#' @param pd (`data.frame`) A result of `utils::getParseData()`.
93		#'
94		#' @return
95		#' A `list` of `data.frame`s.
96		#' Each element is a subset of `pd` corresponding to one call in the original code from which `pd` was obtained.
97		#' Only four columns (`"token"`, `"text"`, `"id"`, `"parent"`) are kept, the rest is discarded.
98		#'
99		#' @keywords internal
100		#' @noRd
101		extract_calls <- function(pd) {
102	42x	calls <- lapply(
103	42x	pd[pd$parent == 0, "id"],
104	42x	function(parent) {
105	156x	rbind(
106	156x	pd[pd$id == parent, c("token", "text", "id", "parent")],
107	156x	get_children(pd = pd, parent = parent)
108		)
109		}
110		)
111	42x	calls <- Filter(function(call) !(nrow(call) == 1 && call$token == "';'"), calls)
112	42x	calls <- Filter(Negate(is.null), calls)
113	42x	calls <- fix_shifted_comments(calls)
114	42x	fix_arrows(calls)
115		}
116
117		#' @keywords internal
118		#' @noRd
119		get_children <- function(pd, parent) {
120	1827x	idx_children <- abs(pd$parent) == parent
121	1827x	children <- pd[idx_children, c("token", "text", "id", "parent")]
122	1827x	if (nrow(children) == 0) {
123	1065x	return(NULL)
124		}
125
126	762x	if (parent > 0) {
127	762x	do.call(rbind, c(list(children), lapply(children$id, get_children, pd = pd)))
128		}
129		}
130
131		#' Fixes edge case of comments being shifted to the next call.
132		#' @keywords internal
133		#' @noRd
134		fix_shifted_comments <- function(calls) {
135		# If the first or the second token is a @linksto COMMENT,
136		# then it belongs to the previous call.
137	42x	if (length(calls) >= 2) {
138	41x	for (i in 2:length(calls)) {
139	113x	comment_idx <- grep("@linksto", calls[[i]][, "text"])
140	113x	if (isTRUE(comment_idx[1] <= 2)) {
141	4x	calls[[i - 1]] <- rbind(
142	4x	calls[[i - 1]],
143	4x	calls[[i]][seq_len(comment_idx[1]), ]
144		)
145	4x	calls[[i]] <- calls[[i]][-seq_len(comment_idx[1]), ]
146		}
147		}
148		}
149	42x	Filter(nrow, calls)
150		}
151
152		#' Fixes edge case of `<-` assignment operator being called as function,
153		#' which is \code{`<-`(y,x)} instead of traditional `y <- x`.
154		#' @keywords internal
155		#' @noRd
156		fix_arrows <- function(calls) {
157	42x	lapply(
158	42x	calls,
159	42x	function(call) {
160	154x	call[call$token == "SYMBOL_FUNCTION_CALL" & call$text == "`<-`", c("token", "text")] <- c("LEFT_ASSIGN", "<-")
161	154x	call[call$token == "SYMBOL_FUNCTION_CALL" & call$text == "`->`", c("token", "text")] <- c("RIGHT_ASSIGN", "->")
162	154x	call[call$token == "SYMBOL_FUNCTION_CALL" & call$text == "`<<-`", c("token", "text")] <- c("LEFT_ASSIGN", "<-")
163	154x	call[call$token == "SYMBOL_FUNCTION_CALL" & call$text == "`->>`", c("token", "text")] <- c("RIGHT_ASSIGN", "->")
164	154x	call[call$token == "SYMBOL_FUNCTION_CALL" & call$text == "`=`", c("token", "text")] <- c("LEFT_ASSIGN", "<-")
165	154x	call
166		}
167		)
168		}
169
170		# code_graph ----
171
172		#' Create object dependencies graph within parsed code
173		#'
174		#' Builds dependency graph that identifies dependencies between objects in parsed code.
175		#' Helps understand which objects depend on which.
176		#'
177		#' @param calls_pd `list` of `data.frame`s;
178		#' result of `utils::getParseData()` split into subsets representing individual calls;
179		#' created by `extract_calls()` function
180		#'
181		#' @return
182		#' A list (of length of input `calls_pd`) where each element represents one call.
183		#' Each element is a character vector listing names of objects that depend on this call
184		#' and names of objects that this call depends on.
185		#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a`
186		#' depends on objects `b` and `c`.
187		#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call.
188		#'
189		#' @keywords internal
190		#' @noRd
191		code_graph <- function(calls_pd) {
192	42x	cooccurrence <- extract_occurrence(calls_pd)
193
194	42x	side_effects <- extract_side_effects(calls_pd)
195
196	42x	mapply(function(x, y) unique(c(x, y)), side_effects, cooccurrence, SIMPLIFY = FALSE)
197		}
198
199		#' Extract object occurrence
200		#'
201		#' Extracts objects occurrence within calls passed by `calls_pd`.
202		#' Also detects which objects depend on which within a call.
203		#'
204		#' @param calls_pd `list` of `data.frame`s;
205		#' result of `utils::getParseData()` split into subsets representing individual calls;
206		#' created by `extract_calls()` function
207		#'
208		#' @return
209		#' A list (of length of input `calls_pd`) where each element represents one call.
210		#' Each element is a character vector listing names of objects that depend on this call
211		#' and names of objects that this call depends on.
212		#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a`
213		#' depends on objects `b` and `c`.
214		#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call.
215		#'
216		#' @keywords internal
217		#' @noRd
218		extract_occurrence <- function(calls_pd) {
219	42x	is_in_function <- function(x) {
220		# If an object is a function parameter,
221		# then in calls_pd there is a `SYMBOL_FORMALS` entry for that object.
222	141x	function_id <- x[x$token == "FUNCTION", "parent"]
223	141x	if (length(function_id)) {
224	9x	x$id %in% get_children(x, function_id)$id
225		} else {
226	132x	rep(FALSE, nrow(x))
227		}
228		}
229	42x	lapply(
230	42x	calls_pd,
231	42x	function(call_pd) {
232		# Handle data(object)/data("object")/data(object, envir = ) independently.
233	154x	data_call <- find_call(call_pd, "data")
234	154x	if (data_call) {
235	2x	sym <- call_pd[data_call + 1, "text"]
236	2x	return(c(gsub("^['\"]\|['\"]$", "", sym), "<-"))
237		}
238		# Handle assign(x = ).
239	152x	assign_call <- find_call(call_pd, "assign")
240	152x	if (assign_call) {
241		# Check if parameters were named.
242		# "','" is for unnamed parameters, where "SYMBOL_SUB" is for named.
243		# "EQ_SUB" is for `=` appearing after the name of the named parameter.
244	11x	if (any(call_pd$token == "SYMBOL_SUB")) {
245	8x	params <- call_pd[call_pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"]
246		# Remove sequence of "=", ",".
247	8x	if (length(params > 1)) {
248	8x	remove <- integer(0)
249	8x	for (i in 2:length(params)) {
250	36x	if (params[i - 1] == "=" & params[i] == ",") {
251	8x	remove <- c(remove, i - 1, i)
252		}
253		}
254	7x	if (length(remove)) params <- params[-remove]
255		}
256	8x	pos <- match("x", setdiff(params, ","), nomatch = match(",", params, nomatch = 0))
257	8x	if (!pos) {
258	!	return(character(0L))
259		}
260		# pos is indicator of the place of 'x'
261		# 1. All parameters are named, but none is 'x' - return(character(0L))
262		# 2. Some parameters are named, 'x' is in named parameters: match("x", setdiff(params, ","))
263		# - check "x" in params being just a vector of named parameters.
264		# 3. Some parameters are named, 'x' is not in named parameters
265		# - check first appearance of "," (unnamed parameter) in vector parameters.
266		} else {
267		# Object is the first entry after 'assign'.
268	3x	pos <- 1
269		}
270	11x	sym <- call_pd[assign_call + pos, "text"]
271	11x	return(c(gsub("^['\"]\|['\"]$", "", sym), "<-"))
272		}
273
274		# What occurs in a function body is not tracked.
275	141x	x <- call_pd[!is_in_function(call_pd), ]
276	141x	sym_cond <- which(x$token %in% c("SYMBOL", "SYMBOL_FUNCTION_CALL"))
277
278	141x	if (length(sym_cond) == 0) {
279	2x	return(character(0L))
280		}
281		# Watch out for SYMBOLS after $ and @. For x$a x@a: x is object, a is not.
282		# For x$a, a's ID is $'s ID-2 so we need to remove all IDs that have ID = $ID - 2.
283	139x	dollar_ids <- x[x$token %in% c("'$'", "'@'"), "id"]
284	139x	if (length(dollar_ids)) {
285	12x	object_ids <- x[sym_cond, "id"]
286	12x	after_dollar <- object_ids[(object_ids - 2) %in% dollar_ids]
287	12x	sym_cond <- setdiff(sym_cond, which(x$id %in% after_dollar))
288		}
289
290	139x	ass_cond <- grep("ASSIGN", x$token)
291	139x	if (!length(ass_cond)) {
292	19x	return(c("<-", unique(x[sym_cond, "text"])))
293		}
294
295	120x	sym_cond <- sym_cond[sym_cond > ass_cond] # NOTE 1
296		# If there was an assignment operation detect direction of it.
297	120x	if (unique(x$text[ass_cond]) == "->") { # NOTE 2
298	1x	sym_cond <- rev(sym_cond)
299		}
300
301	120x	append(unique(x[sym_cond, "text"]), "<-", after = 1)
302
303		### NOTE 2: What if there are 2 assignments: e.g. a <- b -> c.
304		### NOTE 1: For cases like 'eval(expression(b <- b + 2))' removes 'eval(expression('.
305		}
306		)
307		}
308
309		#' Extract side effects
310		#'
311		#' Extracts all object names from the code that are marked with `@linksto` tag.
312		#'
313		#' The code may contain functions calls that create side effects, e.g. modify the environment.
314		#' Static code analysis may be insufficient to determine which objects are created or modified by such a function call.
315		#' The `@linksto` comment tag is introduced to mark a call as having a (side) effect on one or more objects.
316		#' With this tag a complete object dependency structure can be established.
317		#' Read more about side effects and the usage of `@linksto` tag in [`get_code_dependencies()`] function.
318		#'
319		#' @param calls_pd `list` of `data.frame`s;
320		#' result of `utils::getParseData()` split into subsets representing individual calls;
321		#' created by `extract_calls()` function
322		#'
323		#' @return
324		#' A list of length equal to that of `calls_pd`, where each element is a character vector of names of objects
325		#' depending a call tagged with `@linksto` in a corresponding element of `calls_pd`.
326		#'
327		#' @keywords internal
328		#' @noRd
329		extract_side_effects <- function(calls_pd) {
330	42x	lapply(
331	42x	calls_pd,
332	42x	function(x) {
333	154x	linksto <- grep("@linksto", x[x$token == "COMMENT", "text"], value = TRUE)
334	154x	unlist(strsplit(sub("\\s#\\s@linksto\\s+", "", linksto), "\\s+"))
335		}
336		)
337		}
338
339		# graph_parser ----
340
341		#' Return the indices of calls needed to reproduce an object
342		#'
343		#' @param x The name of the object to return code for.
344		#' @param graph A result of `code_graph()`.
345		#'
346		#' @return
347		#' Integer vector of indices that can be applied to `graph` to obtain all calls required to reproduce object `x`.
348		#'
349		#' @keywords internal
350		#' @noRd
351		graph_parser <- function(x, graph) {
352	182x	occurrence <- vapply(
353	182x	graph, function(call) {
354	552x	ind <- match("<-", call, nomatch = length(call) + 1L)
355	552x	x %in% call[seq_len(ind - 1L)]
356		},
357	182x	logical(1)
358		)
359
360	182x	dependencies <- lapply(graph[occurrence], function(call) {
361	96x	ind <- match("<-", call, nomatch = 0L)
362	96x	call[(ind + 1L):length(call)]
363		})
364	182x	dependencies <- setdiff(unlist(dependencies), x)
365
366	182x	if (length(dependencies) && any(occurrence)) {
367	68x	dependency_ids <- lapply(dependencies, function(dependency) {
368	139x	graph_parser(dependency, graph[1:max(which(occurrence))])
369		})
370	68x	sort(unique(c(which(occurrence), unlist(dependency_ids))))
371		} else {
372	114x	which(occurrence)
373		}
374		}
375
376
377		# default_side_effects --------------------------------------------------------------------------------------------
378
379		#' Detect library calls
380		#'
381		#' Detects `library()` and `require()` function calls.
382		#'
383		#' @param calls_pd `list` of `data.frame`s;
384		#' result of `utils::getParseData()` split into subsets representing individual calls;
385		#' created by `extract_calls()` function
386		#'
387		#' @return
388		#' Integer vector of indices that can be applied to `graph` (result of `code_graph()`) to obtain all calls containing
389		#' `library()` or `require()` calls that are always returned for reproducibility.
390		#'
391		#' @keywords internal
392		#' @noRd
393		detect_libraries <- function(calls_pd) {
394	42x	defaults <- c("library", "require")
395
396	42x	which(
397	42x	vapply(
398	42x	calls_pd,
399	42x	function(call) {
400	154x	any(call$token == "SYMBOL_FUNCTION_CALL" & call$text %in% defaults)
401		},
402	42x	logical(1)
403		)
404		)
405		}

1		#' Variable labels
2		#'
3		#' Get or set variable labels in a `data.frame`.
4		#'
5		#' @details Variable labels can be stored as a `label` attribute set on individual variables.
6		#' These functions get or set this attribute, either on all (`col_labels`) or some variables (`col_relabel`).
7		#'
8		#' @param x (`data.frame` or `DataFrame`) data object
9		#' @param fill (`logical(1)`) specifying what to return if variable has no label
10		#' @param value (`character`) vector of variable labels of length equal to number of columns in `x`;
11		#' if named, names must match variable names in `x` and will be used as key to set labels;
12		#' use `NA` to remove label from variable
13		#' @param ... name-value pairs, where name corresponds to a variable name in `x`
14		#' and value is the new variable label
15		#'
16		#' @return
17		#' For `col_labels`, named character vector of variable labels, the names being the corresponding variable names.
18		#' If the `label` attribute is missing, the vector elements will be
19		#' the variable names themselves if `fill = TRUE` and `NA` if `fill = FALSE`.
20		#'
21		#' For `col_labels<-` and `col_relabel`, copy of `x` with variable labels modified.
22		#'
23		#' @examples
24		#' x <- iris
25		#' col_labels(x)
26		#' col_labels(x) <- paste("label for", names(iris))
27		#' col_labels(x)
28		#' y <- col_relabel(x, Sepal.Length = "Sepal Length of iris flower")
29		#' col_labels(y)
30		#'
31		#' @source These functions were taken from
32		#' [formatters](https://cran.r-project.org/package=formatters) package, to reduce the complexity of
33		#' the dependency tree and rewritten.
34		#'
35		#' @rdname col_labels
36		#' @export
37		#'
38		col_labels <- function(x, fill = FALSE) {
39	5x	checkmate::test_multi_class(x, c("data.frame", "DataFrame"))
40	5x	checkmate::assert_flag(fill)
41
42	5x	if (ncol(x) == 0L) {
43	2x	return(character(0L))
44		}
45
46	3x	labels <- lapply(x, attr, "label")
47
48	3x	nulls <- vapply(labels, is.null, logical(1L))
49	3x	if (any(nulls)) {
50	3x	labels[nulls] <-
51	3x	if (fill) {
52	1x	colnames(x)[nulls]
53		} else {
54	3x	NA_character_
55		}
56		}
57
58	3x	not_char <- !vapply(labels, checkmate::test_string, logical(1L), na.ok = TRUE)
59	3x	if (any(not_char)) {
60	!	stop("labels for variables ", toString(names(not_char[not_char])), "are not character strings")
61		}
62
63	3x	unlist(labels, recursive = FALSE)
64		}
65
66		#' @rdname col_labels
67		#' @export
68		`col_labels<-` <- function(x, value) {
69	!	checkmate::test_multi_class(x, c("data.frame", "DataFrame"))
70	!	checkmate::assert_character(value)
71	!	checkmate::assert_true(
72	!	ncol(x) == length(value),
73	!	.var.name = "Length of value is equal to the number of columns"
74		)
75
76	!	if (is.null(names(value))) {
77	!	names(value) <- names(x)
78		}
79	!	x[names(value)] <- mapply(`attr<-`, x = x[names(value)], which = "label", value = value, SIMPLIFY = FALSE)
80	!	x
81		}
82
83		#' @rdname col_labels
84		#' @export
85		col_relabel <- function(x, ...) {
86	3x	checkmate::test_multi_class(x, c("data.frame", "DataFrame"))
87	3x	if (missing(...)) {
88	1x	return(x)
89		}
90	2x	dots <- list(...)
91	2x	varnames <- names(dots)
92
93	2x	checkmate::assert_subset(varnames, names(x), .var.name = "names of ...")
94	1x	lapply(dots, checkmate::assert_string, .var.name = "element of ...")
95
96	1x	x[varnames] <- mapply(`attr<-`, x = x[varnames], which = "label", value = dots, SIMPLIFY = FALSE, USE.NAMES = FALSE)
97	1x	x
98		}

1		#' Verify code reproducibility
2		#'
3		#' Checks whether code in `teal_data` object reproduces the stored objects.
4		#'
5		#' If objects created by code in the `@code` slot of `x` are `all_equal` to the contents of the `@env` slot,
6		#' the function updates the `@verified` slot to `TRUE` in the returned `teal_data` object.
7		#' Once verified, the slot will always be set to `TRUE`.
8		#' If the `@code` fails to recreate objects in `teal_data@env`, an error is raised.
9		#'
10		#' @return Input `teal_data` object or error.
11		#'
12		#' @param x `teal_data` object
13		#' @examples
14		#' tdata1 <- teal_data()
15		#' tdata1 <- within(tdata1, {
16		#' a <- 1
17		#' b <- a^5
18		#' c <- list(x = 2)
19		#' })
20		#' verify(tdata1)
21		#'
22		#' tdata2 <- teal_data(x1 = iris, code = "x1 <- iris")
23		#' verify(tdata2)
24		#' verify(tdata2)@verified
25		#' tdata2@verified
26		#'
27		#' tdata3 <- teal_data()
28		#' tdata3 <- within(tdata3, {
29		#' stop("error")
30		#' })
31		#' try(verify(tdata3)) # fails
32		#'
33		#'
34		#' a <- 1
35		#' b <- a + 2
36		#' c <- list(x = 2)
37		#' d <- 5
38		#' tdata4 <- teal_data(
39		#' a = a, b = b, c = c, d = d,
40		#' code = "a <- 1
41		#' b <- a
42		#' c <- list(x = 2)
43		#' e <- 1"
44		#' )
45		#' tdata4
46		#' try(verify(tdata4)) # fails
47		#'
48		#' @name verify
49		#' @rdname verify
50		#' @aliases verify,teal_data-method
51		#' @aliases verify,qenv.error-method
52		#'
53		#' @export
54	5x	setGeneric("verify", function(x) standardGeneric("verify"))
55		setMethod("verify", signature = "teal_data", definition = function(x) {
56	4x	if (x@verified) {
57	2x	return(x)
58		}
59	2x	x_name <- deparse(substitute(x))
60	2x	y <- eval_code(teal_data(), get_code(x))
61
62	2x	if (inherits(y, "qenv.error")) {
63	!	stop(conditionMessage(y), call. = FALSE)
64		}
65
66	2x	reproduced <- isTRUE(all.equal(x@env, y@env))
67	2x	if (reproduced) {
68	1x	x@verified <- TRUE
69	1x	methods::validObject(x)
70	1x	x
71		} else {
72	1x	error <- "Code verification failed."
73
74	1x	objects_diff <- vapply(
75	1x	intersect(names(x@env), names(y@env)),
76	1x	function(element) {
77	1x	isTRUE(all.equal(x@env[[element]], y@env[[element]]))
78		},
79	1x	logical(1)
80		)
81
82	1x	names_diff_other <- setdiff(names(y@env), names(x@env))
83	1x	names_diff_inenv <- setdiff(names(x@env), names(y@env))
84
85	1x	if (length(objects_diff)) {
86	1x	error <- c(
87	1x	error,
88	1x	paste0("Object(s) recreated with code that have different structure in ", x_name, ":"),
89	1x	paste0(" \u2022 ", names(which(!objects_diff)))
90		)
91		}
92	1x	if (length(names_diff_inenv)) {
93	!	error <- c(
94	!	error,
95	!	paste0("Object(s) not created with code that exist in ", x_name, ":"),
96	!	paste0(" \u2022 ", names_diff_inenv)
97		)
98		}
99	1x	if (length(names_diff_other)) {
100	!	error <- c(
101	!	error,
102	!	paste0("Object(s) created with code that do not exist in ", x_name, ":"),
103	!	paste0(" \u2022 ", names_diff_other)
104		)
105		}
106
107	1x	stop(paste(error, collapse = "\n"), call. = FALSE)
108		}
109		})
110		setMethod("verify", signature = "qenv.error", definition = function(x) {
111	1x	stop(conditionMessage(x), call. = FALSE)
112		})

1		#' Topological graph sort
2		#'
3		#' Graph is a `list` which for each node contains a vector of child nodes
4		#' in the returned list, parents appear before their children.
5		#'
6		#' Implementation of `Kahn` algorithm with a modification to maintain the order of input elements.
7		#'
8		#' @param graph (`named list`) with node vector elements
9		#' @keywords internal
10		topological_sort <- function(graph) {
11		# compute in-degrees
12	428x	in_degrees <- list()
13	428x	for (node in names(graph)) {
14	168x	in_degrees[[node]] <- 0
15	168x	for (to_edge in graph[[node]]) {
16	168x	in_degrees[[to_edge]] <- 0
17		}
18		}
19
20	428x	for (node in graph) {
21	168x	for (to_edge in node) {
22	168x	in_degrees[[to_edge]] <- in_degrees[[to_edge]] + 1
23		}
24		}
25
26		# sort
27	428x	visited <- 0
28	428x	sorted <- list()
29	428x	zero_in <- list()
30	428x	for (node in names(in_degrees)) {
31	142x	if (in_degrees[[node]] == 0) zero_in <- append(zero_in, node)
32		}
33	428x	zero_in <- rev(zero_in)
34
35	428x	while (length(zero_in) != 0) {
36	283x	visited <- visited + 1
37	283x	sorted <- c(zero_in[[1]], sorted)
38	283x	for (edge_to in graph[[zero_in[[1]]]]) {
39	160x	in_degrees[[edge_to]] <- in_degrees[[edge_to]] - 1
40	160x	if (in_degrees[[edge_to]] == 0) {
41	141x	zero_in <- append(zero_in, edge_to, 1)
42		}
43		}
44	283x	zero_in[[1]] <- NULL
45		}
46
47	428x	if (visited != length(in_degrees)) {
48	4x	stop(
49	4x	"Graph is not a directed acyclic graph. Cycles involving nodes: ",
50	4x	paste0(setdiff(names(in_degrees), sorted), collapse = " ")
51		)
52		} else {
53	424x	return(sorted)
54		}
55		}
56
57		#' Checks whether a graph is a `Directed Acyclic Graph (DAG)`
58		#'
59		#' @inheritParams topological_sort
60		#' @return `logical(1)` `TRUE` if the graph is a `DAG`; `FALSE` otherwise
61		#' @keywords internal
62		is_dag <- function(graph) {
63	423x	inherits(try(topological_sort(graph), silent = TRUE), "try-error")
64		}

1		#' Names of data sets in `teal_data` object
2		#'
3		#' Get or set the value of the `datanames` slot.
4		#'
5		#' The `@datanames` slot in a `teal_data` object specifies which of the variables stored in its environment
6		#' (the `@env` slot) are data sets to be taken into consideration.
7		#' The contents of `@datanames` can be specified upon creation and default to all variables in `@env`.
8		#' Variables created later, which may well be data sets, are not automatically considered such.
9		#' Use this function to update the slot.
10		#'
11		#' @param x (`teal_data`) object to access or modify
12		#' @param value (`character`) new value for `@datanames`; all elements must be names of variables existing in `@env`
13		#'
14		#' @return The contents of `@datanames` or `teal_data` object with updated `@datanames`.
15		#'
16		#' @examples
17		#' td <- teal_data(iris = iris)
18		#' td <- within(td, mtcars <- mtcars)
19		#' datanames(td)
20		#'
21		#' datanames(td) <- c("iris", "mtcars")
22		#' datanames(td)
23		#'
24		#' @name datanames
25		#' @aliases datanames,teal_data-method
26		#' @aliases datanames<-,teal_data,character-method
27		#' @aliases datanames,qenv.error-method
28		#' @aliases datanames<-,qenv.error,character-method
29
30		#' @rdname datanames
31		#' @export
32	6x	setGeneric("datanames", function(x) standardGeneric("datanames"))
33		setMethod("datanames", signature = "teal_data", definition = function(x) {
34	4x	x@datanames
35		})
36		setMethod("datanames", signature = "qenv.error", definition = function(x) {
37	2x	NULL
38		})
39
40		#' @rdname datanames
41		#' @export
42	4x	setGeneric("datanames<-", function(x, value) standardGeneric("datanames<-"))
43		setMethod("datanames<-", signature = c("teal_data", "character"), definition = function(x, value) {
44	3x	checkmate::assert_subset(value, names(x@env))
45	2x	x@datanames <- value
46	2x	methods::validObject(x)
47	2x	x
48		})
49		setMethod("datanames<-", signature = c("qenv.error", "character"), definition = function(x, value) {
50	1x	methods::validObject(x)
51	1x	x
52		})

1		#' @rdname join_keys
2		#' @order 7
3		#' @export
4		format.join_keys <- function(x, ...) {
5	6x	if (length(x) > 0) {
6	5x	my_parents <- parents(x)
7	5x	names_sorted <- topological_sort(my_parents)
8	5x	names <- union(names_sorted, names(x))
9	5x	x_implicit <- update_keys_given_parents(x)
10	5x	out <- lapply(names, function(i) {
11	15x	out_i <- lapply(union(i, names(x[[i]])), function(j) {
12	35x	direction <- if (identical(my_parents[[j]], i)) {
13		" <-- "
14	35x	} else if (identical(my_parents[[i]], j)) {
15		" --> "
16	35x	} else if (!identical(i, j)) {
17		" <-> "
18		} else {
19		""
20		}
21
22	35x	keys <- x[[i]][[j]]
23	35x	sprintf(
24	35x	"%s%s: [%s]",
25	35x	direction, j,
26	35x	if (length(keys) == 0) "no primary keys" else toString(keys)
27		)
28		})
29
30	15x	implicit_datasets <- setdiff(names(x_implicit[[i]]), names(x[[i]]))
31	15x	if (length(implicit_datasets) > 0) {
32	2x	out_i <- c(
33	2x	out_i,
34	2x	paste0(
35	2x	" --* (implicit via parent with): ",
36	2x	paste(implicit_datasets, collapse = ", ")
37		)
38		)
39		}
40
41	15x	paste(out_i, collapse = "\n")
42		})
43	5x	paste(
44	5x	c(
45	5x	sprintf("A join_keys object containing foreign keys between %s datasets:", length(x)),
46	5x	out
47		),
48	5x	collapse = "\n"
49		)
50		} else {
51	1x	"An empty join_keys object."
52		}
53		}
54
55		#' @rdname join_keys
56		#' @order 7
57		#' @export
58		print.join_keys <- function(x, ...) {
59	1x	cat(format(x, ...), "\n")
60	1x	invisible(x)
61		}

1		#' Manage relationships between datasets using `join_keys`
2		#' @order 1
3		#' @name join_keys
4		#'
5		#' @usage ## Constructor, getter and setter
6		#' join_keys(...)
7		#'
8		#' @description
9		#' Facilitates the creation and retrieval of relationships between datasets.
10		#' `join_keys` class extends `list` and contains keys connecting pairs of datasets.
11		#' Each element of the list contains keys for specific dataset.
12		#' Each dataset can have a relationship with itself (primary key) and with other datasets.
13		#'
14		#' Note that `join_keys` list is symmetrical and assumes a default direction, that is:
15		#' when keys are set between `ds1` and `ds2`, it defines `ds1` as the parent
16		#' in a parent-child relationship and the mapping is automatically mirrored between
17		#' `ds2` and `ds1`.
18		#'
19		#' @section Methods (by class):
20		#' - `join_keys()`: Returns an empty `join_keys` object when called without arguments.
21		#' - `join_keys(join_keys)`: Returns itself.
22		#' - `join_keys(teal_data)`: Returns the `join_keys` object contained in `teal_data` object.
23		#' - `join_keys(...)`: Creates a new object with one or more `join_key_set` parameters.
24		#'
25		#' @param ... (optional)
26		#' - either `teal_data` or `join_keys` object to extract `join_keys`
27		#' - or any number of `join_key_set` objects to create `join_keys`
28		#' - or nothing to create an empty `join_keys`
29		#' @param value For `x[i, j, directed = TRUE)] <- value` (named/unnamed `character`)
30		#' Column mapping between datasets.
31		#'
32		#' For `join_keys(x) <- value`: (`join_key_set` or list of `join_key_set`) relationship
33		#' pairs to add to `join_keys` list.
34		#'
35		#'
36		#' @return `join_keys` object.
37		#'
38		#' @examples
39		#' # Creating a new join keys ----
40		#'
41		#' jk <- join_keys(
42		#' join_key("ds1", "ds1", "pk1"),
43		#' join_key("ds2", "ds2", "pk2"),
44		#' join_key("ds3", "ds3", "pk3"),
45		#' join_key("ds1", "ds2", c(pk1 = "pk2")),
46		#' join_key("ds1", "ds3", c(pk1 = "pk3"))
47		#' )
48		#'
49		#' jk
50		#'
51		#' @export
52		#'
53		#' @seealso [join_key()] for creating `join_keys_set`,
54		#' [parents()] for parent operations,
55		#' [teal_data()] for `teal_data` constructor _and_
56		#' [default_cdisc_join_keys] for default `CDISC` keys.
57		#'
58		join_keys <- function(...) {
59	786x	if (missing(...)) {
60	224x	return(new_join_keys())
61		}
62	562x	x <- rlang::list2(...)
63	562x	if (length(x) == 1L) {
64	497x	UseMethod("join_keys", x[[1]])
65		} else {
66	65x	join_keys.default(...)
67		}
68		}
69
70		#' @rdname join_keys
71		#' @order 1
72		#' @export
73		join_keys.default <- function(...) {
74	110x	c(new_join_keys(), ...)
75		}
76
77		#' @rdname join_keys
78		#' @order 1
79		#' @export
80		join_keys.join_keys <- function(...) {
81	447x	x <- rlang::list2(...)
82	447x	x[[1]]
83		}
84
85		#' @rdname join_keys
86		#' @order 1
87		#' @export
88		join_keys.teal_data <- function(...) {
89	5x	x <- rlang::list2(...)
90	5x	x[[1]]@join_keys
91		}
92
93		#' @rdname join_keys
94		#' @order 5
95		#'
96		#' @section Functions:
97		#' - `join_keys(x) <- value`: Assignment of the `join_keys` in object with `value`.
98		#' `value` needs to be an object of class `join_keys` or `join_key_set`.
99		#'
100		#' @param x (`join_keys`) empty object to set the new relationship pairs.
101		#' `x` is typically an object of `join_keys` class. When called with the `join_keys(x)`
102		#' or `join_keys(x) <- value` then it can also take a supported class (`teal_data`, `join_keys`)
103		#'
104		#' @export
105		`join_keys<-` <- function(x, value) {
106	8x	checkmate::assert_class(value, classes = c("join_keys", "list"))
107	8x	UseMethod("join_keys<-", x)
108		}
109
110		#' @rdname join_keys
111		#' @order 5
112		#' @export
113		#' @examples
114		#'
115		#' # Assigning keys via join_keys(x)[i, j] <- value ----
116		#'
117		#' obj <- join_keys()
118		#' # or
119		#' obj <- teal_data()
120		#'
121		#' join_keys(obj)["ds1", "ds1"] <- "pk1"
122		#' join_keys(obj)["ds2", "ds2"] <- "pk2"
123		#' join_keys(obj)["ds3", "ds3"] <- "pk3"
124		#' join_keys(obj)["ds1", "ds2"] <- c(pk1 = "pk2")
125		#' join_keys(obj)["ds1", "ds3"] <- c(pk1 = "pk3")
126		#'
127		#' identical(jk, join_keys(obj))
128		`join_keys<-.join_keys` <- function(x, value) {
129	6x	value
130		}
131
132		#' @rdname join_keys
133		#' @order 5
134		#' @export
135		#' @examples
136		#'
137		#' # Setter for join_keys within teal_data ----
138		#'
139		#' td <- teal_data()
140		#' join_keys(td) <- jk
141		#'
142		#' join_keys(td)["ds1", "ds2"] <- "new_key"
143		#' join_keys(td) <- c(join_keys(td), join_keys(join_key("ds3", "ds2", "key3")))
144		#' join_keys(td)
145		`join_keys<-.teal_data` <- function(x, value) {
146	2x	join_keys(x@join_keys) <- value
147	2x	x
148		}
149
150		#' Internal constructor
151		#'
152		#' @return an empty `join_keys` list
153		#'
154		#' @keywords internal
155		new_join_keys <- function() {
156	354x	structure(
157	354x	list(),
158	354x	class = c("join_keys", "list"),
159	354x	"parents" = list()
160		)
161		}

1		#' Get and set parents in `join_keys` object
2		#'
3		#' `parents()` facilitates the creation of dependencies between datasets by
4		#' assigning a parent-child relationship.
5		#'
6		#' Each element is defined by a `list` element, where `list("child" = "parent")`.
7		#'
8		#' @param x (`join_keys` or `teal_data`) object that contains "parents" information
9		#' to retrieve or manipulate.
10		#'
11		#' @return a `list` of `character` representing the parents.
12		#'
13		#' @export
14		#' @seealso [join_keys()]
15		parents <- function(x) {
16	606x	UseMethod("parents", x)
17		}
18
19		#' @describeIn parents Retrieves parents of `join_keys` object.
20		#' @export
21		#' @examples
22		#' # Get parents of join_keys ---
23		#'
24		#' jk <- default_cdisc_join_keys["ADEX"]
25		#' parents(jk)
26		parents.join_keys <- function(x) {
27	1x	if (is.null(attr(x, "parents"))) list() else attr(x, "parents")
28		}
29
30		#' @describeIn parents Retrieves parents of `join_keys` inside `teal_data` object.
31		#' @export
32		#' @examples
33		#'
34		#' # Get parents of join_keys inside teal_data object ---
35		#'
36		#' td <- teal_data(
37		#' ADSL = rADSL,
38		#' ADTTE = rADTTE,
39		#' ADRS = rADRS,
40		#' join_keys = default_cdisc_join_keys[c("ADSL", "ADTTE", "ADRS")]
41		#' )
42		#' parents(td)
43		parents.teal_data <- function(x) {
44	1x	parents(x@join_keys)
45		}
46
47		#' @describeIn parents Assignment of parents in `join_keys` object.
48		#'
49		#' @param value (`named list`) of `character` vectors.
50		#'
51		#' @export
52		`parents<-` <- function(x, value) {
53	428x	UseMethod("parents<-", x)
54		}
55
56		#' @describeIn parents Assignment of parents of `join_keys` object.
57		#' @export
58		#' @examples
59		#'
60		#' # Assignment of parents ---
61		#'
62		#' jk <- join_keys(
63		#' join_key("ds1", "ds2", "id"),
64		#' join_key("ds5", "ds6", "id"),
65		#' join_key("ds7", "ds6", "id")
66		#' )
67		#'
68		#' parents(jk) <- list(ds2 = "ds1")
69		#'
70		#' # Setting individual parent-child relationship
71		#'
72		#' parents(jk)["ds6"] <- "ds5"
73		#' parents(jk)["ds7"] <- "ds6"
74		`parents<-.join_keys` <- function(x, value) {
75	427x	checkmate::assert_list(value, types = "character", names = "named")
76
77	424x	new_parents <- list()
78
79	424x	for (dataset in names(value)) {
80		# Custom .var.name so it is verbose and helpful for users
81	161x	checkmate::assert_string(value[[dataset]], .var.name = sprintf("value[[\"%s\"]]", dataset))
82
83	160x	parent <- new_parents[[dataset]]
84	160x	checkmate::assert(
85	160x	checkmate::check_null(parent),
86	160x	checkmate::check_true(
87	160x	length(parent) == 0 &&
88	160x	length(value[[dataset]]) == 0
89		),
90	160x	checkmate::check_true(parent == value[[dataset]]),
91	160x	"Please check the difference between provided datasets parents and provided join_keys parents.",
92	160x	.var.name = "value"
93		)
94	160x	if (is.null(parent)) {
95	160x	new_parents[[dataset]] <- value[[dataset]]
96		}
97		}
98
99	423x	if (is_dag(new_parents)) {
100	4x	stop("Cycle detected in a parent and child dataset graph.")
101		}
102
103	419x	attr(x, "parents") <- new_parents
104
105	419x	assert_parent_child(x)
106	418x	x
107		}
108
109		#' @describeIn parents Assignment of parents of `join_keys` inside `teal_data` object.
110		#' @export
111		#' @examples
112		#'
113		#' # Assignment of parents of join_keys inside teal_data object ---
114		#'
115		#' parents(td) <- list("ADTTE" = "ADSL") # replace existing
116		#' parents(td)["ADRS"] <- "ADSL" # add new parent
117		`parents<-.teal_data` <- function(x, value) {
118	1x	parents(x@join_keys) <- value
119	1x	x
120		}
121
122		#' @describeIn parents Getter for individual parent.
123		#'
124		#' @param dataset_name (`character(1)`) Name of dataset to query on their parent.
125		#'
126		#' @return For `parent(x, dataset_name)` returns `NULL` if parent does not exist.
127		#'
128		#' @export
129		#'
130		#' @examples
131		#'
132		#' # Get individual parent ---
133		#'
134		#' parent(jk, "ds2")
135		#' parent(td, "ADTTE")
136		parent <- function(x, dataset_name) {
137	105x	checkmate::assert_string(dataset_name)
138		# assert x is performed by parents()
139	105x	parents(x)[[dataset_name]]
140		}

1		#' The names of a `join_keys` object
2		#'
3		#' @inheritParams base::`names<-`
4		#' @export
5		`names<-.join_keys` <- function(x, value) {
6	2x	new_x <- unclass(x)
7	2x	parent_list <- parents(x)
8		# Update inner keys
9	2x	for (old_name in setdiff(names(new_x), value)) {
10	3x	old_entry <- new_x[[old_name]]
11	3x	new_name <- value[names(new_x) == old_name]
12
13		# Change 2nd-tier first
14	3x	for (sub_name in names(old_entry)) {
15	7x	names(new_x[[sub_name]])[names(new_x[[sub_name]]) == old_name] <- new_name
16		}
17
18		# Change in first tier
19	3x	names(new_x)[names(new_x) == old_name] <- new_name
20
21		# changing name in the parents
22	3x	if (length(parent_list)) {
23	3x	names(parent_list)[names(parent_list) == old_name] <- new_name
24	3x	ind <- vapply(parent_list, identical, logical(1), old_name)
25	3x	parent_list[ind] <- new_name
26	3x	attr(new_x, "parents") <- parent_list
27		}
28		}
29
30	2x	class(new_x) <- c("join_keys", "list")
31	2x	new_x
32		}

1		#' Comprehensive data integration function for `teal` applications
2		#'
3		#' @description
4		#' `r lifecycle::badge("stable")`
5		#'
6		#' Universal function to pass data to teal application.
7		#'
8		#' @param ... any number of objects (presumably data objects) provided as `name = value` pairs.
9		#'
10		#' @param join_keys (`join_keys`) object or a single (`join_key_set`) object.
11		#'
12		#' (optional) object with dataset column relationships used for joining.
13		#' If empty then no joins between pairs of objects.
14		#'
15		#' @param code (`character`, `language`) optional code to reproduce the datasets provided in `...`.
16		#' Note this code is not executed and the `teal_data` may not be reproducible
17		#'
18		#' @param check (`logical`) `r lifecycle::badge("deprecated")`
19		#' Use [verify()] to verify code reproducibility .
20		#'
21		#' @return A `teal_data` object.
22		#'
23		#' @export
24		#'
25		#' @examples
26		#' teal_data(x1 = iris, x2 = mtcars)
27		#'
28		teal_data <- function(...,
29		join_keys = teal.data::join_keys(),
30		code = character(0),
31		check) {
32	76x	data_objects <- rlang::list2(...)
33	76x	if (inherits(join_keys, "join_key_set")) {
34	!	join_keys <- teal.data::join_keys(join_keys)
35		}
36	76x	if (!missing(check)) {
37	!	lifecycle::deprecate_stop(
38	!	when = "0.4.0",
39	!	"teal_data(
40	!	check = 'check argument is deprecated. Use `verify()` to verify code reproducibility.
41	!	Find more information on https://github.com/insightsengineering/teal/discussions/945'
42		)"
43		)
44		}
45
46		if (
47	76x	checkmate::test_list(
48	76x	data_objects,
49	76x	types = c("TealDataConnector", "TealDataset", "TealDatasetConnector"),
50	76x	min.len = 1
51		)
52		) {
53	!	lifecycle::deprecate_stop(
54	!	when = "0.4.0",
55	!	"teal_data(
56	!	data_objects = 'should use data directly. Using TealDatasetConnector and TealDataset is deprecated.
57	!	Find more information on https://github.com/insightsengineering/teal/discussions/945'
58		)"
59		)
60		} else {
61	76x	if (length(data_objects) > 0 && !checkmate::test_names(names(data_objects), type = "named")) {
62	!	stop("Dot (`...`) arguments on `teal_data()` must be named.")
63		}
64	76x	new_teal_data(
65	76x	data = data_objects,
66	76x	code = code,
67	76x	join_keys = join_keys
68		)
69		}
70		}

1		#' Generate sample `CDISC` datasets
2		#'
3		#' Retrieves example `CDISC` datasets for use in examples and testing.
4		#'
5		#' This function returns a dummy dataset and should only be used within `teal.data`.
6		#' Note that the datasets are not created and maintained in `teal.data`, they are retrieved its dependencies.
7		#'
8		#' @param dataname (`character(1)`) name of a `CDISC` dataset
9		#'
10		#' @return A `CDISC` dataset as a `data.frame`.
11		#'
12		#' @export
13		example_cdisc_data <- function(dataname = c("ADSL", "ADAE", "ADLB", "ADCM", "ADEX", "ADRS", "ADTR", "ADTTE", "ADVS")) {
14	!	dataname <- sprintf("r%s", match.arg(dataname))
15	!	dynGet(dataname, ifnotfound = stop(dataname, " not found"), inherits = TRUE)
16		}

1		#' Test if two objects are (nearly) equal
2		#'
3		#' `all.equal(target, current)` is a utility to compare `join_keys` objects target
4		#' and current testing `near equality`.
5		#'
6		#' If they are different, comparison is still made to some extent, and a report
7		#' of the differences is returned.
8		#' Do not use `all.equal` directly in if expressions—either use `isTRUE(all.equal(....))`
9		#' or identical if appropriate.
10		#'
11		#' The parents attribute comparison tolerates `NULL` and empty lists and will find
12		#' no difference.
13		#'
14		#' The list containing all the relationships is treated like a map and ignores
15		#' entries with `NULL` if they exist.
16		#'
17		#' @inheritParams base::all.equal
18		#' @param ... further arguments for different methods. Not used with `join_keys`.
19		#'
20		#' @seealso [base::all.equal()]
21		#' @keywords internal
22		#'
23		all.equal.join_keys <- function(target, current, ...) {
24	21x	.as_map <- function(.x) {
25	42x	old_attributes <- attributes(.x)
26		# Keep only non-list attributes
27	42x	old_attributes[["names"]] <- NULL
28	42x	old_attributes[["original_class"]] <- old_attributes[["class"]]
29	42x	old_attributes[["class"]] <- NULL
30	42x	old_attributes[["parents"]] <- if (!length(old_attributes[["parents"]])) {
31	18x	list()
32		} else {
33	24x	old_attributes[["parents"]][order(names(old_attributes[["parents"]]))]
34		}
35	42x	attr(.x, "class") <- "list"
36
37		# Remove nulls
38	42x	.x <- Filter(Negate(is.null), .x)
39
40		# Sort named components, preserving positions of unnamed
41	42x	nx <- rlang::names2(.x)
42	42x	is_named <- nx != ""
43	42x	if (any(is_named)) {
44	42x	idx <- seq_along(.x)
45	42x	idx[is_named] <- idx[is_named][order(nx[is_named])]
46	42x	.x <- .x[idx]
47		}
48	42x	new_attributes <- if (is.null(attributes(.x))) list() else attributes(.x)
49	42x	attributes(.x) <- utils::modifyList(old_attributes, new_attributes)
50	42x	.x
51		}
52	21x	x <- .as_map(target)
53	21x	y <- .as_map(current)
54	21x	all.equal(x, y)
55		}

1		#' Get code from `teal_data` object
2		#'
3		#' Retrieve code from `teal_data` object.
4		#'
5		#' Retrieve code stored in `@code`, which (in principle) can be used to recreate all objects found in `@env`.
6		#' Use `datanames` to limit the code to one or more of the datasets enumerated in `@datanames`.
7		#' If the code has not passed verification (with [`verify()`]), a warning will be prepended.
8		#'
9		#' @section Extracting dataset-specific code:
10		#' When `datanames` is specified, the code returned will be limited to the lines needed to _create_
11		#' the requested datasets. The code stored in the `@code` slot is analyzed statically to determine
12		#' which lines the datasets of interest depend upon. The analysis works well when objects are created
13		#' with standard infix assignment operators (see `?assignOps`) but it can fail in some situations.
14		#'
15		#' Consider the following examples:
16		#'
17		#' _Case 1: Usual assignments._
18		#' ```r
19		#' data <- teal_data() \|>
20		#' within({
21		#' foo <- function(x) {
22		#' x + 1
23		#' }
24		#' x <- 0
25		#' y <- foo(x)
26		#' })
27		#' get_code(data, datanames = "y")
28		#' ```
29		#' `x` has no dependencies, so `get_code(data, datanames = "x")` will return only the second call.\cr
30		#' `y` depends on `x` and `foo`, so `get_code(data, datanames = "y")` will contain all three calls.
31		#'
32		#' _Case 2: Some objects are created by a function's side effects._
33		#' ```r
34		#' data <- teal_data() \|>
35		#' within({
36		#' foo <- function() {
37		#' x <<- x + 1
38		#' }
39		#' x <- 0
40		#' foo()
41		#' y <- x
42		#' })
43		#' get_code(data, datanames = "y")
44		#' ```
45		#' Here, `y` depends on `x` but `x` is modified by `foo` as a side effect (not by reassignment)
46		#' and so `get_code(data, datanames = "y")` will not return the `foo()` call.\cr
47		#' To overcome this limitation, code dependencies can be specified manually.
48		#' Lines where side effects occur can be flagged by adding "`# @linksto <object name>`" at the end.\cr
49		#' Note that `within` evaluates code passed to `expr` as is and comments are ignored.
50		#' In order to include comments in code one must use the `eval_code` function instead.
51		#'
52		#' ```r
53		#' data <- teal_data() \|>
54		#' eval_code("
55		#' foo <- function() {
56		#' x <<- x + 1
57		#' }
58		#' x <- 0
59		#' foo() # @linksto x
60		#' y <- x
61		#' ")
62		#' get_code(data, datanames = "y")
63		#' ```
64		#' Now the `foo()` call will be properly included in the code required to recreate `y`.
65		#'
66		#' Note that two functions that create objects as side effects, `assign` and `data`, are handled automatically.
67		#'
68		#' Here are known cases where manual tagging is necessary:
69		#' - non-standard assignment operators, _e.g._ `%<>%`
70		#' - objects used as conditions in `if` statements: `if (<condition>)`
71		#' - objects used to iterate over in `for` loops: `for(i in <sequence>)`
72		#' - creating and evaluating language objects, _e.g._ `eval(<call>)`
73		#'
74		#'
75		#' @param object (`teal_data`)
76		#' @param datanames `r lifecycle::badge("experimental")` (`character`) vector of dataset names to return the code for.
77		#' For more details see the "Extracting dataset-specific code" section.
78		#' @param deparse (`logical`) flag specifying whether to return code as `character` (`deparse = TRUE`) or as
79		#' `expression` (`deparse = FALSE`).
80		#'
81		#' @return
82		#' Either a character string or an expression. If `datanames` is used to request a specific dataset,
83		#' only code that _creates_ that dataset (not code that uses it) is returned. Otherwise, all contents of `@code`.
84		#'
85		#' @examples
86		#' tdata1 <- teal_data()
87		#' tdata1 <- within(tdata1, {
88		#' a <- 1
89		#' b <- a^5
90		#' c <- list(x = 2)
91		#' })
92		#' get_code(tdata1)
93		#' get_code(tdata1, datanames = "a")
94		#' get_code(tdata1, datanames = "b")
95		#'
96		#' tdata2 <- teal_data(x1 = iris, code = "x1 <- iris")
97		#' get_code(tdata2)
98		#' get_code(verify(tdata2))
99		#'
100		#' @rdname get_code
101		#' @aliases get_code,teal_data-method
102		#'
103		#' @export
104		setMethod("get_code", signature = "teal_data", definition = function(object, deparse = TRUE, datanames = NULL) {
105	48x	checkmate::assert_character(datanames, min.len = 1L, null.ok = TRUE)
106	48x	checkmate::assert_flag(deparse)
107
108	48x	code <- if (!is.null(datanames)) {
109	44x	get_code_dependency(object@code, datanames)
110		} else {
111	4x	object@code
112		}
113
114	48x	if (!object@verified) {
115	11x	code <- c("warning('Code was not verified for reproducibility.')", code)
116		}
117
118	48x	if (deparse) {
119	47x	if (length(code) == 0) {
120	1x	code
121		} else {
122	46x	paste(code, collapse = "\n")
123		}
124		} else {
125	1x	parse(text = paste(c("{", code, "}"), collapse = "\n"), keep.source = TRUE)
126		}
127		})

1		setOldClass("join_keys")
2
3		#' Reproducible data
4		#'
5		#' Reproducible data container class. Inherits code tracking behavior from [`teal.code::qenv-class`].
6		#'
7		#' This class provides an isolated environment in which to store and process data with all code being recorded.
8		#' The environment, code, data set names, and data joining keys are stored in their respective slots.
9		#' These slots should never be accessed directly, use the provided get/set functions.
10		#'
11		#' As code is evaluated in `teal_data`, messages and warnings are stored in their respective slots.
12		#' If errors are raised, a `qenv.error` object is returned.
13		#'
14		#' @name teal_data-class
15		#' @rdname teal_data-class
16		#'
17		#' @slot env (`environment`) environment containing data sets and possibly auxiliary variables.
18		#' Access variables with [get_var()] or [`[[`].
19		#' No setter provided. Evaluate code to add variables into `@env`.
20		#' @slot code (`character`) vector representing code necessary to reproduce the contents of `@env`.
21		#' Access with [get_code()].
22		#' No setter provided. Evaluate code to append code to the slot.
23		#' @slot id (`integer`) random identifier assigned to each element of `@code`. Used internally.
24		#' @slot warnings (`character`) vector of warnings raised when evaluating code.
25		#' Access with [get_warnings()].
26		#' @slot messages (`character`) vector of messages raised when evaluating code.
27		#' @slot join_keys (`join_keys`) object specifying joining keys for data sets in `@env`.
28		#' Access or modify with [join_keys()].
29		#' @slot datanames (`character`) vector of names of data sets in `@env`.
30		#' Used internally to distinguish them from auxiliary variables.
31		#' Access or modify with [datanames()].
32		#' @slot verified (`logical(1)`) flag signifying that code in `@code` has been proven to yield contents of `@env`.
33		#' Used internally. See [`verify()`] for more details.
34		#'
35		#' @import teal.code
36		#' @keywords internal
37		setClass(
38		Class = "teal_data",
39		contains = "qenv",
40		slots = c(join_keys = "join_keys", datanames = "character", verified = "logical"),
41		prototype = list(
42		join_keys = join_keys(),
43		datanames = character(0),
44		verified = logical(0)
45		)
46		)
47
48		#' Initialize `teal_data` object
49		#'
50		#' @name new_teal_data
51		#'
52		#' @param data (`named list`) of data objects.
53		#' @param code (`character` or `language`) code to reproduce the `data`.
54		#' Accepts and stores comments also.
55		#' @param join_keys (`join_keys`) object
56		#' @param datanames (`character`) names of datasets passed to `data`.
57		#' Needed when non-dataset objects are needed in the `env` slot.
58		#' @rdname new_teal_data
59		#' @keywords internal
60		new_teal_data <- function(data,
61		code = character(0),
62		join_keys = join_keys(),
63		datanames = union(names(data), names(join_keys))) {
64	76x	checkmate::assert_list(data)
65	76x	checkmate::assert_class(join_keys, "join_keys")
66	45x	if (is.null(datanames)) datanames <- character(0) # todo: allow to specify
67	76x	checkmate::assert_character(datanames)
68	76x	if (!any(is.language(code), is.character(code))) {
69	!	stop("`code` must be a character or language object.")
70		}
71
72	76x	if (is.language(code)) {
73	2x	code <- paste(lang2calls(code), collapse = "\n")
74		}
75	76x	if (length(code)) {
76	13x	code <- paste(code, collapse = "\n")
77		}
78	76x	verified <- (length(code) == 0L && length(data) == 0L)
79
80	76x	id <- sample.int(.Machine$integer.max, size = length(code))
81
82	76x	new_env <- rlang::env_clone(list2env(data), parent = parent.env(.GlobalEnv))
83	76x	lockEnvironment(new_env, bindings = TRUE)
84
85	76x	methods::new(
86	76x	"teal_data",
87	76x	env = new_env,
88	76x	code = code,
89	76x	warnings = rep("", length(code)),
90	76x	messages = rep("", length(code)),
91	76x	id = id,
92	76x	join_keys = join_keys,
93	76x	datanames = datanames,
94	76x	verified = verified
95		)
96		}

1		#' @rdname join_keys
2		#' @order 4
3		#' @export
4		#' @examples
5		#' # Merging multiple `join_keys` objects ---
6		#'
7		#' jk_merged <- c(
8		#' jk,
9		#' join_keys(
10		#' join_key("ds4", keys = c("pk4", "pk4_2")),
11		#' join_key("ds3", "ds4", c(pk3 = "pk4_2"))
12		#' )
13		#' )
14		c.join_keys <- function(...) {
15	125x	x <- rlang::list2(...)
16	125x	checkmate::assert_list(x, types = c("join_keys", "join_key_set"))
17
18	122x	Reduce(
19	122x	init = join_keys(),
20	122x	x = x,
21	122x	f = function(.x, .y) {
22	394x	out <- utils::modifyList(.x, .y, keep.null = FALSE)
23	394x	parents(out) <- utils::modifyList(attr(.x, "parents"), attr(.y, "parents"), keep.null = FALSE)
24	392x	out
25		}
26		)
27		}
28
29		#' @rdname join_keys
30		#' @order 4
31		#' @export
32		#' @examples
33		#' # note: merge can be performed with both join_keys and join_key_set
34		#'
35		#' jk_merged <- c(
36		#' jk_merged,
37		#' join_key("ds5", keys = "pk5"),
38		#' join_key("ds1", "ds5", c(pk1 = "pk5"))
39		#' )
40		c.join_key_set <- function(...) {
41	2x	c.join_keys(...)
42		}

1		#' Data input for `teal` app
2		#'
3		#' @description
4		#' `r lifecycle::badge("stable")`
5		#'
6		#' Function is a wrapper around [teal_data()] and guesses `join_keys`
7		#' for given datasets whose names match ADAM datasets names.
8		#'
9		#' @inheritParams teal_data
10		#' @param join_keys (`join_keys`) object or a single (`join_key_set`) object
11		#'
12		#' (optional) object with datasets column names used for joining.
13		#' If empty then it would be automatically derived basing on intersection of datasets primary keys.
14		#' For ADAM datasets it would be automatically derived.
15		#'
16		#' @return A `teal_data` object.
17		#'
18		#' @details This function checks if there were keys added to all data sets.
19		#'
20		#' @examples
21		#' data <- cdisc_data(
22		#' join_keys = join_keys(
23		#' join_key("ADSL", "ADTTE", c("STUDYID" = "STUDYID", "USUBJID" = "USUBJID"))
24		#' )
25		#' )
26		#'
27		#' data <- within(data, {
28		#' ADSL <- example_cdisc_data("ADSL")
29		#' ADTTE <- example_cdisc_data("ADTTE")
30		#' })
31		#'
32		#' @export
33		#'
34		cdisc_data <- function(...,
35		join_keys = teal.data::default_cdisc_join_keys[names(rlang::list2(...))],
36		code = character(0),
37		check) {
38	1x	teal_data(..., join_keys = join_keys, code = code, check = check)
39		}