1 |
#' @rdname join_keys |
|
2 |
#' @order 7 |
|
3 |
#' @export |
|
4 |
format.join_keys <- function(x, ...) { |
|
5 | 6x |
if (length(x) > 0) { |
6 | 5x |
my_parents <- parents(x) |
7 | 5x |
names_sorted <- topological_sort(my_parents) |
8 | 5x |
names <- union(names_sorted, names(x)) |
9 | 5x |
x_implicit <- update_keys_given_parents(x) |
10 | 5x |
out <- lapply(names, function(i) { |
11 | 15x |
out_i <- lapply(union(i, names(x[[i]])), function(j) { |
12 | 35x |
direction <- if (identical(my_parents[[j]], i)) { |
13 |
" <-- " |
|
14 | 35x |
} else if (identical(my_parents[[i]], j)) { |
15 |
" --> " |
|
16 | 35x |
} else if (!identical(i, j)) { |
17 |
" <-> " |
|
18 |
} else { |
|
19 |
"" |
|
20 |
} |
|
21 | ||
22 | 35x |
keys <- x[[i]][[j]] |
23 | 35x |
sprintf( |
24 | 35x |
"%s%s: [%s]", |
25 | 35x |
direction, j, |
26 | 35x |
if (length(keys) == 0) "no primary keys" else toString(keys) |
27 |
) |
|
28 |
}) |
|
29 | ||
30 | 15x |
implicit_datasets <- setdiff(names(x_implicit[[i]]), names(x[[i]])) |
31 | 15x |
if (length(implicit_datasets) > 0) { |
32 | 2x |
out_i <- c( |
33 | 2x |
out_i, |
34 | 2x |
paste0( |
35 | 2x |
" --* (implicit via parent with): ", |
36 | 2x |
paste(implicit_datasets, collapse = ", ") |
37 |
) |
|
38 |
) |
|
39 |
} |
|
40 | ||
41 | 15x |
paste(out_i, collapse = "\n") |
42 |
}) |
|
43 | 5x |
paste( |
44 | 5x |
c( |
45 | 5x |
sprintf("A join_keys object containing foreign keys between %s datasets:", length(x)), |
46 | 5x |
out |
47 |
), |
|
48 | 5x |
collapse = "\n" |
49 |
) |
|
50 |
} else { |
|
51 | 1x |
"An empty join_keys object." |
52 |
} |
|
53 |
} |
|
54 | ||
55 |
#' @rdname join_keys |
|
56 |
#' @order 7 |
|
57 |
#' @export |
|
58 |
print.join_keys <- function(x, ...) { |
|
59 | 1x |
cat(format(x, ...), "\n") |
60 | 1x |
invisible(x) |
61 |
} |
1 |
#' Topological graph sort |
|
2 |
#' |
|
3 |
#' Graph is a `list` which for each node contains a vector of child nodes |
|
4 |
#' in the returned list, parents appear before their children. |
|
5 |
#' |
|
6 |
#' Implementation of `Kahn` algorithm with a modification to maintain the order of input elements. |
|
7 |
#' |
|
8 |
#' @param graph (`named list`) with node vector elements |
|
9 |
#' @keywords internal |
|
10 |
topological_sort <- function(graph) { |
|
11 |
# compute in-degrees |
|
12 | 460x |
in_degrees <- list() |
13 | 460x |
for (node in names(graph)) { |
14 | 207x |
in_degrees[[node]] <- 0 |
15 | 207x |
for (to_edge in graph[[node]]) { |
16 | 183x |
in_degrees[[to_edge]] <- 0 |
17 |
} |
|
18 |
} |
|
19 | ||
20 | 460x |
for (node in graph) { |
21 | 207x |
for (to_edge in node) { |
22 | 183x |
in_degrees[[to_edge]] <- in_degrees[[to_edge]] + 1 |
23 |
} |
|
24 |
} |
|
25 | ||
26 |
# sort |
|
27 | 460x |
visited <- 0 |
28 | 460x |
sorted <- list() |
29 | 460x |
zero_in <- list() |
30 | 460x |
for (node in names(in_degrees)) { |
31 | 178x |
if (in_degrees[[node]] == 0) zero_in <- append(zero_in, node) |
32 |
} |
|
33 | 460x |
zero_in <- rev(zero_in) |
34 | ||
35 | 460x |
while (length(zero_in) != 0) { |
36 | 334x |
visited <- visited + 1 |
37 | 334x |
sorted <- c(zero_in[[1]], sorted) |
38 | 334x |
for (edge_to in graph[[zero_in[[1]]]]) { |
39 | 175x |
in_degrees[[edge_to]] <- in_degrees[[edge_to]] - 1 |
40 | 175x |
if (in_degrees[[edge_to]] == 0) { |
41 | 156x |
zero_in <- append(zero_in, edge_to, 1) |
42 |
} |
|
43 |
} |
|
44 | 334x |
zero_in[[1]] <- NULL |
45 |
} |
|
46 | ||
47 | 460x |
if (visited != length(in_degrees)) { |
48 | 4x |
stop( |
49 | 4x |
"Graph is not a directed acyclic graph. Cycles involving nodes: ", |
50 | 4x |
paste0(setdiff(names(in_degrees), sorted), collapse = " ") |
51 |
) |
|
52 |
} else { |
|
53 | 456x |
return(sorted) |
54 |
} |
|
55 |
} |
|
56 | ||
57 |
#' Checks whether a graph is a `Directed Acyclic Graph (DAG)` |
|
58 |
#' |
|
59 |
#' @inheritParams topological_sort |
|
60 |
#' @return `logical(1)` `TRUE` if the graph is a `DAG`; `FALSE` otherwise |
|
61 |
#' @keywords internal |
|
62 |
is_dag <- function(graph) { |
|
63 | 435x |
inherits(try(topological_sort(graph), silent = TRUE), "try-error") |
64 |
} |
1 |
#' Check Compatibility of keys |
|
2 |
#' |
|
3 |
#' Helper function to assert if two key sets contain incompatible keys. |
|
4 |
#' |
|
5 |
#' @return Returns `TRUE` if successful, otherwise raises error. |
|
6 |
#' @keywords internal |
|
7 |
assert_compatible_keys <- function(join_key_1, join_key_2) { |
|
8 | 3x |
stop_message <- function(dataset_1, dataset_2) { |
9 | 1x |
stop( |
10 | 1x |
paste("cannot specify multiple different join keys between datasets:", dataset_1, "and", dataset_2) |
11 |
) |
|
12 |
} |
|
13 | ||
14 | 3x |
dataset_1_one <- names(join_key_1) |
15 | 3x |
dataset_2_one <- names(join_key_1[[1]]) |
16 | 3x |
keys_one <- join_key_1[[1]][[1]] |
17 | ||
18 | 3x |
dataset_1_two <- names(join_key_2) |
19 | 3x |
dataset_2_two <- names(join_key_2[[1]]) |
20 | 3x |
keys_two <- join_key_2[[1]][[1]] |
21 | ||
22 |
# if first datasets and the second datasets match and keys |
|
23 |
# must contain the same named elements |
|
24 | 3x |
if (dataset_1_one == dataset_1_two && dataset_2_one == dataset_2_two) { |
25 | 3x |
if (!identical(sort(keys_one), sort(keys_two))) { |
26 | 1x |
stop_message(dataset_1_one, dataset_2_one) |
27 |
} |
|
28 |
} |
|
29 | ||
30 |
# if first dataset of join_key_1 matches second dataset of join_key_2 |
|
31 |
# and the first dataset of join_key_2 must match second dataset of join_key_1 |
|
32 |
# and keys must contain the same elements but with names and values swapped |
|
33 | 2x |
if (dataset_1_one == dataset_2_two && dataset_2_one == dataset_1_two) { |
34 |
if ( |
|
35 | ! |
xor(length(keys_one) == 0, length(keys_two) == 0) || |
36 | ! |
!identical(sort(keys_one), sort(stats::setNames(names(keys_two), keys_two))) |
37 |
) { |
|
38 | ! |
stop_message(dataset_1_one, dataset_2_one) |
39 |
} |
|
40 |
} |
|
41 | ||
42 |
# otherwise they are compatible |
|
43 | 2x |
return(TRUE) |
44 |
} |
|
45 | ||
46 |
#' Validate parent-child key |
|
47 |
#' |
|
48 |
#' Helper function checks the parent-child relations are valid. |
|
49 |
#' |
|
50 |
#' @param x (`join_keys`) object to assert validity of relations |
|
51 |
#' |
|
52 |
#' @return `join_keys` invisibly |
|
53 |
#' |
|
54 |
#' @keywords internal |
|
55 |
assert_parent_child <- function(x) { |
|
56 | 443x |
jk <- join_keys(x) |
57 | 443x |
jk_parents <- parents(jk) |
58 | ||
59 | 443x |
checkmate::assert_class(jk, c("join_keys", "list")) |
60 | ||
61 | 443x |
if (!is.null(jk_parents)) { |
62 | 443x |
for (idx1 in seq_along(jk_parents)) { |
63 | 178x |
name_from <- names(jk_parents)[[idx1]] |
64 | 178x |
for (idx2 in seq_along(jk_parents[[idx1]])) { |
65 | 178x |
name_to <- jk_parents[[idx1]][[idx2]] |
66 | 178x |
keys_from <- jk[[name_from]][[name_to]] |
67 | 178x |
keys_to <- jk[[name_to]][[name_from]] |
68 | 178x |
if (length(keys_from) == 0 && length(keys_to) == 0) { |
69 | 1x |
stop(sprintf("No join keys from %s to its parent (%s) and vice versa", name_from, name_to)) |
70 |
} |
|
71 |
} |
|
72 |
} |
|
73 |
} |
|
74 | 442x |
invisible(x) |
75 |
} |
|
76 | ||
77 |
#' Verify key set compatibility |
|
78 |
#' |
|
79 |
#' Helper function to ensuring compatibility between two sets of keys |
|
80 |
#' |
|
81 |
#' @return Returns `TRUE` if successful, otherwise raises error. |
|
82 |
#' @keywords internal |
|
83 |
assert_compatible_keys2 <- function(x, y) { |
|
84 |
# Helper to flatten join_keys / join_key_set |
|
85 | 3x |
flatten_join_key_sets <- function(value) { |
86 | 6x |
value <- unclass(value) |
87 | 6x |
Reduce( |
88 | 6x |
init = list(), |
89 | 6x |
f = function(u, v, ...) { |
90 | 6x |
el <- value[v][[1]] |
91 | 6x |
res <- lapply(seq_along(el), function(ix) el[ix]) |
92 | 6x |
names(res) <- rep(v, length(res)) |
93 | 6x |
append(u, res) |
94 |
}, |
|
95 | 6x |
x = names(value) |
96 |
) |
|
97 |
} |
|
98 | ||
99 | 3x |
x <- flatten_join_key_sets(x) |
100 | 3x |
y <- flatten_join_key_sets(y) |
101 | ||
102 | 3x |
for (idx_1 in seq_along(x)) { |
103 | 3x |
for (idx_2 in seq_along(y)) { |
104 | 3x |
assert_compatible_keys(x[idx_1], y[idx_2]) |
105 |
} |
|
106 |
} |
|
107 | 2x |
TRUE |
108 |
} |
|
109 | ||
110 |
#' Updates the keys of the datasets based on the parents |
|
111 |
#' |
|
112 |
#' @param x (`join_keys`) object to update the keys. |
|
113 |
#' |
|
114 |
#' @return (`self`) invisibly for chaining |
|
115 |
#' |
|
116 |
#' @keywords internal |
|
117 |
update_keys_given_parents <- function(x) { |
|
118 | 12x |
jk <- join_keys(x) |
119 | ||
120 | 12x |
checkmate::assert_class(jk, "join_keys", .var.name = checkmate::vname(x)) |
121 | ||
122 | 12x |
datanames <- names(jk) |
123 | 12x |
for (d1_ix in seq_along(datanames)) { |
124 | 34x |
d1 <- datanames[[d1_ix]] |
125 | 34x |
d1_parent <- parent(jk, d1) |
126 | 34x |
for (d2 in datanames[-1 * seq.int(d1_ix)]) { |
127 | 38x |
if (length(jk[[d1]][[d2]]) == 0) { |
128 | 16x |
d2_parent <- parent(jk, d2) |
129 | ||
130 | 12x |
if (!identical(d1_parent, d2_parent) || length(d1_parent) == 0) next |
131 | ||
132 |
# both has the same parent -> common keys to parent |
|
133 | 4x |
keys_d1_parent <- sort(jk[[d1]][[d1_parent]]) |
134 | 4x |
keys_d2_parent <- sort(jk[[d2]][[d2_parent]]) |
135 | ||
136 | 4x |
common_ix_1 <- unname(keys_d1_parent) %in% unname(keys_d2_parent) |
137 | 4x |
common_ix_2 <- unname(keys_d2_parent) %in% unname(keys_d1_parent) |
138 | ||
139 |
# No common keys between datasets - leave empty |
|
140 | 1x |
if (all(!common_ix_1)) next |
141 | ||
142 | 3x |
fk <- structure( |
143 | 3x |
names(keys_d2_parent)[common_ix_2], |
144 | 3x |
names = names(keys_d1_parent)[common_ix_1] |
145 |
) |
|
146 | 3x |
jk[[d1]][[d2]] <- fk # mutate join key |
147 |
} |
|
148 |
} |
|
149 |
} |
|
150 |
# check parent child relation |
|
151 | 12x |
assert_parent_child(x = jk) |
152 | ||
153 | 12x |
jk |
154 |
} |
1 |
#' Show `teal_data` object |
|
2 |
#' |
|
3 |
#' Prints `teal_data` object. |
|
4 |
#' |
|
5 |
#' @param object (`teal_data`) |
|
6 |
#' @return Input `teal_data` object. |
|
7 |
#' @importFrom methods show |
|
8 |
#' @examples |
|
9 |
#' teal_data() |
|
10 |
#' teal_data(x = iris, code = "x = iris") |
|
11 |
#' verify(teal_data(x = iris, code = "x = iris")) |
|
12 |
#' @export |
|
13 |
setMethod("show", signature = "teal_data", function(object) { |
|
14 | ! |
if (object@verified) { |
15 | ! |
cat("\u2705\ufe0e", "verified teal_data object\n") |
16 |
} else { |
|
17 | ! |
cat("\u2716", "unverified teal_data object\n") |
18 |
} |
|
19 | ! |
methods::callNextMethod(object) |
20 | ! |
invisible(object) |
21 |
}) |
1 |
#' @rdname join_keys |
|
2 |
#' @order 2 |
|
3 |
#' |
|
4 |
#' @section Functions: |
|
5 |
#' - `x[names]`: Returns a subset of the `join_keys` object for |
|
6 |
#' given `names`, including parent `names` and symmetric mirror keys between |
|
7 |
#' `names` in the result. |
|
8 |
#' - `x[i, j]`: Returns join keys between datasets `i` and `j`, |
|
9 |
#' including implicit keys inferred from their relationship with a parent. |
|
10 |
#' |
|
11 |
#' @param i,j indices specifying elements to extract or replace. Index should be a |
|
12 |
#' a character vector, but it can also take numeric, logical, `NULL` or missing. |
|
13 |
#' |
|
14 |
#' @export |
|
15 |
#' |
|
16 |
#' @examples |
|
17 |
#' # Getter for join_keys --- |
|
18 |
#' |
|
19 |
#' jk["ds1", "ds2"] |
|
20 |
#' |
|
21 |
#' # Subsetting join_keys ---- |
|
22 |
#' |
|
23 |
#' jk["ds1"] |
|
24 |
#' jk[1:2] |
|
25 |
#' jk[c("ds1", "ds2")] |
|
26 |
#' |
|
27 |
`[.join_keys` <- function(x, i, j) { |
|
28 | 40x |
if (missing(i) && missing(j)) { |
29 |
# because: |
|
30 |
# - list(a = 1)[] returns list(a = 1) |
|
31 |
# - data.frame(a = 1)[] returns data.frame(a = 1) |
|
32 | 1x |
return(x) |
33 | 39x |
} else if (!missing(i) && is.null(i) || !missing(j) && is.null(j)) { |
34 |
# because list(a = 1)[NULL] returns NULL |
|
35 |
# data.frame(a = 1)[NULL, NULL] returns data.frame( |
|
36 | 2x |
return(join_keys()) |
37 | 37x |
} else if (!missing(i) && !missing(j)) { |
38 |
if ( |
|
39 | 8x |
!any( |
40 | 8x |
checkmate::test_string(i), |
41 | 8x |
checkmate::test_number(i), |
42 | 8x |
checkmate::test_logical(i, len = length(x)) && sum(j) == 1 |
43 |
) || |
|
44 | 8x |
!any( |
45 | 8x |
checkmate::test_string(j), |
46 | 8x |
checkmate::test_number(j), |
47 | 8x |
checkmate::test_logical(j, len = length(x)) && sum(j) == 1 |
48 |
) |
|
49 |
) { |
|
50 | 1x |
stop( |
51 | 1x |
"join_keys[i, j] - Can't extract keys for multiple pairs.", |
52 | 1x |
"When specifying a pair [i, j], both indices must point to a single key pair.\n", |
53 | 1x |
call. = FALSE |
54 |
) |
|
55 |
} |
|
56 | 1x |
if (is.numeric(i)) i <- names(x)[i] |
57 | 1x |
if (is.numeric(j)) j <- names(x)[j] |
58 | ||
59 | 7x |
subset_x <- update_keys_given_parents(x[union(i, j)]) |
60 | 7x |
return(subset_x[[i]][[j]]) |
61 | 29x |
} else if (!missing(j)) { |
62 |
# ie. select all keys which have j as dataset_2 |
|
63 |
# since list is symmetrical it is equivalent to selecting by i |
|
64 | 1x |
i <- j |
65 |
} |
|
66 | ||
67 | 29x |
checkmate::assert( |
68 | 29x |
combine = "or", |
69 | 29x |
checkmate::check_character(i), |
70 | 29x |
checkmate::check_numeric(i), |
71 | 29x |
checkmate::check_logical(i) |
72 |
) |
|
73 | ||
74 | ||
75 |
# Convert integer/logical index to named index |
|
76 | 29x |
if (checkmate::test_numeric(i) || checkmate::test_logical(i)) { |
77 | 2x |
i <- names(x)[i] |
78 |
} |
|
79 | ||
80 |
# When retrieving a relationship pair, it will also return the symmetric key |
|
81 | 29x |
new_jk <- new_join_keys() |
82 | 29x |
queue <- unique(i) |
83 | 29x |
bin <- character(0) |
84 | ||
85 |
# Need to iterate on a mutating queue if subset of a dataset will also |
|
86 |
# select its parent as that parent might have relationships with others |
|
87 |
# already selected. |
|
88 | 29x |
while (length(queue) > 0) { |
89 | 60x |
ix <- queue[1] |
90 | 60x |
queue <- queue[-1] |
91 | 60x |
bin <- c(bin, ix) |
92 | ||
93 | 60x |
ix_parent <- parent(x, ix) |
94 | ||
95 | 60x |
if (checkmate::test_string(ix_parent, min.chars = 1) && !ix_parent %in% c(queue, bin)) { |
96 | 10x |
queue <- c(queue, ix_parent) |
97 |
} |
|
98 | ||
99 | 60x |
ix_valid_names <- names(x[[ix]]) %in% c(queue, bin) |
100 | ||
101 | 60x |
new_jk[[ix]] <- x[[ix]][ix_valid_names] |
102 | ||
103 |
# Add primary key of parent |
|
104 | 60x |
if (length(ix_parent) > 0) { |
105 | 20x |
new_jk[[ix_parent]][[ix_parent]] <- x[[ix_parent]][[ix_parent]] |
106 |
} |
|
107 |
} |
|
108 | ||
109 | 29x |
common_parents_ix <- names(parents(x)) %in% names(new_jk) & |
110 | 29x |
parents(x) %in% names(new_jk) |
111 | ||
112 | 13x |
if (any(common_parents_ix)) parents(new_jk) <- parents(x)[common_parents_ix] |
113 | ||
114 | 29x |
new_jk |
115 |
} |
|
116 | ||
117 |
#' @rdname join_keys |
|
118 |
#' @order 2 |
|
119 |
#' |
|
120 |
#' @param directed (`logical(1)`) Flag that indicates whether it should create |
|
121 |
#' a parent-child relationship between the datasets. |
|
122 |
#' - `TRUE` (default) `dataset_1` is the parent of `dataset_2`; |
|
123 |
#' - `FALSE` when the relationship is undirected. |
|
124 |
#' @section Functions: |
|
125 |
#' - `x[i, j] <- value`: Assignment of a key to pair `(i, j)`. |
|
126 |
#' - `x[i] <- value`: This (without `j` parameter) **is not** a supported |
|
127 |
#' operation for `join_keys`. |
|
128 |
#' - `join_keys(x)[i, j] <- value`: Assignment to `join_keys` object stored in `x`, |
|
129 |
#' such as a `teal_data` object or `join_keys` object itself. |
|
130 |
#' |
|
131 |
#' @export |
|
132 |
#' @examples |
|
133 |
#' # Setting a new primary key --- |
|
134 |
#' |
|
135 |
#' jk["ds4", "ds4"] <- "pk4" |
|
136 |
#' jk["ds5", "ds5"] <- "pk5" |
|
137 |
#' |
|
138 |
#' # Setting a single relationship pair --- |
|
139 |
#' |
|
140 |
#' jk["ds1", "ds4"] <- c("pk1" = "pk4") |
|
141 |
#' |
|
142 |
#' # Removing a key --- |
|
143 |
#' |
|
144 |
#' jk["ds5", "ds5"] <- NULL |
|
145 |
`[<-.join_keys` <- function(x, i, j, directed = TRUE, value) { |
|
146 | 11x |
checkmate::assert_flag(directed) |
147 | 11x |
if (missing(i) || missing(j)) { |
148 | 4x |
stop("join_keys[i, j] specify both indices to set a key pair.") |
149 | 7x |
} else if (!missing(i) && is.null(i) || !missing(j) && is.null(j)) { |
150 | 2x |
stop("join_keys[i, j] neither i nor j can be NULL.") |
151 |
} else if ( |
|
152 | 5x |
!any( |
153 | 5x |
checkmate::test_string(i), |
154 | 5x |
checkmate::test_number(i), |
155 | 5x |
checkmate::test_logical(i, len = length(x)) && sum(j) == 1 |
156 |
) || |
|
157 | 5x |
!any( |
158 | 5x |
checkmate::test_string(j), |
159 | 5x |
checkmate::test_number(j), |
160 | 5x |
checkmate::test_logical(j, len = length(x)) && sum(j) == 1 |
161 |
) |
|
162 |
) { |
|
163 | 2x |
stop( |
164 | 2x |
"join_keys[i, j] <- Can't set keys to specified indices.\n", |
165 | 2x |
"When setting pair [i, j], both indices must point to a single key pair.\n", |
166 | 2x |
call. = FALSE |
167 |
) |
|
168 |
} |
|
169 | ||
170 |
# Handle join key removal separately |
|
171 | 3x |
if (is.null(value)) { |
172 | 1x |
x[[i]][[j]] <- NULL |
173 | 1x |
return(x) |
174 |
} |
|
175 | ||
176 | 2x |
c(x, join_key(i, j, value, directed)) |
177 |
} |
|
178 | ||
179 |
#' @rdname join_keys |
|
180 |
#' |
|
181 |
#' @order 1000 |
|
182 |
#' @usage ## Preferred method is x[i, j] <- value |
|
183 |
#' x[[i]][[j]] <- value |
|
184 |
#' |
|
185 |
#' @section Functions: |
|
186 |
#' - `x[[i]][[j]] <- value`: It is equivalent as `x[i, j] <- value`. |
|
187 |
#' |
|
188 |
#' @export |
|
189 |
#' @examples |
|
190 |
#' # Setting via x[[i]] <- value --- |
|
191 |
#' |
|
192 |
#' jk <- join_keys() |
|
193 |
#' jk[["ds6"]][["ds6"]] <- "pk6" |
|
194 |
#' jk[["ds7"]] <- list(ds7 = "pk7", ds6 = c(pk7 = "pk6")) |
|
195 |
#' jk[["ds7"]][["ds7"]] <- NULL # removes key |
|
196 |
#' |
|
197 |
#' jk |
|
198 |
#' |
|
199 |
#' @noRd |
|
200 |
`[[<-.join_keys` <- function(x, i, value) { |
|
201 | 405x |
checkmate::assert( |
202 | 405x |
combine = "or", |
203 | 405x |
checkmate::check_string(i), |
204 | 405x |
checkmate::check_number(i), |
205 | 405x |
checkmate::check_logical(i, len = length(x)) |
206 |
) |
|
207 | 405x |
checkmate::assert_list(value, names = "named", types = "character", null.ok = TRUE) |
208 | 398x |
if (checkmate::test_numeric(i) || checkmate::test_logical(i)) { |
209 | 1x |
i <- names(x)[[i]] |
210 |
} |
|
211 | ||
212 |
# Normalize values |
|
213 | 398x |
norm_value <- lapply(seq_along(value), function(.x) { |
214 | 552x |
join_key(i, names(value)[.x], value[[.x]]) |
215 |
}) |
|
216 | 398x |
names(norm_value) <- names(value) |
217 | ||
218 |
# Check if multiple modifications don't have a conflict |
|
219 | 398x |
repeated_value_ix <- names(value) %in% names(value)[duplicated(names(value))] |
220 | 398x |
repeated <- norm_value[repeated_value_ix] |
221 | 398x |
vapply( |
222 | 398x |
seq_along(repeated), |
223 | 398x |
function(.ix, .x_value = repeated[[.ix]], .x_name = names(.x_value[[1]])) { |
224 | 3x |
assert_compatible_keys2( |
225 | 3x |
.x_value, |
226 | 3x |
unlist(unname( |
227 | 3x |
repeated[-.ix][names(repeated[-.ix]) == .x_name] |
228 | 3x |
), recursive = FALSE) |
229 |
) |
|
230 |
}, |
|
231 | 398x |
logical(1) |
232 |
) |
|
233 | ||
234 | 397x |
norm_value <- lapply(norm_value, function(x) x[[1]][[1]]) |
235 | 397x |
names(norm_value) <- names(value) |
236 | ||
237 |
# Safe to do as duplicated are the same |
|
238 | 397x |
norm_value[duplicated(names(norm_value))] <- NULL |
239 | ||
240 |
# Keep only elements with length > 0L |
|
241 | 397x |
norm_value <- Filter(length, norm_value) |
242 | ||
243 |
# Remove classes to use list-based get/assign operations |
|
244 | 397x |
new_x <- unclass(x) |
245 | ||
246 |
# In case a pair is removed, also remove the symmetric pair and update parents |
|
247 | 397x |
removed_names <- setdiff(names(new_x[[i]]), names(norm_value)) |
248 | 397x |
for (.x in removed_names) { |
249 | 2x |
if (identical(parent(x, .x), i)) attr(new_x, "parents")[[.x]] <- NULL |
250 | 1x |
if (identical(parent(x, i), .x)) attr(new_x, "parents")[[i]] <- NULL |
251 | ||
252 | 5x |
new_x[[.x]][[i]] <- NULL |
253 |
} |
|
254 | ||
255 | 397x |
new_x[[i]] <- norm_value |
256 | ||
257 |
# Iterate on all new values to create symmetrical pair |
|
258 | 397x |
for (ds2 in names(norm_value)) { |
259 | 310x |
if (ds2 == i) next |
260 | ||
261 | 238x |
keep_value <- if (is.null(x)) list() else new_x[[ds2]] |
262 | ||
263 |
# Invert key |
|
264 | 238x |
new_value <- stats::setNames(names(norm_value[[ds2]]), norm_value[[ds2]]) |
265 | 238x |
keep_value[[i]] <- new_value |
266 | ||
267 |
# Assign symmetrical |
|
268 | 238x |
new_x[[ds2]] <- keep_value |
269 |
} |
|
270 | ||
271 | 397x |
preserve_attr <- attributes(new_x)[!names(attributes(new_x)) %in% "names"] |
272 |
# Remove NULL or empty keys |
|
273 | 397x |
new_x <- Filter(function(x) length(x) != 0L, new_x) |
274 | 397x |
attributes(new_x) <- utils::modifyList(attributes(new_x), preserve_attr) |
275 | ||
276 |
# |
|
277 |
# restore class |
|
278 | 397x |
class(new_x) <- class(x) |
279 | 397x |
new_x |
280 |
} |
1 |
#' Variable labels |
|
2 |
#' |
|
3 |
#' Get or set variable labels in a `data.frame`. |
|
4 |
#' |
|
5 |
#' @details Variable labels can be stored as a `label` attribute set on individual variables. |
|
6 |
#' These functions get or set this attribute, either on all (`col_labels`) or some variables (`col_relabel`). |
|
7 |
#' |
|
8 |
#' @param x (`data.frame` or `DataFrame`) data object |
|
9 |
#' @param fill (`logical(1)`) specifying what to return if variable has no label |
|
10 |
#' @param value (`character`) vector of variable labels of length equal to number of columns in `x`; |
|
11 |
#' if named, names must match variable names in `x` and will be used as key to set labels; |
|
12 |
#' use `NA` to remove label from variable |
|
13 |
#' @param ... name-value pairs, where name corresponds to a variable name in `x` |
|
14 |
#' and value is the new variable label; use `NA` to remove label from variable |
|
15 |
#' |
|
16 |
#' @return |
|
17 |
#' For `col_labels`, named character vector of variable labels, the names being the corresponding variable names. |
|
18 |
#' If the `label` attribute is missing, the vector elements will be |
|
19 |
#' the variable names themselves if `fill = TRUE` and `NA` if `fill = FALSE`. |
|
20 |
#' |
|
21 |
#' For `col_labels<-` and `col_relabel`, copy of `x` with variable labels modified. |
|
22 |
#' |
|
23 |
#' @examples |
|
24 |
#' x <- iris |
|
25 |
#' col_labels(x) |
|
26 |
#' col_labels(x) <- paste("label for", names(iris)) |
|
27 |
#' col_labels(x) |
|
28 |
#' y <- col_relabel(x, Sepal.Length = "Sepal Length of iris flower") |
|
29 |
#' col_labels(y) |
|
30 |
#' |
|
31 |
#' @source These functions were taken from |
|
32 |
#' [formatters](https://cran.r-project.org/package=formatters) package, to reduce the complexity of |
|
33 |
#' the dependency tree and rewritten. |
|
34 |
#' |
|
35 |
#' @rdname col_labels |
|
36 |
#' @export |
|
37 |
#' |
|
38 |
col_labels <- function(x, fill = FALSE) { |
|
39 | 16x |
checkmate::test_multi_class(x, c("data.frame", "DataFrame")) |
40 | 16x |
checkmate::assert_flag(fill) |
41 | ||
42 | 16x |
if (ncol(x) == 0L) { |
43 | 2x |
return(character(0L)) |
44 |
} |
|
45 | ||
46 | 14x |
labels <- sapply(x, function(i) as.vector(attr(i, "label", exact = TRUE)), simplify = FALSE, USE.NAMES = TRUE) |
47 | 14x |
mapply( |
48 | 14x |
function(name, label) { |
49 | 62x |
checkmate::assert_string( |
50 | 62x |
label, |
51 | 62x |
.var.name = sprintf("\"label\" attribute of column \"%s\"", name), |
52 | 62x |
null.ok = TRUE |
53 |
) |
|
54 |
}, |
|
55 | 14x |
name = names(x), |
56 | 14x |
label = labels |
57 |
) |
|
58 | ||
59 | 12x |
nulls <- vapply(labels, is.null, logical(1L)) |
60 | 12x |
if (any(nulls)) { |
61 | 7x |
labels[nulls] <- |
62 | 7x |
if (fill) { |
63 | 1x |
colnames(x)[nulls] |
64 |
} else { |
|
65 | 7x |
NA_character_ |
66 |
} |
|
67 |
} |
|
68 | ||
69 | 12x |
unlist(labels) |
70 |
} |
|
71 | ||
72 |
#' @rdname col_labels |
|
73 |
#' @export |
|
74 |
`col_labels<-` <- function(x, value) { |
|
75 | 13x |
checkmate::test_multi_class(x, c("data.frame", "DataFrame")) |
76 | 13x |
checkmate::assert_character(value) |
77 | 12x |
checkmate::assert_true( |
78 | 12x |
ncol(x) == length(value), |
79 | 12x |
.var.name = "Length of value is equal to the number of columns" |
80 |
) |
|
81 | ||
82 | 11x |
varnames <- |
83 | 11x |
if (is.null(names(value))) { |
84 | 4x |
names(x) |
85 | 11x |
} else if (any(names(value) == "")) { |
86 | 3x |
specified_cols <- names(value)[names(value) != ""] |
87 | 3x |
checkmate::assert_subset(specified_cols, names(x), .var.name = "names of value") |
88 | 3x |
res <- names(value) |
89 | 3x |
res[res == ""] <- setdiff(names(x), specified_cols) |
90 | 3x |
res |
91 |
} else { |
|
92 | 4x |
checkmate::assert_set_equal(names(value), names(x), .var.name = "names of value") |
93 | 3x |
names(value) |
94 |
} |
|
95 | ||
96 | 10x |
for (i in seq_along(value)) { |
97 | 40x |
if (is.na(value[i])) { |
98 | 2x |
attr(x[[varnames[i]]], "label") <- NULL |
99 |
} else { |
|
100 | 38x |
attr(x[[varnames[i]]], "label") <- value[[i]] |
101 |
} |
|
102 |
} |
|
103 | 10x |
x |
104 |
} |
|
105 | ||
106 |
#' @rdname col_labels |
|
107 |
#' @export |
|
108 |
col_relabel <- function(x, ...) { |
|
109 | 4x |
checkmate::test_multi_class(x, c("data.frame", "DataFrame")) |
110 | 4x |
if (missing(...)) { |
111 | 1x |
return(x) |
112 |
} |
|
113 | 3x |
value <- list(...) |
114 | 3x |
varnames <- names(value) |
115 | ||
116 | 3x |
checkmate::assert_subset(varnames, names(x), .var.name = "names of ...") |
117 | 2x |
lapply(value, checkmate::assert_string, .var.name = "element of ...", na.ok = TRUE) |
118 | ||
119 | 2x |
for (i in seq_along(value)) { |
120 | 2x |
if (is.na(value[i])) { |
121 | 1x |
attr(x[[varnames[i]]], "label") <- NULL |
122 |
} else { |
|
123 | 1x |
attr(x[[varnames[i]]], "label") <- value[[i]] |
124 |
} |
|
125 |
} |
|
126 | 2x |
x |
127 |
} |
1 |
#' Create a relationship between a pair of datasets |
|
2 |
#' |
|
3 |
#' @description |
|
4 |
#' `r lifecycle::badge("stable")` |
|
5 |
#' |
|
6 |
#' Create a relationship between two datasets, `dataset_1` and `dataset_2`. |
|
7 |
#' By default, this function establishes a directed relationship with `dataset_1` as the parent. |
|
8 |
#' If `dataset_2` is not specified, the function creates a primary key for `dataset_1`. |
|
9 |
#' |
|
10 |
#' @param dataset_1,dataset_2 (`character(1)`) Dataset names. When `dataset_2` is omitted, |
|
11 |
#' a primary key for `dataset_1` is created. |
|
12 |
#' @param keys (optionally named `character`) Column mapping between the datasets, |
|
13 |
#' where `names(keys)` maps columns in `dataset_1` corresponding to columns of |
|
14 |
#' `dataset_2` given by the elements of `keys`. |
|
15 |
#' - If unnamed, the same column names are used for both datasets. |
|
16 |
#' - If any element of the `keys` vector is empty with a non-empty name, then the name is |
|
17 |
#' used for both datasets. |
|
18 |
#' @param directed (`logical(1)`) Flag that indicates whether it should create |
|
19 |
#' a parent-child relationship between the datasets. |
|
20 |
#' - `TRUE` (default) `dataset_1` is the parent of `dataset_2`; |
|
21 |
#' - `FALSE` when the relationship is undirected. |
|
22 |
#' |
|
23 |
#' @return object of class `join_key_set` to be passed into `join_keys` function. |
|
24 |
#' |
|
25 |
#' @examples |
|
26 |
#' join_key("d1", "d2", c("A")) |
|
27 |
#' join_key("d1", "d2", c("A" = "B")) |
|
28 |
#' join_key("d1", "d2", c("A" = "B", "C")) |
|
29 |
#' |
|
30 |
#' @export |
|
31 |
#' @seealso [join_keys()], [parents()] |
|
32 |
#' |
|
33 |
join_key <- function(dataset_1, dataset_2 = dataset_1, keys, directed = TRUE) { |
|
34 | 1106x |
checkmate::assert_string(dataset_1) |
35 | 1106x |
checkmate::assert_string(dataset_2) |
36 | 1103x |
checkmate::assert_character(keys, any.missing = FALSE) |
37 | 1098x |
checkmate::assert_flag(directed) |
38 | ||
39 | 1098x |
if (length(keys) > 0) { |
40 | 1096x |
if (is.null(names(keys))) { |
41 | 528x |
names(keys) <- keys |
42 |
} |
|
43 | ||
44 | 1096x |
keys <- trimws(keys) |
45 | 1096x |
names(keys) <- trimws(names(keys)) |
46 | ||
47 |
# Remove keys with empty value and without name |
|
48 | 1096x |
if (any(keys == "" & names(keys) == "")) { |
49 | 6x |
message("Key with an empty value and name are ignored.") |
50 | 6x |
keys <- keys[keys != "" & names(keys) != ""] |
51 |
} |
|
52 | ||
53 |
# Set name of keys without one: c("A") -> c("A" = "A") |
|
54 | 1096x |
if (any(names(keys) == "")) { |
55 | 4x |
names(keys)[names(keys) == ""] <- keys[names(keys) == ""] |
56 |
} |
|
57 | ||
58 |
# Set value of keys with empty string, but non-empty name: c("A" = "") -> c("A" = "A") |
|
59 | 1096x |
if (any(keys == "")) { |
60 | 4x |
keys[keys == ""] <- names(keys[keys == ""]) |
61 |
} |
|
62 | ||
63 | 1096x |
stopifnot(!is.null(names(keys))) |
64 | 1096x |
stopifnot(!anyDuplicated(keys)) |
65 | 1095x |
stopifnot(!anyDuplicated(names(keys))) |
66 | ||
67 | 1094x |
if (dataset_1 == dataset_2 && any(names(keys) != keys)) { |
68 | 2x |
stop("Keys within a dataset must match exactly: keys = c('A' = 'B') are not allowed") |
69 |
} |
|
70 |
} else { |
|
71 | 2x |
keys <- NULL |
72 |
} |
|
73 | ||
74 | 1094x |
parents <- if (directed && dataset_1 != dataset_2) { |
75 | 416x |
stats::setNames(list(dataset_1), dataset_2) |
76 |
} else { |
|
77 | 678x |
list() |
78 |
} |
|
79 | ||
80 | 1094x |
structure( |
81 | 1094x |
list( |
82 | 1094x |
structure( |
83 | 1094x |
list(keys), |
84 | 1094x |
names = dataset_2 |
85 |
) |
|
86 |
), |
|
87 | 1094x |
names = dataset_1, |
88 | 1094x |
class = "join_key_set", |
89 | 1094x |
parents = parents |
90 |
) |
|
91 |
} |
1 |
#' Test if two objects are (nearly) equal |
|
2 |
#' |
|
3 |
#' `all.equal(target, current)` is a utility to compare `join_keys` objects target |
|
4 |
#' and current testing `near equality`. |
|
5 |
#' |
|
6 |
#' If they are different, comparison is still made to some extent, and a report |
|
7 |
#' of the differences is returned. |
|
8 |
#' Do not use `all.equal` directly in if expressions—either use `isTRUE(all.equal(....))` |
|
9 |
#' or identical if appropriate. |
|
10 |
#' |
|
11 |
#' The parents attribute comparison tolerates `NULL` and empty lists and will find |
|
12 |
#' no difference. |
|
13 |
#' |
|
14 |
#' The list containing all the relationships is treated like a map and ignores |
|
15 |
#' entries with `NULL` if they exist. |
|
16 |
#' |
|
17 |
#' @inheritParams base::all.equal |
|
18 |
#' @param ... further arguments for different methods. Not used with `join_keys`. |
|
19 |
#' |
|
20 |
#' @seealso [base::all.equal()] |
|
21 |
#' @keywords internal |
|
22 |
#' |
|
23 |
all.equal.join_keys <- function(target, current, ...) { |
|
24 | 21x |
.as_map <- function(.x) { |
25 | 42x |
old_attributes <- attributes(.x) |
26 |
# Keep only non-list attributes |
|
27 | 42x |
old_attributes[["names"]] <- NULL |
28 | 42x |
old_attributes[["original_class"]] <- old_attributes[["class"]] |
29 | 42x |
old_attributes[["class"]] <- NULL |
30 | 42x |
old_attributes[["parents"]] <- if (!length(old_attributes[["parents"]])) { |
31 | 18x |
list() |
32 |
} else { |
|
33 | 24x |
old_attributes[["parents"]][order(names(old_attributes[["parents"]]))] |
34 |
} |
|
35 | 42x |
attr(.x, "class") <- "list" |
36 | ||
37 |
# Remove nulls |
|
38 | 42x |
.x <- Filter(Negate(is.null), .x) |
39 | ||
40 |
# Sort named components, preserving positions of unnamed |
|
41 | 42x |
nx <- rlang::names2(.x) |
42 | 42x |
is_named <- nx != "" |
43 | 42x |
if (any(is_named)) { |
44 | 42x |
idx <- seq_along(.x) |
45 | 42x |
idx[is_named] <- idx[is_named][order(nx[is_named])] |
46 | 42x |
.x <- .x[idx] |
47 |
} |
|
48 | 42x |
new_attributes <- if (is.null(attributes(.x))) list() else attributes(.x) |
49 | 42x |
attributes(.x) <- utils::modifyList(old_attributes, new_attributes) |
50 | 42x |
.x |
51 |
} |
|
52 | 21x |
x <- .as_map(target) |
53 | 21x |
y <- .as_map(current) |
54 | 21x |
all.equal(x, y) |
55 |
} |
1 |
#' |
|
2 |
#' @section Subsetting: |
|
3 |
#' `x[names]` subsets objects in `teal_data` environment and limit the code to the necessary needed to build limited |
|
4 |
#' objects. |
|
5 |
#' |
|
6 |
#' @param names (`character`) names of objects included in `teal_subset` to subset |
|
7 |
#' @param x (`teal_data`) |
|
8 |
#' |
|
9 |
#' @examples |
|
10 |
#' |
|
11 |
#' # Subsetting |
|
12 |
#' data <- teal_data() |
|
13 |
#' data <- eval_code(data, "a <- 1;b<-2") |
|
14 |
#' data["a"] |
|
15 |
#' data[c("a", "b")] |
|
16 |
#' |
|
17 |
#' join_keys(data) <- join_keys(join_key("a", "b", "x")) |
|
18 |
#' join_keys(data["a"]) # should show empty keys |
|
19 |
#' join_keys(data["b"]) |
|
20 |
#' join_keys(data)["a"] # should show empty keys |
|
21 |
#' join_keys(data)["b"] |
|
22 |
#' |
|
23 |
#' @rdname teal_data |
|
24 |
#' |
|
25 |
#' @export |
|
26 |
`[.teal_data` <- function(x, names) { |
|
27 | 5x |
x <- NextMethod("`[`", x, check_code_names = x@verified) # unverified doesn't need warning for code inconsistency |
28 | 5x |
x@join_keys <- x@join_keys[names] |
29 | 5x |
x |
30 |
} |
1 |
#' Manage relationships between datasets using `join_keys` |
|
2 |
#' @order 1 |
|
3 |
#' @name join_keys |
|
4 |
#' |
|
5 |
#' @usage ## Constructor, getter and setter |
|
6 |
#' join_keys(...) |
|
7 |
#' |
|
8 |
#' @description |
|
9 |
#' Facilitates the creation and retrieval of relationships between datasets. |
|
10 |
#' `join_keys` class extends `list` and contains keys connecting pairs of datasets. |
|
11 |
#' Each element of the list contains keys for specific dataset. |
|
12 |
#' Each dataset can have a relationship with itself (primary key) and with other datasets. |
|
13 |
#' |
|
14 |
#' Note that `join_keys` list is symmetrical and assumes a default direction, that is: |
|
15 |
#' when keys are set between `ds1` and `ds2`, it defines `ds1` as the parent |
|
16 |
#' in a parent-child relationship and the mapping is automatically mirrored between |
|
17 |
#' `ds2` and `ds1`. |
|
18 |
#' |
|
19 |
#' @section Methods (by class): |
|
20 |
#' - `join_keys()`: Returns an empty `join_keys` object when called without arguments. |
|
21 |
#' - `join_keys(join_keys)`: Returns itself. |
|
22 |
#' - `join_keys(teal_data)`: Returns the `join_keys` object contained in `teal_data` object. |
|
23 |
#' - `join_keys(...)`: Creates a new object with one or more `join_key_set` parameters. |
|
24 |
#' |
|
25 |
#' @param ... optional, |
|
26 |
#' - either `teal_data` or `join_keys` object to extract `join_keys` |
|
27 |
#' - or any number of `join_key_set` objects to create `join_keys` |
|
28 |
#' - or nothing to create an empty `join_keys` |
|
29 |
#' @param value For `x[i, j, directed = TRUE)] <- value` (named/unnamed `character`) |
|
30 |
#' Column mapping between datasets. |
|
31 |
#' |
|
32 |
#' For `join_keys(x) <- value`: (`join_key_set` or list of `join_key_set`) relationship |
|
33 |
#' pairs to add to `join_keys` list. |
|
34 |
#' |
|
35 |
#' |
|
36 |
#' @return `join_keys` object. |
|
37 |
#' |
|
38 |
#' @examples |
|
39 |
#' # Creating a new join keys ---- |
|
40 |
#' |
|
41 |
#' jk <- join_keys( |
|
42 |
#' join_key("ds1", "ds1", "pk1"), |
|
43 |
#' join_key("ds2", "ds2", "pk2"), |
|
44 |
#' join_key("ds3", "ds3", "pk3"), |
|
45 |
#' join_key("ds1", "ds2", c(pk1 = "pk2")), |
|
46 |
#' join_key("ds1", "ds3", c(pk1 = "pk3")) |
|
47 |
#' ) |
|
48 |
#' |
|
49 |
#' jk |
|
50 |
#' |
|
51 |
#' @export |
|
52 |
#' |
|
53 |
#' @seealso [join_key()] for creating `join_keys_set`, |
|
54 |
#' [parents()] for parent operations, |
|
55 |
#' [teal_data()] for `teal_data` constructor _and_ |
|
56 |
#' [default_cdisc_join_keys] for default CDISC keys. |
|
57 |
#' |
|
58 |
join_keys <- function(...) { |
|
59 | 797x |
if (missing(...)) { |
60 | 198x |
return(new_join_keys()) |
61 |
} |
|
62 | 599x |
x <- rlang::list2(...) |
63 | 599x |
if (length(x) == 1L) { |
64 | 534x |
UseMethod("join_keys", x[[1]]) |
65 |
} else { |
|
66 | 65x |
join_keys.default(...) |
67 |
} |
|
68 |
} |
|
69 | ||
70 |
#' @rdname join_keys |
|
71 |
#' @order 1 |
|
72 |
#' @export |
|
73 |
join_keys.default <- function(...) { |
|
74 | 114x |
c(new_join_keys(), ...) |
75 |
} |
|
76 | ||
77 |
#' @rdname join_keys |
|
78 |
#' @order 1 |
|
79 |
#' @export |
|
80 |
join_keys.join_keys <- function(...) { |
|
81 | 459x |
x <- rlang::list2(...) |
82 | 459x |
x[[1]] |
83 |
} |
|
84 | ||
85 |
#' @rdname join_keys |
|
86 |
#' @order 1 |
|
87 |
#' @export |
|
88 |
join_keys.teal_data <- function(...) { |
|
89 | 26x |
x <- rlang::list2(...) |
90 | 26x |
x[[1]]@join_keys |
91 |
} |
|
92 | ||
93 |
#' @rdname join_keys |
|
94 |
#' @order 5 |
|
95 |
#' |
|
96 |
#' @section Functions: |
|
97 |
#' - `join_keys(x) <- value`: Assignment of the `join_keys` in object with `value`. |
|
98 |
#' `value` needs to be an object of class `join_keys` or `join_key_set`. |
|
99 |
#' |
|
100 |
#' @param x (`join_keys`) empty object to set the new relationship pairs. |
|
101 |
#' `x` is typically an object of `join_keys` class. When called with the `join_keys(x)` |
|
102 |
#' or `join_keys(x) <- value` then it can also take a supported class (`teal_data`, `join_keys`) |
|
103 |
#' |
|
104 |
#' @export |
|
105 |
`join_keys<-` <- function(x, value) { |
|
106 | 20x |
checkmate::assert_class(value, classes = c("join_keys", "list")) |
107 | 20x |
UseMethod("join_keys<-", x) |
108 |
} |
|
109 | ||
110 |
#' @rdname join_keys |
|
111 |
#' @order 5 |
|
112 |
#' @export |
|
113 |
#' @examples |
|
114 |
#' # Assigning keys via join_keys(x)[i, j] <- value ---- |
|
115 |
#' |
|
116 |
#' obj <- join_keys() |
|
117 |
#' # or |
|
118 |
#' obj <- teal_data() |
|
119 |
#' |
|
120 |
#' join_keys(obj)["ds1", "ds1"] <- "pk1" |
|
121 |
#' join_keys(obj)["ds2", "ds2"] <- "pk2" |
|
122 |
#' join_keys(obj)["ds3", "ds3"] <- "pk3" |
|
123 |
#' join_keys(obj)["ds1", "ds2"] <- c(pk1 = "pk2") |
|
124 |
#' join_keys(obj)["ds1", "ds3"] <- c(pk1 = "pk3") |
|
125 |
#' |
|
126 |
#' identical(jk, join_keys(obj)) |
|
127 |
`join_keys<-.join_keys` <- function(x, value) { |
|
128 | 12x |
value |
129 |
} |
|
130 | ||
131 |
#' @rdname join_keys |
|
132 |
#' @order 5 |
|
133 |
#' @export |
|
134 |
#' @examples |
|
135 |
#' # Setter for join_keys within teal_data ---- |
|
136 |
#' |
|
137 |
#' td <- teal_data() |
|
138 |
#' join_keys(td) <- jk |
|
139 |
#' |
|
140 |
#' join_keys(td)["ds1", "ds2"] <- "new_key" |
|
141 |
#' join_keys(td) <- c(join_keys(td), join_keys(join_key("ds3", "ds2", "key3"))) |
|
142 |
#' join_keys(td) |
|
143 |
`join_keys<-.teal_data` <- function(x, value) { |
|
144 | 8x |
join_keys(x@join_keys) <- value |
145 | 8x |
x |
146 |
} |
|
147 | ||
148 |
#' Internal constructor |
|
149 |
#' |
|
150 |
#' @return an empty `join_keys` list |
|
151 |
#' |
|
152 |
#' @keywords internal |
|
153 |
new_join_keys <- function() { |
|
154 | 341x |
structure( |
155 | 341x |
list(), |
156 | 341x |
class = c("join_keys", "list"), |
157 | 341x |
"parents" = list() |
158 |
) |
|
159 |
} |
1 |
#' Get and set parents in `join_keys` object |
|
2 |
#' |
|
3 |
#' `parents()` facilitates the creation of dependencies between datasets by |
|
4 |
#' assigning a parent-child relationship. |
|
5 |
#' |
|
6 |
#' Each element is defined by a `list` element, where `list("child" = "parent")`. |
|
7 |
#' |
|
8 |
#' @param x (`join_keys` or `teal_data`) object that contains "parents" information |
|
9 |
#' to retrieve or manipulate. |
|
10 |
#' |
|
11 |
#' @return a `list` of `character` representing the parents. |
|
12 |
#' |
|
13 |
#' @export |
|
14 |
#' @seealso [join_keys()] |
|
15 |
parents <- function(x) { |
|
16 | 687x |
UseMethod("parents", x) |
17 |
} |
|
18 | ||
19 |
#' @describeIn parents Retrieves parents of `join_keys` object. |
|
20 |
#' @export |
|
21 |
#' @examples |
|
22 |
#' # Get parents of join_keys --- |
|
23 |
#' |
|
24 |
#' jk <- default_cdisc_join_keys["ADEX"] |
|
25 |
#' parents(jk) |
|
26 |
parents.join_keys <- function(x) { |
|
27 | 1x |
if (is.null(attr(x, "parents"))) list() else attr(x, "parents") |
28 |
} |
|
29 | ||
30 |
#' @describeIn parents Retrieves parents of `join_keys` inside `teal_data` object. |
|
31 |
#' @export |
|
32 |
#' @examples |
|
33 |
#' # Get parents of join_keys inside teal_data object --- |
|
34 |
#' |
|
35 |
#' td <- teal_data( |
|
36 |
#' ADSL = rADSL, |
|
37 |
#' ADTTE = rADTTE, |
|
38 |
#' ADRS = rADRS, |
|
39 |
#' join_keys = default_cdisc_join_keys[c("ADSL", "ADTTE", "ADRS")] |
|
40 |
#' ) |
|
41 |
#' parents(td) |
|
42 |
parents.teal_data <- function(x) { |
|
43 | 1x |
parents(x@join_keys) |
44 |
} |
|
45 | ||
46 |
#' @describeIn parents Assignment of parents in `join_keys` object. |
|
47 |
#' |
|
48 |
#' @param value (`named list`) of `character` vectors. |
|
49 |
#' |
|
50 |
#' @export |
|
51 |
`parents<-` <- function(x, value) { |
|
52 | 440x |
UseMethod("parents<-", x) |
53 |
} |
|
54 | ||
55 |
#' @describeIn parents Assignment of parents of `join_keys` object. |
|
56 |
#' @export |
|
57 |
#' @examples |
|
58 |
#' # Assignment of parents --- |
|
59 |
#' |
|
60 |
#' jk <- join_keys( |
|
61 |
#' join_key("ds1", "ds2", "id"), |
|
62 |
#' join_key("ds5", "ds6", "id"), |
|
63 |
#' join_key("ds7", "ds6", "id") |
|
64 |
#' ) |
|
65 |
#' |
|
66 |
#' parents(jk) <- list(ds2 = "ds1") |
|
67 |
#' |
|
68 |
#' # Setting individual parent-child relationship |
|
69 |
#' |
|
70 |
#' parents(jk)["ds6"] <- "ds5" |
|
71 |
#' parents(jk)["ds7"] <- "ds6" |
|
72 |
`parents<-.join_keys` <- function(x, value) { |
|
73 | 439x |
checkmate::assert_list(value, types = "character", names = "named") |
74 | ||
75 | 436x |
new_parents <- list() |
76 | ||
77 | 436x |
for (dataset in names(value)) { |
78 |
# Custom .var.name so it is verbose and helpful for users |
|
79 | 169x |
checkmate::assert_string(value[[dataset]], .var.name = sprintf("value[[\"%s\"]]", dataset)) |
80 | ||
81 | 168x |
parent <- new_parents[[dataset]] |
82 | 168x |
checkmate::assert( |
83 | 168x |
checkmate::check_null(parent), |
84 | 168x |
checkmate::check_true( |
85 | 168x |
length(parent) == 0 && |
86 | 168x |
length(value[[dataset]]) == 0 |
87 |
), |
|
88 | 168x |
checkmate::check_true(parent == value[[dataset]]), |
89 | 168x |
"Please check the difference between provided datasets parents and provided join_keys parents.", |
90 | 168x |
.var.name = "value" |
91 |
) |
|
92 | 168x |
if (is.null(parent)) { |
93 | 168x |
new_parents[[dataset]] <- value[[dataset]] |
94 |
} |
|
95 |
} |
|
96 | ||
97 | 435x |
if (is_dag(new_parents)) { |
98 | 4x |
stop("Cycle detected in a parent and child dataset graph.") |
99 |
} |
|
100 | ||
101 | 431x |
attr(x, "parents") <- new_parents |
102 | ||
103 | 431x |
assert_parent_child(x) |
104 | 430x |
x |
105 |
} |
|
106 | ||
107 |
#' @describeIn parents Assignment of parents of `join_keys` inside `teal_data` object. |
|
108 |
#' @export |
|
109 |
#' @examples |
|
110 |
#' # Assignment of parents of join_keys inside teal_data object --- |
|
111 |
#' |
|
112 |
#' parents(td) <- list("ADTTE" = "ADSL") # replace existing |
|
113 |
#' parents(td)["ADRS"] <- "ADSL" # add new parent |
|
114 |
`parents<-.teal_data` <- function(x, value) { |
|
115 | 1x |
parents(x@join_keys) <- value |
116 | 1x |
x |
117 |
} |
|
118 | ||
119 |
#' @describeIn parents Getter for individual parent. |
|
120 |
#' |
|
121 |
#' @param dataset_name (`character(1)`) Name of dataset to query on their parent. |
|
122 |
#' |
|
123 |
#' @return For `parent(x, dataset_name)` returns `NULL` if parent does not exist. |
|
124 |
#' |
|
125 |
#' @export |
|
126 |
#' |
|
127 |
#' @examples |
|
128 |
#' # Get individual parent --- |
|
129 |
#' |
|
130 |
#' parent(jk, "ds2") |
|
131 |
#' parent(td, "ADTTE") |
|
132 |
parent <- function(x, dataset_name) { |
|
133 | 151x |
checkmate::assert_string(dataset_name) |
134 |
# assert x is performed by parents() |
|
135 | 151x |
parents(x)[[dataset_name]] |
136 |
} |
1 |
#' @rdname join_keys |
|
2 |
#' @order 4 |
|
3 |
#' @export |
|
4 |
#' @examples |
|
5 |
#' # Merging multiple `join_keys` objects --- |
|
6 |
#' |
|
7 |
#' jk_merged <- c( |
|
8 |
#' jk, |
|
9 |
#' join_keys( |
|
10 |
#' join_key("ds4", keys = c("pk4", "pk4_2")), |
|
11 |
#' join_key("ds3", "ds4", c(pk3 = "pk4_2")) |
|
12 |
#' ) |
|
13 |
#' ) |
|
14 |
c.join_keys <- function(...) { |
|
15 | 129x |
x <- rlang::list2(...) |
16 | 129x |
checkmate::assert_list(x, types = c("join_keys", "join_key_set")) |
17 | ||
18 | 126x |
Reduce( |
19 | 126x |
init = join_keys(), |
20 | 126x |
x = x, |
21 | 126x |
f = function(.x, .y) { |
22 | 402x |
out <- utils::modifyList(.x, .y, keep.null = FALSE) |
23 | 402x |
parents(out) <- utils::modifyList(attr(.x, "parents"), attr(.y, "parents"), keep.null = FALSE) |
24 | 400x |
out |
25 |
} |
|
26 |
) |
|
27 |
} |
|
28 | ||
29 |
#' @rdname join_keys |
|
30 |
#' @order 4 |
|
31 |
#' @export |
|
32 |
#' @examples |
|
33 |
#' # note: merge can be performed with both join_keys and join_key_set |
|
34 |
#' |
|
35 |
#' jk_merged <- c( |
|
36 |
#' jk_merged, |
|
37 |
#' join_key("ds5", keys = "pk5"), |
|
38 |
#' join_key("ds1", "ds5", c(pk1 = "pk5")) |
|
39 |
#' ) |
|
40 |
c.join_key_set <- function(...) { |
|
41 | 2x |
c.join_keys(...) |
42 |
} |
1 |
#' Comprehensive data integration function for `teal` applications |
|
2 |
#' |
|
3 |
#' @description |
|
4 |
#' `r lifecycle::badge("stable")` |
|
5 |
#' |
|
6 |
#' Initializes a data for `teal` application. |
|
7 |
#' |
|
8 |
#' @param ... any number of objects (presumably data objects) provided as `name = value` pairs. |
|
9 |
#' |
|
10 |
#' @param join_keys (`join_keys` or single `join_key_set`) |
|
11 |
#' optional object with datasets column names used for joining. |
|
12 |
#' If empty then no joins between pairs of objects. |
|
13 |
#' |
|
14 |
#' @param code (`character`, `language`) optional code to reproduce the datasets provided in `...`. |
|
15 |
#' Note this code is not executed and the `teal_data` may not be reproducible |
|
16 |
#' |
|
17 |
#' Use [verify()] to verify code reproducibility. |
|
18 |
#' |
|
19 |
#' @details |
|
20 |
#' |
|
21 |
#' A `teal_data` is meant to be used for reproducibility purposes. The class inherits from |
|
22 |
#' [`teal.data::qenv`] and we encourage to get familiar with \CRANpkg{teal.code} first. |
|
23 |
#' `teal_data` has following characteristics: |
|
24 |
#' |
|
25 |
#' - It inherits from the environment and methods such as [`$`], [get()], [ls()], [as.list()], |
|
26 |
#' [parent.env()] work out of the box. |
|
27 |
#' - `teal_data` is a locked environment, and data modification is only possible through the |
|
28 |
#' [teal.code::eval_code()] and [within.qenv()] functions. |
|
29 |
#' - It stores metadata about the code used to create the data (see [get_code()]). |
|
30 |
#' - It supports slicing (see [`teal.code::subset-qenv`]) |
|
31 |
#' - Is immutable which means that each code evaluation does not modify the original `teal_data` |
|
32 |
#' environment directly. |
|
33 |
#' - It maintains information about relationships between datasets (see [join_keys()]). |
|
34 |
#' |
|
35 |
#' @return A `teal_data` object. |
|
36 |
#' |
|
37 |
#' @seealso [`teal.code::eval_code`], [get_code()], [join_keys()], [names.teal_data()] |
|
38 |
#' |
|
39 |
#' @export |
|
40 |
#' |
|
41 |
#' @examples |
|
42 |
#' teal_data(x1 = iris, x2 = mtcars) |
|
43 |
#' |
|
44 |
teal_data <- function(..., |
|
45 |
join_keys = teal.data::join_keys(), |
|
46 |
code = character(0)) { |
|
47 | 49x |
data_objects <- rlang::list2(...) |
48 | 49x |
if (inherits(join_keys, "join_key_set")) { |
49 | ! |
join_keys <- teal.data::join_keys(join_keys) |
50 |
} |
|
51 | ||
52 | 49x |
if (length(data_objects) > 0 && !checkmate::test_names(names(data_objects), type = "named")) { |
53 | ! |
stop("Dot (`...`) arguments on `teal_data()` must be named.") |
54 |
} |
|
55 | 49x |
methods::new( |
56 | 49x |
"teal_data", |
57 | 49x |
.xData = data_objects, |
58 | 49x |
code = code, |
59 | 49x |
join_keys = join_keys |
60 |
) |
|
61 |
} |
1 |
#' Verify code reproducibility |
|
2 |
#' |
|
3 |
#' Checks whether code in `teal_data` object reproduces the stored objects. |
|
4 |
#' |
|
5 |
#' If objects created by code in the `@code` slot of `x` are `all_equal` to the |
|
6 |
#' contents of the environment (`@.xData` slot), |
|
7 |
#' the function updates the `@verified` slot to `TRUE` in the returned `teal_data` object. |
|
8 |
#' Once verified, the slot will always be set to `TRUE`. |
|
9 |
#' If the `@code` fails to recreate objects in `teal_data`'s environment, an |
|
10 |
#' error is raised. |
|
11 |
#' |
|
12 |
#' @return Input `teal_data` object or error. |
|
13 |
#' |
|
14 |
#' @param x `teal_data` object |
|
15 |
#' @examples |
|
16 |
#' tdata1 <- teal_data() |
|
17 |
#' tdata1 <- within(tdata1, { |
|
18 |
#' a <- 1 |
|
19 |
#' b <- a^5 |
|
20 |
#' c <- list(x = 2) |
|
21 |
#' }) |
|
22 |
#' verify(tdata1) |
|
23 |
#' |
|
24 |
#' tdata2 <- teal_data(x1 = iris, code = "x1 <- iris") |
|
25 |
#' verify(tdata2) |
|
26 |
#' verify(tdata2)@verified |
|
27 |
#' tdata2@verified |
|
28 |
#' |
|
29 |
#' tdata3 <- teal_data() |
|
30 |
#' tdata3 <- within(tdata3, { |
|
31 |
#' stop("error") |
|
32 |
#' }) |
|
33 |
#' try(verify(tdata3)) # fails |
|
34 |
#' |
|
35 |
#' |
|
36 |
#' a <- 1 |
|
37 |
#' b <- a + 2 |
|
38 |
#' c <- list(x = 2) |
|
39 |
#' d <- 5 |
|
40 |
#' tdata4 <- teal_data( |
|
41 |
#' a = a, b = b, c = c, d = d, |
|
42 |
#' code = "a <- 1 |
|
43 |
#' b <- a |
|
44 |
#' c <- list(x = 2) |
|
45 |
#' e <- 1" |
|
46 |
#' ) |
|
47 |
#' tdata4 |
|
48 |
#' \dontrun{ |
|
49 |
#' verify(tdata4) # fails |
|
50 |
#' } |
|
51 |
#' |
|
52 |
#' @name verify |
|
53 |
#' @rdname verify |
|
54 |
#' @aliases verify,teal_data-method |
|
55 |
#' @aliases verify,qenv.error-method |
|
56 |
#' |
|
57 |
#' @export |
|
58 | 5x |
setGeneric("verify", function(x) standardGeneric("verify")) |
59 |
setMethod("verify", signature = "teal_data", definition = function(x) { |
|
60 | 4x |
if (x@verified) { |
61 | 2x |
return(x) |
62 |
} |
|
63 | 2x |
x_name <- deparse(substitute(x)) |
64 | 2x |
y <- eval_code(teal_data(), get_code(x)) |
65 | ||
66 | 2x |
if (inherits(y, "qenv.error")) { |
67 | ! |
stop(conditionMessage(y), call. = FALSE) |
68 |
} |
|
69 | ||
70 | 2x |
reproduced <- isTRUE(all.equal(teal.code::get_env(x), teal.code::get_env(y))) |
71 | 2x |
if (reproduced) { |
72 | 1x |
x@verified <- TRUE |
73 | 1x |
methods::validObject(x) |
74 | 1x |
x |
75 |
} else { |
|
76 | 1x |
error <- "Code verification failed." |
77 | ||
78 | 1x |
objects_diff <- vapply( |
79 | 1x |
intersect(names(x), names(y)), |
80 | 1x |
function(element) { |
81 | 1x |
isTRUE(all.equal(x[[element]], y[[element]])) |
82 |
}, |
|
83 | 1x |
logical(1) |
84 |
) |
|
85 | ||
86 | 1x |
names_diff_other <- setdiff(names(y), names(x)) |
87 | 1x |
names_diff_inenv <- setdiff(names(x), names(y)) |
88 | ||
89 | 1x |
if (length(objects_diff)) { |
90 | 1x |
error <- c( |
91 | 1x |
error, |
92 | 1x |
paste0("Object(s) recreated with code that have different structure in ", x_name, ":"), |
93 | 1x |
paste0(" \u2022 ", names(which(!objects_diff))) |
94 |
) |
|
95 |
} |
|
96 | 1x |
if (length(names_diff_inenv)) { |
97 | ! |
error <- c( |
98 | ! |
error, |
99 | ! |
paste0("Object(s) not created with code that exist in ", x_name, ":"), |
100 | ! |
paste0(" \u2022 ", names_diff_inenv) |
101 |
) |
|
102 |
} |
|
103 | 1x |
if (length(names_diff_other)) { |
104 | ! |
error <- c( |
105 | ! |
error, |
106 | ! |
paste0("Object(s) created with code that do not exist in ", x_name, ":"), |
107 | ! |
paste0(" \u2022 ", names_diff_other) |
108 |
) |
|
109 |
} |
|
110 | ||
111 | 1x |
stop(paste(error, collapse = "\n"), call. = FALSE) |
112 |
} |
|
113 |
}) |
|
114 |
setMethod("verify", signature = "qenv.error", definition = function(x) { |
|
115 | 1x |
stop(conditionMessage(x), call. = FALSE) |
116 |
}) |
1 |
#' Names of data sets in `teal_data` object |
|
2 |
#' |
|
3 |
#' @description |
|
4 |
#' `r lifecycle::badge("deprecated")` |
|
5 |
#' |
|
6 |
#' Use `names()` instead of `datanames()`. |
|
7 |
#' |
|
8 |
#' `datanames()` is deprecated. If object should be hidden, then use a `.` (dot) |
|
9 |
#' prefix for the object's name. |
|
10 |
#' |
|
11 |
#' @param x (`teal_data` or `qenv_error`) object to access or modify |
|
12 |
#' @param value (`character`) new value for `@datanames`; all elements must be names of variables existing in `@.xData` |
|
13 |
#' |
|
14 |
#' @return The contents of `@datanames` or `teal_data` object with updated `@datanames`. |
|
15 |
#' @aliases `datanames<-.teal_data` |
|
16 |
#' |
|
17 |
#' @name datanames |
|
18 | ||
19 |
#' @rdname datanames |
|
20 |
#' @export |
|
21 |
datanames <- function(x) { |
|
22 | 1x |
lifecycle::deprecate_soft( |
23 | 1x |
when = "0.6.1", |
24 | 1x |
what = "datanames()", |
25 | 1x |
with = "names()" |
26 |
) |
|
27 | 1x |
names(x) |
28 |
} |
|
29 | ||
30 |
#' @rdname datanames |
|
31 |
#' @export |
|
32 |
`datanames<-` <- function(x, value) { |
|
33 | 1x |
lifecycle::deprecate_soft( |
34 | 1x |
when = "0.6.1", |
35 | 1x |
what = "`datanames<-`()", |
36 | 1x |
details = "invalid to use `datanames()<-` or `names()<-` on an object of class `teal_data`. See ?names.teal_data" |
37 |
) |
|
38 | 1x |
x |
39 |
} |
|
40 | ||
41 |
#' @rdname datanames |
|
42 |
#' @export |
|
43 |
#' @keywords internal |
|
44 |
`names<-.teal_data` <- function(x, value) { |
|
45 | 1x |
lifecycle::deprecate_warn( |
46 | 1x |
when = "0.6.1", |
47 | 1x |
what = "`names<-.teal_data`()", |
48 | 1x |
details = "invalid to use `datanames()<-` or `names()<-` on an object of class `teal_data`. See ?names.teal_data" |
49 |
) |
|
50 | 1x |
x |
51 |
} |
1 |
setOldClass("join_keys") |
|
2 | ||
3 |
#' Reproducible data |
|
4 |
#' |
|
5 |
#' Reproducible data container class. Inherits code tracking behavior from [`teal.code::qenv-class`]. |
|
6 |
#' |
|
7 |
#' This class provides an isolated environment in which to store and process data with all code being recorded. |
|
8 |
#' The environment, code, data set names, and data joining keys are stored in their respective slots. |
|
9 |
#' These slots should never be accessed directly, use the provided get/set functions. |
|
10 |
#' |
|
11 |
#' As code is evaluated in `teal_data`, messages and warnings are stored in their respective slots. |
|
12 |
#' If errors are raised, a `qenv.error` object is returned. |
|
13 |
#' |
|
14 |
#' @name teal_data-class |
|
15 |
#' @rdname teal_data-class |
|
16 |
#' |
|
17 |
#' @slot .xData (`environment`) environment containing data sets and possibly |
|
18 |
#' auxiliary variables. |
|
19 |
#' Access variables with [get()], [`$`], [teal.code::get_var()] or [`[[`]. |
|
20 |
#' No setter provided. Evaluate code to add variables into `@.xData`. |
|
21 |
#' @slot code (`list` of `character`) representing code necessary to reproduce the contents of `qenv`. |
|
22 |
#' Access with [teal.code::get_code()]. |
|
23 |
#' No setter provided. Evaluate code to append code to the slot. |
|
24 |
#' @slot join_keys (`join_keys`) object specifying joining keys for data sets in |
|
25 |
#' `@.xData`. |
|
26 |
#' Access or modify with [join_keys()]. |
|
27 |
#' @slot verified (`logical(1)`) flag signifying that code in `@code` has been |
|
28 |
#' proven to yield contents of `@.xData`. |
|
29 |
#' Used internally. See [`verify()`] for more details. |
|
30 |
#' |
|
31 |
#' @inheritSection teal.code::`qenv-class` Code |
|
32 |
#' |
|
33 |
#' @import teal.code |
|
34 |
#' @keywords internal |
|
35 |
setClass( |
|
36 |
Class = "teal_data", |
|
37 |
contains = "qenv", |
|
38 |
slots = c(join_keys = "join_keys", verified = "logical"), |
|
39 |
prototype = list( |
|
40 |
join_keys = join_keys(), |
|
41 |
verified = logical(0) |
|
42 |
) |
|
43 |
) |
|
44 | ||
45 |
#' It initializes the `teal_data` class |
|
46 |
#' |
|
47 |
#' Accepts .xData as a list and converts it to an environment before initializing |
|
48 |
#' parent constructor (`qenv`). |
|
49 |
#' @noRd |
|
50 |
setMethod( |
|
51 |
"initialize", |
|
52 |
"teal_data", |
|
53 |
function(.Object, .xData = list(), join_keys = join_keys(), code = list(), ...) { # nolint: object_name. |
|
54 |
# Allow .xData to be a list and convert it to an environment |
|
55 | 49x |
if (!missing(.xData) && inherits(.xData, "list")) { |
56 | 49x |
.xData <- rlang::env_clone(list2env(.xData), parent = parent.env(.GlobalEnv)) # nolint: object_name. |
57 | 49x |
lockEnvironment(.xData, bindings = TRUE) |
58 |
} |
|
59 | 49x |
args <- list(...) |
60 | 49x |
checkmate::assert_environment(.xData) |
61 | 49x |
checkmate::assert_class(join_keys, "join_keys") |
62 | 49x |
checkmate::assert_list(args, names = "named") |
63 | 49x |
if (!any(is.language(code), is.character(code))) { |
64 | ! |
stop("`code` must be a character or language object.") |
65 |
} |
|
66 | ||
67 | 49x |
if (is.language(code)) { |
68 | 2x |
code <- paste(lang2calls(code), collapse = "\n") |
69 |
} |
|
70 | 49x |
if (length(code)) { |
71 | 7x |
code <- paste(code, collapse = "\n") |
72 |
} |
|
73 | ||
74 | 49x |
methods::callNextMethod( |
75 | 49x |
.Object, |
76 | 49x |
.xData, |
77 | 49x |
join_keys = join_keys, |
78 | 49x |
verified = (length(args$code) == 0L && length(.xData) == 0L), |
79 | 49x |
code = code2list(code), |
80 |
... |
|
81 |
) |
|
82 |
} |
|
83 |
) |
|
84 | ||
85 |
#' Reshape code to the list |
|
86 |
#' |
|
87 |
#' List will be divided by the calls. Each element of the list contains `id` and `dependency` attributes. |
|
88 |
#' |
|
89 |
#' @param code `character` with the code. |
|
90 |
#' |
|
91 |
#' @return list of `character`s of the length equal to the number of calls in `code`. |
|
92 |
#' |
|
93 |
#' @keywords internal |
|
94 |
#' @noRd |
|
95 |
code2list <- function(code) { |
|
96 | 49x |
checkmate::assert_character(code, null.ok = TRUE) |
97 | 49x |
if (length(code) == 0) { |
98 | 42x |
return(list()) |
99 |
} |
|
100 | ||
101 | 7x |
parsed_code <- parse(text = code, keep.source = TRUE) |
102 | ||
103 | 7x |
code_list <- if (length(parsed_code)) { |
104 | 7x |
lapply(split_code(code), function(current_code) { |
105 | 11x |
parsed_code <- parse(text = current_code, keep.source = TRUE) |
106 | 11x |
attr(current_code, "dependency") <- extract_dependency(parsed_code) |
107 | 11x |
current_code |
108 |
}) |
|
109 |
} else { |
|
110 |
# empty code like "", or just comments |
|
111 | ! |
attr(code, "dependency") <- extract_dependency(parsed_code) # in case comment contains @linksto tag |
112 | ! |
list(code) |
113 |
} |
|
114 | 7x |
names(code_list) <- sample.int(.Machine$integer.max, length(code_list)) |
115 | 7x |
code_list |
116 |
} |
1 |
#' Get code from `teal_data` object |
|
2 |
#' |
|
3 |
#' Retrieve code from `teal_data` object. |
|
4 |
#' |
|
5 |
#' Retrieve code stored in `@code`, which (in principle) can be used to recreate |
|
6 |
#' all objects found in the environment (`@.xData`). |
|
7 |
#' Use `names` to limit the code to one or more of the datasets enumerated in |
|
8 |
#' the environment. |
|
9 |
#' |
|
10 |
#' @section Extracting dataset-specific code: |
|
11 |
#' When `names` is specified, the code returned will be limited to the lines needed to _create_ |
|
12 |
#' the requested datasets. The code stored in the `@code` slot is analyzed statically to determine |
|
13 |
#' which lines the datasets of interest depend upon. The analysis works well when objects are created |
|
14 |
#' with standard infix assignment operators (see `?assignOps`) but it can fail in some situations. |
|
15 |
#' |
|
16 |
#' Consider the following examples: |
|
17 |
#' |
|
18 |
#' _Case 1: Usual assignments._ |
|
19 |
#' ```r |
|
20 |
#' data <- teal_data() |> |
|
21 |
#' within({ |
|
22 |
#' foo <- function(x) { |
|
23 |
#' x + 1 |
|
24 |
#' } |
|
25 |
#' x <- 0 |
|
26 |
#' y <- foo(x) |
|
27 |
#' }) |
|
28 |
#' get_code(data, names = "y") |
|
29 |
#' ``` |
|
30 |
#' `x` has no dependencies, so `get_code(data, names = "x")` will return only the second call.\cr |
|
31 |
#' `y` depends on `x` and `foo`, so `get_code(data, names = "y")` will contain all three calls. |
|
32 |
#' |
|
33 |
#' _Case 2: Some objects are created by a function's side effects._ |
|
34 |
#' ```r |
|
35 |
#' data <- teal_data() |> |
|
36 |
#' within({ |
|
37 |
#' foo <- function() { |
|
38 |
#' x <<- x + 1 |
|
39 |
#' } |
|
40 |
#' x <- 0 |
|
41 |
#' foo() |
|
42 |
#' y <- x |
|
43 |
#' }) |
|
44 |
#' get_code(data, names = "y") |
|
45 |
#' ``` |
|
46 |
#' Here, `y` depends on `x` but `x` is modified by `foo` as a side effect (not by reassignment) |
|
47 |
#' and so `get_code(data, names = "y")` will not return the `foo()` call.\cr |
|
48 |
#' To overcome this limitation, code dependencies can be specified manually. |
|
49 |
#' Lines where side effects occur can be flagged by adding "`# @linksto <object name>`" at the end.\cr |
|
50 |
#' Note that `within` evaluates code passed to `expr` as is and comments are ignored. |
|
51 |
#' In order to include comments in code one must use the `eval_code` function instead. |
|
52 |
#' |
|
53 |
#' ```r |
|
54 |
#' data <- teal_data() |> |
|
55 |
#' eval_code(" |
|
56 |
#' foo <- function() { |
|
57 |
#' x <<- x + 1 |
|
58 |
#' } |
|
59 |
#' x <- 0 |
|
60 |
#' foo() # @linksto x |
|
61 |
#' y <- x |
|
62 |
#' ") |
|
63 |
#' get_code(data, names = "y") |
|
64 |
#' ``` |
|
65 |
#' Now the `foo()` call will be properly included in the code required to recreate `y`. |
|
66 |
#' |
|
67 |
#' Note that two functions that create objects as side effects, `assign` and `data`, are handled automatically. |
|
68 |
#' |
|
69 |
#' Here are known cases where manual tagging is necessary: |
|
70 |
#' - non-standard assignment operators, _e.g._ `%<>%` |
|
71 |
#' - objects used as conditions in `if` statements: `if (<condition>)` |
|
72 |
#' - objects used to iterate over in `for` loops: `for(i in <sequence>)` |
|
73 |
#' - creating and evaluating language objects, _e.g._ `eval(<call>)` |
|
74 |
#' |
|
75 |
#' |
|
76 |
#' @param object (`teal_data`) |
|
77 |
#' @param datanames `r lifecycle::badge("deprecated")` (`character`) vector of dataset names to return the code for. |
|
78 |
#' For more details see the "Extracting dataset-specific code" section. Use `names` instead. |
|
79 |
#' @param names (`character`) Successor of `datanames`. Vector of dataset names to return the code for. |
|
80 |
#' For more details see the "Extracting dataset-specific code" section. |
|
81 |
#' @param deparse (`logical`) flag specifying whether to return code as `character` (`deparse = TRUE`) or as |
|
82 |
#' `expression` (`deparse = FALSE`). |
|
83 |
#' @param ... Parameters passed to internal methods. Currently, the only supported parameter is `check_names` |
|
84 |
#' (`logical(1)`) flag, which is `TRUE` by default. Function warns about missing objects, if they do not exist in |
|
85 |
#' `code` but are passed in `datanames`. To remove the warning, set `check_names = FALSE`. |
|
86 |
#' |
|
87 |
#' @return |
|
88 |
#' Either a character string or an expression. If `names` is used to request a specific dataset, |
|
89 |
#' only code that _creates_ that dataset (not code that uses it) is returned. Otherwise, all contents of `@code`. |
|
90 |
#' |
|
91 |
#' @examples |
|
92 |
#' tdata1 <- teal_data() |
|
93 |
#' tdata1 <- within(tdata1, { |
|
94 |
#' a <- 1 |
|
95 |
#' b <- a^5 |
|
96 |
#' c <- list(x = 2) |
|
97 |
#' }) |
|
98 |
#' get_code(tdata1) |
|
99 |
#' get_code(tdata1, names = "a") |
|
100 |
#' get_code(tdata1, names = "b") |
|
101 |
#' |
|
102 |
#' tdata2 <- teal_data(x1 = iris, code = "x1 <- iris") |
|
103 |
#' get_code(tdata2) |
|
104 |
#' get_code(verify(tdata2)) |
|
105 |
#' |
|
106 |
#' @rdname get_code |
|
107 |
#' @aliases get_code,teal_data-method |
|
108 |
#' |
|
109 |
#' @export |
|
110 |
setMethod("get_code", |
|
111 |
signature = "teal_data", |
|
112 |
definition = function(object, deparse = TRUE, names = NULL, datanames = lifecycle::deprecated(), ...) { |
|
113 | 4x |
if (lifecycle::is_present(datanames)) { |
114 | ! |
lifecycle::deprecate_warn( |
115 | ! |
when = "0.6.1", |
116 | ! |
what = "teal.data::get_code(datanames)", |
117 | ! |
with = "teal.code::get_code(names)", |
118 | ! |
always = TRUE |
119 |
) |
|
120 | ! |
names <- datanames |
121 |
} |
|
122 | ||
123 | 4x |
if (!is.null(names) && lifecycle::is_present(datanames)) { |
124 | ! |
stop("'names' shouldn't be specified with deprecated 'datanames' parameter.") |
125 |
} |
|
126 | ||
127 | 4x |
checkmate::assert_character(names, min.len = 1L, null.ok = TRUE) |
128 | 4x |
checkmate::assert_flag(deparse) |
129 | ||
130 | 4x |
methods::callNextMethod(object = object, deparse = deparse, names = names, ...) |
131 |
} |
|
132 |
) |
1 |
#' The names of a `join_keys` object |
|
2 |
#' |
|
3 |
#' @inheritParams base::`names<-` |
|
4 |
#' @export |
|
5 |
`names<-.join_keys` <- function(x, value) { |
|
6 | 3x |
new_x <- unclass(x) |
7 | 3x |
parent_list <- parents(x) |
8 |
# Update inner keys |
|
9 | 3x |
for (old_name in setdiff(names(new_x), value)) { |
10 | 4x |
old_entry <- new_x[[old_name]] |
11 | 4x |
new_name <- value[names(new_x) == old_name] |
12 | ||
13 |
# Change 2nd-tier first |
|
14 | 4x |
for (sub_name in names(old_entry)) { |
15 | 9x |
names(new_x[[sub_name]])[names(new_x[[sub_name]]) == old_name] <- new_name |
16 |
} |
|
17 | ||
18 |
# Change in first tier |
|
19 | 4x |
names(new_x)[names(new_x) == old_name] <- new_name |
20 | ||
21 |
# changing name in the parents |
|
22 | 4x |
if (length(parent_list)) { |
23 | 4x |
names(parent_list)[names(parent_list) == old_name] <- new_name |
24 | 4x |
ind <- vapply(parent_list, identical, logical(1), old_name) |
25 | 4x |
parent_list[ind] <- new_name |
26 | 4x |
attr(new_x, "parents") <- parent_list |
27 |
} |
|
28 |
} |
|
29 | ||
30 | 3x |
class(new_x) <- c("join_keys", "list") |
31 | 3x |
new_x |
32 |
} |
1 |
#' Names of data sets in `teal_data` object |
|
2 |
#' |
|
3 |
#' Functions to get the names of a `teal_data` object. |
|
4 |
#' The names are obtained from the objects listed in the `qenv` environment. |
|
5 |
#' |
|
6 |
#' Objects named with a `.` (dot) prefix will be ignored and not returned. |
|
7 |
#' To get the names of all objects, use `ls(x, all.names = TRUE)`, however, it |
|
8 |
#' will not group the names by the join_keys topological structure. |
|
9 |
#' |
|
10 |
#' In order to rename objects in the `teal_data` object, use base R functions (see examples). |
|
11 |
#' |
|
12 |
#' @param x A (`teal_data`) object to access or modify. |
|
13 |
#' |
|
14 |
#' @return A character vector of names. |
|
15 |
#' |
|
16 |
#' @examples |
|
17 |
#' td <- teal_data(iris = iris) |
|
18 |
#' td <- within(td, mtcars <- mtcars) |
|
19 |
#' names(td) |
|
20 |
#' |
|
21 |
#' # hidden objects with dot-prefix |
|
22 |
#' td <- within(td, .CO2 <- CO2) |
|
23 |
#' names(td) # '.CO2' will not be returned |
|
24 |
#' |
|
25 |
#' # rename objects |
|
26 |
#' td <- teal_data(iris = iris) |
|
27 |
#' td <- within(td, { |
|
28 |
#' new_iris <- iris |
|
29 |
#' rm(iris) |
|
30 |
#' }) |
|
31 |
#' names(td) # only 'new_iris' will be returned |
|
32 |
#' |
|
33 |
#' @export |
|
34 |
names.teal_data <- function(x) { |
|
35 |
# Sorting from `ls` can be safely done as environments don't have any order |
|
36 |
# nor support numeric-index subsetting |
|
37 | 20x |
envir <- as.environment(x) |
38 | 20x |
.get_sorted_names(names = ls(envir = envir), join_keys = join_keys(x), env = envir) |
39 |
} |
|
40 | ||
41 |
#' @export |
|
42 | ! |
length.teal.data <- function(x) length(ls(x)) |
43 | ||
44 |
#' @keywords internal |
|
45 |
.get_sorted_names <- function(names, join_keys, env) { |
|
46 | 20x |
child_parent <- sapply(names, parent, x = join_keys, USE.NAMES = TRUE, simplify = FALSE) |
47 | ||
48 | 20x |
union( |
49 | 20x |
intersect(unlist(topological_sort(child_parent)), ls(env, all.names = TRUE)), |
50 | 20x |
names |
51 |
) |
|
52 |
} |
1 |
#' Generate sample CDISC datasets |
|
2 |
#' |
|
3 |
#' Retrieves example CDISC datasets for use in examples and testing. |
|
4 |
#' |
|
5 |
#' This function returns a dummy dataset and should only be used within `teal.data`. |
|
6 |
#' Note that the datasets are not created and maintained in `teal.data`, they are retrieved its dependencies. |
|
7 |
#' |
|
8 |
#' @param dataname (`character(1)`) name of a CDISC dataset |
|
9 |
#' |
|
10 |
#' @return A CDISC dataset as a `data.frame`. |
|
11 |
#' |
|
12 |
#' @export |
|
13 |
example_cdisc_data <- function(dataname = c("ADSL", "ADAE", "ADLB", "ADCM", "ADEX", "ADRS", "ADTR", "ADTTE", "ADVS")) { |
|
14 | ! |
dataname <- sprintf("r%s", match.arg(dataname)) |
15 | ! |
dynGet(dataname, ifnotfound = stop(dataname, " not found"), inherits = TRUE) |
16 |
} |
1 |
#' Data input for `teal` app |
|
2 |
#' |
|
3 |
#' @description |
|
4 |
#' `r lifecycle::badge("stable")` |
|
5 |
#' |
|
6 |
#' Function is a wrapper around [teal_data()] and guesses `join_keys` |
|
7 |
#' for given datasets whose names match ADAM datasets names. |
|
8 |
#' |
|
9 |
#' @inheritParams teal_data |
|
10 |
#' @param join_keys (`join_keys` or single `join_key_set`) |
|
11 |
#' optional object with datasets column names used for joining. |
|
12 |
#' If empty then it would be automatically derived basing on intersection of datasets primary keys. |
|
13 |
#' For ADAM datasets it would be automatically derived. |
|
14 |
#' |
|
15 |
#' @return A `teal_data` object. |
|
16 |
#' |
|
17 |
#' @details This function checks if there were keys added to all data sets. |
|
18 |
#' |
|
19 |
#' @examples |
|
20 |
#' data <- cdisc_data( |
|
21 |
#' join_keys = join_keys( |
|
22 |
#' join_key("ADSL", "ADTTE", c("STUDYID" = "STUDYID", "USUBJID" = "USUBJID")) |
|
23 |
#' ) |
|
24 |
#' ) |
|
25 |
#' |
|
26 |
#' data <- within(data, { |
|
27 |
#' ADSL <- example_cdisc_data("ADSL") |
|
28 |
#' ADTTE <- example_cdisc_data("ADTTE") |
|
29 |
#' }) |
|
30 |
#' |
|
31 |
#' @export |
|
32 |
#' |
|
33 |
cdisc_data <- function(..., |
|
34 |
join_keys = teal.data::default_cdisc_join_keys[names(rlang::list2(...))], |
|
35 |
code = character(0)) { |
|
36 | 1x |
teal_data(..., join_keys = join_keys, code = code) |
37 |
} |