1 |
#' Show `teal_data` object |
|
2 |
#' |
|
3 |
#' Prints `teal_data` object. |
|
4 |
#' |
|
5 |
#' @param object (`teal_data`) |
|
6 |
#' @return Input `teal_data` object. |
|
7 |
#' @importFrom methods show |
|
8 |
#' @examples |
|
9 |
#' teal_data() |
|
10 |
#' teal_data(x = iris, code = "x = iris") |
|
11 |
#' verify(teal_data(x = iris, code = "x = iris")) |
|
12 |
#' @export |
|
13 |
setMethod("show", signature = "teal_data", function(object) { |
|
14 | ! |
if (object@verified) { |
15 | ! |
cat("\u2705\ufe0e", "verified teal_data object\n") |
16 |
} else { |
|
17 | ! |
cat("\u2716", "unverified teal_data object\n") |
18 |
} |
|
19 | ! |
rlang::env_print(object@env) |
20 |
}) |
1 |
#' Deprecated `TealData` class and related functions |
|
2 |
#' |
|
3 |
#' @description |
|
4 |
#' `r lifecycle::badge("deprecated")` |
|
5 |
#' |
|
6 |
#' The `TealData` class and associated functions have been deprecated. Use [teal_data()] instead. |
|
7 |
#' See the [Migration guide](https://github.com/insightsengineering/teal/discussions/945) for details. |
|
8 |
#' |
|
9 |
#' @name TealData |
|
10 |
#' |
|
11 |
#' @param ... any argument supported in `TealData` related functions. |
|
12 |
#' |
|
13 |
#' @return nothing |
|
14 |
#' @seealso [cdisc_data()] , [join_keys()] |
|
15 |
#' |
|
16 |
NULL |
|
17 | ||
18 |
.deprecate_function <- function(what, details) { |
|
19 | ! |
lifecycle::deprecate_stop( |
20 | ! |
when = "0.4.0", |
21 | ! |
what = what, |
22 | ! |
details = details |
23 |
) |
|
24 |
} |
|
25 | ||
26 |
deprecation_detail <- "Find more information on https://github.com/insightsengineering/teal/discussions/945" |
|
27 | ||
28 |
#' @rdname TealData |
|
29 |
#' @export |
|
30 |
as_cdisc <- function(...) { |
|
31 | ! |
.deprecate_function("as_cdisc()", deprecation_detail) |
32 |
} |
|
33 | ||
34 |
#' @rdname TealData |
|
35 |
#' @export |
|
36 |
callable_code <- function(...) { |
|
37 | ! |
.deprecate_function("callable_code()", deprecation_detail) |
38 |
} |
|
39 | ||
40 |
#' @rdname TealData |
|
41 |
#' @export |
|
42 |
callable_function <- function(...) { |
|
43 | ! |
.deprecate_function("callable_function()", deprecation_detail) |
44 |
} |
|
45 | ||
46 |
#' @rdname TealData |
|
47 |
#' @export |
|
48 |
code_dataset_connector <- function(...) { |
|
49 | ! |
.deprecate_function("code_dataset_connector()", deprecation_detail) |
50 |
} |
|
51 | ||
52 |
#' @rdname TealData |
|
53 |
#' @export |
|
54 |
code_cdisc_dataset_connector <- function(...) { |
|
55 | ! |
.deprecate_function("code_cdisc_dataset_connector()", deprecation_detail) |
56 |
} |
|
57 | ||
58 |
#' @rdname TealData |
|
59 |
#' @export |
|
60 |
csv_dataset_connector <- function(...) { |
|
61 | ! |
.deprecate_function("csv_dataset_connector()", deprecation_detail) |
62 |
} |
|
63 | ||
64 |
#' @rdname TealData |
|
65 |
#' @export |
|
66 |
csv_cdisc_dataset_connector <- function(...) { |
|
67 | ! |
.deprecate_function("csv_cdisc_dataset_connector()", deprecation_detail) |
68 |
} |
|
69 | ||
70 |
#' @rdname TealData |
|
71 |
#' @export |
|
72 |
python_code <- function(...) { |
|
73 | ! |
.deprecate_function("python_code()", deprecation_detail) |
74 |
} |
|
75 | ||
76 |
#' @rdname TealData |
|
77 |
#' @export |
|
78 |
python_dataset_connector <- function(...) { |
|
79 | ! |
.deprecate_function("python_dataset_connector()", deprecation_detail) |
80 |
} |
|
81 | ||
82 |
#' @rdname TealData |
|
83 |
#' @export |
|
84 |
python_cdisc_dataset_connector <- function(...) { |
|
85 | ! |
.deprecate_function("python_cdisc_dataset_connector()", deprecation_detail) |
86 |
} |
|
87 | ||
88 |
#' @rdname TealData |
|
89 |
#' @export |
|
90 |
cdisc_data_connector <- function(...) { |
|
91 | ! |
.deprecate_function("cdisc_data_connector()", deprecation_detail) |
92 |
} |
|
93 | ||
94 |
#' @rdname TealData |
|
95 |
#' @export |
|
96 |
cdisc_dataset <- function(...) { |
|
97 | ! |
.deprecate_function("cdisc_dataset()", deprecation_detail) |
98 |
} |
|
99 | ||
100 |
#' @rdname TealData |
|
101 |
#' @export |
|
102 |
cdisc_dataset_connector <- function(...) { |
|
103 | ! |
.deprecate_function("cdisc_dataset_connector()", deprecation_detail) |
104 |
} |
|
105 | ||
106 |
#' @rdname TealData |
|
107 |
#' @export |
|
108 |
cdisc_dataset_connector_file <- function(...) { |
|
109 | ! |
.deprecate_function("cdisc_dataset_connector_file()", deprecation_detail) |
110 |
} |
|
111 | ||
112 |
#' @rdname TealData |
|
113 |
#' @export |
|
114 |
cdisc_dataset_file <- function(...) { |
|
115 | ! |
.deprecate_function("cdisc_dataset_file()", deprecation_detail) |
116 |
} |
|
117 | ||
118 |
#' @rdname TealData |
|
119 |
#' @export |
|
120 |
dataset <- function(...) { |
|
121 | ! |
.deprecate_function("dataset()", deprecation_detail) |
122 |
} |
|
123 | ||
124 |
#' @rdname TealData |
|
125 |
#' @export |
|
126 |
dataset_connector <- function(...) { |
|
127 | ! |
.deprecate_function("dataset_connector()", deprecation_detail) |
128 |
} |
|
129 | ||
130 |
#' @rdname TealData |
|
131 |
#' @export |
|
132 |
dataset_connector_file <- function(...) { |
|
133 | ! |
.deprecate_function("dataset_connector_file()", deprecation_detail) |
134 |
} |
|
135 | ||
136 |
#' @rdname TealData |
|
137 |
#' @export |
|
138 |
dataset_file <- function(...) { |
|
139 | ! |
.deprecate_function("dataset_file()", deprecation_detail) |
140 |
} |
|
141 | ||
142 |
#' @rdname TealData |
|
143 |
#' @export |
|
144 |
data_connection <- function(...) { |
|
145 | ! |
.deprecate_function("data_connection()", deprecation_detail) |
146 |
} |
|
147 | ||
148 |
#' @rdname TealData |
|
149 |
#' @export |
|
150 |
fun_dataset_connector <- function(...) { |
|
151 | ! |
.deprecate_function("fun_dataset_connector()", deprecation_detail) |
152 |
} |
|
153 | ||
154 |
#' @rdname TealData |
|
155 |
#' @export |
|
156 |
fun_cdisc_dataset_connector <- function(...) { |
|
157 | ! |
.deprecate_function("fun_cdisc_dataset_connector()", deprecation_detail) |
158 |
} |
|
159 | ||
160 |
#' @rdname TealData |
|
161 |
#' @export |
|
162 |
relational_data_connector <- function(...) { |
|
163 | ! |
.deprecate_function("relational_data_connector()", deprecation_detail) |
164 |
} |
|
165 | ||
166 |
#' @rdname TealData |
|
167 |
#' @export |
|
168 |
mae_dataset <- function(...) { |
|
169 | ! |
.deprecate_function("mae_dataset()", deprecation_detail) |
170 |
} |
|
171 | ||
172 |
#' @rdname TealData |
|
173 |
#' @export |
|
174 |
get_attrs <- function(...) { |
|
175 | ! |
.deprecate_function("get_attrs()", deprecation_detail) |
176 |
} |
|
177 | ||
178 |
#' @rdname TealData |
|
179 |
#' @export |
|
180 |
get_dataset_label <- function(...) { |
|
181 | ! |
.deprecate_function("get_dataset_label()", deprecation_detail) |
182 |
} |
|
183 | ||
184 |
#' @rdname TealData |
|
185 |
#' @export |
|
186 |
get_dataset <- function(...) { |
|
187 | ! |
.deprecate_function("get_dataset()", deprecation_detail) |
188 |
} |
|
189 | ||
190 |
#' @rdname TealData |
|
191 |
#' @export |
|
192 |
get_datasets <- function(...) { |
|
193 | ! |
.deprecate_function("get_datasets()", deprecation_detail) |
194 |
} |
|
195 | ||
196 |
#' @rdname TealData |
|
197 |
#' @export |
|
198 |
get_dataname <- function(...) { |
|
199 | ! |
.deprecate_function("get_dataname()", deprecation_detail) |
200 |
} |
|
201 | ||
202 |
#' @rdname TealData |
|
203 |
#' @export |
|
204 |
get_key_duplicates <- function(...) { |
|
205 | ! |
.deprecate_function("get_key_duplicates()", deprecation_detail) |
206 |
} |
|
207 | ||
208 |
#' @rdname TealData |
|
209 |
#' @export |
|
210 |
get_keys <- function(...) { |
|
211 | ! |
.deprecate_function("get_keys()", deprecation_detail) |
212 |
} |
|
213 | ||
214 |
#' @rdname TealData |
|
215 |
#' @export |
|
216 |
get_raw_data <- function(...) { |
|
217 | ! |
.deprecate_function("get_raw_data()", deprecation_detail) |
218 |
} |
|
219 | ||
220 |
#' @rdname TealData |
|
221 |
#' @export |
|
222 |
is_pulled <- function(...) { |
|
223 | ! |
.deprecate_function("is_pulled()", deprecation_detail) |
224 |
} |
|
225 | ||
226 |
#' @rdname TealData |
|
227 |
#' @export |
|
228 |
load_dataset <- function(...) { |
|
229 | ! |
.deprecate_function("load_dataset()", deprecation_detail) |
230 |
} |
|
231 | ||
232 |
#' @rdname TealData |
|
233 |
#' @export |
|
234 |
load_datasets <- function(...) { |
|
235 | ! |
.deprecate_function("load_datasets()", deprecation_detail) |
236 |
} |
|
237 | ||
238 |
#' @rdname TealData |
|
239 |
#' @export |
|
240 |
mutate_data <- function(...) { |
|
241 | ! |
.deprecate_function("mutate_data()", deprecation_detail) |
242 |
} |
|
243 | ||
244 |
#' @rdname TealData |
|
245 |
#' @export |
|
246 |
mutate_dataset <- function(...) { |
|
247 | ! |
.deprecate_function("mutate_dataset()", deprecation_detail) |
248 |
} |
|
249 | ||
250 |
#' @rdname TealData |
|
251 |
#' @export |
|
252 |
set_args <- function(...) { |
|
253 | ! |
.deprecate_function("set_args()", deprecation_detail) |
254 |
} |
|
255 | ||
256 |
#' @rdname TealData |
|
257 |
#' @export |
|
258 |
rds_dataset_connector <- function(...) { |
|
259 | ! |
.deprecate_function("rds_dataset_connector()", deprecation_detail) |
260 |
} |
|
261 | ||
262 |
#' @rdname TealData |
|
263 |
#' @export |
|
264 |
rds_cdisc_dataset_connector <- function(...) { |
|
265 | ! |
.deprecate_function("rds_cdisc_dataset_connector()", deprecation_detail) |
266 |
} |
|
267 | ||
268 |
#' @rdname TealData |
|
269 |
#' @export |
|
270 |
script_dataset_connector <- function(...) { |
|
271 | ! |
.deprecate_function("script_dataset_connector()", deprecation_detail) |
272 |
} |
|
273 | ||
274 |
#' @rdname TealData |
|
275 |
#' @export |
|
276 |
script_cdisc_dataset_connector <- function(...) { |
|
277 | ! |
.deprecate_function("script_cdisc_dataset_connector()", deprecation_detail) |
278 |
} |
|
279 | ||
280 |
#' @rdname TealData |
|
281 |
#' @export |
|
282 |
set_keys <- function(...) { |
|
283 | ! |
.deprecate_function("set_keys()", deprecation_detail) |
284 |
} |
|
285 | ||
286 |
#' @rdname TealData |
|
287 |
#' @export |
|
288 |
read_script <- function(...) { |
|
289 | ! |
.deprecate_function("read_script()", deprecation_detail) |
290 |
} |
|
291 | ||
292 |
#' @rdname TealData |
|
293 |
#' @export |
|
294 |
to_relational_data <- function(...) { |
|
295 | ! |
.deprecate_function("to_relational_data()", deprecation_detail) |
296 |
} |
|
297 | ||
298 |
#' @rdname TealData |
|
299 |
#' @export |
|
300 |
validate_metadata <- function(...) { |
|
301 | ! |
.deprecate_function("validate_metadata()", deprecation_detail) |
302 |
} |
|
303 | ||
304 |
#' @rdname TealData |
|
305 |
#' @export |
|
306 |
get_cdisc_keys <- function(...) { |
|
307 | ! |
.deprecate_function("get_cdisc_keys()", deprecation_detail) |
308 |
} |
|
309 | ||
310 |
#' @rdname TealData |
|
311 |
#' @export |
|
312 |
cdisc_data_file <- function(...) { |
|
313 | ! |
.deprecate_function("cdisc_data_file()", deprecation_detail) |
314 |
} |
|
315 | ||
316 |
#' @rdname TealData |
|
317 |
#' @export |
|
318 |
teal_data_file <- function(...) { |
|
319 | ! |
.deprecate_function("teal_data_file()", deprecation_detail) |
320 |
} |
|
321 | ||
322 |
#' @rdname TealData |
|
323 |
#' @export |
|
324 |
get_join_keys <- function(...) { |
|
325 | ! |
.deprecate_function("get_join_keys()", "Use `join_keys(data)` instead.") |
326 |
} |
|
327 | ||
328 |
#' @rdname TealData |
|
329 |
#' @param value value to assign |
|
330 |
#' @export |
|
331 |
`get_join_keys<-` <- function(..., value) { |
|
332 | ! |
.deprecate_function("`get_join_keys<-`()", "Use `join_keys(x) <- ...`") |
333 |
} |
|
334 | ||
335 |
#' @rdname col_labels |
|
336 |
#' @include formatters_var_labels.R |
|
337 |
#' @details |
|
338 |
#' `r lifecycle::badge("deprecated")` |
|
339 |
#' |
|
340 |
#' In previous versions of `teal.data` labels were managed with `get_labels()`. |
|
341 |
#' This function is deprecated as of `0.4.0`, use `col_labels` instead. |
|
342 |
#' @export |
|
343 |
get_labels <- function(...) { |
|
344 | ! |
.deprecate_function("get_labels()", "Use col_labels(data)") |
345 |
} |
1 |
#' Create a relationship between a pair of datasets |
|
2 |
#' |
|
3 |
#' @description |
|
4 |
#' `r lifecycle::badge("stable")` |
|
5 |
#' |
|
6 |
#' Create a relationship between two datasets, `dataset_1` and `dataset_2`. |
|
7 |
#' By default, this function establishes a directed relationship with `dataset_1` as the parent. |
|
8 |
#' If `dataset_2` is not specified, the function creates a primary key for `dataset_1`. |
|
9 |
#' |
|
10 |
#' @param dataset_1,dataset_2 (`character(1)`) Dataset names. When `dataset_2` is omitted, |
|
11 |
#' a primary key for `dataset_1` is created. |
|
12 |
#' @param keys (optionally named `character`) Column mapping between the datasets, |
|
13 |
#' where `names(keys)` maps columns in `dataset_1` corresponding to columns of |
|
14 |
#' `dataset_2` given by the elements of `keys`. |
|
15 |
#' - If unnamed, the same column names are used for both datasets. |
|
16 |
#' - If any element of the `keys` vector is empty with a non-empty name, then the name is |
|
17 |
#' used for both datasets. |
|
18 |
#' @param directed (`logical(1)`) Flag that indicates whether it should create |
|
19 |
#' a parent-child relationship between the datasets. |
|
20 |
#' - `TRUE` (default) `dataset_1` is the parent of `dataset_2`; |
|
21 |
#' - `FALSE` when the relationship is undirected. |
|
22 |
#' |
|
23 |
#' @return object of class `join_key_set` to be passed into `join_keys` function. |
|
24 |
#' |
|
25 |
#' @examples |
|
26 |
#' join_key("d1", "d2", c("A")) |
|
27 |
#' join_key("d1", "d2", c("A" = "B")) |
|
28 |
#' join_key("d1", "d2", c("A" = "B", "C")) |
|
29 |
#' |
|
30 |
#' @export |
|
31 |
#' @seealso [join_keys()], [parents()] |
|
32 |
#' |
|
33 |
join_key <- function(dataset_1, dataset_2 = dataset_1, keys, directed = TRUE) { |
|
34 | 1070x |
checkmate::assert_string(dataset_1) |
35 | 1070x |
checkmate::assert_string(dataset_2) |
36 | 1067x |
checkmate::assert_character(keys, any.missing = FALSE) |
37 | 1062x |
checkmate::assert_flag(directed) |
38 | ||
39 | 1062x |
if (length(keys) > 0) { |
40 | 1060x |
if (is.null(names(keys))) { |
41 | 520x |
names(keys) <- keys |
42 |
} |
|
43 | ||
44 | 1060x |
keys <- trimws(keys) |
45 | 1060x |
names(keys) <- trimws(names(keys)) |
46 | ||
47 |
# Remove keys with empty value and without name |
|
48 | 1060x |
if (any(keys == "" & names(keys) == "")) { |
49 | 6x |
message("Key with an empty value and name are ignored.") |
50 | 6x |
keys <- keys[keys != "" & names(keys) != ""] |
51 |
} |
|
52 | ||
53 |
# Set name of keys without one: c("A") -> c("A" = "A") |
|
54 | 1060x |
if (any(names(keys) == "")) { |
55 | 4x |
names(keys)[names(keys) == ""] <- keys[names(keys) == ""] |
56 |
} |
|
57 | ||
58 |
# Set value of keys with empty string, but non-empty name: c("A" = "") -> c("A" = "A") |
|
59 | 1060x |
if (any(keys == "")) { |
60 | 4x |
keys[keys == ""] <- names(keys[keys == ""]) |
61 |
} |
|
62 | ||
63 | 1060x |
stopifnot(!is.null(names(keys))) |
64 | 1060x |
stopifnot(!anyDuplicated(keys)) |
65 | 1059x |
stopifnot(!anyDuplicated(names(keys))) |
66 | ||
67 | 1058x |
if (dataset_1 == dataset_2 && any(names(keys) != keys)) { |
68 | 2x |
stop("Keys within a dataset must match exactly: keys = c('A' = 'B') are not allowed") |
69 |
} |
|
70 |
} else { |
|
71 | 2x |
keys <- NULL |
72 |
} |
|
73 | ||
74 | 1058x |
parents <- if (directed && dataset_1 != dataset_2) { |
75 | 392x |
stats::setNames(list(dataset_1), dataset_2) |
76 |
} else { |
|
77 | 666x |
list() |
78 |
} |
|
79 | ||
80 | 1058x |
structure( |
81 | 1058x |
list( |
82 | 1058x |
structure( |
83 | 1058x |
list(keys), |
84 | 1058x |
names = dataset_2 |
85 |
) |
|
86 |
), |
|
87 | 1058x |
names = dataset_1, |
88 | 1058x |
class = "join_key_set", |
89 | 1058x |
parents = parents |
90 |
) |
|
91 |
} |
1 |
#' Check Compatibility of keys |
|
2 |
#' |
|
3 |
#' Helper function to assert if two key sets contain incompatible keys. |
|
4 |
#' |
|
5 |
#' @return Returns `TRUE` if successful, otherwise raises error. |
|
6 |
#' @keywords internal |
|
7 |
assert_compatible_keys <- function(join_key_1, join_key_2) { |
|
8 | 3x |
stop_message <- function(dataset_1, dataset_2) { |
9 | 1x |
stop( |
10 | 1x |
paste("cannot specify multiple different join keys between datasets:", dataset_1, "and", dataset_2) |
11 |
) |
|
12 |
} |
|
13 | ||
14 | 3x |
dataset_1_one <- names(join_key_1) |
15 | 3x |
dataset_2_one <- names(join_key_1[[1]]) |
16 | 3x |
keys_one <- join_key_1[[1]][[1]] |
17 | ||
18 | 3x |
dataset_1_two <- names(join_key_2) |
19 | 3x |
dataset_2_two <- names(join_key_2[[1]]) |
20 | 3x |
keys_two <- join_key_2[[1]][[1]] |
21 | ||
22 |
# if first datasets and the second datasets match and keys |
|
23 |
# must contain the same named elements |
|
24 | 3x |
if (dataset_1_one == dataset_1_two && dataset_2_one == dataset_2_two) { |
25 | 3x |
if (!identical(sort(keys_one), sort(keys_two))) { |
26 | 1x |
stop_message(dataset_1_one, dataset_2_one) |
27 |
} |
|
28 |
} |
|
29 | ||
30 |
# if first dataset of join_key_1 matches second dataset of join_key_2 |
|
31 |
# and the first dataset of join_key_2 must match second dataset of join_key_1 |
|
32 |
# and keys must contain the same elements but with names and values swapped |
|
33 | 2x |
if (dataset_1_one == dataset_2_two && dataset_2_one == dataset_1_two) { |
34 |
if ( |
|
35 | ! |
xor(length(keys_one) == 0, length(keys_two) == 0) || |
36 | ! |
!identical(sort(keys_one), sort(stats::setNames(names(keys_two), keys_two))) |
37 |
) { |
|
38 | ! |
stop_message(dataset_1_one, dataset_2_one) |
39 |
} |
|
40 |
} |
|
41 | ||
42 |
# otherwise they are compatible |
|
43 | 2x |
return(TRUE) |
44 |
} |
|
45 | ||
46 |
#' Validate parent-child key |
|
47 |
#' |
|
48 |
#' Helper function checks the parent-child relations are valid. |
|
49 |
#' |
|
50 |
#' @param x (`join_keys`) object to assert validity of relations |
|
51 |
#' |
|
52 |
#' @return `join_keys` invisibly |
|
53 |
#' |
|
54 |
#' @keywords internal |
|
55 |
assert_parent_child <- function(x) { |
|
56 | 431x |
jk <- join_keys(x) |
57 | 431x |
jk_parents <- parents(jk) |
58 | ||
59 | 431x |
checkmate::assert_class(jk, c("join_keys", "list")) |
60 | ||
61 | 431x |
if (!is.null(jk_parents)) { |
62 | 431x |
for (idx1 in seq_along(jk_parents)) { |
63 | 170x |
name_from <- names(jk_parents)[[idx1]] |
64 | 170x |
for (idx2 in seq_along(jk_parents[[idx1]])) { |
65 | 170x |
name_to <- jk_parents[[idx1]][[idx2]] |
66 | 170x |
keys_from <- jk[[name_from]][[name_to]] |
67 | 170x |
keys_to <- jk[[name_to]][[name_from]] |
68 | 170x |
if (length(keys_from) == 0 && length(keys_to) == 0) { |
69 | 1x |
stop(sprintf("No join keys from %s to its parent (%s) and vice versa", name_from, name_to)) |
70 |
} |
|
71 |
} |
|
72 |
} |
|
73 |
} |
|
74 | 430x |
invisible(x) |
75 |
} |
|
76 | ||
77 |
#' Verify key set compatibility |
|
78 |
#' |
|
79 |
#' Helper function to ensuring compatibility between two sets of keys |
|
80 |
#' |
|
81 |
#' @return Returns `TRUE` if successful, otherwise raises error. |
|
82 |
#' @keywords internal |
|
83 |
assert_compatible_keys2 <- function(x, y) { |
|
84 |
# Helper to flatten join_keys / join_key_set |
|
85 | 3x |
flatten_join_key_sets <- function(value) { |
86 | 6x |
value <- unclass(value) |
87 | 6x |
Reduce( |
88 | 6x |
init = list(), |
89 | 6x |
f = function(u, v, ...) { |
90 | 6x |
el <- value[v][[1]] |
91 | 6x |
res <- lapply(seq_along(el), function(ix) el[ix]) |
92 | 6x |
names(res) <- rep(v, length(res)) |
93 | 6x |
append(u, res) |
94 |
}, |
|
95 | 6x |
x = names(value) |
96 |
) |
|
97 |
} |
|
98 | ||
99 | 3x |
x <- flatten_join_key_sets(x) |
100 | 3x |
y <- flatten_join_key_sets(y) |
101 | ||
102 | 3x |
for (idx_1 in seq_along(x)) { |
103 | 3x |
for (idx_2 in seq_along(y)) { |
104 | 3x |
assert_compatible_keys(x[idx_1], y[idx_2]) |
105 |
} |
|
106 |
} |
|
107 | 2x |
TRUE |
108 |
} |
|
109 | ||
110 |
#' Updates the keys of the datasets based on the parents |
|
111 |
#' |
|
112 |
#' @param x (`join_keys`) object to update the keys. |
|
113 |
#' |
|
114 |
#' @return (`self`) invisibly for chaining |
|
115 |
#' |
|
116 |
#' @keywords internal |
|
117 |
update_keys_given_parents <- function(x) { |
|
118 | 12x |
jk <- join_keys(x) |
119 | ||
120 | 12x |
checkmate::assert_class(jk, "join_keys", .var.name = checkmate::vname(x)) |
121 | ||
122 | 12x |
datanames <- names(jk) |
123 | 12x |
for (d1_ix in seq_along(datanames)) { |
124 | 34x |
d1 <- datanames[[d1_ix]] |
125 | 34x |
d1_parent <- parent(jk, d1) |
126 | 34x |
for (d2 in datanames[-1 * seq.int(d1_ix)]) { |
127 | 38x |
if (length(jk[[d1]][[d2]]) == 0) { |
128 | 16x |
d2_parent <- parent(jk, d2) |
129 | ||
130 | 12x |
if (!identical(d1_parent, d2_parent) || length(d1_parent) == 0) next |
131 | ||
132 |
# both has the same parent -> common keys to parent |
|
133 | 4x |
keys_d1_parent <- sort(jk[[d1]][[d1_parent]]) |
134 | 4x |
keys_d2_parent <- sort(jk[[d2]][[d2_parent]]) |
135 | ||
136 | 4x |
common_ix_1 <- unname(keys_d1_parent) %in% unname(keys_d2_parent) |
137 | 4x |
common_ix_2 <- unname(keys_d2_parent) %in% unname(keys_d1_parent) |
138 | ||
139 |
# No common keys between datasets - leave empty |
|
140 | 1x |
if (all(!common_ix_1)) next |
141 | ||
142 | 3x |
fk <- structure( |
143 | 3x |
names(keys_d2_parent)[common_ix_2], |
144 | 3x |
names = names(keys_d1_parent)[common_ix_1] |
145 |
) |
|
146 | 3x |
jk[[d1]][[d2]] <- fk # mutate join key |
147 |
} |
|
148 |
} |
|
149 |
} |
|
150 |
# check parent child relation |
|
151 | 12x |
assert_parent_child(x = jk) |
152 | ||
153 | 12x |
jk |
154 |
} |
1 |
#' @rdname join_keys |
|
2 |
#' @order 2 |
|
3 |
#' |
|
4 |
#' @section Functions: |
|
5 |
#' - `x[datanames]`: Returns a subset of the `join_keys` object for |
|
6 |
#' given `datanames`, including parent `datanames` and symmetric mirror keys between |
|
7 |
#' `datanames` in the result. |
|
8 |
#' - `x[i, j]`: Returns join keys between datasets `i` and `j`, |
|
9 |
#' including implicit keys inferred from their relationship with a parent. |
|
10 |
#' |
|
11 |
#' @param i,j indices specifying elements to extract or replace. Index should be a |
|
12 |
#' a character vector, but it can also take numeric, logical, `NULL` or missing. |
|
13 |
#' |
|
14 |
#' @export |
|
15 |
#' |
|
16 |
#' @examples |
|
17 |
#' |
|
18 |
#' # Getter for join_keys --- |
|
19 |
#' |
|
20 |
#' jk["ds1", "ds2"] |
|
21 |
#' |
|
22 |
#' # Subsetting join_keys ---- |
|
23 |
#' |
|
24 |
#' jk["ds1"] |
|
25 |
#' jk[1:2] |
|
26 |
#' jk[c("ds1", "ds2")] |
|
27 |
#' |
|
28 |
`[.join_keys` <- function(x, i, j) { |
|
29 | 31x |
if (missing(i) && missing(j)) { |
30 |
# because: |
|
31 |
# - list(a = 1)[] returns list(a = 1) |
|
32 |
# - data.frame(a = 1)[] returns data.frame(a = 1) |
|
33 | 1x |
return(x) |
34 | 30x |
} else if (!missing(i) && is.null(i) || !missing(j) && is.null(j)) { |
35 |
# because list(a = 1)[NULL] returns NULL |
|
36 |
# data.frame(a = 1)[NULL, NULL] returns data.frame( |
|
37 | 2x |
return(join_keys()) |
38 | 28x |
} else if (!missing(i) && !missing(j)) { |
39 |
if ( |
|
40 | 8x |
!any( |
41 | 8x |
checkmate::test_string(i), |
42 | 8x |
checkmate::test_number(i), |
43 | 8x |
checkmate::test_logical(i, len = length(x)) && sum(j) == 1 |
44 |
) || |
|
45 | 8x |
!any( |
46 | 8x |
checkmate::test_string(j), |
47 | 8x |
checkmate::test_number(j), |
48 | 8x |
checkmate::test_logical(j, len = length(x)) && sum(j) == 1 |
49 |
) |
|
50 |
) { |
|
51 | 1x |
stop( |
52 | 1x |
"join_keys[i, j] - Can't extract keys for multiple pairs.", |
53 | 1x |
"When specifying a pair [i, j], both indices must point to a single key pair.\n", |
54 | 1x |
call. = FALSE |
55 |
) |
|
56 |
} |
|
57 | 1x |
if (is.numeric(i)) i <- names(x)[i] |
58 | 1x |
if (is.numeric(j)) j <- names(x)[j] |
59 | ||
60 | 7x |
subset_x <- update_keys_given_parents(x[union(i, j)]) |
61 | 7x |
return(subset_x[[i]][[j]]) |
62 | 20x |
} else if (!missing(j)) { |
63 |
# ie. select all keys which have j as dataset_2 |
|
64 |
# since list is symmetrical it is equivalent to selecting by i |
|
65 | 1x |
i <- j |
66 |
} |
|
67 | ||
68 | 20x |
checkmate::assert( |
69 | 20x |
combine = "or", |
70 | 20x |
checkmate::check_character(i), |
71 | 20x |
checkmate::check_numeric(i), |
72 | 20x |
checkmate::check_logical(i) |
73 |
) |
|
74 | ||
75 | ||
76 |
# Convert integer/logical index to named index |
|
77 | 20x |
if (checkmate::test_numeric(i) || checkmate::test_logical(i)) { |
78 | 2x |
i <- names(x)[i] |
79 |
} |
|
80 | ||
81 |
# When retrieving a relationship pair, it will also return the symmetric key |
|
82 | 20x |
new_jk <- new_join_keys() |
83 | 20x |
queue <- unique(i) |
84 | 20x |
bin <- character(0) |
85 | ||
86 |
# Need to iterate on a mutating queue if subset of a dataset will also |
|
87 |
# select its parent as that parent might have relationships with others |
|
88 |
# already selected. |
|
89 | 20x |
while (length(queue) > 0) { |
90 | 45x |
ix <- queue[1] |
91 | 45x |
queue <- queue[-1] |
92 | 45x |
bin <- c(bin, ix) |
93 | ||
94 | 45x |
ix_parent <- parent(x, ix) |
95 | ||
96 | 45x |
if (checkmate::test_string(ix_parent, min.chars = 1) && !ix_parent %in% c(queue, bin)) { |
97 | 10x |
queue <- c(queue, ix_parent) |
98 |
} |
|
99 | ||
100 | 45x |
ix_valid_names <- names(x[[ix]]) %in% c(queue, bin) |
101 | ||
102 | 45x |
new_jk[[ix]] <- x[[ix]][ix_valid_names] |
103 | ||
104 |
# Add primary key of parent |
|
105 | 45x |
if (length(ix_parent) > 0) { |
106 | 16x |
new_jk[[ix_parent]][[ix_parent]] <- x[[ix_parent]][[ix_parent]] |
107 |
} |
|
108 |
} |
|
109 | ||
110 | 20x |
common_parents_ix <- names(parents(x)) %in% names(new_jk) & |
111 | 20x |
parents(x) %in% names(new_jk) |
112 | ||
113 | 9x |
if (any(common_parents_ix)) parents(new_jk) <- parents(x)[common_parents_ix] |
114 | ||
115 | 20x |
new_jk |
116 |
} |
|
117 | ||
118 |
#' @rdname join_keys |
|
119 |
#' @order 2 |
|
120 |
#' |
|
121 |
#' @param directed (`logical(1)`) Flag that indicates whether it should create |
|
122 |
#' a parent-child relationship between the datasets. |
|
123 |
#' - `TRUE` (default) `dataset_1` is the parent of `dataset_2`; |
|
124 |
#' - `FALSE` when the relationship is undirected. |
|
125 |
#' @section Functions: |
|
126 |
#' - `x[i, j] <- value`: Assignment of a key to pair `(i, j)`. |
|
127 |
#' - `x[i] <- value`: This (without `j` parameter) **is not** a supported |
|
128 |
#' operation for `join_keys`. |
|
129 |
#' - `join_keys(x)[i, j] <- value`: Assignment to `join_keys` object stored in `x`, |
|
130 |
#' such as a `teal_data` object or `join_keys` object itself. |
|
131 |
#' |
|
132 |
#' @export |
|
133 |
#' @examples |
|
134 |
#' |
|
135 |
#' # Setting a new primary key --- |
|
136 |
#' |
|
137 |
#' jk["ds4", "ds4"] <- "pk4" |
|
138 |
#' jk["ds5", "ds5"] <- "pk5" |
|
139 |
#' |
|
140 |
#' # Setting a single relationship pair --- |
|
141 |
#' |
|
142 |
#' jk["ds1", "ds4"] <- c("pk1" = "pk4") |
|
143 |
#' |
|
144 |
#' # Removing a key --- |
|
145 |
#' |
|
146 |
#' jk["ds5", "ds5"] <- NULL |
|
147 |
`[<-.join_keys` <- function(x, i, j, directed = TRUE, value) { |
|
148 | 11x |
checkmate::assert_flag(directed) |
149 | 11x |
if (missing(i) || missing(j)) { |
150 | 4x |
stop("join_keys[i, j] specify both indices to set a key pair.") |
151 | 7x |
} else if (!missing(i) && is.null(i) || !missing(j) && is.null(j)) { |
152 | 2x |
stop("join_keys[i, j] neither i nor j can be NULL.") |
153 |
} else if ( |
|
154 | 5x |
!any( |
155 | 5x |
checkmate::test_string(i), |
156 | 5x |
checkmate::test_number(i), |
157 | 5x |
checkmate::test_logical(i, len = length(x)) && sum(j) == 1 |
158 |
) || |
|
159 | 5x |
!any( |
160 | 5x |
checkmate::test_string(j), |
161 | 5x |
checkmate::test_number(j), |
162 | 5x |
checkmate::test_logical(j, len = length(x)) && sum(j) == 1 |
163 |
) |
|
164 |
) { |
|
165 | 2x |
stop( |
166 | 2x |
"join_keys[i, j] <- Can't set keys to specified indices.\n", |
167 | 2x |
"When setting pair [i, j], both indices must point to a single key pair.\n", |
168 | 2x |
call. = FALSE |
169 |
) |
|
170 |
} |
|
171 | ||
172 |
# Handle join key removal separately |
|
173 | 3x |
if (is.null(value)) { |
174 | 1x |
x[[i]][[j]] <- NULL |
175 | 1x |
return(x) |
176 |
} |
|
177 | ||
178 | 2x |
c(x, join_key(i, j, value, directed)) |
179 |
} |
|
180 | ||
181 |
#' @rdname join_keys |
|
182 |
#' |
|
183 |
#' @order 1000 |
|
184 |
#' @usage ## Preferred method is x[i, j] <- value |
|
185 |
#' x[[i]][[j]] <- value |
|
186 |
#' |
|
187 |
#' @section Functions: |
|
188 |
#' - `x[[i]][[j]] <- value`: It is equivalent as `x[i, j] <- value`. |
|
189 |
#' |
|
190 |
#' @export |
|
191 |
#' @examples |
|
192 |
#' |
|
193 |
#' # Setting via x[[i]] <- value --- |
|
194 |
#' |
|
195 |
#' jk <- join_keys() |
|
196 |
#' jk[["ds6"]][["ds6"]] <- "pk6" |
|
197 |
#' jk[["ds7"]] <- list(ds7 = "pk7", ds6 = c(pk7 = "pk6")) |
|
198 |
#' jk[["ds7"]][["ds7"]] <- NULL # removes key |
|
199 |
#' |
|
200 |
#' jk |
|
201 |
#' |
|
202 |
#' @noRd |
|
203 |
`[[<-.join_keys` <- function(x, i, value) { |
|
204 | 382x |
checkmate::assert( |
205 | 382x |
combine = "or", |
206 | 382x |
checkmate::check_string(i), |
207 | 382x |
checkmate::check_number(i), |
208 | 382x |
checkmate::check_logical(i, len = length(x)) |
209 |
) |
|
210 | 382x |
checkmate::assert_list(value, names = "named", types = "character", null.ok = TRUE) |
211 | 375x |
if (checkmate::test_numeric(i) || checkmate::test_logical(i)) { |
212 | 1x |
i <- names(x)[[i]] |
213 |
} |
|
214 | ||
215 |
# Normalize values |
|
216 | 375x |
norm_value <- lapply(seq_along(value), function(.x) { |
217 | 524x |
join_key(i, names(value)[.x], value[[.x]]) |
218 |
}) |
|
219 | 375x |
names(norm_value) <- names(value) |
220 | ||
221 |
# Check if multiple modifications don't have a conflict |
|
222 | 375x |
repeated_value_ix <- names(value) %in% names(value)[duplicated(names(value))] |
223 | 375x |
repeated <- norm_value[repeated_value_ix] |
224 | 375x |
vapply( |
225 | 375x |
seq_along(repeated), |
226 | 375x |
function(.ix, .x_value = repeated[[.ix]], .x_name = names(.x_value[[1]])) { |
227 | 3x |
assert_compatible_keys2( |
228 | 3x |
.x_value, |
229 | 3x |
unlist(unname( |
230 | 3x |
repeated[-.ix][names(repeated[-.ix]) == .x_name] |
231 | 3x |
), recursive = FALSE) |
232 |
) |
|
233 |
}, |
|
234 | 375x |
logical(1) |
235 |
) |
|
236 | ||
237 | 374x |
norm_value <- lapply(norm_value, function(x) x[[1]][[1]]) |
238 | 374x |
names(norm_value) <- names(value) |
239 | ||
240 |
# Safe to do as duplicated are the same |
|
241 | 374x |
norm_value[duplicated(names(norm_value))] <- NULL |
242 | ||
243 |
# Keep only elements with length > 0L |
|
244 | 374x |
norm_value <- Filter(length, norm_value) |
245 | ||
246 |
# Remove classes to use list-based get/assign operations |
|
247 | 374x |
new_x <- unclass(x) |
248 | ||
249 |
# In case a pair is removed, also remove the symmetric pair and update parents |
|
250 | 374x |
removed_names <- setdiff(names(new_x[[i]]), names(norm_value)) |
251 | 374x |
for (.x in removed_names) { |
252 | 2x |
if (identical(parent(x, .x), i)) attr(new_x, "parents")[[.x]] <- NULL |
253 | 1x |
if (identical(parent(x, i), .x)) attr(new_x, "parents")[[i]] <- NULL |
254 | ||
255 | 5x |
new_x[[.x]][[i]] <- NULL |
256 |
} |
|
257 | ||
258 | 374x |
new_x[[i]] <- norm_value |
259 | ||
260 |
# Iterate on all new values to create symmetrical pair |
|
261 | 374x |
for (ds2 in names(norm_value)) { |
262 | 298x |
if (ds2 == i) next |
263 | ||
264 | 222x |
keep_value <- if (is.null(x)) list() else new_x[[ds2]] |
265 | ||
266 |
# Invert key |
|
267 | 222x |
new_value <- stats::setNames(names(norm_value[[ds2]]), norm_value[[ds2]]) |
268 | 222x |
keep_value[[i]] <- new_value |
269 | ||
270 |
# Assign symmetrical |
|
271 | 222x |
new_x[[ds2]] <- keep_value |
272 |
} |
|
273 | ||
274 | 374x |
preserve_attr <- attributes(new_x)[!names(attributes(new_x)) %in% "names"] |
275 |
# Remove NULL or empty keys |
|
276 | 374x |
new_x <- Filter(function(x) length(x) != 0L, new_x) |
277 | 374x |
attributes(new_x) <- utils::modifyList(attributes(new_x), preserve_attr) |
278 | ||
279 |
# |
|
280 |
# restore class |
|
281 | 374x |
class(new_x) <- class(x) |
282 | 374x |
new_x |
283 |
} |
1 |
# get_code_dependency ---- |
|
2 | ||
3 |
#' Get code dependency of an object |
|
4 |
#' |
|
5 |
#' Extract subset of code required to reproduce specific object(s), including code producing side-effects. |
|
6 |
#' |
|
7 |
#' Given a character vector with code, this function will extract the part of the code responsible for creating |
|
8 |
#' the variables specified by `names`. |
|
9 |
#' This includes the final call that creates the variable(s) in question as well as all _parent calls_, |
|
10 |
#' _i.e._ calls that create variables used in the final call and their parents, etc. |
|
11 |
#' Also included are calls that create side-effects like establishing connections. |
|
12 |
#' |
|
13 |
#' It is assumed that object dependency is established by using three assignment operators: `<-`, `=`, and `->` . |
|
14 |
#' Other assignment methods (`assign`, `<<-`) or non-standard-evaluation methods are not supported. |
|
15 |
#' |
|
16 |
#' Side-effects are not detected automatically and must be marked in the code. |
|
17 |
#' Add `# @linksto object` at the end of a line where a side-effect occurs to specify that this line is required |
|
18 |
#' to reproduce a variable called `object`. |
|
19 |
#' |
|
20 |
#' @param code `character` with the code. |
|
21 |
#' @param names `character` vector of object names. |
|
22 |
#' @param check_names `logical(1)` flag specifying if a warning for non-existing names should be displayed. |
|
23 |
#' |
|
24 |
#' @return Character vector, a subset of `code`. |
|
25 |
#' Note that subsetting is actually done on the calls `code`, not necessarily on the elements of the vector. |
|
26 |
#' |
|
27 |
#' @keywords internal |
|
28 |
get_code_dependency <- function(code, names, check_names = TRUE) { |
|
29 | 44x |
checkmate::assert_character(code) |
30 | 44x |
checkmate::assert_character(names, any.missing = FALSE) |
31 | ||
32 | 44x |
if (identical(code, character(0)) || identical(trimws(code), "")) { |
33 | 2x |
return(code) |
34 |
} |
|
35 | ||
36 | 42x |
code <- parse(text = code, keep.source = TRUE) |
37 | 42x |
pd <- utils::getParseData(code) |
38 | 42x |
calls_pd <- extract_calls(pd) |
39 | ||
40 | 42x |
if (check_names) { |
41 |
# Detect if names are actually in code. |
|
42 | 42x |
symbols <- unlist(lapply(calls_pd, function(call) call[call$token == "SYMBOL", "text"])) |
43 | 42x |
if (any(pd$text == "assign")) { |
44 | 4x |
assign_calls <- Filter(function(call) find_call(call, "assign"), calls_pd) |
45 | 4x |
ass_str <- unlist(lapply(assign_calls, function(call) call[call$token == "STR_CONST", "text"])) |
46 | 4x |
ass_str <- gsub("^['\"]|['\"]$", "", ass_str) |
47 | 4x |
symbols <- c(ass_str, symbols) |
48 |
} |
|
49 | 42x |
if (!all(names %in% unique(symbols))) { |
50 | 1x |
warning("Object(s) not found in code: ", toString(setdiff(names, symbols))) |
51 |
} |
|
52 |
} |
|
53 | ||
54 | 42x |
graph <- code_graph(calls_pd) |
55 | 42x |
ind <- unlist(lapply(names, function(x) graph_parser(x, graph))) |
56 | ||
57 | 42x |
lib_ind <- detect_libraries(calls_pd) |
58 | ||
59 | 42x |
as.character(code[unique(c(lib_ind, ind))]) |
60 |
} |
|
61 | ||
62 |
#' Locate function call token |
|
63 |
#' |
|
64 |
#' Determine which row of parsed data is specific `SYMBOL_FUNCTION_CALL` token. |
|
65 |
#' |
|
66 |
#' Useful for determining occurrence of `assign` or `data` functions in an input call. |
|
67 |
#' |
|
68 |
#' @param call_pd `data.frame` as returned by `extract_calls()` |
|
69 |
#' @param text `character(1)` to look for in `text` column of `call_pd` |
|
70 |
#' |
|
71 |
#' @return |
|
72 |
#' Single integer specifying row in `call_pd` where `token` is `SYMBOL_FUNCTION_CALL` and `text` is `text`. |
|
73 |
#' 0 if not found. |
|
74 |
#' |
|
75 |
#' @keywords internal |
|
76 |
#' @noRd |
|
77 |
find_call <- function(call_pd, text) { |
|
78 | 332x |
checkmate::check_data_frame(call_pd) |
79 | 332x |
checkmate::check_names(call_pd, must.include = c("token", "text")) |
80 | 332x |
checkmate::check_string(text) |
81 | ||
82 | 332x |
ans <- which(call_pd$token == "SYMBOL_FUNCTION_CALL" & call_pd$text == text) |
83 | 332x |
if (length(ans)) { |
84 | 24x |
ans |
85 |
} else { |
|
86 | 308x |
0L |
87 |
} |
|
88 |
} |
|
89 | ||
90 |
#' Split the result of `utils::getParseData()` into separate calls |
|
91 |
#' |
|
92 |
#' @param pd (`data.frame`) A result of `utils::getParseData()`. |
|
93 |
#' |
|
94 |
#' @return |
|
95 |
#' A `list` of `data.frame`s. |
|
96 |
#' Each element is a subset of `pd` corresponding to one call in the original code from which `pd` was obtained. |
|
97 |
#' Only four columns (`"token"`, `"text"`, `"id"`, `"parent"`) are kept, the rest is discarded. |
|
98 |
#' |
|
99 |
#' @keywords internal |
|
100 |
#' @noRd |
|
101 |
extract_calls <- function(pd) { |
|
102 | 42x |
calls <- lapply( |
103 | 42x |
pd[pd$parent == 0, "id"], |
104 | 42x |
function(parent) { |
105 | 156x |
rbind( |
106 | 156x |
pd[pd$id == parent, c("token", "text", "id", "parent")], |
107 | 156x |
get_children(pd = pd, parent = parent) |
108 |
) |
|
109 |
} |
|
110 |
) |
|
111 | 42x |
calls <- Filter(function(call) !(nrow(call) == 1 && call$token == "';'"), calls) |
112 | 42x |
calls <- Filter(Negate(is.null), calls) |
113 | 42x |
calls <- fix_shifted_comments(calls) |
114 | 42x |
fix_arrows(calls) |
115 |
} |
|
116 | ||
117 |
#' @keywords internal |
|
118 |
#' @noRd |
|
119 |
get_children <- function(pd, parent) { |
|
120 | 1827x |
idx_children <- abs(pd$parent) == parent |
121 | 1827x |
children <- pd[idx_children, c("token", "text", "id", "parent")] |
122 | 1827x |
if (nrow(children) == 0) { |
123 | 1065x |
return(NULL) |
124 |
} |
|
125 | ||
126 | 762x |
if (parent > 0) { |
127 | 762x |
do.call(rbind, c(list(children), lapply(children$id, get_children, pd = pd))) |
128 |
} |
|
129 |
} |
|
130 | ||
131 |
#' Fixes edge case of comments being shifted to the next call. |
|
132 |
#' @keywords internal |
|
133 |
#' @noRd |
|
134 |
fix_shifted_comments <- function(calls) { |
|
135 |
# If the first or the second token is a @linksto COMMENT, |
|
136 |
# then it belongs to the previous call. |
|
137 | 42x |
if (length(calls) >= 2) { |
138 | 41x |
for (i in 2:length(calls)) { |
139 | 113x |
comment_idx <- grep("@linksto", calls[[i]][, "text"]) |
140 | 113x |
if (isTRUE(comment_idx[1] <= 2)) { |
141 | 4x |
calls[[i - 1]] <- rbind( |
142 | 4x |
calls[[i - 1]], |
143 | 4x |
calls[[i]][seq_len(comment_idx[1]), ] |
144 |
) |
|
145 | 4x |
calls[[i]] <- calls[[i]][-seq_len(comment_idx[1]), ] |
146 |
} |
|
147 |
} |
|
148 |
} |
|
149 | 42x |
Filter(nrow, calls) |
150 |
} |
|
151 | ||
152 |
#' Fixes edge case of `<-` assignment operator being called as function, |
|
153 |
#' which is \code{`<-`(y,x)} instead of traditional `y <- x`. |
|
154 |
#' @keywords internal |
|
155 |
#' @noRd |
|
156 |
fix_arrows <- function(calls) { |
|
157 | 42x |
lapply( |
158 | 42x |
calls, |
159 | 42x |
function(call) { |
160 | 154x |
call[call$token == "SYMBOL_FUNCTION_CALL" & call$text == "`<-`", c("token", "text")] <- c("LEFT_ASSIGN", "<-") |
161 | 154x |
call[call$token == "SYMBOL_FUNCTION_CALL" & call$text == "`->`", c("token", "text")] <- c("RIGHT_ASSIGN", "->") |
162 | 154x |
call[call$token == "SYMBOL_FUNCTION_CALL" & call$text == "`<<-`", c("token", "text")] <- c("LEFT_ASSIGN", "<-") |
163 | 154x |
call[call$token == "SYMBOL_FUNCTION_CALL" & call$text == "`->>`", c("token", "text")] <- c("RIGHT_ASSIGN", "->") |
164 | 154x |
call[call$token == "SYMBOL_FUNCTION_CALL" & call$text == "`=`", c("token", "text")] <- c("LEFT_ASSIGN", "<-") |
165 | 154x |
call |
166 |
} |
|
167 |
) |
|
168 |
} |
|
169 | ||
170 |
# code_graph ---- |
|
171 | ||
172 |
#' Create object dependencies graph within parsed code |
|
173 |
#' |
|
174 |
#' Builds dependency graph that identifies dependencies between objects in parsed code. |
|
175 |
#' Helps understand which objects depend on which. |
|
176 |
#' |
|
177 |
#' @param calls_pd `list` of `data.frame`s; |
|
178 |
#' result of `utils::getParseData()` split into subsets representing individual calls; |
|
179 |
#' created by `extract_calls()` function |
|
180 |
#' |
|
181 |
#' @return |
|
182 |
#' A list (of length of input `calls_pd`) where each element represents one call. |
|
183 |
#' Each element is a character vector listing names of objects that depend on this call |
|
184 |
#' and names of objects that this call depends on. |
|
185 |
#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a` |
|
186 |
#' depends on objects `b` and `c`. |
|
187 |
#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call. |
|
188 |
#' |
|
189 |
#' @keywords internal |
|
190 |
#' @noRd |
|
191 |
code_graph <- function(calls_pd) { |
|
192 | 42x |
cooccurrence <- extract_occurrence(calls_pd) |
193 | ||
194 | 42x |
side_effects <- extract_side_effects(calls_pd) |
195 | ||
196 | 42x |
mapply(function(x, y) unique(c(x, y)), side_effects, cooccurrence, SIMPLIFY = FALSE) |
197 |
} |
|
198 | ||
199 |
#' Extract object occurrence |
|
200 |
#' |
|
201 |
#' Extracts objects occurrence within calls passed by `calls_pd`. |
|
202 |
#' Also detects which objects depend on which within a call. |
|
203 |
#' |
|
204 |
#' @param calls_pd `list` of `data.frame`s; |
|
205 |
#' result of `utils::getParseData()` split into subsets representing individual calls; |
|
206 |
#' created by `extract_calls()` function |
|
207 |
#' |
|
208 |
#' @return |
|
209 |
#' A list (of length of input `calls_pd`) where each element represents one call. |
|
210 |
#' Each element is a character vector listing names of objects that depend on this call |
|
211 |
#' and names of objects that this call depends on. |
|
212 |
#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a` |
|
213 |
#' depends on objects `b` and `c`. |
|
214 |
#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call. |
|
215 |
#' |
|
216 |
#' @keywords internal |
|
217 |
#' @noRd |
|
218 |
extract_occurrence <- function(calls_pd) { |
|
219 | 42x |
is_in_function <- function(x) { |
220 |
# If an object is a function parameter, |
|
221 |
# then in calls_pd there is a `SYMBOL_FORMALS` entry for that object. |
|
222 | 141x |
function_id <- x[x$token == "FUNCTION", "parent"] |
223 | 141x |
if (length(function_id)) { |
224 | 9x |
x$id %in% get_children(x, function_id)$id |
225 |
} else { |
|
226 | 132x |
rep(FALSE, nrow(x)) |
227 |
} |
|
228 |
} |
|
229 | 42x |
lapply( |
230 | 42x |
calls_pd, |
231 | 42x |
function(call_pd) { |
232 |
# Handle data(object)/data("object")/data(object, envir = ) independently. |
|
233 | 154x |
data_call <- find_call(call_pd, "data") |
234 | 154x |
if (data_call) { |
235 | 2x |
sym <- call_pd[data_call + 1, "text"] |
236 | 2x |
return(c(gsub("^['\"]|['\"]$", "", sym), "<-")) |
237 |
} |
|
238 |
# Handle assign(x = ). |
|
239 | 152x |
assign_call <- find_call(call_pd, "assign") |
240 | 152x |
if (assign_call) { |
241 |
# Check if parameters were named. |
|
242 |
# "','" is for unnamed parameters, where "SYMBOL_SUB" is for named. |
|
243 |
# "EQ_SUB" is for `=` appearing after the name of the named parameter. |
|
244 | 11x |
if (any(call_pd$token == "SYMBOL_SUB")) { |
245 | 8x |
params <- call_pd[call_pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"] |
246 |
# Remove sequence of "=", ",". |
|
247 | 8x |
if (length(params > 1)) { |
248 | 8x |
remove <- integer(0) |
249 | 8x |
for (i in 2:length(params)) { |
250 | 36x |
if (params[i - 1] == "=" & params[i] == ",") { |
251 | 8x |
remove <- c(remove, i - 1, i) |
252 |
} |
|
253 |
} |
|
254 | 7x |
if (length(remove)) params <- params[-remove] |
255 |
} |
|
256 | 8x |
pos <- match("x", setdiff(params, ","), nomatch = match(",", params, nomatch = 0)) |
257 | 8x |
if (!pos) { |
258 | ! |
return(character(0L)) |
259 |
} |
|
260 |
# pos is indicator of the place of 'x' |
|
261 |
# 1. All parameters are named, but none is 'x' - return(character(0L)) |
|
262 |
# 2. Some parameters are named, 'x' is in named parameters: match("x", setdiff(params, ",")) |
|
263 |
# - check "x" in params being just a vector of named parameters. |
|
264 |
# 3. Some parameters are named, 'x' is not in named parameters |
|
265 |
# - check first appearance of "," (unnamed parameter) in vector parameters. |
|
266 |
} else { |
|
267 |
# Object is the first entry after 'assign'. |
|
268 | 3x |
pos <- 1 |
269 |
} |
|
270 | 11x |
sym <- call_pd[assign_call + pos, "text"] |
271 | 11x |
return(c(gsub("^['\"]|['\"]$", "", sym), "<-")) |
272 |
} |
|
273 | ||
274 |
# What occurs in a function body is not tracked. |
|
275 | 141x |
x <- call_pd[!is_in_function(call_pd), ] |
276 | 141x |
sym_cond <- which(x$token %in% c("SYMBOL", "SYMBOL_FUNCTION_CALL")) |
277 | ||
278 | 141x |
if (length(sym_cond) == 0) { |
279 | 2x |
return(character(0L)) |
280 |
} |
|
281 |
# Watch out for SYMBOLS after $ and @. For x$a x@a: x is object, a is not. |
|
282 |
# For x$a, a's ID is $'s ID-2 so we need to remove all IDs that have ID = $ID - 2. |
|
283 | 139x |
dollar_ids <- x[x$token %in% c("'$'", "'@'"), "id"] |
284 | 139x |
if (length(dollar_ids)) { |
285 | 12x |
object_ids <- x[sym_cond, "id"] |
286 | 12x |
after_dollar <- object_ids[(object_ids - 2) %in% dollar_ids] |
287 | 12x |
sym_cond <- setdiff(sym_cond, which(x$id %in% after_dollar)) |
288 |
} |
|
289 | ||
290 | 139x |
ass_cond <- grep("ASSIGN", x$token) |
291 | 139x |
if (!length(ass_cond)) { |
292 | 19x |
return(c("<-", unique(x[sym_cond, "text"]))) |
293 |
} |
|
294 | ||
295 | 120x |
sym_cond <- sym_cond[sym_cond > ass_cond] # NOTE 1 |
296 |
# If there was an assignment operation detect direction of it. |
|
297 | 120x |
if (unique(x$text[ass_cond]) == "->") { # NOTE 2 |
298 | 1x |
sym_cond <- rev(sym_cond) |
299 |
} |
|
300 | ||
301 | 120x |
append(unique(x[sym_cond, "text"]), "<-", after = 1) |
302 | ||
303 |
### NOTE 2: What if there are 2 assignments: e.g. a <- b -> c. |
|
304 |
### NOTE 1: For cases like 'eval(expression(b <- b + 2))' removes 'eval(expression('. |
|
305 |
} |
|
306 |
) |
|
307 |
} |
|
308 | ||
309 |
#' Extract side effects |
|
310 |
#' |
|
311 |
#' Extracts all object names from the code that are marked with `@linksto` tag. |
|
312 |
#' |
|
313 |
#' The code may contain functions calls that create side effects, e.g. modify the environment. |
|
314 |
#' Static code analysis may be insufficient to determine which objects are created or modified by such a function call. |
|
315 |
#' The `@linksto` comment tag is introduced to mark a call as having a (side) effect on one or more objects. |
|
316 |
#' With this tag a complete object dependency structure can be established. |
|
317 |
#' Read more about side effects and the usage of `@linksto` tag in [`get_code_dependencies()`] function. |
|
318 |
#' |
|
319 |
#' @param calls_pd `list` of `data.frame`s; |
|
320 |
#' result of `utils::getParseData()` split into subsets representing individual calls; |
|
321 |
#' created by `extract_calls()` function |
|
322 |
#' |
|
323 |
#' @return |
|
324 |
#' A list of length equal to that of `calls_pd`, where each element is a character vector of names of objects |
|
325 |
#' depending a call tagged with `@linksto` in a corresponding element of `calls_pd`. |
|
326 |
#' |
|
327 |
#' @keywords internal |
|
328 |
#' @noRd |
|
329 |
extract_side_effects <- function(calls_pd) { |
|
330 | 42x |
lapply( |
331 | 42x |
calls_pd, |
332 | 42x |
function(x) { |
333 | 154x |
linksto <- grep("@linksto", x[x$token == "COMMENT", "text"], value = TRUE) |
334 | 154x |
unlist(strsplit(sub("\\s*#\\s*@linksto\\s+", "", linksto), "\\s+")) |
335 |
} |
|
336 |
) |
|
337 |
} |
|
338 | ||
339 |
# graph_parser ---- |
|
340 | ||
341 |
#' Return the indices of calls needed to reproduce an object |
|
342 |
#' |
|
343 |
#' @param x The name of the object to return code for. |
|
344 |
#' @param graph A result of `code_graph()`. |
|
345 |
#' |
|
346 |
#' @return |
|
347 |
#' Integer vector of indices that can be applied to `graph` to obtain all calls required to reproduce object `x`. |
|
348 |
#' |
|
349 |
#' @keywords internal |
|
350 |
#' @noRd |
|
351 |
graph_parser <- function(x, graph) { |
|
352 | 182x |
occurrence <- vapply( |
353 | 182x |
graph, function(call) { |
354 | 552x |
ind <- match("<-", call, nomatch = length(call) + 1L) |
355 | 552x |
x %in% call[seq_len(ind - 1L)] |
356 |
}, |
|
357 | 182x |
logical(1) |
358 |
) |
|
359 | ||
360 | 182x |
dependencies <- lapply(graph[occurrence], function(call) { |
361 | 96x |
ind <- match("<-", call, nomatch = 0L) |
362 | 96x |
call[(ind + 1L):length(call)] |
363 |
}) |
|
364 | 182x |
dependencies <- setdiff(unlist(dependencies), x) |
365 | ||
366 | 182x |
if (length(dependencies) && any(occurrence)) { |
367 | 68x |
dependency_ids <- lapply(dependencies, function(dependency) { |
368 | 139x |
graph_parser(dependency, graph[1:max(which(occurrence))]) |
369 |
}) |
|
370 | 68x |
sort(unique(c(which(occurrence), unlist(dependency_ids)))) |
371 |
} else { |
|
372 | 114x |
which(occurrence) |
373 |
} |
|
374 |
} |
|
375 | ||
376 | ||
377 |
# default_side_effects -------------------------------------------------------------------------------------------- |
|
378 | ||
379 |
#' Detect library calls |
|
380 |
#' |
|
381 |
#' Detects `library()` and `require()` function calls. |
|
382 |
#' |
|
383 |
#' @param calls_pd `list` of `data.frame`s; |
|
384 |
#' result of `utils::getParseData()` split into subsets representing individual calls; |
|
385 |
#' created by `extract_calls()` function |
|
386 |
#' |
|
387 |
#' @return |
|
388 |
#' Integer vector of indices that can be applied to `graph` (result of `code_graph()`) to obtain all calls containing |
|
389 |
#' `library()` or `require()` calls that are always returned for reproducibility. |
|
390 |
#' |
|
391 |
#' @keywords internal |
|
392 |
#' @noRd |
|
393 |
detect_libraries <- function(calls_pd) { |
|
394 | 42x |
defaults <- c("library", "require") |
395 | ||
396 | 42x |
which( |
397 | 42x |
vapply( |
398 | 42x |
calls_pd, |
399 | 42x |
function(call) { |
400 | 154x |
any(call$token == "SYMBOL_FUNCTION_CALL" & call$text %in% defaults) |
401 |
}, |
|
402 | 42x |
logical(1) |
403 |
) |
|
404 |
) |
|
405 |
} |
1 |
#' Variable labels |
|
2 |
#' |
|
3 |
#' Get or set variable labels in a `data.frame`. |
|
4 |
#' |
|
5 |
#' @details Variable labels can be stored as a `label` attribute set on individual variables. |
|
6 |
#' These functions get or set this attribute, either on all (`col_labels`) or some variables (`col_relabel`). |
|
7 |
#' |
|
8 |
#' @param x (`data.frame` or `DataFrame`) data object |
|
9 |
#' @param fill (`logical(1)`) specifying what to return if variable has no label |
|
10 |
#' @param value (`character`) vector of variable labels of length equal to number of columns in `x`; |
|
11 |
#' if named, names must match variable names in `x` and will be used as key to set labels; |
|
12 |
#' use `NA` to remove label from variable |
|
13 |
#' @param ... name-value pairs, where name corresponds to a variable name in `x` |
|
14 |
#' and value is the new variable label |
|
15 |
#' |
|
16 |
#' @return |
|
17 |
#' For `col_labels`, named character vector of variable labels, the names being the corresponding variable names. |
|
18 |
#' If the `label` attribute is missing, the vector elements will be |
|
19 |
#' the variable names themselves if `fill = TRUE` and `NA` if `fill = FALSE`. |
|
20 |
#' |
|
21 |
#' For `col_labels<-` and `col_relabel`, copy of `x` with variable labels modified. |
|
22 |
#' |
|
23 |
#' @examples |
|
24 |
#' x <- iris |
|
25 |
#' col_labels(x) |
|
26 |
#' col_labels(x) <- paste("label for", names(iris)) |
|
27 |
#' col_labels(x) |
|
28 |
#' y <- col_relabel(x, Sepal.Length = "Sepal Length of iris flower") |
|
29 |
#' col_labels(y) |
|
30 |
#' |
|
31 |
#' @source These functions were taken from |
|
32 |
#' [formatters](https://cran.r-project.org/package=formatters) package, to reduce the complexity of |
|
33 |
#' the dependency tree and rewritten. |
|
34 |
#' |
|
35 |
#' @rdname col_labels |
|
36 |
#' @export |
|
37 |
#' |
|
38 |
col_labels <- function(x, fill = FALSE) { |
|
39 | 5x |
checkmate::test_multi_class(x, c("data.frame", "DataFrame")) |
40 | 5x |
checkmate::assert_flag(fill) |
41 | ||
42 | 5x |
if (ncol(x) == 0L) { |
43 | 2x |
return(character(0L)) |
44 |
} |
|
45 | ||
46 | 3x |
labels <- lapply(x, attr, "label") |
47 | ||
48 | 3x |
nulls <- vapply(labels, is.null, logical(1L)) |
49 | 3x |
if (any(nulls)) { |
50 | 3x |
labels[nulls] <- |
51 | 3x |
if (fill) { |
52 | 1x |
colnames(x)[nulls] |
53 |
} else { |
|
54 | 3x |
NA_character_ |
55 |
} |
|
56 |
} |
|
57 | ||
58 | 3x |
not_char <- !vapply(labels, checkmate::test_string, logical(1L), na.ok = TRUE) |
59 | 3x |
if (any(not_char)) { |
60 | ! |
stop("labels for variables ", toString(names(not_char[not_char])), "are not character strings") |
61 |
} |
|
62 | ||
63 | 3x |
unlist(labels, recursive = FALSE) |
64 |
} |
|
65 | ||
66 |
#' @rdname col_labels |
|
67 |
#' @export |
|
68 |
`col_labels<-` <- function(x, value) { |
|
69 | ! |
checkmate::test_multi_class(x, c("data.frame", "DataFrame")) |
70 | ! |
checkmate::assert_character(value) |
71 | ! |
checkmate::assert_true( |
72 | ! |
ncol(x) == length(value), |
73 | ! |
.var.name = "Length of value is equal to the number of columns" |
74 |
) |
|
75 | ||
76 | ! |
if (is.null(names(value))) { |
77 | ! |
names(value) <- names(x) |
78 |
} |
|
79 | ! |
x[names(value)] <- mapply(`attr<-`, x = x[names(value)], which = "label", value = value, SIMPLIFY = FALSE) |
80 | ! |
x |
81 |
} |
|
82 | ||
83 |
#' @rdname col_labels |
|
84 |
#' @export |
|
85 |
col_relabel <- function(x, ...) { |
|
86 | 3x |
checkmate::test_multi_class(x, c("data.frame", "DataFrame")) |
87 | 3x |
if (missing(...)) { |
88 | 1x |
return(x) |
89 |
} |
|
90 | 2x |
dots <- list(...) |
91 | 2x |
varnames <- names(dots) |
92 | ||
93 | 2x |
checkmate::assert_subset(varnames, names(x), .var.name = "names of ...") |
94 | 1x |
lapply(dots, checkmate::assert_string, .var.name = "element of ...") |
95 | ||
96 | 1x |
x[varnames] <- mapply(`attr<-`, x = x[varnames], which = "label", value = dots, SIMPLIFY = FALSE, USE.NAMES = FALSE) |
97 | 1x |
x |
98 |
} |
1 |
#' Verify code reproducibility |
|
2 |
#' |
|
3 |
#' Checks whether code in `teal_data` object reproduces the stored objects. |
|
4 |
#' |
|
5 |
#' If objects created by code in the `@code` slot of `x` are `all_equal` to the contents of the `@env` slot, |
|
6 |
#' the function updates the `@verified` slot to `TRUE` in the returned `teal_data` object. |
|
7 |
#' Once verified, the slot will always be set to `TRUE`. |
|
8 |
#' If the `@code` fails to recreate objects in `teal_data@env`, an error is raised. |
|
9 |
#' |
|
10 |
#' @return Input `teal_data` object or error. |
|
11 |
#' |
|
12 |
#' @param x `teal_data` object |
|
13 |
#' @examples |
|
14 |
#' tdata1 <- teal_data() |
|
15 |
#' tdata1 <- within(tdata1, { |
|
16 |
#' a <- 1 |
|
17 |
#' b <- a^5 |
|
18 |
#' c <- list(x = 2) |
|
19 |
#' }) |
|
20 |
#' verify(tdata1) |
|
21 |
#' |
|
22 |
#' tdata2 <- teal_data(x1 = iris, code = "x1 <- iris") |
|
23 |
#' verify(tdata2) |
|
24 |
#' verify(tdata2)@verified |
|
25 |
#' tdata2@verified |
|
26 |
#' |
|
27 |
#' tdata3 <- teal_data() |
|
28 |
#' tdata3 <- within(tdata3, { |
|
29 |
#' stop("error") |
|
30 |
#' }) |
|
31 |
#' try(verify(tdata3)) # fails |
|
32 |
#' |
|
33 |
#' |
|
34 |
#' a <- 1 |
|
35 |
#' b <- a + 2 |
|
36 |
#' c <- list(x = 2) |
|
37 |
#' d <- 5 |
|
38 |
#' tdata4 <- teal_data( |
|
39 |
#' a = a, b = b, c = c, d = d, |
|
40 |
#' code = "a <- 1 |
|
41 |
#' b <- a |
|
42 |
#' c <- list(x = 2) |
|
43 |
#' e <- 1" |
|
44 |
#' ) |
|
45 |
#' tdata4 |
|
46 |
#' try(verify(tdata4)) # fails |
|
47 |
#' |
|
48 |
#' @name verify |
|
49 |
#' @rdname verify |
|
50 |
#' @aliases verify,teal_data-method |
|
51 |
#' @aliases verify,qenv.error-method |
|
52 |
#' |
|
53 |
#' @export |
|
54 | 5x |
setGeneric("verify", function(x) standardGeneric("verify")) |
55 |
setMethod("verify", signature = "teal_data", definition = function(x) { |
|
56 | 4x |
if (x@verified) { |
57 | 2x |
return(x) |
58 |
} |
|
59 | 2x |
x_name <- deparse(substitute(x)) |
60 | 2x |
y <- eval_code(teal_data(), get_code(x)) |
61 | ||
62 | 2x |
if (inherits(y, "qenv.error")) { |
63 | ! |
stop(conditionMessage(y), call. = FALSE) |
64 |
} |
|
65 | ||
66 | 2x |
reproduced <- isTRUE(all.equal(x@env, y@env)) |
67 | 2x |
if (reproduced) { |
68 | 1x |
x@verified <- TRUE |
69 | 1x |
methods::validObject(x) |
70 | 1x |
x |
71 |
} else { |
|
72 | 1x |
error <- "Code verification failed." |
73 | ||
74 | 1x |
objects_diff <- vapply( |
75 | 1x |
intersect(names(x@env), names(y@env)), |
76 | 1x |
function(element) { |
77 | 1x |
isTRUE(all.equal(x@env[[element]], y@env[[element]])) |
78 |
}, |
|
79 | 1x |
logical(1) |
80 |
) |
|
81 | ||
82 | 1x |
names_diff_other <- setdiff(names(y@env), names(x@env)) |
83 | 1x |
names_diff_inenv <- setdiff(names(x@env), names(y@env)) |
84 | ||
85 | 1x |
if (length(objects_diff)) { |
86 | 1x |
error <- c( |
87 | 1x |
error, |
88 | 1x |
paste0("Object(s) recreated with code that have different structure in ", x_name, ":"), |
89 | 1x |
paste0(" \u2022 ", names(which(!objects_diff))) |
90 |
) |
|
91 |
} |
|
92 | 1x |
if (length(names_diff_inenv)) { |
93 | ! |
error <- c( |
94 | ! |
error, |
95 | ! |
paste0("Object(s) not created with code that exist in ", x_name, ":"), |
96 | ! |
paste0(" \u2022 ", names_diff_inenv) |
97 |
) |
|
98 |
} |
|
99 | 1x |
if (length(names_diff_other)) { |
100 | ! |
error <- c( |
101 | ! |
error, |
102 | ! |
paste0("Object(s) created with code that do not exist in ", x_name, ":"), |
103 | ! |
paste0(" \u2022 ", names_diff_other) |
104 |
) |
|
105 |
} |
|
106 | ||
107 | 1x |
stop(paste(error, collapse = "\n"), call. = FALSE) |
108 |
} |
|
109 |
}) |
|
110 |
setMethod("verify", signature = "qenv.error", definition = function(x) { |
|
111 | 1x |
stop(conditionMessage(x), call. = FALSE) |
112 |
}) |
1 |
#' Topological graph sort |
|
2 |
#' |
|
3 |
#' Graph is a `list` which for each node contains a vector of child nodes |
|
4 |
#' in the returned list, parents appear before their children. |
|
5 |
#' |
|
6 |
#' Implementation of `Kahn` algorithm with a modification to maintain the order of input elements. |
|
7 |
#' |
|
8 |
#' @param graph (`named list`) with node vector elements |
|
9 |
#' @keywords internal |
|
10 |
topological_sort <- function(graph) { |
|
11 |
# compute in-degrees |
|
12 | 428x |
in_degrees <- list() |
13 | 428x |
for (node in names(graph)) { |
14 | 168x |
in_degrees[[node]] <- 0 |
15 | 168x |
for (to_edge in graph[[node]]) { |
16 | 168x |
in_degrees[[to_edge]] <- 0 |
17 |
} |
|
18 |
} |
|
19 | ||
20 | 428x |
for (node in graph) { |
21 | 168x |
for (to_edge in node) { |
22 | 168x |
in_degrees[[to_edge]] <- in_degrees[[to_edge]] + 1 |
23 |
} |
|
24 |
} |
|
25 | ||
26 |
# sort |
|
27 | 428x |
visited <- 0 |
28 | 428x |
sorted <- list() |
29 | 428x |
zero_in <- list() |
30 | 428x |
for (node in names(in_degrees)) { |
31 | 142x |
if (in_degrees[[node]] == 0) zero_in <- append(zero_in, node) |
32 |
} |
|
33 | 428x |
zero_in <- rev(zero_in) |
34 | ||
35 | 428x |
while (length(zero_in) != 0) { |
36 | 283x |
visited <- visited + 1 |
37 | 283x |
sorted <- c(zero_in[[1]], sorted) |
38 | 283x |
for (edge_to in graph[[zero_in[[1]]]]) { |
39 | 160x |
in_degrees[[edge_to]] <- in_degrees[[edge_to]] - 1 |
40 | 160x |
if (in_degrees[[edge_to]] == 0) { |
41 | 141x |
zero_in <- append(zero_in, edge_to, 1) |
42 |
} |
|
43 |
} |
|
44 | 283x |
zero_in[[1]] <- NULL |
45 |
} |
|
46 | ||
47 | 428x |
if (visited != length(in_degrees)) { |
48 | 4x |
stop( |
49 | 4x |
"Graph is not a directed acyclic graph. Cycles involving nodes: ", |
50 | 4x |
paste0(setdiff(names(in_degrees), sorted), collapse = " ") |
51 |
) |
|
52 |
} else { |
|
53 | 424x |
return(sorted) |
54 |
} |
|
55 |
} |
|
56 | ||
57 |
#' Checks whether a graph is a `Directed Acyclic Graph (DAG)` |
|
58 |
#' |
|
59 |
#' @inheritParams topological_sort |
|
60 |
#' @return `logical(1)` `TRUE` if the graph is a `DAG`; `FALSE` otherwise |
|
61 |
#' @keywords internal |
|
62 |
is_dag <- function(graph) { |
|
63 | 423x |
inherits(try(topological_sort(graph), silent = TRUE), "try-error") |
64 |
} |
1 |
#' Names of data sets in `teal_data` object |
|
2 |
#' |
|
3 |
#' Get or set the value of the `datanames` slot. |
|
4 |
#' |
|
5 |
#' The `@datanames` slot in a `teal_data` object specifies which of the variables stored in its environment |
|
6 |
#' (the `@env` slot) are data sets to be taken into consideration. |
|
7 |
#' The contents of `@datanames` can be specified upon creation and default to all variables in `@env`. |
|
8 |
#' Variables created later, which may well be data sets, are not automatically considered such. |
|
9 |
#' Use this function to update the slot. |
|
10 |
#' |
|
11 |
#' @param x (`teal_data`) object to access or modify |
|
12 |
#' @param value (`character`) new value for `@datanames`; all elements must be names of variables existing in `@env` |
|
13 |
#' |
|
14 |
#' @return The contents of `@datanames` or `teal_data` object with updated `@datanames`. |
|
15 |
#' |
|
16 |
#' @examples |
|
17 |
#' td <- teal_data(iris = iris) |
|
18 |
#' td <- within(td, mtcars <- mtcars) |
|
19 |
#' datanames(td) |
|
20 |
#' |
|
21 |
#' datanames(td) <- c("iris", "mtcars") |
|
22 |
#' datanames(td) |
|
23 |
#' |
|
24 |
#' @name datanames |
|
25 |
#' @aliases datanames,teal_data-method |
|
26 |
#' @aliases datanames<-,teal_data,character-method |
|
27 |
#' @aliases datanames,qenv.error-method |
|
28 |
#' @aliases datanames<-,qenv.error,character-method |
|
29 | ||
30 |
#' @rdname datanames |
|
31 |
#' @export |
|
32 | 6x |
setGeneric("datanames", function(x) standardGeneric("datanames")) |
33 |
setMethod("datanames", signature = "teal_data", definition = function(x) { |
|
34 | 4x |
x@datanames |
35 |
}) |
|
36 |
setMethod("datanames", signature = "qenv.error", definition = function(x) { |
|
37 | 2x |
NULL |
38 |
}) |
|
39 | ||
40 |
#' @rdname datanames |
|
41 |
#' @export |
|
42 | 4x |
setGeneric("datanames<-", function(x, value) standardGeneric("datanames<-")) |
43 |
setMethod("datanames<-", signature = c("teal_data", "character"), definition = function(x, value) { |
|
44 | 3x |
checkmate::assert_subset(value, names(x@env)) |
45 | 2x |
x@datanames <- value |
46 | 2x |
methods::validObject(x) |
47 | 2x |
x |
48 |
}) |
|
49 |
setMethod("datanames<-", signature = c("qenv.error", "character"), definition = function(x, value) { |
|
50 | 1x |
methods::validObject(x) |
51 | 1x |
x |
52 |
}) |
1 |
#' @rdname join_keys |
|
2 |
#' @order 7 |
|
3 |
#' @export |
|
4 |
format.join_keys <- function(x, ...) { |
|
5 | 6x |
if (length(x) > 0) { |
6 | 5x |
my_parents <- parents(x) |
7 | 5x |
names_sorted <- topological_sort(my_parents) |
8 | 5x |
names <- union(names_sorted, names(x)) |
9 | 5x |
x_implicit <- update_keys_given_parents(x) |
10 | 5x |
out <- lapply(names, function(i) { |
11 | 15x |
out_i <- lapply(union(i, names(x[[i]])), function(j) { |
12 | 35x |
direction <- if (identical(my_parents[[j]], i)) { |
13 |
" <-- " |
|
14 | 35x |
} else if (identical(my_parents[[i]], j)) { |
15 |
" --> " |
|
16 | 35x |
} else if (!identical(i, j)) { |
17 |
" <-> " |
|
18 |
} else { |
|
19 |
"" |
|
20 |
} |
|
21 | ||
22 | 35x |
keys <- x[[i]][[j]] |
23 | 35x |
sprintf( |
24 | 35x |
"%s%s: [%s]", |
25 | 35x |
direction, j, |
26 | 35x |
if (length(keys) == 0) "no primary keys" else toString(keys) |
27 |
) |
|
28 |
}) |
|
29 | ||
30 | 15x |
implicit_datasets <- setdiff(names(x_implicit[[i]]), names(x[[i]])) |
31 | 15x |
if (length(implicit_datasets) > 0) { |
32 | 2x |
out_i <- c( |
33 | 2x |
out_i, |
34 | 2x |
paste0( |
35 | 2x |
" --* (implicit via parent with): ", |
36 | 2x |
paste(implicit_datasets, collapse = ", ") |
37 |
) |
|
38 |
) |
|
39 |
} |
|
40 | ||
41 | 15x |
paste(out_i, collapse = "\n") |
42 |
}) |
|
43 | 5x |
paste( |
44 | 5x |
c( |
45 | 5x |
sprintf("A join_keys object containing foreign keys between %s datasets:", length(x)), |
46 | 5x |
out |
47 |
), |
|
48 | 5x |
collapse = "\n" |
49 |
) |
|
50 |
} else { |
|
51 | 1x |
"An empty join_keys object." |
52 |
} |
|
53 |
} |
|
54 | ||
55 |
#' @rdname join_keys |
|
56 |
#' @order 7 |
|
57 |
#' @export |
|
58 |
print.join_keys <- function(x, ...) { |
|
59 | 1x |
cat(format(x, ...), "\n") |
60 | 1x |
invisible(x) |
61 |
} |
1 |
#' Manage relationships between datasets using `join_keys` |
|
2 |
#' @order 1 |
|
3 |
#' @name join_keys |
|
4 |
#' |
|
5 |
#' @usage ## Constructor, getter and setter |
|
6 |
#' join_keys(...) |
|
7 |
#' |
|
8 |
#' @description |
|
9 |
#' Facilitates the creation and retrieval of relationships between datasets. |
|
10 |
#' `join_keys` class extends `list` and contains keys connecting pairs of datasets. |
|
11 |
#' Each element of the list contains keys for specific dataset. |
|
12 |
#' Each dataset can have a relationship with itself (primary key) and with other datasets. |
|
13 |
#' |
|
14 |
#' Note that `join_keys` list is symmetrical and assumes a default direction, that is: |
|
15 |
#' when keys are set between `ds1` and `ds2`, it defines `ds1` as the parent |
|
16 |
#' in a parent-child relationship and the mapping is automatically mirrored between |
|
17 |
#' `ds2` and `ds1`. |
|
18 |
#' |
|
19 |
#' @section Methods (by class): |
|
20 |
#' - `join_keys()`: Returns an empty `join_keys` object when called without arguments. |
|
21 |
#' - `join_keys(join_keys)`: Returns itself. |
|
22 |
#' - `join_keys(teal_data)`: Returns the `join_keys` object contained in `teal_data` object. |
|
23 |
#' - `join_keys(...)`: Creates a new object with one or more `join_key_set` parameters. |
|
24 |
#' |
|
25 |
#' @param ... (optional) |
|
26 |
#' - either `teal_data` or `join_keys` object to extract `join_keys` |
|
27 |
#' - or any number of `join_key_set` objects to create `join_keys` |
|
28 |
#' - or nothing to create an empty `join_keys` |
|
29 |
#' @param value For `x[i, j, directed = TRUE)] <- value` (named/unnamed `character`) |
|
30 |
#' Column mapping between datasets. |
|
31 |
#' |
|
32 |
#' For `join_keys(x) <- value`: (`join_key_set` or list of `join_key_set`) relationship |
|
33 |
#' pairs to add to `join_keys` list. |
|
34 |
#' |
|
35 |
#' |
|
36 |
#' @return `join_keys` object. |
|
37 |
#' |
|
38 |
#' @examples |
|
39 |
#' # Creating a new join keys ---- |
|
40 |
#' |
|
41 |
#' jk <- join_keys( |
|
42 |
#' join_key("ds1", "ds1", "pk1"), |
|
43 |
#' join_key("ds2", "ds2", "pk2"), |
|
44 |
#' join_key("ds3", "ds3", "pk3"), |
|
45 |
#' join_key("ds1", "ds2", c(pk1 = "pk2")), |
|
46 |
#' join_key("ds1", "ds3", c(pk1 = "pk3")) |
|
47 |
#' ) |
|
48 |
#' |
|
49 |
#' jk |
|
50 |
#' |
|
51 |
#' @export |
|
52 |
#' |
|
53 |
#' @seealso [join_key()] for creating `join_keys_set`, |
|
54 |
#' [parents()] for parent operations, |
|
55 |
#' [teal_data()] for `teal_data` constructor _and_ |
|
56 |
#' [default_cdisc_join_keys] for default `CDISC` keys. |
|
57 |
#' |
|
58 |
join_keys <- function(...) { |
|
59 | 786x |
if (missing(...)) { |
60 | 224x |
return(new_join_keys()) |
61 |
} |
|
62 | 562x |
x <- rlang::list2(...) |
63 | 562x |
if (length(x) == 1L) { |
64 | 497x |
UseMethod("join_keys", x[[1]]) |
65 |
} else { |
|
66 | 65x |
join_keys.default(...) |
67 |
} |
|
68 |
} |
|
69 | ||
70 |
#' @rdname join_keys |
|
71 |
#' @order 1 |
|
72 |
#' @export |
|
73 |
join_keys.default <- function(...) { |
|
74 | 110x |
c(new_join_keys(), ...) |
75 |
} |
|
76 | ||
77 |
#' @rdname join_keys |
|
78 |
#' @order 1 |
|
79 |
#' @export |
|
80 |
join_keys.join_keys <- function(...) { |
|
81 | 447x |
x <- rlang::list2(...) |
82 | 447x |
x[[1]] |
83 |
} |
|
84 | ||
85 |
#' @rdname join_keys |
|
86 |
#' @order 1 |
|
87 |
#' @export |
|
88 |
join_keys.teal_data <- function(...) { |
|
89 | 5x |
x <- rlang::list2(...) |
90 | 5x |
x[[1]]@join_keys |
91 |
} |
|
92 | ||
93 |
#' @rdname join_keys |
|
94 |
#' @order 5 |
|
95 |
#' |
|
96 |
#' @section Functions: |
|
97 |
#' - `join_keys(x) <- value`: Assignment of the `join_keys` in object with `value`. |
|
98 |
#' `value` needs to be an object of class `join_keys` or `join_key_set`. |
|
99 |
#' |
|
100 |
#' @param x (`join_keys`) empty object to set the new relationship pairs. |
|
101 |
#' `x` is typically an object of `join_keys` class. When called with the `join_keys(x)` |
|
102 |
#' or `join_keys(x) <- value` then it can also take a supported class (`teal_data`, `join_keys`) |
|
103 |
#' |
|
104 |
#' @export |
|
105 |
`join_keys<-` <- function(x, value) { |
|
106 | 8x |
checkmate::assert_class(value, classes = c("join_keys", "list")) |
107 | 8x |
UseMethod("join_keys<-", x) |
108 |
} |
|
109 | ||
110 |
#' @rdname join_keys |
|
111 |
#' @order 5 |
|
112 |
#' @export |
|
113 |
#' @examples |
|
114 |
#' |
|
115 |
#' # Assigning keys via join_keys(x)[i, j] <- value ---- |
|
116 |
#' |
|
117 |
#' obj <- join_keys() |
|
118 |
#' # or |
|
119 |
#' obj <- teal_data() |
|
120 |
#' |
|
121 |
#' join_keys(obj)["ds1", "ds1"] <- "pk1" |
|
122 |
#' join_keys(obj)["ds2", "ds2"] <- "pk2" |
|
123 |
#' join_keys(obj)["ds3", "ds3"] <- "pk3" |
|
124 |
#' join_keys(obj)["ds1", "ds2"] <- c(pk1 = "pk2") |
|
125 |
#' join_keys(obj)["ds1", "ds3"] <- c(pk1 = "pk3") |
|
126 |
#' |
|
127 |
#' identical(jk, join_keys(obj)) |
|
128 |
`join_keys<-.join_keys` <- function(x, value) { |
|
129 | 6x |
value |
130 |
} |
|
131 | ||
132 |
#' @rdname join_keys |
|
133 |
#' @order 5 |
|
134 |
#' @export |
|
135 |
#' @examples |
|
136 |
#' |
|
137 |
#' # Setter for join_keys within teal_data ---- |
|
138 |
#' |
|
139 |
#' td <- teal_data() |
|
140 |
#' join_keys(td) <- jk |
|
141 |
#' |
|
142 |
#' join_keys(td)["ds1", "ds2"] <- "new_key" |
|
143 |
#' join_keys(td) <- c(join_keys(td), join_keys(join_key("ds3", "ds2", "key3"))) |
|
144 |
#' join_keys(td) |
|
145 |
`join_keys<-.teal_data` <- function(x, value) { |
|
146 | 2x |
join_keys(x@join_keys) <- value |
147 | 2x |
x |
148 |
} |
|
149 | ||
150 |
#' Internal constructor |
|
151 |
#' |
|
152 |
#' @return an empty `join_keys` list |
|
153 |
#' |
|
154 |
#' @keywords internal |
|
155 |
new_join_keys <- function() { |
|
156 | 354x |
structure( |
157 | 354x |
list(), |
158 | 354x |
class = c("join_keys", "list"), |
159 | 354x |
"parents" = list() |
160 |
) |
|
161 |
} |
1 |
#' Get and set parents in `join_keys` object |
|
2 |
#' |
|
3 |
#' `parents()` facilitates the creation of dependencies between datasets by |
|
4 |
#' assigning a parent-child relationship. |
|
5 |
#' |
|
6 |
#' Each element is defined by a `list` element, where `list("child" = "parent")`. |
|
7 |
#' |
|
8 |
#' @param x (`join_keys` or `teal_data`) object that contains "parents" information |
|
9 |
#' to retrieve or manipulate. |
|
10 |
#' |
|
11 |
#' @return a `list` of `character` representing the parents. |
|
12 |
#' |
|
13 |
#' @export |
|
14 |
#' @seealso [join_keys()] |
|
15 |
parents <- function(x) { |
|
16 | 606x |
UseMethod("parents", x) |
17 |
} |
|
18 | ||
19 |
#' @describeIn parents Retrieves parents of `join_keys` object. |
|
20 |
#' @export |
|
21 |
#' @examples |
|
22 |
#' # Get parents of join_keys --- |
|
23 |
#' |
|
24 |
#' jk <- default_cdisc_join_keys["ADEX"] |
|
25 |
#' parents(jk) |
|
26 |
parents.join_keys <- function(x) { |
|
27 | 1x |
if (is.null(attr(x, "parents"))) list() else attr(x, "parents") |
28 |
} |
|
29 | ||
30 |
#' @describeIn parents Retrieves parents of `join_keys` inside `teal_data` object. |
|
31 |
#' @export |
|
32 |
#' @examples |
|
33 |
#' |
|
34 |
#' # Get parents of join_keys inside teal_data object --- |
|
35 |
#' |
|
36 |
#' td <- teal_data( |
|
37 |
#' ADSL = rADSL, |
|
38 |
#' ADTTE = rADTTE, |
|
39 |
#' ADRS = rADRS, |
|
40 |
#' join_keys = default_cdisc_join_keys[c("ADSL", "ADTTE", "ADRS")] |
|
41 |
#' ) |
|
42 |
#' parents(td) |
|
43 |
parents.teal_data <- function(x) { |
|
44 | 1x |
parents(x@join_keys) |
45 |
} |
|
46 | ||
47 |
#' @describeIn parents Assignment of parents in `join_keys` object. |
|
48 |
#' |
|
49 |
#' @param value (`named list`) of `character` vectors. |
|
50 |
#' |
|
51 |
#' @export |
|
52 |
`parents<-` <- function(x, value) { |
|
53 | 428x |
UseMethod("parents<-", x) |
54 |
} |
|
55 | ||
56 |
#' @describeIn parents Assignment of parents of `join_keys` object. |
|
57 |
#' @export |
|
58 |
#' @examples |
|
59 |
#' |
|
60 |
#' # Assignment of parents --- |
|
61 |
#' |
|
62 |
#' jk <- join_keys( |
|
63 |
#' join_key("ds1", "ds2", "id"), |
|
64 |
#' join_key("ds5", "ds6", "id"), |
|
65 |
#' join_key("ds7", "ds6", "id") |
|
66 |
#' ) |
|
67 |
#' |
|
68 |
#' parents(jk) <- list(ds2 = "ds1") |
|
69 |
#' |
|
70 |
#' # Setting individual parent-child relationship |
|
71 |
#' |
|
72 |
#' parents(jk)["ds6"] <- "ds5" |
|
73 |
#' parents(jk)["ds7"] <- "ds6" |
|
74 |
`parents<-.join_keys` <- function(x, value) { |
|
75 | 427x |
checkmate::assert_list(value, types = "character", names = "named") |
76 | ||
77 | 424x |
new_parents <- list() |
78 | ||
79 | 424x |
for (dataset in names(value)) { |
80 |
# Custom .var.name so it is verbose and helpful for users |
|
81 | 161x |
checkmate::assert_string(value[[dataset]], .var.name = sprintf("value[[\"%s\"]]", dataset)) |
82 | ||
83 | 160x |
parent <- new_parents[[dataset]] |
84 | 160x |
checkmate::assert( |
85 | 160x |
checkmate::check_null(parent), |
86 | 160x |
checkmate::check_true( |
87 | 160x |
length(parent) == 0 && |
88 | 160x |
length(value[[dataset]]) == 0 |
89 |
), |
|
90 | 160x |
checkmate::check_true(parent == value[[dataset]]), |
91 | 160x |
"Please check the difference between provided datasets parents and provided join_keys parents.", |
92 | 160x |
.var.name = "value" |
93 |
) |
|
94 | 160x |
if (is.null(parent)) { |
95 | 160x |
new_parents[[dataset]] <- value[[dataset]] |
96 |
} |
|
97 |
} |
|
98 | ||
99 | 423x |
if (is_dag(new_parents)) { |
100 | 4x |
stop("Cycle detected in a parent and child dataset graph.") |
101 |
} |
|
102 | ||
103 | 419x |
attr(x, "parents") <- new_parents |
104 | ||
105 | 419x |
assert_parent_child(x) |
106 | 418x |
x |
107 |
} |
|
108 | ||
109 |
#' @describeIn parents Assignment of parents of `join_keys` inside `teal_data` object. |
|
110 |
#' @export |
|
111 |
#' @examples |
|
112 |
#' |
|
113 |
#' # Assignment of parents of join_keys inside teal_data object --- |
|
114 |
#' |
|
115 |
#' parents(td) <- list("ADTTE" = "ADSL") # replace existing |
|
116 |
#' parents(td)["ADRS"] <- "ADSL" # add new parent |
|
117 |
`parents<-.teal_data` <- function(x, value) { |
|
118 | 1x |
parents(x@join_keys) <- value |
119 | 1x |
x |
120 |
} |
|
121 | ||
122 |
#' @describeIn parents Getter for individual parent. |
|
123 |
#' |
|
124 |
#' @param dataset_name (`character(1)`) Name of dataset to query on their parent. |
|
125 |
#' |
|
126 |
#' @return For `parent(x, dataset_name)` returns `NULL` if parent does not exist. |
|
127 |
#' |
|
128 |
#' @export |
|
129 |
#' |
|
130 |
#' @examples |
|
131 |
#' |
|
132 |
#' # Get individual parent --- |
|
133 |
#' |
|
134 |
#' parent(jk, "ds2") |
|
135 |
#' parent(td, "ADTTE") |
|
136 |
parent <- function(x, dataset_name) { |
|
137 | 105x |
checkmate::assert_string(dataset_name) |
138 |
# assert x is performed by parents() |
|
139 | 105x |
parents(x)[[dataset_name]] |
140 |
} |
1 |
#' The names of a `join_keys` object |
|
2 |
#' |
|
3 |
#' @inheritParams base::`names<-` |
|
4 |
#' @export |
|
5 |
`names<-.join_keys` <- function(x, value) { |
|
6 | 2x |
new_x <- unclass(x) |
7 | 2x |
parent_list <- parents(x) |
8 |
# Update inner keys |
|
9 | 2x |
for (old_name in setdiff(names(new_x), value)) { |
10 | 3x |
old_entry <- new_x[[old_name]] |
11 | 3x |
new_name <- value[names(new_x) == old_name] |
12 | ||
13 |
# Change 2nd-tier first |
|
14 | 3x |
for (sub_name in names(old_entry)) { |
15 | 7x |
names(new_x[[sub_name]])[names(new_x[[sub_name]]) == old_name] <- new_name |
16 |
} |
|
17 | ||
18 |
# Change in first tier |
|
19 | 3x |
names(new_x)[names(new_x) == old_name] <- new_name |
20 | ||
21 |
# changing name in the parents |
|
22 | 3x |
if (length(parent_list)) { |
23 | 3x |
names(parent_list)[names(parent_list) == old_name] <- new_name |
24 | 3x |
ind <- vapply(parent_list, identical, logical(1), old_name) |
25 | 3x |
parent_list[ind] <- new_name |
26 | 3x |
attr(new_x, "parents") <- parent_list |
27 |
} |
|
28 |
} |
|
29 | ||
30 | 2x |
class(new_x) <- c("join_keys", "list") |
31 | 2x |
new_x |
32 |
} |
1 |
#' Comprehensive data integration function for `teal` applications |
|
2 |
#' |
|
3 |
#' @description |
|
4 |
#' `r lifecycle::badge("stable")` |
|
5 |
#' |
|
6 |
#' Universal function to pass data to teal application. |
|
7 |
#' |
|
8 |
#' @param ... any number of objects (presumably data objects) provided as `name = value` pairs. |
|
9 |
#' |
|
10 |
#' @param join_keys (`join_keys`) object or a single (`join_key_set`) object. |
|
11 |
#' |
|
12 |
#' (optional) object with dataset column relationships used for joining. |
|
13 |
#' If empty then no joins between pairs of objects. |
|
14 |
#' |
|
15 |
#' @param code (`character`, `language`) optional code to reproduce the datasets provided in `...`. |
|
16 |
#' Note this code is not executed and the `teal_data` may not be reproducible |
|
17 |
#' |
|
18 |
#' @param check (`logical`) `r lifecycle::badge("deprecated")` |
|
19 |
#' Use [verify()] to verify code reproducibility . |
|
20 |
#' |
|
21 |
#' @return A `teal_data` object. |
|
22 |
#' |
|
23 |
#' @export |
|
24 |
#' |
|
25 |
#' @examples |
|
26 |
#' teal_data(x1 = iris, x2 = mtcars) |
|
27 |
#' |
|
28 |
teal_data <- function(..., |
|
29 |
join_keys = teal.data::join_keys(), |
|
30 |
code = character(0), |
|
31 |
check) { |
|
32 | 76x |
data_objects <- rlang::list2(...) |
33 | 76x |
if (inherits(join_keys, "join_key_set")) { |
34 | ! |
join_keys <- teal.data::join_keys(join_keys) |
35 |
} |
|
36 | 76x |
if (!missing(check)) { |
37 | ! |
lifecycle::deprecate_stop( |
38 | ! |
when = "0.4.0", |
39 | ! |
"teal_data( |
40 | ! |
check = 'check argument is deprecated. Use `verify()` to verify code reproducibility. |
41 | ! |
Find more information on https://github.com/insightsengineering/teal/discussions/945' |
42 |
)" |
|
43 |
) |
|
44 |
} |
|
45 | ||
46 |
if ( |
|
47 | 76x |
checkmate::test_list( |
48 | 76x |
data_objects, |
49 | 76x |
types = c("TealDataConnector", "TealDataset", "TealDatasetConnector"), |
50 | 76x |
min.len = 1 |
51 |
) |
|
52 |
) { |
|
53 | ! |
lifecycle::deprecate_stop( |
54 | ! |
when = "0.4.0", |
55 | ! |
"teal_data( |
56 | ! |
data_objects = 'should use data directly. Using TealDatasetConnector and TealDataset is deprecated. |
57 | ! |
Find more information on https://github.com/insightsengineering/teal/discussions/945' |
58 |
)" |
|
59 |
) |
|
60 |
} else { |
|
61 | 76x |
if (length(data_objects) > 0 && !checkmate::test_names(names(data_objects), type = "named")) { |
62 | ! |
stop("Dot (`...`) arguments on `teal_data()` must be named.") |
63 |
} |
|
64 | 76x |
new_teal_data( |
65 | 76x |
data = data_objects, |
66 | 76x |
code = code, |
67 | 76x |
join_keys = join_keys |
68 |
) |
|
69 |
} |
|
70 |
} |
1 |
#' Generate sample `CDISC` datasets |
|
2 |
#' |
|
3 |
#' Retrieves example `CDISC` datasets for use in examples and testing. |
|
4 |
#' |
|
5 |
#' This function returns a dummy dataset and should only be used within `teal.data`. |
|
6 |
#' Note that the datasets are not created and maintained in `teal.data`, they are retrieved its dependencies. |
|
7 |
#' |
|
8 |
#' @param dataname (`character(1)`) name of a `CDISC` dataset |
|
9 |
#' |
|
10 |
#' @return A `CDISC` dataset as a `data.frame`. |
|
11 |
#' |
|
12 |
#' @export |
|
13 |
example_cdisc_data <- function(dataname = c("ADSL", "ADAE", "ADLB", "ADCM", "ADEX", "ADRS", "ADTR", "ADTTE", "ADVS")) { |
|
14 | ! |
dataname <- sprintf("r%s", match.arg(dataname)) |
15 | ! |
dynGet(dataname, ifnotfound = stop(dataname, " not found"), inherits = TRUE) |
16 |
} |
1 |
#' Test if two objects are (nearly) equal |
|
2 |
#' |
|
3 |
#' `all.equal(target, current)` is a utility to compare `join_keys` objects target |
|
4 |
#' and current testing `near equality`. |
|
5 |
#' |
|
6 |
#' If they are different, comparison is still made to some extent, and a report |
|
7 |
#' of the differences is returned. |
|
8 |
#' Do not use `all.equal` directly in if expressions—either use `isTRUE(all.equal(....))` |
|
9 |
#' or identical if appropriate. |
|
10 |
#' |
|
11 |
#' The parents attribute comparison tolerates `NULL` and empty lists and will find |
|
12 |
#' no difference. |
|
13 |
#' |
|
14 |
#' The list containing all the relationships is treated like a map and ignores |
|
15 |
#' entries with `NULL` if they exist. |
|
16 |
#' |
|
17 |
#' @inheritParams base::all.equal |
|
18 |
#' @param ... further arguments for different methods. Not used with `join_keys`. |
|
19 |
#' |
|
20 |
#' @seealso [base::all.equal()] |
|
21 |
#' @keywords internal |
|
22 |
#' |
|
23 |
all.equal.join_keys <- function(target, current, ...) { |
|
24 | 21x |
.as_map <- function(.x) { |
25 | 42x |
old_attributes <- attributes(.x) |
26 |
# Keep only non-list attributes |
|
27 | 42x |
old_attributes[["names"]] <- NULL |
28 | 42x |
old_attributes[["original_class"]] <- old_attributes[["class"]] |
29 | 42x |
old_attributes[["class"]] <- NULL |
30 | 42x |
old_attributes[["parents"]] <- if (!length(old_attributes[["parents"]])) { |
31 | 18x |
list() |
32 |
} else { |
|
33 | 24x |
old_attributes[["parents"]][order(names(old_attributes[["parents"]]))] |
34 |
} |
|
35 | 42x |
attr(.x, "class") <- "list" |
36 | ||
37 |
# Remove nulls |
|
38 | 42x |
.x <- Filter(Negate(is.null), .x) |
39 | ||
40 |
# Sort named components, preserving positions of unnamed |
|
41 | 42x |
nx <- rlang::names2(.x) |
42 | 42x |
is_named <- nx != "" |
43 | 42x |
if (any(is_named)) { |
44 | 42x |
idx <- seq_along(.x) |
45 | 42x |
idx[is_named] <- idx[is_named][order(nx[is_named])] |
46 | 42x |
.x <- .x[idx] |
47 |
} |
|
48 | 42x |
new_attributes <- if (is.null(attributes(.x))) list() else attributes(.x) |
49 | 42x |
attributes(.x) <- utils::modifyList(old_attributes, new_attributes) |
50 | 42x |
.x |
51 |
} |
|
52 | 21x |
x <- .as_map(target) |
53 | 21x |
y <- .as_map(current) |
54 | 21x |
all.equal(x, y) |
55 |
} |
1 |
#' Get code from `teal_data` object |
|
2 |
#' |
|
3 |
#' Retrieve code from `teal_data` object. |
|
4 |
#' |
|
5 |
#' Retrieve code stored in `@code`, which (in principle) can be used to recreate all objects found in `@env`. |
|
6 |
#' Use `datanames` to limit the code to one or more of the datasets enumerated in `@datanames`. |
|
7 |
#' If the code has not passed verification (with [`verify()`]), a warning will be prepended. |
|
8 |
#' |
|
9 |
#' @section Extracting dataset-specific code: |
|
10 |
#' When `datanames` is specified, the code returned will be limited to the lines needed to _create_ |
|
11 |
#' the requested datasets. The code stored in the `@code` slot is analyzed statically to determine |
|
12 |
#' which lines the datasets of interest depend upon. The analysis works well when objects are created |
|
13 |
#' with standard infix assignment operators (see `?assignOps`) but it can fail in some situations. |
|
14 |
#' |
|
15 |
#' Consider the following examples: |
|
16 |
#' |
|
17 |
#' _Case 1: Usual assignments._ |
|
18 |
#' ```r |
|
19 |
#' data <- teal_data() |> |
|
20 |
#' within({ |
|
21 |
#' foo <- function(x) { |
|
22 |
#' x + 1 |
|
23 |
#' } |
|
24 |
#' x <- 0 |
|
25 |
#' y <- foo(x) |
|
26 |
#' }) |
|
27 |
#' get_code(data, datanames = "y") |
|
28 |
#' ``` |
|
29 |
#' `x` has no dependencies, so `get_code(data, datanames = "x")` will return only the second call.\cr |
|
30 |
#' `y` depends on `x` and `foo`, so `get_code(data, datanames = "y")` will contain all three calls. |
|
31 |
#' |
|
32 |
#' _Case 2: Some objects are created by a function's side effects._ |
|
33 |
#' ```r |
|
34 |
#' data <- teal_data() |> |
|
35 |
#' within({ |
|
36 |
#' foo <- function() { |
|
37 |
#' x <<- x + 1 |
|
38 |
#' } |
|
39 |
#' x <- 0 |
|
40 |
#' foo() |
|
41 |
#' y <- x |
|
42 |
#' }) |
|
43 |
#' get_code(data, datanames = "y") |
|
44 |
#' ``` |
|
45 |
#' Here, `y` depends on `x` but `x` is modified by `foo` as a side effect (not by reassignment) |
|
46 |
#' and so `get_code(data, datanames = "y")` will not return the `foo()` call.\cr |
|
47 |
#' To overcome this limitation, code dependencies can be specified manually. |
|
48 |
#' Lines where side effects occur can be flagged by adding "`# @linksto <object name>`" at the end.\cr |
|
49 |
#' Note that `within` evaluates code passed to `expr` as is and comments are ignored. |
|
50 |
#' In order to include comments in code one must use the `eval_code` function instead. |
|
51 |
#' |
|
52 |
#' ```r |
|
53 |
#' data <- teal_data() |> |
|
54 |
#' eval_code(" |
|
55 |
#' foo <- function() { |
|
56 |
#' x <<- x + 1 |
|
57 |
#' } |
|
58 |
#' x <- 0 |
|
59 |
#' foo() # @linksto x |
|
60 |
#' y <- x |
|
61 |
#' ") |
|
62 |
#' get_code(data, datanames = "y") |
|
63 |
#' ``` |
|
64 |
#' Now the `foo()` call will be properly included in the code required to recreate `y`. |
|
65 |
#' |
|
66 |
#' Note that two functions that create objects as side effects, `assign` and `data`, are handled automatically. |
|
67 |
#' |
|
68 |
#' Here are known cases where manual tagging is necessary: |
|
69 |
#' - non-standard assignment operators, _e.g._ `%<>%` |
|
70 |
#' - objects used as conditions in `if` statements: `if (<condition>)` |
|
71 |
#' - objects used to iterate over in `for` loops: `for(i in <sequence>)` |
|
72 |
#' - creating and evaluating language objects, _e.g._ `eval(<call>)` |
|
73 |
#' |
|
74 |
#' |
|
75 |
#' @param object (`teal_data`) |
|
76 |
#' @param datanames `r lifecycle::badge("experimental")` (`character`) vector of dataset names to return the code for. |
|
77 |
#' For more details see the "Extracting dataset-specific code" section. |
|
78 |
#' @param deparse (`logical`) flag specifying whether to return code as `character` (`deparse = TRUE`) or as |
|
79 |
#' `expression` (`deparse = FALSE`). |
|
80 |
#' |
|
81 |
#' @return |
|
82 |
#' Either a character string or an expression. If `datanames` is used to request a specific dataset, |
|
83 |
#' only code that _creates_ that dataset (not code that uses it) is returned. Otherwise, all contents of `@code`. |
|
84 |
#' |
|
85 |
#' @examples |
|
86 |
#' tdata1 <- teal_data() |
|
87 |
#' tdata1 <- within(tdata1, { |
|
88 |
#' a <- 1 |
|
89 |
#' b <- a^5 |
|
90 |
#' c <- list(x = 2) |
|
91 |
#' }) |
|
92 |
#' get_code(tdata1) |
|
93 |
#' get_code(tdata1, datanames = "a") |
|
94 |
#' get_code(tdata1, datanames = "b") |
|
95 |
#' |
|
96 |
#' tdata2 <- teal_data(x1 = iris, code = "x1 <- iris") |
|
97 |
#' get_code(tdata2) |
|
98 |
#' get_code(verify(tdata2)) |
|
99 |
#' |
|
100 |
#' @rdname get_code |
|
101 |
#' @aliases get_code,teal_data-method |
|
102 |
#' |
|
103 |
#' @export |
|
104 |
setMethod("get_code", signature = "teal_data", definition = function(object, deparse = TRUE, datanames = NULL) { |
|
105 | 48x |
checkmate::assert_character(datanames, min.len = 1L, null.ok = TRUE) |
106 | 48x |
checkmate::assert_flag(deparse) |
107 | ||
108 | 48x |
code <- if (!is.null(datanames)) { |
109 | 44x |
get_code_dependency(object@code, datanames) |
110 |
} else { |
|
111 | 4x |
object@code |
112 |
} |
|
113 | ||
114 | 48x |
if (!object@verified) { |
115 | 11x |
code <- c("warning('Code was not verified for reproducibility.')", code) |
116 |
} |
|
117 | ||
118 | 48x |
if (deparse) { |
119 | 47x |
if (length(code) == 0) { |
120 | 1x |
code |
121 |
} else { |
|
122 | 46x |
paste(code, collapse = "\n") |
123 |
} |
|
124 |
} else { |
|
125 | 1x |
parse(text = paste(c("{", code, "}"), collapse = "\n"), keep.source = TRUE) |
126 |
} |
|
127 |
}) |
1 |
setOldClass("join_keys") |
|
2 | ||
3 |
#' Reproducible data |
|
4 |
#' |
|
5 |
#' Reproducible data container class. Inherits code tracking behavior from [`teal.code::qenv-class`]. |
|
6 |
#' |
|
7 |
#' This class provides an isolated environment in which to store and process data with all code being recorded. |
|
8 |
#' The environment, code, data set names, and data joining keys are stored in their respective slots. |
|
9 |
#' These slots should never be accessed directly, use the provided get/set functions. |
|
10 |
#' |
|
11 |
#' As code is evaluated in `teal_data`, messages and warnings are stored in their respective slots. |
|
12 |
#' If errors are raised, a `qenv.error` object is returned. |
|
13 |
#' |
|
14 |
#' @name teal_data-class |
|
15 |
#' @rdname teal_data-class |
|
16 |
#' |
|
17 |
#' @slot env (`environment`) environment containing data sets and possibly auxiliary variables. |
|
18 |
#' Access variables with [get_var()] or [`[[`]. |
|
19 |
#' No setter provided. Evaluate code to add variables into `@env`. |
|
20 |
#' @slot code (`character`) vector representing code necessary to reproduce the contents of `@env`. |
|
21 |
#' Access with [get_code()]. |
|
22 |
#' No setter provided. Evaluate code to append code to the slot. |
|
23 |
#' @slot id (`integer`) random identifier assigned to each element of `@code`. Used internally. |
|
24 |
#' @slot warnings (`character`) vector of warnings raised when evaluating code. |
|
25 |
#' Access with [get_warnings()]. |
|
26 |
#' @slot messages (`character`) vector of messages raised when evaluating code. |
|
27 |
#' @slot join_keys (`join_keys`) object specifying joining keys for data sets in `@env`. |
|
28 |
#' Access or modify with [join_keys()]. |
|
29 |
#' @slot datanames (`character`) vector of names of data sets in `@env`. |
|
30 |
#' Used internally to distinguish them from auxiliary variables. |
|
31 |
#' Access or modify with [datanames()]. |
|
32 |
#' @slot verified (`logical(1)`) flag signifying that code in `@code` has been proven to yield contents of `@env`. |
|
33 |
#' Used internally. See [`verify()`] for more details. |
|
34 |
#' |
|
35 |
#' @import teal.code |
|
36 |
#' @keywords internal |
|
37 |
setClass( |
|
38 |
Class = "teal_data", |
|
39 |
contains = "qenv", |
|
40 |
slots = c(join_keys = "join_keys", datanames = "character", verified = "logical"), |
|
41 |
prototype = list( |
|
42 |
join_keys = join_keys(), |
|
43 |
datanames = character(0), |
|
44 |
verified = logical(0) |
|
45 |
) |
|
46 |
) |
|
47 | ||
48 |
#' Initialize `teal_data` object |
|
49 |
#' |
|
50 |
#' @name new_teal_data |
|
51 |
#' |
|
52 |
#' @param data (`named list`) of data objects. |
|
53 |
#' @param code (`character` or `language`) code to reproduce the `data`. |
|
54 |
#' Accepts and stores comments also. |
|
55 |
#' @param join_keys (`join_keys`) object |
|
56 |
#' @param datanames (`character`) names of datasets passed to `data`. |
|
57 |
#' Needed when non-dataset objects are needed in the `env` slot. |
|
58 |
#' @rdname new_teal_data |
|
59 |
#' @keywords internal |
|
60 |
new_teal_data <- function(data, |
|
61 |
code = character(0), |
|
62 |
join_keys = join_keys(), |
|
63 |
datanames = union(names(data), names(join_keys))) { |
|
64 | 76x |
checkmate::assert_list(data) |
65 | 76x |
checkmate::assert_class(join_keys, "join_keys") |
66 | 45x |
if (is.null(datanames)) datanames <- character(0) # todo: allow to specify |
67 | 76x |
checkmate::assert_character(datanames) |
68 | 76x |
if (!any(is.language(code), is.character(code))) { |
69 | ! |
stop("`code` must be a character or language object.") |
70 |
} |
|
71 | ||
72 | 76x |
if (is.language(code)) { |
73 | 2x |
code <- paste(lang2calls(code), collapse = "\n") |
74 |
} |
|
75 | 76x |
if (length(code)) { |
76 | 13x |
code <- paste(code, collapse = "\n") |
77 |
} |
|
78 | 76x |
verified <- (length(code) == 0L && length(data) == 0L) |
79 | ||
80 | 76x |
id <- sample.int(.Machine$integer.max, size = length(code)) |
81 | ||
82 | 76x |
new_env <- rlang::env_clone(list2env(data), parent = parent.env(.GlobalEnv)) |
83 | 76x |
lockEnvironment(new_env, bindings = TRUE) |
84 | ||
85 | 76x |
methods::new( |
86 | 76x |
"teal_data", |
87 | 76x |
env = new_env, |
88 | 76x |
code = code, |
89 | 76x |
warnings = rep("", length(code)), |
90 | 76x |
messages = rep("", length(code)), |
91 | 76x |
id = id, |
92 | 76x |
join_keys = join_keys, |
93 | 76x |
datanames = datanames, |
94 | 76x |
verified = verified |
95 |
) |
|
96 |
} |
1 |
#' @rdname join_keys |
|
2 |
#' @order 4 |
|
3 |
#' @export |
|
4 |
#' @examples |
|
5 |
#' # Merging multiple `join_keys` objects --- |
|
6 |
#' |
|
7 |
#' jk_merged <- c( |
|
8 |
#' jk, |
|
9 |
#' join_keys( |
|
10 |
#' join_key("ds4", keys = c("pk4", "pk4_2")), |
|
11 |
#' join_key("ds3", "ds4", c(pk3 = "pk4_2")) |
|
12 |
#' ) |
|
13 |
#' ) |
|
14 |
c.join_keys <- function(...) { |
|
15 | 125x |
x <- rlang::list2(...) |
16 | 125x |
checkmate::assert_list(x, types = c("join_keys", "join_key_set")) |
17 | ||
18 | 122x |
Reduce( |
19 | 122x |
init = join_keys(), |
20 | 122x |
x = x, |
21 | 122x |
f = function(.x, .y) { |
22 | 394x |
out <- utils::modifyList(.x, .y, keep.null = FALSE) |
23 | 394x |
parents(out) <- utils::modifyList(attr(.x, "parents"), attr(.y, "parents"), keep.null = FALSE) |
24 | 392x |
out |
25 |
} |
|
26 |
) |
|
27 |
} |
|
28 | ||
29 |
#' @rdname join_keys |
|
30 |
#' @order 4 |
|
31 |
#' @export |
|
32 |
#' @examples |
|
33 |
#' # note: merge can be performed with both join_keys and join_key_set |
|
34 |
#' |
|
35 |
#' jk_merged <- c( |
|
36 |
#' jk_merged, |
|
37 |
#' join_key("ds5", keys = "pk5"), |
|
38 |
#' join_key("ds1", "ds5", c(pk1 = "pk5")) |
|
39 |
#' ) |
|
40 |
c.join_key_set <- function(...) { |
|
41 | 2x |
c.join_keys(...) |
42 |
} |
1 |
#' Data input for `teal` app |
|
2 |
#' |
|
3 |
#' @description |
|
4 |
#' `r lifecycle::badge("stable")` |
|
5 |
#' |
|
6 |
#' Function is a wrapper around [teal_data()] and guesses `join_keys` |
|
7 |
#' for given datasets whose names match ADAM datasets names. |
|
8 |
#' |
|
9 |
#' @inheritParams teal_data |
|
10 |
#' @param join_keys (`join_keys`) object or a single (`join_key_set`) object |
|
11 |
#' |
|
12 |
#' (optional) object with datasets column names used for joining. |
|
13 |
#' If empty then it would be automatically derived basing on intersection of datasets primary keys. |
|
14 |
#' For ADAM datasets it would be automatically derived. |
|
15 |
#' |
|
16 |
#' @return A `teal_data` object. |
|
17 |
#' |
|
18 |
#' @details This function checks if there were keys added to all data sets. |
|
19 |
#' |
|
20 |
#' @examples |
|
21 |
#' data <- cdisc_data( |
|
22 |
#' join_keys = join_keys( |
|
23 |
#' join_key("ADSL", "ADTTE", c("STUDYID" = "STUDYID", "USUBJID" = "USUBJID")) |
|
24 |
#' ) |
|
25 |
#' ) |
|
26 |
#' |
|
27 |
#' data <- within(data, { |
|
28 |
#' ADSL <- example_cdisc_data("ADSL") |
|
29 |
#' ADTTE <- example_cdisc_data("ADTTE") |
|
30 |
#' }) |
|
31 |
#' |
|
32 |
#' @export |
|
33 |
#' |
|
34 |
cdisc_data <- function(..., |
|
35 |
join_keys = teal.data::default_cdisc_join_keys[names(rlang::list2(...))], |
|
36 |
code = character(0), |
|
37 |
check) { |
|
38 | 1x |
teal_data(..., join_keys = join_keys, code = code, check = check) |
39 |
} |